-- Burmddit Database Schema
-- PostgreSQL (PL/pgSQL)
|
|
|
|
-- Categories table.
-- Each category carries a display name plus a Burmese display name; both
-- name and slug must be unique across the table.
CREATE TABLE IF NOT EXISTS categories (
    id           SERIAL,
    name         VARCHAR(100) NOT NULL,
    name_burmese VARCHAR(100) NOT NULL,
    slug         VARCHAR(100) NOT NULL,
    description  TEXT,
    created_at   TIMESTAMP DEFAULT CURRENT_TIMESTAMP,

    -- Constraint names match the ones PostgreSQL generates for inline
    -- PRIMARY KEY / UNIQUE declarations.
    CONSTRAINT categories_pkey     PRIMARY KEY (id),
    CONSTRAINT categories_name_key UNIQUE (name),
    CONSTRAINT categories_slug_key UNIQUE (slug)
);
|
|
|
|
-- Seed the default categories.
-- categories has UNIQUE constraints on both name and slug, so the conflict
-- clause deliberately names no target: a pre-existing row that collides on
-- either column is skipped instead of aborting the script.
INSERT INTO categories (name, name_burmese, slug, description)
VALUES
    ('AI News', 'AI သတင်းများ', 'ai-news', 'Latest AI industry news and updates'),
    ('AI Tutorials', 'AI သင်ခန်းစာများ', 'tutorials', 'Step-by-step guides and how-tos'),
    ('Tips & Tricks', 'အကြံပြုချက်များ', 'tips-tricks', 'Productivity hacks and best practices'),
    ('Upcoming Releases', 'လာမည့် ထုတ်ပြန်မှုများ', 'upcoming', 'New AI models, tools, and products')
ON CONFLICT DO NOTHING;
|
|
|
|
-- Articles table.
-- Title, content and excerpt each have a Burmese counterpart column;
-- the Burmese title and content are mandatory, the excerpts are optional.
CREATE TABLE IF NOT EXISTS articles (
    id               SERIAL,
    title            TEXT NOT NULL,
    title_burmese    TEXT NOT NULL,
    slug             VARCHAR(200) NOT NULL,
    content          TEXT NOT NULL,
    content_burmese  TEXT NOT NULL,
    excerpt          TEXT,
    excerpt_burmese  TEXT,
    category_id      INTEGER,      -- optional link to categories.id

    -- Metadata
    author           VARCHAR(200) DEFAULT 'Burmddit AI',
    reading_time     INTEGER,      -- in minutes
    featured_image   TEXT,
    images           TEXT[],       -- multiple images
    videos           TEXT[],       -- video embeds (YouTube, etc.)

    -- SEO
    meta_description TEXT,
    meta_keywords    TEXT[],

    -- Source tracking
    source_articles  JSONB,        -- array of source URLs
    original_sources TEXT[],

    -- Status: draft, published, archived (no constraint enforces this list)
    status           VARCHAR(20) DEFAULT 'draft',
    published_at     TIMESTAMP,

    -- Analytics (counters default to 0 but are nullable)
    view_count       INTEGER DEFAULT 0,
    share_count      INTEGER DEFAULT 0,

    -- Timestamps
    created_at       TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
    updated_at       TIMESTAMP DEFAULT CURRENT_TIMESTAMP,

    -- Constraint names match the ones PostgreSQL generates for inline
    -- PRIMARY KEY / UNIQUE / REFERENCES declarations.
    CONSTRAINT articles_pkey PRIMARY KEY (id),
    CONSTRAINT articles_slug_key UNIQUE (slug),
    CONSTRAINT articles_category_id_fkey
        FOREIGN KEY (category_id) REFERENCES categories (id)
);
|
|
|
|
-- Indexes on articles.
-- IF NOT EXISTS keeps this script re-runnable, in line with the
-- CREATE TABLE IF NOT EXISTS / ON CONFLICT statements around it.
-- NOTE(review): idx_articles_slug duplicates the index that already backs
-- the UNIQUE constraint on articles.slug; kept in case something refers to
-- it by name.
CREATE INDEX IF NOT EXISTS idx_articles_slug ON articles (slug);
CREATE INDEX IF NOT EXISTS idx_articles_category ON articles (category_id);
CREATE INDEX IF NOT EXISTS idx_articles_status ON articles (status);
CREATE INDEX IF NOT EXISTS idx_articles_published ON articles (published_at DESC);
CREATE INDEX IF NOT EXISTS idx_articles_views ON articles (view_count DESC);

-- Full-text search index over the Burmese title and content.
-- This is an expression index: a query can only use it when it repeats the
-- same to_tsvector('simple', title_burmese || ' ' || content_burmese)
-- expression.
CREATE INDEX IF NOT EXISTS idx_articles_search
    ON articles
    USING gin (to_tsvector('simple', title_burmese || ' ' || content_burmese));
|
|
|
|
-- Raw scraped articles (before processing).
-- One row per source URL; compiled_into optionally points at the articles
-- row this raw item was compiled into.
CREATE TABLE IF NOT EXISTS raw_articles (
    id             SERIAL,
    url            TEXT NOT NULL,
    title          TEXT NOT NULL,
    content        TEXT NOT NULL,
    author         VARCHAR(200),
    published_date TIMESTAMP,
    source         VARCHAR(100),   -- medium, techcrunch, etc
    category_hint  VARCHAR(50),    -- detected category

    -- Processing status
    processed      BOOLEAN DEFAULT FALSE,
    compiled_into  INTEGER,

    -- Timestamps
    scraped_at     TIMESTAMP DEFAULT CURRENT_TIMESTAMP,

    -- Constraint names match the ones PostgreSQL generates for inline
    -- PRIMARY KEY / UNIQUE / REFERENCES declarations.
    CONSTRAINT raw_articles_pkey PRIMARY KEY (id),
    CONSTRAINT raw_articles_url_key UNIQUE (url),
    CONSTRAINT raw_articles_compiled_into_fkey
        FOREIGN KEY (compiled_into) REFERENCES articles (id)
);
|
|
|
|
-- Indexes on raw_articles for filtering by processing state and by source.
-- IF NOT EXISTS keeps the script re-runnable.
CREATE INDEX IF NOT EXISTS idx_raw_articles_processed ON raw_articles (processed);
CREATE INDEX IF NOT EXISTS idx_raw_articles_source ON raw_articles (source);
|
|
|
|
-- Tags table.
-- name and slug are each unique; the Burmese name is optional here.
CREATE TABLE IF NOT EXISTS tags (
    id            SERIAL,
    name          VARCHAR(100) NOT NULL,
    name_burmese  VARCHAR(100),
    slug          VARCHAR(100) NOT NULL,
    article_count INTEGER DEFAULT 0,   -- nothing in this script maintains this counter
    created_at    TIMESTAMP DEFAULT CURRENT_TIMESTAMP,

    -- Constraint names match the ones PostgreSQL generates for inline
    -- PRIMARY KEY / UNIQUE declarations.
    CONSTRAINT tags_pkey     PRIMARY KEY (id),
    CONSTRAINT tags_name_key UNIQUE (name),
    CONSTRAINT tags_slug_key UNIQUE (slug)
);
|
|
|
|
-- Article-Tag junction table.
-- One row per (article, tag) pair; rows are removed automatically when
-- either the article or the tag is deleted.
CREATE TABLE IF NOT EXISTS article_tags (
    article_id INTEGER,
    tag_id     INTEGER,

    -- Constraint names match the ones PostgreSQL generates by default.
    CONSTRAINT article_tags_pkey PRIMARY KEY (article_id, tag_id),
    CONSTRAINT article_tags_article_id_fkey
        FOREIGN KEY (article_id) REFERENCES articles (id) ON DELETE CASCADE,
    CONSTRAINT article_tags_tag_id_fkey
        FOREIGN KEY (tag_id) REFERENCES tags (id) ON DELETE CASCADE
);
|
|
|
|
-- Analytics tracking: one row per recorded page view.
-- Rows are removed automatically when the referenced article is deleted.
CREATE TABLE IF NOT EXISTS page_views (
    id         SERIAL,
    article_id INTEGER,
    ip_hash    VARCHAR(64),   -- hashed IP for privacy
    user_agent TEXT,
    referrer   TEXT,
    country    VARCHAR(2),
    viewed_at  TIMESTAMP DEFAULT CURRENT_TIMESTAMP,

    -- Constraint names match the ones PostgreSQL generates for inline
    -- PRIMARY KEY / REFERENCES declarations.
    CONSTRAINT page_views_pkey PRIMARY KEY (id),
    CONSTRAINT page_views_article_id_fkey
        FOREIGN KEY (article_id) REFERENCES articles (id) ON DELETE CASCADE
);
|
|
|
|
-- Indexes on page_views for per-article lookups and date-range scans.
-- IF NOT EXISTS keeps the script re-runnable.
CREATE INDEX IF NOT EXISTS idx_page_views_article ON page_views (article_id);
CREATE INDEX IF NOT EXISTS idx_page_views_date ON page_views (viewed_at);
|
|
|
|
-- Newsletter subscribers.
-- email is unique (the comparison is case-sensitive, as for any VARCHAR).
CREATE TABLE IF NOT EXISTS subscribers (
    id              SERIAL,
    email           VARCHAR(255) NOT NULL,
    status          VARCHAR(20) DEFAULT 'active',   -- active, unsubscribed
    subscribed_at   TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
    unsubscribed_at TIMESTAMP,

    -- Constraint names match the ones PostgreSQL generates for inline
    -- PRIMARY KEY / UNIQUE declarations.
    CONSTRAINT subscribers_pkey      PRIMARY KEY (id),
    CONSTRAINT subscribers_email_key UNIQUE (email)
);
|
|
|
|
-- Pipeline logs (for monitoring): one row per logged pipeline stage event.
CREATE TABLE IF NOT EXISTS pipeline_logs (
    id                 SERIAL,
    pipeline_run       TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
    stage              VARCHAR(50),   -- crawl, cluster, compile, translate, publish
    status             VARCHAR(20),   -- started, completed, failed
    articles_processed INTEGER,
    error_message      TEXT,
    duration_seconds   INTEGER,
    created_at         TIMESTAMP DEFAULT CURRENT_TIMESTAMP,

    -- Same name PostgreSQL generates for an inline PRIMARY KEY.
    CONSTRAINT pipeline_logs_pkey PRIMARY KEY (id)
);
|
|
|
|
-- Index for looking up log rows by pipeline run timestamp.
-- IF NOT EXISTS keeps the script re-runnable.
CREATE INDEX IF NOT EXISTS idx_pipeline_logs_run ON pipeline_logs (pipeline_run);
|
|
|
|
-- View: published articles together with their category's names and slug.
-- Only rows with status = 'published' are exposed. Because the join is an
-- inner join, articles whose category_id is NULL do not appear.
CREATE OR REPLACE VIEW published_articles AS
SELECT
    art.id,
    art.title,
    art.title_burmese,
    art.slug,
    art.excerpt_burmese,
    art.featured_image,
    art.reading_time,
    art.view_count,
    art.published_at,
    cat.name         AS category_name,
    cat.name_burmese AS category_name_burmese,
    cat.slug         AS category_slug
FROM articles AS art
INNER JOIN categories AS cat
    ON cat.id = art.category_id
WHERE art.status = 'published'
ORDER BY art.published_at DESC;
|
|
|
|
-- Increment the view counter of the article identified by its slug.
--
-- Parameters:
--   article_slug  slug of the article to update; if no article has this
--                 slug, nothing is updated and no error is raised.
-- Returns: nothing.
--
-- articles.view_count is nullable, and NULL + 1 is NULL, so a NULL counter
-- is treated as 0; otherwise such a row could never be counted.
-- The same statement also stamps updated_at with the current time.
CREATE OR REPLACE FUNCTION increment_view_count(article_slug VARCHAR)
RETURNS VOID AS $$
BEGIN
    UPDATE articles
    SET view_count = COALESCE(view_count, 0) + 1,
        updated_at = CURRENT_TIMESTAMP
    WHERE slug = article_slug;
END;
$$ LANGUAGE plpgsql;
|
|
|
|
-- Return the most-viewed articles published within the last 7 days.
--
-- Parameters:
--   limit_count  maximum number of rows to return (default 10).
-- Returns: id, Burmese title, slug, view count and Burmese category name of
--          each matching article, highest view count first.
--
-- Only published articles are considered. The inner join excludes articles
-- whose category_id is NULL.
-- view_count is nullable and a plain DESC sort would rank NULLs first, so
-- NULLS LAST is spelled out. Ties are broken by newest publication and then
-- by id to make the result order deterministic.
CREATE OR REPLACE FUNCTION get_trending_articles(limit_count INTEGER DEFAULT 10)
RETURNS TABLE (
    id INTEGER,
    title_burmese TEXT,
    slug VARCHAR,
    view_count INTEGER,
    category_name_burmese VARCHAR
) AS $$
BEGIN
    -- Every column reference is table-qualified because the RETURNS TABLE
    -- column names are also visible as PL/pgSQL variables in this body.
    RETURN QUERY
    SELECT
        a.id,
        a.title_burmese,
        a.slug,
        a.view_count,
        c.name_burmese
    FROM articles AS a
    INNER JOIN categories AS c
        ON c.id = a.category_id
    WHERE a.status = 'published'
      AND a.published_at >= CURRENT_TIMESTAMP - INTERVAL '7 days'
    ORDER BY
        a.view_count DESC NULLS LAST,
        a.published_at DESC,
        a.id DESC
    LIMIT limit_count;
END;
$$ LANGUAGE plpgsql;
|
|
|
|
-- Return published articles related to a given article: those in the same
-- category, or sharing at least one tag with it.
--
-- Parameters:
--   article_id_param  id of the article to find relatives for; that article
--                     itself is never returned.
--   limit_count       maximum number of rows to return (default 5).
-- Returns: id, Burmese title, slug, Burmese excerpt and featured image of
--          each related article, most recently published first.
--
-- Notes:
--   * No DISTINCT is needed: each articles row can match at most once, and
--     PostgreSQL rejects SELECT DISTINCT combined with an ORDER BY column
--     (published_at) that is not in the select list.
--   * Every column reference is table-qualified. The RETURNS TABLE column
--     names (id, slug, ...) are also PL/pgSQL variables in this body, so an
--     unqualified "id" is reported as ambiguous at run time.
--   * If the given article does not exist or has no category, the category
--     comparison yields NULL and only tag matches can qualify.
CREATE OR REPLACE FUNCTION get_related_articles(article_id_param INTEGER, limit_count INTEGER DEFAULT 5)
RETURNS TABLE (
    id INTEGER,
    title_burmese TEXT,
    slug VARCHAR,
    excerpt_burmese TEXT,
    featured_image TEXT
) AS $$
BEGIN
    RETURN QUERY
    SELECT
        a.id,
        a.title_burmese,
        a.slug,
        a.excerpt_burmese,
        a.featured_image
    FROM articles AS a
    WHERE a.id <> article_id_param
      AND a.status = 'published'
      AND (
          -- Same category as the reference article.
          a.category_id = (
              SELECT src.category_id
              FROM articles AS src
              WHERE src.id = article_id_param
          )
          -- Or at least one tag in common with the reference article.
          OR EXISTS (
              SELECT 1
              FROM article_tags AS src_tag
              INNER JOIN article_tags AS other_tag
                  ON other_tag.tag_id = src_tag.tag_id
              WHERE src_tag.article_id = article_id_param
                AND other_tag.article_id = a.id
          )
      )
    ORDER BY
        a.published_at DESC NULLS LAST,
        a.id DESC
    LIMIT limit_count;
END;
$$ LANGUAGE plpgsql;
|
|
|
|
-- Trigger function: stamp the row being written with the current time.
-- Overwrites NEW.updated_at with CURRENT_TIMESTAMP and returns the modified
-- row, so it must be attached as a row-level BEFORE trigger on a table
-- that has an updated_at column.
CREATE OR REPLACE FUNCTION update_updated_at_column()
RETURNS TRIGGER
LANGUAGE plpgsql
AS $$
BEGIN
    NEW.updated_at := CURRENT_TIMESTAMP;
    RETURN NEW;
END;
$$;
|
|
|
|
-- Keep articles.updated_at current on every row update.
-- CREATE TRIGGER has no IF NOT EXISTS form, so any existing trigger of the
-- same name is dropped first; this keeps the script re-runnable.
DROP TRIGGER IF EXISTS update_articles_updated_at ON articles;

CREATE TRIGGER update_articles_updated_at
    BEFORE UPDATE ON articles
    FOR EACH ROW
    EXECUTE FUNCTION update_updated_at_column();
|
|
|
|
-- Initial data: some common tags.
-- tags has UNIQUE constraints on both name and slug, so the conflict clause
-- deliberately names no target: a pre-existing row that collides on either
-- column is skipped instead of aborting the script.
INSERT INTO tags (name, name_burmese, slug)
VALUES
    ('ChatGPT', 'ChatGPT', 'chatgpt'),
    ('OpenAI', 'OpenAI', 'openai'),
    ('Anthropic', 'Anthropic', 'anthropic'),
    ('Google', 'Google', 'google'),
    ('Machine Learning', 'စက်သင်ယူမှု', 'machine-learning'),
    ('Deep Learning', 'နက်ရှိုင်းသောသင်ယူမှု', 'deep-learning'),
    ('GPT-4', 'GPT-4', 'gpt-4'),
    ('Claude', 'Claude', 'claude'),
    ('Prompt Engineering', 'Prompt Engineering', 'prompt-engineering'),
    ('AI Safety', 'AI ဘေးကင်းရေး', 'ai-safety')
ON CONFLICT DO NOTHING;
|