-- Burmddit Database Schema
-- Target engine: PostgreSQL
--
-- This script is written to be re-runnable: tables and indexes use
-- IF NOT EXISTS, the view and functions use CREATE OR REPLACE, the trigger is
-- dropped before it is recreated, and seed inserts skip rows that already exist.

-- ---------------------------------------------------------------------------
-- Categories
-- ---------------------------------------------------------------------------
CREATE TABLE IF NOT EXISTS categories (
    id           SERIAL PRIMARY KEY,
    name         VARCHAR(100) NOT NULL UNIQUE,
    name_burmese VARCHAR(100) NOT NULL,
    slug         VARCHAR(100) NOT NULL UNIQUE,
    description  TEXT,
    created_at   TIMESTAMP DEFAULT CURRENT_TIMESTAMP
);

-- Default categories.
-- ON CONFLICT has no target so that a clash on either unique column
-- (name or slug) skips the row instead of aborting the script.
INSERT INTO categories (name, name_burmese, slug, description)
VALUES
    ('AI News', 'AI သတင်းများ', 'ai-news', 'Latest AI industry news and updates'),
    ('AI Tutorials', 'AI သင်ခန်းစာများ', 'tutorials', 'Step-by-step guides and how-tos'),
    ('Tips & Tricks', 'အကြံပြုချက်များ', 'tips-tricks', 'Productivity hacks and best practices'),
    ('Upcoming Releases', 'လာမည့် ထုတ်ပြန်မှုများ', 'upcoming', 'New AI models, tools, and products')
ON CONFLICT DO NOTHING;

-- ---------------------------------------------------------------------------
-- Articles
-- ---------------------------------------------------------------------------
CREATE TABLE IF NOT EXISTS articles (
    id               SERIAL PRIMARY KEY,
    title            TEXT NOT NULL,
    title_burmese    TEXT NOT NULL,
    slug             VARCHAR(200) NOT NULL UNIQUE,
    content          TEXT NOT NULL,
    content_burmese  TEXT NOT NULL,
    excerpt          TEXT,
    excerpt_burmese  TEXT,
    category_id      INTEGER REFERENCES categories(id),

    -- Metadata
    author           VARCHAR(200) DEFAULT 'Burmddit AI',
    reading_time     INTEGER,   -- in minutes
    featured_image   TEXT,
    images           TEXT[],    -- multiple images
    videos           TEXT[],    -- video embeds (YouTube, etc.)

    -- SEO
    meta_description TEXT,
    meta_keywords    TEXT[],

    -- Source tracking
    source_articles  JSONB,     -- array of source URLs
    original_sources TEXT[],

    -- Status
    status           VARCHAR(20) DEFAULT 'draft',  -- draft, published, archived
    published_at     TIMESTAMP,

    -- Analytics
    view_count       INTEGER DEFAULT 0,
    share_count      INTEGER DEFAULT 0,

    -- Timestamps
    created_at       TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
    updated_at       TIMESTAMP DEFAULT CURRENT_TIMESTAMP
);

-- Indexes. IF NOT EXISTS keeps the script re-runnable alongside the
-- CREATE TABLE IF NOT EXISTS statements.
-- NOTE(review): the UNIQUE constraint on articles.slug already provides an
-- index in PostgreSQL, so idx_articles_slug looks redundant; it is kept here
-- only so existing index names do not change.
CREATE INDEX IF NOT EXISTS idx_articles_slug ON articles (slug);
CREATE INDEX IF NOT EXISTS idx_articles_category ON articles (category_id);
CREATE INDEX IF NOT EXISTS idx_articles_status ON articles (status);
CREATE INDEX IF NOT EXISTS idx_articles_published ON articles (published_at DESC);
CREATE INDEX IF NOT EXISTS idx_articles_views ON articles (view_count DESC);

-- Full-text search index over the Burmese title and body, using the
-- 'simple' text search configuration.
CREATE INDEX IF NOT EXISTS idx_articles_search
    ON articles
    USING gin (to_tsvector('simple', title_burmese || ' ' || content_burmese));

-- ---------------------------------------------------------------------------
-- Raw scraped articles (before processing)
-- ---------------------------------------------------------------------------
CREATE TABLE IF NOT EXISTS raw_articles (
    id             SERIAL PRIMARY KEY,
    url            TEXT NOT NULL UNIQUE,
    title          TEXT NOT NULL,
    content        TEXT NOT NULL,
    author         VARCHAR(200),
    published_date TIMESTAMP,
    source         VARCHAR(100),  -- medium, techcrunch, etc
    category_hint  VARCHAR(50),   -- detected category

    -- Processing status
    processed      BOOLEAN DEFAULT FALSE,
    compiled_into  INTEGER REFERENCES articles(id),

    -- Timestamps
    scraped_at     TIMESTAMP DEFAULT CURRENT_TIMESTAMP
);

CREATE INDEX IF NOT EXISTS idx_raw_articles_processed ON raw_articles (processed);
CREATE INDEX IF NOT EXISTS idx_raw_articles_source ON raw_articles (source);

-- ---------------------------------------------------------------------------
-- Tags
-- ---------------------------------------------------------------------------
CREATE TABLE IF NOT EXISTS tags (
    id            SERIAL PRIMARY KEY,
    name          VARCHAR(100) NOT NULL UNIQUE,
    name_burmese  VARCHAR(100),
    slug          VARCHAR(100) NOT NULL UNIQUE,
    article_count INTEGER DEFAULT 0,
    created_at    TIMESTAMP DEFAULT CURRENT_TIMESTAMP
);

-- Article-Tag junction table; rows disappear with either parent.
CREATE TABLE IF NOT EXISTS article_tags (
    article_id INTEGER REFERENCES articles(id) ON DELETE CASCADE,
    tag_id     INTEGER REFERENCES tags(id) ON DELETE CASCADE,
    PRIMARY KEY (article_id, tag_id)
);

-- ---------------------------------------------------------------------------
-- Analytics tracking
-- ---------------------------------------------------------------------------
CREATE TABLE IF NOT EXISTS page_views (
    id         SERIAL PRIMARY KEY,
    article_id INTEGER REFERENCES articles(id) ON DELETE CASCADE,
    ip_hash    VARCHAR(64),  -- hashed IP for privacy
    user_agent TEXT,
    referrer   TEXT,
    country    VARCHAR(2),
    viewed_at  TIMESTAMP DEFAULT CURRENT_TIMESTAMP
);

CREATE INDEX IF NOT EXISTS idx_page_views_article ON page_views (article_id);
CREATE INDEX IF NOT EXISTS idx_page_views_date ON page_views (viewed_at);

-- ---------------------------------------------------------------------------
-- Newsletter subscribers
-- ---------------------------------------------------------------------------
CREATE TABLE IF NOT EXISTS subscribers (
    id              SERIAL PRIMARY KEY,
    email           VARCHAR(255) NOT NULL UNIQUE,
    status          VARCHAR(20) DEFAULT 'active',  -- active, unsubscribed
    subscribed_at   TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
    unsubscribed_at TIMESTAMP
);

-- ---------------------------------------------------------------------------
-- Pipeline logs (for monitoring)
-- ---------------------------------------------------------------------------
CREATE TABLE IF NOT EXISTS pipeline_logs (
    id                 SERIAL PRIMARY KEY,
    pipeline_run       TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
    stage              VARCHAR(50),  -- crawl, cluster, compile, translate, publish
    status             VARCHAR(20),  -- started, completed, failed
    articles_processed INTEGER,
    error_message      TEXT,
    duration_seconds   INTEGER,
    created_at         TIMESTAMP DEFAULT CURRENT_TIMESTAMP
);

CREATE INDEX IF NOT EXISTS idx_pipeline_logs_run ON pipeline_logs (pipeline_run);

-- ---------------------------------------------------------------------------
-- View: published articles with category info
-- ---------------------------------------------------------------------------
-- The inner join means articles without a category do not appear in the view.
CREATE OR REPLACE VIEW published_articles AS
SELECT
    a.id,
    a.title,
    a.title_burmese,
    a.slug,
    a.excerpt_burmese,
    a.featured_image,
    a.reading_time,
    a.view_count,
    a.published_at,
    c.name AS category_name,
    c.name_burmese AS category_name_burmese,
    c.slug AS category_slug
FROM articles AS a
INNER JOIN categories AS c
    ON a.category_id = c.id
WHERE a.status = 'published'
ORDER BY a.published_at DESC;

-- ---------------------------------------------------------------------------
-- Function: increment_view_count(article_slug)
-- ---------------------------------------------------------------------------
-- Adds one to view_count for the article whose slug matches article_slug and
-- stamps updated_at. Affects no rows when the slug does not exist.
CREATE OR REPLACE FUNCTION increment_view_count(article_slug VARCHAR)
RETURNS VOID AS $$
BEGIN
    UPDATE articles
    SET view_count = view_count + 1,
        updated_at = CURRENT_TIMESTAMP
    WHERE slug = article_slug;
END;
$$ LANGUAGE plpgsql;

-- ---------------------------------------------------------------------------
-- Function: get_trending_articles(limit_count)
-- ---------------------------------------------------------------------------
-- Returns up to limit_count (default 10) published articles whose
-- published_at falls within the last 7 days, highest view_count first.
-- Every column reference is table-qualified because the RETURNS TABLE column
-- names (id, slug, view_count, ...) are also PL/pgSQL variables inside the body.
CREATE OR REPLACE FUNCTION get_trending_articles(limit_count INTEGER DEFAULT 10)
RETURNS TABLE (
    id                    INTEGER,
    title_burmese         TEXT,
    slug                  VARCHAR,
    view_count            INTEGER,
    category_name_burmese VARCHAR
) AS $$
BEGIN
    RETURN QUERY
    SELECT
        a.id,
        a.title_burmese,
        a.slug,
        a.view_count,
        c.name_burmese
    FROM articles AS a
    INNER JOIN categories AS c
        ON a.category_id = c.id
    WHERE a.status = 'published'
      AND a.published_at >= CURRENT_TIMESTAMP - INTERVAL '7 days'
    -- id is the final tie-breaker so LIMIT picks the same rows on every call
    ORDER BY a.view_count DESC, a.id DESC
    LIMIT limit_count;
END;
$$ LANGUAGE plpgsql;

-- ---------------------------------------------------------------------------
-- Function: get_related_articles(article_id_param, limit_count)
-- ---------------------------------------------------------------------------
-- Returns up to limit_count (default 5) published articles, other than
-- article_id_param itself, that either share its category or share at least
-- one tag with it, newest published_at first.
--
-- Two details matter here:
--   * No DISTINCT: each candidate comes from a single scan of articles, so
--     rows are already unique, and PostgreSQL rejects SELECT DISTINCT with an
--     ORDER BY expression (published_at) that is not in the select list.
--   * The category lookup qualifies its columns (src.id); a bare "id" would
--     be ambiguous with the "id" output column declared in RETURNS TABLE.
CREATE OR REPLACE FUNCTION get_related_articles(
    article_id_param INTEGER,
    limit_count INTEGER DEFAULT 5
)
RETURNS TABLE (
    id              INTEGER,
    title_burmese   TEXT,
    slug            VARCHAR,
    excerpt_burmese TEXT,
    featured_image  TEXT
) AS $$
BEGIN
    RETURN QUERY
    SELECT
        a.id,
        a.title_burmese,
        a.slug,
        a.excerpt_burmese,
        a.featured_image
    FROM articles AS a
    WHERE a.id != article_id_param
      AND a.status = 'published'
      AND (
          -- same category as the reference article
          a.category_id = (
              SELECT src.category_id
              FROM articles AS src
              WHERE src.id = article_id_param
          )
          -- or at least one tag in common with the reference article
          OR a.id IN (
              SELECT at2.article_id
              FROM article_tags AS at1
              INNER JOIN article_tags AS at2
                  ON at1.tag_id = at2.tag_id
              WHERE at1.article_id = article_id_param
                AND at2.article_id != article_id_param
          )
      )
    -- id is the final tie-breaker so LIMIT picks the same rows on every call
    ORDER BY a.published_at DESC, a.id DESC
    LIMIT limit_count;
END;
$$ LANGUAGE plpgsql;

-- ---------------------------------------------------------------------------
-- Trigger: keep articles.updated_at current
-- ---------------------------------------------------------------------------
-- Row-level trigger function that overwrites NEW.updated_at with the current
-- timestamp before the row is written.
CREATE OR REPLACE FUNCTION update_updated_at_column()
RETURNS TRIGGER AS $$
BEGIN
    NEW.updated_at = CURRENT_TIMESTAMP;
    RETURN NEW;
END;
$$ LANGUAGE plpgsql;

-- CREATE TRIGGER has no IF NOT EXISTS form, so drop first to stay re-runnable.
DROP TRIGGER IF EXISTS update_articles_updated_at ON articles;

CREATE TRIGGER update_articles_updated_at
    BEFORE UPDATE ON articles
    FOR EACH ROW
    EXECUTE FUNCTION update_updated_at_column();

-- ---------------------------------------------------------------------------
-- Initial data: some common tags
-- ---------------------------------------------------------------------------
-- ON CONFLICT has no target so that a clash on either unique column
-- (name or slug) skips the row instead of aborting the script.
INSERT INTO tags (name, name_burmese, slug)
VALUES
    ('ChatGPT', 'ChatGPT', 'chatgpt'),
    ('OpenAI', 'OpenAI', 'openai'),
    ('Anthropic', 'Anthropic', 'anthropic'),
    ('Google', 'Google', 'google'),
    ('Machine Learning', 'စက်သင်ယူမှု', 'machine-learning'),
    ('Deep Learning', 'နက်ရှိုင်းသောသင်ယူမှု', 'deep-learning'),
    ('GPT-4', 'GPT-4', 'gpt-4'),
    ('Claude', 'Claude', 'claude'),
    ('Prompt Engineering', 'Prompt Engineering', 'prompt-engineering'),
    ('AI Safety', 'AI ဘေးကင်းရေး', 'ai-safety')
ON CONFLICT DO NOTHING;