Initial Burmddit deployment - AI news aggregator in Burmese

2026-02-19 02:52:58 +00:00
commit dddb86ea94
27 changed files with 5039 additions and 0 deletions
--- a/database/schema.sql
+++ b/database/schema.sql
@@ -0,0 +1,266 @@
+-- Burmddit Database Schema
+-- PostgreSQL
+
+-- Article categories: an English name, a Burmese name and a unique slug per row.
+CREATE TABLE IF NOT EXISTS categories (
+    id            serial        PRIMARY KEY,
+    name          varchar(100)  UNIQUE NOT NULL,             -- English display name
+    name_burmese  varchar(100)  NOT NULL,                    -- Burmese display name
+    slug          varchar(100)  UNIQUE NOT NULL,
+    description   text,                                      -- optional
+    created_at    timestamp     DEFAULT CURRENT_TIMESTAMP    -- stored without time zone
+);
+
+-- Seed the default categories; a row is skipped when its name or its slug is already present.
+INSERT INTO categories (name, name_burmese, slug, description) VALUES
+    ('AI News', 'AI သတင်းများ', 'ai-news', 'Latest AI industry news and updates'),
+    ('AI Tutorials', 'AI သင်ခန်းစာများ', 'tutorials', 'Step-by-step guides and how-tos'),
+    ('Tips & Tricks', 'အကြံပြုချက်များ', 'tips-tricks', 'Productivity hacks and best practices'),
+    ('Upcoming Releases', 'လာမည့် ထုတ်ပြန်မှုများ', 'upcoming', 'New AI models, tools, and products')
+ON CONFLICT DO NOTHING;  -- no conflict target: name and slug are both UNIQUE, and either clash must not abort a re-run
+
+-- Articles in English and Burmese, with media, SEO, source-tracking, status and analytics columns.
+CREATE TABLE IF NOT EXISTS articles (
+    id                serial        PRIMARY KEY,
+    title             text          NOT NULL,
+    title_burmese     text          NOT NULL,
+    slug              varchar(200)  UNIQUE NOT NULL,
+    content           text          NOT NULL,
+    content_burmese   text          NOT NULL,
+    excerpt           text,
+    excerpt_burmese   text,
+    category_id       integer       REFERENCES categories (id),  -- nullable: an article may have no category
+
+    -- Metadata
+    author            varchar(200)  DEFAULT 'Burmddit AI',
+    reading_time      integer,                                   -- in minutes
+    featured_image    text,
+    images            text[],                                    -- multiple images
+    videos            text[],                                    -- video embeds (YouTube, etc.)
+
+    -- SEO
+    meta_description  text,
+    meta_keywords     text[],
+
+    -- Source tracking
+    source_articles   jsonb,                                     -- array of source URLs
+    original_sources  text[],
+
+    -- Status
+    status            varchar(20)   DEFAULT 'draft',             -- draft, published, archived
+    published_at      timestamp,
+
+    -- Analytics
+    view_count        integer       DEFAULT 0,
+    share_count       integer       DEFAULT 0,
+
+    -- Timestamps
+    created_at        timestamp     DEFAULT CURRENT_TIMESTAMP,
+    updated_at        timestamp     DEFAULT CURRENT_TIMESTAMP
+);
+
+-- Secondary indexes on articles. IF NOT EXISTS keeps the script re-runnable, like the CREATE TABLE statements.
+CREATE INDEX IF NOT EXISTS idx_articles_slug ON articles (slug);  -- NOTE(review): UNIQUE (slug) already creates an index; likely redundant
+CREATE INDEX IF NOT EXISTS idx_articles_category ON articles (category_id);
+CREATE INDEX IF NOT EXISTS idx_articles_status ON articles (status);
+CREATE INDEX IF NOT EXISTS idx_articles_published ON articles (published_at DESC);
+CREATE INDEX IF NOT EXISTS idx_articles_views ON articles (view_count DESC);
+
+-- Full-text search over the Burmese title and body ('simple' config). A query must use this same expression to use the index.
+CREATE INDEX IF NOT EXISTS idx_articles_search ON articles USING gin (to_tsvector('simple', title_burmese || ' ' || content_burmese));
+
+-- Raw scraped articles, stored as fetched before any processing.
+CREATE TABLE IF NOT EXISTS raw_articles (
+    id              serial        PRIMARY KEY,
+    url             text          UNIQUE NOT NULL,
+    title           text          NOT NULL,
+    content         text          NOT NULL,
+    author          varchar(200),
+    published_date  timestamp,
+    source          varchar(100),                             -- medium, techcrunch, etc
+    category_hint   varchar(50),                              -- detected category
+
+    -- Processing status
+    processed       boolean       DEFAULT FALSE,
+    compiled_into   integer       REFERENCES articles (id),   -- presumably the article built from this row; confirm with the pipeline
+
+    -- Timestamps
+    scraped_at      timestamp     DEFAULT CURRENT_TIMESTAMP
+);
+
+CREATE INDEX IF NOT EXISTS idx_raw_articles_processed ON raw_articles (processed);  -- IF NOT EXISTS: a second run of the script must not fail
+CREATE INDEX IF NOT EXISTS idx_raw_articles_source ON raw_articles (source);
+
+-- Tags, linked to articles through the article_tags junction table.
+CREATE TABLE IF NOT EXISTS tags (
+    id             serial        PRIMARY KEY,
+    name           varchar(100)  UNIQUE NOT NULL,
+    name_burmese   varchar(100),                              -- optional Burmese label
+    slug           varchar(100)  UNIQUE NOT NULL,
+    article_count  integer       DEFAULT 0,                   -- NOTE(review): nothing in this script keeps it in sync
+    created_at     timestamp     DEFAULT CURRENT_TIMESTAMP
+);
+
+-- Article-tag junction table; a link row is removed when its article or its tag is deleted.
+CREATE TABLE IF NOT EXISTS article_tags (
+    article_id  integer  REFERENCES articles (id) ON DELETE CASCADE,
+    tag_id      integer  REFERENCES tags (id)     ON DELETE CASCADE,
+    PRIMARY KEY (article_id, tag_id)              -- at most one row per (article, tag) pair
+);
+
+-- Analytics tracking: recorded article views; rows are deleted together with their article.
+CREATE TABLE IF NOT EXISTS page_views (
+    id          serial       PRIMARY KEY,
+    article_id  integer      REFERENCES articles (id) ON DELETE CASCADE,
+    ip_hash     varchar(64),                             -- hashed IP for privacy
+    user_agent  text,
+    referrer    text,
+    country     varchar(2),                              -- presumably a two-letter country code; confirm with the writer
+    viewed_at   timestamp    DEFAULT CURRENT_TIMESTAMP
+);
+
+CREATE INDEX IF NOT EXISTS idx_page_views_article ON page_views (article_id);  -- IF NOT EXISTS: a second run of the script must not fail
+CREATE INDEX IF NOT EXISTS idx_page_views_date ON page_views (viewed_at);
+
+-- Newsletter subscribers; each e-mail address can appear only once.
+CREATE TABLE IF NOT EXISTS subscribers (
+    id               serial        PRIMARY KEY,
+    email            varchar(255)  UNIQUE NOT NULL,
+    status           varchar(20)   DEFAULT 'active',            -- active, unsubscribed
+    subscribed_at    timestamp     DEFAULT CURRENT_TIMESTAMP,
+    unsubscribed_at  timestamp                                  -- no default: NULL unless a writer sets it
+);
+
+-- Pipeline logs (for monitoring).
+CREATE TABLE IF NOT EXISTS pipeline_logs (
+    id                  serial       PRIMARY KEY,
+    pipeline_run        timestamp    DEFAULT CURRENT_TIMESTAMP,
+    stage               varchar(50),                            -- crawl, cluster, compile, translate, publish
+    status              varchar(20),                            -- started, completed, failed
+    articles_processed  integer,
+    error_message       text,
+    duration_seconds    integer,
+    created_at          timestamp    DEFAULT CURRENT_TIMESTAMP
+);
+
+CREATE INDEX IF NOT EXISTS idx_pipeline_logs_run ON pipeline_logs (pipeline_run);  -- IF NOT EXISTS: a second run of the script must not fail
+
+-- Published articles with category info. The inner join leaves out articles whose category_id is NULL.
+CREATE OR REPLACE VIEW published_articles AS
+SELECT
+    a.id,
+    a.title,
+    a.title_burmese,
+    a.slug,
+    a.excerpt_burmese,
+    a.featured_image,
+    a.reading_time,
+    a.view_count,
+    a.published_at,
+    c.name AS category_name,
+    c.name_burmese AS category_name_burmese,
+    c.slug AS category_slug
+FROM articles AS a
+INNER JOIN categories AS c ON c.id = a.category_id
+WHERE a.status = 'published'
+ORDER BY a.published_at DESC NULLS LAST;  -- plain DESC lists NULL published_at first; callers needing a firm order should still sort themselves
+
+-- Adds one view to the article whose slug equals article_slug; does nothing when no article matches.
+CREATE OR REPLACE FUNCTION increment_view_count(article_slug VARCHAR)
+RETURNS VOID AS $$
+BEGIN
+    UPDATE articles
+    SET view_count = COALESCE(view_count, 0) + 1,  -- view_count is nullable; NULL + 1 would leave it NULL forever
+        updated_at = CURRENT_TIMESTAMP             -- NOTE(review): a page view also bumps updated_at; confirm this is intended
+    WHERE slug = article_slug;
+END;
+$$ LANGUAGE plpgsql;
+
+-- Function to get trending articles: published within the last 7 days, ranked by their total view_count.
+CREATE OR REPLACE FUNCTION get_trending_articles(limit_count INTEGER DEFAULT 10)
+RETURNS TABLE (
+    id INTEGER,
+    title_burmese TEXT,
+    slug VARCHAR,
+    view_count INTEGER,
+    category_name_burmese VARCHAR
+) AS $$
+BEGIN
+    RETURN QUERY
+    SELECT
+        a.id,
+        a.title_burmese,
+        a.slug,
+        a.view_count,
+        c.name_burmese
+    FROM articles AS a
+    INNER JOIN categories AS c ON c.id = a.category_id  -- articles without a category are not returned
+    WHERE a.status = 'published'
+        AND a.published_at >= CURRENT_TIMESTAMP - INTERVAL '7 days'
+    ORDER BY a.view_count DESC NULLS LAST, a.published_at DESC, a.id DESC  -- NULL counts rank last; extra keys make ties deterministic
+    LIMIT limit_count;
+END;
+$$ LANGUAGE plpgsql;
+
+-- Function to get related articles: published articles sharing the category or at least one tag, newest first.
+CREATE OR REPLACE FUNCTION get_related_articles(article_id_param INTEGER, limit_count INTEGER DEFAULT 5)
+RETURNS TABLE (
+    id INTEGER,
+    title_burmese TEXT,
+    slug VARCHAR,
+    excerpt_burmese TEXT,
+    featured_image TEXT
+) AS $$
+BEGIN
+    -- The RETURNS TABLE names (id, slug, ...) are PL/pgSQL variables here, so every column below is table-qualified.
+    -- No DISTINCT: there is one row per article (no outer join), and DISTINCT would forbid ORDER BY published_at.
+    RETURN QUERY
+    SELECT
+        a.id,
+        a.title_burmese,
+        a.slug,
+        a.excerpt_burmese,
+        a.featured_image
+    FROM articles AS a
+    WHERE a.id <> article_id_param
+        AND a.status = 'published'
+        AND (
+            a.category_id = (SELECT src.category_id FROM articles AS src WHERE src.id = article_id_param)
+            OR EXISTS (
+                SELECT 1 FROM article_tags AS mine
+                INNER JOIN article_tags AS theirs ON theirs.tag_id = mine.tag_id
+                WHERE mine.article_id = article_id_param AND theirs.article_id = a.id
+            )
+        )
+    ORDER BY a.published_at DESC NULLS LAST, a.id DESC
+    LIMIT limit_count;
+END;
+$$ LANGUAGE plpgsql;
+
+-- Trigger function: stamps updated_at on the row being written with the current timestamp.
+CREATE OR REPLACE FUNCTION update_updated_at_column()
+RETURNS trigger LANGUAGE plpgsql AS $body$
+BEGIN
+    NEW.updated_at := CURRENT_TIMESTAMP;  -- replaces whatever updated_at value the statement supplied
+    RETURN NEW;
+END;
+$body$;
+
+DROP TRIGGER IF EXISTS update_articles_updated_at ON articles;  -- CREATE TRIGGER has no IF NOT EXISTS; drop first so a re-run succeeds
+CREATE TRIGGER update_articles_updated_at
+BEFORE UPDATE ON articles  -- row-level: refreshes updated_at on every UPDATE of articles
+FOR EACH ROW EXECUTE FUNCTION update_updated_at_column();
+
+-- Initial data: some common tags; a row is skipped when its name or its slug is already present.
+INSERT INTO tags (name, name_burmese, slug) VALUES
+    ('ChatGPT', 'ChatGPT', 'chatgpt'),
+    ('OpenAI', 'OpenAI', 'openai'),
+    ('Anthropic', 'Anthropic', 'anthropic'),
+    ('Google', 'Google', 'google'),
+    ('Machine Learning', 'စက်သင်ယူမှု', 'machine-learning'),
+    ('Deep Learning', 'နက်ရှိုင်းသောသင်ယူမှု', 'deep-learning'),
+    ('GPT-4', 'GPT-4', 'gpt-4'),
+    ('Claude', 'Claude', 'claude'),
+    ('Prompt Engineering', 'Prompt Engineering', 'prompt-engineering'),
+    ('AI Safety', 'AI ဘေးကင်းရေး', 'ai-safety')
+ON CONFLICT DO NOTHING;  -- no conflict target: name and slug are both UNIQUE, and either clash must not abort a re-run