Initial Burmddit deployment - AI news aggregator in Burmese

This commit is contained in:
Zeya Phyo
2026-02-19 02:52:58 +00:00
commit dddb86ea94
27 changed files with 5039 additions and 0 deletions

266
database/schema.sql Normal file
View File

@@ -0,0 +1,266 @@
-- Burmddit Database Schema
-- PostgreSQL
-- Categories table: one row per article category, with an English and a
-- Burmese name. Both name and slug must be unique.
CREATE TABLE IF NOT EXISTS categories (
    id           SERIAL,
    name         VARCHAR(100) NOT NULL,
    name_burmese VARCHAR(100) NOT NULL,
    slug         VARCHAR(100) NOT NULL,
    description  TEXT,                                  -- optional
    created_at   TIMESTAMP DEFAULT CURRENT_TIMESTAMP,   -- set on insert when omitted
    PRIMARY KEY (id),
    UNIQUE (name),
    UNIQUE (slug)
);
-- Insert default categories.
-- categories has two UNIQUE columns (name and slug). No conflict target is
-- given so that a collision on either one skips the row instead of aborting
-- the script, which keeps this seed safe to re-run.
INSERT INTO categories (name, name_burmese, slug, description) VALUES
    ('AI News', 'AI သတင်းများ', 'ai-news', 'Latest AI industry news and updates'),
    ('AI Tutorials', 'AI သင်ခန်းစာများ', 'tutorials', 'Step-by-step guides and how-tos'),
    ('Tips & Tricks', 'အကြံပြုချက်များ', 'tips-tricks', 'Productivity hacks and best practices'),
    ('Upcoming Releases', 'လာမည့် ထုတ်ပြန်မှုများ', 'upcoming', 'New AI models, tools, and products')
ON CONFLICT DO NOTHING;
-- Articles table: title, content and excerpt each have a *_burmese
-- counterpart. slug is unique; category_id optionally points at categories.
CREATE TABLE IF NOT EXISTS articles (
    id               SERIAL,
    title            TEXT NOT NULL,
    title_burmese    TEXT NOT NULL,
    slug             VARCHAR(200) NOT NULL,
    content          TEXT NOT NULL,
    content_burmese  TEXT NOT NULL,
    excerpt          TEXT,
    excerpt_burmese  TEXT,
    category_id      INTEGER,                           -- nullable; FK declared below

    -- Metadata
    author           VARCHAR(200) DEFAULT 'Burmddit AI',
    reading_time     INTEGER,                           -- in minutes
    featured_image   TEXT,
    images           TEXT[],                            -- multiple images
    videos           TEXT[],                            -- video embeds (YouTube, etc.)

    -- SEO
    meta_description TEXT,
    meta_keywords    TEXT[],

    -- Source tracking
    source_articles  JSONB,                             -- array of source URLs
    original_sources TEXT[],

    -- Status: draft, published, archived (documented values only; no CHECK
    -- constraint enforces them)
    status           VARCHAR(20) DEFAULT 'draft',
    published_at     TIMESTAMP,

    -- Analytics
    view_count       INTEGER DEFAULT 0,
    share_count      INTEGER DEFAULT 0,

    -- Timestamps
    created_at       TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
    updated_at       TIMESTAMP DEFAULT CURRENT_TIMESTAMP,

    PRIMARY KEY (id),
    UNIQUE (slug),
    FOREIGN KEY (category_id) REFERENCES categories (id)
);
-- Secondary indexes on articles.
-- IF NOT EXISTS lets this script be re-run against an existing database, in
-- line with the CREATE TABLE IF NOT EXISTS statements it accompanies.
-- NOTE(review): articles.slug is declared UNIQUE, which already gives it an
-- index; idx_articles_slug looks redundant but is kept so the set of index
-- names does not change.
CREATE INDEX IF NOT EXISTS idx_articles_slug ON articles (slug);
CREATE INDEX IF NOT EXISTS idx_articles_category ON articles (category_id);
CREATE INDEX IF NOT EXISTS idx_articles_status ON articles (status);
CREATE INDEX IF NOT EXISTS idx_articles_published ON articles (published_at DESC);
CREATE INDEX IF NOT EXISTS idx_articles_views ON articles (view_count DESC);

-- Full-text search index over the Burmese title and content.
-- A query can only use this index if it searches with the same
-- to_tsvector('simple', title_burmese || ' ' || content_burmese) expression.
-- NOTE(review): confirm that the 'simple' configuration tokenizes Burmese
-- text in a useful way.
CREATE INDEX IF NOT EXISTS idx_articles_search
    ON articles
    USING gin (to_tsvector('simple', title_burmese || ' ' || content_burmese));
-- Raw scraped articles (before processing). url is unique, so each URL is
-- stored at most once.
CREATE TABLE IF NOT EXISTS raw_articles (
    id             SERIAL,
    url            TEXT NOT NULL,
    title          TEXT NOT NULL,
    content        TEXT NOT NULL,
    author         VARCHAR(200),
    published_date TIMESTAMP,
    source         VARCHAR(100),                        -- medium, techcrunch, etc
    category_hint  VARCHAR(50),                         -- detected category

    -- Processing status
    processed      BOOLEAN DEFAULT FALSE,
    compiled_into  INTEGER,                             -- articles.id, FK declared below

    -- Timestamps
    scraped_at     TIMESTAMP DEFAULT CURRENT_TIMESTAMP,

    PRIMARY KEY (id),
    UNIQUE (url),
    -- No ON DELETE action is given, so an article that is still referenced
    -- from here cannot be deleted.
    FOREIGN KEY (compiled_into) REFERENCES articles (id)
);
-- Indexes on raw_articles; IF NOT EXISTS keeps the script re-runnable.
CREATE INDEX IF NOT EXISTS idx_raw_articles_processed ON raw_articles (processed);
CREATE INDEX IF NOT EXISTS idx_raw_articles_source ON raw_articles (source);
-- Tags table: name and slug are each unique; the Burmese name is optional.
CREATE TABLE IF NOT EXISTS tags (
    id            SERIAL,
    name          VARCHAR(100) NOT NULL,
    name_burmese  VARCHAR(100),
    slug          VARCHAR(100) NOT NULL,
    -- Stored counter starting at 0; nothing in this schema updates it.
    article_count INTEGER DEFAULT 0,
    created_at    TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
    PRIMARY KEY (id),
    UNIQUE (name),
    UNIQUE (slug)
);
-- Article-Tag junction table: one row per (article, tag) pair. Rows are
-- removed automatically when the article or the tag is deleted.
CREATE TABLE IF NOT EXISTS article_tags (
    article_id INTEGER,
    tag_id     INTEGER,
    PRIMARY KEY (article_id, tag_id),
    FOREIGN KEY (article_id) REFERENCES articles (id) ON DELETE CASCADE,
    FOREIGN KEY (tag_id) REFERENCES tags (id) ON DELETE CASCADE
);
-- Analytics tracking: one row per recorded page view. Rows are removed
-- automatically when the viewed article is deleted.
CREATE TABLE IF NOT EXISTS page_views (
    id         SERIAL,
    article_id INTEGER,
    ip_hash    VARCHAR(64),                             -- hashed IP for privacy
    user_agent TEXT,
    referrer   TEXT,
    country    VARCHAR(2),
    viewed_at  TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
    PRIMARY KEY (id),
    FOREIGN KEY (article_id) REFERENCES articles (id) ON DELETE CASCADE
);
-- Indexes on page_views; IF NOT EXISTS keeps the script re-runnable.
CREATE INDEX IF NOT EXISTS idx_page_views_article ON page_views (article_id);
CREATE INDEX IF NOT EXISTS idx_page_views_date ON page_views (viewed_at);
-- Newsletter subscribers: one row per unique e-mail address.
CREATE TABLE IF NOT EXISTS subscribers (
    id              SERIAL,
    email           VARCHAR(255) NOT NULL,
    status          VARCHAR(20) DEFAULT 'active',       -- active, unsubscribed
    subscribed_at   TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
    unsubscribed_at TIMESTAMP,                          -- NULL unless explicitly set
    PRIMARY KEY (id),
    UNIQUE (email)
);
-- Pipeline logs (for monitoring): one row per logged pipeline stage event.
CREATE TABLE IF NOT EXISTS pipeline_logs (
    id                 SERIAL,
    pipeline_run       TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
    stage              VARCHAR(50),                     -- crawl, cluster, compile, translate, publish
    status             VARCHAR(20),                     -- started, completed, failed
    articles_processed INTEGER,
    error_message      TEXT,
    duration_seconds   INTEGER,
    created_at         TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
    PRIMARY KEY (id)
);
-- Index on pipeline_logs.pipeline_run; IF NOT EXISTS keeps the script re-runnable.
CREATE INDEX IF NOT EXISTS idx_pipeline_logs_run ON pipeline_logs (pipeline_run);
-- View of published articles with their category info, newest first.
-- Because this is an inner join, a published article whose category_id is
-- NULL has no matching category row and does not appear in the view.
CREATE OR REPLACE VIEW published_articles AS
SELECT
    art.id,
    art.title,
    art.title_burmese,
    art.slug,
    art.excerpt_burmese,
    art.featured_image,
    art.reading_time,
    art.view_count,
    art.published_at,
    cat.name         AS category_name,
    cat.name_burmese AS category_name_burmese,
    cat.slug         AS category_slug
FROM articles AS art
INNER JOIN categories AS cat
    ON cat.id = art.category_id
WHERE art.status = 'published'
ORDER BY art.published_at DESC;
-- Adds one to the view counter of the article whose slug equals article_slug
-- and sets its updated_at to the current timestamp.
-- Updates no rows (and raises no error) when no article has that slug.
CREATE OR REPLACE FUNCTION increment_view_count(article_slug VARCHAR)
RETURNS VOID
LANGUAGE plpgsql
AS $$
BEGIN
    UPDATE articles AS art
       SET view_count = art.view_count + 1,
           updated_at = CURRENT_TIMESTAMP
     WHERE art.slug = article_slug;
END;
$$;
-- Function to get trending articles.
-- Returns published articles whose published_at falls within the last 7 days,
-- ordered by view_count, highest first. view_count is the article's lifetime
-- counter, so the 7-day window applies to the publication date, not to when
-- the views happened.
--   limit_count: maximum number of rows to return (default 10).
-- Articles without a category are not returned (inner join on categories).
-- Ties on view_count are broken by newest published_at, then highest id, so
-- the result order is deterministic.
CREATE OR REPLACE FUNCTION get_trending_articles(limit_count INTEGER DEFAULT 10)
RETURNS TABLE (
    id INTEGER,
    title_burmese TEXT,
    slug VARCHAR,
    view_count INTEGER,
    category_name_burmese VARCHAR
) AS $$
BEGIN
    -- Every column reference is table-qualified because the output column
    -- names (id, slug, view_count, ...) are also PL/pgSQL variables here.
    RETURN QUERY
    SELECT
        a.id,
        a.title_burmese,
        a.slug,
        a.view_count,
        c.name_burmese
    FROM articles AS a
    INNER JOIN categories AS c
        ON a.category_id = c.id
    WHERE a.status = 'published'
      AND a.published_at >= CURRENT_TIMESTAMP - INTERVAL '7 days'
    ORDER BY a.view_count DESC, a.published_at DESC, a.id DESC
    LIMIT limit_count;
END;
$$ LANGUAGE plpgsql;
-- Function to get related articles (by category and tags).
-- Returns published articles, other than article_id_param itself, that are
-- in the same category as that article OR share at least one tag with it.
-- Newest published_at first; ties are broken by highest id.
--   article_id_param: id of the article to find relatives for.
--   limit_count:      maximum number of rows to return (default 5).
-- If the source article has no category (or does not exist), only the
-- shared-tag branch can match.
CREATE OR REPLACE FUNCTION get_related_articles(article_id_param INTEGER, limit_count INTEGER DEFAULT 5)
RETURNS TABLE (
    id INTEGER,
    title_burmese TEXT,
    slug VARCHAR,
    excerpt_burmese TEXT,
    featured_image TEXT
) AS $$
BEGIN
    -- No DISTINCT: the outer query reads articles only, so each article can
    -- appear at most once. DISTINCT combined with ORDER BY on a column that
    -- is not in the select list (published_at) is rejected by PostgreSQL.
    RETURN QUERY
    SELECT
        a.id,
        a.title_burmese,
        a.slug,
        a.excerpt_burmese,
        a.featured_image
    FROM articles AS a
    WHERE a.id <> article_id_param
      AND a.status = 'published'
      AND (
          -- Same category as the source article. The inner table is aliased
          -- and its columns qualified because a bare "id" would be ambiguous
          -- with the output column named id.
          a.category_id = (
              SELECT src.category_id
              FROM articles AS src
              WHERE src.id = article_id_param
          )
          -- Or shares at least one tag with the source article.
          OR a.id IN (
              SELECT at2.article_id
              FROM article_tags AS at1
              INNER JOIN article_tags AS at2
                  ON at1.tag_id = at2.tag_id
              WHERE at1.article_id = article_id_param
                AND at2.article_id <> article_id_param
          )
      )
    ORDER BY a.published_at DESC, a.id DESC
    LIMIT limit_count;
END;
$$ LANGUAGE plpgsql;
-- Trigger function: stamps the row being written with the current timestamp
-- in its updated_at column and returns the modified row. The table it is
-- attached to must have an updated_at column.
CREATE OR REPLACE FUNCTION update_updated_at_column()
RETURNS TRIGGER
LANGUAGE plpgsql
AS $$
BEGIN
    NEW.updated_at := CURRENT_TIMESTAMP;
    RETURN NEW;
END;
$$;
-- Run update_updated_at_column() before every row update on articles.
-- CREATE TRIGGER has no IF NOT EXISTS form, so any existing trigger of this
-- name is dropped first; otherwise re-running the script would fail here.
DROP TRIGGER IF EXISTS update_articles_updated_at ON articles;
CREATE TRIGGER update_articles_updated_at
    BEFORE UPDATE ON articles
    FOR EACH ROW
    EXECUTE FUNCTION update_updated_at_column();
-- Initial data: some common tags.
-- tags has two UNIQUE columns (name and slug). No conflict target is given
-- so that a collision on either one skips the row instead of aborting the
-- script, which keeps this seed safe to re-run.
INSERT INTO tags (name, name_burmese, slug) VALUES
    ('ChatGPT', 'ChatGPT', 'chatgpt'),
    ('OpenAI', 'OpenAI', 'openai'),
    ('Anthropic', 'Anthropic', 'anthropic'),
    ('Google', 'Google', 'google'),
    ('Machine Learning', 'စက်သင်ယူမှု', 'machine-learning'),
    ('Deep Learning', 'နက်ရှိုင်းသောသင်ယူမှု', 'deep-learning'),
    ('GPT-4', 'GPT-4', 'gpt-4'),
    ('Claude', 'Claude', 'claude'),
    ('Prompt Engineering', 'Prompt Engineering', 'prompt-engineering'),
    ('AI Safety', 'AI ဘေးကင်းရေး', 'ai-safety')
ON CONFLICT DO NOTHING;