Initial Burmddit deployment - AI news aggregator in Burmese
This commit is contained in:
266
database/schema.sql
Normal file
266
database/schema.sql
Normal file
@@ -0,0 +1,266 @@
|
||||
-- Burmddit Database Schema
|
||||
-- PostgreSQL
|
||||
|
||||
-- categories: lookup table that articles reference through articles.category_id.
-- Both name and slug are individually UNIQUE; name_burmese is required but
-- not constrained to be unique.
CREATE TABLE IF NOT EXISTS categories (
    id            SERIAL        PRIMARY KEY,
    name          VARCHAR(100)  NOT NULL UNIQUE,
    name_burmese  VARCHAR(100)  NOT NULL,           -- Burmese display name
    slug          VARCHAR(100)  NOT NULL UNIQUE,    -- short identifier, e.g. 'ai-news'
    description   TEXT,                             -- optional
    created_at    TIMESTAMP     DEFAULT CURRENT_TIMESTAMP
);
|
||||
|
||||
-- Seed the default categories.
-- categories has two UNIQUE columns (name and slug). A conflict target of
-- (slug) only absorbs slug collisions, so a re-run would still abort with a
-- unique violation on name if a seeded row's slug had been edited. Omitting
-- the conflict target makes the insert skip a row on either collision.
INSERT INTO categories (name, name_burmese, slug, description)
VALUES
    ('AI News', 'AI သတင်းများ', 'ai-news', 'Latest AI industry news and updates'),
    ('AI Tutorials', 'AI သင်ခန်းစာများ', 'tutorials', 'Step-by-step guides and how-tos'),
    ('Tips & Tricks', 'အကြံပြုချက်များ', 'tips-tricks', 'Productivity hacks and best practices'),
    ('Upcoming Releases', 'လာမည့် ထုတ်ပြန်မှုများ', 'upcoming', 'New AI models, tools, and products')
ON CONFLICT DO NOTHING;
|
||||
|
||||
-- articles: one row per compiled article. title/content/excerpt each have a
-- *_burmese counterpart; the title and content pairs are mandatory, the
-- excerpts are optional.
CREATE TABLE IF NOT EXISTS articles (
    id                SERIAL        PRIMARY KEY,

    -- Content
    title             TEXT          NOT NULL,
    title_burmese     TEXT          NOT NULL,
    slug              VARCHAR(200)  NOT NULL UNIQUE,
    content           TEXT          NOT NULL,
    content_burmese   TEXT          NOT NULL,
    excerpt           TEXT,
    excerpt_burmese   TEXT,
    category_id       INTEGER       REFERENCES categories(id),   -- nullable

    -- Presentation metadata
    author            VARCHAR(200)  DEFAULT 'Burmddit AI',
    reading_time      INTEGER,                                   -- minutes
    featured_image    TEXT,
    images            TEXT[],                                    -- multiple images
    videos            TEXT[],                                    -- video embeds (YouTube, etc.)

    -- SEO
    meta_description  TEXT,
    meta_keywords     TEXT[],

    -- Provenance
    source_articles   JSONB,                                     -- array of source URLs
    original_sources  TEXT[],

    -- Lifecycle
    -- NOTE(review): the allowed values are documented here only; no CHECK
    -- constraint enforces them.
    status            VARCHAR(20)   DEFAULT 'draft',             -- draft, published, archived
    published_at      TIMESTAMP,

    -- Counters
    view_count        INTEGER       DEFAULT 0,
    share_count       INTEGER       DEFAULT 0,

    -- Row timestamps
    created_at        TIMESTAMP     DEFAULT CURRENT_TIMESTAMP,
    updated_at        TIMESTAMP     DEFAULT CURRENT_TIMESTAMP
);
|
||||
|
||||
-- Secondary indexes on articles.
-- IF NOT EXISTS keeps the script re-runnable, in line with the
-- CREATE TABLE IF NOT EXISTS / ON CONFLICT statements used elsewhere in this
-- file; a bare CREATE INDEX aborts the second run with "already exists".
-- NOTE(review): articles.slug is declared UNIQUE, which already gives it an
-- index; idx_articles_slug looks redundant and could probably be dropped.
CREATE INDEX IF NOT EXISTS idx_articles_slug      ON articles (slug);
CREATE INDEX IF NOT EXISTS idx_articles_category  ON articles (category_id);
CREATE INDEX IF NOT EXISTS idx_articles_status    ON articles (status);
CREATE INDEX IF NOT EXISTS idx_articles_published ON articles (published_at DESC);
CREATE INDEX IF NOT EXISTS idx_articles_views     ON articles (view_count DESC);
|
||||
|
||||
-- Full-text search index over the Burmese title and body.
-- This is an expression index: a query has to use the same expression,
--   to_tsvector('simple', title_burmese || ' ' || content_burmese),
-- for the planner to consider it. Both columns are NOT NULL, so the
-- concatenation never collapses to NULL.
-- IF NOT EXISTS makes the statement safe to re-run.
-- NOTE(review): confirm that the 'simple' text search configuration tokenizes
-- Burmese text well enough for the intended searches.
CREATE INDEX IF NOT EXISTS idx_articles_search
    ON articles
    USING gin (to_tsvector('simple', title_burmese || ' ' || content_burmese));
|
||||
|
||||
-- raw_articles: scraped source items held before processing.
-- url is UNIQUE, so each URL can be stored at most once.
CREATE TABLE IF NOT EXISTS raw_articles (
    id              SERIAL        PRIMARY KEY,
    url             TEXT          NOT NULL UNIQUE,
    title           TEXT          NOT NULL,
    content         TEXT          NOT NULL,
    author          VARCHAR(200),
    published_date  TIMESTAMP,
    source          VARCHAR(100),                 -- medium, techcrunch, etc
    category_hint   VARCHAR(50),                  -- detected category

    -- Processing state
    processed       BOOLEAN       DEFAULT FALSE,
    -- Optional link to a row in articles.
    -- NOTE(review): no ON DELETE action is declared, so deleting an article
    -- that raw rows still point at will be rejected.
    compiled_into   INTEGER       REFERENCES articles(id),

    -- Row timestamp
    scraped_at      TIMESTAMP     DEFAULT CURRENT_TIMESTAMP
);
|
||||
|
||||
-- Secondary indexes on raw_articles.
-- IF NOT EXISTS lets the script be re-run without failing on indexes that
-- an earlier run already created.
CREATE INDEX IF NOT EXISTS idx_raw_articles_processed ON raw_articles (processed);
CREATE INDEX IF NOT EXISTS idx_raw_articles_source    ON raw_articles (source);
|
||||
|
||||
-- tags: labels attached to articles through article_tags.
-- name and slug are each UNIQUE; the Burmese name is optional.
CREATE TABLE IF NOT EXISTS tags (
    id             SERIAL        PRIMARY KEY,
    name           VARCHAR(100)  NOT NULL UNIQUE,
    name_burmese   VARCHAR(100),
    slug           VARCHAR(100)  NOT NULL UNIQUE,
    -- NOTE(review): nothing visible in this schema file updates article_count;
    -- presumably the application maintains it -- confirm.
    article_count  INTEGER       DEFAULT 0,
    created_at     TIMESTAMP     DEFAULT CURRENT_TIMESTAMP
);
|
||||
|
||||
-- article_tags: many-to-many link between articles and tags.
-- Deleting either the article or the tag removes the link row (ON DELETE
-- CASCADE on both foreign keys). The composite primary key prevents the same
-- tag from being attached to an article twice.
CREATE TABLE IF NOT EXISTS article_tags (
    article_id INTEGER REFERENCES articles(id) ON DELETE CASCADE,
    tag_id     INTEGER REFERENCES tags(id)     ON DELETE CASCADE,
    PRIMARY KEY (article_id, tag_id)
);

-- The primary-key index leads with article_id, so it does not help lookups
-- that start from tag_id (tag -> articles joins, and the cascade fired when a
-- tag is deleted). Index the referencing column explicitly.
CREATE INDEX IF NOT EXISTS idx_article_tags_tag ON article_tags (tag_id);
|
||||
|
||||
-- page_views: one row per recorded article view.
-- Rows are removed together with their article (ON DELETE CASCADE).
CREATE TABLE IF NOT EXISTS page_views (
    id          SERIAL       PRIMARY KEY,
    article_id  INTEGER      REFERENCES articles(id) ON DELETE CASCADE,
    ip_hash     VARCHAR(64),                 -- hashed IP for privacy
    user_agent  TEXT,
    referrer    TEXT,
    country     VARCHAR(2),                  -- presumably a two-letter country code -- confirm
    viewed_at   TIMESTAMP    DEFAULT CURRENT_TIMESTAMP
);
|
||||
|
||||
-- Secondary indexes on page_views.
-- IF NOT EXISTS lets the script be re-run without failing on indexes that
-- an earlier run already created.
CREATE INDEX IF NOT EXISTS idx_page_views_article ON page_views (article_id);
CREATE INDEX IF NOT EXISTS idx_page_views_date    ON page_views (viewed_at);
|
||||
|
||||
-- subscribers: newsletter sign-ups, one row per e-mail address (email is UNIQUE).
CREATE TABLE IF NOT EXISTS subscribers (
    id               SERIAL        PRIMARY KEY,
    email            VARCHAR(255)  NOT NULL UNIQUE,
    status           VARCHAR(20)   DEFAULT 'active',    -- active, unsubscribed
    subscribed_at    TIMESTAMP     DEFAULT CURRENT_TIMESTAMP,
    unsubscribed_at  TIMESTAMP                          -- no default; NULL unless set
);
|
||||
|
||||
-- pipeline_logs: monitoring records for the content pipeline.
-- Every column except id is nullable; the two timestamps default to the
-- insert time.
CREATE TABLE IF NOT EXISTS pipeline_logs (
    id                  SERIAL       PRIMARY KEY,
    pipeline_run        TIMESTAMP    DEFAULT CURRENT_TIMESTAMP,
    stage               VARCHAR(50),             -- crawl, cluster, compile, translate, publish
    status              VARCHAR(20),             -- started, completed, failed
    articles_processed  INTEGER,
    error_message       TEXT,
    duration_seconds    INTEGER,
    created_at          TIMESTAMP    DEFAULT CURRENT_TIMESTAMP
);
|
||||
|
||||
-- Index for looking up log rows by run timestamp.
-- IF NOT EXISTS lets the script be re-run without failing when the index
-- was already created by an earlier run.
CREATE INDEX IF NOT EXISTS idx_pipeline_logs_run ON pipeline_logs (pipeline_run);
|
||||
|
||||
-- published_articles: listing columns for every article whose status is
-- 'published', together with the name, Burmese name and slug of its category.
-- The join is an inner join and articles.category_id is nullable, so an
-- article without a category does not appear in this view.
CREATE OR REPLACE VIEW published_articles AS
SELECT
    art.id,
    art.title,
    art.title_burmese,
    art.slug,
    art.excerpt_burmese,
    art.featured_image,
    art.reading_time,
    art.view_count,
    art.published_at,
    cat.name         AS category_name,
    cat.name_burmese AS category_name_burmese,
    cat.slug         AS category_slug
FROM articles AS art
INNER JOIN categories AS cat
    ON cat.id = art.category_id
WHERE art.status = 'published'
ORDER BY art.published_at DESC;
|
||||
|
||||
-- increment_view_count(article_slug)
--   Adds one to view_count on the article whose slug equals article_slug and
--   sets its updated_at to the current timestamp.
--   Returns nothing; if no article has that slug, no row is changed.
--
-- articles.view_count has DEFAULT 0 but is nullable. NULL + 1 is NULL, so a
-- row whose counter was ever stored as NULL would never start counting.
-- COALESCE treats such a counter as 0.
CREATE OR REPLACE FUNCTION increment_view_count(article_slug VARCHAR)
RETURNS VOID AS $$
BEGIN
    UPDATE articles
    SET view_count = COALESCE(view_count, 0) + 1,
        updated_at = CURRENT_TIMESTAMP
    WHERE slug = article_slug;
END;
$$ LANGUAGE plpgsql;
|
||||
|
||||
-- get_trending_articles(limit_count)
--   Returns up to limit_count (default 10) articles that are 'published' and
--   whose published_at falls within the last 7 days, highest view_count first.
--   Each row carries the Burmese name of the article's category; the inner
--   join means articles without a category are not returned.
--
-- Every column reference in the query is table-qualified because the
-- RETURNS TABLE column names (id, slug, ...) are also PL/pgSQL variables
-- inside the function body.
CREATE OR REPLACE FUNCTION get_trending_articles(limit_count INTEGER DEFAULT 10)
RETURNS TABLE (
    id INTEGER,
    title_burmese TEXT,
    slug VARCHAR,
    view_count INTEGER,
    category_name_burmese VARCHAR
) AS $trending$
BEGIN
    RETURN QUERY
    SELECT
        art.id,
        art.title_burmese,
        art.slug,
        art.view_count,
        cat.name_burmese
    FROM articles AS art
    INNER JOIN categories AS cat
        ON cat.id = art.category_id
    WHERE art.status = 'published'
      AND art.published_at >= CURRENT_TIMESTAMP - INTERVAL '7 days'
    ORDER BY art.view_count DESC
    LIMIT limit_count;
END;
$trending$ LANGUAGE plpgsql;
|
||||
|
||||
-- get_related_articles(article_id_param, limit_count)
--   Returns up to limit_count (default 5) published articles, other than
--   article_id_param itself, that either
--     * share its category_id, or
--     * share at least one tag with it (via article_tags),
--   ordered by published_at, newest first.
--
-- Two things are deliberate here:
--   * No SELECT DISTINCT. Each articles row is produced at most once (the tag
--     match is a semi-join, not a join), and PostgreSQL rejects
--     SELECT DISTINCT combined with an ORDER BY expression (published_at)
--     that is not in the select list.
--   * Every column is table-qualified. The RETURNS TABLE column names are
--     PL/pgSQL variables inside the body, so a bare "id" in a query is an
--     ambiguous reference and raises an error at call time.
CREATE OR REPLACE FUNCTION get_related_articles(article_id_param INTEGER, limit_count INTEGER DEFAULT 5)
RETURNS TABLE (
    id INTEGER,
    title_burmese TEXT,
    slug VARCHAR,
    excerpt_burmese TEXT,
    featured_image TEXT
) AS $$
BEGIN
    RETURN QUERY
    SELECT
        a.id,
        a.title_burmese,
        a.slug,
        a.excerpt_burmese,
        a.featured_image
    FROM articles AS a
    WHERE a.id <> article_id_param
      AND a.status = 'published'
      AND (
            -- Same category as the reference article.
            a.category_id = (
                SELECT src.category_id
                FROM articles AS src
                WHERE src.id = article_id_param
            )
            -- Or at least one tag in common with the reference article.
            OR EXISTS (
                SELECT 1
                FROM article_tags AS own_tag
                INNER JOIN article_tags AS shared_tag
                    ON shared_tag.tag_id = own_tag.tag_id
                WHERE own_tag.article_id = article_id_param
                  AND shared_tag.article_id = a.id
            )
          )
    ORDER BY a.published_at DESC
    LIMIT limit_count;
END;
$$ LANGUAGE plpgsql;
|
||||
|
||||
-- update_updated_at_column()
--   Row-level trigger function: overwrites updated_at on the incoming row
--   with the current timestamp and returns that row. It must be attached to
--   a table that has an updated_at column.
CREATE OR REPLACE FUNCTION update_updated_at_column()
RETURNS TRIGGER AS $touch$
BEGIN
    NEW.updated_at := CURRENT_TIMESTAMP;
    RETURN NEW;
END;
$touch$ LANGUAGE plpgsql;
|
||||
|
||||
-- Refresh articles.updated_at on every row update.
-- CREATE TRIGGER fails if a trigger of the same name already exists on the
-- table, which would break a re-run of this otherwise idempotent script, so
-- any previous definition is dropped first.
DROP TRIGGER IF EXISTS update_articles_updated_at ON articles;
CREATE TRIGGER update_articles_updated_at
    BEFORE UPDATE ON articles
    FOR EACH ROW
    EXECUTE FUNCTION update_updated_at_column();
|
||||
|
||||
-- Seed a set of common tags.
-- tags has two UNIQUE columns (name and slug). A conflict target of (slug)
-- only absorbs slug collisions, so a re-run would still abort with a unique
-- violation on name if a seeded tag's slug had been edited. Omitting the
-- conflict target makes the insert skip a row on either collision.
INSERT INTO tags (name, name_burmese, slug)
VALUES
    ('ChatGPT', 'ChatGPT', 'chatgpt'),
    ('OpenAI', 'OpenAI', 'openai'),
    ('Anthropic', 'Anthropic', 'anthropic'),
    ('Google', 'Google', 'google'),
    ('Machine Learning', 'စက်သင်ယူမှု', 'machine-learning'),
    ('Deep Learning', 'နက်ရှိုင်းသောသင်ယူမှု', 'deep-learning'),
    ('GPT-4', 'GPT-4', 'gpt-4'),
    ('Claude', 'Claude', 'claude'),
    ('Prompt Engineering', 'Prompt Engineering', 'prompt-engineering'),
    ('AI Safety', 'AI ဘေးကင်းရေး', 'ai-safety')
ON CONFLICT DO NOTHING;
|
||||
Reference in New Issue
Block a user