Initial Burmddit deployment - AI news aggregator in Burmese

This commit is contained in:
Zeya Phyo
2026-02-19 02:52:58 +00:00
commit dddb86ea94
27 changed files with 5039 additions and 0 deletions

142
backend/config.py Normal file
View File

@@ -0,0 +1,142 @@
# Burmddit Configuration
import os
import warnings

from dotenv import load_dotenv
# Load variables from a .env file (when one is found) into the process
# environment, so the environment lookups below can see them.
load_dotenv()
# --- Database ---------------------------------------------------------------
# Connection string is taken from the environment; when DATABASE_URL is unset
# it falls back to a local PostgreSQL database named "burmddit".
DATABASE_URL = os.environ.get("DATABASE_URL", "postgresql://localhost/burmddit")

# --- External API credentials -----------------------------------------------
# Each value is None when the corresponding variable is not set.
ANTHROPIC_API_KEY = os.environ.get("ANTHROPIC_API_KEY")
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")  # optional; intended for embeddings
# --- Scraping sources ---------------------------------------------------------
# One entry per upstream site. Every entry carries an "enabled" switch and a
# per-source cap on how many articles to take; the remaining keys are
# source-specific (tag list + URL template for Medium, a feed URL elsewhere).
SOURCES = {
    "medium": {
        "enabled": True,
        "tags": [
            "artificial-intelligence",
            "machine-learning",
            "chatgpt",
            "ai-tools",
            "generative-ai",
            "deeplearning",
            "prompt-engineering",
            "ai-news",
        ],
        # "{tag}" is a placeholder to be filled with each entry of "tags".
        "url_pattern": "https://medium.com/tag/{tag}/latest",
        "articles_per_tag": 15,  # previously 10
    },
    "techcrunch": {
        "enabled": True,
        "category": "artificial-intelligence",
        "url": "https://techcrunch.com/category/artificial-intelligence/feed/",
        "articles_limit": 30,  # previously 20
    },
    "venturebeat": {
        "enabled": True,
        "url": "https://venturebeat.com/category/ai/feed/",
        "articles_limit": 25,  # previously 15
    },
    "mit_tech_review": {
        "enabled": True,
        # Site-wide feed rather than an AI-specific one.
        "url": "https://www.technologyreview.com/feed/",
        # Presumably asks the scraper to keep only AI-related items -- confirm
        # against the scraper implementation.
        "filter_ai": True,
        "articles_limit": 20,  # previously 10
    },
    "theverge": {
        "enabled": True,
        "url": "https://www.theverge.com/ai-artificial-intelligence/rss/index.xml",
        "articles_limit": 20,
    },
    "wired_ai": {
        "enabled": True,
        "url": "https://www.wired.com/feed/tag/ai/latest/rss",
        "articles_limit": 15,
    },
    "arstechnica": {
        "enabled": True,
        "url": "https://arstechnica.com/tag/artificial-intelligence/feed/",
        "articles_limit": 15,
    },
    "hackernews": {
        "enabled": True,
        # Search feed for newest items matching AI, ChatGPT or OpenAI.
        "url": "https://hnrss.org/newest?q=AI+OR+ChatGPT+OR+OpenAI",
        "articles_limit": 30,
    },
}
# --- Content pipeline ---------------------------------------------------------
# Tuning knobs for the daily compile-and-publish run.
PIPELINE = {
    # Daily output target.
    "articles_per_day": 30,
    # Bounds on generated article length, kept short for readability.
    # NOTE(review): the unit (words vs. characters) is not stated here -- confirm.
    "min_article_length": 600,
    "max_article_length": 1000,
    # Number of source articles compiled into a single published article.
    "sources_per_article": 3,
    # Similarity cut-off for grouping sources; per the original author's note,
    # a lower value yields more diverse topics.
    "clustering_threshold": 0.6,
    # Daily research budget: 90 minutes = 1.5 hours.
    "research_time_minutes": 90,
}
# --- Category mapping -----------------------------------------------------------
# Maps each site category to the keywords associated with it (keyword-based
# categorisation). Note that some entries are multi-word phrases.
CATEGORY_KEYWORDS = {
    "AI News": [
        "news", "announcement", "report", "industry",
        "company", "funding", "release",
    ],
    "AI Tutorials": [
        "how to", "tutorial", "guide", "step by step",
        "learn", "beginners", "course",
    ],
    "Tips & Tricks": [
        "tips", "tricks", "hacks", "productivity",
        "best practices", "optimize", "improve",
    ],
    "Upcoming Releases": [
        "upcoming", "soon", "preview", "roadmap",
        "future", "expected", "announce",
    ],
}
# --- Translation ----------------------------------------------------------------
# Parameters for the English -> Burmese translation step.
TRANSLATION = {
    # NOTE(review): pinned, dated model id -- confirm it is still served by the
    # Anthropic API before relying on it.
    "model": "claude-3-5-sonnet-20241022",
    "max_tokens": 4000,
    # Sampling temperature; chosen for a more natural, casual translation.
    "temperature": 0.5,
    # Technical terms and product names that stay in English.
    "preserve_terms": [
        "AI", "ChatGPT", "GPT", "Claude", "API", "ML", "NLP",
        "LLM", "Transformer", "Neural Network", "Python", "GitHub",
        "DeepSeek", "OpenAI", "Anthropic", "Google", "Meta",
    ],
    "style": "casual",  # conversational tone
    "target_audience": "general",  # readers beyond tech experts
    "simplify_jargon": True,  # explain technical terms in simple language
}
# --- Publishing -------------------------------------------------------------------
# Controls how finished articles are released and which media they carry.
PUBLISHING = {
    # New articles go live immediately; use 'draft' to require manual review.
    "status_default": "published",
    # Hours between consecutive publications.
    "publish_interval_hours": 1,
    # NOTE(review): this flag and "require_featured_image" below express the
    # same requirement with opposite values (False vs. True). Check which key
    # the publisher actually reads and remove or align the other.
    "featured_image_required": False,
    "auto_generate_excerpt": True,
    "excerpt_length": 200,  # characters
    "require_featured_image": True,  # every article needs an image
    "extract_videos": True,  # pull YouTube/video embeds from sources
    "max_images_per_article": 5,
    # When a source has no image, fall back to generating one with AI.
    "image_fallback": "generate",
}
# --- SEO --------------------------------------------------------------------------
# Limits and switches for generated search-engine metadata.
SEO = {
    "meta_description_length": 160,
    "keywords_per_article": 10,
    "auto_generate_slug": True,
}
# --- Burmese typography -----------------------------------------------------------
# Preferred font, ordered fallbacks, and the code-point range for Burmese text.
BURMESE = {
    "font_family": "Pyidaungsu",
    "fallback_fonts": ["Noto Sans Myanmar", "Myanmar Text"],
    "unicode_range": "U+1000-109F",  # Myanmar Unicode block
}
# --- Admin ------------------------------------------------------------------------
# Placeholder that is used only when ADMIN_PASSWORD is absent from the environment.
_DEFAULT_ADMIN_PASSWORD = 'change_me_in_production'

# Admin password, read from the environment. The placeholder fallback is kept
# so existing setups continue to start unchanged.
ADMIN_PASSWORD = os.getenv('ADMIN_PASSWORD', _DEFAULT_ADMIN_PASSWORD)

if ADMIN_PASSWORD == _DEFAULT_ADMIN_PASSWORD:
    # A publicly known placeholder is not a credential: make the insecure
    # configuration visible instead of silently running with it.
    warnings.warn(
        "ADMIN_PASSWORD is unset or still the placeholder value; "
        "set a strong ADMIN_PASSWORD in the environment before deploying.",
        RuntimeWarning,
    )
# --- Logging ----------------------------------------------------------------------
# File name for the pipeline log.
LOG_FILE = "burmddit_pipeline.log"
# Level name comes from the environment; "INFO" when LOG_LEVEL is unset.
LOG_LEVEL = os.environ.get("LOG_LEVEL", "INFO")
# --- Rate limiting ----------------------------------------------------------------
# Request budgets for outbound traffic.
RATE_LIMITS = {
    "requests_per_minute": 30,
    "anthropic_rpm": 50,  # separate per-minute budget for Anthropic calls
    # Seconds between requests; 2 s spacing corresponds to 30 requests/minute.
    "delay_between_requests": 2,
}
# --- Retries ----------------------------------------------------------------------
# Policy for retrying failed operations.
RETRY = {
    "max_attempts": 3,
    # Presumably the multiplier applied to the wait between attempts --
    # confirm against the code that consumes it.
    "backoff_factor": 2,
    "timeout": 30,  # seconds
}