Initial Burmddit deployment - AI news aggregator in Burmese

2026-02-19 02:52:58 +00:00
commit dddb86ea94
27 changed files with 5039 additions and 0 deletions
--- a/backend/config.py
+++ b/backend/config.py
@@ -0,0 +1,142 @@
+# Burmddit Configuration
+
+import os
+from dotenv import load_dotenv
+
+load_dotenv()  # Load variables from a .env file into the environment before the lookups below
+
+# Database connection URL from the environment; falls back to a local PostgreSQL URL
+DATABASE_URL = os.getenv('DATABASE_URL', 'postgresql://localhost/burmddit')
+
+# API keys read from the environment; each is None when its variable is unset
+ANTHROPIC_API_KEY = os.getenv('ANTHROPIC_API_KEY')
+OPENAI_API_KEY = os.getenv('OPENAI_API_KEY')  # Optional, for embeddings
+
+# Scraping sources, keyed by source name; every entry has an 'enabled' flag, a URL and a per-source article limit
+SOURCES = {
+    'medium': {
+        'enabled': True,
+        'tags': ['artificial-intelligence', 'machine-learning', 'chatgpt', 'ai-tools', 
+                'generative-ai', 'deeplearning', 'prompt-engineering', 'ai-news'],
+        'url_pattern': 'https://medium.com/tag/{tag}/latest',  # presumably {tag} is filled from 'tags' - confirm in scraper
+        'articles_per_tag': 15  # Increased from 10
+    },
+    'techcrunch': {
+        'enabled': True,
+        'category': 'artificial-intelligence',
+        'url': 'https://techcrunch.com/category/artificial-intelligence/feed/',
+        'articles_limit': 30  # Increased from 20
+    },
+    'venturebeat': {
+        'enabled': True,
+        'url': 'https://venturebeat.com/category/ai/feed/',
+        'articles_limit': 25  # Increased from 15
+    },
+    'mit_tech_review': {
+        'enabled': True,
+        'url': 'https://www.technologyreview.com/feed/',  # site-wide feed, not an AI-specific one
+        'filter_ai': True,  # presumably tells the scraper to keep only AI articles - confirm
+        'articles_limit': 20  # Increased from 10
+    },
+    'theverge': {
+        'enabled': True,
+        'url': 'https://www.theverge.com/ai-artificial-intelligence/rss/index.xml',
+        'articles_limit': 20
+    },
+    'wired_ai': {
+        'enabled': True,
+        'url': 'https://www.wired.com/feed/tag/ai/latest/rss',
+        'articles_limit': 15
+    },
+    'arstechnica': {
+        'enabled': True,
+        'url': 'https://arstechnica.com/tag/artificial-intelligence/feed/',
+        'articles_limit': 15
+    },
+    'hackernews': {
+        'enabled': True,
+        'url': 'https://hnrss.org/newest?q=AI+OR+ChatGPT+OR+OpenAI',  # query: AI OR ChatGPT OR OpenAI
+        'articles_limit': 30
+    }
+}
+
+# Content pipeline settings: daily volume, article length bounds and clustering parameters
+PIPELINE = {
+    'articles_per_day': 30,  # Daily article target
+    'min_article_length': 600,  # Shorter, easier to read; unit (words vs. characters) not stated - TODO confirm
+    'max_article_length': 1000,  # Keep it concise; same unit as min_article_length
+    'sources_per_article': 3,  # How many articles to compile into one
+    'clustering_threshold': 0.6,  # Lower threshold = more diverse topics
+    'research_time_minutes': 90,  # Spend 1.5 hours researching daily
+}
+
+# Category mapping (keyword-based): category name -> list of lowercase keywords/phrases
+CATEGORY_KEYWORDS = {
+    'AI News': ['news', 'announcement', 'report', 'industry', 'company', 'funding', 'release'],
+    'AI Tutorials': ['how to', 'tutorial', 'guide', 'step by step', 'learn', 'beginners', 'course'],
+    'Tips & Tricks': ['tips', 'tricks', 'hacks', 'productivity', 'best practices', 'optimize', 'improve'],
+    'Upcoming Releases': ['upcoming', 'soon', 'preview', 'roadmap', 'future', 'expected', 'announce']
+}  # NOTE(review): 'announce' is a substring of 'announcement' (AI News) - check how the matcher handles overlap
+
+# Translation settings: model parameters plus style options for the Burmese output
+TRANSLATION = {
+    'model': 'claude-3-5-sonnet-20241022',  # Pinned, dated model identifier
+    'max_tokens': 4000,
+    'temperature': 0.5,  # Higher = more natural, casual translation
+    'preserve_terms': [  # Technical terms to keep in English
+        'AI', 'ChatGPT', 'GPT', 'Claude', 'API', 'ML', 'NLP', 
+        'LLM', 'Transformer', 'Neural Network', 'Python', 'GitHub',
+        'DeepSeek', 'OpenAI', 'Anthropic', 'Google', 'Meta'
+    ],
+    'style': 'casual',  # Casual, conversational tone
+    'target_audience': 'general',  # Not just tech experts
+    'simplify_jargon': True,  # Explain technical terms simply
+}
+
+# Publishing settings: default status, scheduling, excerpt and media handling
+PUBLISHING = {
+    'status_default': 'published',  # or 'draft' for manual review
+    'publish_interval_hours': 1,  # Space out publications
+    'featured_image_required': False,  # NOTE(review): contradicts 'require_featured_image' below - confirm which key is read
+    'auto_generate_excerpt': True,
+    'excerpt_length': 200,  # characters
+    'require_featured_image': True,  # Every article needs an image. NOTE(review): conflicts with 'featured_image_required' above
+    'extract_videos': True,  # Extract YouTube/video embeds
+    'max_images_per_article': 5,  # Include multiple images
+    'image_fallback': 'generate'  # If no image, generate AI image
+}
+
+# SEO settings: limits for generated metadata
+SEO = {
+    'meta_description_length': 160,  # presumably characters - TODO confirm in consumer
+    'keywords_per_article': 10,
+    'auto_generate_slug': True
+}
+
+# Burmese font settings: primary font, fallbacks and the code point range they apply to
+BURMESE = {
+    'font_family': 'Pyidaungsu',
+    'fallback_fonts': ['Noto Sans Myanmar', 'Myanmar Text'],
+    'unicode_range': 'U+1000-109F'  # Main Myanmar Unicode block only; Myanmar Extended-A/B are not included
+}
+
+# Admin password from the environment. NOTE(review): silently falls back to a well-known placeholder when unset
+ADMIN_PASSWORD = os.getenv('ADMIN_PASSWORD', 'change_me_in_production')
+
+# Logging: level name comes from the environment (default 'INFO'); log file name is fixed
+LOG_LEVEL = os.getenv('LOG_LEVEL', 'INFO')
+LOG_FILE = 'burmddit_pipeline.log'  # Bare filename with no directory component
+
+# Rate limiting: request budgets and the pause between requests
+RATE_LIMITS = {
+    'requests_per_minute': 30,
+    'anthropic_rpm': 50,  # presumably requests per minute for the Anthropic API - confirm
+    'delay_between_requests': 2  # seconds; 60 / 2 is consistent with requests_per_minute above
+}
+
+# Retry settings: attempt count, backoff and timeout
+RETRY = {
+    'max_attempts': 3,
+    'backoff_factor': 2,  # presumably the multiplier applied to the wait between attempts - confirm
+    'timeout': 30  # seconds
+}