"""Burmddit configuration.

Central, import-time settings for the Burmddit content pipeline: database
and API credentials (read from the environment), scraping sources, pipeline
tuning, translation, publishing, SEO, font, logging, rate-limit and retry
options. Everything here is a plain module-level constant.
"""

import os

try:
    from dotenv import load_dotenv
except ImportError:
    # python-dotenv is optional: when it is not installed, values come from
    # the process environment only. The fallback keeps the `load_dotenv`
    # name available and mirrors the "nothing loaded" return value.
    def load_dotenv(*args, **kwargs):
        """No-op stand-in used when python-dotenv is unavailable."""
        return False

load_dotenv()

# Database
DATABASE_URL = os.getenv('DATABASE_URL', 'postgresql://localhost/burmddit')

# APIs
ANTHROPIC_API_KEY = os.getenv('ANTHROPIC_API_KEY')
OPENAI_API_KEY = os.getenv('OPENAI_API_KEY')  # Optional, for embeddings

# Scraping sources (expanded for more content)
SOURCES = {
    'medium': {
        'enabled': True,
        'tags': [
            'artificial-intelligence',
            'machine-learning',
            'chatgpt',
            'ai-tools',
            'generative-ai',
            'deeplearning',
            'prompt-engineering',
            'ai-news',
        ],
        'url_pattern': 'https://medium.com/tag/{tag}/latest',
        'articles_per_tag': 15,  # Increased from 10
    },
    'techcrunch': {
        'enabled': True,
        'category': 'artificial-intelligence',
        'url': 'https://techcrunch.com/category/artificial-intelligence/feed/',
        'articles_limit': 30,  # Increased from 20
    },
    'venturebeat': {
        'enabled': True,
        'url': 'https://venturebeat.com/category/ai/feed/',
        'articles_limit': 25,  # Increased from 15
    },
    'mit_tech_review': {
        'enabled': True,
        'url': 'https://www.technologyreview.com/feed/',
        'filter_ai': True,
        'articles_limit': 20,  # Increased from 10
    },
    'theverge': {
        'enabled': True,
        'url': 'https://www.theverge.com/ai-artificial-intelligence/rss/index.xml',
        'articles_limit': 20,
    },
    'wired_ai': {
        'enabled': True,
        'url': 'https://www.wired.com/feed/tag/ai/latest/rss',
        'articles_limit': 15,
    },
    'arstechnica': {
        'enabled': True,
        'url': 'https://arstechnica.com/tag/artificial-intelligence/feed/',
        'articles_limit': 15,
    },
    'hackernews': {
        'enabled': True,
        'url': 'https://hnrss.org/newest?q=AI+OR+ChatGPT+OR+OpenAI',
        'articles_limit': 30,
    },
}

# Content pipeline settings
PIPELINE = {
    'articles_per_day': 30,  # Increased: more content = more traffic
    'min_article_length': 600,  # Shorter, easier to read
    'max_article_length': 1000,  # Keep it concise
    'sources_per_article': 3,  # How many articles to compile into one
    'clustering_threshold': 0.6,  # Lower threshold = more diverse topics
    'research_time_minutes': 90,  # Spend 1.5 hours researching daily
}

# Category mapping (keyword-based)
CATEGORY_KEYWORDS = {
    'AI News': [
        'news', 'announcement', 'report', 'industry', 'company', 'funding',
        'release',
    ],
    'AI Tutorials': [
        'how to', 'tutorial', 'guide', 'step by step', 'learn', 'beginners',
        'course',
    ],
    'Tips & Tricks': [
        'tips', 'tricks', 'hacks', 'productivity', 'best practices',
        'optimize', 'improve',
    ],
    'Upcoming Releases': [
        'upcoming', 'soon', 'preview', 'roadmap', 'future', 'expected',
        'announce',
    ],
}

# Translation settings
TRANSLATION = {
    'model': 'claude-3-5-sonnet-20241022',
    'max_tokens': 4000,
    'temperature': 0.5,  # Higher = more natural, casual translation
    'preserve_terms': [  # Technical terms to keep in English
        'AI', 'ChatGPT', 'GPT', 'Claude', 'API', 'ML', 'NLP', 'LLM',
        'Transformer', 'Neural Network', 'Python', 'GitHub', 'DeepSeek',
        'OpenAI', 'Anthropic', 'Google', 'Meta',
    ],
    'style': 'casual',  # Casual, conversational tone
    'target_audience': 'general',  # Not just tech experts
    'simplify_jargon': True,  # Explain technical terms simply
}

# Publishing settings
PUBLISHING = {
    'status_default': 'published',  # or 'draft' for manual review
    'publish_interval_hours': 1,  # Space out publications
    # NOTE(review): 'featured_image_required' (False) contradicts
    # 'require_featured_image' (True) below. Both values are left exactly as
    # they were because the code that reads them is not visible here; confirm
    # which key the pipeline consults and retire the other.
    'featured_image_required': False,
    'auto_generate_excerpt': True,
    'excerpt_length': 200,  # characters
    'require_featured_image': True,  # Every article needs an image
    'extract_videos': True,  # Extract YouTube/video embeds
    'max_images_per_article': 5,  # Include multiple images
    'image_fallback': 'generate',  # If no image, generate AI image
}

# SEO settings
SEO = {
    'meta_description_length': 160,
    'keywords_per_article': 10,
    'auto_generate_slug': True,
}

# Burmese font settings
BURMESE = {
    'font_family': 'Pyidaungsu',
    'fallback_fonts': ['Noto Sans Myanmar', 'Myanmar Text'],
    'unicode_range': 'U+1000-109F',  # Myanmar Unicode range
}

# Admin
# NOTE(review): the fallback is a well-known placeholder. Make sure
# ADMIN_PASSWORD is set in every deployed environment; consider failing fast
# when it is missing instead of silently using this value.
ADMIN_PASSWORD = os.getenv('ADMIN_PASSWORD', 'change_me_in_production')

# Logging
LOG_LEVEL = os.getenv('LOG_LEVEL', 'INFO')
LOG_FILE = 'burmddit_pipeline.log'

# Rate limiting
RATE_LIMITS = {
    'requests_per_minute': 30,
    'anthropic_rpm': 50,
    'delay_between_requests': 2,  # seconds
}

# Retry settings
RETRY = {
    'max_attempts': 3,
    'backoff_factor': 2,
    'timeout': 30,  # seconds
}