Initial Burmddit deployment - AI news aggregator in Burmese

This commit is contained in:
Zeya Phyo
2026-02-19 02:52:58 +00:00
commit dddb86ea94
27 changed files with 5039 additions and 0 deletions

160
backend/run_pipeline.py Normal file
View File

@@ -0,0 +1,160 @@
#!/usr/bin/env python3
# Main pipeline orchestrator - Runs entire content generation pipeline
import sys
import time
from datetime import datetime
from loguru import logger
import config
# Import pipeline stages
from scraper import run_scraper
from compiler import run_compiler
from translator import run_translator
from publisher import run_publisher
import database
# Configure logging
logger.remove() # Remove default handler
# Console sink: verbosity driven by config (e.g. "DEBUG" in dev, "INFO" in prod).
logger.add(sys.stderr, level=config.LOG_LEVEL)
# File sink: rotate daily and keep one week of history; always INFO and above.
logger.add(config.LOG_FILE, rotation="1 day", retention="7 days", level="INFO")
class Pipeline:
    """Orchestrates the four-stage content pipeline: scrape -> compile -> translate -> publish.

    Per-stage article counts are accumulated in ``self.stats`` and a summary
    is logged when the run ends (normally, interrupted, or failed).
    """

    def __init__(self):
        # Wall-clock start timestamp; set by run(), None until then.
        self.start_time = None
        # Per-stage article counters, updated as each stage completes.
        self.stats = {
            'scraped': 0,
            'compiled': 0,
            'translated': 0,
            'published': 0
        }

    def run(self):
        """Execute the full pipeline.

        Each stage short-circuits: if a stage yields nothing, the remaining
        stages are skipped and the summary is printed immediately.

        Returns:
            int: number of articles published this run (0 on early exit,
            interruption, or failure).
        """
        self.start_time = time.time()
        logger.info("="*60)
        logger.info(f"🚀 Starting Burmddit Content Pipeline - {datetime.now()}")
        logger.info("="*60)
        try:
            # Stage 1: Scrape
            logger.info("\n📥 STAGE 1: SCRAPING")
            logger.info("-" * 40)
            scraped_count = run_scraper()
            self.stats['scraped'] = scraped_count
            if scraped_count == 0:
                logger.warning("⚠️ No articles scraped. Exiting pipeline.")
                return self.finish()
            logger.info(f"✅ Scraped {scraped_count} articles")

            # Stage 2: Compile
            logger.info("\n🔨 STAGE 2: COMPILING")
            logger.info("-" * 40)
            compiled_articles = run_compiler()
            self.stats['compiled'] = len(compiled_articles)
            if not compiled_articles:
                logger.warning("⚠️ No articles compiled. Exiting pipeline.")
                return self.finish()
            logger.info(f"✅ Compiled {len(compiled_articles)} articles")

            # Stage 3: Translate
            logger.info("\n🌍 STAGE 3: TRANSLATING TO BURMESE")
            logger.info("-" * 40)
            translated_articles = run_translator(compiled_articles)
            self.stats['translated'] = len(translated_articles)
            if not translated_articles:
                logger.warning("⚠️ No articles translated. Exiting pipeline.")
                return self.finish()
            logger.info(f"✅ Translated {len(translated_articles)} articles")

            # Stage 4: Publish
            logger.info("\n📤 STAGE 4: PUBLISHING")
            logger.info("-" * 40)
            published_count = run_publisher(translated_articles)
            self.stats['published'] = published_count
            if published_count == 0:
                logger.warning("⚠️ No articles published.")
            else:
                logger.info(f"✅ Published {published_count} articles")

            # Finish
            return self.finish()
        except KeyboardInterrupt:
            logger.warning("\n⚠️ Pipeline interrupted by user")
            return self.finish(interrupted=True)
        except Exception as e:
            # logger.exception attaches the full traceback automatically —
            # no manual traceback.format_exc() needed.
            logger.exception(f"\n❌ Pipeline failed with error: {e}")
            return self.finish(failed=True)

    def finish(self, interrupted=False, failed=False):
        """Log the pipeline summary plus site statistics.

        Args:
            interrupted: True when the run was stopped by the user (Ctrl-C).
            failed: True when the run aborted with an unhandled exception.

        Returns:
            int: number of articles published this run.
        """
        # Guard: finish() may be reached before run() ever set start_time
        # (e.g. called directly); report a zero duration instead of crashing.
        started = self.start_time if self.start_time is not None else time.time()
        duration = int(time.time() - started)
        logger.info("\n" + "="*60)
        logger.info("📊 PIPELINE SUMMARY")
        logger.info("="*60)
        if interrupted:
            status = "⚠️ INTERRUPTED"
        elif failed:
            status = "❌ FAILED"
        elif self.stats['published'] > 0:
            status = "✅ SUCCESS"
        else:
            status = "⚠️ COMPLETED WITH WARNINGS"
        logger.info(f"Status: {status}")
        logger.info(f"Duration: {duration}s ({duration // 60}m {duration % 60}s)")
        logger.info("")
        logger.info(f"Articles scraped: {self.stats['scraped']}")
        logger.info(f"Articles compiled: {self.stats['compiled']}")
        logger.info(f"Articles translated: {self.stats['translated']}")
        logger.info(f"Articles published: {self.stats['published']}")
        logger.info("="*60)
        # Site-wide stats are best-effort: a stats failure must not mask the
        # pipeline result, so errors are logged and swallowed here.
        try:
            site_stats = database.get_site_stats()
            logger.info(f"\n📈 SITE STATISTICS")
            logger.info(f"Total articles: {site_stats['total_articles']}")
            logger.info(f"Total views: {site_stats['total_views']}")
            logger.info(f"Subscribers: {site_stats['subscribers']}")
            logger.info("="*60)
        except Exception as e:
            logger.error(f"Error fetching site stats: {e}")
        return self.stats['published']
def main():
    """Entry point: verify required configuration, then run the pipeline.

    Exits with status 0 when at least one article was published, and with
    status 1 otherwise (including when mandatory configuration is missing).
    """
    # Fail fast when mandatory configuration is absent.
    if not config.ANTHROPIC_API_KEY:
        logger.error("❌ ANTHROPIC_API_KEY not set in environment!")
        logger.error("Please set it in .env file or environment variables.")
        sys.exit(1)
    if not config.DATABASE_URL:
        logger.error("❌ DATABASE_URL not set!")
        sys.exit(1)

    # Run the full pipeline; run() returns the published-article count.
    published_count = Pipeline().run()

    # Map the count onto a process exit code for cron/CI callers.
    sys.exit(1 if published_count == 0 else 0)


if __name__ == '__main__':
    main()