forked from minzeyaphyo/burmddit
Initial Burmddit deployment - AI news aggregator in Burmese
This commit is contained in:
160
backend/run_pipeline.py
Normal file
160
backend/run_pipeline.py
Normal file
@@ -0,0 +1,160 @@
|
||||
#!/usr/bin/env python3
|
||||
# Main pipeline orchestrator - Runs entire content generation pipeline
|
||||
|
||||
import sys
|
||||
import time
|
||||
from datetime import datetime
|
||||
from loguru import logger
|
||||
import config
|
||||
|
||||
# Import pipeline stages
|
||||
from scraper import run_scraper
|
||||
from compiler import run_compiler
|
||||
from translator import run_translator
|
||||
from publisher import run_publisher
|
||||
import database
|
||||
|
||||
# Configure logging: replace loguru's default sink with two explicit sinks —
# a console sink at the configured verbosity and a rotating file sink.
logger.remove()  # Remove default handler
# Console sink; verbosity comes from config.LOG_LEVEL (e.g. "DEBUG"/"INFO").
logger.add(sys.stderr, level=config.LOG_LEVEL)
# File sink: rotate daily, keep 7 days of history, record INFO and above.
logger.add(config.LOG_FILE, rotation="1 day", retention="7 days", level="INFO")
|
||||
|
||||
class Pipeline:
    """Orchestrates the four-stage content pipeline:
    scrape -> compile -> translate -> publish.

    Tracks per-stage article counts and total wall-clock duration, and
    prints a summary when the run ends (normally, interrupted, or failed).
    """

    def __init__(self):
        # Wall-clock start timestamp; set by run(), None until then.
        self.start_time = None
        # Per-stage article counts, reported in the final summary.
        self.stats = {
            'scraped': 0,
            'compiled': 0,
            'translated': 0,
            'published': 0
        }

    def run(self):
        """Execute full pipeline.

        Each stage short-circuits to finish() when it produces nothing,
        since later stages would have no input to work on.

        Returns:
            int: number of published articles (finish()'s return value).
        """
        self.start_time = time.time()
        logger.info("="*60)
        logger.info(f"🚀 Starting Burmddit Content Pipeline - {datetime.now()}")
        logger.info("="*60)

        try:
            # Stage 1: Scrape
            logger.info("\n📥 STAGE 1: SCRAPING")
            logger.info("-" * 40)
            scraped_count = run_scraper()
            self.stats['scraped'] = scraped_count

            if scraped_count == 0:
                logger.warning("⚠️ No articles scraped. Exiting pipeline.")
                return self.finish()

            logger.info(f"✅ Scraped {scraped_count} articles")

            # Stage 2: Compile
            logger.info("\n🔨 STAGE 2: COMPILING")
            logger.info("-" * 40)
            compiled_articles = run_compiler()
            self.stats['compiled'] = len(compiled_articles)

            if not compiled_articles:
                logger.warning("⚠️ No articles compiled. Exiting pipeline.")
                return self.finish()

            logger.info(f"✅ Compiled {len(compiled_articles)} articles")

            # Stage 3: Translate
            logger.info("\n🌍 STAGE 3: TRANSLATING TO BURMESE")
            logger.info("-" * 40)
            translated_articles = run_translator(compiled_articles)
            self.stats['translated'] = len(translated_articles)

            if not translated_articles:
                logger.warning("⚠️ No articles translated. Exiting pipeline.")
                return self.finish()

            logger.info(f"✅ Translated {len(translated_articles)} articles")

            # Stage 4: Publish
            logger.info("\n📤 STAGE 4: PUBLISHING")
            logger.info("-" * 40)
            published_count = run_publisher(translated_articles)
            self.stats['published'] = published_count

            if published_count == 0:
                logger.warning("⚠️ No articles published.")
            else:
                logger.info(f"✅ Published {published_count} articles")

            # Finish
            return self.finish()

        except KeyboardInterrupt:
            logger.warning("\n⚠️ Pipeline interrupted by user")
            return self.finish(interrupted=True)

        except Exception as e:
            # logger.exception records the message AND the full traceback,
            # replacing the previous manual traceback.format_exc() dump.
            logger.exception(f"\n❌ Pipeline failed with error: {e}")
            return self.finish(failed=True)

    def finish(self, interrupted=False, failed=False):
        """Finish pipeline and display summary.

        Args:
            interrupted: True when the run was stopped by the user (Ctrl-C).
            failed: True when the run aborted on an unexpected exception.

        Returns:
            int: number of published articles, so the caller can derive an
            exit status from it.
        """
        # Guard: finish() may be reached before run() ever set start_time;
        # report 0s instead of raising a TypeError on None arithmetic.
        duration = int(time.time() - self.start_time) if self.start_time else 0

        logger.info("\n" + "="*60)
        logger.info("📊 PIPELINE SUMMARY")
        logger.info("="*60)

        # Precedence: interruption/failure override any partial success.
        if interrupted:
            status = "⚠️ INTERRUPTED"
        elif failed:
            status = "❌ FAILED"
        elif self.stats['published'] > 0:
            status = "✅ SUCCESS"
        else:
            status = "⚠️ COMPLETED WITH WARNINGS"

        logger.info(f"Status: {status}")
        logger.info(f"Duration: {duration}s ({duration // 60}m {duration % 60}s)")
        logger.info("")
        logger.info(f"Articles scraped: {self.stats['scraped']}")
        logger.info(f"Articles compiled: {self.stats['compiled']}")
        logger.info(f"Articles translated: {self.stats['translated']}")
        logger.info(f"Articles published: {self.stats['published']}")
        logger.info("="*60)

        # Get site stats (best-effort: stats display must never mask the
        # pipeline result, so any database error is logged and swallowed).
        try:
            site_stats = database.get_site_stats()
            logger.info("\n📈 SITE STATISTICS")
            logger.info(f"Total articles: {site_stats['total_articles']}")
            logger.info(f"Total views: {site_stats['total_views']}")
            logger.info(f"Subscribers: {site_stats['subscribers']}")
            logger.info("="*60)
        except Exception as e:
            logger.error(f"Error fetching site stats: {e}")

        return self.stats['published']
|
||||
|
||||
def main():
    """Entry point: validate required configuration, run the pipeline,
    and exit with a status code reflecting whether anything was published."""

    # Fail fast when mandatory configuration is missing.
    if not config.ANTHROPIC_API_KEY:
        logger.error("❌ ANTHROPIC_API_KEY not set in environment!")
        logger.error("Please set it in .env file or environment variables.")
        sys.exit(1)

    if not config.DATABASE_URL:
        logger.error("❌ DATABASE_URL not set!")
        sys.exit(1)

    # Run the full pipeline; run() returns the published-article count.
    published_count = Pipeline().run()

    # Zero exit only when at least one article went out, so schedulers
    # and CI can detect an unproductive run.
    sys.exit(0 if published_count > 0 else 1)


if __name__ == '__main__':
    main()
|
||||
Reference in New Issue
Block a user