#!/usr/bin/env python3
"""Main pipeline orchestrator - Runs entire content generation pipeline.

Stages run in order: scrape -> compile -> translate -> publish.
Each stage short-circuits the pipeline (via Pipeline.finish) when it
produces no output, so later stages never run on empty input.
"""

import sys
import time
import traceback
from datetime import datetime

from loguru import logger

import config
import database
from compiler import run_compiler
from publisher import run_publisher
from scraper_v2 import run_scraper  # Using improved v2 scraper
from translator_v2 import run_translator  # Using improved v2 translator

# Configure logging
logger.remove()  # Remove default handler
logger.add(sys.stderr, level=config.LOG_LEVEL)
logger.add(config.LOG_FILE, rotation="1 day", retention="7 days", level="INFO")


class Pipeline:
    """Runs the four content-generation stages and tracks per-stage counts."""

    def __init__(self):
        # Set by run(); finish() guards against it being unset.
        self.start_time = None
        self.stats = {
            'scraped': 0,
            'compiled': 0,
            'translated': 0,
            'published': 0,
        }

    def run(self):
        """Execute full pipeline.

        Returns:
            int: number of articles published (0 on early exit or failure).
        """
        self.start_time = time.time()
        logger.info("=" * 60)
        logger.info(f"šŸš€ Starting Burmddit Content Pipeline - {datetime.now()}")
        logger.info("=" * 60)

        try:
            # Stage 1: Scrape
            logger.info("\nšŸ“„ STAGE 1: SCRAPING")
            logger.info("-" * 40)
            scraped_count = run_scraper()
            self.stats['scraped'] = scraped_count
            if scraped_count == 0:
                logger.warning("āš ļø No articles scraped. Exiting pipeline.")
                return self.finish()
            logger.info(f"āœ… Scraped {scraped_count} articles")

            # Stage 2: Compile
            logger.info("\nšŸ”Ø STAGE 2: COMPILING")
            logger.info("-" * 40)
            compiled_articles = run_compiler()
            self.stats['compiled'] = len(compiled_articles)
            if not compiled_articles:
                logger.warning("āš ļø No articles compiled. Exiting pipeline.")
                return self.finish()
            logger.info(f"āœ… Compiled {len(compiled_articles)} articles")

            # Stage 3: Translate
            logger.info("\nšŸŒ STAGE 3: TRANSLATING TO BURMESE")
            logger.info("-" * 40)
            translated_articles = run_translator(compiled_articles)
            self.stats['translated'] = len(translated_articles)
            if not translated_articles:
                logger.warning("āš ļø No articles translated. Exiting pipeline.")
                return self.finish()
            logger.info(f"āœ… Translated {len(translated_articles)} articles")

            # Stage 4: Publish
            logger.info("\nšŸ“¤ STAGE 4: PUBLISHING")
            logger.info("-" * 40)
            published_count = run_publisher(translated_articles)
            self.stats['published'] = published_count
            if published_count == 0:
                logger.warning("āš ļø No articles published.")
            else:
                logger.info(f"āœ… Published {published_count} articles")

            # Finish
            return self.finish()

        except KeyboardInterrupt:
            logger.warning("\nāš ļø Pipeline interrupted by user")
            return self.finish(interrupted=True)

        except Exception as e:
            logger.error(f"\nāŒ Pipeline failed with error: {e}")
            logger.error(traceback.format_exc())
            return self.finish(failed=True)

    def finish(self, interrupted=False, failed=False):
        """Finish pipeline and display summary.

        Args:
            interrupted: True when the run was stopped by Ctrl-C.
            failed: True when an unexpected exception aborted the run.

        Returns:
            int: number of articles published this run.
        """
        # Guard: finish() may be called without run() having set start_time.
        started = self.start_time if self.start_time is not None else time.time()
        duration = int(time.time() - started)

        logger.info("\n" + "=" * 60)
        logger.info("šŸ“Š PIPELINE SUMMARY")
        logger.info("=" * 60)

        if interrupted:
            status = "āš ļø INTERRUPTED"
        elif failed:
            status = "āŒ FAILED"
        elif self.stats['published'] > 0:
            status = "āœ… SUCCESS"
        else:
            status = "āš ļø COMPLETED WITH WARNINGS"

        logger.info(f"Status: {status}")
        logger.info(f"Duration: {duration}s ({duration // 60}m {duration % 60}s)")
        logger.info("")
        logger.info(f"Articles scraped: {self.stats['scraped']}")
        logger.info(f"Articles compiled: {self.stats['compiled']}")
        logger.info(f"Articles translated: {self.stats['translated']}")
        logger.info(f"Articles published: {self.stats['published']}")
        logger.info("=" * 60)

        # Get site stats (best-effort: a stats failure must not mask the
        # pipeline result, so errors are logged and swallowed here).
        try:
            site_stats = database.get_site_stats()
            logger.info("\nšŸ“ˆ SITE STATISTICS")
            logger.info(f"Total articles: {site_stats['total_articles']}")
            logger.info(f"Total views: {site_stats['total_views']}")
            logger.info(f"Subscribers: {site_stats['subscribers']}")
            logger.info("=" * 60)
        except Exception as e:
            logger.error(f"Error fetching site stats: {e}")

        return self.stats['published']


def main():
    """Main entry point: validate environment, run pipeline, set exit code."""
    # Check environment
    if not config.ANTHROPIC_API_KEY:
        logger.error("āŒ ANTHROPIC_API_KEY not set in environment!")
        logger.error("Please set it in .env file or environment variables.")
        sys.exit(1)

    if not config.DATABASE_URL:
        logger.error("āŒ DATABASE_URL not set!")
        sys.exit(1)

    # Run pipeline
    pipeline = Pipeline()
    published = pipeline.run()

    # Exit with status code: nonzero when nothing was published.
    sys.exit(0 if published > 0 else 1)


if __name__ == '__main__':
    main()