161 lines
5.4 KiB
Python
161 lines
5.4 KiB
Python
#!/usr/bin/env python3
|
|
# Main pipeline orchestrator - Runs entire content generation pipeline
|
|
|
|
import sys
|
|
import time
|
|
from datetime import datetime
|
|
from loguru import logger
|
|
import config
|
|
|
|
# Import pipeline stages
|
|
from scraper import run_scraper
|
|
from compiler import run_compiler
|
|
from translator import run_translator
|
|
from publisher import run_publisher
|
|
import database
|
|
|
|
# Configure logging: replace loguru's default sink with a console sink at the
# configured verbosity plus a rotating daily file kept for 7 days.
logger.remove()  # Remove default handler
logger.add(sys.stderr, level=config.LOG_LEVEL)  # console output; level comes from project config
logger.add(config.LOG_FILE, rotation="1 day", retention="7 days", level="INFO")  # persistent INFO+ log file
|
|
|
|
class Pipeline:
    """Orchestrates the four-stage content pipeline: scrape → compile → translate → publish.

    Per-stage article counts accumulate in ``self.stats``; ``finish()`` logs a
    summary and returns the published count, which ``main()`` maps onto the
    process exit code.
    """

    def __init__(self):
        # Wall-clock start of the run; set by run(). None until then.
        self.start_time = None
        # Per-stage article counters, filled in as each stage completes.
        self.stats = {
            'scraped': 0,
            'compiled': 0,
            'translated': 0,
            'published': 0
        }

    def run(self):
        """Execute full pipeline.

        Each stage short-circuits: if it produces nothing, the pipeline stops
        early and reports via finish(). Returns the number of published
        articles (finish()'s return value).
        """
        self.start_time = time.time()
        logger.info("=" * 60)
        logger.info(f"🚀 Starting Burmddit Content Pipeline - {datetime.now()}")
        logger.info("=" * 60)

        try:
            # Stage 1: Scrape — returns a count of newly scraped articles.
            logger.info("\n📥 STAGE 1: SCRAPING")
            logger.info("-" * 40)
            scraped_count = run_scraper()
            self.stats['scraped'] = scraped_count

            if scraped_count == 0:
                logger.warning("⚠️ No articles scraped. Exiting pipeline.")
                return self.finish()

            logger.info(f"✅ Scraped {scraped_count} articles")

            # Stage 2: Compile — returns the list of compiled articles.
            logger.info("\n🔨 STAGE 2: COMPILING")
            logger.info("-" * 40)
            compiled_articles = run_compiler()
            self.stats['compiled'] = len(compiled_articles)

            if not compiled_articles:
                logger.warning("⚠️ No articles compiled. Exiting pipeline.")
                return self.finish()

            logger.info(f"✅ Compiled {len(compiled_articles)} articles")

            # Stage 3: Translate — consumes compiled articles, returns translated ones.
            logger.info("\n🌍 STAGE 3: TRANSLATING TO BURMESE")
            logger.info("-" * 40)
            translated_articles = run_translator(compiled_articles)
            self.stats['translated'] = len(translated_articles)

            if not translated_articles:
                logger.warning("⚠️ No articles translated. Exiting pipeline.")
                return self.finish()

            logger.info(f"✅ Translated {len(translated_articles)} articles")

            # Stage 4: Publish — returns a count; zero is a warning, not an error.
            logger.info("\n📤 STAGE 4: PUBLISHING")
            logger.info("-" * 40)
            published_count = run_publisher(translated_articles)
            self.stats['published'] = published_count

            if published_count == 0:
                logger.warning("⚠️ No articles published.")
            else:
                logger.info(f"✅ Published {published_count} articles")

            # Finish
            return self.finish()

        except KeyboardInterrupt:
            logger.warning("\n⚠️ Pipeline interrupted by user")
            return self.finish(interrupted=True)

        except Exception as e:
            # Top-level boundary: log the full traceback, then report FAILED.
            logger.error(f"\n❌ Pipeline failed with error: {e}")
            import traceback
            logger.error(traceback.format_exc())
            return self.finish(failed=True)

    def finish(self, interrupted=False, failed=False):
        """Finish pipeline and display summary.

        Args:
            interrupted: True when the run was stopped by Ctrl-C.
            failed: True when the run aborted on an unexpected exception.

        Returns:
            The number of published articles (``self.stats['published']``).
        """
        # Guard: finish() called before run() would leave start_time as None,
        # and None arithmetic raises TypeError — report 0s instead.
        duration = int(time.time() - self.start_time) if self.start_time is not None else 0

        logger.info("\n" + "=" * 60)
        logger.info("📊 PIPELINE SUMMARY")
        logger.info("=" * 60)

        # Status precedence: interruption/failure outrank counters.
        if interrupted:
            status = "⚠️ INTERRUPTED"
        elif failed:
            status = "❌ FAILED"
        elif self.stats['published'] > 0:
            status = "✅ SUCCESS"
        else:
            status = "⚠️ COMPLETED WITH WARNINGS"

        logger.info(f"Status: {status}")
        logger.info(f"Duration: {duration}s ({duration // 60}m {duration % 60}s)")
        logger.info("")  # blank spacer line (was f"" — an f-string with no placeholders)
        logger.info(f"Articles scraped: {self.stats['scraped']}")
        logger.info(f"Articles compiled: {self.stats['compiled']}")
        logger.info(f"Articles translated: {self.stats['translated']}")
        logger.info(f"Articles published: {self.stats['published']}")
        logger.info("=" * 60)

        # Get site stats — best-effort: a stats failure must not mask the
        # pipeline result, so errors are logged and swallowed.
        try:
            # get_site_stats() is used here as a dict with these three keys;
            # exact schema lives in the database module.
            site_stats = database.get_site_stats()
            logger.info("\n📈 SITE STATISTICS")
            logger.info(f"Total articles: {site_stats['total_articles']}")
            logger.info(f"Total views: {site_stats['total_views']}")
            logger.info(f"Subscribers: {site_stats['subscribers']}")
            logger.info("=" * 60)
        except Exception as e:
            logger.error(f"Error fetching site stats: {e}")

        return self.stats['published']
|
|
|
|
def main():
    """Entry point: validate required settings, run the pipeline, and exit.

    Exits with status 0 when at least one article was published, 1 otherwise
    (including the missing-configuration fast-fail paths).
    """
    # Fail fast on missing required configuration.
    if not config.ANTHROPIC_API_KEY:
        logger.error("❌ ANTHROPIC_API_KEY not set in environment!")
        logger.error("Please set it in .env file or environment variables.")
        sys.exit(1)

    if not config.DATABASE_URL:
        logger.error("❌ DATABASE_URL not set!")
        sys.exit(1)

    # Run the pipeline and map the published count onto the exit status.
    published_count = Pipeline().run()
    sys.exit(0 if published_count > 0 else 1)
|
|
|
|
# Script entry point: run only when executed directly, not when imported.
if __name__ == '__main__':
    main()
|