diff --git a/backend/requirements-pipeline.txt b/backend/requirements-pipeline.txt index 3a791e4..1070ee4 100644 --- a/backend/requirements-pipeline.txt +++ b/backend/requirements-pipeline.txt @@ -4,7 +4,7 @@ beautifulsoup4==4.12.3 requests==2.31.0 feedparser==6.0.11 -newspaper3k==0.2.8 +newspaper4k>=0.9.3 lxml_html_clean # Database diff --git a/backend/scraper.py b/backend/scraper.py index a79ba9d..adeca38 100644 --- a/backend/scraper.py +++ b/backend/scraper.py @@ -31,7 +31,7 @@ class AINewsScraper: try: if source_name == 'medium': articles = self.scrape_medium(source_config) - elif source_name in ['techcrunch', 'venturebeat', 'mit_tech_review']: + elif 'url' in source_config: articles = self.scrape_rss_feed(source_config) else: logger.warning(f"Unknown source: {source_name}")