Fix scraper: use newspaper4k, handle all RSS sources

This commit is contained in:
Min Zeya Phyo
2026-02-19 19:34:14 +08:00
parent 879fdc3849
commit 9d7e028550
2 changed files with 2 additions and 2 deletions

View File

@@ -4,7 +4,7 @@
beautifulsoup4==4.12.3
requests==2.31.0
feedparser==6.0.11
-newspaper3k==0.2.8
+newspaper4k>=0.9.3
lxml_html_clean
# Database

View File

@@ -31,7 +31,7 @@ class AINewsScraper:
try:
if source_name == 'medium':
articles = self.scrape_medium(source_config)
-elif source_name in ['techcrunch', 'venturebeat', 'mit_tech_review']:
+elif 'url' in source_config:
articles = self.scrape_rss_feed(source_config)
else:
logger.warning(f"Unknown source: {source_name}")