Fix scraper: use newspaper4k, handle all RSS sources

This commit is contained in:
Min Zeya Phyo
2026-02-19 19:34:14 +08:00
parent 879fdc3849
commit 9d7e028550
2 changed files with 2 additions and 2 deletions

View File

@@ -4,7 +4,7 @@
 beautifulsoup4==4.12.3
 requests==2.31.0
 feedparser==6.0.11
-newspaper3k==0.2.8
+newspaper4k>=0.9.3
 lxml_html_clean
 # Database

View File

@@ -31,7 +31,7 @@ class AINewsScraper:
 try:
     if source_name == 'medium':
         articles = self.scrape_medium(source_config)
-    elif source_name in ['techcrunch', 'venturebeat', 'mit_tech_review']:
+    elif 'url' in source_config:
         articles = self.scrape_rss_feed(source_config)
     else:
         logger.warning(f"Unknown source: {source_name}")