forked from minzeyaphyo/burmddit
Fix scraper: use newspaper4k, handle all RSS sources
This commit is contained in:
@@ -4,7 +4,7 @@
|
||||
beautifulsoup4==4.12.3
|
||||
requests==2.31.0
|
||||
feedparser==6.0.11
|
||||
-newspaper3k==0.2.8
|
||||
+newspaper4k>=0.9.3
|
||||
lxml_html_clean
|
||||
|
||||
# Database
|
||||
|
||||
@@ -31,7 +31,7 @@ class AINewsScraper:
|
||||
try:
|
||||
if source_name == 'medium':
|
||||
articles = self.scrape_medium(source_config)
|
||||
-elif source_name in ['techcrunch', 'venturebeat', 'mit_tech_review']:
|
||||
+elif 'url' in source_config:
|
||||
articles = self.scrape_rss_feed(source_config)
|
||||
else:
|
||||
logger.warning(f"Unknown source: {source_name}")
|
||||
|
||||
Reference in New Issue
Block a user