forked from minzeyaphyo/burmddit
Frontend changes:
- Add /admin dashboard for article management
- Add AdminButton component (Alt+Shift+A on articles)
- Add /api/admin/article API endpoints

Backend improvements:
- scraper_v2.py: Multi-layer fallback extraction (newspaper → trafilatura → readability)
- translator_v2.py: Better chunking, repetition detection, validation
- admin_tools.py: CLI admin commands
- test_scraper.py: Individual source testing

Docs:
- WEB-ADMIN-GUIDE.md: Web admin usage
- ADMIN-GUIDE.md: CLI admin usage
- SCRAPER-IMPROVEMENT-PLAN.md: Scraper fix details
- TRANSLATION-FIX.md: Translation improvements
- ADMIN-FEATURES-SUMMARY.md: Implementation summary

Fixes:
- Article scraping now working (0 → 96+ articles)
- Translation quality issues (repetition, truncation)
- Added 13 new RSS sources
91 lines
2.3 KiB
Python
Executable File
91 lines
2.3 KiB
Python
Executable File
#!/usr/bin/env python3
|
|
"""
|
|
Re-translate an article whose stored Burmese translation is broken or truncated (defaults to article ID 50).
|
|
"""
|
|
|
|
import sys
|
|
from loguru import logger
|
|
from translator_v2 import BurmeseTranslator
|
|
import database
|
|
|
|
def fix_article(article_id: int, *, min_ratio: float = 0.3) -> bool:
    """Re-translate one article and store the new Burmese text.

    Reads ``id``, ``title``, ``excerpt`` and ``content`` for the given row of
    the ``articles`` table, passes them to
    ``BurmeseTranslator.translate_article`` and, when the result passes a
    length sanity check, writes ``title_burmese``, ``excerpt_burmese`` and
    ``content_burmese`` back to the same row and commits.

    Args:
        article_id: Value of ``articles.id`` for the row to re-translate.
        min_ratio: Minimum accepted ratio of translated content length to
            source content length (both in characters). Translations below
            this ratio are treated as still truncated and are not saved.

    Returns:
        True if the row was updated. False if the article does not exist,
        has no source content, or the new translation is too short.
    """
    # Kept as function-local imports, matching the original script.
    import os

    import psycopg2
    from dotenv import load_dotenv

    logger.info(f"Fixing article {article_id}...")

    # Connection string is read from DATABASE_URL (optionally via a .env file).
    load_dotenv()
    conn = psycopg2.connect(os.getenv('DATABASE_URL'))
    try:
        cur = conn.cursor()
        try:
            # Get article from database
            cur.execute('''
                SELECT id, title, excerpt, content
                FROM articles
                WHERE id = %s
            ''', (article_id,))

            row = cur.fetchone()
            if not row:
                logger.error(f"Article {article_id} not found")
                return False

            article = {
                'id': row[0],
                'title': row[1],
                'excerpt': row[2],
                'content': row[3]
            }

            # A NULL or empty content column cannot be translated, and would
            # otherwise crash len() or the ratio division below.
            source_length = len(article['content'] or '')
            if source_length == 0:
                logger.error(f"Article {article_id} has no content to translate")
                return False

            logger.info(f"Article: {(article['title'] or '')[:50]}...")
            logger.info(f"Content length: {source_length} chars")

            # Translate
            translator = BurmeseTranslator()
            translated = translator.translate_article(article)

            logger.info("Translation complete:")
            logger.info(f"  Title Burmese: {len(translated['title_burmese'])} chars")
            logger.info(f"  Excerpt Burmese: {len(translated['excerpt_burmese'])} chars")
            logger.info(f"  Content Burmese: {len(translated['content_burmese'])} chars")

            # Validate: a very low length ratio means the output is still
            # truncated, so the stored translation is left untouched.
            ratio = len(translated['content_burmese']) / source_length
            logger.info(f"  Length ratio: {ratio:.2f} (should be 0.5-2.0)")

            if ratio < min_ratio:
                logger.error("Translation still too short! Not updating.")
                return False

            # Update database
            cur.execute('''
                UPDATE articles
                SET title_burmese = %s,
                    excerpt_burmese = %s,
                    content_burmese = %s
                WHERE id = %s
            ''', (
                translated['title_burmese'],
                translated['excerpt_burmese'],
                translated['content_burmese'],
                article_id
            ))

            conn.commit()
            logger.info(f"✅ Article {article_id} updated successfully")
            return True
        finally:
            cur.close()
    finally:
        # Always release the connection, including on early returns and on
        # exceptions; closing without commit discards any pending changes.
        conn.close()
|
|
|
|
if __name__ == '__main__':
    # NOTE(review): `config` is not referenced in this block; presumably it is
    # imported for import-time side effects — confirm before removing.
    import config

    logger.add(sys.stdout, level="INFO")

    # The article ID is the first CLI argument; it defaults to 50 when omitted.
    if len(sys.argv) > 1:
        try:
            article_id = int(sys.argv[1])
        except ValueError:
            logger.error(f"Invalid article ID: {sys.argv[1]!r} (expected an integer)")
            sys.exit(2)
    else:
        article_id = 50

    # Propagate the outcome so shells and schedulers can detect a failed fix.
    sys.exit(0 if fix_article(article_id) else 1)
|