Files
burmddit/backend/fix_article_50.py
Zeya Phyo f51ac4afa4 Add web admin features + fix scraper & translator
Frontend changes:
- Add /admin dashboard for article management
- Add AdminButton component (Alt+Shift+A on articles)
- Add /api/admin/article API endpoints

Backend improvements:
- scraper_v2.py: Multi-layer fallback extraction (newspaper → trafilatura → readability)
- translator_v2.py: Better chunking, repetition detection, validation
- admin_tools.py: CLI admin commands
- test_scraper.py: Individual source testing

Docs:
- WEB-ADMIN-GUIDE.md: Web admin usage
- ADMIN-GUIDE.md: CLI admin usage
- SCRAPER-IMPROVEMENT-PLAN.md: Scraper fixes details
- TRANSLATION-FIX.md: Translation improvements
- ADMIN-FEATURES-SUMMARY.md: Implementation summary

Fixes:
- Article scraping from 0 → 96+ articles working
- Translation quality issues (repetition, truncation)
- Added 13 new RSS sources
2026-02-26 09:17:50 +00:00

91 lines
2.3 KiB
Python
Executable File

#!/usr/bin/env python3
"""
Re-translate an article whose Burmese translation is broken or truncated (defaults to article ID 50)
"""
import sys
from loguru import logger
from translator_v2 import BurmeseTranslator
import database
def fix_article(article_id: int):
    """Re-translate one article and write the result back to the database.

    Reads ``id``, ``title``, ``excerpt`` and ``content`` for the given row of
    the ``articles`` table, passes them to
    ``BurmeseTranslator.translate_article``, compares the length of the
    translated content with the original, and, when the translation is not
    suspiciously short, updates ``title_burmese``, ``excerpt_burmese`` and
    ``content_burmese`` for that row.

    The database connection is taken from the ``DATABASE_URL`` environment
    variable (loaded via ``dotenv``) and is always closed before returning,
    including on early exits and on exceptions.

    Args:
        article_id: Value of ``articles.id`` for the row to re-translate.

    Returns:
        True if the row was updated and committed. False if the article does
        not exist, has no source content to translate, or the new translation
        was rejected as too short.
    """
    logger.info(f"Fixing article {article_id}...")

    # Kept as function-local imports, as in the original script.
    import os

    import psycopg2
    from dotenv import load_dotenv

    load_dotenv()
    conn = psycopg2.connect(os.getenv('DATABASE_URL'))
    try:
        with conn.cursor() as cur:
            # Get article from database
            cur.execute('''
                SELECT id, title, excerpt, content
                FROM articles
                WHERE id = %s
            ''', (article_id,))
            row = cur.fetchone()
            if not row:
                logger.error(f"Article {article_id} not found")
                return False

            article = {
                'id': row[0],
                'title': row[1],
                'excerpt': row[2],
                'content': row[3]
            }

            # A NULL title must not crash the preview log line.
            logger.info(f"Article: {(article['title'] or '')[:50]}...")

            # Without source content there is nothing to translate, and the
            # length ratio below would divide by zero (or fail on None).
            if not article['content']:
                logger.error(
                    f"Article {article_id} has no content to translate"
                )
                return False
            logger.info(f"Content length: {len(article['content'])} chars")

            # Translate
            translator = BurmeseTranslator()
            translated = translator.translate_article(article)
            logger.info(f"Translation complete:")
            logger.info(f"  Title Burmese: {len(translated['title_burmese'])} chars")
            logger.info(f"  Excerpt Burmese: {len(translated['excerpt_burmese'])} chars")
            logger.info(f"  Content Burmese: {len(translated['content_burmese'])} chars")

            # Validate. The logged range is the expectation; only a ratio
            # below 0.3 is treated as a hard failure.
            ratio = len(translated['content_burmese']) / len(article['content'])
            logger.info(f"  Length ratio: {ratio:.2f} (should be 0.5-2.0)")
            if ratio < 0.3:
                logger.error("Translation still too short! Not updating.")
                return False

            # Update database
            cur.execute('''
                UPDATE articles
                SET title_burmese = %s,
                    excerpt_burmese = %s,
                    content_burmese = %s
                WHERE id = %s
            ''', (
                translated['title_burmese'],
                translated['excerpt_burmese'],
                translated['content_burmese'],
                article_id
            ))
            conn.commit()
            logger.info(f"✅ Article {article_id} updated successfully")
            return True
    finally:
        # Runs on every exit path; closing without commit discards any
        # uncommitted work.
        conn.close()
if __name__ == '__main__':
    # Script entry point: re-translate the article whose ID is given as the
    # first command-line argument, defaulting to article 50.
    # NOTE(review): `config` is not referenced here; presumably imported for
    # its import-time side effects — confirm before removing.
    import config
    logger.add(sys.stdout, level="INFO")
    article_id = int(sys.argv[1]) if len(sys.argv) > 1 else 50
    # Surface failure to the caller: exit status 1 when fix_article reports
    # that nothing was updated, 0 on success.
    sys.exit(0 if fix_article(article_id) else 1)