forked from minzeyaphyo/burmddit
Add web admin features + fix scraper & translator
Frontend changes: - Add /admin dashboard for article management - Add AdminButton component (Alt+Shift+A on articles) - Add /api/admin/article API endpoints Backend improvements: - scraper_v2.py: Multi-layer fallback extraction (newspaper → trafilatura → readability) - translator_v2.py: Better chunking, repetition detection, validation - admin_tools.py: CLI admin commands - test_scraper.py: Individual source testing Docs: - WEB-ADMIN-GUIDE.md: Web admin usage - ADMIN-GUIDE.md: CLI admin usage - SCRAPER-IMPROVEMENT-PLAN.md: Scraper fixes details - TRANSLATION-FIX.md: Translation improvements - ADMIN-FEATURES-SUMMARY.md: Implementation summary Fixes: - Article scraping from 0 → 96+ articles working - Translation quality issues (repetition, truncation) - Added 13 new RSS sources
This commit is contained in:
393
backend/admin_tools.py
Executable file
393
backend/admin_tools.py
Executable file
@@ -0,0 +1,393 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Admin tools for managing burmddit articles
|
||||
"""
|
||||
|
||||
import psycopg2
|
||||
from dotenv import load_dotenv
|
||||
import os
|
||||
from datetime import datetime
|
||||
from loguru import logger
|
||||
import sys
|
||||
|
||||
load_dotenv()
|
||||
|
||||
def get_connection():
    """Open a new database connection from the ``DATABASE_URL`` setting.

    Returns:
        A new psycopg2 connection. The caller owns it and is responsible
        for closing it.

    Raises:
        RuntimeError: If ``DATABASE_URL`` is not set in the environment.
    """
    dsn = os.getenv('DATABASE_URL')
    if dsn is None:
        # Fail with an actionable message instead of handing None to
        # psycopg2.connect(). An empty string is deliberately still passed
        # through so existing setups keep working.
        raise RuntimeError(
            "DATABASE_URL is not set; define it in the environment or in a .env file"
        )
    return psycopg2.connect(dsn)
|
||||
|
||||
def list_articles(status=None, limit=20):
    """List articles ordered by ``published_at`` (newest first).

    Args:
        status: When truthy, only articles whose ``status`` column equals
            this value are returned; otherwise no status filter is applied.
        limit: Maximum number of rows to fetch.

    Returns:
        A list of dicts with the keys ``id``, ``title`` (cut to 60
        characters plus ``...`` when longer), ``status``, ``published_at``,
        ``views`` (0 when the stored count is NULL), ``content_len`` and
        ``burmese_len``. The two lengths are ``None`` when the underlying
        column is NULL.
    """
    # Only the WHERE clause differs between the filtered and unfiltered
    # query. The clause is a fixed literal; values are always bound.
    query = '''
        SELECT id, title, status, published_at, view_count,
               LENGTH(content) as content_len,
               LENGTH(content_burmese) as burmese_len
        FROM articles
    '''
    if status:
        query += ' WHERE status = %s'
        params = (status, limit)
    else:
        params = (limit,)
    query += ' ORDER BY published_at DESC LIMIT %s'

    conn = get_connection()
    try:
        with conn.cursor() as cur:
            cur.execute(query, params)
            rows = cur.fetchall()
    finally:
        # Release the connection even if the query fails.
        conn.close()

    articles = []
    for row in rows:
        # Guard against a NULL title so one bad row cannot break the listing.
        title = row[1] or ''
        articles.append({
            'id': row[0],
            'title': title[:60] + '...' if len(title) > 60 else title,
            'status': row[2],
            'published_at': row[3],
            'views': row[4] or 0,
            'content_len': row[5],
            'burmese_len': row[6],
        })

    return articles
|
||||
|
||||
def unpublish_article(article_id: int, reason: str = "Error/Quality issue"):
    """Unpublish an article by setting its status to ``draft``.

    Args:
        article_id: Primary key of the article to unpublish.
        reason: Free-text reason. It is only written to the log; it is not
            stored in the database.

    Returns:
        ``True`` when the update was committed, ``False`` when no article
        with ``article_id`` exists.
    """
    conn = get_connection()
    try:
        with conn.cursor() as cur:
            # Look the article up first so a missing id is reported clearly.
            cur.execute('SELECT id, title, status FROM articles WHERE id = %s', (article_id,))
            article = cur.fetchone()

            if not article:
                logger.error(f"Article {article_id} not found")
                return False

            # A NULL title must not abort the operation.
            title = article[1] or ''
            logger.info(f"Unpublishing article {article_id}: {title[:60]}...")
            logger.info(f"Current status: {article[2]}")
            logger.info(f"Reason: {reason}")

            cur.execute('''
                UPDATE articles
                SET status = 'draft',
                    updated_at = NOW()
                WHERE id = %s
            ''', (article_id,))

        conn.commit()
        logger.info(f"✅ Article {article_id} unpublished successfully")
        return True
    finally:
        # Always release the connection; closing without a commit discards
        # any uncommitted change.
        conn.close()
|
||||
|
||||
def republish_article(article_id: int):
    """Republish an article by setting its status to ``published``.

    Args:
        article_id: Primary key of the article to republish.

    Returns:
        ``True`` when the update was committed, ``False`` when no article
        with ``article_id`` exists.
    """
    conn = get_connection()
    try:
        with conn.cursor() as cur:
            # Look the article up first so a missing id is reported clearly.
            cur.execute('SELECT id, title, status FROM articles WHERE id = %s', (article_id,))
            article = cur.fetchone()

            if not article:
                logger.error(f"Article {article_id} not found")
                return False

            # A NULL title must not abort the operation.
            title = article[1] or ''
            logger.info(f"Republishing article {article_id}: {title[:60]}...")
            logger.info(f"Current status: {article[2]}")

            cur.execute('''
                UPDATE articles
                SET status = 'published',
                    updated_at = NOW()
                WHERE id = %s
            ''', (article_id,))

        conn.commit()
        logger.info(f"✅ Article {article_id} republished successfully")
        return True
    finally:
        # Always release the connection; closing without a commit discards
        # any uncommitted change.
        conn.close()
|
||||
|
||||
def delete_article(article_id: int):
    """Permanently delete an article row.

    Args:
        article_id: Primary key of the article to delete.

    Returns:
        ``True`` when the delete was committed, ``False`` when no article
        with ``article_id`` exists.
    """
    conn = get_connection()
    try:
        with conn.cursor() as cur:
            # Look the article up first so the log shows what is being removed.
            cur.execute('SELECT id, title, status FROM articles WHERE id = %s', (article_id,))
            article = cur.fetchone()

            if not article:
                logger.error(f"Article {article_id} not found")
                return False

            # A NULL title must not abort the operation.
            title = article[1] or ''
            logger.warning(f"⚠️ DELETING article {article_id}: {title[:60]}...")

            cur.execute('DELETE FROM articles WHERE id = %s', (article_id,))

        conn.commit()
        logger.info(f"✅ Article {article_id} deleted permanently")
        return True
    finally:
        # Always release the connection; closing without a commit discards
        # any uncommitted change.
        conn.close()
|
||||
|
||||
def find_problem_articles():
    """Find published articles that look broken or low quality.

    Three independent checks are run, each limited to 10 rows:

    1. Burmese content shorter than 30% of the original content.
    2. Burmese content that is NULL or shorter than 100 characters.
    3. Original content shorter than 500 characters.

    Returns:
        A list of dicts with the keys ``id``, ``title`` (first 50
        characters), ``issue`` and ``details``. Results are not
        de-duplicated, so an article matching several checks appears once
        per check.
    """
    issues = []

    def _short_title(title):
        # A NULL title must not break the report.
        return (title or '')[:50]

    conn = get_connection()
    try:
        with conn.cursor() as cur:
            # Issue 1: Translation too short (< 30% of original)
            cur.execute('''
                SELECT id, title,
                       LENGTH(content) as en_len,
                       LENGTH(content_burmese) as mm_len,
                       ROUND(100.0 * LENGTH(content_burmese) / NULLIF(LENGTH(content), 0), 1) as ratio
                FROM articles
                WHERE status = 'published'
                  AND LENGTH(content_burmese) < LENGTH(content) * 0.3
                ORDER BY ratio ASC
                LIMIT 10
            ''')
            for row in cur.fetchall():
                issues.append({
                    'id': row[0],
                    'title': _short_title(row[1]),
                    'issue': 'Translation too short',
                    'details': f'EN: {row[2]} chars, MM: {row[3]} chars ({row[4]}%)',
                })

            # Issue 2: Missing Burmese content
            cur.execute('''
                SELECT id, title
                FROM articles
                WHERE status = 'published'
                  AND (content_burmese IS NULL OR LENGTH(content_burmese) < 100)
                LIMIT 10
            ''')
            for row in cur.fetchall():
                issues.append({
                    'id': row[0],
                    'title': _short_title(row[1]),
                    'issue': 'Missing Burmese translation',
                    'details': 'No or very short Burmese content',
                })

            # Issue 3: Very short articles (< 500 chars)
            cur.execute('''
                SELECT id, title, LENGTH(content) as len
                FROM articles
                WHERE status = 'published'
                  AND LENGTH(content) < 500
                LIMIT 10
            ''')
            for row in cur.fetchall():
                issues.append({
                    'id': row[0],
                    'title': _short_title(row[1]),
                    'issue': 'Article too short',
                    'details': f'Only {row[2]} chars',
                })
    finally:
        # Release the connection even if one of the queries fails.
        conn.close()

    return issues
|
||||
|
||||
def get_article_details(article_id: int):
    """Fetch detailed information about a single article.

    Args:
        article_id: Primary key of the article to look up.

    Returns:
        ``None`` when no article with ``article_id`` exists; otherwise a
        dict with the keys ``id``, ``title``, ``title_burmese``, ``slug``,
        ``status``, ``content_length``, ``burmese_length``,
        ``translation_ratio``, ``category_id``, ``author``,
        ``reading_time``, ``published_at``, ``view_count``, ``created_at``,
        ``updated_at``, ``content_preview`` and ``burmese_preview``.

        ``content_length`` and ``burmese_length`` are 0 when the
        corresponding column is NULL. ``translation_ratio`` is the Burmese
        length as a percentage of the content length, rounded to one
        decimal, or 0 when the content length is 0. The previews are the
        first 200 characters of each text.
    """
    conn = get_connection()
    try:
        with conn.cursor() as cur:
            cur.execute('''
                SELECT id, title, title_burmese, slug, status,
                       LENGTH(content) as content_len,
                       LENGTH(content_burmese) as burmese_len,
                       category_id, author, reading_time,
                       published_at, view_count, created_at, updated_at,
                       LEFT(content, 200) as content_preview,
                       LEFT(content_burmese, 200) as burmese_preview
                FROM articles
                WHERE id = %s
            ''', (article_id,))
            row = cur.fetchone()
    finally:
        # Closed on every path, including the "not found" early return below.
        conn.close()

    if not row:
        return None

    # LENGTH() of a NULL column is NULL; treat that as zero so an article
    # without a translation can still be inspected.
    content_length = row[5] or 0
    burmese_length = row[6] or 0
    if content_length > 0:
        translation_ratio = round(100.0 * burmese_length / content_length, 1)
    else:
        translation_ratio = 0

    return {
        'id': row[0],
        'title': row[1],
        'title_burmese': row[2],
        'slug': row[3],
        'status': row[4],
        'content_length': content_length,
        'burmese_length': burmese_length,
        'translation_ratio': translation_ratio,
        'category_id': row[7],
        'author': row[8],
        'reading_time': row[9],
        'published_at': row[10],
        'view_count': row[11] or 0,
        'created_at': row[12],
        'updated_at': row[13],
        'content_preview': row[14],
        'burmese_preview': row[15],
    }
|
||||
|
||||
def print_article_table(articles):
    """Print articles to stdout as a fixed-width table.

    Args:
        articles: Iterable of dicts with the keys ``id``, ``title``,
            ``status``, ``views``, ``content_len`` and ``burmese_len``.
            Either length may be ``None``.

    The ``Ratio`` column shows the Burmese length as a percentage of the
    content length. It is ``N/A`` when the content length is missing or
    zero, and a missing Burmese length counts as zero.
    """
    print()
    print("=" * 100)
    print(f"{'ID':<5} {'Title':<50} {'Status':<12} {'Views':<8} {'Ratio':<8}")
    print("-" * 100)

    for a in articles:
        # Lengths may be None for rows with no content or no translation;
        # normalise them so the ratio never raises TypeError.
        content_len = a['content_len'] or 0
        burmese_len = a['burmese_len'] or 0
        ratio = f"{100.0 * burmese_len / content_len:.1f}%" if content_len > 0 else "N/A"
        print(f"{a['id']:<5} {a['title']:<50} {a['status']:<12} {a['views']:<8} {ratio:<8}")

    print("=" * 100)
    print()
|
||||
|
||||
def main():
    """Command-line entry point for the admin tools.

    Sub-commands: ``list``, ``unpublish``, ``republish``, ``delete``
    (requires ``--confirm``), ``find-problems`` and ``details``. With no
    sub-command the help text is printed.

    The process exits with status 1 when a requested operation fails: the
    article does not exist, or a delete is attempted without ``--confirm``.
    This lets shell scripts detect the failure.
    """
    import argparse

    parser = argparse.ArgumentParser(description='Burmddit Admin Tools')
    subparsers = parser.add_subparsers(dest='command', help='Commands')

    # List command
    list_parser = subparsers.add_parser('list', help='List articles')
    list_parser.add_argument('--status', choices=['published', 'draft'], help='Filter by status')
    list_parser.add_argument('--limit', type=int, default=20, help='Number of articles')

    # Unpublish command
    unpublish_parser = subparsers.add_parser('unpublish', help='Unpublish an article')
    unpublish_parser.add_argument('article_id', type=int, help='Article ID')
    unpublish_parser.add_argument('--reason', default='Error/Quality issue', help='Reason for unpublishing')

    # Republish command
    republish_parser = subparsers.add_parser('republish', help='Republish an article')
    republish_parser.add_argument('article_id', type=int, help='Article ID')

    # Delete command
    delete_parser = subparsers.add_parser('delete', help='Delete an article permanently')
    delete_parser.add_argument('article_id', type=int, help='Article ID')
    delete_parser.add_argument('--confirm', action='store_true', help='Confirm deletion')

    # Find problems command
    subparsers.add_parser('find-problems', help='Find articles with issues')

    # Details command
    details_parser = subparsers.add_parser('details', help='Show article details')
    details_parser.add_argument('article_id', type=int, help='Article ID')

    args = parser.parse_args()

    # Configure logger: drop the default sink and log bare messages to stdout
    logger.remove()
    logger.add(sys.stdout, format="<level>{message}</level>", level="INFO")

    if args.command == 'list':
        articles = list_articles(status=args.status, limit=args.limit)
        print_article_table(articles)
        print(f"Total: {len(articles)} articles")

    elif args.command == 'unpublish':
        # The helper returns False when the article does not exist.
        if not unpublish_article(args.article_id, args.reason):
            sys.exit(1)

    elif args.command == 'republish':
        if not republish_article(args.article_id):
            sys.exit(1)

    elif args.command == 'delete':
        if not args.confirm:
            logger.error("⚠️ Deletion requires --confirm flag to prevent accidents")
            sys.exit(1)
        if not delete_article(args.article_id):
            sys.exit(1)

    elif args.command == 'find-problems':
        issues = find_problem_articles()
        if not issues:
            logger.info("✅ No issues found!")
        else:
            print()
            print("=" * 100)
            print(f"Found {len(issues)} potential issues:")
            print("-" * 100)
            for issue in issues:
                print(f"ID {issue['id']}: {issue['title']}")
                print(f" Issue: {issue['issue']}")
                print(f" Details: {issue['details']}")
                print()
            print("=" * 100)
            print()
            print("To unpublish an article: python3 admin_tools.py unpublish <ID>")

    elif args.command == 'details':
        article = get_article_details(args.article_id)
        if not article:
            logger.error(f"Article {args.article_id} not found")
            sys.exit(1)

        print()
        print("=" * 80)
        print(f"Article {article['id']} Details")
        print("=" * 80)
        print(f"Title (EN): {article['title']}")
        print(f"Title (MM): {article['title_burmese']}")
        print(f"Slug: {article['slug']}")
        print(f"Status: {article['status']}")
        print(f"Author: {article['author']}")
        print(f"Published: {article['published_at']}")
        print(f"Views: {article['view_count']}")
        print()
        print(f"Content length: {article['content_length']} chars")
        print(f"Burmese length: {article['burmese_length']} chars")
        print(f"Translation ratio: {article['translation_ratio']}%")
        print()
        print("English preview:")
        print(article['content_preview'])
        print()
        print("Burmese preview:")
        print(article['burmese_preview'])
        print("=" * 80)

    else:
        # No sub-command given.
        parser.print_help()
|
||||
|
||||
# Run the CLI only when this file is executed as a script, not when imported.
if __name__ == '__main__':
    main()
|
||||
Reference in New Issue
Block a user