forked from minzeyaphyo/burmddit
Frontend changes: - Add /admin dashboard for article management - Add AdminButton component (Alt+Shift+A on articles) - Add /api/admin/article API endpoints Backend improvements: - scraper_v2.py: Multi-layer fallback extraction (newspaper → trafilatura → readability) - translator_v2.py: Better chunking, repetition detection, validation - admin_tools.py: CLI admin commands - test_scraper.py: Individual source testing Docs: - WEB-ADMIN-GUIDE.md: Web admin usage - ADMIN-GUIDE.md: CLI admin usage - SCRAPER-IMPROVEMENT-PLAN.md: Scraper fixes details - TRANSLATION-FIX.md: Translation improvements - ADMIN-FEATURES-SUMMARY.md: Implementation summary Fixes: - Article scraping from 0 → 96+ articles working - Translation quality issues (repetition, truncation) - Added 13 new RSS sources
394 lines
12 KiB
Python
Executable File
394 lines
12 KiB
Python
Executable File
#!/usr/bin/env python3
|
|
"""
|
|
Admin tools for managing burmddit articles
|
|
"""
|
|
|
|
import psycopg2
|
|
from dotenv import load_dotenv
|
|
import os
|
|
from datetime import datetime
|
|
from loguru import logger
|
|
import sys
|
|
|
|
# Load variables from a .env file (if one is found) into the process
# environment; get_connection() reads DATABASE_URL from the environment.
load_dotenv()
|
|
|
|
def get_connection():
    """Open a new database connection from the DATABASE_URL environment variable."""
    dsn = os.getenv('DATABASE_URL')
    return psycopg2.connect(dsn)
|
|
|
|
def list_articles(status=None, limit=20):
    """List articles ordered by ``published_at`` (newest first).

    Args:
        status: When truthy, only articles whose ``status`` column equals this
            value are returned; otherwise no status filter is applied.
        limit: Maximum number of rows to fetch.

    Returns:
        A list of dicts with the keys ``id``, ``title`` (cut to 60 characters
        plus ``...`` when longer), ``status``, ``published_at``, ``views``
        (0 when ``view_count`` is NULL/0), ``content_len`` and ``burmese_len``.
        The two length values are ``None`` when the underlying column is NULL.
    """
    # Only the WHERE clause differs between the filtered and unfiltered query.
    # Both fragments are constants; user-supplied values go through parameters.
    if status:
        where_clause = 'WHERE status = %s'
        params = (status, limit)
    else:
        where_clause = ''
        params = (limit,)

    query = f'''
        SELECT id, title, status, published_at, view_count,
               LENGTH(content) as content_len,
               LENGTH(content_burmese) as burmese_len
        FROM articles
        {where_clause}
        ORDER BY published_at DESC
        LIMIT %s
    '''

    conn = get_connection()
    try:
        with conn.cursor() as cur:
            cur.execute(query, params)
            rows = cur.fetchall()
    finally:
        # Release the connection even when the query raises.
        conn.close()

    articles = []
    for row in rows:
        # Tolerate a NULL title instead of crashing on len(None).
        title = row[1] or ''
        if len(title) > 60:
            title = title[:60] + '...'
        articles.append({
            'id': row[0],
            'title': title,
            'status': row[2],
            'published_at': row[3],
            'views': row[4] or 0,
            'content_len': row[5],
            'burmese_len': row[6],
        })

    return articles
|
|
|
|
def unpublish_article(article_id: int, reason: str = "Error/Quality issue"):
    """Unpublish an article by setting its status to ``draft``.

    Args:
        article_id: ``id`` of the article to unpublish.
        reason: Free-text reason; it is only written to the log, not stored.

    Returns:
        ``True`` when the article was found and updated, ``False`` when no
        article with ``article_id`` exists.
    """
    conn = get_connection()
    try:
        with conn.cursor() as cur:
            # Get article info first
            cur.execute('SELECT id, title, status FROM articles WHERE id = %s', (article_id,))
            article = cur.fetchone()

            if not article:
                logger.error(f"Article {article_id} not found")
                return False

            # A NULL title must not break the log line.
            title = (article[1] or '')[:60]
            logger.info(f"Unpublishing article {article_id}: {title}...")
            logger.info(f"Current status: {article[2]}")
            logger.info(f"Reason: {reason}")

            # Update status to draft
            cur.execute('''
                UPDATE articles
                SET status = 'draft',
                    updated_at = NOW()
                WHERE id = %s
            ''', (article_id,))

        conn.commit()
        logger.info(f"✅ Article {article_id} unpublished successfully")
        return True
    finally:
        # Always release the connection; closing without a commit discards
        # any uncommitted change if an error occurred above.
        conn.close()
|
|
|
|
def republish_article(article_id: int):
    """Republish an article by setting its status to ``published``.

    Args:
        article_id: ``id`` of the article to republish.

    Returns:
        ``True`` when the article was found and updated, ``False`` when no
        article with ``article_id`` exists.
    """
    conn = get_connection()
    try:
        with conn.cursor() as cur:
            # Get article info first
            cur.execute('SELECT id, title, status FROM articles WHERE id = %s', (article_id,))
            article = cur.fetchone()

            if not article:
                logger.error(f"Article {article_id} not found")
                return False

            # A NULL title must not break the log line.
            title = (article[1] or '')[:60]
            logger.info(f"Republishing article {article_id}: {title}...")
            logger.info(f"Current status: {article[2]}")

            # Update status to published
            cur.execute('''
                UPDATE articles
                SET status = 'published',
                    updated_at = NOW()
                WHERE id = %s
            ''', (article_id,))

        conn.commit()
        logger.info(f"✅ Article {article_id} republished successfully")
        return True
    finally:
        # Always release the connection; closing without a commit discards
        # any uncommitted change if an error occurred above.
        conn.close()
|
|
|
|
def delete_article(article_id: int):
    """Permanently delete an article row.

    Args:
        article_id: ``id`` of the article to delete.

    Returns:
        ``True`` when the article was found and deleted, ``False`` when no
        article with ``article_id`` exists.
    """
    conn = get_connection()
    try:
        with conn.cursor() as cur:
            # Get article info first
            cur.execute('SELECT id, title, status FROM articles WHERE id = %s', (article_id,))
            article = cur.fetchone()

            if not article:
                logger.error(f"Article {article_id} not found")
                return False

            # A NULL title must not break the log line.
            title = (article[1] or '')[:60]
            logger.warning(f"⚠️ DELETING article {article_id}: {title}...")

            # Delete from database
            cur.execute('DELETE FROM articles WHERE id = %s', (article_id,))

        conn.commit()
        logger.info(f"✅ Article {article_id} deleted permanently")
        return True
    finally:
        # Always release the connection; closing without a commit discards
        # the pending delete if an error occurred above.
        conn.close()
|
|
|
|
def find_problem_articles():
    """Find published articles with potential quality issues.

    Three checks are run, each limited to 10 rows:

    1. Burmese content shorter than 30% of the original content.
    2. Burmese content that is NULL or shorter than 100 characters.
    3. Original content shorter than 500 characters.

    The same article can be reported by more than one check.

    Returns:
        A list of dicts with the keys ``id``, ``title`` (first 50 characters),
        ``issue`` (short label) and ``details`` (human-readable description).
    """
    issues = []

    conn = get_connection()
    try:
        with conn.cursor() as cur:
            # Issue 1: Translation too short (< 30% of original)
            cur.execute('''
                SELECT id, title,
                       LENGTH(content) as en_len,
                       LENGTH(content_burmese) as mm_len,
                       ROUND(100.0 * LENGTH(content_burmese) / NULLIF(LENGTH(content), 0), 1) as ratio
                FROM articles
                WHERE status = 'published'
                  AND LENGTH(content_burmese) < LENGTH(content) * 0.3
                ORDER BY ratio ASC
                LIMIT 10
            ''')
            for row in cur.fetchall():
                issues.append({
                    'id': row[0],
                    # `or ''` keeps a NULL title from raising on the slice.
                    'title': (row[1] or '')[:50],
                    'issue': 'Translation too short',
                    'details': f'EN: {row[2]} chars, MM: {row[3]} chars ({row[4]}%)',
                })

            # Issue 2: Missing Burmese content
            cur.execute('''
                SELECT id, title
                FROM articles
                WHERE status = 'published'
                  AND (content_burmese IS NULL OR LENGTH(content_burmese) < 100)
                LIMIT 10
            ''')
            for row in cur.fetchall():
                issues.append({
                    'id': row[0],
                    'title': (row[1] or '')[:50],
                    'issue': 'Missing Burmese translation',
                    'details': 'No or very short Burmese content',
                })

            # Issue 3: Very short articles (< 500 chars)
            cur.execute('''
                SELECT id, title, LENGTH(content) as len
                FROM articles
                WHERE status = 'published'
                  AND LENGTH(content) < 500
                LIMIT 10
            ''')
            for row in cur.fetchall():
                issues.append({
                    'id': row[0],
                    'title': (row[1] or '')[:50],
                    'issue': 'Article too short',
                    'details': f'Only {row[2]} chars',
                })
    finally:
        # Release the connection even when one of the queries raises.
        conn.close()

    return issues
|
|
|
|
def get_article_details(article_id: int):
    """Fetch detailed information about a single article.

    Args:
        article_id: ``id`` of the article to look up.

    Returns:
        ``None`` when no article with ``article_id`` exists; otherwise a dict
        with the article's metadata, the character lengths of the English and
        Burmese content, a ``translation_ratio`` percentage (Burmese length
        relative to English length, 0 when either length is missing or the
        English content is empty) and 200-character previews of both texts.
    """
    conn = get_connection()
    try:
        with conn.cursor() as cur:
            cur.execute('''
                SELECT id, title, title_burmese, slug, status,
                       LENGTH(content) as content_len,
                       LENGTH(content_burmese) as burmese_len,
                       category_id, author, reading_time,
                       published_at, view_count, created_at, updated_at,
                       LEFT(content, 200) as content_preview,
                       LEFT(content_burmese, 200) as burmese_preview
                FROM articles
                WHERE id = %s
            ''', (article_id,))
            row = cur.fetchone()
    finally:
        # Runs on every path, including the not-found early return below,
        # so the connection is never left open.
        conn.close()

    if not row:
        return None

    content_len = row[5]
    burmese_len = row[6]
    # LENGTH() of a NULL column comes back as None; avoid comparing or
    # dividing with None and report a 0 ratio instead.
    if content_len and burmese_len is not None:
        translation_ratio = round(100.0 * burmese_len / content_len, 1)
    else:
        translation_ratio = 0

    return {
        'id': row[0],
        'title': row[1],
        'title_burmese': row[2],
        'slug': row[3],
        'status': row[4],
        'content_length': content_len,
        'burmese_length': burmese_len,
        'translation_ratio': translation_ratio,
        'category_id': row[7],
        'author': row[8],
        'reading_time': row[9],
        'published_at': row[10],
        'view_count': row[11] or 0,
        'created_at': row[12],
        'updated_at': row[13],
        'content_preview': row[14],
        'burmese_preview': row[15],
    }
|
|
|
|
def print_article_table(articles):
    """Print articles as a fixed-width table on stdout.

    Args:
        articles: Iterable of dicts with the keys ``id``, ``title``,
            ``status``, ``views``, ``content_len`` and ``burmese_len``.

    The ``Ratio`` column shows the Burmese length as a percentage of the
    content length. It prints ``N/A`` when the content length is zero or
    when either length is ``None``.
    """
    print()
    print("=" * 100)
    print(f"{'ID':<5} {'Title':<50} {'Status':<12} {'Views':<8} {'Ratio':<8}")
    print("-" * 100)

    for a in articles:
        content_len = a['content_len']
        burmese_len = a['burmese_len']
        # Either length may be None; guard before comparing or dividing
        # so one incomplete row cannot abort the listing.
        if content_len and content_len > 0 and burmese_len is not None:
            ratio = f"{100.0 * burmese_len / content_len:.1f}%"
        else:
            ratio = "N/A"
        print(f"{a['id']:<5} {a['title']:<50} {a['status']:<12} {a['views']:<8} {ratio:<8}")

    print("=" * 100)
    print()
|
|
|
|
def main():
    """Command-line entry point: parse arguments and run the chosen command.

    Sub-commands: ``list``, ``unpublish``, ``republish``, ``delete``
    (requires ``--confirm``), ``find-problems`` and ``details``. With no
    sub-command the help text is printed.
    """
    import argparse

    cli = argparse.ArgumentParser(description='Burmddit Admin Tools')
    commands = cli.add_subparsers(dest='command', help='Commands')

    # list
    list_cmd = commands.add_parser('list', help='List articles')
    list_cmd.add_argument('--status', choices=['published', 'draft'], help='Filter by status')
    list_cmd.add_argument('--limit', type=int, default=20, help='Number of articles')

    # unpublish
    unpublish_cmd = commands.add_parser('unpublish', help='Unpublish an article')
    unpublish_cmd.add_argument('article_id', type=int, help='Article ID')
    unpublish_cmd.add_argument('--reason', default='Error/Quality issue', help='Reason for unpublishing')

    # republish
    republish_cmd = commands.add_parser('republish', help='Republish an article')
    republish_cmd.add_argument('article_id', type=int, help='Article ID')

    # delete
    delete_cmd = commands.add_parser('delete', help='Delete an article permanently')
    delete_cmd.add_argument('article_id', type=int, help='Article ID')
    delete_cmd.add_argument('--confirm', action='store_true', help='Confirm deletion')

    # find-problems
    commands.add_parser('find-problems', help='Find articles with issues')

    # details
    details_cmd = commands.add_parser('details', help='Show article details')
    details_cmd.add_argument('article_id', type=int, help='Article ID')

    args = cli.parse_args()

    # Replace the default log sink with a message-only sink on stdout.
    logger.remove()
    logger.add(sys.stdout, format="<level>{message}</level>", level="INFO")

    command = args.command

    if command == 'list':
        listed = list_articles(status=args.status, limit=args.limit)
        print_article_table(listed)
        print(f"Total: {len(listed)} articles")
        return

    if command == 'unpublish':
        unpublish_article(args.article_id, args.reason)
        return

    if command == 'republish':
        republish_article(args.article_id)
        return

    if command == 'delete':
        # Deleting is irreversible, so it only runs with an explicit flag.
        if args.confirm:
            delete_article(args.article_id)
        else:
            logger.error("⚠️ Deletion requires --confirm flag to prevent accidents")
        return

    if command == 'find-problems':
        problems = find_problem_articles()
        if not problems:
            logger.info("✅ No issues found!")
            return

        heavy_rule = "=" * 100
        print()
        print(heavy_rule)
        print(f"Found {len(problems)} potential issues:")
        print("-" * 100)
        for issue in problems:
            print(f"ID {issue['id']}: {issue['title']}")
            print(f" Issue: {issue['issue']}")
            print(f" Details: {issue['details']}")
            print()
        print(heavy_rule)
        print()
        print("To unpublish an article: python3 admin_tools.py unpublish <ID>")
        return

    if command == 'details':
        article = get_article_details(args.article_id)
        if not article:
            logger.error(f"Article {args.article_id} not found")
            return

        banner = "=" * 80
        # Emit the report line by line; an empty string yields a blank line.
        report = [
            "",
            banner,
            f"Article {article['id']} Details",
            banner,
            f"Title (EN): {article['title']}",
            f"Title (MM): {article['title_burmese']}",
            f"Slug: {article['slug']}",
            f"Status: {article['status']}",
            f"Author: {article['author']}",
            f"Published: {article['published_at']}",
            f"Views: {article['view_count']}",
            "",
            f"Content length: {article['content_length']} chars",
            f"Burmese length: {article['burmese_length']} chars",
            f"Translation ratio: {article['translation_ratio']}%",
            "",
            "English preview:",
            article['content_preview'],
            "",
            "Burmese preview:",
            article['burmese_preview'],
            banner,
        ]
        for line in report:
            print(line)
        return

    # No (or unknown) sub-command: show usage.
    cli.print_help()
|
|
|
|
# Run the CLI only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()
|