Trigger redeploy: Category pages + Quality control

This commit is contained in:
Zeya Phyo
2026-02-20 02:41:34 +00:00
parent 785910b81d
commit f9c1c1ea10
5 changed files with 756 additions and 0 deletions

backend/quality_control.py — new file, 329 lines

@@ -0,0 +1,329 @@
#!/usr/bin/env python3
"""
Burmddit Quality Control System
Automatically checks article quality and takes corrective actions
"""
import psycopg2
from dotenv import load_dotenv
import os
from loguru import logger
import re
from datetime import datetime, timedelta
import requests
from bs4 import BeautifulSoup
load_dotenv()
class QualityControl:
    """Automated quality control for Burmddit articles.

    Scans published articles in the Postgres database named by DATABASE_URL
    for common quality problems (missing images, failed translations, bad
    content lengths, duplicate titles, malformed slugs), applies safe
    automatic fixes, and produces a summary report.
    """

    # Fallback AI-themed hero image applied to articles with no usable
    # featured_image.
    DEFAULT_IMAGE = 'https://images.unsplash.com/photo-1677442136019-21780ecad995?w=1200&h=630&fit=crop'

    def __init__(self):
        # NOTE(review): assumes DATABASE_URL is set in the environment;
        # psycopg2 raises if it is missing or the database is unreachable.
        self.conn = psycopg2.connect(os.getenv('DATABASE_URL'))
        # Issue records accumulated by the check_* methods; each dict has at
        # least 'type', 'count' and 'action' keys.
        self.issues_found = []

    def run_all_checks(self):
        """Run every quality check, apply fixes, and return the report dict."""
        logger.info("🔍 Starting Quality Control Checks...")
        self.check_missing_images()
        self.check_translation_quality()
        self.check_content_length()
        self.check_duplicate_content()
        self.check_broken_slugs()
        return self.generate_report()

    def check_missing_images(self):
        """Find published articles lacking a featured image and fix them."""
        logger.info("📸 Checking for missing images...")
        # Context manager guarantees the cursor is closed even on error.
        with self.conn.cursor() as cur:
            cur.execute("""
                SELECT id, slug, title_burmese, featured_image
                FROM articles
                WHERE status = 'published'
                AND (featured_image IS NULL OR featured_image = '' OR featured_image = '/placeholder.jpg')
            """)
            articles = cur.fetchall()
        if articles:
            logger.warning(f"Found {len(articles)} articles without images")
            self.issues_found.append({
                'type': 'missing_images',
                'count': len(articles),
                'action': 'set_placeholder',
                'articles': [{'id': a[0], 'slug': a[1]} for a in articles]
            })
            # Action: set a default AI-related placeholder image.
            self.fix_missing_images(articles)

    def fix_missing_images(self, articles):
        """Assign the default placeholder image to each row in *articles*.

        Each element is a row tuple whose first field is the article id.
        """
        ids = [row[0] for row in articles]
        with self.conn.cursor() as cur:
            # One set-based UPDATE with a single commit, instead of the
            # original commit-per-row loop.
            cur.execute("""
                UPDATE articles
                SET featured_image = %s
                WHERE id = ANY(%s)
            """, (self.DEFAULT_IMAGE, ids))
        self.conn.commit()
        logger.info(f"✅ Fixed {len(articles)} articles with placeholder image")

    def check_translation_quality(self):
        """Detect failed or low-quality translations and archive offenders."""
        logger.info("🔤 Checking translation quality...")
        with self.conn.cursor() as cur:
            # Check 1: very short content (likely a failed translation).
            cur.execute("""
                SELECT id, slug, title_burmese, LENGTH(content_burmese) as len
                FROM articles
                WHERE status = 'published'
                AND LENGTH(content_burmese) < 500
            """)
            short_articles = cur.fetchall()
            # Check 2: repeated text patterns (translation loops). Postgres
            # ARE regexes support the \1 backreference used here.
            cur.execute("""
                SELECT id, slug, title_burmese, content_burmese
                FROM articles
                WHERE status = 'published'
                AND content_burmese ~ '(.{50,})\\1{2,}'
            """)
            repeated_articles = cur.fetchall()
            # Check 3: untranslated English blocks.
            # NOTE(review): '[a-zA-Z]{100,}' only matches 100+ *consecutive*
            # letters with no spaces, so ordinary English prose will rarely
            # trigger it — confirm this heuristic is intended.
            cur.execute("""
                SELECT id, slug, title_burmese
                FROM articles
                WHERE status = 'published'
                AND content_burmese ~ '[a-zA-Z]{100,}'
            """)
            english_articles = cur.fetchall()
        # Collect offending ids; a set de-duplicates articles that fail
        # more than one check.
        problem_ids = set()
        if short_articles:
            logger.warning(f"Found {len(short_articles)} articles with short content")
            problem_ids.update(a[0] for a in short_articles)
        if repeated_articles:
            logger.warning(f"Found {len(repeated_articles)} articles with repeated text")
            problem_ids.update(a[0] for a in repeated_articles)
        if english_articles:
            logger.warning(f"Found {len(english_articles)} articles with untranslated English")
            problem_ids.update(a[0] for a in english_articles)
        if problem_ids:
            problem_articles = list(problem_ids)
            self.issues_found.append({
                'type': 'translation_quality',
                'count': len(problem_articles),
                'action': 'archive',
                'articles': problem_articles
            })
            # Action: archive broken articles so they stop being served.
            self.archive_broken_articles(problem_articles)

    def archive_broken_articles(self, article_ids):
        """Set status='archived' for every article id in *article_ids*."""
        with self.conn.cursor() as cur:
            # Single set-based UPDATE replaces the original per-id loop.
            cur.execute("""
                UPDATE articles
                SET status = 'archived'
                WHERE id = ANY(%s)
            """, (list(article_ids),))
        self.conn.commit()
        logger.info(f"✅ Archived {len(article_ids)} broken articles")

    def check_content_length(self):
        """Flag published articles whose body is outside the 600-3000 range."""
        logger.info("📏 Checking content length...")
        with self.conn.cursor() as cur:
            cur.execute("""
                SELECT COUNT(*)
                FROM articles
                WHERE status = 'published'
                AND (
                    LENGTH(content_burmese) < 600
                    OR LENGTH(content_burmese) > 3000
                )
            """)
            count = cur.fetchone()[0]
        if count > 0:
            logger.warning(f"Found {count} articles with length issues")
            self.issues_found.append({
                'type': 'content_length',
                'count': count,
                'action': 'review_needed'
            })

    def check_duplicate_content(self):
        """Flag titles shared by more than one published article."""
        logger.info("🔁 Checking for duplicates...")
        with self.conn.cursor() as cur:
            cur.execute("""
                SELECT title_burmese, COUNT(*) as cnt
                FROM articles
                WHERE status = 'published'
                GROUP BY title_burmese
                HAVING COUNT(*) > 1
            """)
            duplicates = cur.fetchall()
        if duplicates:
            logger.warning(f"Found {len(duplicates)} duplicate titles")
            self.issues_found.append({
                'type': 'duplicates',
                'count': len(duplicates),
                'action': 'manual_review'
            })

    def check_broken_slugs(self):
        """Flag published articles with missing, oversized, or invalid slugs."""
        logger.info("🔗 Checking slugs...")
        with self.conn.cursor() as cur:
            cur.execute("""
                SELECT id, slug
                FROM articles
                WHERE status = 'published'
                AND (
                    slug IS NULL
                    OR slug = ''
                    OR LENGTH(slug) > 200
                    OR slug ~ '[^a-z0-9-]'
                )
            """)
            broken = cur.fetchall()
        if broken:
            logger.warning(f"Found {len(broken)} articles with invalid slugs")
            self.issues_found.append({
                'type': 'broken_slugs',
                'count': len(broken),
                'action': 'regenerate_slugs'
            })

    def generate_report(self):
        """Build, log, and return a summary of all issues found so far."""
        report = {
            'timestamp': datetime.now().isoformat(),
            'total_issues': len(self.issues_found),
            'issues': self.issues_found,
            'summary': {}
        }
        # Index issue counts by type for quick lookup by callers.
        for issue in self.issues_found:
            report['summary'][issue['type']] = issue['count']
        logger.info("=" * 80)
        logger.info("📊 QUALITY CONTROL REPORT")
        logger.info("=" * 80)
        logger.info(f"Total Issues Found: {len(self.issues_found)}")
        for issue in self.issues_found:
            logger.info(f"{issue['type']}: {issue['count']} articles → {issue['action']}")
        logger.info("=" * 80)
        return report

    def get_article_stats(self):
        """Return counts of published/archived/draft articles and image coverage."""
        with self.conn.cursor() as cur:
            cur.execute("SELECT COUNT(*) FROM articles WHERE status = 'published'")
            total = cur.fetchone()[0]
            cur.execute("SELECT COUNT(*) FROM articles WHERE status = 'archived'")
            archived = cur.fetchone()[0]
            cur.execute("SELECT COUNT(*) FROM articles WHERE status = 'draft'")
            draft = cur.fetchone()[0]
            cur.execute("""
                SELECT COUNT(*) FROM articles
                WHERE status = 'published'
                AND featured_image IS NOT NULL
                AND featured_image != ''
            """)
            with_images = cur.fetchone()[0]
        return {
            'total_published': total,
            'total_archived': archived,
            'total_draft': draft,
            'with_images': with_images,
            'without_images': total - with_images
        }

    def close(self):
        """Close the underlying database connection."""
        self.conn.close()
def main():
    """Run the full quality-control pass and return its report dict."""
    qc = QualityControl()

    # Snapshot article statistics before any fixes are applied.
    logger.info("📊 Statistics Before Quality Control:")
    for key, value in qc.get_article_stats().items():
        logger.info(f" {key}: {value}")

    report = qc.run_all_checks()

    # Snapshot again so the effect of the fixes is visible side by side.
    logger.info("\n📊 Statistics After Quality Control:")
    for key, value in qc.get_article_stats().items():
        logger.info(f" {key}: {value}")

    qc.close()
    return report
if __name__ == "__main__":
main()