# Automatic tagging system for Burmddit articles

import re
from typing import Dict, List

import database

# Common AI-related keywords that should become tags.
# Maps the display keyword (also used as the tag name) to its URL slug.
TAG_KEYWORDS = {
    'ChatGPT': 'chatgpt',
    'GPT-4': 'gpt-4',
    'GPT-5': 'gpt-5',
    'OpenAI': 'openai',
    'Claude': 'claude',
    'Anthropic': 'anthropic',
    'Google': 'google',
    'Gemini': 'gemini',
    'Microsoft': 'microsoft',
    'Copilot': 'copilot',
    'Meta': 'meta',
    'Llama': 'llama',
    'DeepMind': 'deepmind',
    'DeepSeek': 'deepseek',
    'Mistral': 'mistral',
    'Hugging Face': 'hugging-face',
    'AGI': 'agi',
    'LLM': 'llm',
    'AI Safety': 'ai-safety',
    'Neural Network': 'neural-network',
    'Transformer': 'transformer',
    'Machine Learning': 'machine-learning',
    'Deep Learning': 'deep-learning',
    'NLP': 'nlp',
    'Computer Vision': 'computer-vision',
    'Robotics': 'robotics',
    'Autonomous': 'autonomous',
    'Generative AI': 'generative-ai',
}


def _mentions_keyword(text: str, keyword: str) -> bool:
    """
    Report whether lowercased ``text`` mentions ``keyword`` as a whole term

    A plain substring test tags "imagine" as AGI and "metadata" as Meta, so
    the keyword must not be glued to other ASCII letters or digits. Only
    ASCII is treated as a word character, so a keyword directly followed by
    non-Latin script (no separating space) still matches.
    """
    term = keyword.lower()
    if term[-1:].isdigit():
        # Versioned names: accept letter suffixes ("GPT-4o") but not a
        # longer number ("GPT-40").
        tail = r'(?![0-9])'
    else:
        # Accept a plural "s" ("LLMs", "Transformers") but no other
        # ASCII continuation.
        tail = r's?(?![a-z0-9])'
    # re.compile goes through the re module's pattern cache, so repeated
    # calls do not recompile; TAG_KEYWORDS is still read live on every call.
    pattern = re.compile(r'(?<![a-z0-9])' + re.escape(term) + tail)
    return pattern.search(text) is not None


def extract_tags_from_text(title: str, content: str) -> List[str]:
    """
    Extract relevant tags from article title and content

    Matching is case-insensitive and whole-term (see _mentions_keyword).

    Returns list of unique tag slugs, ordered as in TAG_KEYWORDS
    """
    text = f"{title} {content}".lower()
    matched = [
        slug
        for keyword, slug in TAG_KEYWORDS.items()
        if _mentions_keyword(text, keyword)
    ]
    # dict.fromkeys removes duplicates while keeping a deterministic order
    # (a set would return the slugs in arbitrary order).
    return list(dict.fromkeys(matched))


def ensure_tag_exists(tag_name: str, tag_slug: str) -> int:
    """
    Ensure tag exists in database, create if not

    Looks the tag up by slug; when no row is found, inserts one using
    tag_name for both the English and the Burmese name.

    Returns tag ID
    """
    # NOTE(review): there is no explicit commit here; presumably
    # database.get_db_connection() commits when the block exits - confirm.
    # NOTE(review): the SELECT-then-INSERT below is not atomic; concurrent
    # callers could both insert unless tags.slug is unique - confirm schema.
    with database.get_db_connection() as conn:
        with conn.cursor() as cur:
            # Check if tag exists
            cur.execute(
                "SELECT id FROM tags WHERE slug = %s",
                (tag_slug,)
            )
            result = cur.fetchone()
            if result:
                return result[0]

            # Create tag if doesn't exist
            cur.execute(
                """
                INSERT INTO tags (name, name_burmese, slug)
                VALUES (%s, %s, %s)
                RETURNING id
                """,
                (tag_name, tag_name, tag_slug)  # Use English name for both initially
            )
            return cur.fetchone()[0]


def assign_tags_to_article(article_id: int, tag_slugs: List[str]) -> None:
    """
    Assign tags to an article

    For every slug that has a row in ``tags``, links it to the article and
    recomputes that tag's ``article_count``. Slugs without a matching tag
    row are skipped. Does nothing when tag_slugs is empty.
    """
    if not tag_slugs:
        return

    with database.get_db_connection() as conn:
        with conn.cursor() as cur:
            for slug in tag_slugs:
                # Get tag_id
                cur.execute("SELECT id FROM tags WHERE slug = %s", (slug,))
                result = cur.fetchone()
                if not result:
                    continue
                tag_id = result[0]

                # Insert article-tag relationship (ignore if already exists)
                cur.execute(
                    """
                    INSERT INTO article_tags (article_id, tag_id)
                    VALUES (%s, %s)
                    ON CONFLICT DO NOTHING
                    """,
                    (article_id, tag_id)
                )

                # Update tag article count
                cur.execute(
                    """
                    UPDATE tags
                    SET article_count = (
                        SELECT COUNT(*) FROM article_tags WHERE tag_id = %s
                    )
                    WHERE id = %s
                    """,
                    (tag_id, tag_id)
                )


def auto_tag_article(article_id: int, title: str, content: str) -> List[str]:
    """
    Automatically tag an article based on its content

    Extracts slugs from the text, makes sure each has a tag row, then links
    them to the article.

    Returns list of assigned tag slugs (empty when nothing matched)
    """
    # Extract tags
    tag_slugs = extract_tags_from_text(title, content)
    if not tag_slugs:
        return []

    # Reverse map built once per call instead of rescanning TAG_KEYWORDS for
    # every slug; setdefault keeps the first keyword listed for a slug.
    slug_to_name: Dict[str, str] = {}
    for keyword, keyword_slug in TAG_KEYWORDS.items():
        slug_to_name.setdefault(keyword_slug, keyword)

    # Ensure all tags exist
    for slug in tag_slugs:
        tag_name = slug_to_name.get(slug)
        if tag_name:
            ensure_tag_exists(tag_name, slug)

    # Assign tags to article
    assign_tags_to_article(article_id, tag_slugs)

    return tag_slugs


if __name__ == '__main__':
    # Test auto-tagging
    test_title = "OpenAI Releases GPT-5 with ChatGPT Integration"
    test_content = "OpenAI announced GPT-5 today with improved Claude-like capabilities and better AI safety measures..."

    tags = extract_tags_from_text(test_title, test_content)
    print(f"Found tags: {tags}")