# burmddit/backend/auto_tagging.py

# Automatic tagging system for Burmddit articles

import database
from typing import List, Dict
import re

# Display keyword -> tag slug for AI-related terms that should become tags.
# Dict order is meaningful: extract_tags_from_text reports slugs in this order.
TAG_KEYWORDS = {
    'ChatGPT': 'chatgpt',
    'GPT-4': 'gpt-4',
    'GPT-5': 'gpt-5',
    'OpenAI': 'openai',
    'Claude': 'claude',
    'Anthropic': 'anthropic',
    'Google': 'google',
    'Gemini': 'gemini',
    'Microsoft': 'microsoft',
    'Copilot': 'copilot',
    'Meta': 'meta',
    'Llama': 'llama',
    'DeepMind': 'deepmind',
    'DeepSeek': 'deepseek',
    'Mistral': 'mistral',
    'Hugging Face': 'hugging-face',
    'AGI': 'agi',
    'LLM': 'llm',
    'AI Safety': 'ai-safety',
    'Neural Network': 'neural-network',
    'Transformer': 'transformer',
    'Machine Learning': 'machine-learning',
    'Deep Learning': 'deep-learning',
    'NLP': 'nlp',
    'Computer Vision': 'computer-vision',
    'Robotics': 'robotics',
    'Autonomous': 'autonomous',
    'Generative AI': 'generative-ai',
}


def _keyword_pattern(keyword: str) -> str:
    """
    Build a regex source string matching *keyword* as a whole term.

    The pattern is meant to be searched in already-lowercased text. The
    boundaries are expressed with ASCII classes instead of ``\\b`` so that a
    keyword written directly next to non-Latin script (no separating space)
    is still recognised.
    """
    body = re.escape(keyword.lower())
    if keyword[-1:].isdigit():
        # Version-style keywords ("GPT-4") may carry a letter suffix
        # ("GPT-4o") but must not run into further digits ("GPT-40").
        tail = r"(?![0-9])"
    else:
        # Tolerate a plural "s" ("LLMs"), then require that the term ends,
        # so "Meta" does not fire on "metadata" nor "AGI" on "agile".
        tail = r"s?(?![a-z0-9])"
    # The lookbehind stops matches that start mid-word ("AGI" in "imagine").
    return r"(?<![a-z0-9])" + body + tail


def extract_tags_from_text(title: str, content: str) -> List[str]:
    """
    Extract relevant tags from article title and content

    Matching is case-insensitive and only accepts a keyword when it is not
    embedded in a longer ASCII word or number. ``None`` for either argument
    is treated as empty text.

    Returns list of unique tag slugs, ordered as in TAG_KEYWORDS
    """
    text = f"{title or ''} {content or ''}".lower()
    found_tags: List[str] = []

    # TAG_KEYWORDS is read on every call so runtime additions are honoured;
    # the re module caches the compiled patterns between calls.
    for keyword, slug in TAG_KEYWORDS.items():
        if slug in found_tags:
            continue  # Another keyword already produced this slug
        if re.search(_keyword_pattern(keyword), text):
            found_tags.append(slug)

    return found_tags

def ensure_tag_exists(tag_name: str, tag_slug: str) -> int:
    """
    Return the id of the tag with the given slug, inserting it if missing

    A SELECT on ``tags`` is issued for ``tag_slug``. When a row comes back
    its first column is returned. Otherwise a row is inserted with
    ``tag_name`` stored as both ``name`` and ``name_burmese`` (the English
    name is a placeholder for the Burmese one) and the id produced by
    ``RETURNING id`` is returned.

    NOTE(review): the lookup and the insert are separate statements, so two
    concurrent callers could both attempt the insert - confirm whether
    ``tags.slug`` is unique and how the caller handles that.
    """
    find_sql = "SELECT id FROM tags WHERE slug = %s"
    create_sql = """
                INSERT INTO tags (name, name_burmese, slug)
                VALUES (%s, %s, %s)
                RETURNING id
                """

    with database.get_db_connection() as connection, connection.cursor() as cursor:
        cursor.execute(find_sql, (tag_slug,))
        row = cursor.fetchone()

        if not row:
            # Unknown slug: create the tag and read back the generated id
            cursor.execute(create_sql, (tag_name, tag_name, tag_slug))
            row = cursor.fetchone()

        return row[0]

def assign_tags_to_article(article_id: int, tag_slugs: List[str]):
    """
    Link an article to each of the given tags and refresh the tag counters

    For every slug the tag id is looked up in ``tags``; slugs with no row
    are skipped silently. For each tag found, an ``article_tags`` row is
    inserted (``ON CONFLICT DO NOTHING`` keeps an existing link from being
    duplicated) and ``tags.article_count`` is recomputed from
    ``article_tags``. Does nothing, and opens no connection, when
    ``tag_slugs`` is empty.
    """
    if not tag_slugs:
        return

    link_sql = """
                        INSERT INTO article_tags (article_id, tag_id)
                        VALUES (%s, %s)
                        ON CONFLICT DO NOTHING
                        """
    recount_sql = """
                        UPDATE tags
                        SET article_count = (
                            SELECT COUNT(*) FROM article_tags WHERE tag_id = %s
                        )
                        WHERE id = %s
                        """

    with database.get_db_connection() as connection, connection.cursor() as cursor:
        for tag_slug in tag_slugs:
            cursor.execute("SELECT id FROM tags WHERE slug = %s", (tag_slug,))
            row = cursor.fetchone()
            if not row:
                # No tag row for this slug; nothing to link
                continue

            tag_id = row[0]
            # Create the article-tag relationship (no-op if already present)
            cursor.execute(link_sql, (article_id, tag_id))
            # Recount so article_count reflects the current links
            cursor.execute(recount_sql, (tag_id, tag_id))

def auto_tag_article(article_id: int, title: str, content: str) -> List[str]:
    """
    Detect tags in an article and attach them to it

    Runs keyword extraction over the title and content, makes sure a tag
    row exists for every detected slug that has a keyword in TAG_KEYWORDS,
    then links the detected slugs to the article.

    Returns the detected tag slugs (an empty list when nothing matched)
    """
    detected_slugs = extract_tags_from_text(title, content)
    if not detected_slugs:
        return []

    for detected in detected_slugs:
        # Recover the display name: the first keyword mapped to this slug
        display_name = next(
            (name for name, name_slug in TAG_KEYWORDS.items() if name_slug == detected),
            None,
        )
        if display_name:
            ensure_tag_exists(display_name, detected)

    assign_tags_to_article(article_id, detected_slugs)
    return detected_slugs

if __name__ == '__main__':
    # Manual smoke test: run keyword extraction on a sample article and
    # print the slugs it finds (no database access involved).
    sample_title = "OpenAI Releases GPT-5 with ChatGPT Integration"
    sample_content = "OpenAI announced GPT-5 today with improved Claude-like capabilities and better AI safety measures..."

    tags = extract_tags_from_text(title=sample_title, content=sample_content)
    print(f"Found tags: {tags}")