Files
burmddit/backend/auto_tagging.py
Zeya Phyo 161dce1501 UI/UX Improvements: Modern design + hashtag system + cover images
- Added modern CSS design system with better typography
- Created hashtag/tag functionality with auto-tagging
- Improved homepage with hero section and trending tags
- Enhanced article pages with full-width cover images
- Added tag pages for filtering articles by hashtag
- Better mobile responsive design
- Smoother animations and transitions
- Auto-tag system analyzes content and assigns relevant tags
- 30+ predefined AI-related tags (ChatGPT, OpenAI, etc.)
2026-02-19 13:49:53 +00:00

155 lines
4.6 KiB
Python

# Automatic tagging system for Burmddit articles
import database
from typing import List, Dict
import re
# Common AI-related keywords that should become tags
TAG_KEYWORDS: Dict[str, str] = {
    'ChatGPT': 'chatgpt',
    'GPT-4': 'gpt-4',
    'GPT-5': 'gpt-5',
    'OpenAI': 'openai',
    'Claude': 'claude',
    'Anthropic': 'anthropic',
    'Google': 'google',
    'Gemini': 'gemini',
    'Microsoft': 'microsoft',
    'Copilot': 'copilot',
    'Meta': 'meta',
    'Llama': 'llama',
    'DeepMind': 'deepmind',
    'DeepSeek': 'deepseek',
    'Mistral': 'mistral',
    'Hugging Face': 'hugging-face',
    'AGI': 'agi',
    'LLM': 'llm',
    'AI Safety': 'ai-safety',
    'Neural Network': 'neural-network',
    'Transformer': 'transformer',
    'Machine Learning': 'machine-learning',
    'Deep Learning': 'deep-learning',
    'NLP': 'nlp',
    'Computer Vision': 'computer-vision',
    'Robotics': 'robotics',
    'Autonomous': 'autonomous',
    'Generative AI': 'generative-ai',
}

# Compiled keyword matchers, filled on first use and keyed by the keyword as
# written in TAG_KEYWORDS. Built lazily so keywords added to TAG_KEYWORDS at
# runtime are still honoured.
_KEYWORD_PATTERNS = {}


def _keyword_pattern(keyword: str) -> "re.Pattern[str]":
    """Return the cached whole-word matcher for *keyword* (expects lowercased text)."""
    pattern = _KEYWORD_PATTERNS.get(keyword)
    if pattern is None:
        # Words of a multi-word keyword may be separated by any whitespace
        # run (double spaces, line breaks), not only a single space.
        body = r"\s+".join(re.escape(part) for part in keyword.lower().split())
        # Left edge: not glued to a preceding ASCII letter/digit, so "agi"
        # does not fire inside "imagine". Right edge: an optional plural "s",
        # then no ASCII letter, so "meta" does not fire inside "metadata"
        # while "LLMs" and "Llama2" still count.
        pattern = re.compile(r"(?<![a-z0-9])" + body + r"s?(?![a-z])")
        _KEYWORD_PATTERNS[keyword] = pattern
    return pattern


def extract_tags_from_text(title: str, content: str) -> List[str]:
    """Extract relevant tag slugs from an article's title and content.

    Matching is case-insensitive and whole-word: a keyword only counts when
    it is not embedded in a longer ASCII word. Boundaries are ASCII-only on
    purpose, so a keyword directly followed by non-Latin script (for example
    Burmese text with no separating space) is still detected. Letter
    suffixes other than a plural "s" do not match (e.g. "GPT-4o" does not
    yield ``gpt-4``).

    Args:
        title: Article title.
        content: Article body text.

    Returns:
        Unique tag slugs, in the order their keywords appear in
        ``TAG_KEYWORDS`` (deterministic across runs). Empty when nothing
        matches.
    """
    text = f"{title} {content}".lower()
    found_tags: List[str] = []
    for keyword, slug in TAG_KEYWORDS.items():
        # The membership check keeps the result free of duplicates should two
        # keywords ever share a slug.
        if slug not in found_tags and _keyword_pattern(keyword).search(text):
            found_tags.append(slug)
    return found_tags
def ensure_tag_exists(tag_name: str, tag_slug: str) -> int:
    """Return the id of the tag with the given slug, inserting it if missing.

    Args:
        tag_name: Display name stored for a newly created tag.
        tag_slug: Slug used to look the tag up.

    Returns:
        The ``id`` of the existing or freshly inserted ``tags`` row.
    """
    lookup_sql = "SELECT id FROM tags WHERE slug = %s"
    insert_sql = """
INSERT INTO tags (name, name_burmese, slug)
VALUES (%s, %s, %s)
RETURNING id
"""
    # NOTE(review): whether the insert is committed depends on what
    # database.get_db_connection() does on exit - confirm.
    with database.get_db_connection() as conn, conn.cursor() as cur:
        cur.execute(lookup_sql, (tag_slug,))
        row = cur.fetchone()
        if not row:
            # No tag with this slug yet: create it. The English name fills
            # both name columns for now.
            cur.execute(insert_sql, (tag_name, tag_name, tag_slug))
            row = cur.fetchone()
        return row[0]
def assign_tags_to_article(article_id: int, tag_slugs: List[str]):
    """Link an article to each known tag and refresh those tags' counts.

    Slugs with no matching row in ``tags`` are skipped without error. No
    database connection is opened when ``tag_slugs`` is empty.

    Args:
        article_id: Id of the article to tag.
        tag_slugs: Slugs of the tags to attach.
    """
    if not tag_slugs:
        return

    link_sql = """
INSERT INTO article_tags (article_id, tag_id)
VALUES (%s, %s)
ON CONFLICT DO NOTHING
"""
    recount_sql = """
UPDATE tags
SET article_count = (
SELECT COUNT(*) FROM article_tags WHERE tag_id = %s
)
WHERE id = %s
"""
    with database.get_db_connection() as conn, conn.cursor() as cur:
        for slug in tag_slugs:
            cur.execute("SELECT id FROM tags WHERE slug = %s", (slug,))
            row = cur.fetchone()
            if not row:
                # Unknown slug: nothing to link.
                continue
            tag_id = row[0]
            # An already-existing link is left alone (ON CONFLICT DO NOTHING).
            cur.execute(link_sql, (article_id, tag_id))
            # Recompute the tag's article_count from article_tags.
            cur.execute(recount_sql, (tag_id, tag_id))
def auto_tag_article(article_id: int, title: str, content: str) -> List[str]:
    """Detect tags in an article, make sure they exist, and attach them.

    Args:
        article_id: Id of the article being tagged.
        title: Article title.
        content: Article body text.

    Returns:
        The tag slugs found by ``extract_tags_from_text`` (an empty list
        when none were found).
    """
    tag_slugs = extract_tags_from_text(title, content)
    if not tag_slugs:
        return []

    for slug in tag_slugs:
        # Recover the display name: the first keyword in TAG_KEYWORDS that
        # maps to this slug, or None when there is none.
        tag_name = next(
            (
                keyword
                for keyword, keyword_slug in TAG_KEYWORDS.items()
                if keyword_slug == slug
            ),
            None,
        )
        if tag_name:
            ensure_tag_exists(tag_name, slug)

    assign_tags_to_article(article_id, tag_slugs)
    return tag_slugs
if __name__ == '__main__':
    # Manual smoke test: runs keyword extraction on a sample headline and
    # body and prints the resulting slugs. No database access happens here.
    sample_title = "OpenAI Releases GPT-5 with ChatGPT Integration"
    sample_content = "OpenAI announced GPT-5 today with improved Claude-like capabilities and better AI safety measures..."
    tags = extract_tags_from_text(sample_title, sample_content)
    print(f"Found tags: {tags}")