Add backend pipeline Dockerfile with lightweight deps

This commit is contained in:
Min Zeya Phyo
2026-02-19 19:18:35 +08:00
parent 98af1c7cec
commit ba2c7955f4
2 changed files with 53 additions and 0 deletions

25
backend/Dockerfile Normal file
View File

@@ -0,0 +1,25 @@
# Image for the backend pipeline: installs the pinned Python dependencies,
# bakes the NLTK tokenizer data into the image, and runs run_pipeline.py as an
# unprivileged user.
FROM python:3.11-slim

WORKDIR /app

# Compiler and development headers for Python packages that may have to build
# from source when no prebuilt wheel is available (XML/XSLT, JPEG/zlib and
# PostgreSQL client headers). Kept alphabetically sorted for clean diffs; the
# apt lists are removed in the same layer so they never reach the image.
RUN apt-get update && apt-get install -y --no-install-recommends \
        gcc \
        libjpeg-dev \
        libpq-dev \
        libxml2-dev \
        libxslt1-dev \
        zlib1g-dev \
    && rm -rf /var/lib/apt/lists/*

# Install Python dependencies before copying the source so this layer stays
# cached until the requirements file itself changes.
COPY requirements-pipeline.txt ./requirements.txt
RUN pip install --no-cache-dir -r requirements.txt

# Download the NLTK data needed by newspaper3k.
# - download_dir is a system-wide location on NLTK's default search path, so
#   the data is found regardless of which user the container runs as.
# - nltk.download() returns False instead of raising when the download fails;
#   turn that into a non-zero exit so a broken download fails the build rather
#   than producing an image without tokenizer data.
RUN python -c "import sys, nltk; sys.exit(0 if nltk.download('punkt_tab', download_dir='/usr/local/share/nltk_data', quiet=True) else 1)"

# Dedicated system account so the pipeline does not run as root. /app is
# handed to that account so the process can still write next to its code.
RUN groupadd --system app \
    && useradd --system --gid app --no-create-home --home-dir /app app \
    && chown app:app /app

# Copy application code (owned by the runtime user).
COPY --chown=app:app . .

USER app

# Exec form: python runs as PID 1 and receives SIGTERM from `docker stop`.
CMD ["python", "run_pipeline.py"]

View File

@@ -0,0 +1,28 @@
# Burmddit Pipeline - Lightweight requirements (no PyTorch/Scrapy)
# Top-level packages are pinned to exact versions for reproducible builds.

# Web scraping
beautifulsoup4==4.12.3
requests==2.31.0
feedparser==6.0.11
newspaper3k==0.2.8
# newspaper3k imports lxml.html.clean but does not pin lxml. Since lxml 5.2
# that module lives in the separate lxml_html_clean project, so an unpinned
# resolve makes `import newspaper` fail with ImportError. Request the extra
# explicitly and pin lxml alongside the other packages.
lxml[html_clean]==5.3.0

# Database
psycopg2-binary==2.9.9

# AI (Claude for translation/compilation)
anthropic==0.18.1
# anthropic 0.18.x passes `proxies=` to httpx.Client, an argument that
# httpx 0.28 removed; stay on the last release line that still accepts it.
httpx==0.27.2

# Text processing
scikit-learn==1.4.0
python-slugify==8.0.2
markdown==3.5.2
bleach==6.1.0

# Utilities
python-dotenv==1.0.1
python-dateutil==2.8.2
pytz==2024.1
pyyaml==6.0.1

# Logging
loguru==0.7.2