Add backend pipeline Dockerfile with lightweight deps
This commit is contained in:
25
backend/Dockerfile
Normal file
25
backend/Dockerfile
Normal file
@@ -0,0 +1,25 @@
|
|||||||
|
# Image for the backend pipeline: installs the packages listed in
# requirements-pipeline.txt and runs run_pipeline.py as an unprivileged user.
FROM python:3.11-slim

# PYTHONUNBUFFERED: flush stdout/stderr immediately so `docker logs` shows
#   pipeline output as it is produced.
# NLTK_DATA: system-wide location for the tokenizer data downloaded below, so
#   it does not end up in the build user's home directory, which the runtime
#   user cannot read.
ENV PYTHONUNBUFFERED=1 \
    NLTK_DATA=/usr/local/share/nltk_data

WORKDIR /app

# Install system dependencies for newspaper3k and psycopg2.
# The apt lists are removed in the same layer so they never reach the image.
RUN apt-get update && apt-get install -y --no-install-recommends \
        gcc \
        libjpeg-dev \
        libpq-dev \
        libxml2-dev \
        libxslt1-dev \
        zlib1g-dev \
    && rm -rf /var/lib/apt/lists/*

# Unprivileged account for running the pipeline. A fixed numeric UID/GID lets
# orchestrators verify the container is non-root. /app is handed to this
# account so the pipeline can still create files in its working directory.
RUN groupadd --gid 10001 app \
    && useradd --uid 10001 --gid app --home-dir /app --no-create-home \
        --shell /usr/sbin/nologin app \
    && chown app:app /app

# Install Python dependencies. The manifest is copied on its own so this layer
# stays cached until the requirements change.
COPY requirements-pipeline.txt ./requirements.txt
RUN pip install --no-cache-dir -r requirements.txt

# Download NLTK data needed by newspaper3k.
# raise_on_error=True makes a failed download abort the build; without it
# nltk.download() only returns False and the image is built without the data.
RUN mkdir -p "$NLTK_DATA" \
    && python -c "import os, nltk; nltk.download('punkt_tab', download_dir=os.environ['NLTK_DATA'], quiet=True, raise_on_error=True)"

# Copy application code, owned by the runtime user.
COPY --chown=app:app . .

# Drop root for everything that runs in the container.
USER app

CMD ["python", "run_pipeline.py"]
|
||||||
28
backend/requirements-pipeline.txt
Normal file
28
backend/requirements-pipeline.txt
Normal file
@@ -0,0 +1,28 @@
|
|||||||
|
# Burmddit Pipeline - Lightweight requirements (no PyTorch/Scrapy)
# Every package is pinned to an exact version so image builds are reproducible.

# Web scraping
beautifulsoup4==4.12.3
requests==2.31.0
feedparser==6.0.11
newspaper3k==0.2.8
# newspaper3k 0.2.8 imports lxml.html.clean. Starting with lxml 5.2 that module
# is distributed as the separate lxml_html_clean project, and lxml itself is
# not pinned here, so without this line `import newspaper` raises ImportError
# on a fresh install.
lxml_html_clean==0.4.1

# Database
psycopg2-binary==2.9.9

# AI (Claude for translation/compilation)
anthropic==0.18.1

# Text processing
scikit-learn==1.4.0
python-slugify==8.0.2
markdown==3.5.2
bleach==6.1.0

# Utilities
python-dotenv==1.0.1
python-dateutil==2.8.2
pytz==2024.1
pyyaml==6.0.1

# Logging
loguru==0.7.2
|
||||||
Reference in New Issue
Block a user