Add backend pipeline Dockerfile with lightweight deps
This commit is contained in:
25
backend/Dockerfile
Normal file
25
backend/Dockerfile
Normal file
@@ -0,0 +1,25 @@
|
||||
# Image for the backend pipeline: installs the lightweight dependency set
# from requirements-pipeline.txt and runs run_pipeline.py.
FROM python:3.11-slim

WORKDIR /app

# System packages (compiler plus XML/XSLT, JPEG, zlib and PostgreSQL headers)
# for the native dependencies of newspaper3k and psycopg2.
# Sorted alphabetically to keep diffs small; the apt lists are removed in the
# same layer so they never reach the image.
RUN apt-get update && apt-get install -y --no-install-recommends \
        gcc \
        libjpeg-dev \
        libpq-dev \
        libxml2-dev \
        libxslt1-dev \
        zlib1g-dev \
    && rm -rf /var/lib/apt/lists/*

# Unprivileged account that runs the pipeline. Its home is /app, and /app is
# handed over to it so the process can still write files next to the code.
RUN groupadd --system --gid 10001 app \
    && useradd --system --uid 10001 --gid app --home-dir /app --no-create-home app \
    && chown app:app /app

# Install Python dependencies before copying the source so this layer stays
# cached until requirements-pipeline.txt changes.
COPY requirements-pipeline.txt ./requirements.txt
RUN pip install --no-cache-dir -r requirements.txt

# Download NLTK data needed by newspaper3k into a system-wide directory so it
# is found regardless of which user runs the container. nltk.download()
# returns False instead of raising on failure, so turn that into a non-zero
# exit code and fail the build rather than shipping an image without the data.
ENV NLTK_DATA=/usr/local/share/nltk_data
RUN python -c "import os, sys, nltk; sys.exit(0 if nltk.download('punkt_tab', download_dir=os.environ['NLTK_DATA'], quiet=True) else 1)"

# Copy application code, owned by the runtime user.
COPY --chown=app:app . .

# Drop root privileges for the running container.
USER app

CMD ["python", "run_pipeline.py"]
|
||||
28
backend/requirements-pipeline.txt
Normal file
28
backend/requirements-pipeline.txt
Normal file
@@ -0,0 +1,28 @@
|
||||
# Burmddit Pipeline - Lightweight requirements (no PyTorch/Scrapy)
# Every package is pinned to an exact version for reproducible image builds.

# Web scraping
beautifulsoup4==4.12.3
requests==2.31.0
feedparser==6.0.11
newspaper3k==0.2.8
# newspaper3k 0.2.8 imports lxml.html.clean, which lxml >= 5.2 ships as the
# separate lxml_html_clean project; without it `import newspaper` fails.
lxml_html_clean==0.4.1

# Database
psycopg2-binary==2.9.9

# AI (Claude for translation/compilation)
anthropic==0.18.1

# Text processing
scikit-learn==1.4.0
python-slugify==8.0.2
markdown==3.5.2
bleach==6.1.0

# Utilities
python-dotenv==1.0.1
python-dateutil==2.8.2
pytz==2024.1
pyyaml==6.0.1

# Logging
loguru==0.7.2
|
||||
Reference in New Issue
Block a user