From ba2c7955f42fa86a366596a7e7019602de5d1e95 Mon Sep 17 00:00:00 2001
From: Min Zeya Phyo
Date: Thu, 19 Feb 2026 19:18:35 +0800
Subject: [PATCH] Add backend pipeline Dockerfile with lightweight deps

---
Review note: file contents were revised by hand during review; the blob ids
on the `index` lines below still refer to the originally submitted files.

 backend/Dockerfile                | 36 ++++++++++++++++++++++++++++++++++++
 backend/requirements-pipeline.txt | 32 ++++++++++++++++++++++++++++++++
 2 files changed, 68 insertions(+)
 create mode 100644 backend/Dockerfile
 create mode 100644 backend/requirements-pipeline.txt

diff --git a/backend/Dockerfile b/backend/Dockerfile
new file mode 100644
index 0000000..16fda15
--- /dev/null
+++ b/backend/Dockerfile
@@ -0,0 +1,36 @@
+FROM python:3.11-slim
+
+# System-wide NLTK data location, readable by the unprivileged runtime user
+ENV NLTK_DATA=/usr/local/share/nltk_data
+
+WORKDIR /app
+
+# Install system dependencies for newspaper3k and psycopg2
+RUN apt-get update && apt-get install -y --no-install-recommends \
+    gcc \
+    libjpeg-dev \
+    libpq-dev \
+    libxml2-dev \
+    libxslt1-dev \
+    zlib1g-dev \
+    && rm -rf /var/lib/apt/lists/*
+
+# Create an unprivileged user that owns /app so the pipeline does not run as root
+RUN groupadd --gid 10001 app \
+    && useradd --uid 10001 --gid app --home-dir /app --no-create-home app \
+    && chown app:app /app
+
+# Install Python dependencies
+COPY requirements-pipeline.txt ./requirements.txt
+RUN pip install --no-cache-dir -r requirements.txt
+
+# Download NLTK data needed by newspaper3k; nltk.download() returns False
+# instead of raising on failure, so exit non-zero to fail the build
+RUN python -c "import os, sys, nltk; ok = nltk.download('punkt_tab', download_dir=os.environ['NLTK_DATA'], quiet=True); sys.exit(None if ok else 'NLTK punkt_tab download failed')"
+
+# Copy application code
+COPY --chown=app:app . .
+
+USER app
+
+CMD ["python", "run_pipeline.py"]
diff --git a/backend/requirements-pipeline.txt b/backend/requirements-pipeline.txt
new file mode 100644
index 0000000..fffc9d4
--- /dev/null
+++ b/backend/requirements-pipeline.txt
@@ -0,0 +1,32 @@
+# Burmddit Pipeline - Lightweight requirements (no PyTorch/Scrapy)
+
+# Web scraping
+beautifulsoup4==4.12.3
+requests==2.31.0
+feedparser==6.0.11
+newspaper3k==0.2.8
+# newspaper3k imports lxml.html.clean, which lxml 5.2+ no longer bundles
+lxml==5.1.0
+
+# Database
+psycopg2-binary==2.9.9
+
+# AI (Claude for translation/compilation)
+anthropic==0.18.1
+# anthropic 0.18.1 passes `proxies=` to httpx.Client, which httpx 0.28 removed
+httpx==0.27.2
+
+# Text processing
+scikit-learn==1.4.0
+python-slugify==8.0.2
+markdown==3.5.2
+bleach==6.1.0
+
+# Utilities
+python-dotenv==1.0.1
+python-dateutil==2.8.2
+pytz==2024.1
+pyyaml==6.0.1
+
+# Logging
+loguru==0.7.2