# =============================================================
#  DemIA HF Text Classification Starter — Dockerfile
#
#  Builds a container that:
#    1. Installs all Python dependencies (CPU PyTorch).
#    2. Copies the project source.
#    3. Starts the FastAPI inference server on port 8000.
#
#  Build:
#    docker build -t demia-hf-clf .
#
#  Run API server:
#    docker run -p 8000:8000 \
#      -e HF_MODEL_ID=distilbert-base-uncased \
#      -v $(pwd)/outputs:/app/outputs \
#      demia-hf-clf
#
#  Run training:
#    docker run --rm \
#      -e HF_MODEL_ID=distilbert-base-uncased \
#      -v $(pwd)/outputs:/app/outputs \
#      demia-hf-clf \
#      python scripts/run.py train
#
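#  GPU variant: replace the base image with
#    pytorch/pytorch:2.3.0-cuda12.1-cudnn8-runtime
#  remove the "Install CPU-only PyTorch" step below (it pins the +cpu
#  wheels and would otherwise install them over the image's CUDA build),
#  and add --gpus all to the docker run command.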
# =============================================================

# Base image: Debian-based slim Python 3.11 (apt-get is used below).
# See the header comment for the GPU variant.
FROM python:3.11-slim

# OS packages, noted by the author as needed by some HF tokeniser backends.
# The apt package lists are removed in the same layer so they do not add
# to the image size.
RUN apt-get update \
 && apt-get install --yes --no-install-recommends \
      curl \
      git \
 && rm -rf /var/lib/apt/lists/*

# Application root: relative COPY destinations and the default command
# below resolve against this directory.
WORKDIR /app

# ── CPU-only PyTorch wheels (installed first; smaller image) ──
# Pulled from the PyTorch CPU wheel index before the project requirements
# are installed. pip itself is upgraded in the same layer.
RUN pip install --no-cache-dir --upgrade pip \
 && pip install --no-cache-dir \
      --index-url https://download.pytorch.org/whl/cpu \
      torch==2.3.0+cpu \
      torchvision==0.18.0+cpu

# ── Project dependencies ──────────────────────────────────────
# Only the manifest is copied at this point, so this layer is rebuilt
# when requirements.txt changes rather than on every source edit.
COPY requirements.txt ./
RUN pip install --no-cache-dir --requirement requirements.txt

# ── Project source ────────────────────────────────────────────
# Copies the whole build context into the working directory.
COPY . ./

# ── Output directory ──────────────────────────────────────────
# Guarantees /app/outputs exists even when the build context has no
# outputs/ directory; the header's run examples bind-mount over it.
RUN mkdir --parents /app/outputs

# ── Runtime environment defaults ──────────────────────────────
# Any of these can be overridden per container with `docker run -e NAME=value`.
#   HF_HUB_DISABLE_PROGRESS_BARS  avoid HF Hub progress bars in
#                                 non-interactive mode.
#   HF_HOME                       fixed cache directory inside the container.
ENV API_HOST=0.0.0.0 \
    API_PORT=8000 \
    HF_HOME=/app/.cache/huggingface \
    HF_HUB_DISABLE_PROGRESS_BARS=1 \
    HF_MODEL_ID=distilbert-base-uncased \
    LOG_LEVEL=INFO \
    OUTPUT_DIR=/app/outputs

# Documents the default listening port. EXPOSE does not publish it;
# use `docker run -p` for that.
EXPOSE 8000

# Default command: start the API server.
# Override with 'docker run ... python scripts/run.py train' for training.
#
# Host and port are read from API_HOST / API_PORT, so that
# `docker run -e API_PORT=9000 -p 9000:9000 ...` actually moves the listener.
# The ":-" fallbacks keep the previous 0.0.0.0:8000 behaviour when the
# variables are unset or empty.
# Exec-form CMD performs no variable expansion, hence the `sh -c` wrapper.
# `exec` replaces the shell so uvicorn runs as PID 1 and receives the
# SIGTERM sent by `docker stop`.
CMD ["sh", "-c", "exec uvicorn api.main:app --host \"${API_HOST:-0.0.0.0}\" --port \"${API_PORT:-8000}\""]
