# =============================================================================
# Dockerfile – Zero-Shot Classification service
#
# Build:
#   docker build -t zsc-starter .
#
# Run the API:
#   docker run -p 8000:8000 zsc-starter
#
# Run the Streamlit UI:
#   docker run -p 8501:8501 zsc-starter streamlit run app/app.py
#
# Override the model at runtime:
#   docker run -e HF_MODEL_ID=typeform/distilbart-mnli-12-3 -p 8000:8000 zsc-starter
#
# Mount a local model cache to avoid re-downloading weights:
#   docker run -v ~/.cache/huggingface:/root/.cache/huggingface -p 8000:8000 zsc-starter
# =============================================================================

# ── Base image ────────────────────────────────────────────────────────────────
# Use the slim official Python image as the base; CPU-only torch wheels are
# installed via pip further down, so no CUDA build step is needed.
# For GPU support, switch to pytorch/pytorch:2.3.0-cuda12.1-cudnn8-runtime.
FROM python:3.11-slim AS base

# Image metadata, declared in a single LABEL instruction.
LABEL maintainer="DemIA Living Lab – Universidad de Salamanca (USAL) / BISITE <bisite@usal.es>" \
      description="Zero-shot text classification with Hugging Face Transformers" \
      version="1.0.0"

# ── System dependencies ───────────────────────────────────────────────────────
# curl is used by the HEALTHCHECK instruction later in this file.
# build-essential and git are presumably needed to build or fetch some of the
# Python requirements (TODO confirm against requirements.txt).
# The apt package lists are deleted in the same layer so they never persist
# in the image.
RUN apt-get update \
    && apt-get install --yes --no-install-recommends \
        build-essential \
        curl \
        git \
    && rm -rf /var/lib/apt/lists/*

# ── Python environment ────────────────────────────────────────────────────────
# Interpreter settings: do not write .pyc files, keep stdout/stderr
# unbuffered so log lines appear immediately, and use UTF-8 for the
# standard streams.
ENV PYTHONDONTWRITEBYTECODE=1 \
    PYTHONUNBUFFERED=1 \
    PYTHONIOENCODING=utf-8

# pip settings: keep no download/wheel cache in the image and skip the
# "new pip version available" check.
ENV PIP_NO_CACHE_DIR=1 \
    PIP_DISABLE_PIP_VERSION_CHECK=1

# ── Working directory ─────────────────────────────────────────────────────────
WORKDIR /app

# ── Dependencies (cached layer) ───────────────────────────────────────────────
# Only the requirements manifest is copied at this point, so the install
# layer below is reused from cache until requirements.txt itself changes.
COPY requirements.txt ./

# CPU-only torch wheels are installed first to keep the image size manageable.
# For GPU, drop the --index-url option and use a CUDA-enabled base image.
RUN python -m pip install --upgrade pip \
    && python -m pip install torch --index-url https://download.pytorch.org/whl/cpu \
    && python -m pip install -r requirements.txt

# ── Application code ──────────────────────────────────────────────────────────
# Copies the entire build context into the working directory (/app).
# Anything not excluded by a .dockerignore file ends up in the image.
COPY . .

# ── Hugging Face cache directory ──────────────────────────────────────────────
# Set HF_HOME so that model weights are stored in a predictable location
# that can be bind-mounted from the host (this is the container path used by
# the `docker run -v ...` example in the file header).
# NOTE(review): the path lives under /root and this file sets no USER
# instruction, so the container presumably runs as root — confirm before
# switching to a non-root user, as the cache path would have to move too.
ENV HF_HOME=/root/.cache/huggingface

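# ── Pre-download the default model (optional build-time step) ─────────────────
# Uncomment the ARG/RUN block below to bake the model weights into the image.
# This increases image size (~1.6 GB for bart-large-mnli) but eliminates the
# first-run download delay. The heredoc syntax requires BuildKit.
#
# The model ID is read from the environment inside Python: the quoted heredoc
# delimiter ('EOF') disables shell expansion, so a "${MODEL_ID}" placeholder
# would reach Python as a literal string. ARG values are visible to RUN
# commands as environment variables.
#
# ARG MODEL_ID=facebook/bart-large-mnli
# RUN python - <<'EOF'
# import os
# from transformers import pipeline
# pipeline("zero-shot-classification", model=os.environ["MODEL_ID"])
# EOF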

# ── Expose ports ──────────────────────────────────────────────────────────────
# Dockerfile comments must be on their own line: a `#` after an instruction
# is passed to that instruction as an argument, which makes EXPOSE fail with
# an invalid-port error. Keep the per-port notes above each instruction.
# EXPOSE is documentation only; ports are published with `docker run -p`.

# FastAPI
EXPOSE 8000

# Streamlit
EXPOSE 8501

# ── Health check ──────────────────────────────────────────────────────────────
# Probes the API's /healthz endpoint on port 8000 with curl (installed in the
# system dependencies step). The check is retried 3 times at 30 s intervals
# after a 60 s start-up grace period.
# NOTE(review): the probe targets port 8000 only; if the default command is
# overridden to run just the Streamlit UI, the container will presumably be
# reported as unhealthy — confirm whether that is acceptable.
HEALTHCHECK \
    --interval=30s \
    --timeout=10s \
    --start-period=60s \
    --retries=3 \
    CMD curl -f http://localhost:8000/healthz || exit 1

# ── Default command ───────────────────────────────────────────────────────────
# Start the FastAPI server via scripts/run.py, bound to 0.0.0.0 on port 8000
# (the port the HEALTHCHECK probes and the `docker run -p 8000:8000` examples
# publish). Exec (JSON-array) form is used, so no shell wraps the process.
# Override with `docker run ... streamlit run app/app.py` to launch the UI
# instead.
CMD ["python", "scripts/run.py", "serve", "--host", "0.0.0.0", "--port", "8000"]
