# ── Base image ────────────────────────────────────────────────────────────────
# Slim Python 3.11 base. PyTorch is installed further down from the CPU-only
# wheel index so the image stays a manageable size.
# For GPU support, replace the base with
# pytorch/pytorch:2.3.0-cuda12.1-cudnn8-runtime.
FROM python:3.11-slim

# Image metadata, grouped into a single LABEL instruction.
LABEL maintainer="DemIA Living Lab - Universidad de Salamanca (USAL) / BISITE" \
      description="Hugging Face Extractive QA Starter — FastAPI service"

# ── System dependencies ───────────────────────────────────────────────────────
# curl is used by the HEALTHCHECK probe. build-essential is presumably needed
# to compile Python packages that ship no prebuilt wheel — TODO confirm.
# `set -e` aborts the layer on the first failing command; the apt package
# lists are removed in the same layer so they never reach the image.
RUN set -e; \
    apt-get update; \
    apt-get install -y --no-install-recommends build-essential curl; \
    rm -rf /var/lib/apt/lists/*

# ── Working directory ─────────────────────────────────────────────────────────
WORKDIR /app

# ── Python dependencies ───────────────────────────────────────────────────────
# Install the PyTorch CPU wheel in its own layer, before requirements.txt is
# copied. With --no-cache-dir nothing is kept between builds, so keeping torch
# out of the requirements layer means an edit to requirements.txt no longer
# forces it to be downloaded and installed again.
# NOTE(review): if requirements.txt pins a torch version different from the one
# resolved here, the second pip call will replace it with the default PyPI
# build — confirm against requirements.txt.
RUN pip install --no-cache-dir \
        torch --index-url https://download.pytorch.org/whl/cpu

# Copy only the manifest so the layer below is rebuilt only when it changes.
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

# ── Unprivileged runtime user ─────────────────────────────────────────────────
# The service needs no root privileges at runtime. A fixed numeric UID/GID lets
# orchestrators verify the non-root user; the home directory gives libraries a
# writable default location for per-user files.
RUN groupadd --gid 10001 app \
    && useradd --uid 10001 --gid app --create-home --shell /usr/sbin/nologin app

# ── Application source ────────────────────────────────────────────────────────
# Copy the build context into /app, owned by the runtime user.
COPY --chown=app:app . .

# ── Hugging Face cache inside the container ───────────────────────────────────
# Mount a volume here to persist model downloads across container restarts.
# A named volume picks up the ownership set below:
#   docker run -v hf_cache:/app/hf_cache ...
# A bind-mounted host directory must be writable by UID 10001:
#   docker run -v $(pwd)/.hf_cache:/app/hf_cache ...
ENV HF_CACHE_DIR=/app/hf_cache
# /app itself is chowned (non-recursively) so the runtime user can create
# files directly under the working directory as well as in the cache.
RUN mkdir -p /app/hf_cache \
    && chown app:app /app /app/hf_cache

# Everything from here on, including the running container, is unprivileged.
USER app

# ── Runtime defaults ──────────────────────────────────────────────────────────
# Each value can be overridden at `docker run` time, e.g. -e HF_MODEL_ID=...
ENV HF_MODEL_ID=deepset/roberta-base-squad2 \
    LOG_LEVEL=INFO \
    API_HOST=0.0.0.0 \
    API_PORT=8000

# ── Expose API port ───────────────────────────────────────────────────────────
# Documentation only: publishing still requires -p / -P at run time.
EXPOSE 8000

# ── Health check ──────────────────────────────────────────────────────────────
# Probe the same port the server binds to. The shell-form command is expanded
# when the check runs, so a runtime override (-e API_PORT=...) is honored;
# 8000 is the fallback when the variable is unset.
HEALTHCHECK --interval=30s --timeout=10s --start-period=60s --retries=3 \
    CMD curl -fsS "http://localhost:${API_PORT:-8000}/health" || exit 1

# ── Entrypoint ────────────────────────────────────────────────────────────────
# The model is downloaded on first request (lazy loading).
# To pre-download during build, uncomment the RUN line below and set HF_MODEL_ID as a build-arg:
# ARG HF_MODEL_ID=deepset/roberta-base-squad2
# RUN python -c "from transformers import pipeline; pipeline('question-answering', model='${HF_MODEL_ID}')"

# Exec-form CMD does no variable expansion, so uvicorn is started through
# `sh -c` to pick up API_HOST / API_PORT (defaults: 0.0.0.0 and 8000).
# `exec` replaces the shell, so uvicorn runs as PID 1 and receives the stop
# signal from `docker stop` directly.
CMD ["sh", "-c", "exec uvicorn api.main:app --host \"${API_HOST:-0.0.0.0}\" --port \"${API_PORT:-8000}\""]
