# =============================================================================
# Dockerfile — Hugging Face Text Generation Starter
# =============================================================================
# Builds a container that serves the FastAPI inference API.
#
# Build:
#   docker build -t hf-text-gen .
#
# Run (CPU):
#   docker run -p 8000:8000 -e HF_MODEL_ID=gpt2 hf-text-gen
#
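# Run (GPU) — the default build installs CPU-only PyTorch, so rebuild with a
# CUDA wheel index (see the PyTorch install step below) for GPU inference:
#   docker run --gpus all -p 8000:8000 -e HF_MODEL_ID=gpt2 hf-text-gen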
#
# Run the Gradio UI instead:
#   docker run -p 7860:7860 -e HF_MODEL_ID=gpt2 hf-text-gen \
#       python scripts/run.py --mode app
# =============================================================================

FROM python:3.11-slim AS base

# All later stages build and run from /app.
WORKDIR /app

# OS-level packages needed by PyTorch / tokenizers / sentencepiece.
# Packages are listed alphabetically, one per line, to keep diffs small.
# The apt package index is removed in the same layer so it never ends up in the image.
RUN apt-get update \
    && apt-get install --yes --no-install-recommends \
        build-essential \
        curl \
        git \
    && rm -rf /var/lib/apt/lists/*

# ---------------------------------------------------------------------------
# Dependency layer (cached unless requirements.txt changes)
# ---------------------------------------------------------------------------
FROM base AS deps

COPY requirements.txt .

# Wheel index that PyTorch is installed from. The default is the CPU-only
# build (smaller image). To build with CUDA wheels instead, override it at
# build time rather than editing this file, for example:
#   docker build --build-arg TORCH_INDEX_URL=https://download.pytorch.org/whl/cu121 -t hf-text-gen .
ARG TORCH_INDEX_URL=https://download.pytorch.org/whl/cpu

# Install PyTorch from the selected index first, then the remaining
# requirements. --no-cache-dir keeps pip's download cache out of the layer.
RUN pip install --no-cache-dir torch --index-url "${TORCH_INDEX_URL}" && \
    pip install --no-cache-dir -r requirements.txt

# ---------------------------------------------------------------------------
# Application layer
# ---------------------------------------------------------------------------
FROM deps AS app

# Create an unprivileged runtime account so the server does not run as root.
# A fixed numeric UID/GID lets orchestrators verify the non-root user.
# The model cache directory is created and handed to that account here,
# before VOLUME is declared, so anonymous and named volumes are initialised
# with the right ownership.
RUN groupadd --gid 10001 app \
    && useradd --uid 10001 --gid app --home-dir /app --no-create-home \
        --shell /usr/sbin/nologin app \
    && mkdir -p /app/.cache/huggingface \
    && chown -R app:app /app

# Copy source code, owned by the runtime account.
COPY --chown=app:app . .

# Everything from here on (including the optional pre-download below and the
# container's main process) runs as the unprivileged account.
USER app

# Pre-download the default model into the image cache (optional).
# Uncomment to bake weights into the image (increases image size significantly):
# RUN python -c "from src.model import GenerativeModel; from src.utils import load_config; \
#     cfg=load_config(); gm=GenerativeModel.from_config(cfg); gm.load()"

# ---------------------------------------------------------------------------
# Runtime configuration
# ---------------------------------------------------------------------------

# Defaults for the running container; override any of them at runtime with
# -e NAME=value (for example -e HF_MODEL_ID=<model>).
ENV HF_MODEL_ID=gpt2 \
    HF_CACHE_DIR=/app/.cache/huggingface \
    LOG_LEVEL=INFO \
    API_HOST=0.0.0.0 \
    API_PORT=8000 \
    APP_PORT=7860

# Model cache volume (mount externally to persist between runs).
# NOTE: when bind-mounting a host directory here, it must be writable by
# UID 10001 (or start the container with a matching --user).
VOLUME ["/app/.cache/huggingface"]

# 8000: FastAPI API, 7860: Gradio UI. EXPOSE is documentation only; publish
# the ports with -p at run time.
EXPOSE 8000 7860

# Default command: start the FastAPI API.
CMD ["python", "scripts/run.py", "--mode", "api"]
