# =============================================================================
# Dockerfile – Hugging Face Embeddings Starter
# =============================================================================
# Multi-stage build: "builder" installs deps; "runtime" is the lean image.
#
# Build:
#   docker build -t hf-embeddings-starter .
#
# Run FastAPI service:
#   docker run -p 8000:8000 hf-embeddings-starter
#
# Override the model at runtime:
#   docker run -e HF_MODEL_ID=BAAI/bge-large-en-v1.5 -p 8000:8000 hf-embeddings-starter
#
# Pre-bake weights into the image (avoids download at startup):
#   docker build --build-arg MODEL_ID=sentence-transformers/all-MiniLM-L6-v2 \
#          -t hf-embeddings-starter .
# =============================================================================

# Global build arguments. Because they are declared before the first FROM they
# are only visible in FROM lines; a stage that needs the value elsewhere must
# redeclare it with a bare `ARG <name>` (the runtime stage does so for MODEL_ID).
#
# PYTHON_VERSION selects the tag of the python:<version>-slim base image used
# by both stages.
ARG PYTHON_VERSION=3.11
# MODEL_ID is the default Hugging Face model identifier; override it with
# `docker build --build-arg MODEL_ID=<org>/<model>`.
ARG MODEL_ID=sentence-transformers/all-MiniLM-L6-v2

# ---------------------------------------------------------------------------
# Stage 1 – builder: install Python deps into /install
# ---------------------------------------------------------------------------
FROM python:${PYTHON_VERSION}-slim AS builder

# Work in a scratch directory that is NOT the pip prefix. The runtime stage
# copies everything under /install into /usr/local, so any file placed in
# /install (such as requirements.txt) would otherwise leak into the final image.
WORKDIR /build

# Compilers for dependencies that have to build native extensions from source.
# They stay in this stage only and never reach the runtime image.
RUN apt-get update && apt-get install -y --no-install-recommends \
        g++ \
        gcc \
    && rm -rf /var/lib/apt/lists/*

# Copy only the dependency manifest so this layer is rebuilt only when the
# requirements change, not on every source edit.
COPY requirements.txt .

# Install the dependencies under the /install prefix so the runtime stage can
# pick them up with a single COPY. --no-cache-dir keeps pip's download cache
# out of the layer.
RUN pip install --no-cache-dir --upgrade pip && \
    pip install --no-cache-dir --prefix=/install -r requirements.txt

# ---------------------------------------------------------------------------
# Stage 2 – runtime image
# ---------------------------------------------------------------------------
FROM python:${PYTHON_VERSION}-slim AS runtime

# Redeclare the global build argument so its value (default or --build-arg
# override) is visible inside this stage.
ARG MODEL_ID

# HF_MODEL_ID             – model identifier; can be overridden with `docker run -e`.
# HF_HOME                 – directory the model weights are cached in.
# PYTHONUNBUFFERED        – flush stdout/stderr immediately so logs show up live.
# PYTHONDONTWRITEBYTECODE – do not write .pyc files into the image/container.
ENV HF_MODEL_ID=${MODEL_ID} \
    HF_HOME=/app/model_cache \
    PYTHONUNBUFFERED=1 \
    PYTHONDONTWRITEBYTECODE=1

WORKDIR /app

# Create an unprivileged user with a stable numeric UID/GID and hand it the
# application directory and the model cache. The cache must stay writable so
# that a model selected at run time via HF_MODEL_ID can still be downloaded.
RUN groupadd --gid 10001 app \
    && useradd --uid 10001 --gid app --home-dir /app --no-create-home \
               --shell /usr/sbin/nologin app \
    && mkdir -p "${HF_HOME}" \
    && chown app:app /app "${HF_HOME}"

# Copy installed packages from builder
COPY --from=builder /install /usr/local

# Everything below runs as the unprivileged user; no later step needs root.
USER app

# Pre-download the model weights so the container does not have to fetch them
# at startup. This layer is placed BEFORE the application source is copied so
# that editing the source does not invalidate it and trigger a re-download.
# (Remove this RUN layer if you prefer to download at first request.)
# The heredoc form requires BuildKit.
# NOTE(review): the weights are stored with cache_folder=$HF_HOME; confirm the
# application loads the model from the same location, otherwise it may
# download the weights again at startup.
RUN python - <<'EOF'
import os
from sentence_transformers import SentenceTransformer
model_id = os.environ.get("HF_MODEL_ID", "sentence-transformers/all-MiniLM-L6-v2")
print(f"Pre-downloading model: {model_id}")
SentenceTransformer(model_id, cache_folder=os.environ["HF_HOME"])
print("Done.")
EOF

# Copy application source last (it changes most often), owned by the
# unprivileged user. Use a .dockerignore to keep .git, virtualenvs and local
# caches out of the build context.
COPY --chown=app:app . .

# Documentation only: the service listens on 8000 (an unprivileged port).
EXPOSE 8000

# Default command: serve the FastAPI application (exec form, so uvicorn is
# PID 1 and receives SIGTERM from `docker stop`).
CMD ["uvicorn", "api.main:app", "--host", "0.0.0.0", "--port", "8000"]
