# =============================================================================
# Dockerfile — LangChain RAG Starter
# Serves the FastAPI endpoint on port 8000.
# =============================================================================
#
# Build:
#   docker build -t rag-starter .
#
# Run (pass API keys via --env-file):
#   docker run -p 8000:8000 --env-file .env rag-starter
#
# Mount documents from the host:
#   docker run -p 8000:8000 --env-file .env \
#              -v "$(pwd)/data":/app/data \
#              rag-starter
# =============================================================================

FROM python:3.11-slim AS base

# OS-level packages for the Python stack (Chroma, pypdf, sentence-transformers).
# The apt package index is removed in the same layer that created it, so it
# never ends up in the image.
RUN apt-get update \
 && apt-get install --yes --no-install-recommends \
        build-essential \
        curl \
        git \
 && rm -rf /var/lib/apt/lists/*

# Every relative path in the instructions that follow resolves against /app.
WORKDIR /app

# ---- Dependencies -----------------------------------------------------------
# requirements.txt is copied on its own so that the install layer below is
# served from the build cache until the requirements file itself changes.
COPY requirements.txt ./
RUN pip install --no-cache-dir --upgrade pip \
 && pip install --no-cache-dir --requirement requirements.txt

# ---- Model cache locations --------------------------------------------------
# HF_HOME is the root for Hugging Face caches. SENTENCE_TRANSFORMERS_HOME is
# the default `cache_folder` used by sentence-transformers when the caller
# passes none. Pointing both at the same directory guarantees that the model
# baked in below is the one found at runtime instead of being fetched again.
ENV HF_HOME=/app/.hf_cache \
    SENTENCE_TRANSFORMERS_HOME=/app/.hf_cache

# ---- Non-root user ----------------------------------------------------------
# The user is created before any application files are copied so those files
# can be assigned to it with COPY --chown. A recursive chown after the copy
# would duplicate every copied file in an extra layer. At this point /app only
# holds requirements.txt and the two empty directories, so the chown is cheap.
RUN useradd -m -u 1001 raguser \
 && mkdir -p /app/.hf_cache /app/.chroma_db \
 && chown -R raguser:raguser /app
USER raguser

# ---- Pre-download the default embeddings model ------------------------------
# Pre-download the default embeddings model during build so the container
# starts immediately without a network fetch.  Comment this out if you plan
# to use a different embeddings model or if image size is a concern.
# This step sits ahead of the application COPY instructions on purpose: it
# depends only on the installed requirements, so source edits do not
# invalidate the layer and trigger a fresh download.
RUN python -c "\
from sentence_transformers import SentenceTransformer; \
SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2', cache_folder='/app/.hf_cache')"

# ---- Application code -------------------------------------------------------
# COPY writes files as root regardless of the active USER, hence --chown.
COPY --chown=raguser:raguser config/ config/
COPY --chown=raguser:raguser src/    src/
COPY --chown=raguser:raguser api/    api/
COPY --chown=raguser:raguser data/   data/
# (app/ is excluded — Streamlit not served from Docker by default)

# ---- Expose and launch ------------------------------------------------------
# EXPOSE is documentation only; publish the port with `docker run -p 8000:8000`.
EXPOSE 8000

# Exec form keeps uvicorn as PID 1 so it receives SIGTERM from `docker stop`.
CMD ["uvicorn", "api.main:app", "--host", "0.0.0.0", "--port", "8000", "--workers", "1"]
