# ---- Build stage ----
# Installs the scanner and its ML dependencies with a compiler toolchain
# available, then pre-fetches the models, so the runtime stage can copy the
# results without shipping the build tools.
FROM python:3.12-slim AS builder

# NOTE: labels declared in a non-final stage are attached to that stage's image
# only; a later `FROM` starts from a fresh base and does not inherit them.
LABEL maintainer="Theodolite <support@theodolite.io>"
LABEL description="Theodolite data discovery scanner for cloud storage"

# C/C++ compilers, presumably required by dependencies that build native
# extensions from source. The apt lists are removed in the same layer so they
# do not add to the layer size.
RUN apt-get update \
    && apt-get install -y --no-install-recommends \
        g++ \
        gcc \
    && rm -rf /var/lib/apt/lists/*

WORKDIR /build

# CPU-only PyTorch (~800MB vs 2.5GB with CUDA)
# Installed before the project sources are copied: this step reads nothing from
# /build, so keeping it above the COPY instructions lets the layer stay cached
# when only pyproject.toml or the package code changes.
RUN pip install --no-cache-dir torch --index-url https://download.pytorch.org/whl/cpu

COPY pyproject.toml README.md ./
COPY theodolite_scanner/ theodolite_scanner/

# Install the project with its "all" and "gliner" extras, plus spaCy. torch is
# already present from the step above.
RUN pip install --no-cache-dir ".[all,gliner]" spacy

# Pre-download models so they're baked into the image.
# HF_HOME points the Hugging Face cache at a known path under /build so a later
# stage can copy the downloaded files from there.
ENV HF_HOME=/build/.cache/huggingface
RUN python -m spacy download en_core_web_sm
RUN python -c "from gliner import GLiNER; GLiNER.from_pretrained('gliner-community/gliner_large-v2.5')"

# ---- Runtime stage ----
# Final image: a fresh slim Python base that receives only the installed
# packages, the CLI entrypoint script, and the pre-downloaded model cache from
# the builder stage.
FROM python:3.12-slim

# Image metadata must be declared in the final stage to appear on the shipped
# image. The OCI-standard keys are provided alongside the plain keys.
LABEL maintainer="Theodolite <support@theodolite.io>" \
      description="Theodolite data discovery scanner for cloud storage" \
      org.opencontainers.image.authors="Theodolite <support@theodolite.io>" \
      org.opencontainers.image.description="Theodolite data discovery scanner for cloud storage"

# PYTHONDONTWRITEBYTECODE: do not write .pyc files at runtime.
# PYTHONUNBUFFERED: unbuffered stdout/stderr so output reaches container logs
# immediately.
# HF_HOME: Hugging Face cache location; matches the destination of the model
# cache copied below.
ENV PYTHONDONTWRITEBYTECODE=1 \
    PYTHONUNBUFFERED=1 \
    HF_HOME=/app/.cache/huggingface

WORKDIR /app

# Create non-root user before COPY --chown
# Fixed numeric UID/GID 1001; no home directory is created for the account.
RUN groupadd --gid 1001 scanner && useradd --uid 1001 --gid scanner --no-create-home scanner

# Copy installed packages, entrypoint, and model cache
# site-packages and the console script keep their default ownership; only the
# model cache is chowned to the scanner user.
COPY --from=builder /usr/local/lib/python3.12/site-packages /usr/local/lib/python3.12/site-packages
COPY --from=builder /usr/local/bin/theodolite-scan /usr/local/bin/theodolite-scan
COPY --chown=scanner:scanner --from=builder /build/.cache/huggingface /app/.cache/huggingface

# Drop privileges for everything that runs in the container.
USER scanner

# Exec form: the CLI runs directly as the container's main process and
# `docker run` arguments are appended to it.
ENTRYPOINT ["theodolite-scan"]
