## Task environment image for MLRC-Bench benchmarks.
## - Creates a task-specific conda env from a provided environment YAML
## - Runs prepare.py during image build (so data/artifacts land in the image)

# Base image: micromamba, `jammy` tag.
FROM mambaorg/micromamba:jammy

# Run every shell-form RUN through bash as a login shell (-l). The RUN steps in
# this file rely on bash-only syntax such as `[[ ... ]]` and `shopt`.
SHELL ["/bin/bash", "-lc"]

# Name of the environment created from environment.yml; it is also used to build
# the runtime PATH entry for that environment.
ARG ENV_NAME=python

# Remap the default non-root user to match the host user's UID/GID so that bind-mounted
# credential files (e.g. ~/.kaggle/kaggle.json with mode 600) remain readable in-container.
# Pass the host values at build time, e.g. --build-arg HOST_UID="$(id -u)" --build-arg HOST_GID="$(id -g)".
# The defaults are 1000:1000.
# NOTE(review): an earlier note said these match the upstream image defaults — verify
# against the UID/GID the base image actually assigns to $MAMBA_USER.
ARG HOST_UID=1000
ARG HOST_GID=1000

# AIcrowd API key; only the product-recommendation task needs it.
# Set REQUIRE_AICROWD_API_KEY=1 to make the build fail fast when the key is absent.
# NOTE(review): the key arrives as a build ARG and is re-exported via ENV, so it is
# stored in the image configuration — treat images built with a real key as sensitive.
ARG AICROWD_API_KEY
ARG REQUIRE_AICROWD_API_KEY=0
ENV AICROWD_API_KEY=${AICROWD_API_KEY}
RUN if [[ "${REQUIRE_AICROWD_API_KEY}" == "1" && -z "${AICROWD_API_KEY}" ]]; then \
      echo "ERROR: AICROWD_API_KEY is not set." >&2; \
      exit 1; \
    elif [[ "${REQUIRE_AICROWD_API_KEY}" == "1" ]]; then \
      echo "AICROWD_API_KEY is set"; \
    fi

# Optional Hugging Face auth token (some tasks may use it to pull gated/private artifacts).
# Set REQUIRE_HF_AUTH_TOKEN=1 to make the build fail fast when the token is absent.
# NOTE(review): the token arrives as a build ARG and is re-exported via ENV, so it is
# stored in the image configuration — treat images built with a real token as sensitive.
ARG HF_AUTH_TOKEN
ARG REQUIRE_HF_AUTH_TOKEN=0
ENV HF_AUTH_TOKEN=${HF_AUTH_TOKEN}
RUN if [[ "${REQUIRE_HF_AUTH_TOKEN}" == "1" && -z "${HF_AUTH_TOKEN}" ]]; then \
      echo "ERROR: HF_AUTH_TOKEN is not set." >&2; \
      exit 1; \
    elif [[ "${REQUIRE_HF_AUTH_TOKEN}" == "1" ]]; then \
      echo "HF_AUTH_TOKEN is set."; \
    fi

# Switch to root: the user remapping, ownership changes and package installs
# that follow need elevated privileges.
USER root

# Keep the repo in a stable location; scripts tend to assume relative paths.
WORKDIR /workspace

# Align $MAMBA_USER uid/gid with the host user (when non-root) so bind mounts work
# cleanly without chmod/chown hacks.
#   - HOST_UID == 0: the host user is root; leave $MAMBA_USER untouched.
#   - HOST_UID != 0 and HOST_GID != 0: remap both the group and the user.
#   - HOST_UID != 0 and HOST_GID == 0: remap only the UID. Owner-only (mode 600)
#     bind-mounted files depend on the UID alone, so a non-root host user whose
#     primary group is GID 0 must still get the UID remap; the container group is
#     deliberately not moved onto GID 0.
# `-o` permits a non-unique id in case the requested value is already taken.
RUN set -euo pipefail; \
    if [[ "${HOST_UID}" != "0" ]]; then \
      if [[ "${HOST_GID}" != "0" ]]; then \
        if getent group "$MAMBA_USER" >/dev/null 2>&1; then \
          groupmod -o -g "${HOST_GID}" "$MAMBA_USER"; \
        else \
          groupadd -o -g "${HOST_GID}" "$MAMBA_USER"; \
        fi; \
        usermod -o -u "${HOST_UID}" -g "${HOST_GID}" "$MAMBA_USER"; \
      else \
        usermod -o -u "${HOST_UID}" "$MAMBA_USER"; \
      fi; \
    fi

# /workspace itself (non-recursive) belongs to the runtime user, so new top-level
# folders can be created there — one prepare.py requires it.
RUN chown "${MAMBA_USER}:${MAMBA_USER}" /workspace

# System deps commonly needed by conda/pip builds and ML/CV packages.
# DEBIAN_FRONTEND=noninteractive is set inline (build-time only, not baked into the
# runtime env) so a package that asks a debconf question cannot stall the build.
# Packages are listed alphabetically to keep diffs small; the apt lists are removed
# in the same layer so they do not add to the image size.
RUN apt-get update \
    && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
        build-essential \
        ca-certificates \
        curl \
        ffmpeg \
        git \
        libgl1 \
        libglib2.0-0 \
        wget \
    && rm -rf /var/lib/apt/lists/*

# Pre-create the Kaggle credential directory, owned by the runtime user, so a
# credentials mount at runtime has an existing target.
RUN mkdir -p "/home/${MAMBA_USER}/.kaggle" \
    && chown -R "${MAMBA_USER}:${MAMBA_USER}" "/home/${MAMBA_USER}/.kaggle"

# Copy all the task folders into workspace
# NOTE(review): the whole build context is copied before the conda env is created,
# so any file change invalidates the cached environment layer. Consider staging
# environment.yml separately if rebuild time matters.
COPY --chown=$MAMBA_USER:$MAMBA_USER . /workspace


# The build steps that follow (env creation, prepare.py) run from /workspace/scripts
# as the non-root user.
WORKDIR /workspace/scripts
USER $MAMBA_USER

# Build the task environment from environment.yml in the current WORKDIR, then
# clean micromamba's caches in the same layer to keep the image smaller.
RUN micromamba env create --yes --name "${ENV_NAME}" --file environment.yml \
    && micromamba clean --all --yes

# Make the task env's Python the default for `docker exec` / attached shells:
# activation performed inside a RUN layer does not persist at runtime, so the
# env's bin directory is put first on PATH instead.
ENV CONDA_DEFAULT_ENV="${ENV_NAME}" \
    PATH="/opt/conda/envs/${ENV_NAME}/bin:${PATH}"


# Run prepare.py during the build so its data/artifacts land in the image.
# The script is looked up in the current WORKDIR (/workspace/scripts) and is
# executed inside the task env with unbuffered output; it is skipped when absent.
RUN if [[ ! -f "prepare.py" ]]; then \
      echo "No prepare.py found, skipping."; \
    else \
      micromamba run -n "${ENV_NAME}" python -u prepare.py; \
    fi

USER root
# Hide the build scripts from $MAMBA_USER: root-owned, with every group/other
# permission bit removed.
RUN chown -R root:root /workspace/scripts \
    && chmod -R go-rwx /workspace/scripts
# Hand MLAgentBench to $MAMBA_USER, with owner permissions set to read plus
# traverse/execute (no owner write bit).
RUN chown -R "${MAMBA_USER}:${MAMBA_USER}" /workspace/env/MLAgentBench \
    && chmod -R u=rX /workspace/env/MLAgentBench


# Make selected files/dirs read-only for $MAMBA_USER (by making them root-owned).
# The list is relative to /workspace/env (the task repo root inside the image):
# one path or glob per line, `#` starts a comment, blank lines are ignored.
#   - Field splitting is disabled (IFS='') so an entry containing spaces stays one
#     path under /workspace/env; glob patterns (including `**`) still expand.
#   - printf is used instead of echo so an entry that looks like an echo option
#     (e.g. `-n`) is passed to sed unchanged.
#   - An entry that matches nothing is reported on stderr rather than skipped
#     silently, so a typo in the list does not quietly leave a file writable.
RUN if [[ -f /workspace/scripts/read_only_files.txt ]]; then \
      set -euo pipefail; \
      shopt -s nullglob globstar; \
      IFS=''; \
      while IFS= read -r rel || [[ -n "$rel" ]]; do \
        rel="$(printf '%s\n' "$rel" | sed -e 's/#.*$//' -e 's/^[[:space:]]*//' -e 's/[[:space:]]*$//')"; \
        [[ -z "$rel" ]] && continue; \
        matched=0; \
        for target in /workspace/env/$rel; do \
          [[ -e "$target" ]] || continue; \
          chown -R root:root "$target"; \
          chmod -R u=rwX,go=rX "$target"; \
          matched=1; \
        done; \
        if [[ "$matched" == "0" ]]; then \
          echo "WARNING: read_only_files.txt entry matched nothing: $rel" >&2; \
        fi; \
      done < /workspace/scripts/read_only_files.txt; \
    else \
      echo "No read_only_files.txt found; skipping read-only lock-down."; \
    fi

# Containers start in the task repo root.
WORKDIR /workspace/env

# Drop back to the non-root user for runtime.
USER $MAMBA_USER
