#!/usr/bin/env bash
set -euo pipefail

# Print the command-line help text to stdout.
# Takes no arguments; the caller decides the exit status afterwards.
usage() {
    cat <<'HELP_TEXT'
Usage: daylily-stage-analysis-samples [options] <analysis_samples.tsv>

Options:
  --stage-target PATH     Remote staging directory (default: /fsx/staged_sample_data)
  --region REGION         AWS region containing the cluster (defaults to AWS_REGION/AWS_DEFAULT_REGION)
  --profile PROFILE       AWS CLI profile to use (defaults to AWS_PROFILE)
  --cluster NAME          Cluster name to target (prompted if omitted)
  --pem FILE              SSH PEM key for the cluster head node (prompted if omitted)
  --remote-repo PATH      Remote path to a daylily-ephemeral-cluster checkout (fallback if daylily-ec is not installed on the head node)
  --remote-tmp PATH       Remote directory for uploading the TSV (default: /tmp)
  --ref-bucket NAME       REQUIRED. S3 bucket name for references (replaces <REF-BUCKET-NAME> in the TSV before upload)
  --config-dir PATH       Local directory to place downloaded config files (default: <tsv_dir>)
  --output PATH           Alias for --config-dir (deprecated)
  --no-download           Skip downloading the config files back to the local machine
  --debug                 Enable debug mode (print command traces)
  -h, --help              Show this help message and exit
HELP_TEXT
}

# Ensure an executable (or builtin) named $1 is resolvable via `command -v`.
# Arguments: $1 - command name
# Exits the shell with status 1 and a message on stderr when it is missing.
require_cmd() {
    command -v "$1" >/dev/null 2>&1 && return 0
    echo "Error: required command '$1' not found in PATH" >&2
    exit 1
}

# Let the user pick one entry from a list and print it on stdout.
# Arguments:
#   $1     - prompt text shown by `select`
#   $2...  - the candidate values
# Outputs: the chosen value on stdout; the menu, prompt and errors go to
#          stderr, so the function is safe to call inside $(...).
# Returns: 0 on success; 1 if stdin reaches EOF before a valid choice.
#          Exits 1 when called with no candidates.
choose_from_list() {
    local prompt="$1"; shift
    local -a options=("$@")
    local opt
    # `select` reads its prompt from PS3; keep the override local so the
    # caller's PS3 is untouched after we return.
    local PS3="$prompt "

    if [[ ${#options[@]} -eq 0 ]]; then
        echo "Error: no options available for selection (${prompt})" >&2
        exit 1
    fi
    # A single candidate needs no interaction.
    if [[ ${#options[@]} -eq 1 ]]; then
        printf '%s\n' "${options[0]}"
        return 0
    fi

    select opt in "${options[@]}"; do
        if [[ -n "$opt" ]]; then
            printf '%s\n' "$opt"
            return 0
        fi
        echo "Invalid selection, try again." >&2
    done

    # `select` only falls through when stdin hits EOF (e.g. non-interactive
    # run); say so instead of failing without any message.
    echo "Error: no selection made (${prompt})" >&2
    return 1
}

# ---- Defaults (overridable by the command-line options parsed below) ----

# Positional input and required option: no defaults.
ts_path=""
reference_bucket=""

# AWS context: taken from the environment when present.
region="${AWS_REGION:-}"
[[ -n "$region" ]] || region="${AWS_DEFAULT_REGION:-}"
aws_profile="${AWS_PROFILE:-}"
cluster_name=""
pem_file=""

# Remote-side locations. The tilde is stored literally (it is not expanded
# inside quotes) and is rewritten later, before the value is sent over SSH.
stage_target="/fsx/staged_sample_data"
remote_repo='~/projects/daylily-ephemeral-cluster'
remote_tmp="/tmp"

# Local behaviour.
download_config="true"
local_config_dir=""
debug_mode=0


# Abort with a clear message when a value-taking option is the last word on
# the command line. Without this guard the "$2" reference trips `set -u` and
# the user only sees a cryptic "unbound variable" error.
# Arguments:
#   $1 - option name as typed
#   $2 - number of words still unparsed (the caller's $#)
require_option_value() {
    if [[ "$2" -lt 2 ]]; then
        echo "Error: option '$1' requires a value" >&2
        exit 1
    fi
}

# Walk the command line: options may appear in any order; the first bare word
# is the TSV path, a second bare word is an error.
while [[ $# -gt 0 ]]; do
    case "$1" in
        --stage-target)
            require_option_value "$1" "$#"
            stage_target="$2"
            shift 2
            ;;
        --region)
            require_option_value "$1" "$#"
            region="$2"
            shift 2
            ;;
        --ref-bucket)
            require_option_value "$1" "$#"
            reference_bucket="$2"
            shift 2
            ;;
        --profile)
            require_option_value "$1" "$#"
            aws_profile="$2"
            shift 2
            ;;
        --cluster)
            require_option_value "$1" "$#"
            cluster_name="$2"
            shift 2
            ;;
        --pem)
            require_option_value "$1" "$#"
            pem_file="$2"
            shift 2
            ;;
        --remote-repo)
            require_option_value "$1" "$#"
            remote_repo="$2"
            shift 2
            ;;
        --remote-tmp)
            require_option_value "$1" "$#"
            remote_tmp="$2"
            shift 2
            ;;
        --config-dir|--output)
            # --output is the deprecated alias of --config-dir.
            require_option_value "$1" "$#"
            local_config_dir="$2"
            shift 2
            ;;
        --no-download)
            download_config="false"
            shift
            ;;
        --debug)
            debug_mode=1
            shift
            ;;
        -h|--help)
            usage
            exit 0
            ;;
        --)
            # End of options; the remaining words are handled after the loop.
            shift
            break
            ;;
        -*)
            echo "Unknown option: $1" >&2
            usage
            exit 1
            ;;
        *)
            if [[ -z "$ts_path" ]]; then
                ts_path="$1"
                shift
            else
                echo "Unexpected argument: $1" >&2
                usage
                exit 1
            fi
            ;;
    esac
done

# Words left over after a literal `--` are positional: apply the same
# "one TSV path only" rule instead of silently dropping them.
for positional_arg in "$@"; do
    if [[ -z "$ts_path" ]]; then
        ts_path="$positional_arg"
    else
        echo "Unexpected argument: $positional_arg" >&2
        usage
        exit 1
    fi
done

# Turn on shell command tracing when --debug was given.
case "$debug_mode" in
    1)
        echo "ℹ️ Debug mode enabled. Command traces will be printed."
        set -x
        ;;
esac

# Fail fast if any external tool this script invokes is missing locally.
for required_tool in aws pcluster ssh scp python3; do
    require_cmd "$required_tool"
done


# ---- Validate mandatory inputs; each failure exits with status 1 ----

# --ref-bucket has no default.
[[ -n "$reference_bucket" ]] || {
    echo "Error: --ref-bucket is required." >&2
    usage
    exit 1
}

# The TSV path is the single positional argument.
[[ -n "$ts_path" ]] || {
    echo "Error: analysis_samples TSV path is required" >&2
    usage
    exit 1
}

[[ -f "$ts_path" ]] || {
    echo "Error: TSV file '$ts_path' not found" >&2
    exit 1
}

# The profile comes from --profile or AWS_PROFILE; there is no prompt for it.
[[ -n "$aws_profile" ]] || {
    echo "Error: AWS profile not specified. Set AWS_PROFILE or pass --profile." >&2
    exit 1
}

# No region from flags or environment: ask AWS for the list and let the user
# pick one.
if [[ -z "$region" ]]; then
    echo "Selecting AWS region..."
    # Capture the listing first so an AWS CLI failure (bad profile, expired
    # credentials, no network) is reported as such, with the CLI's own stderr
    # visible, rather than surfacing later as an empty selection list.
    if ! region_listing="$(aws ec2 describe-regions --region us-west-2 --profile "$aws_profile" --query 'Regions[].RegionName' --output text)"; then
        echo "Error: failed to list AWS regions with profile '$aws_profile'. Check your credentials or pass --region." >&2
        exit 1
    fi
    # Text output is tab-separated; put one name per line and drop blanks.
    mapfile -t region_options < <(printf '%s\n' "$region_listing" | tr '\t' '\n' | sed '/^$/d')
    if [[ ${#region_options[@]} -eq 0 ]]; then
        echo "Error: AWS returned no regions for profile '$aws_profile'. Pass --region explicitly." >&2
        exit 1
    fi
    region="$(choose_from_list "Select region:" "${region_options[@]}")"
fi

# No --cluster given: list the clusters in the region and let the user choose.
if [[ -z "$cluster_name" ]]; then
    echo "Locating clusters in region $region using profile $aws_profile..."
    # Pull the quoted value out of every line that mentions clusterName.
    mapfile -t cluster_options < <(
        AWS_PROFILE="$aws_profile" pcluster list-clusters --region "$region" \
            | grep clusterName \
            | awk '{print $2}' \
            | cut -d '"' -f 2
    )
    (( ${#cluster_options[@]} > 0 )) || {
        echo "Error: No clusters found in region $region." >&2
        exit 1
    }
    cluster_name="$(choose_from_list "Select cluster:" "${cluster_options[@]}")"
fi

# No --pem given: offer the *.pem files found in ~/.ssh.
if [[ -z "$pem_file" ]]; then
    # Use a glob instead of parsing `ls` output: every match stays one array
    # element whatever characters its name contains. The -f test drops the
    # literal pattern left behind by an unmatched glob, and also skips
    # directories that happen to be named *.pem.
    pem_candidates=()
    for pem_candidate in ~/.ssh/*.pem; do
        if [[ -f "$pem_candidate" ]]; then
            pem_candidates+=("$pem_candidate")
        fi
    done
    if [[ ${#pem_candidates[@]} -eq 0 ]]; then
        echo "Error: No PEM files found in ~/.ssh. Provide one with --pem." >&2
        exit 1
    fi
    pem_file="$(choose_from_list "Select PEM file:" "${pem_candidates[@]}")"
fi

# Whether typed or selected, the key must exist as a regular file.
if [[ ! -f "$pem_file" ]]; then
    echo "Error: PEM file '$pem_file' not found" >&2
    exit 1
fi

# Summarise the effective settings before touching the cluster.
echo "Using AWS profile: $aws_profile"
echo "Using region: $region"
echo "Target cluster: $cluster_name"
echo "SSH key: $pem_file"
echo "Stage target: $stage_target"
echo "Remote repo: $remote_repo"

# Extract the head node's public IP from the describe-cluster output: on the
# first line mentioning "publicIpAddress", the value is the 4th field when the
# line is split on double quotes. awk reads all of its input, so pcluster is
# never cut off mid-write.
# The trailing `|| true` matters: under `set -e -o pipefail` a failing stage
# would otherwise terminate the script right here, before the explicit check
# below can print a useful message. The variable still receives whatever the
# pipeline printed.
cluster_ip="$(
    AWS_PROFILE="$aws_profile" pcluster describe-cluster \
        --region "$region" \
        -n "$cluster_name" \
        | awk -F'"' '/"publicIpAddress"/ && !seen { print $4; seen = 1 }'
)" || true

if [[ -z "$cluster_ip" ]]; then
    echo "Error: Failed to resolve head node IP for cluster $cluster_name" >&2
    exit 1
fi

echo "Resolved head node IP: $cluster_ip"
echo "Head node IP: $cluster_ip"

# Copy SRC to DST with every <REF-BUCKET-NAME> placeholder replaced by BUCKET
# (a plain copy if the placeholder is absent).
# Arguments:
#   $1 - source TSV path
#   $2 - destination path (overwritten)
#   $3 - bucket name to insert
# Returns: sed's exit status.
substitute_ref_bucket() {
    local src="$1" dst="$2" bucket="$3"
    local escaped_bucket
    # In a sed replacement `&` means "the matched text", `\` starts an escape
    # and `|` is our delimiter; escape all three so the bucket value is
    # always inserted literally.
    escaped_bucket="$(printf '%s' "$bucket" | sed -e 's/[&|\\]/\\&/g')"
    sed -e "s|<REF-BUCKET-NAME>|${escaped_bucket}|g" < "$src" > "$dst"
}

# Work on a private temporary copy so the caller's TSV is never modified.
processed_tsv="$(mktemp)"
# Remove the temporary copy on every exit path, including `set -e` aborts
# during the SSH/SCP steps that follow.
trap 'rm -f -- "$processed_tsv"' EXIT
echo "Injecting reference bucket into uploaded TSV (if placeholder present) ..."
substitute_ref_bucket "$ts_path" "$processed_tsv" "$reference_bucket"

# Build the remote file name: <basename without .tsv>_<epoch seconds>.tsv,
# placed under the remote temp directory. The timestamp is taken once.
remote_basename="$(basename "$ts_path")"
remote_timestamp="$(date +%s)"
remote_tsv="${remote_tmp}/${remote_basename%.tsv}_${remote_timestamp}.tsv"

# Options shared by every ssh/scp call: the chosen key, with host-key
# checking off and no known_hosts file written.
ssh_opts=(
    -i "$pem_file"
    -o StrictHostKeyChecking=no
    -o UserKnownHostsFile=/dev/null
)

echo "Preparing remote workspace ${remote_tmp}..."
ssh "${ssh_opts[@]}" "ubuntu@${cluster_ip}" "mkdir -p '${remote_tmp}'"

echo "Uploading TSV to head node..."
scp "${ssh_opts[@]}" "$processed_tsv" "ubuntu@${cluster_ip}:${remote_tsv}"
 
# A leading "~" in the remote repo path is rewritten to /home/ubuntu here:
# the value is handed to the remote side inside single quotes, where the
# remote shell would not expand a tilde.
remote_repo_remote="${remote_repo/#\~//home/ubuntu}"

echo "Running remote staging script..."

# The script below is fed on stdin to `bash -s` on the head node. The heredoc
# delimiter is quoted, so nothing in it is expanded locally; the four values
# it needs (REMOTE_REPO, REMOTE_TSV, STAGE_TARGET, AWS_PROFILE) are passed as
# environment assignments on the remote command line.
# The remote script locates the Daylily resources directory via `daylily-ec`,
# then via the daylily_ec Python package, and finally falls back to
# REMOTE_REPO if it contains a bin/ directory. It exits 2 when none of these
# works, and 3 when the manifest script is missing from the located root.
ssh "${ssh_opts[@]}" ubuntu@"$cluster_ip" \
 "REMOTE_REPO='$remote_repo_remote' REMOTE_TSV='$remote_tsv' STAGE_TARGET='$stage_target' AWS_PROFILE='$aws_profile' bash -s" <<'REMOTE'
set -euo pipefail

resolve_remote_root() {
  if command -v daylily-ec >/dev/null 2>&1; then
    daylily-ec resources-dir
    return 0
  fi
  if command -v python3 >/dev/null 2>&1; then
    python3 - <<'PY' 2>/dev/null || return 1
from daylily_ec.resources import ensure_extracted
print(ensure_extracted())
PY
    return 0
  fi
  return 1
}

REMOTE_ROOT=""
if REMOTE_ROOT="$(resolve_remote_root)"; then
  :
elif [[ -n "${REMOTE_REPO:-}" && -d "${REMOTE_REPO}/bin" ]]; then
  REMOTE_ROOT="${REMOTE_REPO}"
else
  echo "Error: could not locate Daylily tools on the head node." >&2
  echo "Install daylily-ephemeral-cluster on the head node (pip) or provide --remote-repo pointing to a checkout." >&2
  exit 2
fi

if [[ ! -f "${REMOTE_ROOT}/bin/daylily-analysis-samples-to-manifest-new.py" ]]; then
  echo "Error: expected script not found: ${REMOTE_ROOT}/bin/daylily-analysis-samples-to-manifest-new.py" >&2
  exit 3
fi

AWS_PROFILE="$AWS_PROFILE" python3 "${REMOTE_ROOT}/bin/daylily-analysis-samples-to-manifest-new.py" "$REMOTE_TSV" "$STAGE_TARGET"
REMOTE

# The local processed copy has been uploaded; removal is best-effort.
rm -f "$processed_tsv" >/dev/null 2>&1 || true

echo "Remote staging complete. Config files located at $stage_target/{samples,units}.tsv"

# Unless --no-download was given, fetch samples.tsv and units.tsv from the
# stage target into a local directory.
if [[ "$download_config" == "true" ]]; then
    if [[ -z "$local_config_dir" ]]; then
        # Default: the directory that holds the input TSV, as an absolute path.
        local_config_dir=$(cd "$(dirname "$ts_path")" && pwd)
    else
        if [[ "$local_config_dir" == *.csv ]]; then
            # Legacy callers passed a .csv file path to --output; use its
            # parent directory instead.
            echo "Warning: --output now expects a directory; using parent directory of $local_config_dir" >&2
            local_config_dir=$(python3 -c 'import os,sys; print(os.path.abspath(os.path.dirname(sys.argv[1])))' "$local_config_dir")
        else
            # Normalise to an absolute path (the directory need not exist yet).
            local_config_dir=$(python3 -c 'import os,sys; print(os.path.abspath(sys.argv[1]))' "$local_config_dir")
        fi
    fi

    mkdir -p "$local_config_dir"
    local_samples="$local_config_dir/samples.tsv"
    local_units="$local_config_dir/units.tsv"

    echo "Downloading config files to $local_config_dir..."
    # Download only when both files exist remotely; otherwise warn and go on.
    if ssh "${ssh_opts[@]}" ubuntu@"$cluster_ip" "test -f '$stage_target/samples.tsv' && test -f '$stage_target/units.tsv'"; then
        scp "${ssh_opts[@]}" "ubuntu@${cluster_ip}:${stage_target}/samples.tsv" "$local_samples"
        scp "${ssh_opts[@]}" "ubuntu@${cluster_ip}:${stage_target}/units.tsv" "$local_units"
        echo "Config files downloaded:"
        echo "  samples -> $local_samples"
        echo "  units   -> $local_units"
        echo "Copy them into your workflow checkout (e.g. cp $local_samples config/samples.tsv)."
    else
        echo "Warning: Config files not found at $stage_target/{samples,units}.tsv on head node; skipping download." >&2
    fi
fi

# Remove the uploaded TSV from the head node, then report completion.
printf '%s\n' "Cleaning up remote TSV..."
ssh "${ssh_opts[@]}" "ubuntu@${cluster_ip}" "rm -f '${remote_tsv}'"

printf '%s\n' "Done."
