#!/usr/bin/env bash
set -euo pipefail

# Version tag for this pipeline script; printed once at start-up.
SCRIPT_VERSION="run_oleg_univ3_usdc_weth_001_monthly_pipeline_2026_05_05_fallback_ohlcv"
printf '[script_version] %s SCRIPT_VERSION=%s\n' "$0" "${SCRIPT_VERSION}"

# Target pool: Ethereum Uniswap V3 USDC/WETH 0.01%.
# Picked from GeckoTerminal priority discovery because token0 is USDC and
# token1 is WETH, which matches the current replay orientation assumptions.
# Every setting below keeps a non-empty value from the environment and only
# falls back to the default when the variable is unset or empty.
: "${POOL:=0xe0554a476a092703abdb3ef35c80e0d76d32939f}"
: "${POOL_NAME:=mainnet_USDC_WETH_UNIV3_001}"
: "${FEE_RATE:=0.0001}"
: "${GECKO_NETWORK:=eth}"
: "${GECKO_TIMEFRAME:=hour}"
: "${GECKO_TOKEN:=quote}"

# Resolve the RPC endpoint. A non-empty ETH_RPC_URL is used as given;
# otherwise the URL is derived from METAMASK_API_KEY and exported so child
# processes can read it. With neither set, abort with an error on stderr.
if [[ -n "${ETH_RPC_URL:-}" ]]; then
  : # caller-supplied endpoint; nothing to derive
elif [[ -n "${METAMASK_API_KEY:-}" ]]; then
  export ETH_RPC_URL="https://mainnet.infura.io/v3/${METAMASK_API_KEY}"
else
  echo "ERROR: set ETH_RPC_URL or METAMASK_API_KEY. The key is not printed by this script." >&2
  exit 1
fi

# Monthly windows to process. Each entry is one whitespace-separated record:
#   "<time-from> <time-to> <tag>"
# The tag is used as a suffix in per-month output file and directory names.
declare -a MONTHS=()
MONTHS+=("2026-02-01T00:00:00Z 2026-03-01T00:00:00Z feb2026")
MONTHS+=("2026-03-01T00:00:00Z 2026-04-01T00:00:00Z mar2026")
MONTHS+=("2026-04-01T00:00:00Z 2026-05-01T00:00:00Z apr2026")

# Make sure the pipeline log directory exists (no-op when already present).
mkdir -p "DEX_REPORTS/oleg_data_pipeline_logs"

#######################################
# Fallback path for one time window: fetch GeckoTerminal OHLCV candles for the
# pool and convert them into a separate "*_ohlcv_proxy.npz" file.
# The proxy NPZ is for regime screening only; it is not exact
# fee-share/capacity evidence (see the warning printed below).
# Each step is skipped when its output file already exists and is non-empty.
# Globals (read):
#   POOL, POOL_NAME, FEE_RATE, GECKO_NETWORK, GECKO_TIMEFRAME, GECKO_TOKEN
#   GECKO_AGGREGATE (optional, default 1), GECKO_SLEEP_S (optional, default 2.2)
# Arguments:
#   $1 - window start, passed verbatim as --time-from
#   $2 - window end, passed verbatim as --time-to
#   $3 - tag used in the output file names (e.g. feb2026)
# Outputs:
#   Progress lines on stdout;
#   DEX_DATA/ohlcv_proxy/<POOL_NAME>_<tag>_<GECKO_TOKEN>_<GECKO_TIMEFRAME>.csv
#   DEX_DATA/fast_npz/<POOL_NAME>_<tag>_ohlcv_proxy.npz
# Returns:
#   1 if the output directories cannot be created; otherwise the status of
#   the last step executed.
#######################################
run_ohlcv_fallback() {
  local from_ts="$1"
  local to_ts="$2"
  local tag="$3"
  local ohlcv_csv="DEX_DATA/ohlcv_proxy/${POOL_NAME}_${tag}_${GECKO_TOKEN}_${GECKO_TIMEFRAME}.csv"
  local proxy_npz="DEX_DATA/fast_npz/${POOL_NAME}_${tag}_ohlcv_proxy.npz"

  echo "[fallback] event RPC failed or unavailable; fetching GeckoTerminal OHLCV proxy for ${tag}"
  echo "[fallback] WARNING: proxy NPZ is regime-screening only, not exact fee-share/capacity evidence"

  # Create both output directories up front. Nothing else in this script
  # creates them, and the fallback should not depend on the Python helpers
  # doing so on a fresh checkout.
  mkdir -p -- "${ohlcv_csv%/*}" "${proxy_npz%/*}" || return 1

  if [[ ! -s "${ohlcv_csv}" ]]; then
    python3 dex_platform/data_collectors/full_cycle_import/fetch_geckoterminal_pool_ohlcv_v1.py \
      --network "${GECKO_NETWORK}" \
      --pool "${POOL}" \
      --timeframe "${GECKO_TIMEFRAME}" \
      --aggregate "${GECKO_AGGREGATE:-1}" \
      --time-from "${from_ts}" \
      --time-to "${to_ts}" \
      --out-csv "${ohlcv_csv}" \
      --token "${GECKO_TOKEN}" \
      --sleep-s "${GECKO_SLEEP_S:-2.2}"
  else
    echo "[skip] OHLCV proxy csv already exists: ${ohlcv_csv}"
  fi

  if [[ ! -s "${proxy_npz}" ]]; then
    python3 dex_platform/data_collectors/full_cycle_import/build_ohlcv_proxy_npz_v1.py \
      --ohlcv-csv "${ohlcv_csv}" \
      --out-npz "${proxy_npz}" \
      --pool-name "${POOL_NAME}_${tag}_ohlcv_proxy" \
      --network "${GECKO_NETWORK}" \
      --pool "${POOL}" \
      --token0 USDC \
      --token1 WETH \
      --dec0 6 \
      --dec1 18 \
      --fee-rate "${FEE_RATE}" \
      --price-orientation token0_per_token1
  else
    echo "[skip] OHLCV proxy npz already exists: ${proxy_npz}"
  fi
}

# Monthly driver. For every "<from> <to> <tag>" record in MONTHS:
#   1. fetch on-chain pool events, unless a non-empty events_all.csv exists;
#   2. if that fetch fails, run the OHLCV proxy fallback for the month;
#   3. build the exact fee-replay NPZ only when the event fetch did not fail
#      and the NPZ is not already present.
for month_spec in "${MONTHS[@]}"; do
  read -r FROM_TS TO_TS TAG <<< "${month_spec}"
  OUT_DIR="DEX_DATA/uniswap_v3_mainnet/${POOL_NAME}_${TAG}"
  OUT_NPZ="DEX_DATA/fast_npz/${POOL_NAME}_${TAG}_fee_replay_v2.npz"
  events_csv="${OUT_DIR}/events_all.csv"

  echo "[month] ${TAG} ${FROM_TS} -> ${TO_TS}"

  # Event fetch command for this window; tunables fall back to their defaults
  # when the corresponding environment variable is unset or empty.
  fetch_cmd=(
    python3 dex_platform/data_collectors/full_cycle_import/fetch_cl_pool_events_evm_v4.py
    --pool "${POOL}"
    --rpc-env ETH_RPC_URL
    --expected-chain-id 1
    --time-from "${FROM_TS}"
    --time-to "${TO_TS}"
    --out-dir "${OUT_DIR}"
    --chunk-size "${CHUNK_SIZE:-5000}"
    --min-chunk-size "${MIN_CHUNK_SIZE:-100}"
    --sleep-s "${SLEEP_S:-0.05}"
    --events "${EVENTS:-Swap}"
    --limit-retries "${LIMIT_RETRIES:-1}"
    --no-parquet
  )

  FETCH_OK=1
  if [[ -s "${events_csv}" ]]; then
    echo "[skip] events already exist: ${events_csv}"
  elif ! "${fetch_cmd[@]}"; then
    # The fetch is tested in a condition, so its failure does not trip
    # `set -e`; record it and switch to the proxy data path for this month.
    FETCH_OK=0
    run_ohlcv_fallback "${FROM_TS}" "${TO_TS}" "${TAG}"
  fi

  if [[ "${FETCH_OK}" -eq 0 ]]; then
    echo "[skip] exact NPZ not built for ${TAG}; using separate *_ohlcv_proxy.npz until event RPC succeeds"
  elif [[ -s "${OUT_NPZ}" ]]; then
    echo "[skip] npz already exists: ${OUT_NPZ}"
  else
    python3 dex_platform/data_collectors/full_cycle_import/build_cl_fee_replay_npz_v2.py \
      --events "${events_csv}" \
      --out-npz "${OUT_NPZ}" \
      --pool-name "${POOL_NAME}_${TAG}" \
      --token0 USDC \
      --token1 WETH \
      --dec0 6 \
      --dec1 18 \
      --quote-token token0 \
      --fee-rate "${FEE_RATE}"
  fi
done

echo "[done] ${SCRIPT_VERSION}"
