#!/usr/bin/env bash
# watch_bio_usdc_fresh_update_incremental_v1.sh
# Python interpreter used to run the collector and builder scripts.
# Overridable from the environment, like the other settings in this file.
PYTHON3="${PYTHON3:-/var/www/vps2.happyuser.info/top/backtest_SK/.venv38/bin/python3}"
# Incremental BIO/USDC event update - persistent dir, no disk bloat.
# Replaces watch_bio_usdc_fresh_update_real_v2.sh which recreated full window each run.

# Strict mode: stop on command errors, on unset variables, and on a failure
# in any stage of a pipeline.
set -o errexit -o nounset -o pipefail

# Every relative path below is resolved against the project root, which
# defaults to the directory the script was started from.
: "${PROJECT_ROOT:=$(pwd)}"
cd "$PROJECT_ROOT"

# Tunables; each keeps a value already present in the environment.
: "${LOOP_SLEEP_S:=900}"                                    # pause between cycles (seconds)
: "${POOL:=0xd40bffa9c9e35493b88a2b6744c49d8716b00898}"     # passed to the collector as --pool
: "${POOL_NAME:=bio_usdc_fresh_paper_live}"                 # passed to the builder as --pool-name
: "${FEE_RATE:=0.003}"                                      # passed to the builder as --fee-rate
: "${INITIAL_TIME_FROM:=2026-05-03T22:11:41Z}"              # start time when no stored event exists
: "${TARGET_NPZ:=DEX_DATA/fast_npz/bio_usdc_fresh_paper_live_fee_replay_v2.npz}"
: "${PERSIST_DIR:=DEX_DATA/aerodrome_slipstream/bio_usdc_fresh_paper_live_persistent}"

# Helper scripts (relative to PROJECT_ROOT) and the RPC endpoint, which is
# taken from BASE_RPC_URL when that is set.
COLLECTOR="dex_platform/data_collectors/full_cycle_import/fetch_aerodrome_slipstream_events_v2.py"
BUILDER="dex_platform/data_collectors/full_cycle_import/build_cl_fee_replay_npz_v2.py"
RPC_URL="${BASE_RPC_URL:-https://mainnet.base.org}"

# Working directories: persistent event store, NPZ staging area, log folder.
mkdir -p \
  "$PERSIST_DIR" \
  DEX_DATA/fast_npz/_tmp \
  DEX_REPORTS/paper_live_bio_macro_router_v1

# --- Seed from best existing raw folder if persistent CSV missing ---
#######################################
# Copy events_all.csv from the most recently modified raw collector folder
# into the persistent directory, unless the persistent CSV already exists
# and is non-empty.
#
# Candidates are the directories matching
#   DEX_DATA/aerodrome_slipstream/bio_usdc_fresh_paper_live_2026-05-03*
# that contain a non-empty events_all.csv. A glob loop with -nt is used
# instead of parsing `ls -t`: without -d, ls prints the *contents* of the
# matched directories, so its first output line is not a directory path.
#
# Globals:   PERSIST_DIR (read)
# Arguments: none
# Outputs:   one "[init] ..." line on stdout when a seed file is copied
# Returns:   0 when nothing needs seeding or the copy succeeded;
#            the status of cp otherwise
#######################################
seed_persistent_csv() {
  local persistent_csv="$PERSIST_DIR/events_all.csv"
  local candidate
  local newest=""

  if [[ -s "$persistent_csv" ]]; then
    return 0
  fi

  for candidate in DEX_DATA/aerodrome_slipstream/bio_usdc_fresh_paper_live_2026-05-03*; do
    # Also skips the literal pattern when the glob matches nothing.
    [[ -d "$candidate" && -s "$candidate/events_all.csv" ]] || continue
    if [[ -z "$newest" || "$candidate" -nt "$newest" ]]; then
      newest="$candidate"
    fi
  done

  if [[ -n "$newest" ]]; then
    echo "[init] seeding persistent dir from $newest"
    cp "$newest/events_all.csv" "$persistent_csv"
  fi
}

seed_persistent_csv

#######################################
# Append one status line to the current cycle log and echo it to stdout.
# Globals:   LOG (read)
# Arguments: the message words
#######################################
log_line() {
  printf '%s\n' "$*" | tee -a "$LOG"
}

#######################################
# Print the ISO-8601 UTC time of the newest event stored in a CSV.
#
# The CSV is read with csv.DictReader and the maximum integer value of its
# "timestamp" column is formatted as %Y-%m-%dT%H:%M:%SZ. The fallback is
# printed instead when the file is missing/empty, has no usable timestamp,
# or cannot be read (the reason is then reported on stderr).
#
# Paths are handed to Python through argv rather than spliced into the
# source, so quotes in a path cannot break the helper.
#
# Globals:   PYTHON3 (read)
# Arguments: $1 - path to the events CSV
#            $2 - fallback time string
# Outputs:   the resolved time on stdout; diagnostics on stderr
#######################################
resolve_time_from() {
  local events_csv=$1
  local fallback=$2

  if [[ ! -s "$events_csv" ]]; then
    printf '%s\n' "$fallback"
    return 0
  fi

  "$PYTHON3" - "$events_csv" "$fallback" <<'PY'
import csv
import datetime
import sys

events_csv, fallback = sys.argv[1], sys.argv[2]


def parse_ts(row):
    # DictReader fills missing columns of a short row with None, so the
    # value must be normalised before calling str methods on it.
    raw = (row.get("timestamp") or "").strip()
    if not raw.lstrip("-").isdigit():
        return None
    try:
        return int(raw)
    except ValueError:
        return None


try:
    with open(events_csv, newline="") as fh:
        newest = max(
            (ts for ts in map(parse_ts, csv.DictReader(fh)) if ts is not None),
            default=None,
        )
    if newest is None:
        print(fallback)
    else:
        moment = datetime.datetime.fromtimestamp(newest, tz=datetime.timezone.utc)
        print(moment.strftime("%Y-%m-%dT%H:%M:%SZ"))
except Exception as exc:
    print(
        "[warn] could not read last timestamp from %s: %s; using %s"
        % (events_csv, exc, fallback),
        file=sys.stderr,
    )
    print(fallback)
PY
}

# Main loop: one incremental update cycle, then sleep LOOP_SLEEP_S seconds.
# Each cycle fetches events newer than the last stored one into a temporary
# directory, appends them to the persistent CSV, rebuilds the NPZ from the
# full persistent CSV and replaces TARGET_NPZ only when the rebuild produced
# a non-empty file.
while true; do
  TS="$(date -u +%Y%m%dT%H%M%SZ)"
  TEMP_DIR="DEX_DATA/aerodrome_slipstream/_tmp_inc_${TS}"
  LOG="DEX_REPORTS/paper_live_bio_macro_router_v1/update_real_${TS}.log"

  # Start the per-cycle log empty; everything afterwards only appends, so
  # diagnostics from resolve_time_from are not wiped by the header line.
  : > "$LOG"

  # --- Determine TIME_FROM: last event in persistent CSV ---
  # NOTE(review): TIME_FROM is the timestamp of the newest stored event. If
  # the collector treats --time-from as inclusive, events at that timestamp
  # would be fetched and appended again - confirm against the collector.
  TIME_FROM="$(resolve_time_from "$PERSIST_DIR/events_all.csv" "$INITIAL_TIME_FROM" 2>>"$LOG")"
  TIME_TO="$(date -u +%Y-%m-%dT%H:%M:%SZ)"
  log_line "[update] ${TS} from=${TIME_FROM} to=${TIME_TO}"

  mkdir -p "$TEMP_DIR"

  # --- Collect only new events ---
  # Run as an `if` condition so a collector failure does not trip errexit.
  COLLECT_OK=0
  if "$PYTHON3" "$COLLECTOR" \
    --pool "$POOL" \
    --time-from "$TIME_FROM" \
    --time-to "$TIME_TO" \
    --out-dir "$TEMP_DIR" \
    --rpc-url "$RPC_URL" \
    --chunk-size 1000 \
    --min-chunk-size 100 \
    --sleep-s 0.5 \
    --attempts 8 \
    --events Swap,Mint,Burn,Collect \
    --no-parquet 2>>"$LOG"; then
    COLLECT_OK=1
  fi

  if [[ "$COLLECT_OK" == "1" && -s "$TEMP_DIR/events_all.csv" ]]; then
    # Data rows only: the first line of the CSV is the header.
    NEW_ROWS=$(tail -n +2 "$TEMP_DIR/events_all.csv" | wc -l)
    log_line "[new_events] ${NEW_ROWS} rows from ${TIME_FROM}"

    if (( NEW_ROWS > 0 )); then
      # Append (skip header) to persistent CSV, or create it with the header.
      if [[ -s "$PERSIST_DIR/events_all.csv" ]]; then
        tail -n +2 "$TEMP_DIR/events_all.csv" >> "$PERSIST_DIR/events_all.csv"
      else
        cp "$TEMP_DIR/events_all.csv" "$PERSIST_DIR/events_all.csv"
      fi

      TOTAL=$(wc -l < "$PERSIST_DIR/events_all.csv")
      log_line "[persistent] total lines: ${TOTAL}"

      # Rebuild NPZ from full persistent CSV into a staging file, then move
      # it over the target so readers never see a half-written NPZ.
      TMP_NPZ="DEX_DATA/fast_npz/_tmp/bio_usdc_fresh_paper_live_fee_replay_v2_${TS}.npz"
      if "$PYTHON3" "$BUILDER" \
        --events "$PERSIST_DIR/events_all.csv" \
        --out-npz "$TMP_NPZ" \
        --pool-name "$POOL_NAME" \
        --token0 BIO --token1 USDC \
        --dec0 18 --dec1 6 \
        --quote-token token1 \
        --fee-rate "$FEE_RATE" 2>>"$LOG" && [[ -s "$TMP_NPZ" ]]; then
        mv "$TMP_NPZ" "$TARGET_NPZ"
        stat -c "[updated] %y %s %n" "$TARGET_NPZ" | tee -a "$LOG"
      else
        # Report the failure instead of ignoring it, and drop any partial
        # staging file so it does not accumulate.
        log_line "[error] builder failed or produced no NPZ; keeping previous $TARGET_NPZ"
        rm -f -- "$TMP_NPZ"
      fi
    else
      log_line "[idle] no new events since $TIME_FROM"
    fi
  else
    log_line "[error] collector failed or produced no output"
  fi

  # Cleanup temp dir (:? refuses to run rm on an empty path).
  rm -rf -- "${TEMP_DIR:?}"

  sleep "$LOOP_SLEEP_S"
done
