#!/usr/bin/env bash
set -Eeuo pipefail

# Cron-driven supervisor that resumes a Codex session for the DEX project.
#
# Environment overrides (each keeps its caller-supplied value when set and
# non-empty, otherwise falls back to the default below):
#   PROJECT_ROOT, TIMEBOX_SECONDS, CODEX_BIN, SANDBOX_MODE, APPROVAL_POLICY

# --- caller-tunable settings -------------------------------------------------
# Default project root comes from the current handoff.
: "${PROJECT_ROOT:=/home/happyuser/projects/DEX}"
# 27.5 min: leaves buffer before 30 min cron interval.
: "${TIMEBOX_SECONDS:=1650}"
: "${CODEX_BIN:=codex}"
: "${SANDBOX_MODE:=workspace-write}"
: "${APPROVAL_POLICY:=never}"

# --- per-run identifier (UTC timestamp, second resolution) -------------------
RUN_ID="$(date -u +%Y%m%dT%H%M%SZ)"

# --- derived locations -------------------------------------------------------
AGENT_DIR="${PROJECT_ROOT}/.agent"
LOG_DIR="${AGENT_DIR}/logs"
SNAPSHOT_DIR="${AGENT_DIR}/snapshots"
PROMPT_FILE="${AGENT_DIR}/prompts/cron_session_prompt.md"
LOCK_FILE="${AGENT_DIR}/codex_autorun.lock"
SESSION_ID_FILE="${AGENT_DIR}/codex_session_id"
STATE_FILE="${PROJECT_ROOT}/docs/AGENT_STATE.md"
RUN_LOG="${LOG_DIR}/codex_autorun_${RUN_ID}.log"

# Working directories the rest of the script writes into.
agent_dirs=("$AGENT_DIR" "$LOG_DIR" "$SNAPSHOT_DIR" "$AGENT_DIR/prompts")
mkdir -p "${agent_dirs[@]}"

# Single-instance guard: overlapping sessions are not allowed, because two
# agents would fight over git/tmux/docs. The lock lives on fd 9 for the rest of
# the script; a run that cannot take it immediately logs the fact and exits 0.
exec 9>"$LOCK_FILE"
flock -n 9 || {
  echo "[$(date -u +%FT%TZ)] another codex_autorun is active; exiting" >> "$RUN_LOG"
  exit 0
}

cd "$PROJECT_ROOT"

# HAS_GIT is 1 when the current directory is inside a git work tree, else 0.
# Later sections use it to choose between git output and a placeholder line.
if git rev-parse --is-inside-work-tree &>/dev/null; then
  HAS_GIT=1
else
  HAS_GIT=0
fi

# Ensure a minimal persistent state exists. Codex must not rely only on chat/session memory.
# An existing state file is left untouched; only a missing one is seeded.
if [[ ! -f "$STATE_FILE" ]]; then
  # Create the parent directory first: without it the redirection below fails
  # and, under `set -e`, aborts the whole run before Codex is ever started.
  mkdir -p -- "$(dirname -- "$STATE_FILE")"
  cat > "$STATE_FILE" <<'STATE'
# AGENT_STATE

## Current Objective
Keep DEX LP research and paper-live validation moving safely. No signed transactions from automation.

## Active tmux Sessions
Unknown until next supervisor run.

## Last Run Summary
None yet.

## Next Safe Actions
- Inspect tmux sessions and logs.
- Update this file with concrete status.
- Fix only clear low-risk issues.
- Do not touch private keys, .env, or signed transaction code paths.
STATE
fi

# Seed the project-local prompt template when it is missing. An existing file
# is never overwritten. The heredoc delimiter is quoted, so the text is written
# literally with no expansion.
[[ -f "$PROMPT_FILE" ]] || cat > "$PROMPT_FILE" <<'PROMPT'
You are continuing autonomous maintenance of this DEX LP research project.

Hard boundaries:
- Do not initiate, sign, broadcast, or prepare signed live transactions.
- Do not read, print, edit, copy, or exfiltrate private keys, .env files, wallet secrets, RPC secrets, API keys, or seed phrases.
- If git is available, finish each session by committing the version you produced before you exit.
- If you need to undo a prior version, use `git revert` or a new reversing commit; do not rewrite history.
- Do not push to remote unless the current instruction explicitly allows it.
- Do not delete DEX_DATA or DEX_REPORTS. Read them when needed; write only small summaries/reports.
- Do not restart RPC collectors aggressively after 429/rate-limit failures. Prefer slower chunking, sleep, checkpoint/resume, or documentation of the blocker.
- Do not edit unrelated files. Keep diffs minimal.

Session goal:
1. Read docs/HANDOFF_TO_LIVE_PLAN_2026_05_05.md, docs/LIVE_PREP_STATUS_2026_05_05.md, and docs/AGENT_STATE.md.
2. Inspect the fresh machine snapshot in .agent/snapshots/latest.txt.
3. Check active tmux sessions and recent logs/reports.
4. If a tmux job finished, summarize result and decide the next safe step.
5. If a tmux job crashed and the cause is clear, make a minimal fix and restart only the same safe paper/research job.
6. If new data appeared, update status, metrics, and next actions.
7. End by updating docs/AGENT_STATE.md with:
   - UTC run time
   - active tmux sessions
   - observations
   - files changed
   - decisions made
   - next safe action
   - blockers
8. Also update docs/HANDOFF_TO_LIVE_PLAN_2026_05_05.md only when the live plan materially changes.

Use this evidence standard:
- OHLCV proxy is only screening evidence.
- TheGraph events are better but not final capacity proof.
- Raw RPC replay is the best available backtest truth.
- No live promotion from Level 1 evidence alone.

At the end, print a concise final summary with changed files and next action.
PROMPT

# Capture the machine state in a per-run snapshot file before Codex starts, so
# the agent reads deterministic shell output instead of discovering everything
# manually. Every probe is best-effort: a failing tool must not abort the run.
SNAPSHOT_FILE="${SNAPSHOT_DIR}/snapshot_${RUN_ID}.txt"
{
  printf '%s\n' "# Snapshot $RUN_ID" ""

  printf '%s\n' "## pwd"
  pwd
  printf '\n'

  printf '%s\n' "## git status"
  case "$HAS_GIT" in
    1) git status --short --branch || true ;;
    *) printf '%s\n' "not a git repository" ;;
  esac
  printf '\n'

  printf '%s\n' "## git diff stat"
  case "$HAS_GIT" in
    1) git diff --stat || true ;;
    *) printf '%s\n' "not a git repository" ;;
  esac
  printf '\n'

  printf '%s\n' "## tmux sessions"
  tmux list-sessions 2>/dev/null || printf '%s\n' "no tmux sessions"
  printf '\n'

  # Last 120 lines of scrollback per session; blank session names are skipped.
  printf '%s\n' "## tmux panes tail"
  if command -v tmux >/dev/null 2>&1; then
    while IFS= read -r session_name; do
      if [[ -n "$session_name" ]]; then
        printf '%s\n' "--- tmux:$session_name ---"
        tmux capture-pane -t "$session_name" -p -S -120 2>/dev/null || true
      fi
    done < <(tmux list-sessions -F '#S' 2>/dev/null || true)
  fi
  printf '\n'

  # Newest 80 files (by mtime, up to depth 3) under the report/code/doc trees.
  printf '%s\n' "## recent reports/files"
  find DEX_REPORTS dex_platform docs -maxdepth 3 -type f \
    -printf '%TY-%Tm-%Td %TH:%TM %p\n' 2>/dev/null \
    | sort -r \
    | head -80 \
    || true
  printf '\n'

  printf '%s\n' "## disk"
  df -h . || true
  printf '\n'

  printf '%s\n' "## memory"
  free -h || true
  printf '\n'

  # Despite the heading, this also lists codex and tmux processes.
  printf '%s\n' "## python processes"
  ps -eo pid,ppid,etime,cmd | grep -E 'python|codex|tmux' | grep -v grep || true
} > "$SNAPSHOT_FILE"

# Repoint latest.txt at the snapshot just written.
ln -sfn "$SNAPSHOT_FILE" "$SNAPSHOT_DIR/latest.txt"

# Final prompt = template file contents (trailing newlines stripped), a blank
# line, then four run-specific lines. No trailing newline is added.
prompt_body="$(<"$PROMPT_FILE")"
printf -v PROMPT_TEXT '%s\n\n%s\n%s\n%s\n%s' \
  "$prompt_body" \
  "Fresh snapshot is at: .agent/snapshots/latest.txt" \
  "Current UTC run id: $RUN_ID" \
  "Timebox: $TIMEBOX_SECONDS seconds" \
  "Run now."

# Run header: echoed to stdout and appended to the per-run log.
{
  printf '[%s] starting Codex autorun %s\n' "$(date -u +%FT%TZ)" "$RUN_ID"
  printf 'PROJECT_ROOT=%s\n' "$PROJECT_ROOT"
  printf 'CODEX_BIN=%s\n' "$CODEX_BIN"
  printf 'SANDBOX_MODE=%s\n' "$SANDBOX_MODE"
  if [[ -s "$SESSION_ID_FILE" ]]; then
    printf 'SESSION_ID=%s\n' "$(tr -d '[:space:]' < "$SESSION_ID_FILE")"
  else
    printf 'SESSION_ID_FILE missing or empty: %s\n' "$SESSION_ID_FILE"
  fi
} | tee -a "$RUN_LOG"

# The supervisor only ever resumes an explicitly pinned session. A missing or
# empty id file is a hard error (exit 3).
if [[ ! -s "$SESSION_ID_FILE" ]]; then
  printf '%s\n' \
    "ERROR: missing explicit Codex session id file: $SESSION_ID_FILE" \
    "Refusing to use resume --last in supervisor mode." \
    | tee -a "$RUN_LOG"
  exit 3
fi

# All whitespace is stripped from the id; a file holding only whitespace is
# rejected with the same exit code.
SESSION_ID="$(tr -d '[:space:]' < "$SESSION_ID_FILE")"
[[ -n "$SESSION_ID" ]] || {
  echo "ERROR: empty Codex session id in $SESSION_ID_FILE" | tee -a "$RUN_LOG"
  exit 3
}

# Always resume the pinned worker session. Do not use resume --last here; it can
# collide with manual Codex sessions opened in the same project path.
#
# `9>&-` closes the flock descriptor for the Codex process tree only. The
# supervisor shell keeps fd 9 open, so the lock stays held for the whole run.
# Without the close, any long-lived process Codex starts (e.g. a restarted
# background job) would inherit the lock and keep holding it after this script
# exits, making every later cron run bail out as "another codex_autorun is active".
#
# NOTE(review): APPROVAL_POLICY is defined in the configuration section but is
# not passed to Codex here — confirm whether that is intentional.
#
# errexit is suspended so a non-zero Codex/timeout status is recorded in
# CODEX_RC and the post-run reporting still happens.
set +e
timeout --kill-after=20s "$TIMEBOX_SECONDS" \
  "$CODEX_BIN" exec \
    --sandbox "$SANDBOX_MODE" \
    --skip-git-repo-check \
    resume "$SESSION_ID" \
    "$PROMPT_TEXT" 2>&1 9>&- | tee -a "$RUN_LOG"
# Status of the first pipeline stage (timeout/Codex), not of tee.
CODEX_RC=${PIPESTATUS[0]}
set -e

# Post-run report: Codex exit code plus the resulting git state, echoed to
# stdout and appended to the run log.
{
  printf '\n[%s] Codex exit code: %s\n' "$(date -u +%FT%TZ)" "$CODEX_RC"

  printf '%s\n' "## post-run git status"
  case "$HAS_GIT" in
    1) git status --short --branch || true ;;
    *) printf '%s\n' "not a git repository" ;;
  esac

  printf '%s\n' "## post-run diff stat"
  case "$HAS_GIT" in
    1) git diff --stat || true ;;
    *) printf '%s\n' "not a git repository" ;;
  esac
} | tee -a "$RUN_LOG"

# Best-effort removal of files in directory $1 matching name pattern $2 whose
# mtime is more than 14 days old; errors are ignored.
prune_stale() {
  find "$1" -type f -name "$2" -mtime +14 -delete 2>/dev/null || true
}

# Keep only recent logs/snapshots to avoid disk creep.
prune_stale "$LOG_DIR" 'codex_autorun_*.log'
prune_stale "$SNAPSHOT_DIR" 'snapshot_*.txt'

# Propagate the Codex/timeout status as the script's exit status.
exit "$CODEX_RC"
