#!/usr/bin/env python3
"""
market_neutral_lp_v1.py — Market-Neutral DEX LP Strategies

Базується на висновках "DEX-алгоритми з реальною доказовістю.pdf":

Головне: DEX LP визначають НЕ технічні індикатори (RSI/EMA/DEMA),
а: realized vol, LVR, swap arrival intensity, trade-size distribution.

Три нові алгоритми:

[A] VOL-GATED LP
    Деплоїти LP лише коли realized_vol < fee_rate × k
    (fee yield перевищує LVR при низькій волатильності)
    При vol_spike → вийти в USDC і чекати

[B] RANGE-ORDER (Synthetic Limit Order)
    100% USDC → вузький діапазон вище ринку
    Якщо ціна проходить range: отримали BIO + fees → закрити
    Або 100% BIO → вузький range нижче → продати вище + fees
    РИНКОВО НЕЙТРАЛЬНА: не залежить від напрямку

[C] SWAP-INTENSITY FILTER (Toxicity gate)
    Великі свопи = informed flow = LVR > fees → пауза
    Малі роздрібні свопи = fee edge > LVR → деплоїти
    Перемикання на основі rolling median swap size

Метрики з PDF:
    LVR ≈ 0.5 × σ² × L (де σ = realized vol, L = capital)
    fee_yield = fee_rate × volume × share
    deploy_condition: fee_yield > LVR  ↔  fee_rate × arrival_intensity > 0.5 × σ²
"""
from __future__ import annotations
import argparse, json, math
from pathlib import Path
import numpy as np
import pandas as pd

# Version tag of this script; main() prints it at startup.
SCRIPT_VERSION = "market_neutral_lp_v1_2026_05_04"


# ── Core math ────────────────────────────────────────────────────────────────

def realized_vol_rolling(prices: np.ndarray, window: int) -> np.ndarray:
    """Return the trailing realized volatility of a price series.

    For every index ``i >= window`` the result is the standard deviation
    (NumPy default, ``ddof=0``) of the ``window`` log returns that end at
    ``prices[i]``, multiplied by ``sqrt(window)``. The first ``window``
    entries are NaN because a full window of returns is not available yet.

    Prices are floored at 1e-300 before the logarithm is taken, so zero or
    negative inputs cannot make ``log`` itself return ``-inf`` or NaN.
    """
    count = len(prices)
    floored = np.maximum(prices, 1e-300)
    returns = np.diff(np.log(floored))
    horizon_scale = np.sqrt(window)

    out = np.full(count, np.nan)
    for end in range(window, count):
        # returns[end - 1] is log(prices[end] / prices[end - 1]), so this
        # slice stops at the return that lands on prices[end].
        out[end] = np.std(returns[end - window:end]) * horizon_scale
    return out

def lvr_rate(realized_vol: float, price: float) -> float:
    """Return half the squared realized volatility.

    The original author labels this the LVR rate per unit of capital per
    swap and attributes it to Fritsch & Canidio (2024).

    ``price`` is accepted but does not enter the computation.
    """
    variance = realized_vol**2
    return 0.5 * variance

def sqrt_raw(p, d0=6, d1=18):
    """Return ``sqrt(10**(d1 - d0) / p)``.

    ``p`` is floored at 1e-300 so the division never hits zero. The result
    shrinks as ``p`` grows.
    """
    decimal_scale = 10 ** (d1 - d0)
    floored_price = max(p, 1e-300)
    return math.sqrt(decimal_scale / floored_price)

def liquidity_for_capital(cap, p0, lo, up, d0=6, d1=18):
    """Return the liquidity that ``cap`` buys in the range ``[lo, up]`` at price ``p0``.

    The value of one unit of liquidity is computed as
    ``amount0 / 10**d0 + amount1 / 10**d1 * p0`` and ``cap`` is divided by
    it. Which amounts are non-zero depends on where ``p0`` sits:

    * ``p0 <= lo``: only the token1 term is used;
    * ``p0 >= up``: only the token0 term is used;
    * otherwise: both terms are used.

    Returns 0 when the unit value is not greater than 1e-300 (for example a
    zero-width range).
    """
    sqrt_now = sqrt_raw(p0, d0, d1)
    # sqrt_raw decreases with price, so `up` yields the smaller root.
    sqrt_at_up = sqrt_raw(up, d0, d1)
    sqrt_at_lo = sqrt_raw(lo, d0, d1)

    if p0 <= lo:
        unit_value = (sqrt_at_lo - sqrt_at_up) / 10**d1 * p0
    elif p0 >= up:
        unit_value = (sqrt_at_lo - sqrt_at_up) / (sqrt_at_up * sqrt_at_lo) / 10**d0
    else:
        token0_part = (sqrt_at_lo - sqrt_now) / (sqrt_now * sqrt_at_lo) / 10**d0
        token1_part = (sqrt_now - sqrt_at_up) / 10**d1 * p0
        unit_value = token0_part + token1_part

    if unit_value > 1e-300:
        return cap / unit_value
    return 0

def position_value(L, p, lo, up, d0=6, d1=18):
    """Return the value of a position with liquidity ``L`` in ``[lo, up]`` at price ``p``.

    The value is ``amount0 / 10**d0 + amount1 / 10**d1 * p``:

    * ``p <= lo``: the position holds only token1;
    * ``p >= up``: the position holds only token0;
    * otherwise: it holds both.
    """
    sqrt_now = sqrt_raw(p, d0, d1)
    # sqrt_raw decreases with price, so `up` yields the smaller root.
    sqrt_at_up = sqrt_raw(up, d0, d1)
    sqrt_at_lo = sqrt_raw(lo, d0, d1)

    if p <= lo:
        amount0 = 0.0
        amount1 = L * (sqrt_at_lo - sqrt_at_up)
    elif p >= up:
        amount0 = L * (sqrt_at_lo - sqrt_at_up) / (sqrt_at_up * sqrt_at_lo)
        amount1 = 0.0
    else:
        amount0 = L * (sqrt_at_lo - sqrt_now) / (sqrt_now * sqrt_at_lo)
        amount1 = L * (sqrt_now - sqrt_at_up)

    return amount0 / 10**d0 + amount1 / 10**d1 * p

def max_drawdown(eq):
    """Return the deepest peak-to-trough decline of ``eq`` in percent.

    The result is zero or negative (e.g. ``-25.0`` for a 25 % drop from the
    running maximum). Points where the running maximum is exactly zero are
    ignored instead of dividing by zero.
    """
    running_peak = np.maximum.accumulate(eq)
    safe_peak = np.where(running_peak == 0, np.nan, running_peak)
    drawdowns = eq / safe_peak - 1
    return float(np.nanmin(drawdowns) * 100)


# ════════════════════════════════════════════════════════════════════
# ALGORITHM A: VOL-GATED LP
# PDF: "deployed лише коли fee_yield > LVR"
# fee_yield ≈ fee_rate × vol_window_volume × share
# LVR_rate ≈ 0.5 × σ²
# Deploy if: fee_rate > k × σ²  (simple, fast check)
# ════════════════════════════════════════════════════════════════════

def run_vol_gated(prices, volumes, active_liq, ts, capital, fee_rate, d0, d1,
                  vol_window, vol_k, lower_pct, upper_pct):
    """Backtest the volatility-gated LP strategy.

    The series is walked one observation at a time. While the rolling
    realized volatility is NaN (warm-up) nothing is traded. Afterwards a
    position is opened whenever ``fee_rate > vol_k * realized_vol**2`` and
    closed as soon as that inequality fails; a higher ``vol_k`` therefore
    makes the gate stricter. An open position covers
    ``[p*(1 - lower_pct/100), p*(1 + upper_pct/100)]`` around its entry
    price ``p`` and is not re-centred while it stays open.

    Fees for observation ``i`` are credited only while the price is inside
    the range and ``active_liq[i] > 0``, as ``share * fee_rate * volumes[i]``
    with ``share = L / (active_liq[i] + L)``.

    Args:
        prices, volumes, active_liq: equally long per-observation series.
        ts: accepted for signature parity with the other strategies; unused.
        capital: starting cash.
        fee_rate: pool fee as a fraction of volume.
        d0, d1: token decimals forwarded to the liquidity math.
        vol_window: window length for ``realized_vol_rolling``.
        vol_k: multiplier on the squared volatility in the deploy test.
        lower_pct, upper_pct: range width below/above the entry price, in %.

    Returns:
        dict with ``algo``, ``return_pct``, ``mdd_pct``, ``equity_end``,
        ``fees``, ``rebalances``, ``idle_periods``, ``time_in_range``,
        ``avg_share``, ``p99_share`` and ``params``. ``fees`` is the total of
        all fees credited during the run, including fees that were already
        folded into cash on earlier exits.

    Raises:
        IndexError: if the series is empty (there is no final equity value).
    """
    n = len(prices)
    rv = realized_vol_rolling(prices, vol_window)

    equity = np.empty(n)
    in_range_arr = np.zeros(n, np.int8)
    share_arr = np.zeros(n)

    cash = capital
    L = lo = up = 0.0
    in_lp = False
    fees_pending = 0.0  # fees of the open position, not yet folded into cash
    fees_total = 0.0    # every fee credited so far; survives exits
    rebalances = 0
    idle_count = 0

    for i in range(n):
        p, v, al, rv_i = prices[i], volumes[i], active_liq[i], rv[i]

        # Warm-up (or a NaN volatility): mark to market only, no decisions.
        if np.isnan(rv_i):
            if in_lp:
                equity[i] = position_value(L, p, lo, up, d0, d1) + fees_pending
            else:
                equity[i] = cash
            continue

        deploy_ok = fee_rate > vol_k * rv_i**2

        if not in_lp and deploy_ok and cash > 0:
            new_lo = p * (1 - lower_pct / 100)
            new_up = p * (1 + upper_pct / 100)
            new_L = liquidity_for_capital(cash, p, new_lo, new_up, d0, d1)
            if new_L > 0:
                L, lo, up, cash, in_lp = new_L, new_lo, new_up, 0.0, True
            # Counted even when the entry was rejected (zero liquidity).
            rebalances += 1
        elif in_lp and not deploy_ok:
            # Close: position value plus its pending fees become cash.
            cash = position_value(L, p, lo, up, d0, d1) + fees_pending
            fees_pending = 0.0
            L = lo = up = 0.0
            in_lp = False
            rebalances += 1
            idle_count += 1

        if in_lp:
            ir = (p >= lo) & (p <= up)
            in_range_arr[i] = int(ir)
            if ir and al > 0:
                sh = L / (al + L)
                share_arr[i] = sh
                earned = sh * fee_rate * v
                fees_pending += earned
                fees_total += earned
            equity[i] = position_value(L, p, lo, up, d0, d1) + fees_pending
        else:
            equity[i] = cash

    ir_in = share_arr[in_range_arr.astype(bool)]
    return dict(algo='vol_gated', return_pct=(equity[-1]/capital-1)*100,
                mdd_pct=max_drawdown(equity), equity_end=float(equity[-1]),
                fees=float(fees_total), rebalances=rebalances, idle_periods=idle_count,
                time_in_range=float(in_range_arr.mean()*100),
                avg_share=float(ir_in.mean()*100) if len(ir_in) else 0,
                p99_share=float(np.percentile(ir_in,99)*100) if len(ir_in) else 0,
                params=f"vol_k={vol_k} window={vol_window} lower={lower_pct} upper={upper_pct}")


# ════════════════════════════════════════════════════════════════════
# ALGORITHM B: RANGE ORDER (Synthetic Limit Order + fee)
# PDF: "one-sided deposit у вузький діапазон працює як limit-order + збирає fees"
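# Idea: start 100% USDC → narrow range above market
# NOTE: as implemented below, the range straddles the entry price,
#   [p0*(1-entry_offset_pct/100), p0*(1+exit_width_pct/100)], so the deposit is two-sided
# When price crosses upper: position is all token0 (the numeraire side) + earned fees → close
# If never crosses: keep earning fees (insurance)
# After close: wait cooldown_h hours, then repeat with new range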
# ════════════════════════════════════════════════════════════════════

def run_range_order(prices, volumes, active_liq, ts, capital, fee_rate, d0, d1,
                   entry_offset_pct, exit_width_pct, cooldown_h):
    """Backtest the range-order (take-profit style) LP strategy.

    Whenever no position is open, cash is available and at least
    ``cooldown_h`` hours have passed since the last close, all cash is
    deployed into the range
    ``[p*(1 - entry_offset_pct/100), p*(1 + exit_width_pct/100)]`` around the
    current price ``p``. When a later price is at or above the upper bound
    the position (which then holds only token0, per ``position_value``) is
    closed, its value plus pending fees becomes cash, and the cooldown
    restarts. Nothing closes a position when price falls below the range.

    Fees for observation ``i`` are credited only while the price is inside
    the range and ``active_liq[i] > 0``, as ``share * fee_rate * volumes[i]``
    with ``share = L / (active_liq[i] + L)``. No fee is credited on the
    observation that closes the position.

    Args:
        prices, volumes, active_liq, ts: equally long per-observation series.
            ``ts`` is assumed to be in seconds (``cooldown_h`` is multiplied
            by 3600 before comparing) — TODO confirm against the data source.
        capital: starting cash.
        fee_rate: pool fee as a fraction of volume.
        d0, d1: token decimals forwarded to the liquidity math.
        entry_offset_pct: distance of the lower bound below entry price, in %.
        exit_width_pct: distance of the upper bound above entry price, in %.
        cooldown_h: minimum hours between a close and the next entry.

    Returns:
        dict with ``algo``, ``return_pct``, ``mdd_pct``, ``equity_end``,
        ``fees``, ``rebalances``, ``orders_filled``, ``time_in_range``,
        ``avg_share``, ``p99_share`` and ``params``. ``fees`` is the total of
        all fees credited during the run, including fees that were already
        folded into cash when earlier orders filled.

    Raises:
        IndexError: if the series is empty.
    """
    n = len(prices)
    equity = np.empty(n)
    share_arr = np.zeros(n)
    in_range_arr = np.zeros(n, np.int8)

    cash = capital
    L = lo = up = 0.0
    in_lp = False
    fees_pending = 0.0  # fees of the open position, not yet folded into cash
    fees_total = 0.0    # every fee credited so far; survives closes
    closed_count = 0
    rebalances = 0
    cooldown_s = cooldown_h * 3600
    # NOTE(review): starting the clock at ts[0] also delays the very first
    # entry by the cooldown — confirm this warm-up is intended.
    last_close_ts = ts[0]

    for i in range(n):
        p, v, al, t = prices[i], volumes[i], active_liq[i], ts[i]

        if not in_lp and cash > 0 and (t - last_close_ts) >= cooldown_s:
            new_lo = p * (1 - entry_offset_pct / 100)
            new_up = p * (1 + exit_width_pct / 100)
            new_L = liquidity_for_capital(cash, p, new_lo, new_up, d0, d1)
            if new_L > 0:
                L, lo, up, cash, in_lp = new_L, new_lo, new_up, 0.0, True
            # Counted even when the entry was rejected (zero liquidity).
            rebalances += 1

        if not in_lp:
            equity[i] = cash
            continue

        if p >= up:
            # Price has crossed the whole range: close and start the cooldown.
            cash = position_value(L, p, lo, up, d0, d1) + fees_pending
            fees_pending = 0.0
            L = lo = up = 0.0
            in_lp = False
            closed_count += 1
            last_close_ts = t
            rebalances += 1
            equity[i] = cash
            continue

        ir = (p >= lo) & (p <= up)
        in_range_arr[i] = int(ir)
        if ir and al > 0:
            sh = L / (al + L)
            share_arr[i] = sh
            earned = sh * fee_rate * v
            fees_pending += earned
            fees_total += earned
        equity[i] = position_value(L, p, lo, up, d0, d1) + fees_pending

    ir_in = share_arr[in_range_arr.astype(bool)]
    return dict(algo='range_order', return_pct=(equity[-1]/capital-1)*100,
                mdd_pct=max_drawdown(equity), equity_end=float(equity[-1]),
                fees=float(fees_total), rebalances=rebalances, orders_filled=closed_count,
                time_in_range=float(in_range_arr.mean()*100),
                avg_share=float(ir_in.mean()*100) if len(ir_in) else 0,
                p99_share=float(np.percentile(ir_in,99)*100) if len(ir_in) else 0,
                params=f"offset={entry_offset_pct} width={exit_width_pct} cooldown={cooldown_h}h")


# ════════════════════════════════════════════════════════════════════
# ALGORITHM C: SWAP-INTENSITY / TOXICITY FILTER
# PDF: "swap arrival intensity, trade-size distribution"
# Large swaps = informed (institutional) = high LVR → pause LP
# Small swaps = retail/arbitrage = fee edge positive → deploy LP
# Metric: rolling median swap size vs threshold
# ════════════════════════════════════════════════════════════════════

def run_toxicity_gated(prices, volumes, active_liq, ts, capital, fee_rate, d0, d1,
                       toxicity_window, size_threshold_pct, lower_pct, upper_pct,
                       cooldown_h):
    """Backtest the swap-size ("toxicity") gated LP strategy.

    The gate is the median of the previous ``toxicity_window`` entries of
    ``volumes`` (the current observation is not included). The strategy
    treats a median below ``capital * size_threshold_pct / 100`` as
    low-toxicity flow. A position is opened when the flow is low-toxicity
    and closed when it is not; either action is allowed only once
    ``cooldown_h`` hours have passed since the previous action. Until the
    median is available (warm-up) nothing is traded.

    An open position covers
    ``[p*(1 - lower_pct/100), p*(1 + upper_pct/100)]`` around its entry
    price ``p``. Fees for observation ``i`` are credited only while the
    price is inside the range and ``active_liq[i] > 0``, as
    ``share * fee_rate * volumes[i]`` with ``share = L / (active_liq[i] + L)``.

    Args:
        prices, volumes, active_liq, ts: equally long per-observation series.
            ``ts`` is assumed to be in seconds (``cooldown_h`` is multiplied
            by 3600 before comparing) — TODO confirm against the data source.
        capital: starting cash; also scales the size threshold.
        fee_rate: pool fee as a fraction of volume.
        d0, d1: token decimals forwarded to the liquidity math.
        toxicity_window: number of past observations in the rolling median.
        size_threshold_pct: threshold as a percentage of ``capital``.
        lower_pct, upper_pct: range width below/above the entry price, in %.
        cooldown_h: minimum hours between two entry/exit actions.

    Returns:
        dict with ``algo``, ``return_pct``, ``mdd_pct``, ``equity_end``,
        ``fees``, ``rebalances``, ``time_in_range``, ``avg_share``,
        ``p99_share`` and ``params``. ``fees`` is the total of all fees
        credited during the run, including fees that were already folded
        into cash on earlier exits.

    Raises:
        IndexError: if the series is empty.
    """
    n = len(prices)

    # Rolling median over the window that ends just before observation i.
    med_size = np.full(n, np.nan)
    for i in range(toxicity_window, n):
        med_size[i] = np.median(volumes[i-toxicity_window:i])

    threshold = capital * size_threshold_pct / 100
    cooldown_s = cooldown_h * 3600

    equity = np.empty(n)
    share_arr = np.zeros(n)
    in_range_arr = np.zeros(n, np.int8)

    cash = capital
    L = lo = up = 0.0
    in_lp = False
    fees_pending = 0.0  # fees of the open position, not yet folded into cash
    fees_total = 0.0    # every fee credited so far; survives exits
    rebalances = 0
    # NOTE(review): starting the clock at ts[0] also delays the very first
    # entry by the cooldown — confirm this warm-up is intended.
    last_rebal_ts = ts[0]

    for i in range(n):
        p, v, al, t, ms = prices[i], volumes[i], active_liq[i], ts[i], med_size[i]

        # Warm-up (or a NaN median): mark to market only, no decisions.
        if np.isnan(ms):
            if in_lp:
                equity[i] = position_value(L, p, lo, up, d0, d1) + fees_pending
            else:
                equity[i] = cash
            continue

        low_toxicity = ms < threshold
        cooldown_ok = (t - last_rebal_ts) >= cooldown_s

        if not in_lp and low_toxicity and cash > 0 and cooldown_ok:
            new_lo = p * (1 - lower_pct / 100)
            new_up = p * (1 + upper_pct / 100)
            new_L = liquidity_for_capital(cash, p, new_lo, new_up, d0, d1)
            if new_L > 0:
                L, lo, up, cash, in_lp = new_L, new_lo, new_up, 0.0, True
            # Counted, and the cooldown restarted, even when the entry was
            # rejected (zero liquidity).
            rebalances += 1
            last_rebal_ts = t
        elif in_lp and not low_toxicity and cooldown_ok:
            # Close: position value plus its pending fees become cash.
            cash = position_value(L, p, lo, up, d0, d1) + fees_pending
            fees_pending = 0.0
            L = lo = up = 0.0
            in_lp = False
            rebalances += 1
            last_rebal_ts = t

        if in_lp:
            ir = (p >= lo) & (p <= up)
            in_range_arr[i] = int(ir)
            if ir and al > 0:
                sh = L / (al + L)
                share_arr[i] = sh
                earned = sh * fee_rate * v
                fees_pending += earned
                fees_total += earned
            equity[i] = position_value(L, p, lo, up, d0, d1) + fees_pending
        else:
            equity[i] = cash

    ir_in = share_arr[in_range_arr.astype(bool)]
    return dict(algo='toxicity_gated', return_pct=(equity[-1]/capital-1)*100,
                mdd_pct=max_drawdown(equity), equity_end=float(equity[-1]),
                fees=float(fees_total), rebalances=rebalances,
                time_in_range=float(in_range_arr.mean()*100),
                avg_share=float(ir_in.mean()*100) if len(ir_in) else 0,
                p99_share=float(np.percentile(ir_in,99)*100) if len(ir_in) else 0,
                params=f"window={toxicity_window} threshold={size_threshold_pct}% lower={lower_pct} upper={upper_pct}")


# ════════════════════════════════════════════════════════════════════
# GRID RUNNER
# ════════════════════════════════════════════════════════════════════

def run_all(npz_path, out_dir, capital, fee_rate, d0, d1, days):
    """Run every strategy over its parameter grid and write the results.

    Reads the arrays ``price``, ``input_usd``, ``active_liquidity`` and
    ``ts`` from the ``.npz`` archive at ``npz_path``, runs the vol-gated,
    range-order and toxicity-gated backtests for every grid combination, and
    writes three CSV files into ``out_dir`` (created if missing):

    * ``summary.csv`` — every run;
    * ``valid.csv`` — runs with ``mdd_pct >= -20``, ``pnl_mdd >= 2``,
      ``return_pct > 0`` and ``p99_share < 10``;
    * ``best.csv`` — the 50 runs with the highest ``annual_pct``.

    ``annual_pct`` is ``return_pct / days * 365`` (linear scaling) and
    ``pnl_mdd`` is ``|return_pct| / |mdd_pct|``, NaN when the drawdown is 0.
    A short summary is printed to stdout.

    The archive's ``meta_json`` entry is not read: nothing in this function
    uses it.

    Args:
        npz_path: path to the input ``.npz`` archive.
        out_dir: directory for the CSV output.
        capital, fee_rate, d0, d1: forwarded to each strategy.
        days: length of the data set in days, used for annualisation.

    Returns:
        ``(df, valid)``: the full results frame and the filtered frame.
    """
    # The NpzFile keeps the archive open until closed; copy out the arrays
    # and release the handle before the long grid run.
    with np.load(npz_path, allow_pickle=False) as z:
        pr = z['price'].astype(np.float64)
        vol = z['input_usd'].astype(np.float64)
        al = z['active_liquidity'].astype(np.float64)
        ts = z['ts'].astype(np.int64)

    print(f"\n{Path(npz_path).stem}: {len(pr)} swaps, {days:.1f}d")
    print(f"  price {pr.min():.4f}-{pr.max():.4f}, vol ${vol.sum():,.0f}")

    rows = []

    # [A] Vol-Gated grid
    for vw in [50, 100, 200, 500, 1000]:
        for vk in [0.5, 1, 2, 5, 10, 20, 50]:
            for ll in [10, 20, 30, 50]:
                for uu in [20, 40, 70, 90]:
                    r = run_vol_gated(pr, vol, al, ts, capital, fee_rate, d0, d1,
                                      vw, vk, ll, uu)
                    r.update({'vol_window': vw, 'vol_k': vk, 'lower': ll, 'upper': uu})
                    rows.append(r)

    # [B] Range-Order grid
    for offset in [0.5, 1, 2, 5]:
        for width in [5, 10, 15, 20, 30]:
            for cd in [24, 72, 168]:
                r = run_range_order(pr, vol, al, ts, capital, fee_rate, d0, d1,
                                    offset, width, cd)
                r.update({'offset_pct': offset, 'width_pct': width, 'cooldown_h': cd})
                rows.append(r)

    # [C] Toxicity-Gated grid (cooldown fixed at 72 h)
    for tw in [50, 100, 200]:
        for thresh in [0.5, 1, 2, 5, 10]:
            for ll in [10, 20, 30]:
                for uu in [20, 40, 70]:
                    r = run_toxicity_gated(pr, vol, al, ts, capital, fee_rate, d0, d1,
                                           tw, thresh, ll, uu, 72)
                    r.update({'tox_window': tw, 'tox_thresh': thresh,
                               'lower': ll, 'upper': uu})
                    rows.append(r)

    df = pd.DataFrame(rows)
    df['annual_pct'] = df['return_pct'] / days * 365
    # A zero drawdown becomes NaN so the ratio is NaN rather than infinite.
    df['pnl_mdd'] = df['return_pct'].abs() / df['mdd_pct'].abs().replace(0, float('nan'))

    valid = df[(df['mdd_pct'] >= -20) & (df['pnl_mdd'] >= 2) & (df['return_pct'] > 0) &
               (df['p99_share'] < 10)]

    out = Path(out_dir)
    out.mkdir(parents=True, exist_ok=True)
    df.to_csv(out / 'summary.csv', index=False)
    valid.to_csv(out / 'valid.csv', index=False)
    df.sort_values('annual_pct', ascending=False).head(50).to_csv(out / 'best.csv', index=False)

    print(f"  Total: {len(df)}, Valid (MDD<20,PnL/MDD>2,p99<10): {len(valid)}")

    if len(valid):
        top = valid.sort_values('annual_pct', ascending=False).head(3)
        for _, r in top.iterrows():
            print(f"  [{r['algo']}] {r['params']} → {r['annual_pct']:.0f}% ann "
                  f"MDD={r['mdd_pct']:.1f}% fee=${r['fees']:.0f}")

    # Per-algorithm line: best valid run, else best run overall.
    for algo in ['vol_gated', 'range_order', 'toxicity_gated']:
        sub = valid[valid['algo'] == algo]
        cnt = len(sub)
        if cnt > 0:
            b = sub.sort_values('annual_pct', ascending=False).iloc[0]
            print(f"  {algo}: {cnt} valid, best={b['annual_pct']:.0f}% ann MDD={b['mdd_pct']:.1f}%")
            continue
        algo_rows = df[df['algo'] == algo]
        if len(algo_rows):
            top_algo = algo_rows.sort_values('annual_pct', ascending=False).iloc[0]
            print(f"  {algo}: 0 valid, best={top_algo['annual_pct']:.0f}% MDD={top_algo['mdd_pct']:.1f}%")

    return df, valid


def main():
    """Parse the command line, print the script version and run the grid."""
    parser = argparse.ArgumentParser()
    parser.add_argument('--npz', required=True)
    parser.add_argument('--out-dir', required=True)

    # Optional numeric settings: (flag, type, default).
    numeric_options = (
        ('--capital', float, 600),
        ('--fee-rate', float, 0.003),
        ('--dec0', int, 6),
        ('--dec1', int, 18),
        ('--days', float, 30),
    )
    for flag, kind, default in numeric_options:
        parser.add_argument(flag, type=kind, default=default)

    opts = parser.parse_args()
    print(f"[{SCRIPT_VERSION}]")
    run_all(
        opts.npz,
        opts.out_dir,
        opts.capital,
        opts.fee_rate,
        opts.dec0,
        opts.dec1,
        opts.days,
    )

# Run the CLI only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()