#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
fetch_build_cache_v13.py
- SQLite-compatible (INSERT OR REPLACE)
- Produces table 'price_indicators' with columns:
  symbol TEXT, datetime_utc TEXT, open REAL, high REAL, low REAL, close REAL, volume REAL,
  rsi REAL, stochastic REAL, mfi REAL, overbought_index REAL,
  atr_ratio REAL, gain_24h_before REAL, dp6h REAL, dp12h REAL,
  quote_volume REAL, qv_24h REAL, vol_surge_mult REAL
- Market resolve robust for BingX (and CCXT exchanges)

Example:
  python3 fetch_build_cache_v13.py \
    -i universe_symbols_bingx.csv \
    -t 1h --limit 500 \
    -o combined_cache_500_18.08.db \
    --exchange bingx \
    --fresh
"""
import os
import sys
import argparse
import sqlite3
from typing import List, Optional, Tuple

import pandas as pd
import numpy as np

try:
    import ccxt  # type: ignore
except Exception:
    ccxt = None


# -------- Symbol helpers --------

def normalize_token(s: str) -> str:
    return str(s).strip().upper()

def parse_base_quote(raw: str) -> Tuple[str, Optional[str]]:
    """
    Accepts: BTC, BTCUSDT, BTC-USDT, BTC/USDT, BTC/USDT:USDT
    Returns (base, quote or None)
    """
    s = normalize_token(raw)
    if "/" in s:
        base, rest = s.split("/", 1)
        return base, rest.split(":")[0]
    if "-" in s:
        parts = s.split("-")
        if len(parts) >= 2:
            return parts[0], parts[1]
    if s.endswith("USDT") and len(s) > 4:
        return s[:-4], "USDT"
    if s.endswith("USDC") and len(s) > 4:
        return s[:-4], "USDC"
    return s, None

def resolve_market(ex, raw: str) -> Optional[str]:
    """
    Try to find a valid CCXT market id on the exchange.
    Order preference: perp USDT, spot USDT, perp USDC, spot USDC.
    Bias to provided quote if present.
    """
    s = normalize_token(raw)
    markets = ex.markets if getattr(ex, "markets", None) else ex.load_markets()
    if s in markets:
        return s

    base, guess = parse_base_quote(s)
    candidates = [f"{base}/USDT:USDT", f"{base}/USDT", f"{base}/USDC:USDC", f"{base}/USDC"]
    if guess == "USDC":
        candidates = [f"{base}/USDC:USDC", f"{base}/USDC", f"{base}/USDT:USDT", f"{base}/USDT"]
    elif guess == "USDT":
        candidates = [f"{base}/USDT:USDT", f"{base}/USDT", f"{base}/USDC:USDC", f"{base}/USDC"]

    for c in candidates:
        if c in markets:
            return c
    return None


# -------- DB helpers --------

def ensure_schema(db_path: str) -> None:
    con = sqlite3.connect(db_path)
    cur = con.cursor()
    cur.execute(
        """CREATE TABLE IF NOT EXISTS price_indicators(
            symbol TEXT,
            datetime_utc TEXT,
            open REAL, high REAL, low REAL, close REAL, volume REAL,
            rsi REAL, stochastic REAL, mfi REAL, overbought_index REAL,
            atr_ratio REAL,
            gain_24h_before REAL,
            dp6h REAL, dp12h REAL,
            quote_volume REAL, qv_24h REAL, vol_surge_mult REAL,
            PRIMARY KEY (symbol, datetime_utc)
        )"""
    )
    cur.execute("PRAGMA journal_mode=WAL;")
    con.commit()
    con.close()

def insert_or_replace_rows(db_path: str, rows: List[dict]) -> None:
    if not rows:
        return
    con = sqlite3.connect(db_path)
    cur = con.cursor()
    cols = [
        "symbol","datetime_utc","open","high","low","close","volume",
        "rsi","stochastic","mfi","overbought_index","atr_ratio","gain_24h_before",
        "dp6h","dp12h","quote_volume","qv_24h","vol_surge_mult"
    ]
    placeholders = ",".join(["?"] * len(cols))
    data = [tuple(r.get(c) for c in cols) for r in rows]
    cur.executemany(
        f"INSERT OR REPLACE INTO price_indicators ({','.join(cols)}) VALUES ({placeholders})",
        data
    )
    con.commit()
    con.close()


# -------- Feature engineering --------

def calc_atr_ratio(df: pd.DataFrame, period: int = 14) -> pd.Series:
    prev_close = df["close"].shift(1)
    tr = pd.concat(
        [
            (df["high"] - df["low"]).abs(),
            (df["high"] - prev_close).abs(),
            (df["low"] - prev_close).abs(),
        ],
        axis=1,
    ).max(axis=1)
    atr = tr.ewm(alpha=1 / period, adjust=False).mean()
    return (atr / df["close"]).replace([np.inf, -np.inf], np.nan).fillna(0.0)

def compute_features(df: pd.DataFrame) -> pd.DataFrame:
    out = df.copy()
    out["gain_24h_before"] = (out["close"] / out["close"].shift(24) - 1.0).fillna(0.0)
    out["dp6h"] = (out["close"] / out["close"].shift(6) - 1.0).fillna(0.0)
    out["dp12h"] = (out["close"] / out["close"].shift(12) - 1.0).fillna(0.0)
    out["atr_ratio"] = calc_atr_ratio(out, 14)
    out["quote_volume"] = (out["volume"] * out["close"]).replace([np.inf, -np.inf], np.nan).fillna(0.0)
    out["qv_24h"] = out["quote_volume"].rolling(24, min_periods=1).sum()
    avg1 = out["qv_24h"] / 24.0
    with np.errstate(divide="ignore", invalid="ignore"):
        out["vol_surge_mult"] = np.where(avg1 > 0, out["quote_volume"] / avg1, 0.0)
    # placeholders for compatibility
    out["rsi"] = 0.0
    out["stochastic"] = 0.0
    out["mfi"] = 0.0
    out["overbought_index"] = 0.0
    return out


# -------- CCXT fetch --------

def fetch_ohlcv(ex, market: str, timeframe: str, limit: int) -> pd.DataFrame:
    ohlcv = ex.fetch_ohlcv(market, timeframe=timeframe, limit=limit)
    if not ohlcv:
        return pd.DataFrame()
    df = pd.DataFrame(ohlcv, columns=["ts", "open", "high", "low", "close", "volume"])
    df["datetime_utc"] = pd.to_datetime(df["ts"], unit="ms", utc=True).dt.strftime("%Y-%m-%dT%H:%M:%S+00:00")
    df = df.set_index("datetime_utc")[["open", "high", "low", "close", "volume"]].astype(float)
    return df


# -------- Main --------

def main():
    ap = argparse.ArgumentParser()
    ap.add_argument("-i", "--input-csv", required=True, help="CSV with column 'symbol' (BTC, BTC-USDT, BTCUSDT, BTC/USDT, BTC/USDT:USDT)")
    ap.add_argument("-t", "--timeframe", default="1h")
    ap.add_argument("--limit", type=int, default=500)
    ap.add_argument("-o", "--output", required=True)
    ap.add_argument("--exchange", default="bingx")
    ap.add_argument("--fresh", action="store_true")
    args = ap.parse_args()

    if ccxt is None:
        print("ERROR: ccxt not installed. pip install ccxt", file=sys.stderr)
        sys.exit(2)

    ensure_schema(args.output)
    if args.fresh:
        con = sqlite3.connect(args.output)
        cur = con.cursor()
        cur.execute("DROP TABLE IF EXISTS price_indicators")
        con.commit()
        con.close()
        ensure_schema(args.output)

    ex = getattr(ccxt, args.exchange)()
    ex.enableRateLimit = True
    ex.load_markets()

    uni = pd.read_csv(args.input_csv)
    if "symbol" not in uni.columns:
        raise SystemExit("CSV must contain 'symbol' column.")
    bases = [normalize_token(x) for x in uni["symbol"].dropna().unique().tolist()]

    for raw in bases:
        mkt = resolve_market(ex, raw)
        if not mkt:
            print(f"[SKIP] {raw} — no matching market on {args.exchange}")
            continue
        try:
            df = fetch_ohlcv(ex, mkt, args.timeframe, args.limit)
            if df.empty:
                print(f"[WARN] {mkt} — no OHLCV")
                continue
            feats = compute_features(df)
            rows = []
            for idx, r in feats.iterrows():
                rows.append({
                    "symbol": mkt,
                    "datetime_utc": idx,
                    "open": float(r["open"]),
                    "high": float(r["high"]),
                    "low": float(r["low"]),
                    "close": float(r["close"]),
                    "volume": float(r["volume"]),
                    "rsi": float(r["rsi"]),
                    "stochastic": float(r["stochastic"]),
                    "mfi": float(r["mfi"]),
                    "overbought_index": float(r["overbought_index"]),
                    "atr_ratio": float(r["atr_ratio"]),
                    "gain_24h_before": float(r["gain_24h_before"]),
                    "dp6h": float(r["dp6h"]),
                    "dp12h": float(r["dp12h"]),
                    "quote_volume": float(r["quote_volume"]),
                    "qv_24h": float(r["qv_24h"]),
                    "vol_surge_mult": float(r["vol_surge_mult"]),
                })
            insert_or_replace_rows(args.output, rows)
            print(f"[OK] {raw} -> {mkt} rows={len(rows)}")
        except Exception as e:
            print(f"[ERR] {raw} -> {mkt} {e}", file=sys.stderr)

if __name__ == "__main__":
    main()
