#!/usr/bin/env python3
"""
univariate_param_correlation_search.py

For a fixed entry hour (default 1), sweeps each parameter individually while
keeping others at base values, runs the backtest for each value, and computes
Spearman correlation between that parameter and total_return_pct.

Reports for each parameter: correlation (rho, p-value), best value (by return),
and its metrics.

Supports hiding per-run visual output via --no-visual and silencing backtest
internals via --quiet-backtest.
"""

import argparse
import csv
import math
import os
import sqlite3
import subprocess
import sys
from datetime import datetime

import pandas as pd
from scipy.stats import spearmanr

DEFAULT_BACKTEST_SCRIPT = "short_top_gainers_backtest.py"
UNIVERSE_FILE = "universe.txt"
CACHE_DB = "combined_cache.db"

# Base (reference) settings taken from previously profitable setup
BASE_CONFIG = {
    "hold_hours": 24,
    "cooldown_days": 3,
    "min_overbought_index": 70,
    "min_rsi": 60,
    "require_at_least_n_high": 2,
    "max_atr_ratio": 0.05,
    "risk_pct": 0.03,
    "base_tp_multiplier": 0.0,
    "max_extra_tp": 0.0,
}

# Default grid of values tried for each parameter when no --sweep-* override
# is given on the command line.
PARAM_SWEEPS_DEFAULT = {
    "min_overbought_index": [0, 30, 50, 60, 70, 80, 90],
    "min_rsi": [0, 30, 50, 60, 70, 80, 90],
    "require_at_least_n_high": [0, 1, 2, 3],
    "max_atr_ratio": [0.0, 0.01, 0.03, 0.05, 0.08],
    "risk_pct": [0.0, 0.01, 0.02, 0.03, 0.05],
    "base_tp_multiplier": [0.0, 0.5, 1.0, 1.5, 2.0],
    "max_extra_tp": [0.0, 0.1, 0.25, 0.5],
    "hold_hours": [6, 12, 24, 36, 48],
    "cooldown_days": [0, 1, 2, 3, 5],
}

# Type used to parse the values of each --sweep-<param> command-line option.
SWEEP_VALUE_TYPES = {
    "min_overbought_index": float,
    "min_rsi": float,
    "require_at_least_n_high": int,
    "max_atr_ratio": float,
    "risk_pct": float,
    "base_tp_multiplier": float,
    "max_extra_tp": float,
    "hold_hours": int,
    "cooldown_days": int,
}

# Column order of the summary CSV; matches the keys returned by
# summarize_param().
SUMMARY_FIELDS = [
    "param",
    "rho",
    "pval",
    "best_value",
    "best_return_pct",
    "best_win_rate",
    "best_profit_factor",
    "best_max_dd",
]


def compute_metrics_from_db(db_path):
    """Compute performance metrics from the ``trades`` table of a SQLite file.

    Trades are ordered by ``open_time_utc`` (then ``symbol``) and their
    ``short_return_pct`` values are compounded into an equity curve that
    starts from 1.0.

    Args:
        db_path: Path of the SQLite database written by the backtest.

    Returns:
        ``None`` when the file does not exist, has no ``trades`` table, or
        the table is empty.  Otherwise a dict with the keys
        ``total_return_pct``, ``win_rate`` (fraction in 0..1),
        ``max_drawdown_pct`` (zero or negative), ``profit_factor``
        (``inf`` when there are no losing trades), ``equity`` (the compounded
        equity ``pandas.Series``) and ``trades`` (row count).
    """
    if not os.path.exists(db_path):
        return None

    conn = sqlite3.connect(db_path)
    try:
        # A database without a trades table is treated as "no trades" rather
        # than letting the SELECT below raise and abort the whole sweep.
        has_trades = conn.execute(
            "SELECT 1 FROM sqlite_master "
            "WHERE type IN ('table', 'view') AND lower(name) = 'trades'"
        ).fetchone()
        if has_trades is None:
            return None
        df = pd.read_sql_query(
            "SELECT * FROM trades", conn, parse_dates=["open_time_utc"]
        )
    finally:
        conn.close()

    if df.empty:
        return None

    df = df.sort_values(["open_time_utc", "symbol"])
    df["short_return_decimal"] = df["short_return_pct"] / 100.0
    returns = df["short_return_decimal"]

    # A trade with exactly zero return counts as a loss.
    wins = returns[returns > 0]
    losses = returns[returns <= 0]

    equity = (1 + returns).cumprod()
    peak = equity.cummax()
    drawdown = (equity - peak) / peak

    gross_win = wins.sum()
    gross_loss = abs(losses.sum())
    profit_factor = gross_win / gross_loss if gross_loss != 0 else float("inf")

    return {
        "total_return_pct": (equity.iloc[-1] - 1) * 100,
        "win_rate": len(wins) / len(df),
        "max_drawdown_pct": drawdown.min() * 100,
        "profit_factor": profit_factor,
        "equity": equity,
        "trades": len(df),
    }


def _remove_if_exists(path):
    """Delete *path*, ignoring the case where it is already gone."""
    try:
        os.remove(path)
    except FileNotFoundError:
        pass


def _build_backtest_command(param, value, fixed_hour, base_config, out_db, args):
    """Build the argv list for one backtest run with *param* set to *value*."""
    # Override the swept parameter inside the config so each flag is passed
    # exactly once instead of relying on the child's last-flag-wins parsing.
    config = dict(base_config)
    config[param] = value

    cmd = [
        # Use the interpreter running this script so the backtest sees the
        # same environment (virtualenv, installed packages).
        sys.executable or "python3",
        args.backtest_script,
        "-c", CACHE_DB,
        "-b", out_db,
        "-o", str(fixed_hour),
        "-n", "4",
        "-u", UNIVERSE_FILE,
    ]
    for key, setting in config.items():
        cmd += ["--" + key.replace("_", "-"), str(setting)]

    if args.quiet_backtest:
        cmd.append("--quiet")
    if not args.no_visual:
        cmd.append("--ascii-equity")
    return cmd


def run_single_param_sweep(param, values, fixed_hour, base_config, args):
    """Run the backtest once per value of *param*, other settings at base.

    Each run writes to a temporary SQLite file ``temp_<param>_<value>.db`` in
    the current directory, which is removed again after its metrics are read
    (also when the backtest fails).

    Args:
        param: Name of the swept parameter (a key such as ``"min_rsi"``); it
            is turned into the ``--min-rsi`` style flag of the backtest.
        values: Iterable of values to try for *param*.
        fixed_hour: Entry hour passed to the backtest via ``-o``.
        base_config: Mapping of parameter name to base value; every entry is
            passed to the backtest as a flag.
        args: Parsed CLI namespace; reads ``backtest_script``,
            ``quiet_backtest``, ``no_visual`` and ``no_verbose``.

    Returns:
        List of ``(value, metrics)`` tuples, in sweep order, for the runs that
        succeeded and produced at least one trade.
    """
    results = []
    verbose = not args.no_verbose
    child_stdout = subprocess.DEVNULL if args.quiet_backtest else None

    for v in values:
        out_db = f"temp_{param}_{v}.db"
        cmd = _build_backtest_command(
            param, v, fixed_hour, base_config, out_db, args
        )

        # Never read metrics from a stale file left by an earlier run.
        _remove_if_exists(out_db)

        if verbose:
            print(
                f"[{datetime.now().isoformat()}] Sweeping {param}={v} ...",
                flush=True,
            )

        try:
            try:
                subprocess.run(cmd, check=True, stdout=child_stdout)
            except subprocess.CalledProcessError as e:
                print(
                    f"[WARN] backtest failed for {param}={v}: {e}",
                    file=sys.stderr,
                )
                continue
            metrics = compute_metrics_from_db(out_db)
        finally:
            # Runs on success, on failure (before the `continue`) and on
            # unexpected errors, so temp databases never accumulate.
            _remove_if_exists(out_db)

        if metrics is None:
            if verbose:
                print(f" -> no trades for {param}={v}")
            continue

        results.append((v, metrics))
        if verbose:
            print(
                f" -> return {metrics['total_return_pct']:.2f}%, "
                f"win_rate {metrics['win_rate']:.2%}, "
                f"pf {metrics['profit_factor']:.3f}"
            )

    return results


def _safe_spearman(xs, ys):
    """Return ``(rho, pval)`` for *xs* vs *ys*, or NaNs when undefined."""
    nan = float("nan")
    if len(xs) < 2:
        # Rank correlation needs at least two observations.
        return nan, nan
    try:
        rho, pval = spearmanr(xs, ys)
    except Exception as exc:  # best-effort: one bad sweep must not abort the report
        print(f"[WARN] spearmanr failed: {exc}", file=sys.stderr)
        return nan, nan
    return float(rho), float(pval)


def _sparkline(values):
    """Render *values* as a one-line bar chart scaled between min and max."""
    blocks = "▁▂▃▄▅▆▇█"
    finite = [v for v in values if math.isfinite(v)]
    if not finite:
        return " " * len(values)

    lo = min(finite)
    span = max(finite) - lo
    top = len(blocks) - 1

    chars = []
    for v in values:
        if not math.isfinite(v):
            chars.append(" ")  # NaN/inf has no position on the scale
        elif span == 0:
            chars.append(blocks[0])
        else:
            chars.append(blocks[min(int((v - lo) / span * top), top)])
    return "".join(chars)


def summarize_param(param, sweep_results, args):
    """Print and return the summary of one parameter sweep.

    Args:
        param: Name of the swept parameter.
        sweep_results: List of ``(value, metrics)`` tuples as returned by
            ``run_single_param_sweep``.
        args: Parsed CLI namespace; reads ``no_visual``.

    Returns:
        ``None`` when *sweep_results* is empty.  Otherwise a dict with the
        keys ``param``, ``rho``, ``pval``, ``best_value``,
        ``best_return_pct``, ``best_win_rate``, ``best_profit_factor`` and
        ``best_max_dd``.  ``rho``/``pval`` are NaN when the correlation is
        undefined (for example fewer than two data points).
    """
    if not sweep_results:
        print(f"[{param}] no data collected.")
        return None

    vals = [value for value, _ in sweep_results]
    returns = [metrics["total_return_pct"] for _, metrics in sweep_results]
    rho, pval = _safe_spearman(vals, returns)

    best_value, best = max(
        sweep_results, key=lambda item: item[1]["total_return_pct"]
    )

    print(f"\n=== Parameter: {param} ===")
    print(f"Spearman rho vs total_return_pct: {rho:.3f}, p-value: {pval:.3g}")
    print(
        f"Best {param} = {best_value} -> "
        f"return {best['total_return_pct']:.2f}%, "
        f"win_rate {best['win_rate']:.2%}, "
        f"profit_factor {best['profit_factor']:.3f}, "
        f"max_dd {best['max_drawdown_pct']:.2f}%"
    )

    if not args.no_visual:
        spark = _sparkline(returns)
        print(f"Return sparkline across {param} sweep: {spark}")

    return {
        "param": param,
        "rho": rho,
        "pval": pval,
        "best_value": best_value,
        "best_return_pct": best["total_return_pct"],
        "best_win_rate": best["win_rate"],
        "best_profit_factor": best["profit_factor"],
        "best_max_dd": best["max_drawdown_pct"],
    }


def _build_parser():
    """Create the command-line parser, one --sweep-* option per parameter."""
    parser = argparse.ArgumentParser(
        description="Univariate correlation search for each parameter (fix hour)"
    )
    parser.add_argument(
        "--backtest-script",
        default=DEFAULT_BACKTEST_SCRIPT,
        help="path to short_top_gainers_backtest.py",
    )
    parser.add_argument(
        "--hour", "-o", type=int, default=1, help="fixed entry hour (Kyiv)"
    )
    parser.add_argument(
        "--no-visual",
        action="store_true",
        help="suppress sparkline visual output per parameter",
    )
    parser.add_argument(
        "--quiet-backtest",
        action="store_true",
        help="pass --quiet to underlying backtest to suppress its internal logging",
    )
    parser.add_argument(
        "--no-verbose",
        action="store_true",
        help="suppress sweep progress lines",
    )
    for param, value_type in SWEEP_VALUE_TYPES.items():
        parser.add_argument(
            "--sweep-" + param.replace("_", "-"),
            nargs="+",
            type=value_type,
            help=f"values to sweep for {param}",
        )
    return parser


def main(argv=None):
    """Sweep every parameter, print the summaries and save them as CSV.

    Args:
        argv: Optional list of command-line arguments; ``None`` (the default)
            makes argparse read ``sys.argv``.
    """
    args = _build_parser().parse_args(argv)

    # Command-line overrides win; otherwise use the default grid.
    sweeps = {}
    for param, defaults in PARAM_SWEEPS_DEFAULT.items():
        chosen = getattr(args, f"sweep_{param}")
        sweeps[param] = defaults if chosen is None else chosen

    overall_summary = []
    for param, values in sweeps.items():
        sweep_results = run_single_param_sweep(
            param, values, args.hour, BASE_CONFIG, args
        )
        summary = summarize_param(param, sweep_results, args)
        if summary:
            overall_summary.append(summary)

    out_csv = f"univariate_param_correlation_hour{args.hour}.csv"
    with open(out_csv, "w", newline="", encoding="utf-8") as f:
        writer = csv.DictWriter(f, fieldnames=SUMMARY_FIELDS)
        writer.writeheader()
        writer.writerows(overall_summary)
    print(f"\nSaved summary to {out_csv}")


if __name__ == "__main__":
    main()