
import csv, json
from pathlib import Path
from collections import defaultdict

# Root of the bot deployment; all inputs and the report live under
# BASE_DIR/data/processed.
BASE_DIR = Path("/var/www/vps2.happyuser.info/AIMA_bot")
PROCESSED_DIR = BASE_DIR / "data/processed"

# Stored reply snippets are cut to this many characters.
MAX_REPLY_CHARS = 200


def _text(row: dict, key: str) -> str:
    """Return the stripped value of *key* in *row*, or "" if absent.

    csv.DictReader fills missing trailing fields with None, so a plain
    ``row.get(key, "")`` is not enough to guarantee a string.
    """
    return (row.get(key) or "").strip()


def _is_yes(row: dict, key: str) -> bool:
    """Return True when the *key* column equals "yes" (case-insensitive)."""
    return (row.get(key) or "").lower() == "yes"


def _collect_log_stats(processed_dir: Path) -> tuple:
    """Count sent/replied messages per hypothesis from the telegram logs.

    Reads every ``aima_far_v*_telegram_log.csv`` in *processed_dir* whose
    stem does not contain "test". Only rows with gate1_message_sent == "yes"
    are counted; rows without a hypothesis are grouped under "unknown".

    Returns:
        (sent, replied, replies): two ``{hypothesis: count}`` mappings and a
        ``{hypothesis: [snippet, ...]}`` mapping of unique reply snippets,
        in first-seen order.
    """
    sent = defaultdict(int)
    replied = defaultdict(int)
    replies = defaultdict(list)

    for log_csv in sorted(processed_dir.glob("aima_far_v*_telegram_log.csv")):
        if "test" in log_csv.stem:
            continue
        # newline="" lets the csv module handle newlines inside quoted
        # fields itself, as its documentation requires.
        with log_csv.open(encoding="utf-8-sig", newline="") as f:
            for row in csv.DictReader(f):
                if not _is_yes(row, "gate1_message_sent"):
                    continue
                hyp = _text(row, "hypothesis") or "unknown"
                sent[hyp] += 1
                if not _is_yes(row, "replied"):
                    continue
                replied[hyp] += 1
                # Truncate BEFORE the duplicate check: the list holds
                # truncated snippets, so comparing the full text would never
                # match a long reply and it would be stored repeatedly.
                snippet = _text(row, "reply_text")[:MAX_REPLY_CHARS]
                if snippet and snippet not in replies[hyp]:
                    replies[hyp].append(snippet)

    return sent, replied, replies


def _collect_gate1_texts(processed_dir: Path) -> dict:
    """Return ``{hypothesis: gate1_text}`` from the contacts files.

    Reads every ``aima_far_v*_contacts.csv`` in *processed_dir* in sorted
    order; the first non-empty text seen for a hypothesis wins.
    NOTE(review): unlike the log files, contacts files with "test" in the
    name are not skipped here -- kept as in the original; confirm intent.
    """
    texts = {}
    for contacts_csv in sorted(processed_dir.glob("aima_far_v*_contacts.csv")):
        with contacts_csv.open(encoding="utf-8-sig", newline="") as f:
            for row in csv.DictReader(f):
                hyp = _text(row, "hypothesis")
                msg = _text(row, "gate1_text")
                if hyp and msg:
                    texts.setdefault(hyp, msg)
    return texts


def build_report(processed_dir: Path) -> dict:
    """Assemble the per-hypothesis statistics report for *processed_dir*.

    Returns a dict with "total_sent", "total_replied" and "hypotheses", the
    latter being a list of dicts (name, sent, replied, rate in percent
    rounded to one decimal, text, replies) sorted by descending rate, then
    descending sent count, then name.
    """
    sent, replied, replies = _collect_log_stats(processed_dir)
    texts = _collect_gate1_texts(processed_dir)

    hypotheses = []
    # A reply is only counted for a row that was also counted as sent, so
    # the keys of `sent` cover every hypothesis and each count is >= 1.
    for hyp, sent_count in sent.items():
        replied_count = replied.get(hyp, 0)
        hypotheses.append({
            "name": hyp,
            "sent": sent_count,
            "replied": replied_count,
            "rate": round(replied_count / sent_count * 100, 1),
            "text": texts.get(hyp, ""),
            "replies": replies.get(hyp, []),
        })

    # The name is the final tie-breaker so the output order is reproducible
    # from run to run.
    hypotheses.sort(key=lambda h: (-h["rate"], -h["sent"], h["name"]))

    return {
        "total_sent": sum(sent.values()),
        "total_replied": sum(replied.values()),
        "hypotheses": hypotheses,
    }


def main() -> None:
    """Build the report, write stats_for_report.json and print a summary."""
    result = build_report(PROCESSED_DIR)
    out_path = BASE_DIR / "data/processed/stats_for_report.json"
    out_path.write_text(json.dumps(result, ensure_ascii=False, indent=2), encoding="utf-8")
    print("OK")
    print(f"total_sent={result['total_sent']} total_replied={result['total_replied']}")


if __name__ == "__main__":
    main()
