
import csv, json
from pathlib import Path
from collections import defaultdict

BASE_DIR = Path("/var/www/vps2.happyuser.info/AIMA_bot")

# Longest reply excerpt (in characters) stored per reply.
MAX_REPLY_CHARS = 200

# Accumulators filled from every Telegram log CSV.
hyp_sent    = defaultdict(int)   # hypothesis -> rows with gate-1 message sent
hyp_replied = defaultdict(int)   # hypothesis -> sent rows marked as replied
hyp_texts   = {}                 # hypothesis -> gate1_text
hyp_replies = defaultdict(list)  # hypothesis -> [distinct reply excerpts]

total_sent    = 0
total_replied = 0


def record_log_row(row):
    """Fold one Telegram-log CSV row into the per-hypothesis accumulators.

    Rows whose ``gate1_message_sent`` is not "yes" (case-insensitive) are
    ignored.  Missing or ``None`` cells -- ``csv.DictReader`` yields ``None``
    for cells absent from a short row -- are treated as empty strings
    instead of raising ``AttributeError``.

    Args:
        row: Mapping of column name to cell value, as yielded by
            ``csv.DictReader``.
    """
    if (row.get("gate1_message_sent") or "").lower() != "yes":
        return
    hyp = (row.get("hypothesis") or "").strip() or "unknown"
    hyp_sent[hyp] += 1
    if (row.get("replied") or "").lower() != "yes":
        return
    hyp_replied[hyp] += 1
    # Truncate BEFORE the duplicate check: the stored entries are excerpts,
    # so comparing the untruncated text would never match a long reply and
    # the same long reply would be stored again and again.
    excerpt = (row.get("reply_text") or "").strip()[:MAX_REPLY_CHARS]
    if excerpt and excerpt not in hyp_replies[hyp]:
        hyp_replies[hyp].append(excerpt)


# Collect all sent messages and replies from every (non-test) log CSV.
for log_csv in sorted((BASE_DIR / "data/processed").glob("aima_far_v*_telegram_log.csv")):
    if "test" in log_csv.stem:
        continue
    # Batch id derived from the file name; nothing in the code shown here reads it.
    batch = log_csv.stem.replace("_telegram_log", "")
    with log_csv.open(encoding="utf-8-sig") as f:
        for row in csv.DictReader(f):
            record_log_row(row)

# Every counted row increments exactly one per-hypothesis counter, so the
# overall totals are the sums of those counters.
total_sent = sum(hyp_sent.values())
total_replied = sum(hyp_replied.values())

# Collect the gate-1 message text for each hypothesis from the contacts CSVs.
# Files are visited in sorted name order and the first non-empty text seen
# for a hypothesis wins.
for contacts_csv in sorted((BASE_DIR / "data/processed").glob("aima_far_v*_contacts.csv")):
    with contacts_csv.open(encoding="utf-8-sig") as f:
        for row in csv.DictReader(f):
            # csv.DictReader yields None for cells missing from a short row,
            # so fall back to "" before stripping instead of crashing.
            hyp = (row.get("hypothesis") or "").strip()
            txt = (row.get("gate1_text") or "").strip()
            if hyp and txt:
                # setdefault keeps an already recorded text untouched.
                hyp_texts.setdefault(hyp, txt)

# Overall totals across all processed log files.
print(f"TOTAL_SENT:{total_sent}")
print(f"TOTAL_REPLIED:{total_replied}")
if total_sent:
    # Reply rate as a percentage with one decimal place.
    print(f"RATE:{total_replied/total_sent*100:.1f}")
else:
    # Nothing was sent: avoid dividing by zero and report a plain zero.
    print("RATE:0")

# Ranking: one (hypothesis, sent, replied, rate-in-percent) tuple per hypothesis.
hyp_list = []
for hyp in hyp_sent.keys() | hyp_replied.keys():
    s = hyp_sent.get(hyp, 0)
    r = hyp_replied.get(hyp, 0)
    rate = r / s * 100 if s else 0
    hyp_list.append((hyp, s, r, rate))

# Best reply rate first, then the larger sample.  The hypothesis name is the
# final tie-breaker: the tuples come from a set of strings, whose iteration
# order can change between runs (hash randomization), so without it the
# order of tied hypotheses would not be reproducible.
hyp_list.sort(key=lambda x: (-x[3], -x[1], x[0]))

# One pipe-delimited report line per hypothesis: the gate-1 text is cut to
# 160 characters and at most five reply excerpts are joined with " /// ".
for hyp, s, r, rate in hyp_list:
    txt = hyp_texts.get(hyp, "")
    replies = hyp_replies.get(hyp, [])
    print(f"HYP:{hyp}|SENT:{s}|REPLIED:{r}|RATE:{rate:.0f}|TEXT:{txt[:160]}|REPLIES:{' /// '.join(replies[:5])}")
