"""
Build the v16 batch (see BATCH_KEY): select up to 30 unprocessed candidates from the
lost_shops_202605191501 dataset (see DATASET), assign v13 hypotheses (FAR_A / FAR_C),
write CSVs, update bot state.
Run on server BEFORE aima_batch_send_v16.py.
"""
import asyncio, csv, json, os, sqlite3
from datetime import datetime
from pathlib import Path

# Directory containing this script; every path below is resolved against it.
BASE_DIR = Path(__file__).parent

# SQLite database queried for imported contacts.
DB = BASE_DIR / "data" / "processed" / "aima_conversion_shadow.sqlite"
# Session path handed to the Telegram client.
TG_SESSION = BASE_DIR / "data" / "processed" / "telegram" / "aima_support_session"
# JSON bot-state file rewritten once the batch has been built.
STATE_FILE = BASE_DIR / "data" / "processed" / "aima_bot_state.json"

# Value matched against the `dataset` column when loading candidates.
DATASET = "lost_shops_202605191501"
# Batch identifier: embedded in output file names and used as the bot-state key.
BATCH_KEY = "v16"

BATCH_SIZE = 30  # target number of contacts per batch
MIN_BATCH = 25  # accept partial batch if dataset running low
SCREEN_CHUNK = 50  # contacts submitted per ImportContactsRequest call

# Gate-1 opening messages. Each hypothesis carries the label written to the
# "variant"/"hypothesis" CSV columns and the text stored as "gate1_text".
HYP_A = dict(
    label="variant_A",
    text="Привіт, це Настя з AIMA. Хотіла коротко уточнити: для вас ще актуальна ідея створити власний інтернет-магазин?",
)
# NOTE(review): this constant is named HYP_C but its label is "variant_B" —
# confirm the naming is intentional.
HYP_C = dict(
    label="variant_B",
    text="Привіт, це Настя з AIMA. Вже 500+ українців запустили магазини через нас. Ваша ніша ще вільна?",
)

# Per-batch artefacts; BATCH_KEY is embedded in each file name.
CONTACTS_CSV = BASE_DIR / f"data/processed/aima_far_{BATCH_KEY}_contacts.csv"
LOG_CSV = BASE_DIR / f"data/processed/aima_far_{BATCH_KEY}_telegram_log.csv"
SEND_SCRIPT = BASE_DIR / f"src/aima_batch_send_{BATCH_KEY}.py"

# Column order of the contacts CSV written for each batch.
CONTACT_FIELDS = [
    # batch / lead identity
    "pilot_id",
    "pilot_name",
    "lead_id",
    "dataset",
    "segment",
    # experiment assignment
    "variant",
    "hypothesis",
    "gate1_text",
    # contact details copied from the database row
    "phone",
    "first_name",
    "last_name",
    "registered_at",
    "last_activity_at",
    # bookkeeping columns
    "worker-do-not-send",
    "pre_contact_quality",
    "contamination_status",
    "notes",
]

# Column order of the Telegram log CSV (this script only writes its header).
LOG_FIELDS = [
    "pilot_id",
    "lead_id",
    "dataset",
    "hypothesis",
    "phone",
    "worker-do-not-send",
    "added_to_telegram",
    "telegram_contact_name",
    "gate1_message_sent",
    "replied",
    "reply_text",
    "next_gate",
    "notes",
    "sent_at_utc",
    "recipient_id",
    "message_len",
    "message_preview",
    "chat_started",
    "replied_at_utc",
    "reply_category",
    "pre_contact_quality",
    "contamination_status",
    "qualified_product_reply",
    "next_step_accepted",
    "negative_stop",
    "reply_class",
]


def load_env():
    """Populate os.environ from ``BASE_DIR/.env`` without overriding existing values.

    Blank lines, lines starting with ``#`` and lines without ``=`` are skipped.
    Each remaining line is split on the first ``=``; the key is stripped of
    whitespace, the value of whitespace and surrounding quote characters.
    Does nothing when the file is absent.
    """
    env_path = BASE_DIR / ".env"
    if not env_path.exists():
        return
    for raw in env_path.read_text(errors="ignore").splitlines():
        entry = raw.strip()
        if not entry or entry.startswith("#") or "=" not in entry:
            continue
        key, _, value = entry.partition("=")
        # setdefault: variables already present in the environment win.
        os.environ.setdefault(key.strip(), value.strip().strip('"').strip("'"))


def normalize(ph):
    """Reduce a phone value to its digits and ``+`` characters.

    Falsy input (``None``, ``""``, ``0``) yields an empty string; anything
    else is converted with ``str()`` before filtering.
    """
    raw = str(ph or "")
    kept = [ch for ch in raw if ch == "+" or ch.isdigit()]
    return "".join(kept)


def collect_used():
    """Gather every lead id and phone already present in the processed CSVs.

    Scans all ``*.csv`` files directly under ``BASE_DIR/data/processed`` and
    reads their ``lead_id`` and ``phone`` columns (files lacking a column
    simply contribute nothing for it). Values are whitespace-stripped; empty
    values are dropped.

    Returns:
        A ``(used_leads, used_phones)`` tuple of string sets.
    """
    def _cell(record, column):
        # Missing or None cells collapse to "" before stripping.
        return str(record.get(column, "") or "").strip()

    leads_seen = set()
    phones_seen = set()
    processed_dir = BASE_DIR / "data/processed"
    for csv_path in sorted(processed_dir.glob("*.csv")):
        with csv_path.open(encoding="utf-8-sig") as handle:
            records = list(csv.DictReader(handle))
        leads_seen.update(v for v in (_cell(r, "lead_id") for r in records) if v)
        phones_seen.update(v for v in (_cell(r, "phone") for r in records) if v)
    return leads_seen, phones_seen


def next_pilot_id():
    """Return the next free pilot id across all existing batch contact CSVs.

    Reads the ``pilot_id`` column of every ``aima_far_v*_contacts.csv`` under
    ``BASE_DIR/data/processed`` and returns the largest value found plus one.
    The running maximum starts at 120, so the result is never below 121.
    Empty or non-numeric ``pilot_id`` cells are skipped.
    """
    max_id = 120
    for path in (BASE_DIR / "data/processed").glob("aima_far_v*_contacts.csv"):
        with path.open(encoding="utf-8-sig") as f:
            for row in csv.DictReader(f):
                try:
                    max_id = max(max_id, int(row.get("pilot_id") or 0))
                except (TypeError, ValueError):
                    # Only unparsable cells are ignored; a bare `except` would
                    # also swallow KeyboardInterrupt / SystemExit.
                    continue
    return max_id + 1


async def screen_candidates(candidates):
    """Screen candidate rows against Telegram via ImportContactsRequest.

    Candidates are submitted in chunks of SCREEN_CHUNK. Screening stops early
    once at least BATCH_SIZE rows are confirmed.

    Args:
        candidates: sequence of DB rows indexed as
            ``(lead_id, first_name, last_name, phone, ...)``.

    Returns:
        ``(confirmed, retry_only)``: ``confirmed`` holds rows whose normalized
        phone matches the phone of a user returned by Telegram; ``retry_only``
        holds rows whose client_id Telegram listed in ``retry_contacts``.
        The caller decides whether ``retry_only`` rows are usable.

    Raises:
        SystemExit: if TG_API_ID or TG_API_HASH is not configured.

    NOTE(review): ImportContactsRequest presumably adds the submitted phones
    to the session account's contact list — confirm this side effect is
    acceptable.
    """
    load_env()
    api_id = os.environ.get("TG_API_ID")
    api_hash = os.environ.get("TG_API_HASH")
    if not api_id or not api_hash:
        # Fail with a readable message instead of an opaque int(None) TypeError.
        raise SystemExit("TG_API_ID / TG_API_HASH are not set (environment or .env)")

    # Function-scope imports: importing this module does not require Telethon.
    from telethon import TelegramClient
    from telethon.tl.functions.contacts import ImportContactsRequest
    from telethon.tl.types import InputPhoneContact

    client = TelegramClient(str(TG_SESSION), int(api_id), api_hash)
    await client.connect()

    confirmed, retry_only = [], []
    total_scanned = 0
    try:
        for offset in range(0, len(candidates), SCREEN_CHUNK):
            chunk = candidates[offset:offset + SCREEN_CHUNK]
            # client_id is the row's global index, so retry_contacts entries
            # map straight back to the row they refer to.
            by_client_id = {offset + i: row for i, row in enumerate(chunk)}
            contacts_req = [
                InputPhoneContact(
                    client_id=cid,
                    phone=str(row[3]),
                    # Fall back to the last four digits when no name is stored.
                    first_name=row[1] or str(row[3])[-4:],
                    last_name=row[2] or "",
                )
                for cid, row in by_client_id.items()
            ]
            result = await client(ImportContactsRequest(contacts_req))

            found_phones = {
                normalize(getattr(u, "phone", "") or "").lstrip("+")
                for u in result.users
            }
            retry_ids = set(getattr(result, "retry_contacts", None) or [])

            for cid, row in by_client_id.items():
                if normalize(row[3]).lstrip("+") in found_phones:
                    confirmed.append(row)
                elif cid in retry_ids:
                    retry_only.append(row)

            total_scanned += len(chunk)
            print(f"  scanned={total_scanned}/{len(candidates)} "
                  f"confirmed={len(confirmed)} retry={len(retry_only)}")
            if len(confirmed) >= BATCH_SIZE:
                break
    finally:
        # Always release the session, even when a request fails mid-scan.
        await client.disconnect()

    return confirmed, retry_only


def _unprocessed_candidates(all_rows, used_leads, used_phones):
    """Return DB rows not used before, keeping at most one row per phone number."""
    # Compare digits only, so "+380…" and "380…" count as the same number.
    used_phone_keys = {normalize(p).lstrip("+") for p in used_phones}
    seen_keys = set()
    fresh = []
    for row in all_rows:
        lead_id, phone = row[0], row[3]
        if not phone or str(lead_id) in used_leads:
            continue
        key = normalize(phone).lstrip("+")
        # Skip phones without digits, phones already contacted, and repeats
        # of a phone that appears earlier in this dataset.
        if not key or key in used_phone_keys or key in seen_keys:
            continue
        seen_keys.add(key)
        fresh.append(row)
    return fresh


def _build_contact_rows(selected, pilot_id_start):
    """Build contacts-CSV rows: first half of `selected` gets HYP_A, the rest HYP_C."""
    # Split on the actual batch length so a partial batch stays balanced
    # (for a full batch this equals BATCH_SIZE // 2).
    half = (len(selected) + 1) // 2
    rows = []
    for i, (lead_id, first_name, last_name, phone, reg_at, last_act) in enumerate(selected):
        hyp = HYP_A if i < half else HYP_C
        rows.append({
            "pilot_id": pilot_id_start + i,
            "pilot_name": f"manual_far_{BATCH_KEY}",
            "lead_id": lead_id,
            "dataset": DATASET,
            "segment": "opened_no_store",
            "variant": hyp["label"],
            "hypothesis": hyp["label"],
            "gate1_text": hyp["text"],
            "phone": phone,
            "first_name": first_name or "",
            "last_name": last_name or "",
            "registered_at": reg_at or "",
            "last_activity_at": last_act or "",
            "worker-do-not-send": "",
            "pre_contact_quality": "confirmed_tg",
            "contamination_status": "unknown",
            "notes": "",
        })
    return rows


def _write_batch_files(contact_rows):
    """Write the contacts CSV (overwriting) and create the log CSV header if missing."""
    with CONTACTS_CSV.open("w", encoding="utf-8-sig", newline="") as f:
        w = csv.DictWriter(f, fieldnames=CONTACT_FIELDS, extrasaction="ignore")
        w.writeheader()
        w.writerows(contact_rows)
    print(f"[wrote] {CONTACTS_CSV.name} ({len(contact_rows)} rows)")

    # Only the header is written here; this script never writes per-lead log
    # rows, and an existing log file is left untouched.
    if not LOG_CSV.exists():
        with LOG_CSV.open("w", encoding="utf-8-sig", newline="") as f:
            w = csv.DictWriter(f, fieldnames=LOG_FIELDS, extrasaction="ignore")
            w.writeheader()
        print(f"[wrote] {LOG_CSV.name} (header only)")


def _update_bot_state():
    """Register this batch in the bot-state JSON and mark today's run as approved."""
    state = json.loads(STATE_FILE.read_text(encoding="utf-8"))
    # setdefault: tolerate a state file that has no "proposals" section yet.
    state.setdefault("proposals", {})[BATCH_KEY] = {
        "name": f"LOST_SHOPS {BATCH_KEY.upper()} - {BATCH_SIZE} (Premium TG)",
        "contacts_csv": str(CONTACTS_CSV),
        "log_csv": str(LOG_CSV),
        "send_script": str(SEND_SCRIPT),
        "gdrive_url": "",
    }
    state["active_batch"] = BATCH_KEY
    state["pending"] = BATCH_KEY
    # One timestamp for both fields so "date" and "approved_at" cannot
    # straddle midnight.
    now = datetime.now()
    state["daily"] = {
        "date": now.strftime("%Y-%m-%d"),
        "status": "approved",
        "morning_msg_id": None,
        "send_triggered": False,
        "sent_count": None,
        "approved_by": "manual",
        "approved_at": now.isoformat(),
        "sent_at": None,
        "reminder_sent": False,
    }
    STATE_FILE.write_text(json.dumps(state, ensure_ascii=False, indent=2), encoding="utf-8")
    print(f"[state] active_batch={BATCH_KEY}, status=approved")


def main():
    """Build the batch: pick fresh leads, screen them on Telegram, write outputs.

    Steps: load ``.env``; collect already-used leads/phones from the processed
    CSVs; load the DATASET rows from the SQLite DB; drop used or duplicate
    phones; screen the remainder on Telegram; keep up to BATCH_SIZE confirmed
    rows; write the contacts CSV and log header; update the bot state.

    Raises:
        SystemExit: if fewer than MIN_BATCH candidates are available, or fewer
            than MIN_BATCH are confirmed on Telegram.
    """
    load_env()

    if CONTACTS_CSV.exists():
        print(f"[warn] {CONTACTS_CSV.name} already exists — will overwrite")

    used_leads, used_phones = collect_used()
    print(f"[used] leads={len(used_leads)} phones={len(used_phones)}")

    conn = sqlite3.connect(str(DB))
    try:
        all_rows = conn.execute(
            "SELECT lead_id, first_name, last_name, phone, registered_at, last_activity_at "
            "FROM aima_imported_contacts WHERE dataset=? ORDER BY row_index",
            (DATASET,),
        ).fetchall()
    finally:
        # Close the connection even if the query fails.
        conn.close()

    candidates = _unprocessed_candidates(all_rows, used_leads, used_phones)
    print(f"[candidates] total_in_db={len(all_rows)} unprocessed={len(candidates)}")

    # MIN_BATCH is the documented floor for a partial batch; requiring
    # BATCH_SIZE here would make that allowance unreachable.
    if len(candidates) < MIN_BATCH:
        raise SystemExit(f"Not enough candidates: {len(candidates)} < {MIN_BATCH}")

    print("[screen] checking Telegram availability...")
    # asyncio.run replaces the deprecated get_event_loop().run_until_complete().
    confirmed, retry_only = asyncio.run(screen_candidates(candidates))

    # Use ONLY confirmed contacts (retry_contacts can't be resolved by phone even with Premium)
    selected = confirmed[:BATCH_SIZE]
    if len(selected) < MIN_BATCH:
        raise SystemExit(
            f"Not enough CONFIRMED contacts: {len(selected)} < {MIN_BATCH} "
            f"(retry={len(retry_only)} but unreachable by phone)"
        )
    if len(selected) < BATCH_SIZE:
        print(f"[warn] Only {len(selected)} confirmed found (target={BATCH_SIZE}), proceeding with partial batch")

    print(f"[selected] {len(selected)} confirmed candidates (retry skipped={len(retry_only)})")

    contact_rows = _build_contact_rows(selected, next_pilot_id())
    _write_batch_files(contact_rows)
    _update_bot_state()
    print(f"\n[ready] Run: python src/aima_batch_send_{BATCH_KEY}.py")


# Build the batch only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
