"""
Standalone TG Premium screening test.
Scans all unprocessed candidates from both datasets.
Reports: total, found (TG confirmed), retry_contacts (privacy-restricted, Premium may resolve).
Run on server: python tmp_premium_screen.py
"""
import asyncio, csv, json, os, sqlite3
from pathlib import Path

# Directory containing this script; every data path below is built from it.
BASE_DIR   = Path(__file__).parent
# SQLite database that get_candidates() reads contacts from.
DB         = BASE_DIR / "data/processed/aima_conversion_shadow.sqlite"
# Session path handed to telethon's TelegramClient in screen().
TG_SESSION = BASE_DIR / "data/processed/telegram/aima_support_session"
# Number of contacts sent per ImportContactsRequest in screen().
SCREEN_BATCH_SIZE = 100

# Dataset names matched against the `dataset` column; screened in this order.
DATASETS = [
    "users_without_shop_202605191510",  # what v14 failed on (379 candidates, 0-1 TG)
    "lost_shops_202605191501",          # what v13 used (700+ total)
]


def load_env():
    """Populate ``os.environ`` from the ``.env`` file next to this script.

    Blank lines, ``#`` comment lines and lines without ``=`` are skipped.
    Keys and values are whitespace-stripped and values additionally lose any
    surrounding quote characters. Variables already present in the
    environment are left untouched. A missing file is silently ignored.
    """
    env_path = BASE_DIR / ".env"
    if not env_path.exists():
        return

    content = env_path.read_text(encoding="utf-8", errors="ignore")
    for raw in content.splitlines():
        entry = raw.strip()
        if entry == "" or entry[0] == "#" or "=" not in entry:
            continue
        # Split on the first "=" only, so values may themselves contain "=".
        name, _, value = entry.partition("=")
        cleaned = value.strip().strip('"').strip("'")
        os.environ.setdefault(name.strip(), cleaned)


def normalize_phone(ph):
    """Return ``ph`` reduced to its digit characters and ``+`` signs.

    Falsy input (``None``, ``""``, ``0``) yields an empty string; any other
    value is converted with ``str()`` before filtering.
    """
    text = str(ph) if ph else ""
    kept = []
    for ch in text:
        if ch == "+" or ch.isdigit():
            kept.append(ch)
    return "".join(kept)


def collect_used():
    """Gather lead ids and phones that appear in any processed CSV.

    Every ``*.csv`` directly under ``data/processed`` is read (in sorted
    order, as UTF-8 with an optional BOM). Non-empty, whitespace-stripped
    values from the ``lead_id`` and ``phone`` columns are collected.

    Returns:
        ``(used_leads, used_phones)`` — two sets of strings.
    """
    def _cell(record, column):
        # Missing columns and None values both collapse to "".
        return str(record.get(column, "") or "").strip()

    lead_ids = set()
    phones = set()
    processed_dir = BASE_DIR / "data/processed"
    for csv_path in sorted(processed_dir.glob("*.csv")):
        with csv_path.open(encoding="utf-8-sig") as handle:
            for record in csv.DictReader(handle):
                lead = _cell(record, "lead_id")
                phone = _cell(record, "phone")
                if lead:
                    lead_ids.add(lead)
                if phone:
                    phones.add(phone)
    return lead_ids, phones


def get_candidates(dataset, used_leads, used_phones):
    """Fetch a dataset's contacts and select the ones not processed yet.

    Args:
        dataset: value matched against the ``dataset`` column of
            ``aima_imported_contacts``.
        used_leads: lead ids (as strings) to exclude.
        used_phones: phone numbers to exclude. They are matched both verbatim
            and after normalization, so a formatting difference such as
            ``+380...`` vs ``380...`` does not let an already-used number
            through.

    Returns:
        ``(rows, candidates)``: every ``(lead_id, first_name, last_name,
        phone)`` row of the dataset in ``row_index`` order, and the subset
        that has a phone and matches neither ``used_leads`` nor
        ``used_phones``.
    """
    conn = sqlite3.connect(str(DB))
    try:
        rows = conn.execute(
            "SELECT lead_id, first_name, last_name, phone FROM aima_imported_contacts "
            "WHERE dataset = ? ORDER BY row_index",
            (dataset,),
        ).fetchall()
    finally:
        # Close the connection even when the query raises.
        conn.close()

    # Same canonical form screen() uses when matching Telegram results.
    used_keys = {normalize_phone(p).lstrip("+") for p in used_phones}
    # A used phone without any digits must not exclude other digit-less phones.
    used_keys.discard("")

    candidates = []
    for row in rows:
        lead_id, phone = row[0], row[3]
        if not phone or str(lead_id) in used_leads:
            continue
        if phone in used_phones or normalize_phone(phone).lstrip("+") in used_keys:
            continue
        candidates.append(row)
    return rows, candidates


async def screen(candidates, label):
    """Screen candidate phones against Telegram and print a reachability summary.

    Contacts are imported in batches of ``SCREEN_BATCH_SIZE`` through
    ``ImportContactsRequest``. A candidate counts as "confirmed" when a
    returned user's phone matches it after normalization; candidates whose
    client id is listed in ``retry_contacts`` are additionally counted as
    reachable.

    Args:
        candidates: rows shaped ``(lead_id, first_name, last_name, phone)``.
        label: name printed in the progress and result headers.

    Returns:
        The number of reachable candidates (confirmed + retry), or ``None``
        when the credentials are missing or the session is not authorized.
    """
    load_env()
    api_id = os.environ.get("TG_API_ID")
    api_hash = os.environ.get("TG_API_HASH")
    if not api_id or not api_hash:
        print("ERROR: TG_API_ID/TG_API_HASH missing")
        return None

    # Imported here rather than at module top, after the credential check.
    from telethon import TelegramClient
    from telethon.tl.functions.contacts import ImportContactsRequest
    from telethon.tl.types import InputPhoneContact

    found_confirmed = []
    found_retry_resolved = []
    total_retry = 0
    total_scanned = 0

    client = TelegramClient(str(TG_SESSION), int(api_id), api_hash)
    await client.connect()
    try:
        if not await client.is_user_authorized():
            print("ERROR: session not authorized")
            return None

        # Check if Premium
        me = await client.get_me()
        print(f"\n[{label}] Logged in as: {me.first_name} | Premium: {getattr(me, 'premium', False)}")

        for offset in range(0, len(candidates), SCREEN_BATCH_SIZE):
            chunk = candidates[offset:offset + SCREEN_BATCH_SIZE]
            # client_id is the candidate's global index, so ids echoed back in
            # retry_contacts can be mapped to rows again.
            contacts_req = [
                InputPhoneContact(client_id=offset + i, phone=r[3],
                                  first_name=r[1] or r[3][-4:], last_name=r[2] or "")
                for i, r in enumerate(chunk)
            ]
            result = await client(ImportContactsRequest(contacts_req))
            found_phones = {
                normalize_phone(getattr(u, "phone", "") or "").lstrip("+")
                for u in result.users
            }
            # A returned user without a phone must not "confirm" a candidate
            # whose own phone normalizes to an empty string.
            found_phones.discard("")
            for r in chunk:
                if normalize_phone(r[3]).lstrip("+") in found_phones:
                    found_confirmed.append(r)

            # retry_contacts are counted as reachable (the script's working
            # assumption: privacy blocks the lookup but Premium DMs still work).
            # NOTE(review): Telegram's API docs describe retry_contacts as ids
            # that could not be imported due to a system limitation and should
            # be retried later, not as privacy-restricted users. Confirm
            # before relying on this number.
            retry_ids = set(getattr(result, "retry_contacts", []))
            total_retry += len(retry_ids)
            if retry_ids:
                cid_map = {offset + i: r for i, r in enumerate(chunk)}
                for cid in retry_ids:
                    if cid in cid_map:
                        found_retry_resolved.append(cid_map[cid])

            total_scanned += len(chunk)
            print(f"  scanned={total_scanned}/{len(candidates)} "
                  f"confirmed={len(found_confirmed)} retry(premium)={total_retry} "
                  f"total_reachable={len(found_confirmed)+len(found_retry_resolved)}")
    finally:
        # Always release the connection: on the unauthorized early return and
        # when a request raises mid-scan, not only on success.
        await client.disconnect()

    total_reachable = len(found_confirmed) + len(found_retry_resolved)
    print(f"\n=== [{label}] RESULTS ===")
    print(f"  Кандидатів (не відправлених): {len(candidates)}")
    print(f"  TG confirmed (ImportContacts): {len(found_confirmed)}")
    print(f"  Retry (privacy, Premium OK):   {total_retry}")
    print(f"  ВСЬОГО ДОСЯЖНИХ:               {total_reachable}")
    print(f"  Достатньо для батча 30?        {'✅ ТАК' if total_reachable >= 30 else '❌ НІ'}")
    return total_reachable


async def main():
    """Screen every dataset in ``DATASETS`` for unprocessed candidates.

    Loads the environment, collects the already-used lead ids and phones,
    then for each dataset prints its row counts and runs ``screen`` when at
    least one unprocessed candidate remains.
    """
    load_env()
    used_leads, used_phones = collect_used()
    lead_count = len(used_leads)
    phone_count = len(used_phones)
    print(f"Used leads: {lead_count}, used phones: {phone_count}")

    for name in DATASETS:
        db_rows, pending = get_candidates(name, used_leads, used_phones)
        print(f"\nDataset: {name}")
        print(f"  Total in DB: {len(db_rows)}")
        print(f"  Unprocessed: {len(pending)}")
        if not pending:
            print("  (нема кандидатів)")
            continue
        await screen(pending, name)


# Run the screening only when executed as a script, not when imported.
if __name__ == "__main__":
    asyncio.run(main())
