
import asyncio, csv, json, os, sys, sqlite3, time as t
from pathlib import Path

# Base directory; the .env file and every data/processed path are resolved against it.
BASE_DIR   = Path("/var/www/vps2.happyuser.info/AIMA_bot")
# Value bound to the `dataset=?` filter when contacts are selected from SQLite.
DATASET    = "users_without_shop_202605191510"
# Passed to screen() as `needed`: how many reachable contacts to collect.
BATCH_SIZE = 30
# Number of candidates sent in one ImportContactsRequest.
SCREEN_BATCH = 50  # fewer at a time to avoid problems
# Session path handed to TelegramClient.
TG_SESSION = BASE_DIR / "data/processed/telegram/aima_support_session"

def load_env(paths=None):
    """Load ``KEY=VALUE`` pairs from dotenv-style files into ``os.environ``.

    Blank lines, lines starting with ``#`` and lines without ``=`` are
    skipped. Keys and values are stripped of surrounding whitespace; values
    additionally lose surrounding double and single quotes. Variables that
    are already present in the environment are never overwritten.

    Args:
        paths: Optional iterable of files to read. Defaults to the single
            ``.env`` file under ``BASE_DIR``. Files that do not exist are
            skipped.
    """
    if paths is None:
        paths = [BASE_DIR / ".env"]
    for path in paths:
        path = Path(path)
        if not path.exists():
            continue
        for raw in path.read_text(encoding="utf-8", errors="ignore").splitlines():
            line = raw.strip()
            if not line or line.startswith("#") or "=" not in line:
                continue
            key, _, value = line.partition("=")
            key = key.strip()
            if not key:
                # A line like "=value" has no name; an empty name is not a
                # valid environment variable, so skip it instead of failing.
                continue
            os.environ.setdefault(key, value.strip().strip('"').strip("'"))

def norm(v):
    """Return *v* as text reduced to its digits and ``+`` signs.

    Falsy input (``None``, ``""``, ``0``) yields an empty string.
    """
    text = str(v) if v else ""
    return "".join(filter(lambda ch: ch == "+" or ch.isdigit(), text))

def collect_used(processed_dir=None):
    """Gather lead ids and phone numbers already present in processed CSVs.

    Every ``*.csv`` file directly inside *processed_dir* is read in sorted
    order, and the stripped, non-empty values of its ``lead_id`` and
    ``phone`` columns are collected. Phones are stored exactly as written in
    the file apart from the whitespace strip; they are not normalized here.

    Args:
        processed_dir: Directory to scan. Defaults to
            ``BASE_DIR / "data/processed"``.

    Returns:
        A ``(used_leads, used_phones)`` tuple of string sets.
    """
    if processed_dir is None:
        processed_dir = BASE_DIR / "data/processed"
    used_leads, used_phones = set(), set()
    for csv_path in sorted(Path(processed_dir).glob("*.csv")):
        # newline="" leaves newline handling to the csv module, which is
        # required for quoted fields that contain line breaks.
        with csv_path.open(encoding="utf-8-sig", newline="") as f:
            for row in csv.DictReader(f):
                # Missing columns and short rows come back as None.
                lead_id = str(row.get("lead_id") or "").strip()
                phone = str(row.get("phone") or "").strip()
                if lead_id:
                    used_leads.add(lead_id)
                if phone:
                    used_phones.add(phone)
    return used_leads, used_phones

async def screen(candidates, needed):
    """Find up to *needed* candidates whose phone resolves to a Telegram user.

    Candidates are sent in chunks of ``SCREEN_BATCH`` through
    ``ImportContactsRequest``. A row counts as reachable when its normalized
    phone equals the phone of a user returned for that chunk, or when its
    client id is listed in ``retry_contacts`` and a follow-up ``get_entity``
    lookup on the phone succeeds.

    Args:
        candidates: Sequence of rows; index 1 is the first name, index 2 the
            last name and index 3 the phone (expected to be a non-empty
            string).
        needed: Maximum number of reachable rows to return.

    Returns:
        The reachable rows in input order, at most *needed* of them.

    Raises:
        SystemExit: With code 1 when ``TG_API_ID`` / ``TG_API_HASH`` are not
            set or the stored session is not authorized.
    """
    load_env()
    api_id = os.environ.get("TG_API_ID")
    api_hash = os.environ.get("TG_API_HASH")
    if not api_id or not api_hash:
        # Fail with a readable message instead of a TypeError from int(None).
        print("[error] TG_API_ID / TG_API_HASH are not set")
        sys.exit(1)

    # Imported lazily so the third-party dependency is only needed when
    # screening actually runs.
    from telethon import TelegramClient
    from telethon.tl.functions.contacts import ImportContactsRequest
    from telethon.tl.types import InputPhoneContact

    client = TelegramClient(str(TG_SESSION), int(api_id), api_hash)
    found = []
    total_scanned = 0

    await client.connect()
    try:
        if not await client.is_user_authorized():
            print("[error] Not authorized")
            sys.exit(1)
        print("[ok] Connected", flush=True)

        for offset in range(0, len(candidates), SCREEN_BATCH):
            if len(found) >= needed:
                break
            chunk = candidates[offset:offset + SCREEN_BATCH]
            # client_id starts at 1, not 0
            by_client_id = {offset + i + 1: row for i, row in enumerate(chunk)}
            req = [
                InputPhoneContact(
                    client_id=cid,
                    phone=row[3],
                    first_name=row[1] or row[3][-4:],
                    last_name=row[2] or "",
                )
                for cid, row in by_client_id.items()
            ]

            started = t.perf_counter()
            result = await client(ImportContactsRequest(req))
            elapsed = t.perf_counter() - started

            found_phones = {
                norm(getattr(user, "phone", "") or "").lstrip("+")
                for user in result.users
            }

            # Retry fallback: contacts the server asked to retry are looked
            # up one by one; a failed lookup just means "not reachable".
            retry_ok = 0
            for cid in set(getattr(result, "retry_contacts", [])):
                row = by_client_id.get(cid)
                if row is None:
                    continue
                try:
                    await client.get_entity(row[3])
                except Exception as exc:
                    print(f"[retry] {row[3]} not resolved: {exc!r}", flush=True)
                else:
                    found_phones.add(norm(row[3]).lstrip("+"))
                    retry_ok += 1

            # A user without a phone normalizes to ""; without this a
            # candidate whose phone contains no digits would match it.
            found_phones.discard("")

            for row in chunk:
                if norm(row[3]).lstrip("+") in found_phones:
                    found.append(row)
                    if len(found) >= needed:
                        break

            total_scanned += len(chunk)
            print(f"[screen] scanned={total_scanned} direct={len(result.users)} retry_ok={retry_ok} found={len(found)} elapsed={elapsed:.1f}s", flush=True)
    finally:
        # Always release the connection, including on sys.exit or an API error.
        await client.disconnect()

    print(f"[done] {len(found)} досяжних (перевірено {total_scanned})", flush=True)
    return found[:needed]

# Script body: select unused contacts from SQLite, screen them on Telegram
# and store the reachable ones as JSON.
load_env()
conn = sqlite3.connect(str(BASE_DIR/"data/processed/aima_conversion_shadow.sqlite"))
try:
    cur = conn.cursor()
    # Start from the BEGINNING of the dataset (row_index ASC): the author's
    # assumption is that older registrations are more reachable.
    cur.execute("SELECT lead_id, first_name, last_name, phone, registered_at, last_activity_at FROM aima_imported_contacts WHERE dataset=? ORDER BY row_index ASC", (DATASET,))
    all_rows = cur.fetchall()
finally:
    # Close the connection even if the query fails.
    conn.close()

used_leads, used_phones = collect_used()
# Also compare phones in normalized form, so "+380…" in a CSV matches
# "380…" in the database and an already-used contact is not screened again.
used_phone_keys = {norm(p).lstrip("+") for p in used_phones}
used_phone_keys.discard("")
candidates = [
    r for r in all_rows
    if r[3]
    and str(r[0]) not in used_leads
    and r[3] not in used_phones
    and norm(r[3]).lstrip("+") not in used_phone_keys
]
print(f"[screen] Кандидатів: {len(candidates)}", flush=True)

# asyncio.run creates and closes the event loop; calling get_event_loop()
# with no running loop is deprecated.
found = asyncio.run(screen(candidates, BATCH_SIZE))

if not found:
    print("[warn] Досяжних не знайдено"); sys.exit(2)

result_path = BASE_DIR / "data/processed/aima_screened_v11_candidates.json"
result_data = [{"lead_id": r[0], "first_name": r[1] or "", "last_name": r[2] or "",
                "phone": r[3], "registered_at": r[4] or "", "last_activity_at": r[5] or ""}
               for r in found]
result_path.write_text(json.dumps(result_data, ensure_ascii=False, indent=2), encoding="utf-8")
print(f"[ok] Збережено {len(found)} → {result_path.name}", flush=True)
for r in found:
    print(f"  {r[3]} {r[1]} {r[2]}")
