import argparse
import csv
import json
from datetime import datetime, timezone
from pathlib import Path
from typing import Any, Dict, List, Optional, Tuple


# Default location of the pilot log CSV; used as the default of the --log-csv option.
DEFAULT_LOG_CSV = Path("data/processed/aima_manual_segmented_tail20_v3_telegram_log.csv")
# Default location of the message ledger CSV; used as the default of the --ledger-csv option.
DEFAULT_LEDGER_CSV = Path("data/processed/aima_telegram_message_ledger.csv")

# Pilot-log columns. Together with EXTRA_FIELDS these form the header that
# read_csv() returns for a missing file and the set of columns it adds to an
# existing file when they are absent.
BASE_FIELDS = [
    "pilot_id",
    "lead_id",
    "dataset",
    "hypothesis",
    "phone",
    "added_to_telegram",
    "telegram_contact_name",
    "gate1_message_sent",
    "replied",
    "reply_text",
    "next_gate",
    "notes",
]

# Additional pilot-log columns filled in by this script: log_sent() writes
# sent_at_utc, recipient_id, message_len and message_preview; mark_replied()
# writes chat_started, replied_at_utc and reply_category.
EXTRA_FIELDS = [
    "sent_at_utc",
    "recipient_id",
    "message_len",
    "message_preview",
    "chat_started",
    "replied_at_utc",
    "reply_category",
]

# Column order of the message ledger CSV written by append_ledger(); keys
# outside this list are ignored when a ledger row is written.
LEDGER_FIELDS = [
    "created_at_utc",
    "direction",
    "channel",
    "pilot_id",
    "lead_id",
    "dataset",
    "hypothesis",
    "phone",
    "recipient_id",
    "message_len",
    "message_preview",
    "status",
    "notes",
]


def utc_now() -> str:
    """Return the current UTC time as an ISO-8601 string with whole seconds."""
    current = datetime.now(timezone.utc)
    truncated = current.replace(microsecond=0)
    return truncated.isoformat()


def normalize_phone(value: str) -> str:
    """Return *value* (coerced to ``str``) with everything but digits and ``+`` removed."""
    kept = []
    for char in str(value):
        if char == "+" or char.isdigit():
            kept.append(char)
    return "".join(kept)


def read_csv(path: Path) -> Tuple[List[str], List[Dict[str, str]]]:
    """Load the pilot log and make sure every known column is present.

    Args:
        path: Location of the pilot log CSV.

    Returns:
        A ``(fieldnames, rows)`` pair. When *path* does not exist the header
        is ``BASE_FIELDS + EXTRA_FIELDS`` and the row list is empty. Otherwise
        the file's own header is kept, any column from
        ``BASE_FIELDS + EXTRA_FIELDS`` that is absent is appended to it, and
        every row gets an empty string for each appended column.
    """
    expected = BASE_FIELDS + EXTRA_FIELDS
    if not path.exists():
        return expected, []
    with path.open("r", encoding="utf-8-sig", newline="") as handle:
        # restval="" makes rows shorter than the header yield empty strings
        # instead of None, so callers can safely call len()/slice cell values.
        reader = csv.DictReader(handle, restval="")
        fieldnames = list(reader.fieldnames or [])
        rows = [dict(row) for row in reader]
    # Collect the columns the file lacks, extending the header as we go.
    missing = []  # type: List[str]
    for field in expected:
        if field not in fieldnames:
            fieldnames.append(field)
            missing.append(field)
    for row in rows:
        for field in missing:
            row.setdefault(field, "")
    return fieldnames, rows


def write_csv(path: Path, fieldnames: List[str], rows: List[Dict[str, Any]]) -> None:
    """Overwrite *path* with a header line followed by *rows*.

    Missing parent directories are created first. Keys in a row that are not
    listed in *fieldnames* are silently dropped.
    """
    path.parent.mkdir(parents=True, exist_ok=True)
    with path.open("w", encoding="utf-8-sig", newline="") as out_file:
        csv_writer = csv.DictWriter(out_file, fieldnames=fieldnames, extrasaction="ignore")
        csv_writer.writeheader()
        for record in rows:
            csv_writer.writerow(record)


def find_row(rows: List[Dict[str, str]], pilot_id: str = "", phone: str = "") -> Dict[str, str]:
    """Return the log row identified by *pilot_id* or, failing that, by *phone*.

    An exact ``pilot_id`` match anywhere in *rows* takes precedence over a
    phone match, so an earlier row that merely shares the phone number cannot
    shadow the row the caller named by id. Phone numbers are compared after
    ``normalize_phone`` with any leading ``+`` removed.

    Args:
        rows: Rows as returned by ``read_csv``; the matching dict itself is
            returned (not a copy), so callers may mutate it in place.
        pilot_id: Value to compare with each row's ``pilot_id`` column.
        phone: Phone number used as a fallback key.

    Raises:
        SystemExit: If no row matches (callers such as ``log_sent`` catch this).
    """
    if pilot_id:
        wanted_id = str(pilot_id)
        for row in rows:
            if str(row.get("pilot_id", "")) == wanted_id:
                return row
    # Only reached when the id is absent or unknown: fall back to the phone.
    phone_norm = normalize_phone(phone).lstrip("+")
    if phone_norm:
        for row in rows:
            if normalize_phone(row.get("phone", "")).lstrip("+") == phone_norm:
                return row
    raise SystemExit("No matching log row found")


def ensure_log(path: Path) -> Dict[str, Any]:
    """Rewrite the pilot log so that it carries every column ``read_csv`` returns.

    Returns:
        A summary with the log path, the number of data rows and the header.
    """
    columns, records = read_csv(path)
    write_csv(path, columns, records)
    summary = {
        "log": str(path),
        "rows": len(records),
        "fields": columns,
    }  # type: Dict[str, Any]
    return summary


def append_ledger(path: Path, row: Dict[str, Any]) -> None:
    """Append one entry to the message ledger CSV.

    Missing parent directories are created. The header is written when the
    ledger does not exist yet *or* exists but is empty, so a zero-byte file
    never ends up with data rows and no header.

    Args:
        path: Location of the ledger CSV.
        row: Values for the entry; ``LEDGER_FIELDS`` columns that are absent
            are written as empty strings and unknown keys are ignored.
    """
    path.parent.mkdir(parents=True, exist_ok=True)
    needs_header = not path.exists() or path.stat().st_size == 0
    with path.open("a", encoding="utf-8-sig", newline="") as handle:
        writer = csv.DictWriter(handle, fieldnames=LEDGER_FIELDS, extrasaction="ignore")
        if needs_header:
            writer.writeheader()
        # Start from a blank record so every ledger column is always present.
        out = dict.fromkeys(LEDGER_FIELDS, "")
        out.update(row)
        writer.writerow(out)


def log_sent(
    log_csv: Path,
    ledger_csv: Path,
    *,
    pilot_id: str = "",
    phone: str = "",
    recipient_id: str = "",
    message: str,
    status: str = "sent",
    notes: str = "",
) -> Dict[str, Any]:
    """Record an outbound message in the pilot log (if a row matches) and the ledger.

    When ``find_row`` locates a row by *pilot_id* or *phone*, that row is
    marked as messaged and the log is rewritten. A ledger entry is appended
    in every case, matched or not.

    Args:
        log_csv: Pilot log CSV to update.
        ledger_csv: Ledger CSV to append to.
        pilot_id: Pilot id used to find the log row.
        phone: Phone number used to find the log row.
        recipient_id: Stored on the log row and the ledger entry as given.
        message: Text that was sent; its length and first 160 characters are stored.
        status: Value of the ledger entry's ``status`` column.
        notes: Appended to the row's notes (``sent_at=<timestamp>`` is used
            when empty) and copied verbatim to the ledger entry.

    Returns:
        A summary dict; ``matched_pilot_row`` tells whether a log row was updated.
    """
    fieldnames, rows = read_csv(log_csv)
    now = utc_now()
    preview = message[:160]
    matched = None  # type: Optional[Dict[str, str]]
    try:
        # Only the lookup is guarded: find_row signals "no match" via
        # SystemExit, and nothing else should be mistaken for that.
        matched = find_row(rows, pilot_id=pilot_id, phone=phone)
    except SystemExit:
        matched = None
    else:
        matched["added_to_telegram"] = matched.get("added_to_telegram") or "yes"
        matched["gate1_message_sent"] = "yes"
        matched["sent_at_utc"] = now
        matched["recipient_id"] = recipient_id
        matched["message_len"] = str(len(message))
        matched["message_preview"] = preview
        matched["notes"] = "; ".join(part for part in [matched.get("notes", ""), notes or f"sent_at={now}"] if part)
        write_csv(log_csv, fieldnames, rows)

    # Lead details come from the matched row when there is one.
    source = matched or {}
    ledger_row = {
        "created_at_utc": now,
        "direction": "outbound",
        "channel": "telegram_user_session",
        "pilot_id": pilot_id or source.get("pilot_id", ""),
        "lead_id": source.get("lead_id", ""),
        "dataset": source.get("dataset", ""),
        "hypothesis": source.get("hypothesis", ""),
        "phone": phone or source.get("phone", ""),
        "recipient_id": recipient_id,
        "message_len": len(message),
        "message_preview": preview,
        "status": status,
        "notes": notes,
    }
    append_ledger(ledger_csv, ledger_row)
    return {"logged": True, "matched_pilot_row": matched is not None, "ledger": str(ledger_csv), "log": str(log_csv)}


def mark_added(args: argparse.Namespace) -> Dict[str, Any]:
    """Flag a lead as added to Telegram and optionally store the contact name.

    Reads ``log_csv``, ``pilot_id``, ``phone`` and ``name`` from *args*. The
    row lookup raises ``SystemExit`` (from ``find_row``) when nothing matches.
    """
    columns, records = read_csv(args.log_csv)
    target = find_row(records, pilot_id=args.pilot_id, phone=args.phone)
    changes = {"added_to_telegram": "yes"}
    if args.name:
        changes["telegram_contact_name"] = args.name
    target.update(changes)
    write_csv(args.log_csv, columns, records)
    return {"updated": target.get("pilot_id"), "added_to_telegram": "yes"}


def mark_replied(args: argparse.Namespace) -> Dict[str, Any]:
    """Record a lead's reply in the pilot log and append an inbound ledger entry.

    Reads ``log_csv``, ``ledger_csv``, ``pilot_id``, ``phone``, ``reply_text``,
    ``category``, ``next_gate`` and ``replied_at`` from *args*. Empty argument
    values keep whatever the row already holds; ``next_gate`` finally falls
    back to ``"Gate2_barrier"`` and ``replied_at`` to the current UTC time.

    Raises:
        SystemExit: From ``find_row`` when no log row matches.
    """
    fieldnames, rows = read_csv(args.log_csv)
    row = find_row(rows, pilot_id=args.pilot_id, phone=args.phone)
    replied_at = args.replied_at or utc_now()
    # `or` chains instead of dict.get defaults: read_csv creates every column,
    # so the keys always exist and a get() default would never be used. This
    # also keeps the values strings, which len() and slicing below rely on.
    reply_text = args.reply_text or row.get("reply_text") or ""
    category = args.category or row.get("reply_category") or ""
    next_gate = args.next_gate or row.get("next_gate") or "Gate2_barrier"

    row["replied"] = "yes"
    row["chat_started"] = "yes"
    row["replied_at_utc"] = replied_at
    row["reply_text"] = reply_text
    row["reply_category"] = category
    row["next_gate"] = next_gate
    write_csv(args.log_csv, fieldnames, rows)
    append_ledger(
        args.ledger_csv,
        {
            "created_at_utc": replied_at,
            "direction": "inbound",
            "channel": "telegram_user_session",
            "pilot_id": row.get("pilot_id", ""),
            "lead_id": row.get("lead_id", ""),
            "dataset": row.get("dataset", ""),
            "hypothesis": row.get("hypothesis", ""),
            "phone": row.get("phone", ""),
            "message_len": len(reply_text),
            "message_preview": reply_text[:160],
            "status": "replied",
            # Only the category passed on this call is noted, not a stored one.
            "notes": args.category or "",
        },
    )
    return {"updated": row.get("pilot_id"), "replied": "yes", "chat_started": "yes"}


def stats(path: Path, ledger_path: Path) -> Dict[str, Any]:
    """Summarise the pilot log overall and per hypothesis.

    A row counts as a started chat when either ``chat_started`` or ``replied``
    equals ``"yes"``. Rows with an empty hypothesis are grouped under
    ``"unknown"``. *ledger_path* is only echoed back in the result.
    """
    _, records = read_csv(path)

    counter_names = ("total", "added", "sent", "replied", "chats_started")
    overall = dict.fromkeys(counter_names, 0)
    by_hypothesis = {}  # type: Dict[str, Dict[str, int]]

    for record in records:
        has_replied = record.get("replied") == "yes"
        increments = {
            "total": 1,
            "added": int(record.get("added_to_telegram") == "yes"),
            "sent": int(record.get("gate1_message_sent") == "yes"),
            "replied": int(has_replied),
            "chats_started": int(record.get("chat_started") == "yes" or has_replied),
        }
        label = record.get("hypothesis", "unknown") or "unknown"
        bucket = by_hypothesis.setdefault(label, dict.fromkeys(counter_names, 0))
        for name in counter_names:
            overall[name] += increments[name]
            bucket[name] += increments[name]

    sent_count = overall["sent"]
    if sent_count:
        reply_rate = round(overall["replied"] / sent_count, 3)
    else:
        reply_rate = 0
    return {
        "log": str(path),
        "ledger": str(ledger_path),
        "total": overall["total"],
        "added_to_telegram": overall["added"],
        "sent": sent_count,
        "replied": overall["replied"],
        "chats_started": overall["chats_started"],
        "reply_rate_on_sent": reply_rate,
        "by_hypothesis": by_hypothesis,
    }


def build_parser() -> argparse.ArgumentParser:
    """Create the CLI parser: two global path options plus five subcommands."""
    root = argparse.ArgumentParser(description="AIMA Telegram manual pilot logging and stats.")
    for flag, fallback in (("--log-csv", DEFAULT_LOG_CSV), ("--ledger-csv", DEFAULT_LEDGER_CSV)):
        root.add_argument(flag, type=Path, default=fallback)
    commands = root.add_subparsers(dest="command", required=True)

    # ensure-log takes no options of its own.
    commands.add_parser("ensure-log")

    mark_added_cmd = commands.add_parser("mark-added")
    for flag in ("--pilot-id", "--phone", "--name"):
        mark_added_cmd.add_argument(flag, default="")

    mark_replied_cmd = commands.add_parser("mark-replied")
    for flag in ("--pilot-id", "--phone", "--reply-text", "--category"):
        mark_replied_cmd.add_argument(flag, default="")
    mark_replied_cmd.add_argument("--next-gate", default="Gate2_barrier")
    mark_replied_cmd.add_argument("--replied-at", default="")

    log_sent_cmd = commands.add_parser("log-sent")
    for flag in ("--pilot-id", "--phone", "--recipient-id"):
        log_sent_cmd.add_argument(flag, default="")
    # The message text is the only mandatory option in the whole CLI.
    log_sent_cmd.add_argument("--message", required=True)
    log_sent_cmd.add_argument("--status", default="sent")
    log_sent_cmd.add_argument("--notes", default="")

    # stats takes no options of its own.
    commands.add_parser("stats")
    return root


def main() -> None:
    """Parse the command line, run the chosen subcommand and print its result as JSON."""
    args = build_parser().parse_args()
    # Each entry defers the call so only the selected command is executed.
    handlers = {
        "ensure-log": lambda: ensure_log(args.log_csv),
        "mark-added": lambda: mark_added(args),
        "mark-replied": lambda: mark_replied(args),
        "log-sent": lambda: log_sent(
            args.log_csv,
            args.ledger_csv,
            pilot_id=args.pilot_id,
            phone=args.phone,
            recipient_id=args.recipient_id,
            message=args.message,
            status=args.status,
            notes=args.notes,
        ),
        "stats": lambda: stats(args.log_csv, args.ledger_csv),
    }
    handler = handlers.get(args.command)
    if handler is None:
        raise SystemExit(f"unknown command: {args.command}")
    outcome = handler()
    print(json.dumps(outcome, ensure_ascii=False, indent=2))


# Run the CLI only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()