#!/usr/bin/env python3 # -*- coding: utf-8 -*- """ llm_cache_sqlite_v1.py ====================== A tiny, dependency-light caching wrapper for AI model calls with a SQLite backend. You can use it as a library (wrap your provider) *and* as a CLI tool. Why: - So you can share a single DB file with your teammate (or me), and I can replay runs offline using the cache (set mode=readonly). Features: - SQLite cache file (default: ./ai_cache.sqlite) - Keyed by a SHA256 of the canonicalized request payload (model + messages + params) - Stores full request JSON and full response JSON - Modes: readwrite (default), readonly (fail on miss), writeonly (always call provider, update cache), off (bypass) - Provider: OpenAI out-of-the-box (requires `openai` >= 1.0). You can add your own via subclassing. - CLI: * ask a question and read/write cache * lookup by prompt substring * export to JSONL / import from JSONL Schema: table cache( key_hash TEXT PRIMARY KEY, model TEXT, payload_json TEXT, response_json TEXT, created_at TEXT, last_used TEXT, hits INTEGER, prompt_excerpt TEXT, response_excerpt TEXT ) Usage (library): from llm_cache_sqlite_v1 import CachedOpenAI client = CachedOpenAI(db_path="ai_cache.sqlite", mode="readwrite") # or "readonly" resp = client.chat( model="gpt-4o-mini", messages=[ {"role":"system","content":"You are helpful."}, {"role":"user","content":"Say hi to Ukraine!"} ], temperature=0.2 ) print(resp) Usage (CLI): python llm_cache_sqlite_v1.py --db ai_cache.sqlite \ chat --model gpt-4o-mini --system "You are helpful." \ --user "Say hi to Ukraine!" --mode readwrite python llm_cache_sqlite_v1.py --db ai_cache.sqlite grep "Ukraine" python llm_cache_sqlite_v1.py --db ai_cache.sqlite export cache.jsonl python llm_cache_sqlite_v1.py --db ai_cache.sqlite import cache.jsonl Notes: - If `--mode readonly` and a key is missing, you'll get a CacheMissError. - Set env LLM_CACHE_PROVIDER=openai (default) and OPENAI_API_KEY as usual. - For fully offline dry-runs, you can set mode=readonly and reuse an existing DB. © 2025. MIT License. """ import argparse, json, os, sqlite3, sys, hashlib, datetime as dt from typing import Any, Dict, List, Optional # ──────────────────────────────────────────────────────────────────────────────── # utils def canonical_json(obj: Any) -> str: """Deterministic JSON for hashing. Sort keys recursively; round floats reasonably.""" def _normalize(x): if isinstance(x, dict): return {k: _normalize(x[k]) for k in sorted(x.keys())} if isinstance(x, list): return [_normalize(i) for i in x] if isinstance(x, float): # reduce tiny diffs return float(f"{x:.12g}") return x norm = _normalize(obj) return json.dumps(norm, ensure_ascii=False, separators=(",", ":"), sort_keys=True) def sha256(s: str) -> str: return hashlib.sha256(s.encode("utf-8")).hexdigest() def now_iso() -> str: return dt.datetime.utcnow().replace(microsecond=0).isoformat() + "Z" def excerpt(s: str, n: int = 240) -> str: s = (s or "").strip().replace("\n", " ") return s[:n] # ──────────────────────────────────────────────────────────────────────────────── # DB class CacheDB: def __init__(self, path: str): self.path = path self._conn = sqlite3.connect(self.path) self._conn.execute("PRAGMA journal_mode=WAL;") self._conn.execute("PRAGMA synchronous=NORMAL;") self._ensure_schema() def _ensure_schema(self): self._conn.execute(""" CREATE TABLE IF NOT EXISTS cache( key_hash TEXT PRIMARY KEY, model TEXT, payload_json TEXT, response_json TEXT, created_at TEXT, last_used TEXT, hits INTEGER, prompt_excerpt TEXT, response_excerpt TEXT )""") self._conn.commit() def get(self, key_hash: str) -> Optional[Dict[str, Any]]: cur = self._conn.execute("SELECT model,payload_json,response_json,created_at,last_used,hits,prompt_excerpt,response_excerpt FROM cache WHERE key_hash=?", (key_hash,)) row = cur.fetchone() if not row: return None model, payload_json, response_json, created_at, last_used, hits, p_ex, r_ex = row # update last_used + hits self._conn.execute("UPDATE cache SET last_used=?, hits=? WHERE key_hash=?", (now_iso(), (hits or 0) + 1, key_hash)) self._conn.commit() return { "key_hash": key_hash, "model": model, "payload_json": payload_json, "response_json": response_json, "created_at": created_at, "last_used": last_used, "hits": (hits or 0) + 1, "prompt_excerpt": p_ex, "response_excerpt": r_ex } def put(self, key_hash: str, model: str, payload_json: str, response_json: str, prompt_excerpt_text: str, response_excerpt_text: str): now = now_iso() self._conn.execute(""" INSERT OR REPLACE INTO cache(key_hash,model,payload_json,response_json,created_at,last_used,hits,prompt_excerpt,response_excerpt) VALUES(?,?,?,?,?,?,?,?,?) """, (key_hash, model, payload_json, response_json, now, now, 1, prompt_excerpt_text, response_excerpt_text)) self._conn.commit() def grep(self, query: str) -> List[Dict[str, Any]]: like = f"%{query}%" cur = self._conn.execute(""" SELECT key_hash, model, created_at, hits, prompt_excerpt, response_excerpt FROM cache WHERE prompt_excerpt LIKE ? OR response_excerpt LIKE ? ORDER BY last_used DESC LIMIT 200 """, (like, like)) out = [] for row in cur.fetchall(): out.append({ "key_hash": row[0], "model": row[1], "created_at": row[2], "hits": row[3], "prompt_excerpt": row[4], "response_excerpt": row[5], }) return out def export_jsonl(self, dst_path: str): cur = self._conn.execute("""SELECT key_hash, model, payload_json, response_json, created_at, last_used, hits, prompt_excerpt, response_excerpt FROM cache""") with open(dst_path, "w", encoding="utf-8") as f: for row in cur.fetchall(): rec = { "key_hash": row[0], "model": row[1], "payload": json.loads(row[2]), "response": json.loads(row[3]), "created_at": row[4], "last_used": row[5], "hits": row[6], "prompt_excerpt": row[7], "response_excerpt": row[8], } f.write(json.dumps(rec, ensure_ascii=False) + "\n") def import_jsonl(self, src_path: str, dedupe: bool = True): count = 0 with open(src_path, "r", encoding="utf-8") as f: for line in f: if not line.strip(): continue rec = json.loads(line) key_hash = rec.get("key_hash") or sha256(canonical_json(rec.get("payload"))) self.put( key_hash=key_hash, model=rec.get("model",""), payload_json=json.dumps(rec.get("payload"), ensure_ascii=False), response_json=json.dumps(rec.get("response"), ensure_ascii=False), prompt_excerpt_text=excerpt(self._prompt_from_payload(rec.get("payload"))), response_excerpt_text=excerpt(self._text_from_response(rec.get("response"))) ) count += 1 return count @staticmethod def _prompt_from_payload(payload: Dict[str, Any]) -> str: msgs = payload.get("messages") or [] pieces = [] for m in msgs: role = m.get("role","") content = m.get("content","") if isinstance(content, list): # OpenAI "content" sometimes is a list of parts parts = [] for p in content: if isinstance(p, dict) and "text" in p: parts.append(p["text"]) elif isinstance(p, str): parts.append(p) content = "\n".join(parts) pieces.append(f"{role}: {content}") return "\n".join(pieces) @staticmethod def _text_from_response(resp: Dict[str, Any]) -> str: # Works with OpenAI-like responses (chat.completions.create) try: choices = resp.get("choices") or [] for ch in choices: msg = ch.get("message") or {} cnt = msg.get("content") if isinstance(cnt, str): return cnt if isinstance(cnt, list): # content parts for p in cnt: if isinstance(p, dict) and "text" in p: return p["text"] # as a fallback return json.dumps(resp, ensure_ascii=False)[:500] except Exception: return "" # ──────────────────────────────────────────────────────────────────────────────── # Provider wrappers class CacheMissError(Exception): pass class CachedOpenAI: """ A wrapper for OpenAI's Chat Completions with SQLite cache. Modes: - "readwrite": check cache, on miss call provider and store (default) - "readonly": only read cache; on miss raise CacheMissError - "writeonly": always call provider and overwrite cache - "off": bypass cache and always call provider """ def __init__(self, db_path: str = "ai_cache.sqlite", mode: str = "readwrite", org: Optional[str] = None, api_key: Optional[str] = None): self.db = CacheDB(db_path) self.mode = mode.lower() self.org = org or os.environ.get("OPENAI_ORG") self.api_key = api_key or os.environ.get("OPENAI_API_KEY") self._client = None # lazy # internal: build canonical payload for hashing def _build_payload(self, model: str, messages: List[Dict[str, Any]], **params) -> Dict[str, Any]: payload = {"model": model, "messages": messages} # include only stable parameters that affect output for k in ["temperature","top_p","seed","presence_penalty","frequency_penalty","logit_bias","n","response_format","tools","tool_choice","stop","max_tokens"]: if k in params and params[k] is not None: payload[k] = params[k] return payload def _hash_for(self, payload: Dict[str, Any]) -> str: return sha256(canonical_json(payload)) def _get_client(self): if self._client is not None: return self._client try: # openai>=1.0 from openai import OpenAI self._client = OpenAI(organization=self.org, api_key=self.api_key) return self._client except Exception as e: raise RuntimeError("OpenAI SDK not available. Install `openai` >= 1.0 or use mode=readonly.") from e # public chat method def chat(self, model: str, messages: List[Dict[str, Any]], **params) -> Dict[str, Any]: payload = self._build_payload(model, messages, **params) payload_json = canonical_json(payload) key = self._hash_for(payload) # READ path if self.mode in ("readwrite","readonly"): row = self.db.get(key) if row: cached = json.loads(row["response_json"]) return cached if self.mode == "readonly": raise CacheMissError(f"Cache miss for key={key} (model={model}) in readonly mode.") # CALL provider if self.mode == "off": # still call provider pass # writeonly and readwrite both call provider here client = self._get_client() # convert payload to SDK call params call_kwargs = dict(payload) # SDK expects separate args rather than nested payload model = call_kwargs.pop("model") messages = call_kwargs.pop("messages") resp = client.chat.completions.create(model=model, messages=messages, **call_kwargs) # Convert to JSON-serializable dict try: # openai>=1.0 pydantic object supports model_dump_json if hasattr(resp, "model_dump"): resp_dict = resp.model_dump() elif hasattr(resp, "to_dict"): resp_dict = resp.to_dict() else: # fallback to str->json resp_dict = json.loads(str(resp)) except Exception: # last resort resp_dict = json.loads(json.dumps(resp, default=lambda o: getattr(o, "__dict__", str(o)))) # WRITE path if self.mode in ("readwrite","writeonly"): text_prompt = CacheDB._prompt_from_payload(payload) text_response = CacheDB._text_from_response(resp_dict) self.db.put( key_hash=key, model=model, payload_json=payload_json, response_json=json.dumps(resp_dict, ensure_ascii=False), prompt_excerpt_text=excerpt(text_prompt), response_excerpt_text=excerpt(text_response), ) return resp_dict # ──────────────────────────────────────────────────────────────────────────────── # CLI def build_cli(): p = argparse.ArgumentParser(description="SQLite cache for LLM chat (OpenAI).") p.add_argument("--db", default="ai_cache.sqlite", help="Path to SQLite cache") sub = p.add_subparsers(dest="cmd", required=True) # chat c = sub.add_parser("chat", help="Chat with caching") c.add_argument("--model", required=True, help="Model name (e.g., gpt-4o-mini)") c.add_argument("--system", default=None, help="System prompt") c.add_argument("--user", default=None, help="User message") c.add_argument("--messages-json", default=None, help="JSON for messages (overrides --system/--user)") c.add_argument("--temperature", type=float, default=None) c.add_argument("--top_p", type=float, default=None) c.add_argument("--seed", type=int, default=None) c.add_argument("--max_tokens", type=int, default=None) c.add_argument("--mode", default="readwrite", choices=["readwrite","readonly","writeonly","off"], help="Cache mode") # grep g = sub.add_parser("grep", help="Search cache by text") g.add_argument("query", help="Substring to search") # export/import ex = sub.add_parser("export", help="Export cache to JSONL") ex.add_argument("dst", help="Destination path") im = sub.add_parser("import", help="Import cache from JSONL") im.add_argument("src", help="Source JSONL path") return p def main(): args = build_cli().parse_args() db = CacheDB(args.db) if args.cmd == "grep": rows = db.grep(args.query) for r in rows: print(f"{r['key_hash'][:12]} {r['model']:>14} {r['hits']:>3} {r['created_at']} {r['prompt_excerpt']!r}") return if args.cmd == "export": db.export_jsonl(args.dst) print(f"Exported -> {args.dst}") return if args.cmd == "import": n = db.import_jsonl(args.src) print(f"Imported {n} rows from {args.src}") return if args.cmd == "chat": if args.messages_json: messages = json.loads(args.messages_json) else: messages = [] if args.system: messages.append({"role": "system", "content": args.system}) if args.user: messages.append({"role": "user", "content": args.user}) client = CachedOpenAI(db_path=args.db, mode=args.mode) resp = client.chat( model=args.model, messages=messages, temperature=args.temperature, top_p=args.top_p, seed=args.seed, max_tokens=args.max_tokens, ) # print assistant text if present text = CacheDB._text_from_response(resp) if text: print(text) else: print(json.dumps(resp, ensure_ascii=False, indent=2)) if __name__ == "__main__": main()