In [1]:
import json
import hashlib
import re
import os
import pandas as pd
import numpy as np
from pathlib import Path
from sentence_transformers import SentenceTransformer, util

In [2]:
import time, random
from hashlib import sha1
from typing import List, Dict, Set, Any

Loading Dataset And Cleaning

In [3]:
with open("NewSHLDataset.json", "r", encoding="utf-8") as f:
    data = json.load(f)
df = pd.DataFrame(data)
df.head(3)

Unnamed: 0,assessment_name,assessment_url,pdf_text,job_level,test_type,test_language
0,Global Skills Development Report,https://www.shl.com/products/product-catalog/v...,This report is designed to be given to individ...,"[Director, Entry-Level, Executive, General Pop...","[Ability & Aptitude, Biodata & Situational Jud...",[English (USA)]
1,.NET Framework 4.5,https://www.shl.com/products/product-catalog/v...,The.NET Framework 4.5 test measures knowledge ...,"[Mid-Professional, Professional Individual Con...",[Knowledge & Skills],[English (USA)]
2,.NET MVC (New),https://www.shl.com/products/product-catalog/v...,Multi-choice test that measures the knowledge ...,"[Mid-Professional, Professional Individual Con...",[Knowledge & Skills],[English (USA)]


In [4]:
df["pdf_text"] = df["pdf_text"].astype(str).str.replace(r"\n", " ", regex=True)
df["pdf_text"] = (df["pdf_text"].fillna("").astype(str).str.replace(r"\s+", " ", regex=True).str.strip())
df["assessment_name"] = df["assessment_name"].fillna("").astype(str).str.strip()
df["assessment_url"] = df["assessment_url"].fillna("").astype(str).str.strip()
# Drop empty rows (optional)
df = df[df["pdf_text"].str.len() > 0].reset_index(drop=True)

Changing the format of JD or Query to a structured one

In [None]:
# ==== PATHS ====
DATA_CSV   = "assessments_catalog.csv"  
CACHE_DIR  = Path("./embedding_cache")  
CACHE_DIR.mkdir(parents=True, exist_ok=True)

# ==== MODEL ====
MODEL_NAME = "sentence-transformers/all-MiniLM-L6-v2"
model = SentenceTransformer(MODEL_NAME)

# ==== COLUMNS ====
COL_PDF   = "pdf_text"
COL_LEVEL = "job_level"
COL_LANG  = "test_language"
COL_NAME  = "assessment_name"
COL_URL   = "assessment_url"

# ==== RANKING WEIGHTS ====
W_LOOK  = 0.65
W_CONS  = 0.15
W_LANG  = 0.10
W_LEVEL = 0.10

# ==== COVERAGE ====
TOP_N = 10          # number of results to show
GUARANTEE_TOP = 4   # ensure facet coverage within top-4
K_PER_FACET = 3     # if coverage missing, pull from top-3 of that facet


In [6]:
pd.set_option('display.max_rows', None)
pd.set_option('display.max_columns', None)
pd.set_option('display.width', 1000) # Increases the overall display width
pd.set_option('display.max_colwidth', None)

In [7]:
# Keep a stable integer id aligned to current file ordering
df = df.reset_index(drop=False).rename(columns={"index": "row_id"})


In [8]:
# ==== COLUMN CONSTANTS ====
COL_PDF   = "pdf_text"
COL_LEVEL = "job_level"
COL_LANG  = "test_language"
COL_NAME  = "assessment_name"
COL_URL   = "assessment_url"


In [9]:
def norm_text(x) -> str:
    """Normalize any text or list/dict/object into a clean string."""
    if isinstance(x, list):
        x = " ".join(map(str, x))
    elif isinstance(x, dict):
        x = json.dumps(x, ensure_ascii=False)
    elif x is None:
        x = ""
    else:
        x = str(x)
    x = x.strip()
    x = re.sub(r"\s+", " ", x)
    return x

def _to_str_scalar(x) -> str:
    if isinstance(x, list):
        return " ".join(map(str, x))
    if isinstance(x, dict):
        return json.dumps(x, ensure_ascii=False)
    if x is None:
        return ""
    return str(x)

def sanitize_rows_df(df_rows: pd.DataFrame, text_cols: list[str]) -> pd.DataFrame:
    df_rows = df_rows.copy()
    for c in text_cols:
        if c in df_rows.columns:
            df_rows[c] = df_rows[c].map(_to_str_scalar)
            df_rows[c] = df_rows[c].astype(object)  # avoid pandas ExtensionDtypes
    if "row_id" in df_rows.columns:
        df_rows["row_id"] = df_rows["row_id"].astype(int)
    return df_rows

def dataset_fingerprint(df: pd.DataFrame, cols=("row_id", COL_PDF, COL_LEVEL, COL_LANG)) -> str:
    """Stable fingerprint over key fields + model name, to invalidate cache if data/model changes."""
    h = hashlib.sha256()
    h.update(MODEL_NAME.encode("utf-8"))
    for c in cols:
        s = "\n".join(map(str, df[c].astype(str).tolist()))
        h.update(s.encode("utf-8"))
    return h.hexdigest()

def _parquet_engine_available() -> bool:
    return (
        importlib.util.find_spec("pyarrow") is not None
        or importlib.util.find_spec("fastparquet") is not None
    )

def cache_paths(cache_dir: Path, fp: str):
    meta = cache_dir / f"meta_{fp}.json"
    pdf  = cache_dir / f"emb_pdf_{fp}.npy"
    lvl  = cache_dir / f"emb_level_{fp}.npy"
    lng  = cache_dir / f"emb_lang_{fp}.npy"
    rows_parquet = cache_dir / f"rows_{fp}.parquet"
    rows_csv     = cache_dir / f"rows_{fp}.csv"
    return meta, pdf, lvl, lng, rows_parquet, rows_csv

def save_cache(meta_path: Path, pdf_path: Path, lvl_path: Path, lng_path: Path,
               rows_parquet_path: Path, rows_csv_path: Path,
               emb_pdf: np.ndarray, emb_level: np.ndarray, emb_lang: np.ndarray,
               df_rows: pd.DataFrame):
    # Save npy arrays
    np.save(pdf_path, emb_pdf)
    np.save(lvl_path, emb_level)
    np.save(lng_path, emb_lang)

    # Sanitize df and write as Parquet if possible else CSV
    text_cols = [COL_NAME, COL_URL, COL_PDF, COL_LEVEL, COL_LANG]
    df_rows = sanitize_rows_df(df_rows, text_cols)

    rows_format = "csv"
    if _parquet_engine_available():
        try:
            import pyarrow as pa, pyarrow.parquet as pq
            table = pa.Table.from_pandas(df_rows, preserve_index=False, safe=True)
            pq.write_table(table, rows_parquet_path)
            rows_format = "parquet"
        except Exception:
            df_rows.to_csv(rows_csv_path, index=False)
    else:
        df_rows.to_csv(rows_csv_path, index=False)

    meta = {
        "model": MODEL_NAME,
        "created_at": int(time.time()),
        "num_rows": int(df_rows.shape[0]),
        "rows_format": rows_format,
        "schema": {
            "pdf_col": COL_PDF, "level_col": COL_LEVEL, "lang_col": COL_LANG,
            "name_col": COL_NAME, "url_col": COL_URL
        }
    }
    meta_path.write_text(json.dumps(meta, indent=2))

def try_load_cache(meta_path: Path, pdf_path: Path, lvl_path: Path, lng_path: Path,
                   rows_parquet_path: Path, rows_csv_path: Path):
    # check file existence
    if not (meta_path.exists() and pdf_path.exists() and lvl_path.exists() and lng_path.exists()):
        return None
    if not (rows_parquet_path.exists() or rows_csv_path.exists()):
        return None
    try:
        meta = json.loads(meta_path.read_text())
        emb_pdf = np.load(pdf_path)
        emb_level = np.load(lvl_path)
        emb_lang = np.load(lng_path)

        fmt = meta.get("rows_format", "parquet")
        if fmt == "parquet" and rows_parquet_path.exists() and _parquet_engine_available():
            try:
                import pyarrow.parquet as pq
                df_rows = pq.read_table(rows_parquet_path).to_pandas()
            except Exception:
                if rows_csv_path.exists():
                    df_rows = pd.read_csv(rows_csv_path)
                else:
                    return None
        elif rows_csv_path.exists():
            df_rows = pd.read_csv(rows_csv_path)
        else:
            return None

        return meta, emb_pdf, emb_level, emb_lang, df_rows
    except Exception:
        return None


In [10]:
import importlib.util
import time, hashlib
# Normalize key columns before fingerprint/embedding
df[COL_PDF]   = df[COL_PDF].map(norm_text)
df[COL_LEVEL] = df[COL_LEVEL].map(norm_text)
df[COL_LANG]  = df[COL_LANG].map(norm_text)

fp = dataset_fingerprint(df)
meta_path, pdf_path, lvl_path, lng_path, rows_parquet_path, rows_csv_path = cache_paths(CACHE_DIR, fp)

loaded = try_load_cache(meta_path, pdf_path, lvl_path, lng_path, rows_parquet_path, rows_csv_path)

if loaded is None:
    print("No valid cache found (or data/model changed). Building embeddings...")
    emb_pdf   = model.encode(df[COL_PDF].tolist(),   convert_to_tensor=False, normalize_embeddings=True)
    emb_level = model.encode(df[COL_LEVEL].tolist(), convert_to_tensor=False, normalize_embeddings=True)
    emb_lang  = model.encode(df[COL_LANG].tolist(),  convert_to_tensor=False, normalize_embeddings=True)

    emb_pdf   = np.asarray(emb_pdf, dtype="float32")
    emb_level = np.asarray(emb_level, dtype="float32")
    emb_lang  = np.asarray(emb_lang, dtype="float32")

    save_cache(
        meta_path, pdf_path, lvl_path, lng_path,
        rows_parquet_path, rows_csv_path,
        emb_pdf, emb_level, emb_lang,
        df[["row_id", COL_NAME, COL_URL, COL_PDF, COL_LEVEL, COL_LANG]]
    )
    print("Cache saved.")
else:
    meta, emb_pdf, emb_level, emb_lang, df_cached = loaded
    print(f"Loaded cache. Rows: {meta['num_rows']}  Model: {meta['model']}  Rows format: {meta.get('rows_format')}")
    # Align df to cached row order just in case
    df = df.merge(df_cached[["row_id"]], on="row_id", how="right").sort_values("row_id").reset_index(drop=True)


Loaded cache. Rows: 377  Model: sentence-transformers/all-MiniLM-L6-v2  Rows format: parquet


In [11]:
def cos_to_01(cos_scores: np.ndarray) -> np.ndarray:
    # Input may be cosine in [-1,1] if using util.cos_sim; here we use dot since normalized embeddings.
    # With normalized vectors, dot product is cosine in [-1,1]. Map to [0,1].
    return (cos_scores + 1.0) / 2.0

def scores_for_query(query: dict,
                     emb_pdf: np.ndarray, emb_level: np.ndarray, emb_lang: np.ndarray,
                     df_base: pd.DataFrame) -> pd.DataFrame:
    # Query fields
    q_look = norm_text(query.get("looking_for",""))
    q_level = norm_text(query.get("job_level",""))
    q_constraints = norm_text(query.get("constraints",""))
    q_language = norm_text(query.get("language",""))

    # Embed only provided parts
    e_look = model.encode(q_look, convert_to_tensor=False, normalize_embeddings=True) if q_look else None
    e_lvl  = model.encode(q_level, convert_to_tensor=False, normalize_embeddings=True) if q_level else None
    e_cons = model.encode(q_constraints, convert_to_tensor=False, normalize_embeddings=True) if q_constraints else None
    e_lang = model.encode(q_language, convert_to_tensor=False, normalize_embeddings=True) if q_language else None

    n = len(df_base)
    look_scores = np.full(n, 0.5, dtype="float32")
    level_scores = np.full(n, 0.5, dtype="float32")
    constraint_scores = np.full(n, 0.5, dtype="float32")
    lang_scores = np.full(n, 0.5, dtype="float32")

    # With normalized embeddings, matrix @ vector gives cosine; then map to [0,1]
    if e_look is not None:
        look_scores = cos_to_01(emb_pdf @ e_look)
    if e_lvl is not None:
        level_scores = cos_to_01(emb_level @ e_lvl)
    if e_cons is not None:
        constraint_scores = cos_to_01(emb_pdf @ e_cons)
    if e_lang is not None:
        lang_scores = cos_to_01(emb_lang @ e_lang)

    out = df_base.copy()
    out["looking_for_score"] = look_scores
    out["job_level_score"]   = level_scores
    out["constraint_score"]  = constraint_scores
    out["language_score"]    = lang_scores

    out["final_score"] = (
        W_LOOK  * out["looking_for_score"] +
        W_CONS  * out["constraint_score"]  +
        W_LANG  * out["language_score"]    +
        W_LEVEL * out["job_level_score"]
    )

    # Rank columns for coverage checks
    out = out.sort_values("final_score", ascending=False).reset_index(drop=True)
    out["rank_final"] = np.arange(len(out)) + 1
    out["rank_look"]  = out["looking_for_score"].rank(ascending=False, method="first")
    out["rank_cons"]  = out["constraint_score"].rank(ascending=False, method="first")
    out["rank_lang"]  = out["language_score"].rank(ascending=False, method="first")

    return out

def ensure_coverage(dfw: pd.DataFrame, facet_rank_col: str, facet_present: bool,
                    k: int = K_PER_FACET, top_m: int = GUARANTEE_TOP) -> pd.DataFrame:
    """Guarantee at least one top-k (by facet) item appears in the first top_m of final ranking."""
    if not facet_present or len(dfw) == 0:
        return dfw
    top_m_idx = dfw.index[:top_m]
    present = any(dfw.loc[i, facet_rank_col] <= k for i in top_m_idx)
    if present:
        return dfw

    cand = dfw[dfw[facet_rank_col] <= k].head(1)
    if cand.empty:
        return dfw
    cand_idx = cand.index[0]
    if cand_idx in top_m_idx:
        return dfw

    # Move candidate into position top_m-1
    rows = dfw.iloc[:top_m-1].copy()
    head_rest = dfw.iloc[top_m-1:].copy()
    cand_row = dfw.loc[[cand_idx]]
    dfw = pd.concat([rows, cand_row, head_rest.drop(index=cand_idx, errors="ignore")], axis=0).reset_index(drop=True)
    return dfw

def recommend(query: dict, df_base: pd.DataFrame,
              emb_pdf: np.ndarray, emb_level: np.ndarray, emb_lang: np.ndarray,
              top_n=TOP_N) -> pd.DataFrame:
    scored = scores_for_query(query, emb_pdf, emb_level, emb_lang, df_base)

    # Which facets were provided?
    facet_present_look = bool(norm_text(query.get("looking_for","")))
    facet_present_cons = bool(norm_text(query.get("constraints","")))
    facet_present_lang = bool(norm_text(query.get("language","")))

    # Apply coverage guarantees (top-4 must include at least one top-3 by each present facet)
    scored = scored.sort_values("final_score", ascending=False).reset_index(drop=True)
    scored["rank_look"] = scored["looking_for_score"].rank(ascending=False, method="first")
    scored = ensure_coverage(scored, "rank_look", facet_present_look, K_PER_FACET, GUARANTEE_TOP)

    scored["rank_cons"] = scored["constraint_score"].rank(ascending=False, method="first")
    scored = ensure_coverage(scored, "rank_cons", facet_present_cons, K_PER_FACET, GUARANTEE_TOP)

    scored["rank_lang"] = scored["language_score"].rank(ascending=False, method="first")
    scored = ensure_coverage(scored, "rank_lang", facet_present_lang, K_PER_FACET, GUARANTEE_TOP)

    cols = [COL_NAME, COL_URL, "looking_for_score", "job_level_score", "constraint_score", "language_score", "final_score"]
    return scored.head(top_n)[cols].reset_index(drop=True)


In [12]:
from Query_Restructured import get_assessment_summary

In [28]:
Query = """I am new looking for new graduates in my sales team, suggest an 30 min long assessment

"""
prompt_new = get_assessment_summary(Query)
prompt_new


[ACK] 1/1: {"ack":"received"}


{'looking_for': 'Looking for new graduates to join the sales team, assessing their communication, problem-solving, and interpersonal skills, along with their general aptitude for a sales role.',
 'constraints': 'Average assessment time: 30 mins',
 'job_level': 'junior',
 'need_to_assess': 'C,B,A,P',
 'language': 'unknown'}

In [29]:


results = recommend(prompt_new, df, emb_pdf, emb_level, emb_lang, top_n=TOP_N)

def explain_row(r):
    return (f"topic={r['looking_for_score']:.2f} | "
            f"constraints={r['constraint_score']:.2f} | "
            f"language={r['language_score']:.2f} | "
            f"level={r['job_level_score']:.2f}")

results["why"] = results.apply(explain_row, axis=1)
results

Unnamed: 0,assessment_name,assessment_url,looking_for_score,job_level_score,constraint_score,language_score,final_score,why
0,Sales Interview Guide,https://www.shl.com/products/product-catalog/view/sales-interview-guide/,0.740727,0.64518,0.541128,0.651462,0.692306,topic=0.74 | constraints=0.54 | language=0.65 | level=0.65
1,Graduate Scenarios,https://www.shl.com/products/product-catalog/view/graduate-scenarios/,0.712539,0.642156,0.659233,0.651462,0.691397,topic=0.71 | constraints=0.66 | language=0.65 | level=0.64
2,Entry Level Sales Solution,https://www.shl.com/products/product-catalog/view/entry-level-sales-solution/,0.743897,0.68167,0.509315,0.537414,0.681839,topic=0.74 | constraints=0.51 | language=0.54 | level=0.68
3,Graduate Scenarios Profile Report,https://www.shl.com/products/product-catalog/view/graduate-scenarios-profile...,0.691794,0.639502,0.628219,0.67732,0.675581,topic=0.69 | constraints=0.63 | language=0.68 | level=0.64
4,AI Skills,https://www.shl.com/products/product-catalog/view/ai-skills/,0.640571,0.544772,0.725695,0.595527,0.639255,topic=0.64 | constraints=0.73 | language=0.60 | level=0.54
5,OPQ MQ Sales Report,https://www.shl.com/products/product-catalog/view/opq-mq-sales-report/,0.740727,0.632149,0.541128,0.545375,0.680394,topic=0.74 | constraints=0.54 | language=0.55 | level=0.63
6,Salesforce Development (New),https://www.shl.com/products/product-catalog/view/salesforce-development-new/,0.709146,0.64916,0.632733,0.595527,0.680323,topic=0.71 | constraints=0.63 | language=0.60 | level=0.65
7,Verify Interactive G+ Report,https://www.shl.com/products/product-catalog/view/verify-interactive-g-report/,0.7069,0.639502,0.555914,0.67732,0.674555,topic=0.71 | constraints=0.56 | language=0.68 | level=0.64
8,Graduate Scenarios Narrative Report,https://www.shl.com/products/product-catalog/view/graduate-scenarios-narrati...,0.691794,0.62853,0.628219,0.67732,0.674484,topic=0.69 | constraints=0.63 | language=0.68 | level=0.63
9,Retail Sales and Service Simulation,https://www.shl.com/products/product-catalog/view/retail-sales-and-service-s...,0.704866,0.68167,0.583235,0.595527,0.673368,topic=0.70 | constraints=0.58 | language=0.60 | level=0.68


In [15]:
# ==== SHL Recall@10 Evaluation (slug-based, cached, retry/backoff) ====
# Assumes you already have in memory:
#   - df, emb_pdf, emb_level, emb_lang
#   - functions: get_assessment_summary(text: str) and recommend(prompt, df, emb_pdf, emb_level, emb_lang, top_n=10)
# Ground truth file path:
GROUND_TRUTH_PATH = r"C:\Users\vinee\Downloads\Gen_AI Dataset.xlsx"


# =================== CONFIG ===================
K = 10                    # Recall@K
DEFAULT_TOP_N = 10        # How many results to request from your model (min K)
REQUEST_DELAY_SEC = 0.6   # Pacing between calls to avoid RPM/QPS limits
MAX_RETRIES = 5           # Max retries on 429
BASE_BACKOFF = 2.0        # Base seconds for exponential backoff
CACHE_PATH = "pred_cache.json"
CACHE_VERSION = "v1"      # <-- bump this whenever you change prompts/model/logic
FORCE_REFRESH_ALL = True # Set True to bypass cache for all queries this run
MODEL_NAME = "gemini-2.5-flash"        # optional metadata for cache key
PROMPT_PRESET = "summary_default_v1"    # optional metadata for cache key

# Optional: import Google's 429 exception if available
try:
    from google.api_core.exceptions import ResourceExhausted
except Exception:
    ResourceExhausted = None

# =================== UTILITIES ===================
def normalize_url(u: Any) -> str:
    """Lowercase, strip, strip scheme, 'www.', trailing slash."""
    if not isinstance(u, str):
        return ""
    u = u.strip().lower()
    u = u.replace("https://", "").replace("http://", "")
    if u.startswith("www."):
        u = u[4:]
    if u.endswith("/"):
        u = u[:-1]
    return u

def extract_slug(u: Any) -> str:
    """Return a stable identifier (prefers '/view/<slug>', else last path segment)."""
    u = normalize_url(u)
    m = re.search(r"/view/([^/?#]+)", u)
    if m:
        return m.group(1)
    parts = [p for p in u.split("/") if p]
    return parts[-1] if parts else u

def recall_at_k(true_items: Set[str], pred_items: List[str], k: int) -> float:
    if not true_items:
        return 0.0
    return len(set(pred_items[:k]) & true_items) / len(true_items)

# =================== CACHE ===================
def _cache_load() -> Dict[str, List[str]]:
    if os.path.exists(CACHE_PATH):
        try:
            with open(CACHE_PATH, "r", encoding="utf-8") as f:
                return json.load(f)
        except Exception:
            return {}
    return {}

def _cache_save(cache: Dict[str, List[str]]):
    tmp = CACHE_PATH + ".tmp"
    with open(tmp, "w", encoding="utf-8") as f:
        json.dump(cache, f, ensure_ascii=False, indent=2)
    os.replace(tmp, CACHE_PATH)

def _qkey(query_text: str) -> str:
    """Cache key: version + model/prompt metadata + K/N + query text."""
    base = f"{CACHE_VERSION}|{MODEL_NAME}|{PROMPT_PRESET}|K={K}|N={DEFAULT_TOP_N}|{query_text}"
    return sha1(base.encode("utf-8")).hexdigest()

# =================== MODEL CALL WRAPPER ===================
def get_predictions_for_query(query_text: str,
                              top_n: int = DEFAULT_TOP_N,
                              force_refresh: bool = False) -> List[str]:
    """
    Calls your pipeline with retry/backoff and returns list of predicted slugs.
    Uses disk cache to avoid repeat API usage for the same query.
    """
    top_n = max(top_n, K)
    cache = _cache_load()
    key = _qkey(query_text)

    if not (force_refresh or FORCE_REFRESH_ALL):
        if key in cache:
            return cache[key]

    last_err = None
    for attempt in range(MAX_RETRIES):
        try:
            # Build prompt (your function)
            prompt = get_assessment_summary(query_text)
            # Call your recommender (expects DataFrame with 'assessment_url' column)
            res = recommend(prompt, df, emb_pdf, emb_level, emb_lang, top_n=top_n)

            if not isinstance(res, pd.DataFrame) or 'assessment_url' not in res.columns:
                preds = []
            else:
                preds = res['assessment_url'].astype(str).map(extract_slug).tolist()

            cache[key] = preds
            _cache_save(cache)

            # Pacing to respect RPM/QPS
            time.sleep(REQUEST_DELAY_SEC)
            return preds

        except Exception as e:
            last_err = e
            # Detect 429
            is_429 = False
            if ResourceExhausted and isinstance(e, ResourceExhausted):
                is_429 = True
            elif "ResourceExhausted" in str(e) or "429" in str(e):
                is_429 = True

            if is_429 and attempt < MAX_RETRIES - 1:
                sleep_s = BASE_BACKOFF * (2 ** attempt) + random.uniform(0, 0.5)
                time.sleep(sleep_s)
                continue
            else:
                # Save empty to avoid hammering API on repeated runs
                cache[key] = []
                _cache_save(cache)
                break

    return []

# =================== LOAD GROUND TRUTH ===================
gt = pd.read_excel(GROUND_TRUTH_PATH)  # expects columns: Query, Assessment_url
if not set(["Query", "Assessment_url"]).issubset(gt.columns):
    raise ValueError(f"Ground truth must have columns ['Query','Assessment_url']; found {list(gt.columns)}")

gt = gt.copy()
gt["Assessment_url"] = gt["Assessment_url"].astype(str).map(normalize_url)
gt["slug"] = gt["Assessment_url"].map(extract_slug)
gt_dict: Dict[str, Set[str]] = gt.groupby("Query")["slug"].apply(set).to_dict()

# =================== EVALUATE ===================
rows = []
for q in gt_dict.keys():
    pred_slugs = get_predictions_for_query(q, top_n=DEFAULT_TOP_N)
    r = recall_at_k(gt_dict[q], pred_slugs, K)
    rows.append({
        "Query": q,
        "n_true": len(gt_dict[q]),
        "n_pred": len(pred_slugs),
        "Recall@10": r,
        "note": "" if pred_slugs else "no_predictions (possibly quota/retry-exhausted or cached empty)"
    })

per_query_df = pd.DataFrame(rows).sort_values("Recall@10", ascending=True).reset_index(drop=True)
mean_recall_10 = per_query_df["Recall@10"].mean() if not per_query_df.empty else 0.0

# Nicely print results (truncate long queries)
pd.set_option("display.max_colwidth", 80)
print(per_query_df[["Query","n_true","n_pred","Recall@10","note"]].to_string(index=False))
print(f"\n=== Mean Recall@10 over {len(per_query_df)} queries: {mean_recall_10:.4f} ===")

# =================== QUICK HOW-TO ===================
# - To force fresh predictions for ALL queries this run: set FORCE_REFRESH_ALL = True at the top.
# - To version the cache when you change prompts/model: bump CACHE_VERSION.
# - To pace more (avoid 429): increase REQUEST_DELAY_SEC (e.g., 1.0–1.5).
# - To wipe cache entirely: 
#     import os; 
#     os.remove("pred_cache.json") if os.path.exists("pred_cache.json") else None


[ACK] 1/1: {"ack":"received"}
[ACK] 1/1: {"ack":"received"}
[ACK] 1/1: {"ack":"received"}
[ACK] 1/1: {"ack":"received"}
[ACK] 1/1: {"ack":"received"}
[ACK] 1/1: {"ack":"received"}
[ACK] 1/1: {"ack":"received"}
[ACK] 1/1: {"ack":"received"}
[ACK] 1/1: {"ack":"received"}
[ACK] 1/1: {"ack":"received"}
[ACK] 1/1: {"ack":"received"}
                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                              

In [16]:
import os
if os.path.exists("pred_cache.json"):
    os.remove("pred_cache.json")
