In [4]:
from typing import Dict
import numpy as np
from google.cloud import bigquery

In [22]:
def run_query(
    start_date: str,
    end_date: str,
    siso: str,
    model_names: Dict[str, str],
    source: str,
):
    """Compare purchase-ranking metrics between a control and a variant model.

    Queries `etsy-search-ml-prod.search_ranking.second_pass_eval` for the
    per-model averages of purchase NDCG@48, purchase NDCG@10 and
    price-attributed purchase DCG@10, prints the query that was run, prints the
    relative difference of the variant versus the control for each metric, and
    returns the per-model averages.

    Args:
        start_date: First `evalDate` to include; interpolated into
            ``date('...')`` in the query.
        end_date: Last `evalDate` to include (the filter uses ``between``, so
            both ends are inclusive).
        siso: Filter on `tags.userId`: ``"si"`` keeps rows with userId > 0,
            ``"so"`` keeps rows with userId = 0 or null, ``"all"`` applies no
            filter. (Presumably signed-in / signed-out traffic -- confirm.)
        model_names: Mapping that must contain the keys ``"control"`` and
            ``"variant"``. Every value in the mapping is included in the
            ``modelName in (...)`` filter.
        source: ``"web"``, ``"boe"`` or ``"both"``; selects which purchase
            `source` values are included.

    Returns:
        The query result as a DataFrame with one row per model and the columns
        ``modelName``, ``avg_pndcg48``, ``avg_pndcg10`` and ``avg_ppdcg10``.

    Raises:
        ValueError: If `source` or `siso` is not one of the accepted values.
        KeyError: If `model_names` lacks the ``"control"`` or ``"variant"`` key.
        IndexError: If the query returned no rows for the control or variant
            model.
    """
    source_clauses = {
        "web": "source = 'web_purchase'",
        "boe": "source = 'boe_purchase'",
        "both": "source in ('web_purchase', 'boe_purchase')",
    }
    siso_clauses = {
        "si": "tags.userId > 0",
        "so": "(tags.userId = 0 or tags.userId is null)",
        "all": "True",
    }
    # (label printed to the user, column alias in the query result)
    metrics = (
        ("ndcg48", "avg_pndcg48"),
        ("ndcg10", "avg_pndcg10"),
        ("ppdcg10", "avg_ppdcg10"),
    )

    # Validate everything up front so that a typo fails fast with a clear
    # message, before a client is created or a query is paid for.
    if source not in source_clauses:
        raise ValueError(
            f"source must be one of {sorted(source_clauses)}; got {source!r}"
        )
    if siso not in siso_clauses:
        raise ValueError(
            f"siso must be one of {sorted(siso_clauses)}; got {siso!r}"
        )
    missing_roles = [role for role in ("control", "variant") if role not in model_names]
    if missing_roles:
        raise KeyError(f"model_names is missing required key(s): {missing_roles}")

    source_clause = source_clauses[source]
    siso_clause = siso_clauses[siso]
    # NOTE: dates and model names are interpolated directly into the SQL text,
    # so they must come from a trusted caller and must not contain quotes.
    model_name_clause = ",".join(f"'{name}'" for name in model_names.values())

    query_str = f"""select
      modelName,
      avg(metrics.purchase.ndcg48) as avg_pndcg48,
      avg(metrics.purchase.ndcg10) as avg_pndcg10,
      avg(metrics.purchase.dcgAttributedPrice10) as avg_ppdcg10
    from `etsy-search-ml-prod.search_ranking.second_pass_eval`
    where evalDate between date('{start_date}') and date('{end_date}')
    and {source_clause}
    and {siso_clause}
    and modelName in (
      {model_name_clause}
    )
    group by modelName
    order by modelName"""

    print(query_str)
    print("\n")

    client = bigquery.Client(project="etsy-search-ml-dev")
    df = client.query(query_str).result().to_dataframe()

    control_name = model_names["control"]
    variant_name = model_names["variant"]
    returned_models = set(df["modelName"])
    absent = [name for name in (control_name, variant_name) if name not in returned_models]
    if absent:
        raise IndexError(
            f"query returned no rows for model(s) {absent}; "
            f"models present in the result: {sorted(returned_models)}"
        )

    # The query groups by modelName, so each model has exactly one row.
    by_model = df.set_index("modelName")
    for label, column in metrics:
        ctrl_value = by_model.at[control_name, column]
        variant_value = by_model.at[variant_name, column]
        rel_diff = (variant_value - ctrl_value) / ctrl_value
        print(f"{label}: {rel_diff:.2%}")

    return df

In [35]:
# Evaluation window and traffic filters passed to run_query.
start_date, end_date = "2025-04-23", "2025-05-06"
siso, source = "so", "both"

# Models under comparison, keyed by their role in the experiment.
model_names = dict(
    control="nrv2-semrel-uni-serve-tm-so",
    variant="nrv2-query-volume-so",
)

In [36]:
# Run the control-vs-variant comparison with the parameters configured above;
# the per-model metric averages are kept in `df` for further inspection.
df = run_query(
    start_date=start_date, end_date=end_date,
    siso=siso, source=source,
    model_names=model_names,
)



select 
      modelName,
      avg(metrics.purchase.ndcg48) as avg_pndcg48,  
      avg(metrics.purchase.ndcg10) as avg_pndcg10, 
      avg(metrics.purchase.dcgAttributedPrice10) as avg_ppdcg10
    from `etsy-search-ml-prod.search_ranking.second_pass_eval`
    where evalDate between date('2025-04-23') and date('2025-05-06')
    and source in ('web_purchase', 'boe_purchase')
    and (tags.userId = 0 or tags.userId is null)
    and modelName in (
      'nrv2-semrel-uni-serve-tm-so','nrv2-query-volume-so'
    )
    group by modelName
    order by modelName


ndcg48: -0.13%
ndcg10: -0.18%
ppdcg10: -0.32%
