In [3]:
from typing import Dict
import numpy as np
from google.cloud import bigquery

In [4]:
# SQL predicate on the `source` column for each accepted `source` argument.
_SOURCE_CLAUSES = {
    "web": "source = 'web_purchase'",
    "boe": "source = 'boe_purchase'",
    "both": "source in ('web_purchase', 'boe_purchase')",
}

# SQL predicate on `tags.userId` for each accepted `siso` argument.
_SISO_CLAUSES = {
    "si": "tags.userId > 0",
    "so": "(tags.userId = 0 or tags.userId is null)",
    "both": "True",
}

# (printed label, result column) pairs, in the order the lifts are reported.
_LIFT_METRICS = (
    ("ndcg48", "avg_pndcg48"),
    ("ndcg10", "avg_pndcg10"),
    ("ppdcg10", "avg_ppdcg10"),
)


def _lookup_clause(kind, value, clauses):
    """Return the SQL predicate registered for ``value``; raise ValueError listing valid options."""
    try:
        return clauses[value]
    except KeyError:
        raise ValueError(
            f"{kind} must be one of {sorted(clauses)}, got {value!r}"
        ) from None


def _build_eval_query(start_date, end_date, siso, model_names, source, us_only, intl_only):
    """Validate the arguments and render the aggregate query text (no I/O)."""
    source_clause = _lookup_clause("source", source, _SOURCE_CLAUSES)
    siso_clause = _lookup_clause("siso", siso, _SISO_CLAUSES)

    # `us_only` defaults to True, so `intl_only=True` on its own used to be
    # silently ignored and US-only numbers were returned. Reject the
    # contradictory combination instead of reporting the wrong segment.
    if us_only and intl_only:
        raise ValueError(
            "us_only and intl_only are mutually exclusive; "
            "pass us_only=False together with intl_only=True for non-US traffic"
        )
    if us_only:
        traffic_clause = "(tags.userCountry='US')"
    elif intl_only:
        traffic_clause = "(tags.userCountry!='US')"
    else:
        traffic_clause = "True"

    # Both roles are needed for the lift report; check before the query is run.
    missing_roles = [role for role in ("control", "variant") if role not in model_names]
    if missing_roles:
        raise KeyError(f"model_names is missing required key(s): {missing_roles}")

    # NOTE: every value below is interpolated into the SQL text verbatim, so
    # this helper must only be fed trusted, notebook-authored strings.
    model_name_clause = ",".join(f"'{name}'" for name in model_names.values())

    return f"""select 
      modelName,
      avg(metrics.purchase.ndcg48) as avg_pndcg48,  
      avg(metrics.purchase.ndcg10) as avg_pndcg10, 
      avg(metrics.purchase.dcgAttributedPrice10) as avg_ppdcg10
    from `etsy-search-ml-prod.search_ranking.second_pass_eval`
    where evalDate between date('{start_date}') and date('{end_date}')
    and {source_clause}
    and {siso_clause}
    and {traffic_clause}
    and modelName in (
      {model_name_clause}
    )
    group by modelName
    order by modelName"""


def _print_relative_lifts(df, model_names):
    """Print (variant - control) / control for each metric in ``_LIFT_METRICS``."""
    control_rows = df[df["modelName"] == model_names["control"]]
    variant_rows = df[df["modelName"] == model_names["variant"]]

    absent = [
        model_names[role]
        for role, rows in (("control", control_rows), ("variant", variant_rows))
        if rows.empty
    ]
    if absent:
        # Previously this surfaced as a bare IndexError and the dataframe was lost.
        print(f"No rows returned for model(s) {absent}; skipping relative lifts.")
        return

    for label, column in _LIFT_METRICS:
        control_value = control_rows[column].iloc[0]
        variant_value = variant_rows[column].iloc[0]
        relative_diff = (variant_value - control_value) / control_value
        print(f"{label}: {relative_diff:.2%}")


def run_query(
    start_date: str,
    end_date: str,
    siso: str,
    model_names: Dict[str, str],
    source: str,
    us_only: bool = True,
    intl_only: bool = False,
):
    """Compare control vs. variant purchase metrics from `second_pass_eval`.

    Runs one aggregate query (average purchase NDCG@48, NDCG@10 and
    dcgAttributedPrice10 per model), prints the query text, prints the relative
    lift of the variant over the control for each metric, and returns the
    per-model averages.

    Args:
        start_date: Inclusive lower bound for `evalDate`, passed to SQL `date(...)`.
        end_date: Inclusive upper bound for `evalDate`, passed to SQL `date(...)`.
        siso: "si" keeps rows with `tags.userId > 0`, "so" keeps rows whose
            `tags.userId` is 0 or NULL, "both" applies no user filter.
        model_names: Must contain the keys "control" and "variant"; every value
            is included in the `modelName in (...)` filter.
        source: "web", "boe" or "both"; selects the `*_purchase` source rows.
        us_only: Keep only rows with `tags.userCountry = 'US'`.
        intl_only: Keep only rows with `tags.userCountry != 'US'`; requires
            `us_only=False`.

    Returns:
        The query result converted with `to_dataframe()`, one row per model
        that had matching rows.

    Raises:
        ValueError: If `source` or `siso` is not a recognised value, or if
            `us_only` and `intl_only` are both true.
        KeyError: If `model_names` lacks "control" or "variant".
    """
    # Build (and validate) first so bad arguments fail before a client is
    # created or a query is submitted.
    query_str = _build_eval_query(
        start_date, end_date, siso, model_names, source, us_only, intl_only
    )

    client = bigquery.Client(project="etsy-search-ml-dev")

    print(query_str)
    print("\n")

    df = client.query(query_str).result().to_dataframe()

    _print_relative_lifts(df, model_names)
    return df

In [3]:
# US model with added intl features, SI segment, US traffic:
# variant `nrv2-us-intl-si` vs. control `nrv2-no-borda-tm-si`.
df = run_query(
    start_date="2025-03-12", end_date="2025-03-27",
    siso="si", source="both",
    us_only=True, intl_only=False,
    model_names=dict(
        control="nrv2-no-borda-tm-si",
        variant="nrv2-us-intl-si",
    ),
)



select 
      modelName,
      avg(metrics.purchase.ndcg48) as avg_pndcg48,  
      avg(metrics.purchase.ndcg10) as avg_pndcg10, 
      avg(metrics.purchase.dcgAttributedPrice10) as avg_ppdcg10
    from `etsy-search-ml-prod.search_ranking.second_pass_eval`
    where evalDate between date('2025-03-12') and date('2025-03-27')
    and source in ('web_purchase', 'boe_purchase')
    and tags.userId > 0
    and (tags.userCountry='US')
    and modelName in (
      'nrv2-no-borda-tm-si','nrv2-us-intl-si'
    )
    group by modelName
    order by modelName


ndcg48: 0.80%
ndcg10: 1.25%
ppdcg10: 1.37%


In [4]:
# US model with added intl features, SO segment, US traffic:
# variant `nrv2-us-intl-so` vs. control `nrv2-semrel-uni-serve-tm-so`.
df = run_query(
    start_date="2025-03-12", end_date="2025-03-27",
    siso="so", source="both",
    us_only=True, intl_only=False,
    model_names=dict(
        control="nrv2-semrel-uni-serve-tm-so",
        variant="nrv2-us-intl-so",
    ),
)



select 
      modelName,
      avg(metrics.purchase.ndcg48) as avg_pndcg48,  
      avg(metrics.purchase.ndcg10) as avg_pndcg10, 
      avg(metrics.purchase.dcgAttributedPrice10) as avg_ppdcg10
    from `etsy-search-ml-prod.search_ranking.second_pass_eval`
    where evalDate between date('2025-03-12') and date('2025-03-27')
    and source in ('web_purchase', 'boe_purchase')
    and (tags.userId = 0 or tags.userId is null)
    and (tags.userCountry='US')
    and modelName in (
      'nrv2-semrel-uni-serve-tm-so','nrv2-us-intl-so'
    )
    group by modelName
    order by modelName


ndcg48: 0.28%
ndcg10: 0.40%
ppdcg10: 0.56%


In [5]:
# Categorical Embeddings SI (this variant's name has no b360 component), US traffic:
# variant `nrv2-unif-emb-si` vs. control `nrv2-us-intl-si`.
df = run_query(
    start_date="2025-04-08", end_date="2025-04-22",
    siso="si", source="both",
    us_only=True, intl_only=False,
    model_names=dict(
        control="nrv2-us-intl-si",
        variant="nrv2-unif-emb-si",
    ),
)



select 
      modelName,
      avg(metrics.purchase.ndcg48) as avg_pndcg48,  
      avg(metrics.purchase.ndcg10) as avg_pndcg10, 
      avg(metrics.purchase.dcgAttributedPrice10) as avg_ppdcg10
    from `etsy-search-ml-prod.search_ranking.second_pass_eval`
    where evalDate between date('2025-04-08') and date('2025-04-22')
    and source in ('web_purchase', 'boe_purchase')
    and tags.userId > 0
    and (tags.userCountry='US')
    and modelName in (
      'nrv2-us-intl-si','nrv2-unif-emb-si'
    )
    group by modelName
    order by modelName


ndcg48: 0.08%
ndcg10: 0.20%
ppdcg10: 0.10%


In [6]:
# Categorical Embeddings + Buyer360, SI segment, US traffic:
# variant `nrv2-unif-emb-b360-si` vs. control `nrv2-us-intl-si`.
df = run_query(
    start_date="2025-04-08", end_date="2025-04-22",
    siso="si", source="both",
    us_only=True, intl_only=False,
    model_names=dict(
        control="nrv2-us-intl-si",
        variant="nrv2-unif-emb-b360-si",
    ),
)



select 
      modelName,
      avg(metrics.purchase.ndcg48) as avg_pndcg48,  
      avg(metrics.purchase.ndcg10) as avg_pndcg10, 
      avg(metrics.purchase.dcgAttributedPrice10) as avg_ppdcg10
    from `etsy-search-ml-prod.search_ranking.second_pass_eval`
    where evalDate between date('2025-04-08') and date('2025-04-22')
    and source in ('web_purchase', 'boe_purchase')
    and tags.userId > 0
    and (tags.userCountry='US')
    and modelName in (
      'nrv2-us-intl-si','nrv2-unif-emb-b360-si'
    )
    group by modelName
    order by modelName


ndcg48: 0.17%
ndcg10: 0.35%
ppdcg10: 0.74%


In [7]:
# Categorical Embeddings, SO segment, US traffic:
# variant `nrv2-unif-emb-so` vs. control `nrv2-semrel-uni-serve-tm-so`.
df = run_query(
    start_date="2025-04-08", end_date="2025-04-22",
    siso="so", source="both",
    us_only=True, intl_only=False,
    model_names=dict(
        control="nrv2-semrel-uni-serve-tm-so",
        variant="nrv2-unif-emb-so",
    ),
)



select 
      modelName,
      avg(metrics.purchase.ndcg48) as avg_pndcg48,  
      avg(metrics.purchase.ndcg10) as avg_pndcg10, 
      avg(metrics.purchase.dcgAttributedPrice10) as avg_ppdcg10
    from `etsy-search-ml-prod.search_ranking.second_pass_eval`
    where evalDate between date('2025-04-08') and date('2025-04-22')
    and source in ('web_purchase', 'boe_purchase')
    and (tags.userId = 0 or tags.userId is null)
    and (tags.userCountry='US')
    and modelName in (
      'nrv2-semrel-uni-serve-tm-so','nrv2-unif-emb-so'
    )
    group by modelName
    order by modelName


ndcg48: -0.16%
ndcg10: -0.33%
ppdcg10: 0.10%


In [8]:
# Query Volume Weighting, SI segment, US traffic:
# variant `nrv2-query-volume-si` vs. control `nrv2-us-intl-si`.
df = run_query(
    start_date="2025-04-23", end_date="2025-05-06",
    siso="si", source="both",
    us_only=True, intl_only=False,
    model_names=dict(
        control="nrv2-us-intl-si",
        variant="nrv2-query-volume-si",
    ),
)



select 
      modelName,
      avg(metrics.purchase.ndcg48) as avg_pndcg48,  
      avg(metrics.purchase.ndcg10) as avg_pndcg10, 
      avg(metrics.purchase.dcgAttributedPrice10) as avg_ppdcg10
    from `etsy-search-ml-prod.search_ranking.second_pass_eval`
    where evalDate between date('2025-04-23') and date('2025-05-06')
    and source in ('web_purchase', 'boe_purchase')
    and tags.userId > 0
    and (tags.userCountry='US')
    and modelName in (
      'nrv2-us-intl-si','nrv2-query-volume-si'
    )
    group by modelName
    order by modelName


ndcg48: -0.08%
ndcg10: -0.14%
ppdcg10: -0.13%


In [9]:
# Query Volume Weighting, SO segment, US traffic:
# variant `nrv2-query-volume-so` vs. control `nrv2-semrel-uni-serve-tm-so`.
df = run_query(
    start_date="2025-04-23", end_date="2025-05-06",
    siso="so", source="both",
    us_only=True, intl_only=False,
    model_names=dict(
        control="nrv2-semrel-uni-serve-tm-so",
        variant="nrv2-query-volume-so",
    ),
)



select 
      modelName,
      avg(metrics.purchase.ndcg48) as avg_pndcg48,  
      avg(metrics.purchase.ndcg10) as avg_pndcg10, 
      avg(metrics.purchase.dcgAttributedPrice10) as avg_ppdcg10
    from `etsy-search-ml-prod.search_ranking.second_pass_eval`
    where evalDate between date('2025-04-23') and date('2025-05-06')
    and source in ('web_purchase', 'boe_purchase')
    and (tags.userId = 0 or tags.userId is null)
    and (tags.userCountry='US')
    and modelName in (
      'nrv2-semrel-uni-serve-tm-so','nrv2-query-volume-so'
    )
    group by modelName
    order by modelName


ndcg48: -0.17%
ndcg10: -0.24%
ppdcg10: -0.41%


Unified model try 1 - nrv2_us_intl_v2_si

In [10]:
# US traffic, SO segment:
# variant `nrv2-us-intl-v2-si` vs. control `nrv2-semrel-uni-serve-tm-so`.
df = run_query(
    start_date="2025-05-07", end_date="2025-05-15",
    siso="so", source="both",
    us_only=True, intl_only=False,
    model_names=dict(
        control="nrv2-semrel-uni-serve-tm-so",
        variant="nrv2-us-intl-v2-si",
    ),
)



select 
      modelName,
      avg(metrics.purchase.ndcg48) as avg_pndcg48,  
      avg(metrics.purchase.ndcg10) as avg_pndcg10, 
      avg(metrics.purchase.dcgAttributedPrice10) as avg_ppdcg10
    from `etsy-search-ml-prod.search_ranking.second_pass_eval`
    where evalDate between date('2025-05-07') and date('2025-05-15')
    and source in ('web_purchase', 'boe_purchase')
    and (tags.userId = 0 or tags.userId is null)
    and (tags.userCountry='US')
    and modelName in (
      'nrv2-semrel-uni-serve-tm-so','nrv2-us-intl-v2-si'
    )
    group by modelName
    order by modelName


ndcg48: -0.15%
ndcg10: -0.31%
ppdcg10: -1.85%


In [11]:
# US traffic, SI segment:
# variant `nrv2-us-intl-v2-si` vs. control `nrv2-us-intl-si`.
df = run_query(
    start_date="2025-05-07", end_date="2025-05-15",
    siso="si", source="both",
    us_only=True, intl_only=False,
    model_names=dict(
        control="nrv2-us-intl-si",
        variant="nrv2-us-intl-v2-si",
    ),
)



select 
      modelName,
      avg(metrics.purchase.ndcg48) as avg_pndcg48,  
      avg(metrics.purchase.ndcg10) as avg_pndcg10, 
      avg(metrics.purchase.dcgAttributedPrice10) as avg_ppdcg10
    from `etsy-search-ml-prod.search_ranking.second_pass_eval`
    where evalDate between date('2025-05-07') and date('2025-05-15')
    and source in ('web_purchase', 'boe_purchase')
    and tags.userId > 0
    and (tags.userCountry='US')
    and modelName in (
      'nrv2-us-intl-si','nrv2-us-intl-v2-si'
    )
    group by modelName
    order by modelName


ndcg48: 0.07%
ndcg10: 0.11%
ppdcg10: 0.30%


In [14]:
# Non-US traffic, no SI/SO filter:
# variant `nrv2-us-intl-v2-si` vs. control `nr-loc-no-xwalk-56d`.
df = run_query(
    start_date="2025-05-07", end_date="2025-05-15",
    siso="both", source="both",
    us_only=False, intl_only=True,
    model_names=dict(
        control="nr-loc-no-xwalk-56d",
        variant="nrv2-us-intl-v2-si",
    ),
)



select 
      modelName,
      avg(metrics.purchase.ndcg48) as avg_pndcg48,  
      avg(metrics.purchase.ndcg10) as avg_pndcg10, 
      avg(metrics.purchase.dcgAttributedPrice10) as avg_ppdcg10
    from `etsy-search-ml-prod.search_ranking.second_pass_eval`
    where evalDate between date('2025-05-07') and date('2025-05-15')
    and source in ('web_purchase', 'boe_purchase')
    and True
    and (tags.userCountry!='US')
    and modelName in (
      'nr-loc-no-xwalk-56d','nrv2-us-intl-v2-si'
    )
    group by modelName
    order by modelName


ndcg48: 0.66%
ndcg10: 0.78%
ppdcg10: 1.25%


Unified model try 1 - nrv2_us_intl_v2_digital_si

In [15]:
# US traffic, SO segment:
# variant `nrv2-us-intl-v2-digital-si` vs. control `nrv2-semrel-uni-serve-tm-so`.
df = run_query(
    start_date="2025-05-07", end_date="2025-05-15",
    siso="so", source="both",
    us_only=True, intl_only=False,
    model_names=dict(
        control="nrv2-semrel-uni-serve-tm-so",
        variant="nrv2-us-intl-v2-digital-si",
    ),
)



select 
      modelName,
      avg(metrics.purchase.ndcg48) as avg_pndcg48,  
      avg(metrics.purchase.ndcg10) as avg_pndcg10, 
      avg(metrics.purchase.dcgAttributedPrice10) as avg_ppdcg10
    from `etsy-search-ml-prod.search_ranking.second_pass_eval`
    where evalDate between date('2025-05-07') and date('2025-05-15')
    and source in ('web_purchase', 'boe_purchase')
    and (tags.userId = 0 or tags.userId is null)
    and (tags.userCountry='US')
    and modelName in (
      'nrv2-semrel-uni-serve-tm-so','nrv2-us-intl-v2-digital-si'
    )
    group by modelName
    order by modelName


ndcg48: -0.63%
ndcg10: -1.03%
ppdcg10: -2.19%


In [16]:
# US traffic, SI segment:
# variant `nrv2-us-intl-v2-digital-si` vs. control `nrv2-us-intl-si`.
df = run_query(
    start_date="2025-05-07", end_date="2025-05-15",
    siso="si", source="both",
    us_only=True, intl_only=False,
    model_names=dict(
        control="nrv2-us-intl-si",
        variant="nrv2-us-intl-v2-digital-si",
    ),
)



select 
      modelName,
      avg(metrics.purchase.ndcg48) as avg_pndcg48,  
      avg(metrics.purchase.ndcg10) as avg_pndcg10, 
      avg(metrics.purchase.dcgAttributedPrice10) as avg_ppdcg10
    from `etsy-search-ml-prod.search_ranking.second_pass_eval`
    where evalDate between date('2025-05-07') and date('2025-05-15')
    and source in ('web_purchase', 'boe_purchase')
    and tags.userId > 0
    and (tags.userCountry='US')
    and modelName in (
      'nrv2-us-intl-si','nrv2-us-intl-v2-digital-si'
    )
    group by modelName
    order by modelName


ndcg48: -0.39%
ndcg10: -0.57%
ppdcg10: -0.02%


In [17]:
# Non-US traffic, no SI/SO filter:
# variant `nrv2-us-intl-v2-digital-si` vs. control `nr-loc-no-xwalk-56d`.
df = run_query(
    start_date="2025-05-07", end_date="2025-05-15",
    siso="both", source="both",
    us_only=False, intl_only=True,
    model_names=dict(
        control="nr-loc-no-xwalk-56d",
        variant="nrv2-us-intl-v2-digital-si",
    ),
)



select 
      modelName,
      avg(metrics.purchase.ndcg48) as avg_pndcg48,  
      avg(metrics.purchase.ndcg10) as avg_pndcg10, 
      avg(metrics.purchase.dcgAttributedPrice10) as avg_ppdcg10
    from `etsy-search-ml-prod.search_ranking.second_pass_eval`
    where evalDate between date('2025-05-07') and date('2025-05-15')
    and source in ('web_purchase', 'boe_purchase')
    and True
    and (tags.userCountry!='US')
    and modelName in (
      'nr-loc-no-xwalk-56d','nrv2-us-intl-v2-digital-si'
    )
    group by modelName
    order by modelName


ndcg48: 0.35%
ndcg10: 0.32%
ppdcg10: 1.24%


Unified model try 2

In [18]:
# US traffic, SO segment (2025-05-23 to 2025-06-08 window):
# variant `nrv2-us-intl-v2-si` vs. control `nrv2-semrel-uni-serve-tm-so`.
df = run_query(
    start_date="2025-05-23", end_date="2025-06-08",
    siso="so", source="both",
    us_only=True, intl_only=False,
    model_names=dict(
        control="nrv2-semrel-uni-serve-tm-so",
        variant="nrv2-us-intl-v2-si",
    ),
)



select 
      modelName,
      avg(metrics.purchase.ndcg48) as avg_pndcg48,  
      avg(metrics.purchase.ndcg10) as avg_pndcg10, 
      avg(metrics.purchase.dcgAttributedPrice10) as avg_ppdcg10
    from `etsy-search-ml-prod.search_ranking.second_pass_eval`
    where evalDate between date('2025-05-23') and date('2025-06-08')
    and source in ('web_purchase', 'boe_purchase')
    and (tags.userId = 0 or tags.userId is null)
    and (tags.userCountry='US')
    and modelName in (
      'nrv2-semrel-uni-serve-tm-so','nrv2-us-intl-v2-si'
    )
    group by modelName
    order by modelName


ndcg48: -0.26%
ndcg10: -0.44%
ppdcg10: -2.31%


In [19]:
# US traffic, SI segment (2025-05-23 to 2025-06-08 window):
# variant `nrv2-us-intl-v2-si` vs. control `nrv2-us-intl-si`.
df = run_query(
    start_date="2025-05-23", end_date="2025-06-08",
    siso="si", source="both",
    us_only=True, intl_only=False,
    model_names=dict(
        control="nrv2-us-intl-si",
        variant="nrv2-us-intl-v2-si",
    ),
)



select 
      modelName,
      avg(metrics.purchase.ndcg48) as avg_pndcg48,  
      avg(metrics.purchase.ndcg10) as avg_pndcg10, 
      avg(metrics.purchase.dcgAttributedPrice10) as avg_ppdcg10
    from `etsy-search-ml-prod.search_ranking.second_pass_eval`
    where evalDate between date('2025-05-23') and date('2025-06-08')
    and source in ('web_purchase', 'boe_purchase')
    and tags.userId > 0
    and (tags.userCountry='US')
    and modelName in (
      'nrv2-us-intl-si','nrv2-us-intl-v2-si'
    )
    group by modelName
    order by modelName


ndcg48: -0.13%
ndcg10: -0.22%
ppdcg10: -0.21%


In [20]:
# Non-US traffic, no SI/SO filter (2025-05-23 to 2025-06-08 window):
# variant `nrv2-us-intl-v2-si` vs. control `nr-loc-no-xwalk-56d`.
df = run_query(
    start_date="2025-05-23", end_date="2025-06-08",
    siso="both", source="both",
    us_only=False, intl_only=True,
    model_names=dict(
        control="nr-loc-no-xwalk-56d",
        variant="nrv2-us-intl-v2-si",
    ),
)



select 
      modelName,
      avg(metrics.purchase.ndcg48) as avg_pndcg48,  
      avg(metrics.purchase.ndcg10) as avg_pndcg10, 
      avg(metrics.purchase.dcgAttributedPrice10) as avg_ppdcg10
    from `etsy-search-ml-prod.search_ranking.second_pass_eval`
    where evalDate between date('2025-05-23') and date('2025-06-08')
    and source in ('web_purchase', 'boe_purchase')
    and True
    and (tags.userCountry!='US')
    and modelName in (
      'nr-loc-no-xwalk-56d','nrv2-us-intl-v2-si'
    )
    group by modelName
    order by modelName


ndcg48: 0.91%
ndcg10: 1.22%
ppdcg10: 1.78%


CLIP PUSH AIR

In [5]:
# Variant `nrv2-clip-push-air-unified` against each segment's control.
# The three calls run in order and each rebinds `df`, so `df` ends up holding
# the non-US result.

# US traffic, SO segment
df = run_query(
    start_date="2025-06-09", end_date="2025-06-16",
    siso="so", source="both",
    us_only=True, intl_only=False,
    model_names=dict(
        control="nrv2-semrel-uni-serve-tm-so",
        variant="nrv2-clip-push-air-unified",
    ),
)

# US traffic, SI segment
df = run_query(
    start_date="2025-06-09", end_date="2025-06-16",
    siso="si", source="both",
    us_only=True, intl_only=False,
    model_names=dict(
        control="nrv2-us-intl-si",
        variant="nrv2-clip-push-air-unified",
    ),
)

# Non-US traffic, no SI/SO filter
df = run_query(
    start_date="2025-06-09", end_date="2025-06-16",
    siso="both", source="both",
    us_only=False, intl_only=True,
    model_names=dict(
        control="nr-loc-no-xwalk-56d",
        variant="nrv2-clip-push-air-unified",
    ),
)



select 
      modelName,
      avg(metrics.purchase.ndcg48) as avg_pndcg48,  
      avg(metrics.purchase.ndcg10) as avg_pndcg10, 
      avg(metrics.purchase.dcgAttributedPrice10) as avg_ppdcg10
    from `etsy-search-ml-prod.search_ranking.second_pass_eval`
    where evalDate between date('2025-06-09') and date('2025-06-16')
    and source in ('web_purchase', 'boe_purchase')
    and (tags.userId = 0 or tags.userId is null)
    and (tags.userCountry='US')
    and modelName in (
      'nrv2-semrel-uni-serve-tm-so','nrv2-clip-push-air-unified'
    )
    group by modelName
    order by modelName


ndcg48: -0.33%
ndcg10: -0.46%
ppdcg10: -18.53%




select 
      modelName,
      avg(metrics.purchase.ndcg48) as avg_pndcg48,  
      avg(metrics.purchase.ndcg10) as avg_pndcg10, 
      avg(metrics.purchase.dcgAttributedPrice10) as avg_ppdcg10
    from `etsy-search-ml-prod.search_ranking.second_pass_eval`
    where evalDate between date('2025-06-09') and date('2025-06-16')
    and source in ('web_purchase', 'boe_purchase')
    and tags.userId > 0
    and (tags.userCountry='US')
    and modelName in (
      'nrv2-us-intl-si','nrv2-clip-push-air-unified'
    )
    group by modelName
    order by modelName


ndcg48: 0.71%
ndcg10: 1.16%
ppdcg10: -17.05%




select 
      modelName,
      avg(metrics.purchase.ndcg48) as avg_pndcg48,  
      avg(metrics.purchase.ndcg10) as avg_pndcg10, 
      avg(metrics.purchase.dcgAttributedPrice10) as avg_ppdcg10
    from `etsy-search-ml-prod.search_ranking.second_pass_eval`
    where evalDate between date('2025-06-09') and date('2025-06-16')
    and source in ('web_purchase', 'boe_purchase')
    and True
    and (tags.userCountry!='US')
    and modelName in (
      'nr-loc-no-xwalk-56d','nrv2-clip-push-air-unified'
    )
    group by modelName
    order by modelName


ndcg48: 1.56%
ndcg10: 2.26%
ppdcg10: -5.72%


CLIP PUSH AIR - SW

In [6]:
# Variant `nrv2-clip-push-air-unified-sw` against each segment's control.
# The three calls run in order and each rebinds `df`, so `df` ends up holding
# the non-US result.

# US traffic, SO segment
df = run_query(
    start_date="2025-06-09", end_date="2025-06-16",
    siso="so", source="both",
    us_only=True, intl_only=False,
    model_names=dict(
        control="nrv2-semrel-uni-serve-tm-so",
        variant="nrv2-clip-push-air-unified-sw",
    ),
)

# US traffic, SI segment
df = run_query(
    start_date="2025-06-09", end_date="2025-06-16",
    siso="si", source="both",
    us_only=True, intl_only=False,
    model_names=dict(
        control="nrv2-us-intl-si",
        variant="nrv2-clip-push-air-unified-sw",
    ),
)

# Non-US traffic, no SI/SO filter
df = run_query(
    start_date="2025-06-09", end_date="2025-06-16",
    siso="both", source="both",
    us_only=False, intl_only=True,
    model_names=dict(
        control="nr-loc-no-xwalk-56d",
        variant="nrv2-clip-push-air-unified-sw",
    ),
)



select 
      modelName,
      avg(metrics.purchase.ndcg48) as avg_pndcg48,  
      avg(metrics.purchase.ndcg10) as avg_pndcg10, 
      avg(metrics.purchase.dcgAttributedPrice10) as avg_ppdcg10
    from `etsy-search-ml-prod.search_ranking.second_pass_eval`
    where evalDate between date('2025-06-09') and date('2025-06-16')
    and source in ('web_purchase', 'boe_purchase')
    and (tags.userId = 0 or tags.userId is null)
    and (tags.userCountry='US')
    and modelName in (
      'nrv2-semrel-uni-serve-tm-so','nrv2-clip-push-air-unified-sw'
    )
    group by modelName
    order by modelName


ndcg48: -0.52%
ndcg10: -0.81%
ppdcg10: -17.11%




select 
      modelName,
      avg(metrics.purchase.ndcg48) as avg_pndcg48,  
      avg(metrics.purchase.ndcg10) as avg_pndcg10, 
      avg(metrics.purchase.dcgAttributedPrice10) as avg_ppdcg10
    from `etsy-search-ml-prod.search_ranking.second_pass_eval`
    where evalDate between date('2025-06-09') and date('2025-06-16')
    and source in ('web_purchase', 'boe_purchase')
    and tags.userId > 0
    and (tags.userCountry='US')
    and modelName in (
      'nrv2-us-intl-si','nrv2-clip-push-air-unified-sw'
    )
    group by modelName
    order by modelName


ndcg48: -0.40%
ndcg10: -0.56%
ppdcg10: -15.51%




select 
      modelName,
      avg(metrics.purchase.ndcg48) as avg_pndcg48,  
      avg(metrics.purchase.ndcg10) as avg_pndcg10, 
      avg(metrics.purchase.dcgAttributedPrice10) as avg_ppdcg10
    from `etsy-search-ml-prod.search_ranking.second_pass_eval`
    where evalDate between date('2025-06-09') and date('2025-06-16')
    and source in ('web_purchase', 'boe_purchase')
    and True
    and (tags.userCountry!='US')
    and modelName in (
      'nr-loc-no-xwalk-56d','nrv2-clip-push-air-unified-sw'
    )
    group by modelName
    order by modelName


ndcg48: 0.86%
ndcg10: 1.15%
ppdcg10: -4.06%


Get segmented metrics from unified experiments

```sql
-- Script parameters: the date window and the experiment whose units are analysed.
DECLARE start_date DATE DEFAULT "2025-05-23";
DECLARE end_date DATE DEFAULT "2025-06-08";
DECLARE config_flag_param STRING DEFAULT "ranking/search.mmx.2025_q2.nrv2_unified_ranking_try2";

-- Materialises one row per (bucketed unit, event_id) for the experiment, with the
-- unit's event value (0 when the unit has no row for that event) and its
-- buyer_segment / canonical_region values attached.
create or replace table `etsy-search-ml-dev.yzhang.segmented_catapult_unified_try2` as (
    -- Units bucketed into the experiment, read from the `_date = end_date` partition only.
    -- NOTE(review): presumably that partition covers bucketing for the whole window — confirm.
    with ab_first_bucket as (
        SELECT 
            bucketing_id, bucketing_id_type, variant_id, bucketing_ts
        FROM `etsy-data-warehouse-prod.catapult_unified.bucketing_period`
        WHERE _date = end_date
        AND experiment_id = config_flag_param
    ),
    -- Segment values in long form: one row per (unit, segment event_id).
    first_bucket_segments_unpivoted as (
        SELECT 
            bucketing_id, variant_id, event_id, event_value
        FROM `etsy-data-warehouse-prod.catapult_unified.aggregated_segment_event`
        WHERE _date = end_date
        AND experiment_id = config_flag_param
        AND event_id IN (
            "buyer_segment",
            "canonical_region"
        )
    ),
    -- Same data in wide form: one row per (bucketing_id, variant_id) with
    -- buyer_segment and canonical_region as columns.
    first_bucket_segments as (
        SELECT *
        FROM first_bucket_segments_unpivoted
        PIVOT(
            MAX(event_value)
            FOR event_id IN (
                "buyer_segment",
                "canonical_region"
            )
        )
    ),
    -- The fixed list of event_ids to report on, as a one-column relation.
    events as (
        SELECT *
        FROM UNNEST([
            "backend_cart_payment", -- conversion rate
            "total_winsorized_gms", -- winsorized acbv
            "purchase_NDCG",
            "rich_search_events_w_purchase"
        ]) AS event_id
    ),
    -- Per-unit values for those events over [start_date, end_date], restricted to the experiment.
    events_per_unit as (
        SELECT
            bucketing_id, variant_id, event_id, event_value
        FROM `etsy-data-warehouse-prod.catapult_unified.aggregated_event_func`(start_date, end_date)
        JOIN events USING (event_id)
        WHERE experiment_id = config_flag_param   
    )
    SELECT
        bucketing_id,
        variant_id,
        event_id,
        -- Units with no row for an event get 0 rather than NULL.
        COALESCE(event_value, 0) AS event_count,
        buyer_segment,
        canonical_region,
    FROM ab_first_bucket
    -- CROSS JOIN gives every unit a row for every event before values are attached.
    CROSS JOIN events
    LEFT JOIN events_per_unit
    USING(bucketing_id, variant_id, event_id)
    -- Inner join: units with no segment row are dropped from the output.
    JOIN first_bucket_segments
    USING(bucketing_id, variant_id)
);


-- CVR: per variant, the fraction of rows for "backend_cart_payment" whose
-- event_count is non-zero.
select
    variant_id,
    avg(case when event_count = 0 then 0 else 1 end) as percent_units_with_event
from `etsy-search-ml-dev.yzhang.segmented_catapult_unified_try2`
where event_id = "backend_cart_payment"
group by variant_id
order by variant_id;

-- ACBV: per variant, the mean "total_winsorized_gms" event_count over rows
-- with a non-zero value (zeros become NULL and are ignored by AVG).
select
    variant_id,
    avg(nullif(event_count, 0)) as avg_events_per_unit_with_event
from `etsy-search-ml-dev.yzhang.segmented_catapult_unified_try2`
where event_id = "total_winsorized_gms"
group by variant_id
order by variant_id;

-- pNDCG: per variant, summed "purchase_NDCG" divided by summed
-- "rich_search_events_w_purchase".
-- Zero counts are turned into NULL before summing, so a variant with no
-- non-zero denominator yields NULL instead of a division-by-zero error.
with pNDCG_data as (
    SELECT
        variant_id,
        SUM(IF(event_count = 0, NULL, event_count)) AS pNDCG_sum
    FROM `etsy-search-ml-dev.yzhang.segmented_catapult_unified_try2`
    where event_id = "purchase_NDCG"
    GROUP BY variant_id
),
purchase_events_data as (
    SELECT
        variant_id,
        SUM(IF(event_count = 0, NULL, event_count)) AS nPurch_sum
    FROM `etsy-search-ml-dev.yzhang.segmented_catapult_unified_try2`
    where event_id = "rich_search_events_w_purchase"
    GROUP BY variant_id
)
-- Ordering belongs on the outermost query: an ORDER BY inside a CTE does not
-- guarantee the order of the final result. The ratio column is also named so
-- the output is self-describing.
select
    variant_id,
    pNDCG_sum / nPurch_sum AS pNDCG
from pNDCG_data
join purchase_events_data
using (variant_id)
order by variant_id;
```