In [1]:
from urllib.parse import quote

import numpy as np
import pandas as pd
from google.cloud import bigquery

In [2]:
# URL prefix of the listing viewer; a comma-separated payload is appended
# directly after `listing_ids=` when the per-request links are built.
base_url = "https://atlas.etsycorp.com/recsys/listing_viewer.php?listing_ids="

In [3]:
# SQL that selects the request/query columns and the per-listing label columns
# from the results table, ordered by query, request UUID and ranking rank so
# that each request's listings arrive contiguously and in rank order.
query_str = """select 
    mmxRequestUUID,
    query,
    pct_em,
    queryBin,
    qisClass,
    queryTaxoTop,
    queryEntities,
    listingId,
    rankingRank,
    semrelClass,
    llm_final_label, 
    llm_consensus_type
from `etsy-search-ml-dev.search.yzhang_emqueries_aug_result`
order by query, mmxRequestUUID, rankingRank
"""

# Run the query in the `etsy-search-ml-dev` project and materialize the full
# result set as a pandas DataFrame (`result()` waits for the job to finish).
client = bigquery.Client(project="etsy-search-ml-dev")
query_job = client.query(query_str)
rows = query_job.result()
df = rows.to_dataframe()

In [4]:
# Per-listing relevance summary, e.g. "teacher:relevant-llm:relevant(5-0)".
# Built by zipping the three source columns rather than a row-wise
# `df.apply(..., axis=1)`: apply constructs a Series for every row (slow on
# large result sets) and does not yield a Series of strings when the frame is
# empty. The f-string is unchanged, so each value is formatted exactly as before.
df["rel_info"] = [
    f"teacher:{teacher_label}-llm:{llm_label}({consensus})"
    for teacher_label, llm_label, consensus in zip(
        df["semrelClass"], df["llm_final_label"], df["llm_consensus_type"]
    )
]

In [5]:
# Preview the first rows to sanity-check the loaded columns and `rel_info`.
df.head()

Unnamed: 0,mmxRequestUUID,query,pct_em,queryBin,qisClass,queryTaxoTop,queryEntities,listingId,rankingRank,semrelClass,llm_final_label,llm_consensus_type,rel_info
0,d7687416-4566-4979-adce-2c6042fd7e1b,10 year anniversary gift diamond,0.117647,head,direct_specified,weddings,,171733502,0,relevant,relevant,5-0,teacher:relevant-llm:relevant(5-0)
1,d7687416-4566-4979-adce-2c6042fd7e1b,10 year anniversary gift diamond,0.117647,head,direct_specified,weddings,,1815381940,1,partial,partial,5-0,teacher:partial-llm:partial(5-0)
2,d7687416-4566-4979-adce-2c6042fd7e1b,10 year anniversary gift diamond,0.117647,head,direct_specified,weddings,,1891684587,2,partial,partial,5-0,teacher:partial-llm:partial(5-0)
3,d7687416-4566-4979-adce-2c6042fd7e1b,10 year anniversary gift diamond,0.117647,head,direct_specified,weddings,,1199362126,3,partial,partial,5-0,teacher:partial-llm:partial(5-0)
4,d7687416-4566-4979-adce-2c6042fd7e1b,10 year anniversary gift diamond,0.117647,head,direct_specified,weddings,,4299041328,4,partial,partial,5-0,teacher:partial-llm:partial(5-0)


In [6]:
# Distinct request UUIDs, kept in order of first appearance in `df`.
unique_requests = list(df["mmxRequestUUID"].unique())

In [11]:
# Columns that describe the request/query itself and are therefore expected to
# be constant across all listing rows of one request.
REQUEST_LEVEL_COLS = [
    "mmxRequestUUID", "query", "pct_em",
    "queryBin", "qisClass", "queryTaxoTop", "queryEntities",
]


def _build_request_row(request_df, viewer_base_url):
    """Collapse one request's listing rows into a single row with a viewer link.

    Args:
        request_df: Rows sharing one `mmxRequestUUID`. Must contain the
            request-level columns plus `listingId` and `rel_info`.
        viewer_base_url: URL prefix that the comma-separated payload
            (query, then alternating listing id / rel_info) is appended to.

    Returns:
        A one-row DataFrame with the request-level columns and a
        `listing_viewer_url` column holding a `=HYPERLINK(...)` formula.

    Raises:
        AssertionError: If the request-level columns are not identical across
            every row of the request.
    """
    request_meta = request_df[REQUEST_LEVEL_COLS].drop_duplicates()
    assert request_meta.shape[0] == 1, (
        "expected identical request-level columns within one mmxRequestUUID, "
        f"found {request_meta.shape[0]} distinct combinations"
    )

    # Interleave listing id and relevance info: id1,info1,id2,info2,...
    payload_parts = [
        str(part)
        for listing_id, rel_info in zip(request_df["listingId"], request_df["rel_info"])
        for part in (listing_id, rel_info)
    ]
    page_info = ",".join(payload_parts)

    # Percent-encode the free-text query: an unescaped `"` would terminate the
    # spreadsheet string literal, a `,` would be read as a payload separator,
    # and `&` / `#` would truncate the URL parameter.
    # NOTE(review): assumes the viewer URL-decodes `listing_ids` (standard for
    # query-string parameters) -- confirm against the tool.
    encoded_query = quote(str(request_meta["query"].iloc[0]), safe="")

    formula = f'=HYPERLINK("{viewer_base_url}{encoded_query},{page_info}", "link")'
    # `.assign` returns a new frame, avoiding SettingWithCopyWarning from
    # writing into a frame derived by slicing.
    return request_meta.assign(listing_viewer_url=formula)


# groupby silently drops null keys, so fail fast instead of losing rows.
assert df["mmxRequestUUID"].notna().all(), "found rows with a null mmxRequestUUID"

# One pass over the frame (sort=False keeps requests in order of first
# appearance) instead of re-filtering the whole frame once per request.
res_df_list = [
    _build_request_row(request_df, base_url)
    for _, request_df in df.groupby("mmxRequestUUID", sort=False)
]

In [12]:
# Stack the per-request frames row-wise into one table with a fresh 0..n-1 index.
final_res_df = pd.concat(objs=res_df_list, axis=0, ignore_index=True)

In [13]:
# Write the per-request table to CSV without the index column. The
# `listing_viewer_url` cells contain `=HYPERLINK(...)` formulas, so they render
# as clickable links only when the file is opened in a spreadsheet application.
final_res_df.to_csv("./init_explore_results.csv", index=False)