In [1]:
import utils.evaluation as eval_utils
import utils.model as model_utils
import torch
import polars as pl
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

In [2]:
def compute_hits_at_k(ranks, ks=np.arange(1,11)):
    """Compute Hits@k for every cutoff in ``ks``.

    Ranks are 0-based (the same convention ``compute_mmr`` relies on when it
    takes ``1 / (rank + 1)``), so a query is a hit at ``k`` when
    ``0 <= rank < k``. Negative ranks are the "not found" sentinel (for
    example the ``-1`` emitted by ``compute_semantic_composition``) and are
    never counted as hits.

    Args:
        ranks: Array-like of integer ranks, one per query.
        ks: Iterable of cutoffs; defaults to 1..10.

    Returns:
        Tuple ``(hits, ks)`` where ``hits`` is a NumPy array holding the
        fraction of queries that are hits at each cutoff, and ``ks`` is the
        cutoff iterable exactly as it was passed in.
    """
    ranks = np.asarray(ranks)
    # Without this mask a "not found" rank of -1 satisfies ``rank < k`` for
    # every k and silently inflates the metric.
    found = ranks >= 0
    return np.array([(found & (ranks < k)).mean() for k in ks]), ks

def plot_hits_at_k(model_ranks, title, type = None):
    """Plot one Hits@k curve per model on a single figure and show it.

    Args:
        model_ranks: Mapping from model name to its ranks. When ``type`` is
            given, each value is itself indexed with ``type`` to obtain the
            ranks.
        title: Prefix of the figure title (": Hits@k Comparison" is appended).
        type: Optional key selecting a sub-entry of every model's ranks.
            (The name shadows the builtin but is kept so keyword callers
            keep working.)
    """
    # Use the notebook-level MODEL_NAME mapping when it exists so the curve
    # order is unchanged; fall back to the keys of ``model_ranks`` so the
    # function also works on a fresh kernel where MODEL_NAME was never defined.
    try:
        model_names = list(MODEL_NAME.keys())
    except NameError:
        model_names = list(model_ranks.keys())

    plt.figure(figsize=(10, 5))

    ks = []  # stays empty (no ticks) when there is nothing to plot
    for model_name in model_names:
        ranks = model_ranks[model_name]
        # ``is not None`` rather than truthiness, so falsy keys such as 0 work.
        if type is not None:
            ranks = ranks[type]
        top_k_accuracy, ks = compute_hits_at_k(ranks)
        plt.plot(ks, top_k_accuracy, label=model_name)

    plt.xticks(ks)
    plt.xlabel("k")
    plt.ylabel("Hit at k")
    plt.title(title + ": Hits@k Comparison")
    # Single legend call, placed outside the axes so it does not cover curves.
    plt.legend(bbox_to_anchor=(1.05, 1), loc='upper left', borderaxespad=0.)

    plt.tight_layout()  # Adjust layout to not cut off labels
    plt.show()

def compute_mmr(ranks):
    """Return the mean reciprocal rank (MRR) of 0-based ranks.

    The function name is kept as-is for existing callers.

    Args:
        ranks: Array-like of 0-based ranks (0 means the target was ranked
            first). Negative values are treated as "not found" (for example
            the ``-1`` emitted by ``compute_semantic_composition``) and
            contribute a reciprocal rank of 0 instead of a division by zero.

    Returns:
        The mean of ``1 / (rank + 1)`` over all queries as a NumPy float.
        An empty input yields ``nan``, as NumPy's ``mean`` does.
    """
    ranks = np.asarray(ranks, dtype=np.float64)
    found = ranks >= 0
    # The inner ``where`` keeps the denominator non-zero for missing entries,
    # so no divide-by-zero warning or ``inf`` is produced before masking.
    safe_denominator = np.where(found, ranks + 1.0, 1.0)
    reciprocal_ranks = np.where(found, 1.0 / safe_denominator, 0.0)
    return reciprocal_ranks.mean()

In [3]:
# Load the concept tables and derive the held-out (test) concept indices.
Config = model_utils.Config()

df_concept_all = pl.read_parquet(Config.concept_parquet)
df_concept_train = pl.read_parquet(Config.train_triplet)

# Distinct "idx" values present in the full table and in the training triplets.
df_concept_all_idx = set(df_concept_all["idx"].unique().to_list())
df_concept_train_idx = set(df_concept_train["idx"].unique().to_list())

# Test concepts are those whose idx never appears in the training triplets.
df_concept_test_idx = df_concept_all_idx.difference(df_concept_train_idx)
df_concept_test = list(df_concept_test_idx)

# Lookup from the "id" column to the "idx" column of the full concept table.
id2idx = {
    concept_id: concept_idx
    for concept_id, concept_idx in zip(df_concept_all["id"], df_concept_all["idx"])
}

# Test concepts restricted to rows whose status is "defined".
df_concept_test_fd = (
    df_concept_all
    .filter(pl.col("idx").is_in(df_concept_test) & (pl.col("status") == "defined"))
    .get_column("idx")
    .unique()
    .to_list()
)



In [4]:
# Locations of the precomputed embeddings and of the benchmark files.
# NOTE(review): absolute, machine-specific paths; adjust before running elsewhere.
FILE_PATH_SAVE = "D:/finetune_sbert_new/1Membeddings/lora_16_quantized/"
BENCHMARK_PATH = "D:/finetune_sbert_new/benchmark_jamil/"
embedding_name = "sapbert_lora_triplet16"

# The saved object is indexed by key; these two entries are reused by later cells.
embeddings = torch.load(f"{FILE_PATH_SAVE}{embedding_name}_quantized.pt")
embedding_exp_q, embedding_label_q = (
    embeddings["embedding_exp_q"],
    embeddings["embedding_label_q"],
)

# task 1: retrieve label given expression

In [5]:

# Task 1: rank the test concepts, passing the expression embeddings and then
# the label embeddings to the batch ranking helper.
rank = eval_utils.top_k_array_by_batch(
    df_concept_test_fd,
    embedding_exp_q,
    embedding_label_q,
)


  query_matrix = torch.tensor(query_matrix, dtype=torch.float32)
  candidate_matrix = torch.tensor(candidate_matrix, dtype=torch.float32)


Processing batch 1/435 (0-100)


KeyboardInterrupt: 

In [8]:
# Mean reciprocal rank for task 1, computed from `rank` (assigned in the task 1 cell).
# NOTE(review): the recorded run of that cell ended in KeyboardInterrupt, so `rank`
# may be undefined or stale here; rerun task 1 to completion before this cell.
compute_mmr(rank)

# tasks 2 and 3
- task 2: retrieve synonym given expression
- task 3: retrieve synonym given label

In [10]:
# The synonym embeddings file shares the directory and name prefix of the main one.
FILE_PATH_SAVE = "D:/finetune_sbert_new/1Membeddings/lora_16_quantized/"
embedding_name = "sapbert_lora_triplet16"
syn_embeddings = torch.load(f"{FILE_PATH_SAVE}{embedding_name}_syn_quantized.pt")

# Candidate matrices come from the main `embeddings` object loaded earlier.
embedding_exp, embedding_label = (
    embeddings["embedding_exp_q"],
    embeddings["embedding_label_q"],
)

embedding_syn_idx, embedding_syn_matrix = (
    syn_embeddings["idx"],
    syn_embeddings["expressions_embeddings"],
)

# Test queries: synonym indices that are also in the defined test concepts.
embedding_syn_test_idx = list(set(embedding_syn_idx) & set(df_concept_test_fd))

# Rank once against the expression embeddings (task 2) and once against the
# label embeddings (task 3).
rank_w_exp = eval_utils.top_k_array_syn(
    embedding_syn_test_idx,
    embedding_syn_idx,
    embedding_syn_matrix,
    embedding_exp,
    device=Config.device,
)
rank_w_label = eval_utils.top_k_array_syn(
    embedding_syn_test_idx,
    embedding_syn_idx,
    embedding_syn_matrix,
    embedding_label,
    device=Config.device,
)

In [12]:
# Mean reciprocal rank of the synonym queries against the expression embeddings
# (first line) and against the label embeddings (second line).
print(compute_mmr(rank_w_exp))
print(compute_mmr(rank_w_label))

# task 4: hierarchical similarity

In [None]:

# Read the benchmark TSV through pandas, convert it to polars, and cast the
# three id columns to strings.
df_hierarchical_similarity = pl.from_pandas(
    pd.read_table(BENCHMARK_PATH + "hierarchical_similarity_benchmark.tsv")
).select(
    [
        pl.col(column_name).cast(pl.String)
        for column_name in ("sctid", "close_sctid", "far_sctid")
    ]
)

embedding_exp, embedding_label = (
    embeddings["embedding_exp_q"],
    embeddings["embedding_label_q"],
)

# Same benchmark, scored once per embedding matrix.
accuracy_exp = eval_utils.compute_hierarchical_similarity(
    df_hierarchical_similarity, id2idx, embedding_exp
)
accuracy_label = eval_utils.compute_hierarchical_similarity(
    df_hierarchical_similarity, id2idx, embedding_label
)



In [14]:
# Report the values returned by eval_utils.compute_hierarchical_similarity for
# the expression embeddings and for the label embeddings.
print("Hierarchical similarity accuracy for expressions: ", accuracy_exp)
print("Hierarchical similarity accuracy for labels: ", accuracy_label)

# task 5: semantic composition

In [6]:
import ast
def compute_semantic_composition(df_semantic_composition, id2idx, embeddings_exp_ft, list_idx_all_pre_set):
    """Rank each anchor concept against the mean embedding of its related concepts.

    For every benchmark row, the embeddings of the related concepts are
    averaged into a "composed" vector. All rows of ``embeddings_exp_ft`` are
    scored against it by dot product, and the anchor's position is reported
    among the candidates whose index is in ``list_idx_all_pre_set``.

    Args:
        df_semantic_composition: Object exposing ``iter_rows()``; each row
            holds the anchor id in position 0 and, in position 1, a string
            containing a Python literal of related ids (e.g. ``"[1, 2]"``).
        id2idx: Mapping from id (as ``str``) to row index of
            ``embeddings_exp_ft``.
        embeddings_exp_ft: 2-D tensor of candidate embeddings, one row per
            concept index.
        list_idx_all_pre_set: Iterable of the row indices allowed as
            candidates when ranking.

    Returns:
        NumPy array with one entry per usable row: the 0-based rank of the
        anchor among the allowed candidates, or ``-1`` when the anchor is not
        one of them. Rows are skipped when the anchor id is unknown, the
        related-ids literal cannot be parsed, or none of the related ids is
        known. An empty integer array is returned when no row is usable.
    """
    device = embeddings_exp_ft.device
    num_candidates = embeddings_exp_ft.shape[0]

    list_anchor_idx = []
    list_composed_embedding = []

    # 1. Build all anchor indices and composed embeddings
    for row in df_semantic_composition.iter_rows():
        anchor_id = str(row[0])
        if anchor_id not in id2idx:
            continue

        try:
            related_ids = ast.literal_eval(row[1])
        except (ValueError, SyntaxError):
            continue

        if isinstance(related_ids, (str, int)):
            # A bare id such as "123" parses to a scalar; treat it as a
            # one-element collection instead of failing or iterating characters.
            related_ids = [related_ids]
        elif not isinstance(related_ids, (list, tuple, set, dict)):
            continue

        valid_related_indices = [
            id2idx[str(rid)] for rid in related_ids if str(rid) in id2idx
        ]
        if not valid_related_indices:
            continue

        list_anchor_idx.append(id2idx[anchor_id])
        list_composed_embedding.append(embeddings_exp_ft[valid_related_indices].mean(dim=0))

    # torch.stack raises on an empty list, so return early when nothing is usable.
    if not list_composed_embedding:
        return np.array([], dtype=int)

    # 2. Score every composed vector against every candidate: [N, D] x [D, Total]
    composed_matrix = torch.stack(list_composed_embedding)
    similarity_scores = torch.matmul(composed_matrix, embeddings_exp_ft.T)

    # 3. Mark the allowed candidates once, rather than running a membership
    #    test over the whole vocabulary for every row. Out-of-range indices
    #    can never appear in an argsort result, so they are dropped.
    pre_lookup = set(list_idx_all_pre_set)
    idx_all_pre_tensor = torch.tensor(list(list_idx_all_pre_set), dtype=torch.long, device=device)
    in_range = (idx_all_pre_tensor >= 0) & (idx_all_pre_tensor < num_candidates)
    is_pre = torch.zeros(num_candidates, dtype=torch.bool, device=device)
    is_pre[idx_all_pre_tensor[in_range]] = True

    # 4. Position of each anchor among the allowed candidates, best score first
    top_k_pre = []
    for scores, anchor_idx in zip(similarity_scores, list_anchor_idx):
        if anchor_idx not in pre_lookup:
            # The anchor is not a candidate, so sorting cannot find it.
            top_k_pre.append(-1)
            continue

        sorted_indices = torch.argsort(scores, descending=True)
        sorted_pre = sorted_indices[is_pre[sorted_indices]]
        match = (sorted_pre == anchor_idx).nonzero(as_tuple=True)[0]
        top_k_pre.append(match.item() if match.numel() > 0 else -1)

    return np.array(top_k_pre)

In [8]:
# Semantic-composition benchmark: keep the anchor column and its parents column.
df_semantic_composition = pl.from_pandas(
    pd.read_table(BENCHMARK_PATH + "semantic_composition_benchmark.tsv")
).select(
    pl.col("id_node"),
    pl.col("parents_ids"),
)

# Indices of every concept whose concept_type is "SCT_PRE"; only these are
# eligible candidates when ranking.
list_idx_all_pre = (
    df_concept_all
    .filter(pl.col("concept_type") == "SCT_PRE")
    .get_column("idx")
    .unique()
    .to_list()
)
list_idx_all_pre_set = set(list_idx_all_pre)

embedding_exp = embeddings["embedding_exp_q"].to(device=Config.device)
embedding_label = embeddings["embedding_label_q"].to(device=Config.device)

# Uses the notebook-local compute_semantic_composition (not the eval_utils
# one). Despite the `mr_` prefix, both results are arrays of ranks.
mr_exp, mr_label = (
    compute_semantic_composition(
        df_semantic_composition, id2idx, candidate_embeddings, list_idx_all_pre_set
    )
    for candidate_embeddings in (embedding_exp, embedding_label)
)


In [9]:
# Mean reciprocal rank of the semantic-composition ranks, for the expression
# embeddings (first line) and the label embeddings (second line).
print(compute_mmr(mr_exp))
print(compute_mmr(mr_label))

0.20483985866704937
0.19029802637027435


# IS_A invariant

In [14]:
# Embeddings of the new expressions, stored together with their true indices.
embedding_new_exp = torch.load(f"{FILE_PATH_SAVE}{embedding_name}_new_exp_quantized.pt")

# Rank the same queries against the expression embeddings, then against the
# label embeddings, in batches of 1000.
rank_exp = eval_utils.top_k_exp_by_batch(
    embedding_new_exp["idx_true"],
    embedding_new_exp["new_expressions_embeddings"],
    embedding_exp,
    batch_size=1000,
    device=Config.device,
)
rank_label = eval_utils.top_k_exp_by_batch(
    embedding_new_exp["idx_true"],
    embedding_new_exp["new_expressions_embeddings"],
    embedding_label,
    batch_size=1000,
    device=Config.device,
)


  query_matrix = torch.tensor(query_matrix, dtype=torch.float32).to(device)
  candidate_matrix = torch.tensor(candidate_matrix, dtype=torch.float32).to(device)


Processing batch 0/46410)
Processing batch 1000/46410)
Processing batch 2000/46410)
Processing batch 3000/46410)
Processing batch 4000/46410)
Processing batch 5000/46410)
Processing batch 6000/46410)
Processing batch 7000/46410)
Processing batch 8000/46410)
Processing batch 9000/46410)
Processing batch 10000/46410)
Processing batch 11000/46410)
Processing batch 12000/46410)
Processing batch 13000/46410)
Processing batch 14000/46410)
Processing batch 15000/46410)
Processing batch 16000/46410)
Processing batch 17000/46410)
Processing batch 18000/46410)
Processing batch 19000/46410)
Processing batch 20000/46410)
Processing batch 21000/46410)
Processing batch 22000/46410)
Processing batch 23000/46410)
Processing batch 24000/46410)
Processing batch 25000/46410)
Processing batch 26000/46410)
Processing batch 27000/46410)
Processing batch 28000/46410)
Processing batch 29000/46410)
Processing batch 30000/46410)
Processing batch 31000/46410)
Processing batch 32000/46410)
Processing batch 33000/

In [15]:
# Mean reciprocal rank over the ranks returned by eval_utils.top_k_exp_by_batch,
# against the expression embeddings (first line) and the label embeddings (second line).
print(compute_mmr(rank_exp))
print(compute_mmr(rank_label))

0.4119303841972174
0.18768916867913096
