In [80]:
import numpy as np
import pandas as pd
import json
import operator
from sklearn.metrics import f1_score
from scipy.stats import kendalltau

In [112]:
def sal_consistency(pairs:list, min_k=2, max_k=3):
    """Measure how well attribute-level and token-level saliencies agree at top-k.

    For every cut-off ``k`` in ``range(min_k, max_k)`` (``max_k`` is exclusive),
    each pair is reduced to two sequences of attribute names:

    * the ``k`` attributes with the highest attribute-level saliency, and
    * the attributes owning the ``k`` most salient tokens, where the attribute
      is the part of the token key before the first ``'__'``.

    The two sequences are compared with Kendall's tau and macro-averaged F1,
    and both scores are averaged over ``pairs``. Ties in saliency keep the
    dict's insertion order because ``sorted`` is stable.

    Args:
        pairs: list of ``(att_sal, token_sal)`` tuples; both items are dicts
            mapping a name to a saliency score. Token keys look like
            ``'<attribute>__<token>'``. Each dict should hold at least ``k``
            entries, otherwise the two sequences differ in length.
        min_k: first cut-off evaluated.
        max_k: cut-off at which evaluation stops (not evaluated itself).

    Returns:
        A list with one ``(avg_kt, avg_f1)`` tuple per evaluated ``k``. When
        ``pairs`` is empty both averages are NaN.
    """
    n_pairs = len(pairs)
    print(f'consistency over {n_pairs} pairs')
    by_score = operator.itemgetter(1)
    metrics = []
    for k in range(min_k, max_k):
        if n_pairs == 0:
            # Nothing to average: report "undefined" instead of dividing by zero.
            avg_kt = avg_f1 = float('nan')
        else:
            kt_total = 0
            f1_total = 0
            for att_sal, token_sal in pairs:
                top_attributes = sorted(att_sal.items(), key=by_score, reverse=True)[:k]
                top_tokens = sorted(token_sal.items(), key=by_score, reverse=True)[:k]

                y_true = [attribute for attribute, _ in top_attributes]
                y_pred = [token_key.split('__')[0] for token_key, _ in top_tokens]

                kt_curr = kendalltau(y_true, y_pred).statistic
                f1_curr = f1_score(y_true, y_pred, average='macro')

                # A NaN statistic fails both comparisons, so it adds nothing to
                # the total (the pair still counts in the denominator).
                if -1 <= kt_curr <= 1:
                    kt_total += kt_curr
                f1_total += f1_curr
            # Average over the pairs actually passed in, not a notebook global.
            avg_kt = kt_total / n_pairs
            avg_f1 = f1_total / n_pairs
        print(f"@{k} - kt:{avg_kt}, f1:{avg_f1}")
        metrics.append((avg_kt, avg_f1))
    return metrics

In [96]:
def normalize_saliency(saliency:dict):
    """Flatten list-valued saliency scores to a single score per key.

    A value that is a list is replaced by its first element; any other value
    is kept unchanged, so a dict mixing both forms keeps all of its keys.

    Args:
        saliency: dict mapping a name to a score, or to a list whose first
            element is used as the score.

    Returns:
        A new dict with the same keys as ``saliency``; the input is not
        modified.

    Raises:
        IndexError: if a list value is empty.
    """
    return {
        name: score[0] if isinstance(score, list) else score
        for name, score in saliency.items()
    }

In [97]:
def file_to_saliencies(data_file):
    """Load ``(id, saliency)`` tuples from a JSON results file.

    The file must contain an object with a ``'data'`` list whose entries carry
    an ``'id'`` and a ``'saliency'`` dict. Each saliency dict is passed through
    ``normalize_saliency`` before being returned.

    Args:
        data_file: path of the JSON file to read.

    Returns:
        A list of ``(id, saliency_dict)`` tuples in file order.

    Raises:
        KeyError: if ``'data'``, ``'id'`` or ``'saliency'`` is missing.
    """
    # Read as UTF-8 explicitly so decoding does not depend on the platform's
    # default locale encoding.
    with open(data_file, encoding='utf-8') as f:
        data = json.load(f)
    return [(s['id'], normalize_saliency(s['saliency'])) for s in data['data']]


In [98]:
# NOTE(review): absolute, machine-specific paths; this cell only runs where these files exist.
attributes_file = "/Users/tteofili/dev/ellmer/experiments/azure_openai/gpt-3.5-turbo/attribute/abt_buy/20240604/22_25/uncerta(freq)_sample_results.json"
tokens_file = "/Users/tteofili/dev/ellmer/experiments/azure_openai/gpt-3.5-turbo/token/abt_buy/20240607/09_03/uncerta(freq)_sample_results.json"

attributes_data = file_to_saliencies(attributes_file)
token_data = file_to_saliencies(tokens_file)

# Pair records position by position; keep a pair only when both files agree on
# the id at that position and both saliency dicts are non-empty.
saliency_pairs = [
    (att_sal, tok_sal)
    for (att_id, att_sal), (tok_id, tok_sal) in zip(attributes_data, token_data)
    if att_id == tok_id and len(att_sal) > 0 and len(tok_sal) > 0
]

saliency_pairs

[({'ltable_name': 1.0,
   'ltable_description': 1.0,
   'ltable_price': 0.0,
   'rtable_name': 0.0,
   'rtable_description': 0.0,
   'rtable_price': 0.0},
  {'ltable_name__pink': 0,
   'ltable_description__pink': 0,
   'ltable_description__sony': 0,
   'ltable_description__cyber-shot': 0,
   'ltable_description__7.2': 0,
   'ltable_description__megapixel': 0,
   'ltable_description__digital': 0,
   'ltable_description__camera': 0,
   'ltable_description__dscw120p': 0,
   'ltable_description__4x': 0,
   'ltable_description__optical': 0,
   'ltable_description__zoom': 0,
   'ltable_description__2.5': 0,
   "ltable_description__'": 0,
   'ltable_description__tft': 0,
   'ltable_description__lcd': 0,
   'ltable_description__15': 0,
   'ltable_description__mb': 0,
   'ltable_description__internal': 0,
   'ltable_description__memory': 0,
   'ltable_description__face': 0,
   'ltable_description__detection': 0,
   'ltable_description__super': 0,
   'ltable_description__steadyshot': 0,
   'ltab

In [113]:
# Score attribute/token saliency agreement for k = 2..5 (max_k is exclusive).
metrics = sal_consistency(pairs=saliency_pairs, min_k=2, max_k=6)
print(metrics)

consistency over 7 pairs
@2 - kt:0.2857142857142857, f1:0.5238095238095238
@3 - kt:0.23328473740792172, f1:0.3095238095238094
@4 - kt:-0.09199293583705415, f1:0.19166666666666665
@5 - kt:-0.11894650650159432, f1:0.14952380952380953
[(0.2857142857142857, 0.5238095238095238), (0.23328473740792172, 0.3095238095238094), (-0.09199293583705415, 0.19166666666666665), (-0.11894650650159432, 0.14952380952380953)]
