In [4]:
import json
from scipy.stats import kendalltau, spearmanr
import rbo
import pandas as pd
import glob
import random
import os
import sys
import seaborn as sns
import matplotlib.pyplot as plt
from matplotlib.sankey import Sankey

# set column width to be able to see full text
pd.set_option('display.max_colwidth', 100)

base_path = f'../top_k_rankings'

In [5]:
with open(f'{base_path}/project-151-at-2024-02-07-14-54-bdd3deda.json', 'r') as f:
    annotations = json.load(f)
annotations[0]["annotations"][0]["result"][0]["value"]["ranker"]["rank"]

['chatgpt_22_multimedqa_6',
 'meta-llama_Llama-2-13b-chat-hf_22_multimedqa_10',
 'gpt2-xl_22_multimedqa_10',
 '01f617ac-9919-44fe-8fc8-2db9ff72b5e7',
 'gpt2_22_multimedqa_9']

In [6]:
len(annotations)

20

In [7]:
rankings = {}
for annotation in annotations:
    annotated_rankings = list(zip(range(len(annotation["annotations"][0]["result"][0])),
                                  annotation["annotations"][0]["result"][0]["value"]["ranker"]["rank"]))
    system_rankings = [(x["score"], x["id"]) for x in annotation["data"]["items"]]
    rankings[annotation["id"]] = {"annotated_rankings": annotated_rankings, "system_rankings": system_rankings}

In [8]:
d = {}
for k, v in rankings.items():
    a = [x[1] for x in v["annotated_rankings"]]
    s = [x[1] for x in sorted(v["system_rankings"], key=lambda x: x[0], reverse=True)]
    for i in range(len(a)):
        print(a[i][:8], s[i][:8])
    d[k] = {"rbo": rbo.RankingSimilarity(a, s).rbo(),
            "kendall": kendalltau(a, s).correlation,
            "spearman": spearmanr(a, s).correlation, }
    print(k, rbo.RankingSimilarity(a, s).rbo(), kendalltau(a, s).correlation, spearmanr(a, s).correlation)

chatgpt_ meta-lla
meta-lla chatgpt_
gpt2-xl_ 01f617ac
01f617ac gpt2-xl_
gpt2_22_ gpt2_22_
122997 0.7333333333333333 -0.19999999999999998 -0.3
chatgpt_ chatgpt_
meta-lla meta-lla
gpt2-xl_ gpt2-xl_
74c382e8 74c382e8
gpt2_35_ gpt2_35_
122998 1.0 0.9999999999999999 0.9999999999999999
chatgpt_ chatgpt_
meta-lla meta-lla
gpt2-xl_ gpt2-xl_
gpt2_1_m e6ab23e9
e6ab23e9 gpt2_1_m
122999 0.95 0.39999999999999997 0.6
chatgpt_ chatgpt_
meta-lla meta-lla
721cb1ec gpt2-xl_
gpt2-xl_ 721cb1ec
gpt2_68_ gpt2_68_
123000 0.9333333333333332 0.39999999999999997 0.6
chatgpt_ chatgpt_
meta-lla meta-lla
b3669cc3 b3669cc3
gpt2-xl_ gpt2-xl_
gpt2_54_ gpt2_54_
123001 1.0 0.9999999999999999 0.9999999999999999
meta-lla chatgpt_
chatgpt_ meta-lla
gpt2-xl_ e659c78c
e659c78c gpt2-xl_
gpt2_97_ gpt2_97_
123002 0.7333333333333333 -0.6 -0.7
chatgpt_ chatgpt_
meta-lla meta-lla
1715aa9e gpt2-xl_
gpt2-xl_ 1715aa9e
gpt2_117 gpt2_117
123003 0.9333333333333332 0.39999999999999997 0.6
chatgpt_ chatgpt_
6692650c meta-lla
gpt2-xl_ gpt

In [9]:
pd.DataFrame(d).T.describe()

Unnamed: 0,rbo,kendall,spearman
count,20.0,20.0,20.0
mean,0.8375,0.12,0.135
std,0.13332,0.516669,0.618381
min,0.533333,-0.6,-0.7
25%,0.733333,-0.25,-0.375
50%,0.858333,0.1,0.2
75%,0.933333,0.4,0.6
max,1.0,1.0,1.0


In [10]:
rankings

{122997: {'annotated_rankings': [(0, 'chatgpt_22_multimedqa_6'),
   (1, 'meta-llama_Llama-2-13b-chat-hf_22_multimedqa_10'),
   (2, 'gpt2-xl_22_multimedqa_10'),
   (3, '01f617ac-9919-44fe-8fc8-2db9ff72b5e7'),
   (4, 'gpt2_22_multimedqa_9')],
  'system_rankings': [(-0.0107910428196191,
    'meta-llama_Llama-2-13b-chat-hf_22_multimedqa_10'),
   (-5.417914390563965, 'gpt2_22_multimedqa_9'),
   (-0.0249859765172004, '01f617ac-9919-44fe-8fc8-2db9ff72b5e7'),
   (-0.0136555787175893, 'chatgpt_22_multimedqa_6'),
   (-0.0948760733008384, 'gpt2-xl_22_multimedqa_10')]},
 122998: {'annotated_rankings': [(0, 'chatgpt_35_multimedqa_4'),
   (1, 'meta-llama_Llama-2-13b-chat-hf_35_multimedqa_4'),
   (2, 'gpt2-xl_35_multimedqa_3'),
   (3, '74c382e8-098c-4253-b573-a0b989cbb46c'),
   (4, 'gpt2_35_multimedqa_4')],
  'system_rankings': [(-3.247481346130371, 'gpt2_35_multimedqa_4'),
   (-0.0074790478684008, 'meta-llama_Llama-2-13b-chat-hf_35_multimedqa_4'),
   (-0.0083243306726217, 'gpt2-xl_35_multimedqa_3'),

In [11]:
annotations[0]["id"]

122997

In [12]:
# flow = dict(zip(range(5), [{}] * 5))

def Xrankings(X, nodes_dict, flows_dict):

    for i, x in enumerate(X):
        X[i] = (x.replace("document", "Document")
                .replace("chatgpt","ChatGPT")
                .replace("meta-llama","Llama-2 13B")
                .replace("gpt2-xl","GPT-2 XL")
                .replace("gpt2","GPT-2"))

    for i in range(len(X)):
        if i not in nodes_dict:
            nodes_dict[i] = {}
        if X[i] in nodes_dict[i]:
            nodes_dict[i][X[i]] += 1
        else:
            nodes_dict[i][X[i]] = 1
            
        if i < len(X) - 1:
            t = (f"{X[i]}@{i+1}", f"{X[i+1]}@{i+2}")
            if t in flows_dict:
                flows_dict[t] += 1
            else:
                flows_dict[t] = 1
                
    return nodes_dict, flows_dict
               
               
s_nodes_dict, s_flows_dict = {}, {}    
a_nodes_dict, a_flows_dict = {}, {}    
for k, v in rankings.items():
    a = [x[1].split("_")[0] if len(x[1].split("_")[0].split("-")) < 5 else "document" for x in
         sorted(v["annotated_rankings"], key=lambda x: x[0], reverse=False)]
    s = [x[1].split("_")[0] if len(x[1].split("_")[0].split("-")) < 5 else "document" for x in
         sorted(v["system_rankings"], key=lambda x: x[0], reverse=True)]


    s_nodes_dict, s_flows_dict = Xrankings(s, s_nodes_dict, s_flows_dict)
    a_nodes_dict, a_flows_dict = Xrankings(a, a_nodes_dict, a_flows_dict)
    
s_nodes = []
a_nodes = []
for k, v in s_nodes_dict.items():
    level = []
    for k2, v2 in v.items():
        level.append((f"{k2}@{k+1}", v2))
    s_nodes.append(level)
for k, v in a_nodes_dict.items():
    level = []
    for k2, v2 in v.items():
        level.append((f"{k2}@{k+1}", v2))
    a_nodes.append(level)
    
s_flows = []
a_flows = []
for k, v in s_flows_dict.items():
    s_flows.append((k[0], k[1], v))
for k, v in a_flows_dict.items():
    a_flows.append((k[0], k[1], v))

In [13]:
s_nodes

[[('Llama-2 13B@1', 7), ('ChatGPT@1', 13)],
 [('ChatGPT@2', 6), ('Llama-2 13B@2', 12), ('GPT-2 XL@2', 2)],
 [('Document@3', 8), ('GPT-2 XL@3', 11), ('Llama-2 13B@3', 1)],
 [('GPT-2 XL@4', 7), ('Document@4', 10), ('GPT-2@4', 2), ('ChatGPT@4', 1)],
 [('GPT-2@5', 18), ('Document@5', 2)]]

In [14]:
# flows = sorted(flows, key=lambda x: x[2], reverse=True)
s_flows

[('Llama-2 13B@1', 'ChatGPT@2', 6),
 ('ChatGPT@2', 'Document@3', 4),
 ('Document@3', 'GPT-2 XL@4', 7),
 ('GPT-2 XL@4', 'GPT-2@5', 7),
 ('ChatGPT@1', 'Llama-2 13B@2', 12),
 ('Llama-2 13B@2', 'GPT-2 XL@3', 9),
 ('GPT-2 XL@3', 'Document@4', 9),
 ('Document@4', 'GPT-2@5', 10),
 ('Llama-2 13B@2', 'Document@3', 3),
 ('ChatGPT@1', 'GPT-2 XL@2', 1),
 ('GPT-2 XL@2', 'Llama-2 13B@3', 1),
 ('Llama-2 13B@3', 'Document@4', 1),
 ('ChatGPT@2', 'GPT-2 XL@3', 2),
 ('GPT-2 XL@3', 'GPT-2@4', 2),
 ('GPT-2@4', 'Document@5', 2),
 ('Llama-2 13B@1', 'GPT-2 XL@2', 1),
 ('GPT-2 XL@2', 'Document@3', 1),
 ('Document@3', 'ChatGPT@4', 1),
 ('ChatGPT@4', 'GPT-2@5', 1)]

In [17]:
from sankeyflow import Sankey
import matplotlib

# cmap = sns.cubehelix_palette(start=1,rot=len(s_flows+a_flows), light=.5, dark=.55, as_cmap=True)
cmap = sns.color_palette("jet",as_cmap=True)
rgba = matplotlib.colormaps.get_cmap(cmap)
unique_flows = list(set([f"{f[0].split('@')[0]}{f[1].split('@')[0]}" for f in s_flows+a_flows]))
unique_nodes = set()
for f in s_nodes+a_nodes:
    for n in f:
        unique_nodes.add(n[0].split('@')[0])
unique_nodes = list(unique_nodes)
def colf(i):
    v = rgba(i/len(unique_flows))
    return(v[0], v[1], v[2], .5)
def coln(i):
    v = rgba(i/len(unique_nodes))
    return(v[0], v[1], v[2], .5)
flow_cols = dict([(f,colf(i)) for i, f in enumerate(unique_flows)])
node_cols = dict([(f,coln(i)) for i, f in enumerate(unique_nodes)])

ValueError: No.

In [None]:
# for i, f in enumerate(s_flows):
#     s_flows[i] = (f[0], f[1], f[2], {"color": flow_cols[f"{f[0].split('@')[0]}{f[1].split('@')[0]}"],'flow_color_mode': 'source'})
# for i, f in enumerate(a_flows):
#     a_flows[i] = (f[0], f[1], f[2], {"color": flow_cols[f"{f[0].split('@')[0]}{f[1].split('@')[0]}"],'flow_color_mode': 'source'})

In [None]:
for i, f in enumerate(s_nodes):
    for j, n in enumerate(f):
        s_nodes[i][j] = (n[0], n[1], {"color": node_cols[n[0].split('@')[0]]})
for i, f in enumerate(a_nodes):
    for j, n in enumerate(f):
        a_nodes[i][j] = (n[0], n[1], {"color": node_cols[n[0].split('@')[0]]})

In [None]:
plt.figure(figsize=(16, 4), dpi=144)
s = Sankey(flows=s_flows, nodes=s_nodes, flow_color_mode='dest')
for n in s.nodes:
    for ni in n:
        ni.label = ni.label.split("@")[0]
s.draw()
plt.savefig("sankey-s.pdf", bbox_inches='tight')

In [None]:
plt.figure(figsize=(16, 4), dpi=144)
cmap = sns.cubehelix_palette(start=1,rot=-25, light=.75, dark=.25, as_cmap=True)
s = Sankey(flows=a_flows, nodes=a_nodes, flow_color_mode='dest')
for n in s.nodes:
    for ni in n:
        ni.label = ni.label.split("@")[0]
s.draw()
plt.savefig("sankey-a.pdf", bbox_inches='tight')