In [None]:
from pathlib import Path
import json
import rich
import pandas as pd
import numpy as np
from sklearn.metrics import precision_score, accuracy_score, f1_score
import matplotlib.pyplot as plt
import seaborn as sns
sns.set_theme()

# Percent-style renderer ('{:,.2%}') shared by every metric column that is
# written to a LaTeX table.
_as_percent = '{:,.2%}'.format
formatters = {metric: _as_percent for metric in ('accuracy', 'F1', 'precision')}

# Directory of the multirun being analysed; its sub-directories are scanned
# for `data.json` result files.
multirun_path = Path("/home/valv/remote_data/17-31-14")

In [None]:
# Collect the results of every experiment found under `multirun_path`.
#
# Produces three module-level tables used by the cells below:
#   summary : one row per experiment (the raw content of its data.json)
#   records : one row per (experiment, question) with label and prediction
#   series  : one column of predictions per strategy
summary = []
records = []
glob_keys = ['strategy']               # per-experiment fields copied onto each record
local_keys = ['labels', 'predictions'] # per-question fields, indexed by question
series = {}

# Read each data.json exactly once, closing the file handle right away.
loaded = []
for exp in multirun_path.iterdir():
    if exp.is_dir():
        with open(exp / 'data.json', 'r') as f:
            loaded.append((exp, json.load(f)))

# Best accuracy first; the sort is stable, so ties keep directory order.
loaded.sort(key=lambda item: -item[1]['accuracy'])
sorted_exp_paths = [exp for exp, _ in loaded]

for exp, exp_data in loaded:
    # Keep only the part of the strategy name before the first '+'.
    exp_data['strategy'] = exp_data['strategy'].split('+')[0]
    summary.append(exp_data)
    # NOTE(review): two experiments whose shortened strategy names collide
    # overwrite each other here — confirm names are unique.
    series[exp_data['strategy']] = exp_data['predictions']
    for i in range(len(exp_data['predictions'])):
        record = {k: exp_data[k][i] for k in local_keys}
        record.update({k: v for k, v in exp_data.items() if k in glob_keys})
        record['qid'] = i  # question id = position in the predictions list
        records.append(record)

summary = pd.DataFrame(summary)
records = pd.DataFrame(records)
series = pd.DataFrame(series)

# Write the summary to file.
# NOTE(review): `formatters` has the key 'F1' but the column requested here
# is 'f1'; that column presumably falls back to `float_format` — confirm.
with pd.option_context("max_colwidth", 1000):
    summary.to_latex(buf=multirun_path / 'summary.tex',
                     columns=['strategy', 'accuracy', 'f1'],
                     float_format="%.2f",
                     formatters=formatters,
                     index=False)

In [None]:
## Agreement matrix

In [None]:
# Pairwise agreement matrix between strategies (rows/columns follow `summary`).
#
#   X[i, i]        accuracy of strategy i
#   X[i, j], j > i among the questions strategy i answered correctly, the
#                  fraction where strategy j gave the same answer
#   X[i, j], j < i among the questions strategy i answered wrongly, the
#                  fraction where strategy j gave the same answer
#
# Predictions are compared question by question. Filtering each prediction
# list on its own and zipping the results would pair answers that belong to
# different questions.
N = len(summary)
X = np.zeros((N, N))
for i in range(N):
    row_i = summary.iloc[i]
    labels = row_i['labels']
    y_i = row_i['predictions']

    # diagonal
    X[i, i] = accuracy_score(labels, y_i)

    for j in range(N):
        if j == i:
            continue
        row_j = summary.iloc[j]
        assert row_j['labels'] == labels
        y_j = row_j['predictions']

        # top-diagonal looks at i's correct answers, bottom-diagonal at i's wrong ones
        on_correct = j > i
        matches = [
            t_i == t_j
            for t_i, t_j, l in zip(y_i, y_j, labels)
            if (t_i == l) == on_correct
        ]
        # No question in the subset (e.g. a strategy that is never wrong):
        # store NaN instead of dividing by zero.
        X[i, j] = sum(matches) / len(matches) if matches else np.nan


fig, ax = plt.subplots(figsize=((16, 12)), dpi=300)
sns.heatmap(X,
            annot=False,
            fmt='g',
            ax=ax,
            xticklabels=summary['strategy'],
            yticklabels=summary['strategy'],
            linewidths=.5,
            center=0.25,
            cmap='icefire',
           )
plt.tight_layout()
plt.savefig(multirun_path / 'confusion.png')
plt.show()

In [None]:
from sklearn.metrics.pairwise import pairwise_distances
# Cluster strategies by how similar their prediction vectors are.
df = series

# One row per strategy after the transpose. Despite the variable name, the
# similarity is 1 - Hamming distance, i.e. the fraction of positions where two
# strategies predict the same value.
hamming_dist = pairwise_distances(df.T, metric="hamming")
jac_sim = pd.DataFrame(1 - hamming_dist, index=df.columns, columns=df.columns)

clustermap_kwargs = dict(
    dendrogram_ratio=(.1, .1),
    cbar_pos=(0.7, 0.1, .03, .2),
    figsize=(20, 20),
)
g = sns.clustermap(jac_sim, **clustermap_kwargs)
plt.savefig(multirun_path / 'cluster.png')
plt.show()

## Expert Model

In [None]:
from collections import Counter

def get_first(serie):
    """Return the single value shared by every entry of `serie`.

    Raises AssertionError when the entries are not all equal to the first
    one, and IndexError when `serie` is empty.
    """
    values = serie.values
    head = values[0]
    for other in values:
        assert head == other
    return head
    
def majority_vote(serie):
    """Return the most frequent value among the entries of `serie`.

    Raises IndexError when `serie` is empty.
    """
    (winner, _count), = Counter(serie.values).most_common(1)[:1] or [()][:0] or [[][0]]
    return winner



# "Expert" ensembles: majority vote over the top-n strategies by accuracy.
ranked_strategies = summary.sort_values('accuracy', ascending=False)['strategy'].values

expert_data = []
for top_n in [len(summary), 3, 5, 10]:
    # Keep only the records produced by the n best strategies.
    selected = ranked_strategies[:top_n]
    records_ = records[records['strategy'].isin(selected)]

    # One row per question: the shared label and the voted prediction.
    per_question = records_[['labels', 'predictions', 'qid']].groupby('qid')
    expert = per_question.agg({'labels': get_first, 'predictions': majority_vote})

    y_true, y_pred = expert['labels'], expert['predictions']
    acc = accuracy_score(y_true, y_pred)
    prec = precision_score(y_true, y_pred, average='macro')
    f1 = f1_score(y_true, y_pred, average='macro')
    expert_data.append(dict([
        ('n_experts', top_n),
        ('accuracy', acc),
        ('precision', prec),
        ('F1', f1),
    ]))

# Largest ensemble first.
expert_data = pd.DataFrame(expert_data).sort_values('n_experts', ascending=False)
with pd.option_context("max_colwidth", 1000):
    expert_data.to_latex(
        buf=multirun_path / 'experts.tex',
        columns=['n_experts', 'accuracy', 'F1', 'precision'],
        float_format="%.2f",
        index=False,
    )
expert_data

In [None]:
## Agreement matrix

In [None]:
from sklearn.metrics import precision_score, accuracy_score
# Pairwise agreement matrix between strategies (rows/columns follow `summary`).
#
#   X[i, i]        accuracy of strategy i
#   X[i, j], j > i among the questions strategy i answered correctly, the
#                  fraction where strategy j gave the same answer
#   X[i, j], j < i among the questions strategy i answered wrongly, the
#                  fraction where strategy j gave the same answer
#
# Predictions are compared question by question. Filtering each prediction
# list on its own and zipping the results would pair answers that belong to
# different questions.
N = len(summary)
X = np.zeros((N, N))
for i in range(N):
    row_i = summary.iloc[i]
    labels = row_i['labels']
    y_i = row_i['predictions']

    # diagonal
    X[i, i] = accuracy_score(labels, y_i)

    for j in range(N):
        if j == i:
            continue
        row_j = summary.iloc[j]
        assert row_j['labels'] == labels
        y_j = row_j['predictions']

        # top-diagonal looks at i's correct answers, bottom-diagonal at i's wrong ones
        on_correct = j > i
        matches = [
            t_i == t_j
            for t_i, t_j, l in zip(y_i, y_j, labels)
            if (t_i == l) == on_correct
        ]
        # No question in the subset (e.g. a strategy that is never wrong):
        # store NaN instead of dividing by zero.
        X[i, j] = sum(matches) / len(matches) if matches else np.nan


fig, ax = plt.subplots(figsize=((16, 12)), dpi=300)
sns.heatmap(X,
            annot=False,
            fmt='g',
            ax=ax,
            xticklabels=summary['strategy'],
            yticklabels=summary['strategy'],
            linewidths=.5,
            center=0.25,
            cmap='icefire',
           )
plt.tight_layout()
plt.savefig(multirun_path / 'confusion.png')
plt.show()

In [None]:
from sklearn.metrics.pairwise import pairwise_distances
# Cluster strategies by how similar their prediction vectors are.
df = series

# One row per strategy after the transpose. Despite the variable name, the
# similarity is 1 - Hamming distance, i.e. the fraction of positions where two
# strategies predict the same value.
hamming_dist = pairwise_distances(df.T, metric="hamming")
jac_sim = pd.DataFrame(1 - hamming_dist, index=df.columns, columns=df.columns)

clustermap_kwargs = dict(
    dendrogram_ratio=(.1, .1),
    cbar_pos=(0.7, 0.1, .03, .2),
    figsize=(20, 20),
)
g = sns.clustermap(jac_sim, **clustermap_kwargs)
plt.savefig(multirun_path / 'cluster.png')
plt.show()

## Expert Model

In [None]:
from collections import Counter

def get_first(serie):
    """Return the single value shared by every entry of `serie`.

    Raises AssertionError when the entries are not all equal to the first
    one, and IndexError when `serie` is empty.
    """
    values = serie.values
    head = values[0]
    for other in values:
        assert head == other
    return head
    
def majority_vote(serie):
    """Return the most frequent value among the entries of `serie`.

    Raises IndexError when `serie` is empty.
    """
    ranking = Counter(serie.values).most_common(1)
    winner, _count = ranking[0]
    return winner



# "Expert" ensembles: majority vote over the top-n strategies by accuracy.
ranked_strategies = summary.sort_values('accuracy', ascending=False)['strategy'].values
expert_data = []
for top_n in [len(summary), 3, 5, 10]:
    # Keep only the records produced by the n best strategies.
    records_ = records[records['strategy'].isin(ranked_strategies[:top_n])]
    # One row per question: the shared label and the voted prediction.
    expert = records_[['labels', 'predictions', 'qid']].groupby('qid')
    expert = expert.agg(
        {'labels': get_first,
         'predictions': majority_vote,
         })
    acc = accuracy_score(expert['labels'], expert['predictions'])
    f1 = f1_score(expert['labels'], expert['predictions'], average='macro')
    precision = precision_score(expert['labels'], expert['predictions'], average='macro')
    expert_data.append({'n_experts': top_n, 'accuracy': acc, 'F1': f1, 'precision': precision})

# Largest ensemble first.
expert_data = pd.DataFrame(expert_data).sort_values('n_experts', ascending=False)
with pd.option_context("max_colwidth", 1000):
    # Use the module-level `formatters` dict; the name `formatter` was never
    # defined and raised NameError.
    # NOTE(review): the DataFrame index is written as well (no index=False) —
    # confirm that is intended.
    expert_data.to_latex(buf=multirun_path / 'experts.tex',
                         columns=['n_experts', 'accuracy', 'precision', 'F1'],
                         formatters=formatters)
expert_data