# code source: https://colab.research.google.com/drive/1RAWb22-PFNI-X1gPVzc927SGUdfr6nsR?usp=sharing#scrollTo=2hkd4dxRR-DI

In [2]:
import pandas as pd
from matplotlib import pyplot as plt
import plotly.express as px

We allowed the user to declare a tie between the pairs of models. To collect additional data, later in the tournament we also allowed the user to declare a tie in which both models were bad. There were a significant number of tied outcomes.

In [16]:
battled_pairs = r'..\results\log_battle_arena_gpt4_as_judger.csv'
df = pd.read_csv(battled_pairs)
columns_to_inclusive = ['model_a', 'model_b', 'winner']
data = df[columns_to_inclusive]

data_no_ties = data[data['winner'].str.contains('tie', na=False) == False]
data_ties_only = data[data['winner'].str.contains('tie', na=False) == True]

ValueError: The truth value of a Series is ambiguous. Use a.empty, a.bool(), a.item(), a.any() or a.all().

In [6]:
fig = px.bar(pd.concat([data["model_a"], data["model_b"]]).value_counts(),
             title="Battle Count for Each Model", text_auto=True)
fig.update_layout(xaxis_title="model", yaxis_title="Battle Count", height=400,
                  showlegend=False)
fig

In [14]:
def visualize_battle_count(battles, title):
    ptbl = pd.pivot_table(battles, index="model_a", columns="model_b", aggfunc="size",
                          fill_value=0)
    battle_counts = ptbl + ptbl.T
    ordering = battle_counts.sum().sort_values(ascending=False).index
    fig = px.imshow(battle_counts.loc[ordering, ordering],
                    title=title, text_auto=True, width=600)
    fig.update_layout(xaxis_title="Model B",
                      yaxis_title="Model A",
                      xaxis_side="top", height=600, width=600,
                      title_y=0.07, title_x=0.5)
    fig.update_traces(hovertemplate=
                      "Model A: %{y}<br>Model B: %{x}<br>Count: %{z}<extra></extra>")
    return fig

fig = visualize_battle_count(data, title="Battle Count of Each Combination of Models")
fig

In [15]:
fig = visualize_battle_count(data_no_ties, title="Battle Count of Each Combination of Models")
fig

In [17]:
fig = visualize_battle_count(data_ties_only, title="Battle Count of Each Combination of Models")
fig