In [1]:
swiss_file = "analysis\E08_loss-balance\swiss_tournament_20251214_010134.pkl"

# Swiss Tournament Analysis

This notebook analyzes the results from a Swiss-system tournament between checkpoints.

In [2]:
import sys
from pathlib import Path

sys.path.append(str(Path("../..").resolve()))
import os

if os.path.basename(os.getcwd()) == "E08_loss-balance":
    os.chdir("../..")

In [3]:
import pickle
import pandas as pd
import plotly.graph_objs as go
from plotly.subplots import make_subplots
import numpy as np

## Load Tournament Data

In [4]:
# Load the tournament results
with open(swiss_file, "rb") as f:
    tournament_data = pickle.load(f)

print("Tournament data loaded successfully!")
print(f"\nAvailable keys: {tournament_data.keys()}")

Tournament data loaded successfully!

Available keys: dict_keys(['config', 'final_standings', 'match_results', 'timestamp'])


## Tournament Configuration

In [5]:
config = tournament_data["config"]
print(f"Number of Rounds: {config['num_rounds']}")
print(f"Double Swiss: {config['double_swiss']}")
print(f"McMahon Scoring: {config['mcmahon']}")
print(f"Checkpoint Folder: {config['folder']}")
print(f"Timestamp: {tournament_data['timestamp']}")

Number of Rounds: 50
Double Swiss: True
McMahon Scoring: True
Checkpoint Folder: CHECKPOINTS\LOSS_APPROACHs_1212-2_only_select
Timestamp: 20251214_010134


## Final Standings

In [6]:
# Convert standings to DataFrame
standings = tournament_data["final_standings"]
standings_df = pd.DataFrame(standings)

# Display top 20
print(f"\nTotal participants: {len(standings_df)}")
print(f"\nTop 20 Final Standings:")
top_20 = standings_df.head(20)[
    ["name", "score", "wins", "draws", "losses", "matches_played", "epoch"]
]
print(top_20.to_string(index=False))

standings_df.head()


Total participants: 5001

Top 20 Final Standings:
                                                  name  score  wins  draws  losses  matches_played  epoch
20251212_2206-LOSS_APPROACHs_1212-2_only_select_E_1034   68.0    68      0      32             100   1034
20251213_0341-LOSS_APPROACHs_1212-2_only_select_E_2322   67.0    67      0      33             100   2322
20251212_2002-LOSS_APPROACHs_1212-2_only_select_E_0535   67.0    67      0      33             100    535
20251213_1133-LOSS_APPROACHs_1212-2_only_select_E_3996   66.0    66      0      34             100   3996
20251213_1319-LOSS_APPROACHs_1212-2_only_select_E_4345   65.0    65      0      35             100   4345
20251213_1246-LOSS_APPROACHs_1212-2_only_select_E_4235   65.0    65      0      35             100   4235
20251213_1225-LOSS_APPROACHs_1212-2_only_select_E_4168   65.0    65      0      35             100   4168
20251212_2259-LOSS_APPROACHs_1212-2_only_select_E_1253   65.0    65      0      35             100   

Unnamed: 0,name,path,epoch,score,wins,draws,losses,matches_played
0,20251212_2206-LOSS_APPROACHs_1212-2_only_selec...,CHECKPOINTS\LOSS_APPROACHs_1212-2_only_select\...,1034,68.0,68,0,32,100
1,20251213_0341-LOSS_APPROACHs_1212-2_only_selec...,CHECKPOINTS\LOSS_APPROACHs_1212-2_only_select\...,2322,67.0,67,0,33,100
2,20251212_2002-LOSS_APPROACHs_1212-2_only_selec...,CHECKPOINTS\LOSS_APPROACHs_1212-2_only_select\...,535,67.0,67,0,33,100
3,20251213_1133-LOSS_APPROACHs_1212-2_only_selec...,CHECKPOINTS\LOSS_APPROACHs_1212-2_only_select\...,3996,66.0,66,0,34,100
4,20251213_1319-LOSS_APPROACHs_1212-2_only_selec...,CHECKPOINTS\LOSS_APPROACHs_1212-2_only_select\...,4345,65.0,65,0,35,100


In [7]:
standings_df

Unnamed: 0,name,path,epoch,score,wins,draws,losses,matches_played
0,20251212_2206-LOSS_APPROACHs_1212-2_only_selec...,CHECKPOINTS\LOSS_APPROACHs_1212-2_only_select\...,1034,68.0,68,0,32,100
1,20251213_0341-LOSS_APPROACHs_1212-2_only_selec...,CHECKPOINTS\LOSS_APPROACHs_1212-2_only_select\...,2322,67.0,67,0,33,100
2,20251212_2002-LOSS_APPROACHs_1212-2_only_selec...,CHECKPOINTS\LOSS_APPROACHs_1212-2_only_select\...,535,67.0,67,0,33,100
3,20251213_1133-LOSS_APPROACHs_1212-2_only_selec...,CHECKPOINTS\LOSS_APPROACHs_1212-2_only_select\...,3996,66.0,66,0,34,100
4,20251213_1319-LOSS_APPROACHs_1212-2_only_selec...,CHECKPOINTS\LOSS_APPROACHs_1212-2_only_select\...,4345,65.0,65,0,35,100
...,...,...,...,...,...,...,...,...
4996,20251213_0116-LOSS_APPROACHs_1212-2_only_selec...,CHECKPOINTS\LOSS_APPROACHs_1212-2_only_select\...,1771,34.0,34,0,66,100
4997,20251212_1800-LOSS_APPROACHs_1212-2_only_selec...,CHECKPOINTS\LOSS_APPROACHs_1212-2_only_select\...,36,34.0,34,0,66,100
4998,20251212_1749-LOSS_APPROACHs_1212-2_only_selec...,CHECKPOINTS\LOSS_APPROACHs_1212-2_only_select\...,0,33.5,32,1,65,98
4999,20251212_1754-LOSS_APPROACHs_1212-2_only_selec...,CHECKPOINTS\LOSS_APPROACHs_1212-2_only_select\...,7,33.0,26,0,60,86


In [8]:
standings_df.loc[standings_df["epoch"] == 3970]

Unnamed: 0,name,path,epoch,score,wins,draws,losses,matches_played
29,20251213_1126-LOSS_APPROACHs_1212-2_only_selec...,CHECKPOINTS\LOSS_APPROACHs_1212-2_only_select\...,3970,62.5,62,1,37,100


## Score Distribution

In [9]:
fig = go.Figure()

fig.add_trace(
    go.Histogram(
        x=standings_df["score"],
        nbinsx=30,
        name="Score Distribution",
        marker_color="blue",
    )
)

fig.update_layout(
    title="Score Distribution Across All Participants",
    xaxis_title="Total Score",
    yaxis_title="Number of Bots",
    height=400,
)
fig.show()

## Epoch vs Performance

In [10]:
# Scatter plot: Epoch vs Score
fig = go.Figure()

fig.add_trace(
    go.Scatter(
        x=standings_df["epoch"],
        y=standings_df["score"],
        mode="markers",
        marker=dict(
            size=8,
            color=standings_df["score"],
            colorscale="Viridis",
            showscale=True,
            colorbar=dict(title="Score"),
        ),
        text=standings_df["name"].str[-15:],  # Show last 15 chars of name
        hovertemplate="<b>%{text}</b><br>Epoch: %{x}<br>Score: %{y}<extra></extra>",
    )
)

fig.update_layout(
    title="Epoch vs Tournament Score",
    xaxis_title="Training Epoch",
    yaxis_title="Tournament Score",
    height=500,
)
fig.show()

In [11]:
# Win rate vs epoch
standings_df["win_rate"] = standings_df["wins"] / standings_df["matches_played"]

fig = go.Figure()

fig.add_trace(
    go.Scatter(
        x=standings_df["epoch"],
        y=standings_df["win_rate"],
        mode="markers",
        marker=dict(
            size=8,
            color=standings_df["win_rate"],
            colorscale="RdYlGn",
            showscale=True,
            colorbar=dict(title="Win Rate"),
        ),
        text=standings_df["name"].str[-15:],
        hovertemplate="<b>%{text}</b><br>Epoch: %{x}<br>Win Rate: %{y:.3f}<extra></extra>",
    )
)

fig.update_layout(
    title="Win Rate vs Training Epoch",
    xaxis_title="Training Epoch",
    yaxis_title="Win Rate",
    height=500,
)
fig.show()

## Match Results Analysis

In [12]:
# Convert match results to DataFrame
matches = tournament_data["match_results"]
matches_df = pd.DataFrame(matches)

print(f"Total matches played: {len(matches_df)}")
print(f"\nMatch results columns: {matches_df.columns.tolist()}")
matches_df.head(10)

Total matches played: 250000

Match results columns: ['round', 'match_id', 'player1', 'player2', 'player1_epoch', 'player2_epoch', 'p1_wins', 'p2_wins', 'draws']


Unnamed: 0,round,match_id,player1,player2,player1_epoch,player2_epoch,p1_wins,p2_wins,draws
0,1,R1_M1_G1,20251213_1643-LOSS_APPROACHs_1212-2_only_selec...,20251213_1642-LOSS_APPROACHs_1212-2_only_selec...,5000,4999,0,1,0
1,1,R1_M1_G2,20251213_1642-LOSS_APPROACHs_1212-2_only_selec...,20251213_1643-LOSS_APPROACHs_1212-2_only_selec...,4999,5000,1,0,0
2,1,R1_M2_G1,20251213_1642-LOSS_APPROACHs_1212-2_only_selec...,20251213_1642-LOSS_APPROACHs_1212-2_only_selec...,4998,4997,0,1,0
3,1,R1_M2_G2,20251213_1642-LOSS_APPROACHs_1212-2_only_selec...,20251213_1642-LOSS_APPROACHs_1212-2_only_selec...,4997,4998,1,0,0
4,1,R1_M3_G1,20251213_1641-LOSS_APPROACHs_1212-2_only_selec...,20251213_1641-LOSS_APPROACHs_1212-2_only_selec...,4996,4995,0,1,0
5,1,R1_M3_G2,20251213_1641-LOSS_APPROACHs_1212-2_only_selec...,20251213_1641-LOSS_APPROACHs_1212-2_only_selec...,4995,4996,1,0,0
6,1,R1_M4_G1,20251213_1641-LOSS_APPROACHs_1212-2_only_selec...,20251213_1641-LOSS_APPROACHs_1212-2_only_selec...,4994,4993,1,0,0
7,1,R1_M4_G2,20251213_1641-LOSS_APPROACHs_1212-2_only_selec...,20251213_1641-LOSS_APPROACHs_1212-2_only_selec...,4993,4994,0,1,0
8,1,R1_M5_G1,20251213_1640-LOSS_APPROACHs_1212-2_only_selec...,20251213_1640-LOSS_APPROACHs_1212-2_only_selec...,4992,4991,1,0,0
9,1,R1_M5_G2,20251213_1640-LOSS_APPROACHs_1212-2_only_selec...,20251213_1640-LOSS_APPROACHs_1212-2_only_selec...,4991,4992,1,0,0


In [13]:
# Analyze match outcomes
total_p1_wins = matches_df["p1_wins"].sum()
total_p2_wins = matches_df["p2_wins"].sum()
total_draws = matches_df["draws"].sum()
total_games = total_p1_wins + total_p2_wins + total_draws

print("=" * 60)
print("OVERALL MATCH STATISTICS")
print("=" * 60)
print(f"Player 1 Wins: {total_p1_wins} ({total_p1_wins/total_games*100:.1f}%)")
print(f"Player 2 Wins: {total_p2_wins} ({total_p2_wins/total_games*100:.1f}%)")
print(f"Draws: {total_draws} ({total_draws/total_games*100:.1f}%)")
print(f"Total Games: {total_games}")
print("=" * 60)

OVERALL MATCH STATISTICS
Player 1 Wins: 125071 (50.0%)
Player 2 Wins: 124177 (49.7%)
Draws: 752 (0.3%)
Total Games: 250000


In [14]:
# Match outcomes pie chart
fig = go.Figure(
    data=[
        go.Pie(
            labels=["Player 1 Wins", "Player 2 Wins", "Draws"],
            values=[total_p1_wins, total_p2_wins, total_draws],
            marker_colors=["#1f77b4", "#ff7f0e", "#2ca02c"],
        )
    ]
)

fig.update_layout(title="Overall Match Outcomes Distribution", height=400)
fig.show()

## Performance by Round

In [15]:
# Analyze performance by round
round_stats = matches_df.groupby("round").agg(
    {
        "p1_wins": "sum",
        "p2_wins": "sum",
        "draws": "sum",
    }
)

round_stats["total_games"] = (
    round_stats["p1_wins"] + round_stats["p2_wins"] + round_stats["draws"]
)
round_stats["p1_win_rate"] = round_stats["p1_wins"] / round_stats["total_games"]
round_stats["draw_rate"] = round_stats["draws"] / round_stats["total_games"]

fig = make_subplots(rows=2, cols=1, subplot_titles=("Games per Round", "Outcome Rates"))

# Total games per round
fig.add_trace(
    go.Scatter(
        x=round_stats.index,
        y=round_stats["total_games"],
        mode="lines+markers",
        name="Total Games",
    ),
    row=1,
    col=1,
)

# Outcome rates
fig.add_trace(
    go.Scatter(
        x=round_stats.index,
        y=round_stats["p1_win_rate"],
        mode="lines+markers",
        name="P1 Win Rate",
    ),
    row=2,
    col=1,
)

fig.add_trace(
    go.Scatter(
        x=round_stats.index,
        y=round_stats["draw_rate"],
        mode="lines+markers",
        name="Draw Rate",
    ),
    row=2,
    col=1,
)

fig.update_xaxes(title_text="Round", row=2, col=1)
fig.update_yaxes(title_text="Games", row=1, col=1)
fig.update_yaxes(title_text="Rate", row=2, col=1)
fig.update_layout(height=700, title_text="Round-by-Round Analysis")
fig.show()

## Top Performers Analysis

In [16]:
# Top 10 performers detailed stats
top_10 = standings_df.head(10).copy()
top_10["win_rate"] = top_10["wins"] / top_10["matches_played"]
top_10["draw_rate"] = top_10["draws"] / top_10["matches_played"]
top_10["loss_rate"] = top_10["losses"] / top_10["matches_played"]

fig = go.Figure()

# Stacked bar chart
fig.add_trace(
    go.Bar(
        name="Wins",
        x=top_10["name"].str[-15:],
        y=top_10["win_rate"],
        marker_color="green",
    )
)

fig.add_trace(
    go.Bar(
        name="Draws",
        x=top_10["name"].str[-15:],
        y=top_10["draw_rate"],
        marker_color="yellow",
    )
)

fig.add_trace(
    go.Bar(
        name="Losses",
        x=top_10["name"].str[-15:],
        y=top_10["loss_rate"],
        marker_color="red",
    )
)

fig.update_layout(
    barmode="stack",
    title="Top 10 Performers - Match Outcome Distribution",
    xaxis_title="Bot Name (last 15 chars)",
    yaxis_title="Rate",
    height=500,
)
fig.show()

## Epoch Correlation Analysis

In [17]:
from scipy import stats

# Calculate correlation between epoch and performance metrics
corr_score, p_score = stats.pearsonr(standings_df["epoch"], standings_df["score"])
corr_winrate, p_winrate = stats.pearsonr(
    standings_df["epoch"], standings_df["win_rate"]
)

print("=" * 60)
print("EPOCH CORRELATION ANALYSIS")
print("=" * 60)
print(f"Epoch vs Score:")
print(f"  Correlation: {corr_score:.4f}")
print(f"  P-value: {p_score:.4e}")
print(f"  Significant: {'Yes' if p_score < 0.05 else 'No'}")
print()
print(f"Epoch vs Win Rate:")
print(f"  Correlation: {corr_winrate:.4f}")
print(f"  P-value: {p_winrate:.4e}")
print(f"  Significant: {'Yes' if p_winrate < 0.05 else 'No'}")
print("=" * 60)

EPOCH CORRELATION ANALYSIS
Epoch vs Score:
  Correlation: 0.0090
  P-value: 5.2277e-01
  Significant: No

Epoch vs Win Rate:
  Correlation: 0.0086
  P-value: 5.4416e-01
  Significant: No


## Summary Statistics

In [18]:
summary = {
    "Total Bots": len(standings_df),
    "Total Rounds": config["num_rounds"],
    "Total Matches": len(matches_df),
    "Avg Score": standings_df["score"].mean(),
    "Median Score": standings_df["score"].median(),
    "Avg Win Rate": standings_df["win_rate"].mean(),
    "Top Score": standings_df["score"].max(),
    "Top Bot": standings_df.iloc[0]["name"][-30:],
    "Top Epoch": standings_df.iloc[0]["epoch"],
}

print("=" * 60)
print("TOURNAMENT SUMMARY")
print("=" * 60)
for key, value in summary.items():
    if isinstance(value, float):
        print(f"{key:.<40} {value:.3f}")
    else:
        print(f"{key:.<40} {value}")
print("=" * 60)

TOURNAMENT SUMMARY
Total Bots.............................. 5001
Total Rounds............................ 50
Total Matches........................... 250000
Avg Score............................... 50.000
Median Score............................ 50.000
Avg Win Rate............................ 0.498
Top Score............................... 68.000
Top Bot................................. ACHs_1212-2_only_select_E_1034
Top Epoch............................... 1034


## Bradley-Terry Scoring

Calculate Bradley-Terry scores to estimate relative strength of each bot based on match results.

In [19]:
from utils.metrics.bradley_terry import calculate_BradleyTerry

# Build win matrix from match results
# W[i,j] = number of wins of bot i over bot j
bot_names = standings_df["name"].tolist()
n_bots = len(bot_names)

# Create name to index mapping
name_to_idx = {name: idx for idx, name in enumerate(bot_names)}

# Initialize win matrix as DataFrame
W = pd.DataFrame(0.0, index=bot_names, columns=bot_names)

# Populate win matrix from match results
for _, match in matches_df.iterrows():
    p1_name = match["player1"]
    p2_name = match["player2"]

    # Add wins to matrix
    W.loc[p1_name, p2_name] += match["p1_wins"]
    W.loc[p2_name, p1_name] += match["p2_wins"]

    # Handle draws (split equally)
    if match["draws"] > 0:
        W.loc[p1_name, p2_name] += match["draws"] * 0.5
        W.loc[p2_name, p1_name] += match["draws"] * 0.5

print(f"Win matrix constructed: {W.shape}")
print(
    f"Total games recorded: {W.sum().sum() / 2:.0f}"
)  # Divide by 2 since matrix counts each game twice

Win matrix constructed: (5001, 5001)
Total games recorded: 125000


In [20]:
# Initialize scores (uniform initialization)
initial_scores = {name: 1.0 for name in bot_names}

# Calculate Bradley-Terry scores
bt_scores = calculate_BradleyTerry(
    score=initial_scores,
    W=W,
    EPOCHS=4,
    diff_threshold=1e-6,
    normalize=False,
    verbose=True,
)

print("\nBradley-Terry calculation complete!")

Calculating Bradley-Terry for 5001 agents.


BT Epoch: 100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 5001/5001 [11:48<00:00,  7.05it/s]


Diff norm2: 0.004121
['2.125', '2.384', '2.030', '1.941', '1.857', '2.073', '2.051', '1.857', '1.894', '1.892', '1.817', '1.840', '1.924', '1.956', '1.891', '1.703', '1.886', '1.756', '1.907', '1.726', '1.703', '1.722', '1.703', '2.245', '1.703', '1.901', '1.754', '1.747', '1.795', '1.765', '1.949', '1.714', '2.176', '1.667', '1.719', '1.933', '1.718', '1.710', '1.632', '1.707', '1.697', '1.801', '1.723', '1.672', '1.737', '1.650', '1.680', '1.682', '1.950', '1.789', '1.632', '1.964', '1.807', '1.712', '1.649', '1.637', '1.623', '1.611', '1.830', '1.659', '1.654', '1.576', '1.767', '1.631', '1.615', '1.679', '1.681', '1.778', '1.691', '1.697', '1.636', '1.587', '1.678', '1.630', '1.584', '1.618', '1.619', '1.604', '1.688', '1.888', '1.714', '1.670', '1.716', '1.553', '1.638', '1.697', '1.946', '1.645', '1.807', '1.532', '1.795', '1.550', '1.532', '1.848', '1.519', '1.648', '1.588', '1.793', '1.747', '1.671', '1.574', '1.578', '1.581', '1.500', '1.703', '1.561', '1.500', '2.182', '1.751

BT Epoch: 100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 5001/5001 [11:55<00:00,  6.99it/s]


Diff norm2: 0.003155
['3.741', '3.765', '3.197', '3.299', '2.725', '3.546', '2.969', '2.951', '2.856', '2.751', '2.670', '2.903', '2.970', '3.105', '2.932', '2.424', '2.446', '2.453', '2.850', '2.477', '2.344', '2.405', '2.352', '3.484', '2.373', '2.836', '2.537', '2.467', '2.720', '2.971', '2.935', '2.273', '3.320', '2.100', '2.617', '2.685', '2.336', '2.526', '2.172', '2.606', '2.482', '2.671', '2.365', '2.290', '2.309', '2.135', '2.399', '2.405', '2.841', '2.460', '2.184', '2.950', '2.777', '2.473', '2.377', '2.089', '2.061', '2.193', '2.799', '2.288', '2.340', '2.062', '2.587', '2.177', '2.050', '2.256', '2.234', '2.674', '2.552', '2.488', '2.214', '2.082', '2.460', '2.333', '1.977', '2.146', '2.272', '2.298', '2.341', '2.646', '2.256', '2.602', '2.264', '1.932', '2.354', '2.464', '2.745', '2.113', '2.552', '2.198', '2.408', '1.929', '2.199', '2.633', '1.846', '2.131', '2.148', '2.586', '2.489', '2.421', '2.236', '2.118', '2.169', '1.813', '2.282', '2.198', '1.980', '3.275', '2.413

BT Epoch: 100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 5001/5001 [11:39<00:00,  7.14it/s]


Diff norm2: 0.002198
['5.182', '5.049', '4.228', '4.503', '3.435', '4.862', '3.781', '3.863', '3.725', '3.467', '3.361', '3.816', '3.823', '4.051', '3.866', '3.031', '2.901', '2.997', '3.616', '3.055', '2.835', '2.946', '2.809', '4.545', '2.912', '3.609', '3.130', '2.985', '3.528', '4.062', '3.713', '2.721', '4.328', '2.412', '3.364', '3.275', '2.791', '3.207', '2.567', '3.362', '3.092', '3.346', '2.859', '2.725', '2.721', '2.450', '2.962', '3.033', '3.554', '2.929', '2.531', '3.778', '3.580', '3.081', '2.975', '2.394', '2.322', '2.672', '3.590', '2.779', '2.832', '2.438', '3.303', '2.530', '2.325', '2.666', '2.631', '3.392', '3.239', '3.150', '2.639', '2.459', '3.091', '2.892', '2.228', '2.534', '2.776', '2.849', '2.820', '3.188', '2.670', '3.341', '2.652', '2.237', '2.909', '3.026', '3.377', '2.424', '3.115', '2.681', '2.868', '2.203', '2.662', '3.218', '2.055', '2.455', '2.521', '3.232', '3.046', '2.993', '2.755', '2.521', '2.621', '1.991', '2.696', '2.671', '2.329', '4.145', '2.930

BT Epoch: 100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 5001/5001 [11:38<00:00,  7.15it/s]


Diff norm2: 0.001423
['6.326', '6.045', '5.022', '5.447', '3.937', '5.879', '4.392', '4.529', '4.381', '3.987', '3.836', '4.472', '4.433', '4.734', '4.597', '3.475', '3.218', '3.369', '4.170', '3.439', '3.156', '3.309', '3.089', '5.329', '3.281', '4.165', '3.527', '3.335', '4.116', '4.868', '4.263', '3.025', '5.088', '2.614', '3.887', '3.682', '3.095', '3.691', '2.823', '3.893', '3.494', '3.794', '3.197', '3.013', '2.996', '2.641', '3.337', '3.463', '4.055', '3.240', '2.713', '4.371', '4.132', '3.510', '3.372', '2.582', '2.481', '2.999', '4.143', '3.102', '3.141', '2.680', '3.817', '2.731', '2.474', '2.938', '2.870', '3.903', '3.705', '3.614', '2.910', '2.702', '3.512', '3.265', '2.385', '2.789', '3.119', '3.216', '3.146', '3.554', '2.956', '3.846', '2.888', '2.445', '3.274', '3.400', '3.812', '2.623', '3.504', '2.985', '3.178', '2.388', '2.962', '3.600', '2.175', '2.645', '2.743', '3.676', '3.420', '3.391', '3.111', '2.787', '2.919', '2.086', '2.951', '2.961', '2.542', '4.756', '3.246

## Bradley-Terry Rankings

In [21]:
# Create Bradley-Terry rankings DataFrame
bt_df = pd.DataFrame(
    [
        {
            "name": name,
            "bt_score": score,
            "epoch": standings_df[standings_df["name"] == name]["epoch"].iloc[0],
        }
        for name, score in bt_scores.items()
    ]
)

bt_df = bt_df.sort_values("bt_score", ascending=False).reset_index(drop=True)
bt_df["bt_rank"] = range(1, len(bt_df) + 1)

# Merge with original standings
standings_with_bt = standings_df.merge(
    bt_df[["name", "bt_score", "bt_rank"]], on="name", how="left"
)

# Compare Swiss rank vs BT rank
standings_with_bt["rank_diff"] = (
    standings_with_bt.index + 1 - standings_with_bt["bt_rank"]
)

print("Top 20 by Bradley-Terry Score:")
print(
    standings_with_bt.head(20)[
        ["name", "score", "bt_score", "bt_rank", "rank_diff", "epoch"]
    ].to_string(index=False)
)

standings_with_bt.head()

Top 20 by Bradley-Terry Score:
                                                  name  score  bt_score  bt_rank  rank_diff  epoch
20251212_2206-LOSS_APPROACHs_1212-2_only_select_E_1034   68.0  6.326011        1          0   1034
20251213_0341-LOSS_APPROACHs_1212-2_only_select_E_2322   67.0  6.044984        2          0   2322
20251212_2002-LOSS_APPROACHs_1212-2_only_select_E_0535   67.0  5.022249        7         -4    535
20251213_1133-LOSS_APPROACHs_1212-2_only_select_E_3996   66.0  5.447256        4          0   3996
20251213_1319-LOSS_APPROACHs_1212-2_only_select_E_4345   65.0  3.937000       27        -22   4345
20251213_1246-LOSS_APPROACHs_1212-2_only_select_E_4235   65.0  5.878822        3          3   4235
20251213_1225-LOSS_APPROACHs_1212-2_only_select_E_4168   65.0  4.392135       15         -8   4168
20251212_2259-LOSS_APPROACHs_1212-2_only_select_E_1253   65.0  4.529100       12         -4   1253
20251213_0210-LOSS_APPROACHs_1212-2_only_select_E_1979   64.5  4.380697       

Unnamed: 0,name,path,epoch,score,wins,draws,losses,matches_played,win_rate,bt_score,bt_rank,rank_diff
0,20251212_2206-LOSS_APPROACHs_1212-2_only_selec...,CHECKPOINTS\LOSS_APPROACHs_1212-2_only_select\...,1034,68.0,68,0,32,100,0.68,6.326011,1,0
1,20251213_0341-LOSS_APPROACHs_1212-2_only_selec...,CHECKPOINTS\LOSS_APPROACHs_1212-2_only_select\...,2322,67.0,67,0,33,100,0.67,6.044984,2,0
2,20251212_2002-LOSS_APPROACHs_1212-2_only_selec...,CHECKPOINTS\LOSS_APPROACHs_1212-2_only_select\...,535,67.0,67,0,33,100,0.67,5.022249,7,-4
3,20251213_1133-LOSS_APPROACHs_1212-2_only_selec...,CHECKPOINTS\LOSS_APPROACHs_1212-2_only_select\...,3996,66.0,66,0,34,100,0.66,5.447256,4,0
4,20251213_1319-LOSS_APPROACHs_1212-2_only_selec...,CHECKPOINTS\LOSS_APPROACHs_1212-2_only_select\...,4345,65.0,65,0,35,100,0.65,3.937,27,-22


In [35]:
standings_with_bt.loc[standings_with_bt["epoch"] == 3970]

Unnamed: 0,name,path,epoch,score,wins,draws,losses,matches_played,win_rate,bt_score,bt_rank,rank_diff,swiss_rank,abs_rank_diff
29,20251213_1126-LOSS_APPROACHs_1212-2_only_selec...,CHECKPOINTS\LOSS_APPROACHs_1212-2_only_select\...,3970,62.5,62,1,37,100,0.62,4.86832,8,22,30,22


In [22]:
# Bradley-Terry score distribution
fig = go.Figure()

fig.add_trace(
    go.Histogram(
        x=bt_df["bt_score"],
        nbinsx=30,
        name="BT Score Distribution",
        marker_color="purple",
    )
)

fig.update_layout(
    title="Bradley-Terry Score Distribution",
    xaxis_title="Bradley-Terry Score",
    yaxis_title="Number of Bots",
    height=400,
)
fig.show()

## Bradley-Terry vs Swiss Score Comparison

In [23]:
# Scatter plot: Swiss score vs BT score
fig = go.Figure()

fig.add_trace(
    go.Scatter(
        x=standings_with_bt["score"],
        y=standings_with_bt["bt_score"],
        mode="markers",
        marker=dict(
            size=8,
            color=standings_with_bt["epoch"],
            colorscale="Viridis",
            showscale=True,
            colorbar=dict(title="Epoch"),
        ),
        text=standings_with_bt["name"].str[-15:],
        hovertemplate="<b>%{text}</b><br>Swiss: %{x:.2f}<br>BT: %{y:.4f}<br>Epoch: %{marker.color}<extra></extra>",
    )
)

# Add diagonal line (perfect correlation)
min_val = 0
max_swiss = standings_with_bt["score"].max()
max_bt = standings_with_bt["bt_score"].max()

fig.add_trace(
    go.Scatter(
        x=[0, max_swiss],
        y=[0, max_bt * (max_swiss / max_swiss)],
        mode="lines",
        line=dict(dash="dash", color="gray"),
        name="Reference",
        showlegend=False,
    )
)

fig.update_layout(
    title="Swiss Score vs Bradley-Terry Score",
    xaxis_title="Swiss Tournament Score",
    yaxis_title="Bradley-Terry Score",
    height=500,
)
fig.show()

In [24]:
# Correlation analysis
from scipy.stats import pearsonr, spearmanr

pearson_corr, pearson_p = pearsonr(
    standings_with_bt["score"], standings_with_bt["bt_score"]
)
spearman_corr, spearman_p = spearmanr(
    standings_with_bt["score"], standings_with_bt["bt_score"]
)

print("=" * 60)
print("CORRELATION ANALYSIS: Swiss Score vs Bradley-Terry Score")
print("=" * 60)
print(f"Pearson Correlation: {pearson_corr:.4f} (p={pearson_p:.4e})")
print(f"Spearman Correlation: {spearman_corr:.4f} (p={spearman_p:.4e})")
print("=" * 60)

CORRELATION ANALYSIS: Swiss Score vs Bradley-Terry Score
Pearson Correlation: 0.8835 (p=0.0000e+00)
Spearman Correlation: 0.9251 (p=0.0000e+00)


## Bradley-Terry Score vs Epoch

In [25]:
# BT score vs epoch
fig = go.Figure()

fig.add_trace(
    go.Scatter(
        x=standings_with_bt["epoch"],
        y=standings_with_bt["bt_score"],
        mode="markers",
        marker=dict(
            size=5,
            color=standings_with_bt["bt_score"],
            colorscale="RdYlGn",
            showscale=True,
            colorbar=dict(title="BT Score"),
        ),
        text=standings_with_bt["name"].str[-15:],
        hovertemplate="<b>%{text}</b><br>Epoch: %{x}<br>BT Score: %{y:.4f}<extra></extra>",
    )
)

fig.update_layout(
    title="Bradley-Terry Score vs Training Epoch",
    xaxis_title="Training Epoch",
    yaxis_title="Bradley-Terry Score",
    height=500,
)
fig.show()

In [26]:
# Epoch correlation with BT score
bt_epoch_corr, bt_epoch_p = pearsonr(
    standings_with_bt["epoch"], standings_with_bt["bt_score"]
)

print("=" * 60)
print("EPOCH CORRELATION WITH BRADLEY-TERRY")
print("=" * 60)
print(f"Correlation: {bt_epoch_corr:.4f}")
print(f"P-value: {bt_epoch_p:.4e}")
print(f"Significant: {'Yes' if bt_epoch_p < 0.05 else 'No'}")
print("=" * 60)

EPOCH CORRELATION WITH BRADLEY-TERRY
Correlation: -0.0013
P-value: 9.2628e-01
Significant: No


## Ranking Disagreements

Identify bots where Swiss ranking and Bradley-Terry ranking differ significantly.

In [27]:
# Find largest ranking disagreements
standings_with_bt["swiss_rank"] = standings_with_bt.index + 1
standings_with_bt["abs_rank_diff"] = abs(standings_with_bt["rank_diff"])

disagreements = standings_with_bt.nlargest(10, "abs_rank_diff")[
    ["name", "swiss_rank", "bt_rank", "rank_diff", "score", "bt_score", "epoch"]
]

print("Top 10 Ranking Disagreements:")
print(disagreements.to_string(index=False))

Top 10 Ranking Disagreements:
                                                  name  swiss_rank  bt_rank  rank_diff  score  bt_score  epoch
20251213_1112-LOSS_APPROACHs_1212-2_only_select_E_3924        1646     4126      -2480   52.0  0.722968   3924
20251213_1535-LOSS_APPROACHs_1212-2_only_select_E_4786        1246     3586      -2340   53.0  0.866774   4786
20251213_1017-LOSS_APPROACHs_1212-2_only_select_E_3736        3205      940       2265   48.0  1.800299   3736
20251213_1320-LOSS_APPROACHs_1212-2_only_select_E_4348        1521     3669      -2148   52.5  0.843595   4348
20251212_2239-LOSS_APPROACHs_1212-2_only_select_E_1170        2737      640       2097   49.5  2.044707   1170
20251213_0424-LOSS_APPROACHs_1212-2_only_select_E_2476        1551     3596      -2045   52.5  0.864264   2476
20251213_0418-LOSS_APPROACHs_1212-2_only_select_E_2451        1719     3743      -2024   52.0  0.824661   2451
20251212_2056-LOSS_APPROACHs_1212-2_only_select_E_0750        2594      598       

In [28]:
# Visualize ranking differences
fig = go.Figure()

fig.add_trace(
    go.Scatter(
        x=standings_with_bt["swiss_rank"],
        y=standings_with_bt["bt_rank"],
        mode="markers",
        marker=dict(
            size=8,
            color=standings_with_bt["abs_rank_diff"],
            colorscale="Reds",
            showscale=True,
            colorbar=dict(title="Rank Difference"),
        ),
        text=standings_with_bt["name"].str[-15:],
        hovertemplate="<b>%{text}</b><br>Swiss Rank: %{x}<br>BT Rank: %{y}<br>Diff: %{marker.color}<extra></extra>",
    )
)

# Add diagonal (perfect agreement)
max_rank = len(standings_with_bt)
fig.add_trace(
    go.Scatter(
        x=[1, max_rank],
        y=[1, max_rank],
        mode="lines",
        line=dict(dash="dash", color="gray"),
        name="Perfect Agreement",
        showlegend=False,
    )
)

fig.update_layout(
    title="Swiss Rank vs Bradley-Terry Rank",
    xaxis_title="Swiss Rank",
    yaxis_title="Bradley-Terry Rank",
    height=500,
)
# fig.update_xaxes(autorange="reversed")
# fig.update_yaxes(autorange="reversed")
fig.show()