In [1]:
import numpy as np
import pandas as pd
import os
import pickle
from tqdm import tqdm

# Get the size of the file for the progress bar
file_path = "E:/fab-data/simplified_tournament_data.pkl"
file_size = os.path.getsize(file_path)

# A list to hold all tournament data objects
all_tournament_data = []

# Open the pickle file for reading
with open(file_path, "rb") as f:
    with tqdm(total=file_size, unit='B', unit_scale=True, desc='Loading data') as pbar:
        while True:
            try:
                # Tell the progress bar how much data has been read so far
                pos_before = f.tell()
                
                # Load the next tournament data object from the file
                tournament_data = pickle.load(f)
                
                # Update the progress bar with the number of bytes read
                pos_after = f.tell()
                pbar.update(pos_after - pos_before)
                
                all_tournament_data.append(tournament_data)
            except EOFError:
                # No More data
                break
print(len(all_tournament_data))

Loading data: 100%|██████████| 1.45G/1.45G [01:47<00:00, 13.4MB/s]


# Loading Player and Deck data

In [6]:
# Constants
default_skill_value = 0 # value for a players skill if no player_skill is provided on player creation
skill_value_modifier = 0.03
total_of_decks = 11 # total amount of decks
player_num = 512 # total number of players in a tournament
num_rounds = 14 # total number of rounds in a tournament



# Player Tournament and classes
import numpy as np
import pandas as pd
from threading import Lock

class Deck:
    def __init__(self, name):
        self.name = name
        self.matchup_spread = {}

    def set_matchup_win_prob(self, opponent_deck_name, win_prob):
        self.matchup_spread[opponent_deck_name] = win_prob

    def get_matchup_win_prob(self, opponent_deck_name):
        return self.matchup_spread.get(opponent_deck_name, 0.5)
    
    def __str__(self):
        return self.name
    
    def __reduce__(self):
        # Return the class itself, arguments, and the state
        return (self.__class__, (self.name,), {'name': self.name, 'matchup_spread': self.matchup_spread})
    
    def __setstate__(self, state):
        # Set the object's state from the given state dictionary
        self.__dict__.update(state)

class DeckManager:
    def __init__(self):
        self.decks = {}

    def add_deck(self, deck):
        self.decks[deck.name] = deck

    def generate_win_probabilities(self, deck_names):
        num_decks = len(deck_names)
        for i in range(num_decks):
            for j in range(i + 1, num_decks):
                win_prob = np.clip(np.random.normal(0.5, 0.15), 0, 1)
                self.decks[deck_names[i]].set_matchup_win_prob(deck_names[j], win_prob)
                self.decks[deck_names[j]].set_matchup_win_prob(deck_names[i], 1 - win_prob)

    def get_win_prob_matrix(self):
        deck_names = list(self.decks.keys())
        num_decks = len(deck_names)
        matrix = np.zeros((num_decks, num_decks))
        for i in range(num_decks):
            for j in range(num_decks):
                matrix[i, j] = self.decks[deck_names[i]].get_matchup_win_prob(deck_names[j])
        return matrix

    def get_win_prob_dataframe(self):
        deck_names = list(self.decks.keys())
        num_decks = len(deck_names)
        matrix = np.zeros((num_decks, num_decks))
        for i in range(num_decks):
            for j in range(num_decks):
                matrix[i, j] = self.decks[deck_names[i]].get_matchup_win_prob(deck_names[j])
        df = pd.DataFrame(matrix, index=deck_names, columns=deck_names)
        return df

    def load_win_probabilities_from_csv(self, file_path):
        # Read the Excel file
        df = pd.read_csv(file_path, header=0, index_col=0)
        
        # Drop the Representation column if it exists
        if 'Representation' in df.columns:
            df = df.drop(columns=['Representation'])
        
        # Ensure decks are added to the manager
        deck_names = df.index.to_list()
        for deck_name in deck_names:
            if deck_name not in self.decks:
                self.add_deck(Deck(deck_name))
        
        # Populate the matchup probabilities
        for i, row in df.iterrows():
            for j, value in row.items():
                self.decks[i].set_matchup_win_prob(j, value)
    
    def __reduce__(self):
        # Return the class itself, no arguments, and the state
        return (self.__class__, (), {'decks': self.decks})
    
    def __setstate__(self, state):
        # Set the object's state from the given state dictionary
        self.__dict__.update(state)

class Player:
    def __init__(self, player_id, alias, player_skill=default_skill_value, deck=None):
        self.id = player_id
        self.alias = alias
        self.skill = player_skill
        self.deck = deck
        self.wins = 0
        self.losses = 0
        self.history = []  # List to store match results
        self.history_lock = Lock()
    
    def __reduce__(self):
        # The object's state is returned as a tuple:
        # (callable, arguments_to_callable, additional_state)
        # Lock object is not pickled, so we're not including it in the state.
        return (self.__class__, (self.id, self.alias, self.skill, self.deck), {'wins': self.wins, 'losses': self.losses, 'history': self.history})
    
    def __setstate__(self, state):
        self.wins = state.get('wins', 0)
        self.losses = state.get('losses', 0)
        self.history = state.get('history', [])
        self.history_lock = Lock()  # Initialize a new Lock object after unpickling

    def set_deck(self, deck):
        """Set the deck for the player."""
        self.deck = deck

    def get_deck(self):
        """Retrieve the player's deck."""
        return self.deck

    def record_match(self, tournament, tournament_round, opponent, result):
        with self.history_lock:
            self.history.append([tournament.name, tournament_round, str(opponent.id) + opponent.alias, result])
            
    def get_tournament_history(self, tournament):
        tournament_results = [history for history in self.history if history[0] == tournament.name]
        return tournament_results

    def get_tournament_standing(self, tournament):
        tournament_results = [history for history in self.history if history[0] == tournament.name]
        wins = 0
        losses = 0
        for match in tournament_results:
            result = match[-1]
            if result == "W":
                wins += 1
            elif result == "L":
                losses += 1
        return (wins, losses)

    def __str__(self):
        return f"[{self.id}] {self.alias} ({self.get_deck().name}) - Skill Level: {self.skill}"

    def __repr__(self):
        return f"[{self.id}] {self.alias} ({self.get_deck().name}) - Skill Level: {self.skill}"

In [7]:
with open("deck_manager.pkl", "rb") as f:
    deck_manager = pickle.load(f)
with open("players.pkl", "rb") as f:
    players = pickle.load(f)

# Analysis

In [14]:
import pandas as pd

def update_player_tournament_placements(player_placements, tournament):
    top_64 = tournament['top_64']
    for placement, player_id in enumerate(top_64, start=1):
        # Initialize the player's record if not present
        if player_id not in player_placements:
            player_placements[player_id] = {'top_8': 0, 'top_16': 0, 'top_32': 0, 'top_64': 0}
        
        # Update top placements
        if placement <= 8:
            player_placements[player_id]['top_8'] += 1
        elif placement <= 16:
            player_placements[player_id]['top_16'] += 1
        elif placement <= 32:
            player_placements[player_id]['top_32'] += 1
        elif placement <= 64:
            player_placements[player_id]['top_64'] += 1

def create_dataframe(players, player_placements):
    # Prepare the data for the DataFrame
    data = []
    for player in players:
        # Include only players who have placement records
        if player.id in player_placements:
            data.append({
                'player_id': player.id,
                'alias': player.alias,
                'skill': round(player.skill, 3),
                'deck_name': player.deck.name,
                'top_8': player_placements[player.id]['top_8'],
                'top_16': player_placements[player.id]['top_16'],
                'top_32': player_placements[player.id]['top_32'],
                'top_64': player_placements[player.id]['top_64'],
            })

    # Create the DataFrame
    df = pd.DataFrame(data)
    return df


In [16]:
player_placements = {}
for tournament in all_tournament_data:  # Assuming 'tournaments' is your list of tournament dicts
    update_player_tournament_placements(player_placements, tournament)

# Assuming 'players' is your list of player objects
df = create_dataframe(players, player_placements)

# Export the DataFrame to a CSV file
df.to_csv('player_placements.csv', index=False)
df

Unnamed: 0,player_id,alias,skill,deck_name,top_8,top_16,top_32,top_64
0,0,Theresa,-0.991,Rhinar,2460,5838,13,12187
1,1,Orville,-1.022,Boltyn,354,1357,1,4489
2,2,Patrice,-0.098,Iyslander,2123,5372,34,12023
3,3,Charlene,-2.013,Dash,1274,3548,24,8535
4,4,Mary,0.252,Uzuri,1042,3112,43,8637
...,...,...,...,...,...,...,...,...
507,507,James,-0.537,Dorinthea,201,985,0,3319
508,508,Kelly,1.638,Iyslander,987,3143,0,8482
509,509,Joseph,-1.152,Dromai,390,1663,0,5248
510,510,Penelope,2.010,Rhinar,2251,6037,0,13141


In [20]:
# Constants
TOTAL_TOURNAMENTS = len(all_tournament_data)

# Group by 'deck_name' and aggregate the data
deck_grouped = df.groupby('deck_name').agg({
    'skill': ['mean', 'max'],
    'top_8': 'sum',
    'top_16': 'sum',
    'top_32': 'sum',
    'top_64': 'sum',
}).reset_index()

# Flatten the MultiIndex in columns
deck_grouped.columns = ['_'.join(col).strip() if col[1] else col[0] for col in deck_grouped.columns.values]

# Rename the columns
deck_grouped.rename(columns={
    'skill_mean': 'average_skill',
    'skill_max': 'highest_skill',
    'top_8_sum': 'total_top_8',
    'top_16_sum': 'total_top_16',
    'top_32_sum': 'total_top_32',
    'top_64_sum': 'total_top_64',
}, inplace=True)

# Calculate the frequency of top placements for each deck
deck_grouped['frequency_top_8'] = deck_grouped['total_top_8'] / TOTAL_TOURNAMENTS
deck_grouped['frequency_top_16'] = deck_grouped['total_top_16'] / TOTAL_TOURNAMENTS
deck_grouped['frequency_top_32'] = deck_grouped['total_top_32'] / TOTAL_TOURNAMENTS
deck_grouped['frequency_top_64'] = deck_grouped['total_top_64'] / TOTAL_TOURNAMENTS

# Calculate the representation of each deck in top placements
deck_grouped['representation_top_8'] = deck_grouped['total_top_8'] / (8 * TOTAL_TOURNAMENTS)
deck_grouped['representation_top_16'] = deck_grouped['total_top_16'] / (16 * TOTAL_TOURNAMENTS)
deck_grouped['representation_top_32'] = deck_grouped['total_top_32'] / (32 * TOTAL_TOURNAMENTS)
deck_grouped['representation_top_64'] = deck_grouped['total_top_64'] / (64 * TOTAL_TOURNAMENTS)

# Optionally, sort the DataFrame based on a relevant column
deck_grouped.sort_values(by='representation_top_8', ascending=False, inplace=True)

# Export the DataFrame to a CSV file
deck_grouped.to_csv('deck_aggregated_data_with_representations.csv', index=False)
deck_grouped

Unnamed: 0,deck_name,average_skill,highest_skill,total_top_8,total_top_16,total_top_32,total_top_64,frequency_top_8,frequency_top_16,frequency_top_32,frequency_top_64,representation_top_8,representation_top_16,representation_top_32,representation_top_64
5,Dromai,-0.019939,1.92,246495,250466,465348,885957,2.46495,2.50466,4.65348,8.85957,0.308119,0.156541,0.145421,0.138431
3,Dash,-0.08271,1.944,138611,99053,278700,408833,1.38611,0.99053,2.787,4.08833,0.173264,0.061908,0.087094,0.06388
8,Katsu,0.038019,2.553,88012,113740,135607,392429,0.88012,1.1374,1.35607,3.92429,0.110015,0.071087,0.042377,0.061317
2,Bravo,-0.046435,1.865,84979,67667,180370,358462,0.84979,0.67667,1.8037,3.58462,0.106224,0.042292,0.056366,0.05601
6,Fai,0.096167,1.83,61558,68892,116310,275875,0.61558,0.68892,1.1631,2.75875,0.076948,0.043057,0.036347,0.043105
7,Iyslander,-0.030522,1.887,56934,56413,143578,248011,0.56934,0.56413,1.43578,2.48011,0.071167,0.035258,0.044868,0.038752
9,Rhinar,-0.215963,2.01,43139,47348,83587,156314,0.43139,0.47348,0.83587,1.56314,0.053924,0.029593,0.026121,0.024424
0,Azalea,0.057703,2.308,34766,34316,82436,189828,0.34766,0.34316,0.82436,1.89828,0.043458,0.021448,0.025761,0.029661
4,Dorinthea,-0.0375,1.854,18696,20555,44743,94484,0.18696,0.20555,0.44743,0.94484,0.02337,0.012847,0.013982,0.014763
10,Uzuri,0.113579,2.553,18011,26096,46982,127263,0.18011,0.26096,0.46982,1.27263,0.022514,0.01631,0.014682,0.019885


In [23]:
all_tournament_data[0]

{'tournament_id': 0,
 'top_64': [95,
  217,
  238,
  508,
  28,
  84,
  129,
  162,
  272,
  290,
  387,
  417,
  427,
  449,
  470,
  15,
  34,
  48,
  66,
  81,
  89,
  93,
  121,
  130,
  150,
  183,
  189,
  201,
  225,
  251,
  261,
  283,
  302,
  311,
  324,
  329,
  350,
  356,
  369,
  393,
  425,
  437,
  452,
  468,
  503,
  510,
  2,
  13,
  18,
  30,
  32,
  45,
  54,
  55,
  59,
  69,
  79,
  98,
  106,
  108,
  115,
  120,
  127,
  144],
 'top_64_runs': {95: [('94Howard', 'W'),
   ('93Linda', 'L'),
   ('94Howard', 'W'),
   ('89Fran', 'W'),
   ('93Linda', 'W'),
   ('88Donald', 'W'),
   ('79Sandra', 'W'),
   ('84Brian', 'W'),
   ('130Keisha', 'W'),
   ('183Eustolia', 'W'),
   ('449Cindy', 'W'),
   ('238Janice', 'L'),
   ('84Brian', 'W'),
   ('238Janice', 'W')],
  217: [('216Jason', 'L'),
   ('219Owen', 'W'),
   ('216Jason', 'W'),
   ('212John', 'L'),
   ('215Latasha', 'W'),
   ('223Glenn', 'W'),
   ('231Christopher', 'W'),
   ('225Betsy', 'W'),
   ('237Jean', 'W'),
   ('19