In [330]:
import pandas as pd
import numpy as np
import random

In [415]:
# Columns removed from the roster immediately after it is loaded.
drop_list = [
    'does_not_want_to_play_with',
    'competitiveness_level',
    'fitness_level',
]

# Read the roster CSV and drop the columns listed above.
players_df = (
    pd.read_csv('./players.csv')
    .drop(columns=drop_list)
)

# Bare last expression so the notebook renders the resulting frame.
players_df

Unnamed: 0,name,skill_level,player_pref,gender
0,a,0.57,b,m
1,b,0.69,a,f
2,c,0.38,z,n
3,d,0.86,z,m
4,e,0.27,f,f
5,f,1.0,e,n
6,g,0.13,h,m
7,h,0.09,g,f
8,I,0.19,j,n
9,j,0.7,I,m


In [420]:
def _parse_preferences(raw):
    """Turn a comma-separated preference cell into a set of player names.

    Non-string cells (e.g. NaN for a missing value) yield an empty set.
    """
    if not isinstance(raw, str):
        return set()
    return {part.strip() for part in raw.split(',') if part.strip()}


def _count_without_preferred_teammate(teams, preferences):
    """Count players whose final team contains none of their preferred players."""
    return sum(
        1
        for team in teams
        for player in team
        if not any(
            mate[0] in preferences[player[0]]
            for mate in team
            if mate is not player
        )
    )


def create_random_teams_until_balanced(
    df,
    num_teams,
    max_tries=100,
    max_skill_diff=0.15,
    max_gender_diff=0.15,
    max_size_diff=2,
    max_not_assigned=2,
):
    """Randomly build teams until one split passes every balance check.

    Each try shuffles the roster and places players one at a time: a player
    joins the first team that already holds someone they prefer, otherwise a
    randomly chosen team. The split is accepted when every team is non-empty
    and stays within the given thresholds.

    Parameters
    ----------
    df : pandas.DataFrame
        Roster with the columns ``name``, ``skill_level``, ``player_pref``
        (comma-separated names) and ``gender``. Other columns are ignored.
    num_teams : int
        Number of teams to create; must be at least 1.
    max_tries : int, default 100
        Maximum number of random splits to attempt.
    max_skill_diff : float, default 0.15
        Largest allowed gap between a team's mean skill and the group mean.
    max_gender_diff : float, default 0.15
        Largest allowed sum, over all gender labels in the group, of the
        absolute gap between the team's share and the group's share.
    max_size_diff : int, default 2
        Largest allowed gap between a team's size and ``len(df) // num_teams``.
    max_not_assigned : int, default 2
        Largest allowed number of players left without any preferred teammate.

    Returns
    -------
    tuple
        ``(teams, skill_diffs, gender_diffs, size_diffs, not_assigned_count)``
        for the first accepted split, where ``teams`` is a list of teams and
        each player is a ``[name, skill_level, player_pref, gender]`` list.
        If no split is accepted within ``max_tries`` the result is a 5-tuple
        of ``None`` so it can be unpacked the same way as a success.

    Raises
    ------
    ValueError
        If ``num_teams`` is smaller than 1.
    KeyError
        If ``df`` lacks one of the required columns.
    """
    if num_teams < 1:
        raise ValueError("num_teams must be at least 1")

    # Select columns by name so the positional layout of each player record
    # is fixed regardless of column order or extra columns in the input.
    columns = ['name', 'skill_level', 'player_pref', 'gender']
    roster = df[columns]

    # Group-wide reference values every team is compared against.
    avg_skill = roster['skill_level'].mean()
    gender_breakdown = roster['gender'].value_counts(normalize=True)
    team_size = len(roster) // num_teams

    # name -> set of names that player would like to be teamed with.
    preferences = {
        name: _parse_preferences(raw)
        for name, raw in zip(roster['name'], roster['player_pref'])
    }

    for _ in range(max_tries):
        # Fresh random order each try; the caller's frame is never modified.
        players = roster.sample(frac=1).values.tolist()
        teams = [[] for _ in range(num_teams)]

        for player in players:
            wanted = preferences[player[0]]
            # First team that already contains a preferred player, if any.
            target = next(
                (team for team in teams if any(mate[0] in wanted for mate in team)),
                None,
            )
            if target is None:
                target = random.choice(teams)
            target.append(player)

        # Count on the finished teams: a player placed randomly may still be
        # joined later by the person they prefer.
        not_assigned_count = _count_without_preferred_teammate(teams, preferences)
        if not_assigned_count > max_not_assigned:
            continue

        skill_diffs = []
        gender_diffs = []
        size_diffs = []
        balanced = True

        for team in teams:
            # An empty team has no mean skill (NaN would slip past the
            # comparison below), so reject it explicitly.
            if not team:
                balanced = False
                break

            team_df = pd.DataFrame(team, columns=columns)
            skill_diff = abs(team_df['skill_level'].mean() - avg_skill)

            # Align the team's shares with the group's labels; labels absent
            # from the team count as a share of 0.
            team_ratios = (
                team_df['gender']
                .value_counts(normalize=True)
                .reindex(gender_breakdown.index, fill_value=0)
            )
            gender_diff = (team_ratios - gender_breakdown).abs().sum()

            size_diff = abs(len(team) - team_size)

            skill_diffs.append(skill_diff)
            gender_diffs.append(gender_diff)
            size_diffs.append(size_diff)

            if (
                skill_diff > max_skill_diff
                or gender_diff > max_gender_diff
                or size_diff > max_size_diff
            ):
                balanced = False
                break

        if balanced:
            return teams, skill_diffs, gender_diffs, size_diffs, not_assigned_count

    # Same arity as the success case so callers can always unpack five values.
    return None, None, None, None, None


In [422]:
# Attempt up to 100000 random splits of the roster into 3 teams and show the result.
create_random_teams_until_balanced(
    players_df,
    num_teams=3,
    max_tries=100000,
)