In [1]:
import pandas as pd
import numpy as np
import random

In [86]:
# Columns present in the CSV that are removed before the frame is used.
drop_list = [
    'does_not_want_to_play_with',
    'competitiveness_level',
    'fitness_level',
]

# Read the roster and discard the columns listed above.
players_df = pd.read_csv('./players.csv').drop(columns=drop_list)

# Last expression of the cell: show the resulting frame.
players_df

Unnamed: 0,name,skill_level,player_pref,gender,captain
0,a,0.57,b,m,yes
1,b,0.69,a,f,no
2,c,0.38,d,n,no
3,d,0.86,c,m,no
4,e,0.27,f,f,no
5,f,1.0,e,n,no
6,g,0.13,h,m,no
7,h,0.09,g,f,no
8,I,0.19,j,n,no
9,j,0.7,I,m,no


In [203]:
def _count_unpaired(teams, preferences):
    """Count players whose final team contains none of their preferred players."""
    unpaired = 0
    for team in teams:
        team_names = {player[0] for player in team}
        for player in team:
            name = player[0]
            # A player never counts as their own preferred teammate.
            if not (preferences[name] & (team_names - {name})):
                unpaired += 1
    return unpaired


def create_random_teams_until_balanced(df, num_teams, max_tries, limits, seed=None):
    """Randomly build teams until one formation satisfies every balance limit.

    Each attempt shuffles the players, places each one on the first team that
    already holds a player they prefer (or on a random team otherwise), and
    then checks the formation against the limits.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns 'name', 'skill_level', 'player_pref',
        'gender' and 'captain'. 'captain' is compared against the string
        'yes'; 'player_pref' holds the name of one preferred teammate.
        Extra columns are ignored.
    num_teams : int
        Number of teams to form; must equal the number of captains.
    max_tries : int
        Maximum number of random formations to try.
    limits : sequence
        ``limits[0]``: max allowed |team mean skill - overall mean skill|.
        ``limits[1]``: max allowed summed absolute difference between a
        team's gender shares and the overall gender shares.
        ``limits[2]``: max allowed number of players with no preferred
        player on their team.
    seed : int, optional
        When given, both the shuffling and the random team choice are driven
        by generators seeded with this value, so the result is reproducible.
        When omitted, the global ``random`` / numpy random state is used.

    Returns
    -------
    list[list[list]] or None
        One list per team, each holding player rows in the order
        [name, skill_level, player_pref, gender, captain]. Returns None
        (after printing a message) when no attempt satisfied the limits.

    Raises
    ------
    AssertionError
        If the number of captains differs from ``num_teams``.
    """
    columns = ['name', 'skill_level', 'player_pref', 'gender', 'captain']

    # make sure num_teams = # of captains (counting this way also works when
    # there are no captains at all, instead of raising KeyError)
    assert (df['captain'] == 'yes').sum() == num_teams, \
        "num_teams must equal the number of captains"

    # group-wide reference values every team is compared against
    avg_skill = df['skill_level'].mean()
    gender_breakdown = df['gender'].value_counts(normalize=True)
    team_size = len(df) // num_teams

    # One preferred teammate per player. Wrap the name in a set literal:
    # set("bob") would yield {'b', 'o'} and never match a real name.
    # For a comma separated list use set(pref.split(',')) instead.
    preferences = {name: {pref} for name, pref in zip(df['name'], df['player_pref'])}

    # Dedicated generators when a seed is supplied; global state otherwise.
    if seed is None:
        py_rng, np_rng = random, None
    else:
        py_rng, np_rng = random.Random(seed), np.random.RandomState(seed)

    for _ in range(max_tries):
        # shuffle the players; selecting the columns explicitly keeps the row
        # layout fixed regardless of the frame's column order or extra columns
        players = df[columns].sample(frac=1, random_state=np_rng).values.tolist()
        teams = [[] for _ in range(num_teams)]

        for player in players:
            name = player[0]
            for team in teams:
                if any(member[0] in preferences[name] for member in team):
                    team.append(player)
                    break
            else:
                # no team holds a preferred player yet: pick a team at random
                py_rng.choice(teams).append(player)

        # Count unpaired players only after everyone is placed. Counting at
        # placement time marks the first-placed member of every pair as
        # unpaired even though their partner joins the same team later.
        unpaired_count = _count_unpaired(teams, preferences)
        if unpaired_count > limits[2]:
            continue

        skill_diffs = []
        gender_diffs = []
        size_diffs = []
        balanced = True

        for team in teams:
            if not team:
                # an empty team has no captain and can never be balanced
                balanced = False
                break

            team_df = pd.DataFrame(team, columns=columns)

            skill_diff = float(abs(team_df['skill_level'].mean() - avg_skill))
            if skill_diff > limits[0]:
                balanced = False
                break

            # compare against every gender present in the whole group;
            # a gender missing from the team counts as a share of 0
            team_genders = team_df['gender'].value_counts(normalize=True)
            gender_diff = float(sum(
                abs(team_genders.get(gender, 0) - share)
                for gender, share in gender_breakdown.items()
            ))
            if gender_diff > limits[1]:
                balanced = False
                break

            size_diff = abs(len(team) - team_size)
            if size_diff > 1:
                balanced = False
                break

            # make sure each team has exactly 1 captain
            if (team_df['captain'] == 'yes').sum() != 1:
                balanced = False
                break

            # record per-team values so the report covers every team
            skill_diffs.append(skill_diff)
            gender_diffs.append(gender_diff)
            size_diffs.append(size_diff)

        if balanced:
            print("The skill level differences are " + str(skill_diffs))
            print("The gender ratio differences are " + str(gender_diffs))
            print("The team size differences are " + str(size_diffs))
            print("The number of players who aren't on a team with their preferred friend are " + str(unpaired_count))
            print("The teams are below")
            return teams

    print("Try again, no balanced teams found")

In [207]:
limit_list = [
    0.15,  # skill_difference_limit
    0.3,   # gender_ratio_difference_limit
    4,     # unpaired player limit
]

# inputs: players df, # of teams, # of attempts, list of condition limits
create_random_teams_until_balanced(
    df=players_df,
    num_teams=4,
    max_tries=5000,
    limits=limit_list,
)
Try again, no balanced teams found


In [None]:
# TODO: try an optimization-based approach instead. Create one initial random team formation, then keep swapping players (only when a swap improves the overall optimization value) until the improvement is negligible or N swaps have been reached.
# This is probably a better approach than repeated random retries.

# 1. Assign random teams with 1 captain per team. Assign players in a cyclical (round-robin) order so that mis-sized teams are not a concern later.
# 2. Assess the optimization value: sum of skill_diffs across teams + sum of gender_diffs across teams + number of unpaired people (size differences should not matter if the random assignment is done in a cyclical order).
# 3. Predict how a random non-captain swap would change the optimization value.
# 4. If the value gets better, perform the swap.
# 5. If the value gets worse, don't perform the swap; move on to predicting the effect of the next random non-captain swap.
# 6. Continue until N swaps have been performed or the improvement is negligible (whichever comes first).
# 7. Perhaps print the last M team formations so there are a few options to choose from.