In [3]:
import pandas as pd
from itertools import combinations
from collections import defaultdict
import random


# Staff-role columns; get_domain builds one candidate tuple per column,
# in this order.
cols = [
    'Surgeon ID',
    'Anesthesiologist ID',
    'Scrub ID',
    'Circulator ID',
    'First Assistant ID',
    'Second Assistant ID',
]

# Load the case log. Missing cells are replaced by the placeholder 999
# before the three columns below are cast to integers.
df = pd.read_csv('hospital.csv').fillna(999)
df = df.astype({
    'Scrub ID': 'int',
    'First Assistant ID': 'int',
    'Second Assistant ID': 'int',
})

# Narrow view holding only the day plus the six role columns.
df_gene = df[['DAY'] + cols]
def get_domain(day):
    """Collect the distinct IDs seen in each role column on one day.

    Rows of the module-level ``df`` whose ``DAY`` equals ``day`` are
    selected, and for every column named in ``cols`` the unique values of
    that column are gathered.

    Returns a list with one tuple per entry of ``cols`` (same order). A
    tuple is empty when no row matches ``day``.
    """
    rows_for_day = df[df['DAY'] == day]
    return [tuple(set(rows_for_day[col])) for col in cols]


# Pairwise co-occurrence table.
# Key:   "<id_a>_<id_b>", id_a taken from the earlier column of ``cols`` and
#        id_b from the later one.
# Value: number of rows of df_gene in which the two IDs appear together.
#
# NOTE(review): when the same ID pair occurs under more than one column
# pair, the later assignment replaces the earlier count rather than adding
# to it -- confirm that this is intended.
lookup = defaultdict(int)
for col_a, col_b in combinations(cols, 2):
    pair_counts = df_gene.groupby([col_a, col_b])['DAY'].count()
    for (id_a, id_b), times in pair_counts.items():
        key_a, key_b = str(int(id_a)), str(int(id_b))
        if '999' in (key_a, key_b):
            # 999 is the fill value used for a missing staff member, so a
            # pair involving it carries no cooperation history.  The IDs are
            # compared exactly: a substring test on the joined key would
            # also wipe pairs whose IDs merely contain "999" (1999, 9990...).
            lookup[key_a + '_' + key_b] = 0
        else:
            lookup[key_a + '_' + key_b] = int(times)
        
        
def cost_func(vec):
    """Score a team by how often its members have worked together.

    Every pair of entries of ``vec`` (taken in ``vec`` order) is turned into
    the key ``"<a>_<b>"`` and looked up in the module-level ``lookup``
    table; pairs with no entry count as 0.

    Returns the negated sum of the pair counts, so a minimiser favours
    teams with more shared history.
    """
    # .get() instead of indexing: ``lookup`` is a defaultdict, and indexing
    # it with an unseen key would insert that key as a side effect of a
    # read, growing the table on every evaluation.
    return -sum(
        lookup.get(str(a) + '_' + str(b), 0)
        for a, b in combinations(vec, 2)
    )
def geneticoptimize(domain,costf,popsize=50,
                      mutprob=0.2,elite=0.2,maxiter=1000):
    """Search for a low-cost vector with a simple genetic algorithm.

    Parameters
    ----------
    domain : sequence of sequences
        ``domain[k]`` lists the values allowed in slot ``k`` of a solution.
    costf : callable
        Maps a solution (a list with one value per slot) to a cost; lower
        is better.
    popsize : int
        Number of solutions kept in every generation.
    mutprob : float
        Probability that a new child is produced by mutation rather than
        by crossover.
    elite : float
        Fraction of the population carried over unchanged into the next
        generation.
    maxiter : int
        Number of generations to breed.  With 0 the best member of the
        random initial population is returned.

    Returns
    -------
    tuple
        ``(best_solution, best_cost)``.

    Raises
    ------
    ValueError
        If ``domain`` has no slots.
    IndexError
        If any slot of ``domain`` is empty (there is nothing to choose
        from) or ``popsize`` is less than 1.
    """
    n_slots = len(domain)
    if n_slots == 0:
        raise ValueError('domain must contain at least one slot')

    # Mutation Operation
    def mutate(vec):
        # Re-draw one randomly chosen slot from its allowed values.  Always
        # returns a complete solution (possibly equal to ``vec`` when the
        # same value is drawn again), never None.
        i = random.randint(0, n_slots - 1)
        return vec[0:i] + [random.choice(domain[i])] + vec[i+1:]

    # Crossover Operation
    def crossover(r1, r2):
        # Head of r1 joined to the tail of r2.  The upper bound is clamped
        # so domains with fewer than three slots still get a valid cut.
        i = random.randint(1, max(1, n_slots - 2))
        return r1[0:i] + r2[i:]

    def score(population):
        # (cost, solution) pairs, cheapest first.  Sorting on the cost alone
        # means solutions themselves never have to be comparable.
        scored = [(costf(v), v) for v in population]
        scored.sort(key=lambda pair: pair[0])
        return scored

    # Build the initial population
    pop = [[random.choice(domain[k]) for k in range(n_slots)]
           for _ in range(popsize)]

    # How many winners from each generation?
    topelite = int(elite * popsize)

    scores = score(pop)

    # Main loop
    for _ in range(maxiter):
        ranked = [v for (s, v) in scores]

        # Start with the pure winners
        pop = ranked[0:topelite]

        # Parents come from the winners only (indices 0 .. topelite-1).  At
        # least one parent is always available, even when ``topelite``
        # rounds down to zero.
        n_parents = max(1, min(topelite, len(ranked)))

        # Add mutated and bred forms of the winners
        while len(pop) < popsize:
            if random.random() < mutprob:
                # Mutation
                parent = ranked[random.randrange(n_parents)]
                pop.append(mutate(parent))
            else:
                # Crossover
                parent1 = ranked[random.randrange(n_parents)]
                parent2 = ranked[random.randrange(n_parents)]
                pop.append(crossover(parent1, parent2))

        # Score the freshly bred generation so its children can be returned
        # or used as the next set of parents.
        scores = score(pop)

    return scores[0][1], scores[0][0]

In [5]:
# One row per day: [day number, best team found, cooperation count].
results = []
for day in range(1, 535):
    print('Processing DAY', day)
    best_team, best_cost = geneticoptimize(get_domain(day), cost_func)
    # cost_func returns the negated total, so flip the sign back for reporting.
    results.append([day, best_team, -best_cost])

Processing DAY 1
Processing DAY 2
Processing DAY 3
Processing DAY 4
Processing DAY 5
Processing DAY 6
Processing DAY 7
Processing DAY 8
Processing DAY 9
Processing DAY 10
Processing DAY 11
Processing DAY 12
Processing DAY 13
Processing DAY 14
Processing DAY 15
Processing DAY 16
Processing DAY 17
Processing DAY 18
Processing DAY 19
Processing DAY 20
Processing DAY 21
Processing DAY 22
Processing DAY 23
Processing DAY 24
Processing DAY 25
Processing DAY 26
Processing DAY 27
Processing DAY 28
Processing DAY 29
Processing DAY 30
Processing DAY 31
Processing DAY 32
Processing DAY 33
Processing DAY 34
Processing DAY 35
Processing DAY 36
Processing DAY 37
Processing DAY 38
Processing DAY 39
Processing DAY 40
Processing DAY 41
Processing DAY 42
Processing DAY 43
Processing DAY 44
Processing DAY 45
Processing DAY 46
Processing DAY 47
Processing DAY 48
Processing DAY 49
Processing DAY 50
Processing DAY 51
Processing DAY 52
Processing DAY 53
Processing DAY 54
Processing DAY 55
Processing DAY 56
P

Processing DAY 438
Processing DAY 439
Processing DAY 440
Processing DAY 441
Processing DAY 442
Processing DAY 443
Processing DAY 444
Processing DAY 445
Processing DAY 446
Processing DAY 447
Processing DAY 448
Processing DAY 449
Processing DAY 450
Processing DAY 451
Processing DAY 452
Processing DAY 453
Processing DAY 454
Processing DAY 455
Processing DAY 456
Processing DAY 457
Processing DAY 458
Processing DAY 459
Processing DAY 460
Processing DAY 461
Processing DAY 462
Processing DAY 463
Processing DAY 464
Processing DAY 465
Processing DAY 466
Processing DAY 467
Processing DAY 468
Processing DAY 469
Processing DAY 470
Processing DAY 471
Processing DAY 472
Processing DAY 473
Processing DAY 474
Processing DAY 475
Processing DAY 476
Processing DAY 477
Processing DAY 478
Processing DAY 479
Processing DAY 480
Processing DAY 481
Processing DAY 482
Processing DAY 483
Processing DAY 484
Processing DAY 485
Processing DAY 486
Processing DAY 487
Processing DAY 488
Processing DAY 489
Processing D

In [2]:
# Spot check: optimise a single day (432) and display (best team, cost).
geneticoptimize(get_domain(432),cost_func)

([684, 682, 418, 125, 565, 361], -2308)

In [12]:
# Turn the list of [day, team, count] rows into a DataFrame (rebinds ``results``).
results = pd.DataFrame(results)

In [13]:
# Label the three columns produced by the per-day loop.
results.columns = ['Day','Teams','Cooperate Times']

In [14]:
# Preview the first three rows.
results[:3]

Unnamed: 0,Day,Teams,Cooperate Times
0,1,"[366, 833, 749, 608, 639, 10]",2025
1,2,"[743, 465, 40, 701, 510, 889]",2425
2,3,"[743, 465, 40, 470, 729, 889]",1326


In [16]:
# Persist the per-day best teams. The DataFrame index is written as the
# first CSV column (pandas default, index=True).
results.to_csv('BestTeam.csv')

In [17]:
# Spot check: optimise a single day (20) and display (best team, cost).
geneticoptimize(get_domain(20),cost_func)

([684, 238, 418, 125, 565, 361], -2850)