In [155]:
import pulp
from pulp import*
import pandas as pd



#### formulation4 exact 3 team all other constraint

In [156]:
def optimization_form_4 (k,I:list,cov_ub:float,Y:dict,cons_stack_num ):
    '''
    Build and solve the formulation-4 lineup integer program for the entries in I.

    The objective maximises the projected points (``mu``) of entry ``I[0]``.
    Player pools, salaries, teams and batting-order windows are read from the
    module-level tables (``all_player_list``, ``salary_dict``, ``team_dict``,
    ``hitters_team_dict``, ``p_opponent_dict``, ``order_stack_dict``, ...).

    k: accepted for interface compatibility only. It used to bound the entry
       indices for which variables were pre-allocated; variables are now
       created just for the entries in I, the only ones the model references.
    I: list of entry indices to constrain; we solve for the I[0]-th entry.
    cov_ub: upper bound on the number of players entry I[0] may share with
       each previously solved entry (1 .. I[0]-1).
    Y: dict {(entry, player): value} holding earlier solutions; it is updated
       in place with the solution found here.
    cons_stack_num: number of hitters that must come from a single
       batting-order window of a single team.

    Returns (status, varsdict, Y): the PuLP status code, a dict of variable
    name -> value, and the updated Y. Returns None without solving when I[0]
    is neither 1 nor >= 2.
    '''
    target = I[0]
    prob = pulp.LpProblem(("opt"), pulp.LpMaximize)

    # x[i,j] = 1 when player j is in lineup i.
    x = pulp.LpVariable.dicts("X",((i, j) for i in I for j in all_player_list), 0 , 1,cat='Integer')
    # t[i,l] = 1 when lineup i uses at least one player of team l.
    t = pulp.LpVariable.dicts("T",((i, l) for i in I for l in team_list), 0 , 1,cat='Integer')
    # v[i,team]: complete-line indicator (may be 1 only with >= 5 hitters of the team).
    # w[i,team]: partial-line indicator (may be 1 only with >= 2 hitters of the team).
    v = pulp.LpVariable.dicts("V",((i, team) for i in I for team in team_list), 0 , 1,cat='Integer')
    w = pulp.LpVariable.dicts("W",((i, team) for i in I for team in team_list), 0 , 1,cat='Integer')
    # o[i,team,stack]: batting-order window `stack` of `team` is stacked in lineup i.
    o = pulp.LpVariable.dicts("O",((i, team,stack) for i in I for team in team_list for stack in range(9)), 0 , 1,cat='Integer')

    # objective: projected points of the entry being solved
    prob += lpSum(x[(target,j)]* mu[j] for j in all_player_list)

    for i in I:
        # salary cap
        prob += lpSum(x[(i,j)] * salary_dict[j] for j in all_player_list) <=50000
        # roster size: 10 players, 2 of them pitchers
        prob += lpSum(x[(i,j)] for j in all_player_list) == 10
        prob += lpSum(x[(i,j)] for j in p_list) == 2
        # 1 C, 1 1B, 1 2B, 1 3B, 1 SS, 3 OF
        prob += lpSum(x[(i,j)] for j in c_list) == 1
        prob += lpSum(x[(i,j)] for j in oneb_list) == 1
        prob += lpSum(x[(i,j)] for j in twob_list) == 1
        prob += lpSum(x[(i,j)] for j in threeb_list) == 1
        prob += lpSum(x[(i,j)] for j in ss_list) == 1
        prob += lpSum(x[(i,j)] for j in of_list) == 3

    # 4.2 team constraints: at most 5 players per team, and each lineup
    # draws from between 3 and 5 teams.
    for i in I:
        for l in team_list:
            team_count = lpSum(x[(i,j)] for j in team_dict[l])
            prob += 5*t[(i,l)] >= team_count
            prob += t[(i,l)] <= team_count
        prob += lpSum(t[(i,l)] for l in team_list) >= 3
        prob += lpSum(t[(i,l)] for l in team_list) <= 5

    # pitcher stacking: a selected pitcher forces every hitter of the team
    # he faces out of the lineup.
    for i in I:
        for j in p_list:
            prob += lpSum(x[(i,l)] for l in hitters_team_dict[p_opponent_dict[j]]) <= 5*(1-x[i,j])

    # line stacking: exactly one complete-line indicator and at least two
    # partial-line indicators are switched on per lineup.
    for i in I:
        for team in team_list:
            hitter_count = lpSum(x[(i,j)] for j in hitters_team_dict[team])
            prob += 5 * v[(i,team)] <= hitter_count
            # was indexed with the stale loop variable `l` of the team loop above
            prob += v[(i,team)] <= hitter_count
            prob += 2 * w[(i,team)] <= hitter_count
            prob += w[(i,team)] <= hitter_count
        prob += lpSum(v[(i,team)] for team in team_list) == 1
        prob += lpSum(w[(i,team)] for team in team_list) >= 2

    # consecutive stacking: exactly one (team, window) indicator is on, which
    # requires cons_stack_num hitters from that batting-order window.
    for i in I:
        for team in team_list:
            for stack in range(9):
                window_count = lpSum(x[(i,j)] for j in order_stack_dict[team,stack])
                prob += cons_stack_num * o[(i,team,stack)] <= window_count
                prob += o[(i,team,stack)] <= window_count
        # Sum over every window; the previous version reused the leftover
        # `stack` value (8) and so only ever considered the last window.
        prob += lpSum(o[(i,team,stack)] for team in team_list for stack in range(9)) == 1

    # 4.4 overlap: Y[i,j] holds the previously computed entries. The first
    # entry has nothing to overlap with and is solved with the default
    # solver; later entries add one overlap cap per earlier entry and use GLPK.
    if target == 1:
        prob.solve()
    elif target >= 2:
        for prev in range(1,target):
            prob += lpSum(Y[(prev,j)]*x[(target,j)] for j in all_player_list) <= cov_ub
        prob.solve(GLPK())
    else:
        return None

    status = prob.status
    if status != 1:  # 1 is PuLP's "optimal" status code
        print(I)
        print(status)
    varsdict = {var.name: var.varValue for var in prob.variables()}
    # Read the lineup straight from the x variables rather than parsing the
    # entry number and player name back out of the generated variable names.
    for i in I:
        for j in all_player_list:
            Y[(i,j)] = x[(i,j)].varValue
    return(status,varsdict,Y)

#### formulation_5 3team no goalie stacking

### def sequential_optimization_structure

In [157]:
def sequential_opt(k, cov_ub, optimization_form, cons_stack_num):
    '''
    Solve the lineups one entry at a time and collect them in a table.

    k: exclusive upper bound of the entry index; entries are numbered
       1 .. k-1, so k-1 lineups are produced.
    cov_ub: overlap bound, passed unchanged to ``optimization_form``.
    optimization_form: which formulation of optimization; called as
       ``optimization_form(k+1, [i], cov_ub, YY, cons_stack_num)`` and
       expected to return ``(status, varsdict, YY)``.
    cons_stack_num: passed unchanged to ``optimization_form``.

    Returns a DataFrame with one row per entry (index "entry1", "entry2", ...)
    whose cells are the selected player names. Rows with fewer players than
    the widest row are padded with missing values.
    '''
    entries = range(1,k)
    # Start from "nobody selected" for every (entry, player) pair.
    YY = {(i,j): 0 for i in entries for j in all_player_list}

    for progress, i in enumerate(entries, start=1):
        print(str(progress)+"-th entry started>>>")
        (status,varsdict,YY) = optimization_form (k+1,[i],cov_ub,YY, cons_stack_num)
    # Values may be None when a solve produced no value for a variable.
    print(sum(value for value in YY.values() if value is not None))

    result_table = {i: [] for i in entries}
    for (entry, player), value in YY.items():
        # Solvers report integer variables as floats such as 0.9999999, so
        # round rather than truncate; None means "no value", i.e. not selected.
        if value is not None and round(value) == 1:
            result_table[entry].append(player)

    # orient="index" builds one row per entry and tolerates entries with a
    # different number of players (e.g. an infeasible entry with none).
    result = pd.DataFrame.from_dict(result_table, orient="index")
    result.index = ["entry" +str(i) for i in entries]
    return result

## Use data from DailyFantasyNerd; blend projections with RotoGrinders

In [158]:
# Read the DailyFantasyNerd exports; only hitters with a positive projection are kept.
pitchers_data = pd.read_csv("DFN MLB Pitchers DK 4_9.csv")
hitters_data = pd.read_csv("DFN MLB Hitters DK 4_9.csv").loc[lambda frame: frame["Proj FP"] > 0]
# Report (rows, columns) of both tables, hitters first.
for loaded in (hitters_data, pitchers_data):
    print(loaded.shape)

(168, 31)
(20, 30)


In [159]:
# One mapping from the DFN column headers to the names used in the rest of the notebook.
dfn_column_names = {"Player Name": "Last Name",
                    "Pos": "Position",
                    "Opp": "Opponent",
                    "PP": "Power_Play",
                    "Proj FP": "Projection"}
hitters_data = hitters_data.rename(index=str, columns=dfn_column_names)
pitchers_data = pitchers_data.rename(index=str, columns=dfn_column_names)

for frame in (hitters_data, pitchers_data):
    # Drop the away-game marker so the opponent is a plain team code.
    frame['Opponent'] = frame['Opponent'].str.replace('@', '', regex=True)
    # Player names: spaces first, then hyphens, become underscores.
    underscored = frame["Last Name"].replace(' ', '_', regex=True)
    frame["Last Name"] = underscored.replace('-', '_', regex=True)


In [160]:
#data preprocessing
def _ordered_unique(values):
    """Return the distinct items of ``values`` as a list, keeping first-seen order."""
    return list(dict.fromkeys(values))

# Position pools: one frame of eligible hitters per roster slot.
# NOTE(review): catcher is an exact match while every other position uses
# substring matching, so a multi-position catcher is not in c_set -- confirm
# that this is intended.
c_set = hitters_data[hitters_data["Position"]=="C"]
oneb_set = hitters_data[hitters_data["Position"].str.contains("1B")]
twob_set = hitters_data[hitters_data["Position"].str.contains("2B")]
threeb_set = hitters_data[hitters_data["Position"].str.contains("3B")]
ss_set = hitters_data[hitters_data["Position"].str.contains("SS")]
of_set = hitters_data[hitters_data["Position"].str.contains("OF")]

c_list = c_set["Last Name"].unique()
oneb_list = oneb_set["Last Name"].unique()
twob_list = twob_set["Last Name"].unique()
threeb_list = threeb_set["Last Name"].unique()
ss_list = ss_set["Last Name"].unique()
of_list = of_set["Last Name"].unique()
p_list = pitchers_data["Last Name"].unique()

#hitter
hitters_list = hitters_data["Last Name"].unique()
# Ordered de-duplication (instead of set()) keeps the player order -- and so
# the order in which model variables and constraints are built -- identical
# from one kernel run to the next.
all_player_list = _ordered_unique(list(p_list) + list(hitters_list))
#team
team_list = hitters_data["Team"].unique()

#def mu: each player has a projection score (a pitcher row wins on a name clash)
mu = dict(zip(hitters_data["Last Name"], hitters_data["Projection"]))
mu.update(zip(pitchers_data["Last Name"], pitchers_data["Projection"]))
#def salary
salary_dict = dict(zip(hitters_data["Last Name"], hitters_data["Salary"]))
salary_dict.update(zip(pitchers_data["Last Name"], pitchers_data["Salary"]))

#def team_dict {team: [every hitter and pitcher of the team]}
team_dict = {team: [] for team in team_list}
for roster in (hitters_data, pitchers_data):
    for team, name in zip(roster["Team"], roster["Last Name"]):
        # team_list is built from hitters only, so a pitcher's team may be
        # missing from it; setdefault avoids a KeyError in that case.
        team_dict.setdefault(team, []).append(name)
team_dict = {team: _ordered_unique(names) for team, names in team_dict.items()}

#def pitchers's opponent set {pitcher: opposing team}
p_opponent_dict = dict(zip(pitchers_data["Last Name"], pitchers_data["Opponent"]))

# def team dict {team1:[hitter1..]}
hitters_team_dict = {team: [] for team in team_list}
for team, name in zip(hitters_data["Team"], hitters_data["Last Name"]):
    hitters_team_dict[team].append(name)
hitters_team_dict = {team: _ordered_unique(names) for team, names in hitters_team_dict.items()}

# From here on keep only hitters with a confirmed batting order of 1-9; the
# tables above were built from the unfiltered hitters.
batting_order_list = ["1","2","3","4","5","6","7","8","9"]
hitters_data = hitters_data[hitters_data["Batting Order (Confirmed)"].isin(batting_order_list)]

# The nine cyclic windows of five consecutive batting-order slots.
order_stack=[ [1,2,3,4,5],
              [2,3,4,5,6],
              [3,4,5,6,7],
              [4,5,6,7,8],
              [5,6,7,8,9],
              [6,7,8,9,1],
              [7,8,9,1,2],
              [8,9,1,2,3],
              [9,1,2,3,4]]
# order_stack_dict[team, window] = hitters of `team` batting in that window.
order_stack_dict = {(team, window_idx): [] for team in team_list for window_idx in range(9)}
for team, name, slot in zip(hitters_data["Team"],
                            hitters_data["Last Name"],
                            hitters_data["Batting Order (Confirmed)"]):
    for window_idx, window in enumerate(order_stack):
        members = order_stack_dict[team, window_idx]
        # De-duplicate like the other tables: a name listed twice would be
        # counted twice in the stacking sums.
        if int(slot) in window and name not in members:
            members.append(name)

In [161]:
# Display (rows, columns) of hitters_data as it stands at this point of the notebook.
hitters_data.shape

(168, 31)

## output automation
#### parse output to dk's template format

In [162]:
# DraftKings upload template: column "Unnamed: 13" is used as the player name
# and "Unnamed: 14" as the value it maps to.
template_id_df = pd.read_csv("DKSalaries (1).csv")
# Normalise the template names the same way as the DFN names (space, then hyphen -> underscore).
template_id_df["Unnamed: 13"] = (
    template_id_df["Unnamed: 13"]
    .str.replace(' ', '_', regex=True)
    .str.replace('-', '_', regex=True)
)
player_id_dict = {
    template_row["Unnamed: 13"]: template_row["Unnamed: 14"]
    for _, template_row in template_id_df.iterrows()
}
# Actual fantasy points per player: hitters first, then pitchers (which win on a name clash).
player_score_dict = {}
for score_source in (hitters_data, pitchers_data):
    player_score_dict.update(
        (score_row["Last Name"], score_row["Actual FP"])
        for _, score_row in score_source.iterrows()
    )

In [None]:
# K is the exclusive upper bound of the entry index: sequential_opt loops over
# range(1, K), so K-1 lineups are generated.
K=500
# Positional arguments after K: cov_ub=7, the formulation to solve
# (optimization_form_4) and cons_stack_num=5.
%time out_put = sequential_opt(K,7, optimization_form_4, 5) # glpk

1-th entry started>>>
2-th entry started>>>
3-th entry started>>>
4-th entry started>>>
5-th entry started>>>
6-th entry started>>>
7-th entry started>>>
8-th entry started>>>
9-th entry started>>>
10-th entry started>>>
11-th entry started>>>
12-th entry started>>>
13-th entry started>>>
14-th entry started>>>
15-th entry started>>>
16-th entry started>>>
17-th entry started>>>
18-th entry started>>>
19-th entry started>>>
20-th entry started>>>
21-th entry started>>>
22-th entry started>>>
23-th entry started>>>
24-th entry started>>>
25-th entry started>>>
26-th entry started>>>
27-th entry started>>>
28-th entry started>>>
29-th entry started>>>
30-th entry started>>>
31-th entry started>>>
32-th entry started>>>
33-th entry started>>>
34-th entry started>>>
35-th entry started>>>
36-th entry started>>>
37-th entry started>>>
38-th entry started>>>
39-th entry started>>>
40-th entry started>>>
41-th entry started>>>
42-th entry started>>>
43-th entry started>>>
44-th entry started>

In [None]:

# Re-order every lineup into slot order: pitchers, C, 1B, 2B, 3B, SS, outfielders.
DK_df = []
for entry_idx in range(K-1):
    # Pools are tested in this order and the first pool containing the player wins.
    pools = [("c", c_list), ("p", p_list), ("oneb", oneb_list), ("twob", twob_list),
             ("threeb", threeb_list), ("ss", ss_list), ("of", of_list)]
    slots = {label: [] for label, _ in pools}

    for player in out_put.iloc[entry_idx]:
        for label, pool in pools:
            if player in pool:
                slots[label].append(player)
                break

    DK_df.append(slots["p"] + slots["c"] + slots["oneb"] + slots["twob"]
                 + slots["threeb"] + slots["ss"] + slots["of"])

DK_df = pd.DataFrame(DK_df)


In [None]:
# Swap every player name for its value in player_id_dict, label the ten
# roster slots and write the upload file.
DK_df.replace(to_replace=player_id_dict, inplace=True)
DK_df.columns = ["P"] * 2 + ["C", "1B", "2B", "3B", "SS"] + ["OF"] * 3
DK_df.to_csv("dk_result 3_31.csv", index=False)

In [None]:
# Score every generated lineup with the players' actual fantasy points.
ex_list = []
for index,row in out_put.iterrows():
    score = 0
    for x in row:
        try:
            score+=player_score_dict[x]
        except (KeyError, TypeError):
            # Best-effort scoring: skip slots without a usable actual score
            # (player missing from the table, padded cell, non-numeric value).
            # Unlike a bare `except`, this does not swallow KeyboardInterrupt
            # or unrelated programming errors.
            continue

    # Only non-negative totals are kept; a NaN total also fails this test.
    if score >=0:
        ex_list.append(score)
        print(index)
        print(score)
pd.DataFrame(ex_list).to_csv("ex_50_setting2.csv")

In [None]:
# Best and average actual score over the kept lineups. An empty ex_list used
# to raise ValueError (max) / ZeroDivisionError (mean); report it instead.
if ex_list:
    print("Max: " + str(max(ex_list)))
    print("Mean: " + str(sum(ex_list)/len(ex_list)))
else:
    print("No scored entries to summarize.")

#158 86

In [None]:
# Pick one generated lineup by row position (481 is a hard-coded choice) for inspection.
line_vis = out_put.iloc[481]
print(line_vis)


In [None]:
# Pitcher rows whose name appears in the lineup held in line_vis.
pitchers_data[pitchers_data["Last Name"].isin(line_vis)]

In [None]:
# Hitter rows whose name appears in the lineup held in line_vis.
hitters_data[hitters_data["Last Name"].isin(line_vis)]

In [None]:
# Same hitters as above, reduced to team and confirmed batting order to eyeball the stack.
hitters_data[hitters_data["Last Name"].isin(line_vis)][["Team",'Batting Order (Confirmed)']]

In [None]:
# Total for the lineup at row position 9 after mapping each player name through player_score_dict.
sum(out_put.iloc[9].replace(player_score_dict))

In [None]:
#137 8:56

In [None]:
###
# for players with two positions, assume both positions could fit; exclude one at the end
# for hitter <= 3 team
# for hitter, at least one team has >=4/5 player
# hitter consecutive order

In [None]:
# Display the full table of generated lineups (one row per entry).
out_put

In [177]:
from collections import defaultdict 
class Graph:
    """Directed graph stored as an adjacency list, with a breadth-first traversal."""

    # Constructor
    def __init__(self):

        # default dictionary to store graph: vertex -> list of successors
        self.graph = defaultdict(list)

    # function to add an edge to graph
    def addEdge(self,u,v):
        """Add a directed edge from ``u`` to ``v``."""
        self.graph[u].append(v)

    # Function to print a BFS of graph
    def BFS(self, s):
        """Print the vertices reachable from ``s`` in breadth-first order.

        Vertices are printed on one line, each followed by a single space.
        Any hashable vertex label works, including vertices that only ever
        appear as the destination of an edge.
        """
        from collections import deque

        # A set of seen vertices replaces the old list sized by
        # len(self.graph): that length counts only vertices with outgoing
        # edges, so indexing by vertex raised IndexError for sink-only
        # vertices and could not handle non-integer labels.
        visited = {s}

        # Queue of discovered vertices waiting to be printed.
        queue = deque([s])

        while queue:

            # Dequeue a vertex and print it
            s = queue.popleft()
            print (s, end = " ")

            # Enqueue every successor not seen yet. .get() avoids inserting
            # empty adjacency lists into the defaultdict while traversing.
            for i in self.graph.get(s, ()):
                if i not in visited:
                    visited.add(i)
                    queue.append(i)


# Build the sample graph from an edge list, then traverse it from vertex 2.
g = Graph()
for source_vertex, target_vertex in [(0, 1), (0, 2), (1, 2), (2, 0), (2, 3), (3, 3)]:
    g.addEdge(source_vertex, target_vertex)

banner = ("Following is Breadth First Traversal"
          " (starting from vertex 2)")
print (banner)
g.BFS(2)
  
# This code is contributed by Neelam Yadav 

Following is Breadth First Traversal (starting from vertex 2)
2 0 3 1 