In [52]:
#IMPORT MODULES
import numpy as np
import pandas as pd
import random
import math
import copy
import re
import sys
import sklearn
import pickle

import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import torch.utils.data as data

**Import Data

In [56]:
# Game results table; TeamDataset unpacks each row as (game_id, winner, loser).
gameresults = pd.read_csv("data/gameresults.csv", encoding="latin1").values
# Per-player box score rows; missing cells are replaced with 0 before conversion to an array.
gamescores = pd.read_csv("data/full_sheet_scaled.csv", encoding="latin1").fillna(0).values
#gamescores = pd.read_csv("~/Downloads/full_sheet_scaled.csv", encoding="latin1").fillna(0).values[]
# Projected minutes; update_minutes_played reads column 0 as the player name and column 1 as minutes.
mpval = pd.read_csv("data/mp538.csv", encoding="latin1").values
# Margin table consumed by convert_cdf (column 0 = margin, column `year - 2008` = value for that year).
cdf = pd.read_csv("data/cdf.csv",encoding="latin1").values
# NOTE(review): mindf is not referenced anywhere in the visible part of this notebook.
mindf = pd.read_csv("data/minutes_target.csv",encoding="latin1").values

# NOTE(security): pickle.load can execute arbitrary code; only load these files from a trusted source.
with open("data/nba_draft.p","rb") as f:
    draft = pickle.load(f)

# Indexed below as heightweight[team][year][player] -> {'height', 'weight', 'age'}.
with open("data/nba_height_weight.p","rb") as f:
    heightweight = pickle.load(f)

# Maps the team names used in the CSVs to abbreviations (see update_standings_with_current_season).
with open("data/nba_team_to_abbr_mapping.p","rb") as f:
    team_to_abbr_mapping = pickle.load(f)

In [57]:
# Rows with this many minutes or fewer are skipped when updating embeddings.
MIN_MINUTES = 5
# Column indices into each `gamescores` row.
MINUTES_IDX = 35
GAME_ID_IDX = 31
TEAM_IDX = 1
PLAYER_IDX = 0
# update_minutes_played rescales each roster's projected minutes so that they sum to this value.
# NOTE(review): unit/meaning of 33 is not visible here -- confirm.
TOTAL_TEAM_MINUTES = 33

keep = [2,3,5,6,8,9,11,12,14,15,16,17,18,33,35,42,43,44] # box score lines we're including

# Players whose projected minutes are forced to 0 in update_minutes_played.
injured_players = {
    "Klay Thompson",
    "Jonathan Isaac",
    "Markelle Fultz",
    "Spencer Dinwiddie",
    "Thomas Bryant",
    "Marquese Chriss",
    "Chris Clemons"
}

# Name -> suffix; get_player_name appends the suffix (e.g. "Tim Hardaway" -> "Tim Hardaway Jr.").
players_to_correct = {
    "Michael Porter": "Jr.",
    "Kevin Porter": "Jr.",
    "Otto Porter":  "Jr.",
    "Tim Hardaway": "Jr.",
    "Gary Trent": "Jr.",
    "Marvin Bagley": "III",
    "Jaren Jackson": "Jr.",
    "Kelly Oubre": "Jr.",
}

# Conference membership by team abbreviation; used for scheduling (3 games within a conference, 2 across).
East=['BRK','PHI','CHI','NYK','ATL','DET','CLE','ORL','TOR','MIA','CHO','WAS','BOS','IND','MIL']
West=['LAL','UTA','MEM','NOP','GSW','HOU','DAL','DEN','SAC','OKC','POR','MIN','PHO','LAC','SAS']
NBA = East + West

**RUN THESE ONLY TO RETRAIN

In [None]:
# NOTE: create_box_score_datasets is defined in a later cell; run the utility cells below first.
# Builds the overall / training / validation box-score datasets from `gamescores`.
box_score_datasets = create_box_score_datasets()

In [None]:
# Trains the box-score encoder on the full dataset (use_all=True skips validation).
box_score_model = create_and_train_box_score_model(datasets=box_score_datasets, use_all=True, num_epochs=10) #14

In [120]:
## RUN THIS DAILY
# Replays every box score through the trained encoder to rebuild per-player embeddings and games-played counts.
embeddings, gp =create_embeddings_and_games_played(box_score_model)

In [59]:
# Per-player minutes: per-game values from `gamescores` plus a rescaled "projected" entry.
mp = create_minutes_played(embeddings)

In [60]:
# Maps player -> {game_id: team} for rows whose team appears in team_to_abbr_mapping.
affiliations = create_affiliations(embeddings)

In [54]:
# Year-over-year embedding deltas; the dataset class also reads the global `mp`, so run the `mp` cell first.
adjustment_datasets = create_year_adjustment_datasets(embeddings)

In [None]:
# Trains the year-to-year adjustment model on all seasons (use_all=True skips validation).
model_players = create_and_train_year_adjustment_model(adjustment_datasets, use_all=True, num_epochs=150, batch_size=64)

In [56]:
# Adds a 'predicted' entry (2020 snapshot + coeff * predicted change) to eligible players; mutates `embeddings` in place.
embeddings = update_embeddings_with_predictions(model_players, embeddings, coeff=1.0)

In [61]:
# Winner/loser roster tensors per game; TeamDataset also reads the globals `embeddings` and `mp`.
team_datasets = create_team_data_sets(gameresults, affiliations)

In [None]:
# Trains the player-combiner model; just_minutes=False uses the learned player-weight layer instead of raw minutes.
pc_model = create_and_train_team_model(team_datasets, use_all=True, num_epochs=14, just_minutes=False)

In [62]:
# NOTE(review): add_inseason_trades is not defined in the visible part of the notebook -- confirm where it lives.
# This rebinds the globals `heightweight` and `mp`, so re-running earlier cells afterwards sees the updated values.
heightweight, mp = add_inseason_trades()
# Rosters (2021 entries of `heightweight`) restricted to players with positive projected minutes.
team_to_player_map, player_to_team_map = create_team_to_player_map(embeddings, heightweight, mp)

In [None]:
# Team embeddings built from each player's '2020' snapshot (pass 'predicted' to use the adjusted embeddings).
team_abilities = create_team_abilities(pc_model, heightweight, embeddings, mp, team_to_player_map, '2020')

In [None]:
# team_win_prob_matrix[a][b] = model probability that team a beats team b.
team_win_prob_matrix = create_team_win_probability_matrix(pc_model, team_abilities)

In [None]:
# NOTE(review): simulate_seasons is not defined in the visible part of the notebook.
# The simulation helpers below use `random`; seed it beforehand if reproducible results are needed.
champions, finals, playoffs = simulate_seasons(team_win_prob_matrix, num_simulations=10_000, include_season_to_date=True)

In [None]:
# Expected win totals per team, sorted descending; reads the global `team_win_prob_matrix`.
expected_wins = find_expected_wins(gameresults, include_season_to_date=True, display=True)

In [None]:
# Scores every player's 'current' embedding; the printed ranking only includes players with more than 50 games.
best_current_players = find_best_current_players('current', box_score_model, display=True)

In [None]:
# NOTE(review): sort_results is not defined in the visible part of the notebook.
sorted_results = sort_results(champions)

In [None]:
# Same summary as above, applied to the finals output of simulate_seasons.
finals_results = sort_results(finals)

In [None]:
# Same summary as above, applied to the playoffs output of simulate_seasons.
playoff_results = sort_results(playoffs)

In [642]:
# Export each player's history: player_history writes one CSV per player to "history/<player>.csv".
# The "history" directory must already exist.
for player in embeddings:
    player_history(player)

**START OF UTILITY CODE

In [1]:
def run_through_model(model, dataset, batch_size, num_epochs, optimizer=None, regularize=False):
    """Run ``model`` over ``dataset`` for ``num_epochs`` and print the mean MSE per epoch.

    Args:
        model: callable mapping a batch tensor to predictions.
        dataset: map-style dataset yielding ``(input, label)`` pairs.
        batch_size: samples per batch; the trailing partial batch is dropped.
        num_epochs: number of passes over the dataset.
        optimizer: when given, a backward pass and optimizer step follow every
            batch; when ``None`` the model is only evaluated.
        regularize: when true, ``0.25 * ||prediction||`` is added to the loss
            that is optimized (it is excluded from the printed value).

    Prints ``nan`` for an epoch when the dataset holds fewer than
    ``batch_size`` samples (no full batch exists) instead of dividing by zero.
    """
    criterion = nn.MSELoss()
    sampler = data.RandomSampler(dataset)
    data_loader = data.DataLoader(dataset=dataset, batch_size=batch_size, sampler=sampler, drop_last=True)
    # With drop_last=True this is exactly the number of batches per epoch.
    num_batches = len(dataset) // batch_size
    for epoch in range(num_epochs):
        total_loss = 0
        for (batch, labels) in data_loader:
            prediction = model(batch)
            regularization_penalty = (0.25 * torch.norm(prediction)) if regularize else torch.zeros(1).detach()
            loss = criterion(prediction, labels) + regularization_penalty
            # Report the plain MSE so regularized and unregularized runs are comparable.
            total_loss += loss.item() - regularization_penalty.item()
            if optimizer is not None:
                loss.backward()
                optimizer.step()
                optimizer.zero_grad()
        print(total_loss / num_batches if num_batches else float("nan"))

**MODEL FOR ENCODING BOX SCORES AS EMBEDDINGS (DECLARATION FOLLOWED BY TRAINING)

In [9]:
##Train Embeddings
class BoxScoreToEmbeddingModel(nn.Module):
    """Two-layer network: box score -> 4-d embedding -> scalar prediction.

    ``encode`` is the linear projection used as the player embedding;
    ``predict`` applies ``tanh`` followed by a second linear layer.
    """

    def __init__(self, dimensionality):
        """Create the layers; ``dimensionality`` is the number of input features."""
        super().__init__()
        self.__layer_one = nn.Linear(dimensionality, 4)
        self.__layer_two = nn.Linear(4, 1)

    def forward(self, performance):
        """Return the scalar prediction for ``performance``.

        The original code called ``performance.unsqueeze(0)`` here and threw
        the result away (``unsqueeze`` is not in-place). ``nn.Linear`` already
        accepts inputs with or without a batch dimension, so the dead
        statement is removed; outputs are unchanged.
        """
        return self.predict(self.encode(performance))

    def encode(self, performance):
        """Project a box score (last dimension ``dimensionality``) to the 4-d embedding."""
        return self.__layer_one(performance)

    def predict(self, embedding):
        """Map a 4-d embedding to the scalar prediction."""
        return self.__layer_two(torch.tanh(embedding))

In [10]:
class PlayerBoxScoreDataSet(data.Dataset):
    """Dataset of (box score features, label) pairs for individual player games.

    Only rows whose minutes value lies strictly between ``min_minutes`` and
    ``max_minutes`` are kept. Features are the columns listed in the
    module-level ``keep``; the label is column 46.
    """

    def __init__(self, box_score_list, min_minutes=10, max_minutes=40):
        super().__init__()
        LABEL_IDX = 46
        self.__x = self.__filter_games(box_score_list, keep, min_minutes, max_minutes)
        self.__y = self.__filter_games(box_score_list, [LABEL_IDX], min_minutes, max_minutes)

    def __filter_games(self, games, indices, min_minutes, max_minutes):
        # Select the requested columns from every row inside the minutes window.
        selected_rows = []
        for game in games:
            if min_minutes < game[MINUTES_IDX] < max_minutes:
                values = game[indices].astype("double")
                selected_rows.append(torch.tensor(values, dtype=torch.double))
        return torch.stack(selected_rows)

    def __getitem__(self, idx):
        features = self.__x[idx]
        label = self.__y[idx]
        return (features, label)

    def __len__(self):
        return len(self.__x)

In [11]:
def create_box_score_datasets():
    """Build the box-score datasets from the global ``gamescores``.

    Returns a dict with "overall" (all rows), "training" (first 80% of rows)
    and "validation" (remaining rows).
    """
    split = int(0.8 * len(gamescores))
    overall = PlayerBoxScoreDataSet(gamescores)
    training = PlayerBoxScoreDataSet(gamescores[:split])
    validation = PlayerBoxScoreDataSet(gamescores[split:])
    return {"overall": overall, "training": training, "validation": validation}

In [12]:
def train_box_score_model(box_score_model, datasets, use_all=False, num_epochs=10, batch_size=64, regularize=False):
    """Train ``box_score_model`` with AdamW (lr=1e-4).

    Uses ``datasets["overall"]`` when ``use_all`` is true, otherwise
    ``datasets["training"]``.
    """
    box_score_optimizer = optim.AdamW(box_score_model.parameters(), lr=1e-4)
    split_name = "overall" if use_all else "training"
    with torch.enable_grad():
        run_through_model(
            box_score_model,
            datasets[split_name],
            batch_size=batch_size,
            num_epochs=num_epochs,
            optimizer=box_score_optimizer,
            regularize=regularize,
        )

def validate_box_score_model(box_score_model, datasets):
    """Print the mean loss of ``box_score_model`` on ``datasets["validation"]`` (one sample per batch, no gradients)."""
    validation_set = datasets["validation"]
    with torch.no_grad():
        run_through_model(box_score_model, validation_set, batch_size=1, num_epochs=1)

def create_and_train_box_score_model(datasets, use_all=False, num_epochs=10, batch_size=64, regularize=False):
    """Create a double-precision ``BoxScoreToEmbeddingModel`` and train it.

    The input width is taken from ``len(keep)`` (the feature columns used by
    ``PlayerBoxScoreDataSet``) rather than a hard-coded 18, so editing ``keep``
    cannot silently desynchronize the model from the data.

    When ``use_all`` is false the model is trained on the training split and
    then evaluated on the validation split; otherwise it is trained on all
    data and validation is skipped. Returns the trained model.
    """
    box_score_model = BoxScoreToEmbeddingModel(len(keep)).double()
    train_box_score_model(box_score_model, datasets, use_all=use_all, num_epochs=num_epochs, batch_size=batch_size, regularize=regularize)
    if not use_all:
        validate_box_score_model(box_score_model, datasets)
    return box_score_model

**CALCULATES PLAYER'S EMBEDDINGS

In [13]:
def get_player_name(player):
    """Return ``player`` with its suffix from ``players_to_correct`` appended, or unchanged if it has none."""
    if player in players_to_correct:
        return player + " " + players_to_correct[player]
    return player

def correct_player_dict(player_dict):
    """Alias each suffix-less entry of ``player_dict`` under its corrected name.

    For every name in ``players_to_correct`` that is a key of ``player_dict``,
    the same value is also stored under ``"<name> <suffix>"``. The dict is
    modified in place; nothing is returned.

    The original read the value from the global ``embeddings`` instead of the
    dict it was given, and raised ``KeyError`` for players that were absent.
    """
    for player, suffix in players_to_correct.items():
        if player in player_dict:
            player_dict[player + " " + suffix] = player_dict[player]

In [14]:
def create_embeddings_and_games_played(box_score_model):
    """Replay every box score in ``gamescores`` to build player embeddings.

    Each player's running 'current' embedding starts at zero and is moved
    toward the encoded box score of every game with more than ``MIN_MINUTES``
    minutes, using a step size that decays with the player's games played.
    A copy of the running embedding is stored under every game id, and a
    snapshot of all players is stored under a year label ('2010'..'2020')
    when the first row of the game listed in ``year_endings_map`` is reached.

    Returns:
        (embeddings, gp): ``embeddings[player]`` maps 'current', year labels
        and game ids to 4-d tensors; ``gp[player]`` is the number of games
        processed for that player.

    Also prints the accumulated squared distance between each encoded game
    and the player's prior embedding, counted only after the first 28000
    processed rows.
    """

    # Game ids (column 0 of gameresults) at hard-coded row indices, mapped to year labels.
    # NOTE(review): these row indices are tied to the current gameresults.csv -- confirm when the file changes.
    year_endings_map = {
        gameresults[1170, 0]: '2010',
        gameresults[2543, 0]: '2011',
        gameresults[3559, 0]: '2012',
        gameresults[4926, 0]: '2013',
        gameresults[6241, 0]: '2014',
        gameresults[7559, 0]: '2015',
        gameresults[8871, 0]: '2016',
        gameresults[10187, 0]: '2017',
        gameresults[11496, 0]: '2018',
        gameresults[12808, 0]: '2019',
        gameresults[13949, 0]: '2020'
    }

    embeddings={}
    gp={}
    # Kept as a Python float so the final print also works when fewer than
    # 28000 rows are processed (the original called .item() on the int 0).
    loss_embeddings = 0.0
    counter = 0

    BASE_UPDATE_COEFFICIENT = (1/4) #TRYING OUT DIFFERENT CO-EFFICIENTS

    # Every player seen in the data starts from a zero embedding.
    for game in gamescores:
        player = get_player_name(game[PLAYER_IDX])
        embeddings[player]={'current': torch.zeros(4)}
        gp[player] = 0

    old_game_id = None
    for game in filter(lambda x: x[MINUTES_IDX] > MIN_MINUTES, gamescores):

        game_id = game[GAME_ID_IDX]
        # On the first row of a year-ending game, snapshot every player before that game is applied.
        if game_id != old_game_id and game_id in year_endings_map:
            for player_name in embeddings:
                embeddings[player_name][year_endings_map[game_id]] = copy.deepcopy(embeddings[player_name]['current'])

        player = get_player_name(game[PLAYER_IDX])
        game_val_input = torch.from_numpy(game[keep].astype('double')).detach()
        with torch.no_grad():
            encoded_game_val= box_score_model.encode(game_val_input)

        prev_gp = gp[player]
        new_gp = prev_gp + 1
        gp[player] = new_gp

        # Step size shrinks as 1 / log(games played + 1).
        update_coefficient = BASE_UPDATE_COEFFICIENT * (1/ math.log(new_gp+1))

        diff = (encoded_game_val - embeddings[player]['current'])#*(mp[player][game_id]/48)
        dist = torch.norm(diff)
        counter += 1
        if counter > 28000:
            loss_embeddings += (dist**2).item()

        embeddings[player]['current'] += update_coefficient * diff
        embeddings[player][game_id] = copy.deepcopy(embeddings[player]['current'])
        old_game_id = game_id

    print(loss_embeddings)
    return embeddings, gp

**GETS TEAM AFFILIATIONS

In [15]:
def get_nearest_player(player, embeddings):
    """Resolve ``player`` to the key used for that player in ``embeddings``.

    Resolution order:
      1. the suffix-corrected name (``get_player_name``) if it is a key;
      2. that name with "Jr." removed, if it is a key;
      3. otherwise the key whose character histogram is closest to the name,
         accepted only when that key contains a character above "z" (i.e. the
         mismatch is plausibly an accented spelling);
      4. otherwise the suffix-corrected name itself.

    The histogram covers code points 0..ord("z") inclusive. The original used
    ``ord("z")`` as the length, which silently dropped every lowercase "z"
    from the comparison.
    """
    HIST_LENGTH = ord("z") + 1

    def has_funky_characters(name):
        return any(ord(character) > ord("z") for character in name)

    def create_letter_histogram(source):
        letter_histogram = np.zeros(HIST_LENGTH)
        for letter in source:
            code = ord(letter)
            if code < HIST_LENGTH:
                letter_histogram[code] += 1
        return letter_histogram

    name = get_player_name(player)
    if name in embeddings:
        return name
    without_junior = name.replace("Jr.", "").rstrip()
    if without_junior in embeddings:
        return without_junior

    reference = create_letter_histogram(name)

    closest_name = ""
    best_match_score = 100  # candidates at or above this distance are never considered
    for candidate in embeddings:
        score = np.abs(reference - create_letter_histogram(candidate)).sum()
        if score < best_match_score:
            best_match_score = score
            closest_name = candidate

    return closest_name if has_funky_characters(closest_name) else name

def update_minutes_played(mpval, mp, embeddings):
    """Fill in and normalize the "projected" minutes in ``mp``.

    Steps:
      1. copy each projection from ``mpval`` (column 0 = player, column 1 =
         minutes) onto the matching ``mp`` entry, resolving names with
         ``get_nearest_player`` when there is no exact match;
      2. set the projection of every player in ``injured_players`` to 0;
      3. for every team in ``NBA``, rescale the projections of its 2021
         roster so that they sum to ``TOTAL_TEAM_MINUTES``.

    Injured players missing from ``mp`` and rosters whose projections sum to
    zero are skipped; both cases previously raised. ``mp`` is modified in
    place and returned.
    """

    MP_VAL_PLAYER_IDX = 0
    MP_VAL_MINUTES_IDX = 1

    for mp_value in mpval:
        player = get_player_name(mp_value[MP_VAL_PLAYER_IDX])
        if player not in mp:
            player = get_nearest_player(player, embeddings)
        if player in mp:
            mp[player]['projected'] = mp_value[MP_VAL_MINUTES_IDX]

    for player in injured_players:
        if player in mp:
            mp[player]["projected"] = 0

    for team in NBA:
        # Resolve every roster name once and keep those we have minutes for.
        roster = []
        for name in heightweight[team][2021]:
            resolved = get_nearest_player(get_player_name(name), embeddings)
            if resolved in mp:
                roster.append(resolved)
        total_minutes_played = sum(mp[player]['projected'] for player in roster)
        if total_minutes_played == 0:
            continue  # nothing to rescale; avoids dividing by zero
        scale = total_minutes_played / TOTAL_TEAM_MINUTES
        for player in roster:
            mp[player]['projected'] /= scale

    return mp

def initialize_minutes_played(embeddings):
    """Create the minutes table: one entry per player with "projected" = 0 plus the minutes of every game in ``gamescores``."""
    mp = {player_name: {"projected": 0} for player_name in embeddings}
    for game in gamescores:
        resolved_name = get_nearest_player(get_player_name(game[PLAYER_IDX]), embeddings)
        game_id = game[GAME_ID_IDX]
        mp[resolved_name][game_id] = game[MINUTES_IDX]
    return mp

def create_minutes_played(embeddings):
    """Build the minutes table from ``gamescores`` and apply the projections in the global ``mpval``."""
    per_game_minutes = initialize_minutes_played(embeddings)
    with_projections = update_minutes_played(mpval, per_game_minutes, embeddings)
    return with_projections


In [16]:
def create_affiliations(embeddings):
    """Return ``{player: {game_id: team}}`` built from ``gamescores``.

    Every player in the data gets an entry; a game is recorded only when its
    team value is a key of ``team_to_abbr_mapping``.
    """
    affiliations = {}
    for game in gamescores:
        player = get_nearest_player(get_player_name(game[PLAYER_IDX]), embeddings)
        games_for_player = affiliations.setdefault(player, {})
        team = game[TEAM_IDX]
        if team not in team_to_abbr_mapping:
            continue
        games_for_player[game[GAME_ID_IDX]] = team
    return affiliations

**PREDICTING CHANGES IN EMBEDDINGS FROM YEAR TO YEAR

In [17]:
#PREDICT YEAR-TO-YEAR IMPROVEMENTS
#now trying to predict scores embeddings
class EmbeddingAdjustmentModel(nn.Module):
    """Maps an 8-d player description to a 4-d embedding change (Linear -> GELU -> Linear)."""

    def __init__(self):
        super().__init__()
        self.__layer_one = nn.Linear(8, 4)
        self.__layer_two = nn.Linear(4, 4)

    def forward(self, this_year):
        hidden = self.__layer_one(this_year)
        activated = F.gelu(hidden)
        return self.__layer_two(activated)

In [18]:
class YearToYearImprovementDataset(data.Dataset):
    """Pairs of (player description in year Y, embedding change from Y to Y+1).

    A player contributes a sample for year ``Y`` when they have embedding
    snapshots for both ``str(Y)`` and ``str(Y + 1)``, appear in the global
    ``mp``, are found on a roster in ``heightweight`` for ``Y``, and their
    embedding change has norm above ``EPSILON``.
    """

    def __init__(self, yearlist, embeddings):
        super().__init__()
        EPSILON = 10e-4  # == 1e-3; smaller changes are dropped

        x_list = []
        y_list = []
        for year in yearlist:
            this_year, next_year = str(year), str(year + 1)
            for player in embeddings:
                snapshots = embeddings[player]
                if this_year not in snapshots or next_year not in snapshots or player not in mp:
                    continue
                team = YearToYearImprovementDataset.find_team(player, year)
                current = YearToYearImprovementDataset.construct_vector_representation(team, player, year, embeddings)
                target = self.__get_target(player, year, embeddings)
                if current is not None and np.linalg.norm(target.numpy()) > EPSILON:
                    x_list.append(current)
                    y_list.append(target)
        self.__x = torch.stack(x_list).double()
        self.__y = torch.stack(y_list).double()

    def __getitem__(self, idx):
        return (self.__x[idx], self.__y[idx])

    def __len__(self):
        return len(self.__x)

    @staticmethod
    def construct_vector_representation(team, player, year, embeddings):
        """Return the 8-d input vector for ``player`` in ``year``, or ``None`` when ``team`` is ``None``.

        The vector is the 4-d embedding snapshot for ``str(year)`` followed by
        height, weight, age and draft value (``draft[player][1]``, or 61 when
        the player is not in ``draft``).
        """
        skills = embeddings[player][str(year)]
        if team is None:
            return None
        bio = heightweight[team][year][player]
        height = int(bio['height'])
        weight = int(bio['weight'])
        age = YearToYearImprovementDataset.get_age(bio['age'], year)
        draft_pos = draft[player][1] if player in draft else 61
        descriptors = torch.tensor(np.asarray([height, weight, age, draft_pos])).float()
        overall_vec = torch.cat([skills, descriptors], dim=0).detach()
        return overall_vec.double()

    @staticmethod
    def get_age(date_of_birth, current_year):
        """Return ``current_year`` minus the first 4-digit number found in ``date_of_birth``."""
        # Raw string: '\d' in a normal string literal is an invalid escape sequence.
        yob = int(re.search(r'\d{4}', date_of_birth).group(0))
        return current_year - yob

    @staticmethod
    def find_team(player, year):
        """Return the first team whose ``year`` roster contains ``player``, or ``None``.

        Teams that have no roster stored for ``year`` are skipped instead of
        raising ``KeyError``.
        """
        for team in heightweight:
            rosters = heightweight[team]
            if year in rosters and player in rosters[year]:
                return team
        return None

    def __get_target(self, player, year, embeddings):
        return embeddings[player][str(year + 1)] - embeddings[player][str(year)]

In [19]:
def create_year_adjustment_datasets(embeddings):
    """Build the year-to-year datasets: "overall" (2010-2019), "training" (2010-2018) and "validation" (2019)."""
    overall = YearToYearImprovementDataset(range(2010, 2020), embeddings)
    training = YearToYearImprovementDataset(range(2010, 2019), embeddings)
    validation = YearToYearImprovementDataset(range(2019, 2020), embeddings)
    return {"overall": overall, "training": training, "validation": validation}

def train_player_model(model_players, datasets, use_all=False, num_epochs=200, batch_size=64):
    """Train ``model_players`` with AdamW (lr=1e-4) and output-norm regularization.

    Uses ``datasets["overall"]`` when ``use_all`` is true, otherwise
    ``datasets["training"]``.
    """
    optimizer_players = optim.AdamW(model_players.parameters(), lr=1e-4) # change the learning rate if you'd like
    split_name = "overall" if use_all else "training"
    with torch.enable_grad():
        run_through_model(
            model_players,
            datasets[split_name],
            batch_size=batch_size,
            num_epochs=num_epochs,
            optimizer=optimizer_players,
            regularize=True,
        )

def validate_player_model(model_players, datasets):
    """Print the mean loss of ``model_players`` on ``datasets["validation"]`` (one sample per batch, no gradients)."""
    validation_set = datasets["validation"]
    with torch.no_grad():
        run_through_model(model_players, validation_set, batch_size=1, num_epochs=1)

def create_and_train_year_adjustment_model(datasets, use_all=False, num_epochs=200, batch_size=64):
    """Create a double-precision ``EmbeddingAdjustmentModel``, train it, and return it.

    Validation runs only when ``use_all`` is false.
    """
    model_players = EmbeddingAdjustmentModel().double()
    train_player_model(
        model_players,
        datasets,
        use_all=use_all,
        num_epochs=num_epochs,
        batch_size=batch_size,
    )
    if use_all:
        return model_players
    print("validating")
    validate_player_model(model_players, datasets)
    return model_players

In [20]:
def update_embeddings_with_predictions(model_players, embeddings, coeff=1):
    """Store a 'predicted' embedding for every eligible player and return ``embeddings``.

    A player is eligible when their '2020' snapshot is non-zero, their
    projected minutes in the global ``mp`` are positive, and a 2020 roster
    entry is found for them. The prediction is the '2020' snapshot plus
    ``coeff`` times the model output. ``embeddings`` is modified in place.
    """
    def is_eligible(name):
        return torch.max(abs(embeddings[name]["2020"])) > 0 and mp[name]["projected"] > 0

    for player in embeddings:
        if not is_eligible(player):
            continue
        team = YearToYearImprovementDataset.find_team(player, 2020)
        vector_input = YearToYearImprovementDataset.construct_vector_representation(team, player, 2020, embeddings)
        if vector_input is None:
            continue
        with torch.no_grad():
            prediction = model_players(vector_input)
        embeddings[player]['predicted'] = (prediction * coeff) + embeddings[player]['2020']
    return embeddings

**CREATING DATA, DEFINING, AND TRAINING MODEL FOR CONVERTING PLAYER EMBEDDINGS TO TEAM EMBEDDINGS TO WINS

In [21]:
class TeamDataset:
    """Iterable dataset of (winner roster tensor, loser roster tensor) per game.

    Each roster tensor has one row per player who played in the game: the
    player's embedding at that game followed by minutes played. Games for
    which either roster is empty are dropped.

    Iterating yields ``(winners, losers)`` batches (two lists of
    ``batch_size`` tensors) in a fresh random order on every pass; a trailing
    partial batch is dropped. The original generated a random order but never
    used it, and drew it with replacement, so batches always came out in file
    order.
    """

    def __init__(self, games, affiliations, batch_size=1):
        super().__init__()
        self.__winners = []
        self.__losers = []
        # Rows whose game id is shorter than 3 characters are ignored.
        for (game_id, winner, loser) in filter(lambda x: len(x[0]) >= 3, games):
            winner_tensor = TeamDataset.create_team_tensor(winner, game_id, affiliations)
            loser_tensor = TeamDataset.create_team_tensor(loser, game_id, affiliations)
            if winner_tensor is None or loser_tensor is None:
                continue
            self.__winners.append(winner_tensor)
            self.__losers.append(loser_tensor)
        self.__order = self.__generate_random_order()
        self.__idx = 0
        self.__batch_size = batch_size

    def __getitem__(self, idx):
        """Return ``(winners[idx], losers[idx])``; ``idx`` may be an int or a slice (storage order)."""
        return (self.__winners[idx], self.__losers[idx])

    def __len__(self):
        return len(self.__winners)

    def __iter__(self):
        self.__idx = 0
        self.__order = self.__generate_random_order()
        return self

    def __next__(self):
        if self.__idx + self.__batch_size > len(self):
            raise StopIteration
        batch_indices = self.__order[self.__idx:self.__idx + self.__batch_size]
        self.__idx += self.__batch_size
        winners = [self.__winners[i] for i in batch_indices]
        losers = [self.__losers[i] for i in batch_indices]
        return (winners, losers)

    def __generate_random_order(self):
        # A permutation, so every game is visited at most once per pass.
        return random.sample(range(len(self)), k=len(self))

    def set_batch_size(self, batch_size):
        self.__batch_size = batch_size

    @staticmethod
    def create_team_tensor(team, game_id, affiliations):
        """Stack embedding+minutes rows for everyone who played for ``team`` in ``game_id``.

        Reads the globals ``embeddings`` and ``mp``. Returns ``None`` when no
        player qualifies.
        """
        team_list = []
        for player in filter(lambda name: game_id in embeddings[name] and game_id in affiliations[name], embeddings):
            if affiliations[player][game_id] == team and mp[player][game_id] > 0:
                minutes_tensor = torch.tensor(mp[player][game_id], dtype=torch.float32)
                embedding_plus_minutes = torch.cat([embeddings[player][game_id], minutes_tensor.unsqueeze(0)], dim=0)
                team_list.append(embedding_plus_minutes.float().detach())
        return torch.stack(team_list) if len(team_list) > 0 else None

In [22]:
class PlayerCombinerModel(nn.Module):
    """Combines player rows into a team embedding and scores two teams against each other.

    Each player row is a 4-d embedding followed by minutes. With
    ``just_minutes`` the team embedding is the minutes-weighted mean of the
    player embeddings; otherwise the weights are a softmax over a learned
    linear score of each row.
    """

    def __init__(self, just_minutes=True):
        super().__init__()

        EMBEDDING_SIZE = 4
        self.__player_layer = nn.Linear(EMBEDDING_SIZE + 1, 1)
        self.__team_layer_one = nn.Linear(EMBEDDING_SIZE, 4)
        self.__team_layer_two = nn.Linear(4, 1)
        self.__just_minutes = just_minutes

    def forward(self, team_one_player_lists, team_two_player_lists):
        """Return a (batch, 2) tensor of win probabilities, column 0 for team one."""
        outputs = [
            self.score_embeddings(
                self.find_team_embedding(team_one_player_list),
                self.find_team_embedding(team_two_player_list),
            )
            for team_one_player_list, team_two_player_list in zip(team_one_player_lists, team_two_player_lists)
        ]
        return torch.stack(outputs)

    def find_team_embedding(self, tensor_list):
        """Collapse a (players, 5) tensor into a single 4-d team embedding."""
        if self.__just_minutes:
            weights = tensor_list[:, -1].unsqueeze(1)
            normalized_weights = weights / torch.sum(weights)
        else:
            weights = self.__player_layer(tensor_list)
            normalized_weights = F.softmax(weights, dim=0)
        player_embeddings = tensor_list[:, 0:4]
        return torch.sum((normalized_weights * player_embeddings), dim=0).squeeze()

    def score_embeddings(self, team_one_embedding, team_two_embedding):
        """Score each team independently and softmax the two scores into probabilities."""
        team_scores = [
            self.__team_layer_two(torch.tanh(self.__team_layer_one(team_embedding)))
            for team_embedding in (team_one_embedding, team_two_embedding)
        ]
        return torch.softmax(torch.cat(team_scores, dim=0), dim=0)

In [23]:
def create_team_data_sets(gameresults, affiliations):
    """Build team datasets: "overall" (all games), "training" (first 80%) and "validation" (remaining 20%)."""
    split = int(0.8 * len(gameresults))
    overall = TeamDataset(gameresults, affiliations)
    training = TeamDataset(gameresults[0:split], affiliations)
    validation = TeamDataset(gameresults[split:], affiliations)
    return {"overall": overall, "training": training, "validation": validation}

def run_through_pc_model(pc_model, dataset, batch_size, num_epochs, optimizer=None):
    """Run the player-combiner model over ``dataset`` and print per-epoch statistics.

    The loss is the MSE between the predicted probability for the winning
    team (column 0 of the model output) and 1. For every epoch the mean loss
    and the mean predicted winner probability are printed.

    Args:
        pc_model: callable taking ``(winners, losers)`` batches.
        dataset: iterable of ``(winners, losers)`` batches exposing
            ``set_batch_size`` and ``__len__``.
        batch_size: games per batch.
        num_epochs: number of passes over the dataset.
        optimizer: when given, a backward pass and optimizer step follow every batch.

    Prints ``nan nan`` for an epoch when the dataset holds fewer games than
    ``batch_size`` instead of dividing by zero.
    """
    criterion = nn.MSELoss()
    dataset.set_batch_size(batch_size)
    num_runs = len(dataset) // batch_size
    for epoch in range(num_epochs):
        total_loss = 0
        total_prediction = 0
        for (winners, losers) in dataset:
            prediction = pc_model(winners, losers)
            loss = criterion(prediction[:, 0].double(), torch.ones(batch_size).double())
            total_loss += loss.item()
            total_prediction += (torch.sum(prediction[:, 0]).item() / batch_size)
            if optimizer is not None:
                loss.backward()
                optimizer.step()
                optimizer.zero_grad()

        if num_runs:
            print(total_loss / num_runs, total_prediction / num_runs)
        else:
            print(float("nan"), float("nan"))

def train_team_model(pc_model, datasets, use_all=False, num_epochs=20, batch_size=32):
    """Train ``pc_model`` with AdamW (lr=1e-3) and return it.

    Uses ``datasets["overall"]`` when ``use_all`` is true, otherwise
    ``datasets["training"]``.
    """
    pc_optimizer = optim.AdamW(pc_model.parameters(), lr=1e-3)
    split_name = "overall" if use_all else "training"
    with torch.enable_grad():
        run_through_pc_model(
            pc_model,
            datasets[split_name],
            batch_size=batch_size,
            num_epochs=num_epochs,
            optimizer=pc_optimizer,
        )
    return pc_model
    
def validate_team_model(pc_model, datasets):
    """Print loss and mean winner probability of ``pc_model`` on ``datasets["validation"]`` (one game per batch, no gradients)."""
    validation_set = datasets["validation"]
    with torch.no_grad():
        run_through_pc_model(pc_model, validation_set, batch_size=1, num_epochs=1)

def create_and_train_team_model(datasets, use_all=False, num_epochs=12, batch_size=64, just_minutes=False):
    """Create a ``PlayerCombinerModel``, train it, and return it.

    Validation runs only when ``use_all`` is false.
    """
    pc_model = train_team_model(
        PlayerCombinerModel(just_minutes),
        datasets,
        use_all=use_all,
        num_epochs=num_epochs,
        batch_size=batch_size,
    )
    if use_all:
        return pc_model
    validate_team_model(pc_model, datasets)
    return pc_model

In [24]:
def create_team_to_player_map(embeddings, heightweight, mp):
    """Map teams to their active players and back, using the 2021 rosters in ``heightweight``.

    A player is active when their resolved name is in ``mp`` with positive
    projected minutes. Returns ``(team_to_player_map, player_to_team_map)``:
    the first maps team -> list of resolved names, the second maps resolved
    name -> ``(team, roster name)``. A line is printed whenever a resolved
    name was already assigned to a team.
    """
    team_to_player_map = {}
    player_to_team_map = {}
    for team, rosters in heightweight.items():
        active_players = team_to_player_map[team] = []
        for player in rosters[2021]:
            player_name = get_nearest_player(get_player_name(player), embeddings)
            if player_name not in mp:
                continue
            if not mp[player_name]["projected"] > 0: #MIN_MINUTES instead of 0
                continue
            active_players.append(player_name)
            if player_name in player_to_team_map:
                print(player_name, player_to_team_map[player_name], player)
            player_to_team_map[player_name] = (team, player)
    return team_to_player_map, player_to_team_map

In [25]:
def create_team_abilities(pc_model, heightweight, embeddings, mp, team_to_player_map, value='predicted'):
    """Compute one team embedding per team from its players' embeddings.

    For every player with positive projected minutes, the embedding stored
    under ``value`` (falling back to the '2020' snapshot when ``value`` is
    missing) is concatenated with the projected minutes, and the stacked rows
    are reduced with ``pc_model.find_team_embedding``.

    Teams without any qualifying player are left out of the result; the
    original crashed on them because ``torch.stack`` rejects an empty list.
    ``heightweight`` is accepted for interface compatibility and is not used.
    """
    team_abilities = {}
    for team, roster in team_to_player_map.items():
        team_list = []
        for player in roster:
            player_name = get_nearest_player(get_player_name(player), embeddings)
            projected = mp[player_name]['projected']
            if projected > 0:
                minutes_tensor = torch.tensor(projected, dtype=torch.float32)
                snapshots = embeddings[player_name]
                predicted_score = snapshots[value].double().squeeze() if value in snapshots else snapshots['2020'].double()
                embedding_plus_minutes = torch.cat([predicted_score, minutes_tensor.unsqueeze(0).double()], dim=0)
                team_list.append(embedding_plus_minutes.float().detach())
        if not team_list:
            continue
        team_tensor = torch.stack(team_list)
        with torch.no_grad():
            team_abilities[team] = pc_model.find_team_embedding(team_tensor)
    return team_abilities

In [26]:
def create_team_win_probability_matrix(pc_model, team_abilities):
    """Return ``matrix[a][b]`` = probability that team ``a`` beats team ``b`` for all pairs in ``NBA``.

    Each unordered pair is scored once; both directions are filled from the
    two outputs of ``pc_model.score_embeddings``.
    """
    team_win_prob_matrix = {}
    for team1 in NBA:
        team1_row = team_win_prob_matrix.setdefault(team1, {})
        for team2 in NBA:
            if team2 == team1 or team2 in team1_row:
                continue
            team2_row = team_win_prob_matrix.setdefault(team2, {})
            with torch.no_grad():
                prediction = pc_model.score_embeddings(team_abilities[team1], team_abilities[team2])
            team1_row[team2] = prediction[0].item()
            team2_row[team1] = prediction[1].item()
    return team_win_prob_matrix

**FINDING EXPECTED NUMBER OF WINS

In [27]:
def create_potential_matchups_and_record_maps(include_season_to_date=False):
    """Create an empty schedule and standings for every team in ``NBA``.

    Returns ``(potential_matchups, record)``:
      * ``potential_matchups[a][b]`` is ``{"played": 0, "max": n}`` with
        ``n`` = 3 for teams in the same conference and 2 otherwise;
      * ``record[team]`` is ``{"W": 0, "L": 0}``.

    With ``include_season_to_date`` both are then updated from the global
    ``gameresults`` via ``update_standings_with_current_season``.
    """
    record = {team: {"W": 0, "L": 0} for team in NBA}
    potential_matchups = {team: {} for team in NBA}
    for team in NBA:
        for opponent in NBA:
            if opponent == team:
                continue
            both_east = team in East and opponent in East
            both_west = team in West and opponent in West
            number_of_matchups = 3 if (both_east or both_west) else 2
            potential_matchups[team][opponent] = {"played": 0, "max": number_of_matchups}
            potential_matchups[opponent][team] = {"played": 0, "max": number_of_matchups}
    if include_season_to_date:
        update_standings_with_current_season(gameresults, potential_matchups, record)

    return potential_matchups, record

In [28]:
# SKIP THIS IF YOU WANT TO RUN FROM START OF SEASON
def update_standings_with_current_season(gameresults, potential_matchups, record):
    """Apply already-played current-season games to the schedule and standings.

    Rows of ``gameresults`` from index 14033 onward are treated as the
    current season. For each ``(game_id, winner, loser)`` row the "played"
    counters of both directions are incremented and the winner/loser records
    updated. Both mappings are modified in place; ``record`` is returned.
    """
    CURRENT_SEASON_IDX = 14033
    for row in gameresults[CURRENT_SEASON_IDX:, :]:
        game_id, winner, loser = row

        winner_name = team_to_abbr_mapping[winner]
        loser_name = team_to_abbr_mapping[loser]
        for team, opponent in ((winner_name, loser_name), (loser_name, winner_name)):
            potential_matchups[team][opponent]["played"] += 1

        record.setdefault(winner_name, {"W": 0, "L": 0})["W"] += 1
        record.setdefault(loser_name, {"W": 0, "L": 0})["L"] += 1
    return record

In [29]:
def find_expected_wins(gameresults=None, include_season_to_date=False, display=True):
    """Compute each team's expected win total for the season.

    Starting from the schedule produced by
    ``create_potential_matchups_and_record_maps``, every matchup not yet
    played adds the win probability from the global ``team_win_prob_matrix``
    to one team's wins and its complement to the other's.

    Args:
        gameresults: results array used for the season to date; when ``None``
            the helper falls back to the global ``gameresults``.
        include_season_to_date: when true, games already played count as
            actual results and only the remaining games are estimated.
        display: when true, print the ranked table.

    Returns:
        List of ``(team, expected_wins)`` sorted by expected wins, descending.

    The original applied the season-to-date results twice (once inside the
    helper and once again here), doubling records and "played" counters.
    """
    if include_season_to_date and gameresults is not None:
        # Build an empty schedule, then apply the caller's results exactly once.
        potential_matchups, record = create_potential_matchups_and_record_maps(include_season_to_date=False)
        update_standings_with_current_season(gameresults, potential_matchups, record)
    else:
        potential_matchups, record = create_potential_matchups_and_record_maps(include_season_to_date=include_season_to_date)

    for team1 in NBA:
        for team2 in filter(lambda x: x != team1, NBA):
            matchups_to_simulate = potential_matchups[team1][team2]["max"] - potential_matchups[team1][team2]["played"]
            for matchup in range(matchups_to_simulate):
                # Mark both directions so the pair is not counted again from team2's side.
                potential_matchups[team1][team2]["played"] += 1
                potential_matchups[team2][team1]["played"] += 1

                prediction = team_win_prob_matrix[team1][team2]
                record[team1]["W"] += prediction
                record[team1]["L"] += 1 - prediction
                record[team2]["W"] += 1 - prediction
                record[team2]["L"] += prediction

    sorted_team_list = sorted([(team, record[team]["W"]) for team in record], key=lambda x: x[1], reverse=True)
    if display:
        for i, (team, wins) in enumerate(sorted_team_list):
            print(i + 1, team, round(wins,2))
    return sorted_team_list

**FINDING BEST PLAYERS CURRENTLY

In [30]:
def find_best_current_players(field, box_score_model, display=True):
    """Score every player's ``field`` embedding and optionally print a ranking.

    Returns a list of ``(player, value)`` in ``embeddings`` order for all
    players that have ``field``; ``value`` is the model prediction converted
    with ``convert_war(..., ind_game=False)``. The printed ranking is sorted
    descending and limited to players with more than 50 games in ``gp``.
    """
    result_list = []
    with torch.no_grad():
        for player in embeddings:
            if field not in embeddings[player]:
                continue
            raw_score = box_score_model.predict(embeddings[player][field].double()).item()
            result_list.append((player, convert_war(raw_score, ind_game=False)))
    if display:
        experienced = [entry for entry in result_list if gp[entry[0]] > 50]
        experienced.sort(key=lambda entry: entry[1], reverse=True)
        for rank, (player, value) in enumerate(experienced, start=1):
            print(rank, player, value)
    return result_list
    

In [31]:
def convert_war(plus_minus, ind_game=True,year=2021):
    """Convert a point margin into a wins-added value using the ``cdf`` table.

    Args:
        plus_minus: point value; multiplied by 36 when ``ind_game`` is False.
        ind_game: whether ``plus_minus`` is already on the scale used by the table.
        year: which year's column of the table to use.

    The magnitude is linearly interpolated between the table entries at
    ``floor(|x|) - 1`` and ``floor(|x|)``, halved, given the sign of the
    input and rounded to 3 decimals.

    The original interpolated negative inputs with the floor/remainder of the
    signed value, which swapped the interpolation weights and shifted the
    lookup by one entry: the function was discontinuous at negative integers
    and ``convert_war(-x) != -convert_war(x)``. Negative inputs now mirror
    the positive branch exactly; non-negative inputs are unchanged.
    """
    cdf_dict = convert_cdf(cdf)
    if ind_game == False:
        points_added = plus_minus*36
    else:
        points_added = plus_minus

    table = cdf_dict[year]
    magnitude = abs(points_added)
    points_base = math.floor(magnitude)
    remainder = magnitude - points_base
    if points_base > 0:
        wins_magnitude = (1-remainder)*table[points_base-1] + remainder*table[points_base]
    else:
        # NOTE(review): kept from the original; this weights table[0] by (1 - remainder) -- confirm intent.
        wins_magnitude = (1-remainder)*table[points_base]

    wins_added = wins_magnitude if points_added >= 0 else -wins_magnitude
    wins_added /= 2
    wins_added = round(wins_added,3)
    if wins_added == -0.5:
        print(plus_minus)
    return wins_added

In [32]:
def convert_cdf(cdf):
    """Reshape the ``cdf`` rows into ``{year: {margin: value}}``.

    Each row holds the margin in column 0 and the value for year ``Y`` in
    column ``Y - 2008`` for 2010..2020. Every year starts with margin 0 -> 0,
    and 2021 shares the 2020 table.
    """
    years = range(2010, 2021)
    cdf_dict = {year: {0: 0} for year in years}

    for margininfo in cdf:
        margin = margininfo[0]
        for year in years:
            cdf_dict[year][margin] = margininfo[year - 2008]

    cdf_dict[2021] = cdf_dict[2020]
    return cdf_dict

In [33]:
def convert_war_game(gameperf):
    # NOTE(review): unfinished stub -- it only binds a local and implicitly returns None.
    # It is not called anywhere in the visible part of the notebook.
    points_added = gameperf

In [34]:
def player_history(player):
    """Write the per-snapshot skill history of ``player`` to ``history/<player>.csv``.

    Every entry of ``embeddings[player]`` except "current" and "predicted" is
    scored with the global ``box_score_model`` and converted with
    ``convert_war``. The output has columns 'game' and 'skill'.
    """
    history = {}
    for game, snapshot in embeddings[player].items():
        if game == "current" or game == "predicted":
            continue
        raw_score = box_score_model.predict(snapshot.double()).item()
        history[game] = convert_war(raw_score)
    rows = list(history.items())
    df_h = pd.DataFrame(rows, columns=['game', 'skill'])
    address = "history/" + player + ".csv"
    df_h.to_csv(address)


**SIMULATING SEASON

In [35]:
def simulate_game(team_win_prob_matrix, team_one, team_two):
    """Simulate one game; returns ``(team_one_won, winner, loser)``."""
    team_one_won = random.random() < team_win_prob_matrix[team_one][team_two]
    if team_one_won:
        winner, loser = team_one, team_two
    else:
        winner, loser = team_two, team_one
    return team_one_won, winner, loser

In [36]:
def get_seed(conference, sorted_teams):
    """Return the first ten entries of ``sorted_teams`` that belong to ``conference``."""
    conference_teams = [team for team in sorted_teams if team in conference]
    return conference_teams[:10]

In [37]:
def get_seed_index(seed):
    """Translate a 1-based seed number into its 0-based list index."""
    zero_based_index = seed - 1
    return zero_based_index

In [38]:
def playin_games(team_win_prob_matrix, seeds):
    """Simulate the play-in round and return ``(seventh_seed, eighth_seed)``.

    Seeds 7 and 8 play; the winner takes the seventh spot. Seeds 9 and 10
    play; that winner then faces the loser of the 7-8 game for the eighth spot.
    """
    team_at = lambda seed_number: seeds[get_seed_index(seed_number)]

    seven_eight_game = simulate_game(team_win_prob_matrix, team_at(7), team_at(8))
    seventh_seed, seven_eight_loser = seven_eight_game[1], seven_eight_game[2]

    nine_ten_winner = simulate_game(team_win_prob_matrix, team_at(9), team_at(10))[1]
    eighth_seed = simulate_game(team_win_prob_matrix, seven_eight_loser, nine_ten_winner)[1]

    return seventh_seed, eighth_seed

In [39]:
def matchup(team_win_prob_matrix, teamone, teamtwo):
    """Simulate a seven-game series and return the team that wins it.

    All seven games are always simulated; ``teamone`` is returned when it won
    at least four of them, otherwise ``teamtwo``.
    """
    SERIES_LENGTH = 7
    GAMES_NEEDED_TO_WIN = 4

    team_one_wins = sum(
        1
        for _ in range(SERIES_LENGTH)
        if simulate_game(team_win_prob_matrix, teamone, teamtwo)[0]
    )

    if team_one_wins >= GAMES_NEEDED_TO_WIN:
        return teamone
    return teamtwo

In [40]:
def playoff_games(team_win_prob_matrix, seeds):
    """Play one conference bracket (1v8, 2v7, 3v6, 4v5) and return its winner.

    The 1-8 winner meets the 4-5 winner, the 2-7 winner meets the 3-6 winner,
    and those two winners play for the conference.
    """
    def series_between_seeds(higher_seed, lower_seed):
        return matchup(
            team_win_prob_matrix,
            seeds[get_seed_index(higher_seed)],
            seeds[get_seed_index(lower_seed)],
        )

    first_round_pairings = ((1, 8), (2, 7), (3, 6), (4, 5))
    one_eight, two_seven, three_six, four_five = [
        series_between_seeds(higher, lower) for higher, lower in first_round_pairings
    ]

    finalist_one = matchup(team_win_prob_matrix, one_eight, four_five)
    finalist_two = matchup(team_win_prob_matrix, two_seven, three_six)

    return matchup(team_win_prob_matrix, finalist_one, finalist_two)

In [41]:
def get_seeds(conference, record):
    """Return the conference's teams ordered by wins, best first.

    Ties are broken randomly: a copy of ``conference`` is shuffled before the
    (stable) sort. The caller's list is left untouched; previously the
    shuffle was applied in place, which reordered the module-level
    ``East``/``West`` lists on every call.

    Args:
        conference: Iterable of team abbreviations.
        record: Mapping ``team -> {"W": wins, ...}``.

    Returns:
        A new list of the teams sorted by ``record[team]["W"]`` descending.
    """
    shuffled = list(conference)
    random.shuffle(shuffled) # shuffled so that ties are broken randomly instead of alphabetically
    return sorted(shuffled, key=lambda team: record[team]["W"], reverse=True)

In [42]:
def run_conference_playoffs(team_win_prob_matrix, conference, record):
    """Seed a conference, resolve the play-in, and play the bracket.

    Returns ``(conference_winner, playoff_teams)`` where ``playoff_teams`` is
    the first eight seeds after the play-in winners replaced seeds 7 and 8.
    """
    seeds = get_seeds(conference, record)

    play_in_winners = playin_games(team_win_prob_matrix, seeds)
    for seed_number, team in zip((7, 8), play_in_winners):
        seeds[get_seed_index(seed_number)] = team

    conference_winner = playoff_games(team_win_prob_matrix, seeds)
    return conference_winner, seeds[0:8]

In [43]:
def run_playoffs(team_win_prob_matrix, record):
    """Run both conference brackets and the final series.

    Returns a dict with the ``"winner"``, the two ``"finalists"`` (East first)
    and the combined list of ``"playoffs"`` teams (East first).
    """
    east_winner, east_field = run_conference_playoffs(team_win_prob_matrix, East, record)
    west_winner, west_field = run_conference_playoffs(team_win_prob_matrix, West, record)

    outcome = {}
    outcome["winner"] = matchup(team_win_prob_matrix, east_winner, west_winner)
    outcome["finalists"] = [east_winner, west_winner]
    outcome["playoffs"] = east_field + west_field
    return outcome

In [44]:
def simulate_regular_season(team_win_prob_matrix, include_season_to_date=True):
    """Simulate every unplayed regular-season game and return the final record.

    For each ordered pair of distinct teams, the number of games still to play
    is ``max - played`` from the matchup map; each simulated game updates the
    winner's ``"W"``, the loser's ``"L"`` and the ``played`` count in both
    directions.
    """
    potential_matchups, record = create_potential_matchups_and_record_maps(
        include_season_to_date=include_season_to_date
    )
    for team1 in NBA:
        for team2 in NBA:
            if team2 != team1:
                pairing = potential_matchups[team1][team2]
                games_remaining = pairing["max"] - pairing["played"]
                for _ in range(games_remaining):
                    _, winner, loser = simulate_game(team_win_prob_matrix, team1, team2)
                    record[winner]["W"] += 1
                    record[loser]["L"] += 1
                    potential_matchups[team1][team2]["played"] += 1
                    potential_matchups[team2][team1]["played"] += 1
    return record

In [45]:
def simulate_season(team_win_prob_matrix, include_season_to_date=True):
    """Simulate the regular season, then the playoffs, and return the playoff result."""
    final_record = simulate_regular_season(team_win_prob_matrix, include_season_to_date)
    postseason = run_playoffs(team_win_prob_matrix, final_record)
    return postseason

In [46]:
def simulate_seasons(team_win_prob_matrix, num_simulations=100, adjust=True, include_season_to_date=True):
    """Run ``num_simulations`` seasons and tally outcomes per team.

    Returns ``(champions, finals, playoffs)``: dicts keyed by every team in
    ``NBA``. With ``adjust`` truthy the counts are divided by
    ``num_simulations``; otherwise raw counts are returned.
    """
    playoffs = dict.fromkeys(NBA, 0)
    finals = dict.fromkeys(NBA, 0)
    champions = dict.fromkeys(NBA, 0)

    for _ in range(num_simulations):
        results = simulate_season(team_win_prob_matrix, include_season_to_date)
        champions[results["winner"]] += 1
        for finalist in results["finalists"]:
            finals[finalist] += 1
        for qualifier in results["playoffs"]:
            playoffs[qualifier] += 1

    if adjust:
        for tally in (playoffs, finals, champions):
            for team in NBA:
                tally[team] /= num_simulations

    return champions, finals, playoffs

In [47]:
def sort_results(results):
    """Print and return ``results`` as ``(team, value)`` pairs, largest value first.

    The teams are taken from ``results`` itself. The earlier version iterated
    the unrelated global ``playoffs`` and therefore failed (or silently
    depended on kernel state) unless that global happened to exist with the
    same keys.

    Args:
        results: Mapping ``team -> value``.

    Returns:
        List of ``(team, value)`` tuples sorted by value, descending.
    """
    sorted_list = sorted(results.items(), key=lambda item: item[1], reverse=True)
    for i, (team, prob) in enumerate(sorted_list):
        print(i + 1, team, prob)
    return sorted_list

In [48]:
class MinutesProjectionModel(nn.Module):
    """Projects a per-player share of playing time from 4-feature player vectors.

    Two linear layers are applied to each player: one produces a softmax over
    five positions, the other a single raw minutes score.
    """

    def __init__(self):
        super().__init__()

        INPUT_SIZE = 4
        NUM_POSITIONS = 5
        # The double-underscore attribute names are name-mangled and end up in
        # the state_dict keys, so they must not be renamed.
        self.__player_layer = nn.Linear(INPUT_SIZE, NUM_POSITIONS).float()
        self.__minutes_layer = nn.Linear(INPUT_SIZE, 1).float()

    def forward(self, player_list, allowed_positions):
        """Return one value per player: the sum of their per-position shares.

        ``allowed_positions[i]`` is a 0/1 mask over the five positions for
        player ``i``. Each position column is normalised by its team total
        (plus a small epsilon) before a player's row is summed.
        """
        position_shares = []
        raw_minutes = []
        for idx, raw_vec in enumerate(player_list):
            features = raw_vec.float().detach()

            # Softmax over positions, zeroed where the player is not allowed.
            share = F.softmax(self.__player_layer(features), dim=0)
            mask = torch.from_numpy(np.array(allowed_positions[idx])).detach().float()
            position_shares.append(share * mask)

            raw_minutes.append(self.__minutes_layer(features).squeeze())

        # Softmax across the team, scaled so the minutes total 240.
        team_total = torch.tensor(240, dtype=torch.float32)
        team_minutes = F.softmax(torch.stack(raw_minutes).float(), dim=0).float() * team_total

        # Minutes each player spends at each position (one 5-vector per player).
        per_position_minutes = [
            team_minutes[idx] * position_shares[idx] for idx in range(len(player_list))
        ]

        # Column totals; epsilon guards against positions nobody may play.
        epsilon = torch.tensor(1e-5, dtype=torch.float32)
        position_totals = torch.sum(torch.stack(per_position_minutes), dim=0).float() + epsilon

        normalised = torch.stack([row / position_totals for row in per_position_minutes])

        return torch.stack([torch.sum(row) for row in normalised])


In [None]:
# Debug aid: list every parameter of `test_model` together with its requires_grad flag.
# NOTE(review): `test_model` is not created in the surrounding cells — confirm it exists
# in the kernel before running this cell.
for name, parameter in test_model.named_parameters():
    print(name, parameter.requires_grad)

In [49]:
#JUMPO
def create_minute_dictionary():
    """Build ``{name: {year: value, ..., 'mask': row[5:10]}}`` from ``mindf``.

    Each row supplies a name (col 0), a first year/value pair (cols 1-2) and a
    second year/value pair (cols 3-4). The first pair is only recorded the
    first time a name is seen; the ``'mask'`` entry always reflects the most
    recent row for that name.
    """
    mindict = {}
    for row in mindf:
        name = row[0]
        year0, value0 = row[1], row[2]
        year1, value1 = row[3], row[4]
        if name not in mindict:
            mindict[name] = {year0: value0, year1: value1}
        else:
            mindict[name][year1] = value1
        mindict[name]['mask'] = row[5:10]
    return mindict

def get_player_min_inputs(player, team, year):
    """Build the 4-element float input ``[age, draft_pos, skill, min_pre]`` for one player.

    ``draft_pos`` falls back to 61 for players missing from ``draft``;
    ``min_pre`` is the previous year's entry in ``mindict`` or 0 when absent.
    """
    roster = heightweight[team][year]
    player_name = get_nearest_player(get_player_name(player), embeddings)

    age = YearToYearImprovementDataset.get_age(roster[player]['age'], year)
    draft_pos = 61 if player not in draft else draft[player][1]
    skill = box_score_model.predict(embeddings[player_name][str(year)].double()).item()

    # Previous-season value, defaulting to 0 for unknown players or seasons.
    min_pre = mindict.get(player_name, {}).get(year - 1, 0)

    descriptors = torch.tensor(np.asarray([age, draft_pos, skill, min_pre])).float()
    full_input = torch.cat([descriptors], dim=0).detach()
    return full_input

def get_team_minute_info(team,year):
    """Collect model inputs, scaled targets and position masks for one roster.

    Only players present in ``mindict`` contribute. Targets are the player's
    ``mindict`` entry for ``year`` (0 when missing), rescaled so that they sum
    to 240, and returned as a float64 tensor.

    Returns:
        ``(xlist, ylist, masklist)``.
    """
    xlist = []
    raw_targets = []
    masklist = []

    for player in heightweight[team][year]:
        player_name = get_nearest_player(get_player_name(player), embeddings)
        new_input = get_player_min_inputs(player, team, year)
        if player_name not in mindict:
            continue
        player_minutes = mindict[player_name]
        masklist.append(player_minutes['mask'])
        xlist.append(new_input)
        raw_targets.append(player_minutes[year] if year in player_minutes else 0)

    # Rescale the targets so the team total is 240.
    tot_min = sum(raw_targets)
    scale = tot_min / 240
    scaled_targets = [target / scale for target in raw_targets]
    ylist = torch.from_numpy(np.array(scaled_targets, dtype="float64"))

    return xlist, ylist, masklist

In [50]:
def get_best_rookies(keep_print=True, year = 2020):
    """Rank players drafted in ``year`` by their converted ``'current'`` embedding value.

    Only drafted players that have an entry in ``embeddings`` are ranked. When
    ``keep_print`` is True, entries among the first ten with a value above 0.1
    are printed. Returns the full ranking as ``(player, value)`` pairs.
    """
    draft_results = {
        player: convert_war(box_score_model.predict(embeddings[player]['current'].double()).item())
        for player in draft
        if draft[player][0] == year and player in embeddings
    }
    sorted_list = sorted(draft_results.items(), key=lambda item: item[1], reverse=True)

    if keep_print == True:
        for rank, (player, wins) in enumerate(sorted_list[:10], start=1):
            if wins > 0.1:
                print(rank, player, wins)
    return sorted_list

def rookie_of_the_year(sorted_list_mvp, keep_print=True, year = 2020):
    """Pick the top rookie and the top five rookies from a ranked leaderboard.

    A rookie is a leaderboard player whose ``draft`` entry has draft year
    ``year``. Returns ``(roty, roty_team)``; ``roty`` is the string ``'None'``
    when no rookie is found.
    """
    rookies = [
        entry for entry in sorted_list_mvp
        if entry[0] in draft and draft[entry[0]][0] == year
    ]

    if keep_print == True:
        for entry in rookies:
            print(entry[0], entry[1])

    roty = rookies[0][0] if rookies else 'None'
    roty_team = [entry[0] for entry in rookies[:5]]
    return roty, roty_team

In [51]:
#fix trades
def add_inseason_trades():
    """Move traded players between 2021 rosters in ``heightweight`` and rebuild minutes.

    The function is safe to re-run: a player who has already been moved to the
    new roster is skipped. (Previously a second run raised ``KeyError`` because
    the source entries had been deleted by the first run.)

    Returns:
        ``(heightweight, mp)`` where ``mp`` is the result of
        ``create_minutes_played(embeddings)``.

    Raises:
        KeyError: if a traded player is on neither the old nor the new roster.
    """
    SEASON = 2021
    # (player, team traded from, team traded to)
    trades = [
        ('Jarrett Allen', 'BRK', 'CLE'),
        ('Taurean Prince', 'BRK', 'CLE'),
        ('Victor Oladipo', 'IND', 'HOU'),
        ('Caris LeVert', 'BRK', 'IND'),
        ('James Harden', 'HOU', 'BRK'),
    ]

    for player, old_team, new_team in trades:
        old_roster = heightweight[old_team][SEASON]
        new_roster = heightweight[new_team][SEASON]
        if player in old_roster:
            new_roster[player] = old_roster.pop(player)
        elif player not in new_roster:
            # Genuinely missing data should still fail loudly.
            raise KeyError(player)

    mp = create_minutes_played(embeddings)

    return heightweight, mp
    

# **CALCULATING PERFORMANCE DURING YEAR TO DATE**

In [55]:
#INSEASON JUMP HERE

# Row ranges into `gameresults`, one per season; `season_leaders` uses them to
# select the rows of a single season and `get_awards` looks them up by year.
# NOTE(review): the gaps between consecutive ranges presumably skip playoff
# games — confirm against data/gameresults.csv before changing any bound.
IDX_2010 = range(0,1170)
IDX_2011 = range(1312,2543)
IDX_2012 = range(2624,3559)
IDX_2013 = range(3696,4926)
IDX_2014 = range(5010,6241)
IDX_2015 = range(6329,7559)
IDX_2016 = range(7640,8871)
IDX_2017 = range(8956,10187)
IDX_2018 = range(10266,11496)
IDX_2019 = range(11577,12808)
IDX_2020 = range(12889,13949)
# The latest season is open-ended: it runs to the last loaded row.
IDX_2021 = range(14032,len(gameresults))
    
def season_leaders(range_val, keep_print = True):
    """Accumulate converted wins per player over the games selected by ``range_val``.

    A row of ``gamescores`` counts when its game id (column 40) appears in
    column 0 of ``gameresults[range_val]``. Every such player gets an entry in
    the result (starting at 0). A row contributes only once per
    ``(game, player)`` pair and only when its last column is positive; the
    model output is multiplied by that last column before ``convert_war``.
    NOTE(review): the last column is presumably minutes played — confirm.

    The season's game ids are collected into a set once, replacing a linear
    array scan for every row of ``gamescores``.

    Args:
        range_val: Row indices into ``gameresults`` (e.g. one of the ``IDX_*`` ranges).
        keep_print: When True, print the ranking, best first.

    Returns:
        ``(war, ind_game_war)``: the per-player totals and a list of
        ``(player, game, plus_minus)`` tuples for every counted row.
    """
    GAME_COL = 40
    PLAYER_COL = 0

    war = {}
    ind_game_war = []

    curr_season = gameresults[range_val,:]
    # Hash-based membership instead of scanning the array for every row.
    season_games = set(curr_season[:, 0].tolist())

    marked = set()
    with torch.no_grad():
        for i in range(len(gamescores)):
            game = gamescores[i, GAME_COL]
            if game not in season_games:
                continue

            player = gamescores[i, PLAYER_COL]
            # Everyone who appears in the season is listed, even with no counted rows.
            war.setdefault(player, 0)

            if (game, player) not in marked and gamescores[i, -1] > 0:
                marked.add((game, player))
                gameval = gamescores[i,keep].astype('double')
                gamevalinput = torch.from_numpy(gameval)

                plus_minus = (box_score_model(gamevalinput).item() * gamescores[i, -1])
                wins_added = convert_war(plus_minus)

                war[player] += wins_added
                ind_game_war.append((player, game, plus_minus))

    if keep_print == True:
        sorted_list_curr = sorted(war.items(), key=lambda item: item[1], reverse=True)
        for counter, (player, wins) in enumerate(sorted_list_curr, start=1):
            player_name = get_plain(player)
            print(counter,"\t", player_name, "\t", round(wins, 3))

    return war, ind_game_war
    

In [70]:
def get_all_nba(leaderboard):
    """Assign the top leaderboard players to first/second/third All-NBA teams.

    Up to the first 25 leaderboard entries are considered in order. Each
    player's position record from ``get_positions()`` is collapsed into
    ``guard`` / ``forward`` / ``c`` weights that sum to 1. A player joins the
    first team (in first/second/third order) that still has room in one of the
    player's position slots (guard and forward are capped at 2, centre at 1)
    and has fewer than five players.

    The function no longer assumes exactly 25 candidates and 15 placements: a
    shorter leaderboard or fewer placements used to raise ``IndexError``.

    Args:
        leaderboard: Sequence of ``(player, value)`` entries, best first.

    Returns:
        ``(first_team, second_team, third_team)`` lists of player names.
    """
    CANDIDATE_POOL = 25
    TEAM_ORDER = ['first','second','third']

    positions = get_positions()
    all_nba_pos = {teamnum: {'guard': 0, 'forward': 0, 'c': 0} for teamnum in TEAM_ORDER}

    all_nba=[]
    num_players={'first': 0, 'second': 0, 'third': 0}

    for entry in leaderboard[:CANDIDATE_POOL]:
        player = get_nearest_player(entry[0],positions)
        pos_vec = positions[player]
        pos_sum = 0
        # Collapse the five positions into guard / forward (centre stays as 'c').
        if pos_vec['pg']==1 or pos_vec['sg']==1:
            pos_vec['guard']=1
        if pos_vec['sf']==1 or pos_vec['pf']==1:
            pos_vec['forward']=1
        for position in ['guard','forward','c']:
            if position in pos_vec:
                pos_sum+=pos_vec[position]
        # Normalise so a multi-position player splits one slot across groups.
        for position in ['guard','forward','c']:
            if position in pos_vec:
                pos_vec[position]/=pos_sum

        placed = 0
        for teamnum in TEAM_ORDER:
            available_pos = 0
            if placed == 0:
                for position in all_nba_pos[teamnum]:
                    # Slot capacity: one centre, two of everything else.
                    if position == 'c':
                        idx = 1
                    else:
                        idx = 2
                    if all_nba_pos[teamnum][position]<idx and position in pos_vec:
                        available_pos += 1
                        all_nba_pos[teamnum][position]+=pos_vec[position]
                if available_pos > 0 and num_players[teamnum]<5:
                    player = get_plain(player)
                    all_nba.append([player, teamnum])
                    placed += 1
                    num_players[teamnum] += 1

    # Group whatever was placed; works for fewer than 15 placements too.
    teams = {teamnum: [] for teamnum in TEAM_ORDER}
    for player, teamnum in all_nba:
        teams[teamnum].append(player)
    return teams['first'], teams['second'], teams['third']

def most_improved_player(keep_print=True, field_curr = 'current', field_base = '2020', field_mp = 'projected'):
    """Rank roster players by the change in minutes-weighted predicted value.

    For every player on every ``NBA`` roster with a positive ``field_mp``
    entry in ``mp``, the model prediction for the ``field_curr`` and
    ``field_base`` embeddings is multiplied by those minutes and rounded to
    one decimal; the ranking is by their difference, largest first.

    Returns:
        List of ``(player, change)`` pairs, keyed by the roster name.
    """
    player_improv = {}
    for team in NBA:
        for player in team_to_player_map[team]:
            player_name = get_nearest_player(get_player_name(player), embeddings)
            if field_mp not in mp[player_name] or not mp[player_name][field_mp] > 0:
                continue

            minutes_tensor = torch.tensor(mp[player_name][field_mp], dtype=torch.float32)

            predicted_plusminus = box_score_model.predict(embeddings[player_name][field_curr].double())*minutes_tensor
            val = round(predicted_plusminus.item(),1)

            prior_plusminus = box_score_model.predict(embeddings[player_name][field_base].double())*minutes_tensor
            val_prior = round(prior_plusminus.item(),1)

            player_improv[player] = {
                'current': val,
                'start': val_prior,
                'change': round(val - val_prior,1),
            }

    sorted_list = sorted(
        ((player, info['change']) for player, info in player_improv.items()),
        key=lambda pair: pair[1],
        reverse=True,
    )

    if keep_print==True:
        for rank, (player, improvement) in enumerate(sorted_list, start=1):
            print(rank, player, improvement)

    return sorted_list

def get_all_stars(leaderboard=None):
    """Pick up to twelve All-Stars per conference from the top of a leaderboard.

    The first 50 entries (or fewer, if the leaderboard is shorter) are
    considered in order; each player is assigned to East or West through the
    first team listed in ``player_to_team_map``.

    Args:
        leaderboard: Sequence of ``(player, value)`` entries, best first.
            When omitted, the notebook-level variable ``leaderboard`` is used,
            which is what the function always relied on before.

    Returns:
        ``(east_allstars, west_allstars)`` lists of player names.

    Raises:
        NameError: if no leaderboard is passed and none exists globally.
    """
    if leaderboard is None:
        # Backward compatibility with callers that rely on the global variable.
        try:
            leaderboard = globals()["leaderboard"]
        except KeyError:
            raise NameError("name 'leaderboard' is not defined") from None

    MAX_CANDIDATES = 50
    ROSTER_SIZE = 12

    east_allstars = []
    west_allstars = []
    for entry in leaderboard[:MAX_CANDIDATES]:
        player = get_nearest_player(entry[0],player_to_team_map)
        team = player_to_team_map[player][0]
        roster = east_allstars if team in East else west_allstars
        if len(roster) < ROSTER_SIZE:
            roster.append(get_plain(player))
    return east_allstars, west_allstars

def get_best_game(keep_print=True, num_print=25, ind_game_war=None):
    """Rank individual games by their plus-minus value, best first.

    Args:
        keep_print: When True, print the first ``num_print`` entries.
        num_print: Number of entries to print.
        ind_game_war: Iterable of ``(player, game, plus_minus)`` tuples. When
            omitted, the notebook-level ``ind_game_war`` is looked up at call
            time. The previous default (``ind_game_war = ind_game_war``) was
            evaluated when the cell defining this function ran, so it raised
            ``NameError`` on a fresh kernel and otherwise froze a stale list.

    Returns:
        List of ``("<player> <game>", plus_minus)`` pairs, largest first.

    Raises:
        NameError: if no list is passed and none exists globally.
    """
    if ind_game_war is None:
        try:
            ind_game_war = globals()["ind_game_war"]
        except KeyError:
            raise NameError("name 'ind_game_war' is not defined") from None

    # A repeated player/game pair keeps its last value.
    indgameres = {entry[0] + " " + entry[1]: entry[2] for entry in ind_game_war}
    sorted_list_games = sorted(indgameres.items(), key=lambda item: item[1], reverse=True)

    if keep_print == True:
        for i, (game, points) in enumerate(sorted_list_games):
            if i < num_print:
                print(i+1,"\t", game, "\t", round(points, 1))

    return sorted_list_games

def get_historical_minute_averages(mp):
    """Add per-season values from ``data/avg_min.csv`` to the players already in ``mp``.

    CSV columns are read positionally: player (0), season (1), value (2).
    Rows for players not present in ``mp`` are ignored. ``mp`` is updated in
    place and also returned.
    """
    avg_min_df = pd.read_csv("data/avg_min.csv", encoding="latin1").values
    for row in avg_min_df:
        player, season, avgmin = row[0], row[1], row[2]
        if player not in mp:
            continue
        mp[player][season] = avgmin
    return mp

**SAVING AND LOADING MODELS**

In [58]:
###JUMP HERE FOR SAVING (SAVE)
def save_models(box_score_model, model_players, pc_model, embeddings, team_datasets, \
                adjustment_datasets, box_score_datasets, team_abilities, team_win_prob_matrix):
    """Persist the three models' state dicts under models/ and the datasets as pickles under data/."""
    state_dict_targets = (
        (box_score_model, "models/box_score_model.pt"),
        (model_players, "models/player_adjustment_model.pt"),
        (pc_model, "models/team_model.pt"),
    )
    for model, path in state_dict_targets:
        torch.save(model.state_dict(), path)

    pickle_targets = (
        (team_datasets, "data/team_datasets.p"),
        (adjustment_datasets, "data/adjustment_datasets.p"),
        (box_score_datasets, "data/box_score_datasets.p"),
        (embeddings, "data/embeddings.p"),
        (team_abilities, "data/team_abilities.p"),
        (team_win_prob_matrix, "data/team_win_prob_matrix.p"),
    )
    for payload, path in pickle_targets:
        with open(path, "wb") as f:
            pickle.dump(payload, f)

In [109]:
def load_models(box_score_model=True, model_players=True, pc_model=True):
    """Load the requested models from the files written by ``save_models``.

    Fixes over the previous version:
      * the boolean flags were overwritten with ``None`` before being tested,
        so nothing was ever loaded;
      * the box-score model was built with 20 inputs although the checkpoint
        has ``len(keep)`` (18) input features;
      * the player and team models were read from paths (one of them an
        absolute path on a personal machine) that ``save_models`` never
        writes; they now use the same relative paths as ``save_models``.

    Args:
        box_score_model: Load the box-score model when truthy.
        model_players: Load the player adjustment model when truthy.
        pc_model: Load the player combiner (team) model when truthy.

    Returns:
        ``(box_score_model, model_players, pc_model)``; a model that was not
        requested is ``None``.
    """
    # Remember the flags before the parameter names are reused for the models.
    want_box_score, want_players, want_combiner = box_score_model, model_players, pc_model

    box_score_model = None
    model_players = None
    pc_model = None

    if want_box_score:
        # One input per retained box-score column.
        box_score_model = BoxScoreToEmbeddingModel(len(keep))
        box_score_model.load_state_dict(torch.load("models/box_score_model.pt"))
        box_score_model.double()

    if want_players:
        model_players = EmbeddingAdjustmentModel()
        model_players = model_players.double()
        model_players.load_state_dict(torch.load("models/player_adjustment_model.pt"))

    if want_combiner:
        pc_model = PlayerCombinerModel()
        pc_model.load_state_dict(torch.load("models/team_model.pt"))
        pc_model.double()

    return box_score_model, model_players, pc_model

In [126]:
# Rebuild the box-score model with 18 input features and restore its saved weights.
# The cell's last expression is the load_state_dict(...) call, so its result is displayed.
box_score_model = None
box_score_model = BoxScoreToEmbeddingModel(18)
box_score_model.load_state_dict(torch.load("models/box_score_model.pt"))


BoxScoreToEmbeddingModel(
  (_BoxScoreToEmbeddingModel__layer_one): Linear(in_features=18, out_features=4, bias=True)
  (_BoxScoreToEmbeddingModel__layer_two): Linear(in_features=4, out_features=1, bias=True)
)

In [5]:
def load_datasets(embeddings=True, box_score_datasets=True, team_datasets=True, adjustment_datasets=True, \
                 team_abilities=True, team_win_prob_matrix=True):
    """Load the requested pickled datasets from data/.

    Fixes over the previous version, which could never return successfully:
      * ``embeddings = None,`` (trailing comma) created a truthy tuple;
      * every other flag was overwritten with ``None`` before being tested;
      * the return statement referenced the misspelt name ``adjustment_datsets``.

    Args:
        embeddings, box_score_datasets, team_datasets, adjustment_datasets,
        team_abilities, team_win_prob_matrix: load the matching file when truthy.

    Returns:
        ``(embeddings, box_score_datasets, team_datasets, adjustment_datasets,
        team_abilities, team_win_prob_matrix)``; entries that were not
        requested are ``None``.

    Raises:
        FileNotFoundError: if a requested file does not exist.
    """
    def _load_if(requested, path):
        # Return the unpickled object, or None when the dataset was not requested.
        if not requested:
            return None
        # SECURITY: pickle.load can execute arbitrary code; only load files you created.
        with open(path, "rb") as f:
            return pickle.load(f)

    return (
        _load_if(embeddings, "data/embeddings.p"),
        _load_if(box_score_datasets, "data/box_score_datasets.p"),
        _load_if(team_datasets, "data/team_datasets.p"),
        _load_if(adjustment_datasets, "data/adjustment_datasets.p"),
        _load_if(team_abilities, "data/team_abilities.p"),
        _load_if(team_win_prob_matrix, "data/team_win_prob_matrix.p"),
    )

In [872]:
# Persist the current models and datasets to models/ and data/ (overwrites existing files).
# All nine arguments must already exist in the kernel.
save_models(box_score_model, model_players, pc_model, embeddings, team_datasets, \
                adjustment_datasets, box_score_datasets, team_abilities, team_win_prob_matrix)

In [None]:
# For every 2020 draftee with a non-empty name, build a generic team around the
# player's '2020' embedding and print the team model's output against `base_list`.
# NOTE(review): `base_list` and `create_generic_team_with_player` come from other
# cells; the meaning of the literal 48 is presumably minutes — confirm.
for player in draft:
    year = draft[player][0]
    if year == 2020 and len(player) > 0:
        rookie_team = create_generic_team_with_player(player, 48, embeddings[player]['2020'])
        proj=pc_model([rookie_team],[base_list])
        print(player,round(proj[0,0].item(),3))

In [90]:
# Turn the per-team result dicts (`playoffs`, `finals`, `champions`) into two-column frames ...
df_p = pd.DataFrame(list(playoffs.items()),columns = ['team','playoffs']) 
df_f = pd.DataFrame(list(finals.items()),columns = ['team','finals']) 
df_c = pd.DataFrame(list(champions.items()),columns = ['team','champions']) 

# ... and write them to dated CSV files under results/ (the date is part of the file name).
df_p.to_csv("results/playoffsprob_jan9.csv")
df_f.to_csv("results/finalsprob_jan9.csv")
df_c.to_csv("results/championprob_jan9.csv")

In [67]:
def get_positions():
    """Read ``data/minutes_by_position.csv`` into ``{player: {'pg','sg','sf','pf','c'}}``.

    Columns are read positionally: the player name is column 0 and the five
    position values are columns 2 through 6. A later row for the same player
    replaces an earlier one.
    """
    bypos = pd.read_csv("data/minutes_by_position.csv", encoding="latin1").values
    slots = ('pg', 'sg', 'sf', 'pf', 'c')
    return {row[0]: dict(zip(slots, row[2:7])) for row in bypos}

def get_plain(name):
    """Replace a handful of known player names with a fixed plain-ASCII spelling.

    Each pattern is searched in turn against the (possibly already replaced)
    name; names matching none of them are returned unchanged.
    """
    replacements = (
        ('Nikola Jok', "Nikola Jokic"),
        ('Nikola V', "Nikola Vucevic"),
        ('Kristaps', "Kristaps Porzingis"),
        ('Luka D', "Luka Doncic"),
        ('Goran', "Goran Dragic"),
    )
    for pattern, plain_name in replacements:
        if re.search(pattern, name):
            name = plain_name
    return name

def get_awards(year=2021):
    """Compute and print the season awards for ``year`` (2010-2021).

    Prints MVP, ROTY, MIP, the three All-NBA teams, the All-Rookie team and
    the best single game; All-Stars are only produced for 2021.
    """
    indices = [IDX_2010, IDX_2011, IDX_2012, IDX_2013, IDX_2014, IDX_2015,
               IDX_2016, IDX_2017, IDX_2018, IDX_2019, IDX_2020, IDX_2021]
    war, ind_game_war = season_leaders(indices[year-2010], keep_print = False)

    sorted_list_mvp = sorted(war.items(), key=lambda item: item[1], reverse=True)
    mvp = get_plain(sorted_list_mvp[0][0])

    # Rookies are the players drafted the year before the season.
    roty, roty_team = rookie_of_the_year(sorted_list_mvp,keep_print=False,year=year-1)

    is_current_season = year == 2021
    if is_current_season:
        field_curr, field_mp = 'current', 'projected'
    else:
        field_curr, field_mp = str(year), year

    sorted_list_mip = most_improved_player(
        keep_print=False,
        field_curr = field_curr,
        field_base = str(year-1),
        field_mp = field_mp,
    )

    # Skip past candidates drafted in 2019 or later.
    # NOTE(review): the 2019 cutoff is fixed regardless of `year` — confirm intent.
    mip_idx = 0
    mip = sorted_list_mip[mip_idx][0]
    while mip in draft and draft[mip][0]>=2019:
        mip_idx += 1
        mip = sorted_list_mip[mip_idx][0]

    first_team, second_team, third_team = get_all_nba(sorted_list_mvp)

    if is_current_season:
        east_allstars, west_allstars = get_all_stars()

    sorted_games = get_best_game(keep_print=False, ind_game_war = ind_game_war)
    best_game = sorted_games[0][0]

    print("MVP:", mvp)
    print("ROTY:", roty)
    print("MIP:", mip)
    print('\n')
    print("All-NBA First Team:", first_team)
    print("All-NBA Second Team:", second_team)
    print("All-NBA Third Team:", third_team)
    print('\n')
    if is_current_season:
        print("East All-Stars:", east_allstars)
        print('\n')
        print("West All-Stars:", west_allstars)
        print('\n')
    print('All-Rookie First-Team', roty_team)
    print('\n')
    print('Best Game:', best_game)
    

In [None]:
# Compute the 2021 season totals; `war` and `ind_game_war` are read as globals by later cells.
war, ind_game_war = season_leaders(IDX_2021)

In [83]:
def write_full_info():
    """Write ``full_war.csv`` with war, skill and team for each ranked player.

    For every entry of ``best_current_players`` whose nearest name is in
    ``war``, the row holds the war total and the skill value. The team comes
    from ``player_to_team_map`` when the player is listed there; otherwise
    from the last game in the player's ``affiliations`` entry, mapped through
    ``team_to_abbr_mapping``. A player with no affiliation games gets no team.

    Previously the affiliation branch reused whatever ``team`` was left over
    from an earlier player when the current player's entry was empty (or
    raised ``NameError`` if there was no earlier one).

    Returns:
        The ``{player: {...}}`` dict that was written.
    """
    full_info = {}
    for playerskill in best_current_players:
        player = playerskill[0]
        player_name = get_nearest_player(player,war)
        if player_name not in war:
            continue

        full_info[player_name] = {'war': war[player_name], 'skill': playerskill[1]}

        if player_name in player_to_team_map:
            full_info[player_name]['team'] = player_to_team_map[get_nearest_player(player_name,player_to_team_map)][0]
            continue

        player_name_aff = get_nearest_player(player_name,affiliations)
        if player_name_aff in affiliations:
            games = affiliations[player_name_aff]
            teams_by_game = [games[game] for game in games]
            if teams_by_game:
                # The last game listed decides the team.
                full_info[player_name]['team'] = team_to_abbr_mapping[teams_by_game[-1]]

    infodf = pd.DataFrame.from_dict(full_info,orient='index')
    infodf.to_csv('full_war.csv')

    print('FIX LUKA DONCIC, GORAN DRAGIC AND DAVIS BERTANS')
    return full_info

         
    