In [102]:
import pandas as pd
import dask.dataframe as dd
import pyarrow as pa
import numpy as np
import random
import sklearn
from sklearn.preprocessing import OneHotEncoder, StandardScaler
import torch
import torch.nn.functional as F
from torch import nn
import warnings
warnings.filterwarnings("ignore")
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
import pickle

In [103]:
# Load the fitted one-hot encoders (a dict keyed by categorical column name).
# NOTE(security): pickle.load can execute arbitrary code embedded in the file.
# Only load artifacts written by this project's own training run.
with open('MODELS/mlb_encoders.pkl', 'rb') as file:
    encoders = pickle.load(file)

# Load the fitted scaler used for the continuous features (same pickle caveat).
with open('MODELS/mlb_scaler.pkl', 'rb') as file:
    scaler = pickle.load(file)

# The layer sizes must match the checkpoint loaded below exactly;
# load_state_dict raises on any missing/unexpected key or shape mismatch.
# Input width is 137 features, output is a single value.
model = torch.nn.Sequential(
    torch.nn.Linear(137, 256),
    torch.nn.ReLU(),
    torch.nn.Linear(256, 128),
    torch.nn.ReLU(),
    torch.nn.Linear(128, 64),
    torch.nn.ReLU(),
    torch.nn.Linear(64, 64),
    torch.nn.ReLU(),
    torch.nn.Linear(64, 16),
    torch.nn.ReLU(),
    torch.nn.Linear(16, 1)
)

# This notebook only runs inference, so switch the module to evaluation mode.
# (No-op for the Linear/ReLU layers above, but keeps the cell correct if
# dropout or batch-norm layers are ever added to the architecture.)
model.eval()

# Load the saved model state.
# map_location='cpu' lets a checkpoint saved on a GPU/MPS device be restored
# on a machine that does not have that device.
model.load_state_dict(torch.load('MODELS/mlb_model.pth', map_location='cpu'))

<All keys matched successfully>

In [104]:
# Read the full raw dataset into memory; the sample row is picked in a later cell.
# NOTE(review): this is an absolute, machine-specific path and will not resolve
# on another machine.
df = pd.read_parquet(
    path='/Users/stefanfeiler/Desktop/SmartBetter/SmartBetter/data/mlb_for_model.parquet'
)

In [197]:
# Take one record by position to use as the sample datapoint.
# Despite its name, `first_row` is the row at positional index 100001, not row 0.
first_row = df.iloc[100001]
# Wrap the Series back into a one-row DataFrame that keeps df's column order.
new_df = pd.DataFrame(
    data=[first_row],
    columns=df.columns,
)

In [198]:
# DON'T TOUCH. ORDER IS VERY IMPORTANT
# Later cells split the row by column position and encode the categorical
# values in list order, so both lists must keep exactly this ordering.

# Continuous features: every column whose name contains '_odds' (in frame
# order), followed by three explicitly named columns.
continuous_cols = [name for name in new_df.columns if '_odds' in name]
continuous_cols.extend([
    'minutes_since_commence',
    'this_team_game_of_season',
    'opponent_game_of_season',
])

# Categorical features, each with its own entry in `encoders`.
categorical_cols = [
    'home_away',
    'team_1',
    'hour_of_start',
    'day_of_week',
    'number_of_game_today',
    'day_night',
    'park_id',
    'this_team_league',
    'opponent_league',
]

# Continuous block first, categorical block second.
dp_raw = pd.concat(
    [new_df[continuous_cols], new_df[categorical_cols]],
    axis=1,
)

In [199]:
# Preview the assembled one-row feature frame (continuous columns first, then categorical).
dp_raw.head()

Unnamed: 0,barstool_1_odds,betclic_1_odds,betfair_1_odds,betfred_1_odds,betmgm_1_odds,betonlineag_1_odds,betrivers_1_odds,betus_1_odds,betway_1_odds,bovada_1_odds,...,opponent_game_of_season,home_away,team_1,hour_of_start,day_of_week,number_of_game_today,day_night,park_id,this_team_league,opponent_league
100001,2.15,2.15,2.17,2.15,2.16,2.14,2.17,2.2,2.13,2.17,...,111.0,1,Los Angeles Angels,18,Fri,0,N,ANA01,AL,AL


In [200]:
# Materialize the feature frame as a NumPy array so it can be sliced by column position.
# The frame mixes numeric and string columns, so the result has dtype=object.
dp_raw_np = dp_raw.to_numpy()

In [201]:
# Show the categorical tail of the row. The boundary is derived from the number
# of continuous columns instead of a hard-coded 42, so it stays correct if the
# set of '_odds' columns changes.
dp_raw_np[:, len(continuous_cols):]

array([[1, 'Los Angeles Angels', 18, 'Fri', '0', 'N', 'ANA01', 'AL',
        'AL']], dtype=object)

In [202]:
# Split the raw row by column position: the leading block holds the continuous
# features (to be standardized), the remainder holds the categorical features
# (to be one-hot encoded).
# The boundary comes from len(continuous_cols) rather than a literal 42, so the
# split cannot drift out of sync with the column list that built dp_raw.
n_continuous = len(continuous_cols)
continuous_vars = dp_raw_np[:, :n_continuous]
categorical_vars = dp_raw_np[:, n_continuous:]

In [203]:
# Inspect the categorical values of the single sample row (one entry per categorical column).
categorical_vars[0]

array([1, 'Los Angeles Angels', 18, 'Fri', '0', 'N', 'ANA01', 'AL', 'AL'],
      dtype=object)

In [204]:
# Inspect the loaded encoders mapping: one encoder per categorical column name.
encoders

{'home_away': OneHotEncoder(sparse_output=False),
 'team_1': OneHotEncoder(sparse_output=False),
 'hour_of_start': OneHotEncoder(sparse_output=False),
 'day_of_week': OneHotEncoder(sparse_output=False),
 'number_of_game_today': OneHotEncoder(sparse_output=False),
 'day_night': OneHotEncoder(sparse_output=False),
 'park_id': OneHotEncoder(sparse_output=False),
 'this_team_league': OneHotEncoder(sparse_output=False),
 'opponent_league': OneHotEncoder(sparse_output=False)}

In [205]:
# Standardize the continuous features with the scaler loaded from MODELS/mlb_scaler.pkl.
# Only transform() is called; this cell does not create or fit a scaler.
scaled_continuous = scaler.transform(continuous_vars)

In [206]:
# One-hot encode each categorical value with the encoder stored for its column.
# Columns are walked in categorical_cols order, so the encoded blocks are
# concatenated side by side in that same order.
one_hot_encoded_row = np.concatenate(
    [
        # transform() expects 2D array-like input, hence the [[value]] wrapping.
        encoders[col_name].transform([[value]])
        for col_name, value in zip(categorical_cols, categorical_vars[0])
    ],
    axis=1,
)

one_hot_encoded_row

array([[0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 1., 0., 0.,
        0., 0., 0., 0., 1., 0., 0., 0., 1., 1., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 1., 0.]])

In [207]:
# Join the two feature blocks column-wise into one model-ready row:
# scaled continuous features first, one-hot encoded categoricals second.
final_data_point = np.concatenate(
    [scaled_continuous, one_hot_encoded_row],
    axis=1,
)

In [208]:
# Sanity check: the model's first layer is Linear(137, 256), so this should be (1, 137).
final_data_point.shape

(1, 137)

In [209]:
# Convert the NumPy feature row into a float32 tensor for the model's forward pass.
input_tensor = torch.tensor(final_data_point, dtype=torch.float32)

In [210]:
# Run the forward pass with autograd disabled: this is inference only, so there
# is no reason to record a computation graph for the result.
with torch.no_grad():
    output_tensor = model(input_tensor)

# Tensors produced under no_grad do not require grad, so they convert to NumPy
# directly without an explicit detach().
output_predictions = output_tensor.numpy()
output_predictions

array([[-6.9872856]], dtype=float32)