<a href="https://colab.research.google.com/github/yaobviously/DS-Unit-2-Kaggle-Challenge/blob/main/first_stab_pymc_nfl.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install bambi 

In [219]:
!pip install dill



In [3]:
import os

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import statsmodels.api as sm
import bambi
import scipy.linalg

from sqlalchemy import create_engine

import theano.tensor as tt
import pymc3 as pm
import arviz as az
import xarray

az.style.use('arviz-darkgrid')

  import pandas.util.testing as tm


In [226]:
# import dill
# from dill import dump, load

# with open('/content/drive/MyDrive/nflfastR-data/qb_model_bambi.pkl', 'wb') as model:
#   dump(qb_model, model, protocol=dill.HIGHEST_PROTOCOL)

In [111]:
# Database credentials must not live in the notebook: read the PostgreSQL
# connection string from the environment instead, e.g.
#   NFL_DATABASE_URL="postgresql://user:password@host:5432/dbname"
# NOTE(review): any credential that was ever committed in this cell should be
# rotated, since it remains visible in version history.
URI = os.environ.get('NFL_DATABASE_URL')
if not URI:
  raise RuntimeError(
      'Set the NFL_DATABASE_URL environment variable to the PostgreSQL '
      'connection string before running this cell.'
  )

engine = create_engine(URI)

# Load every source table into its own DataFrame. `df` holds the
# rolling_qb_dk table and is the default input of prepare_qb_data.
df = pd.read_sql('SELECT * FROM rolling_qb_dk', con=engine)
rb_df = pd.read_sql('SELECT * FROM rolling_rush_dk', con=engine)
wr_df = pd.read_sql('SELECT * FROM rolling_receiver_dk', con=engine)
games_df = pd.read_sql('SELECT * FROM games', con=engine)
roster_df = pd.read_sql('SELECT * FROM depth_charts WHERE season >=2016', con=engine)

In [5]:
def prepare_qb_data(df=df, min_season=2016, min_games=6):
  """Build the quarterback modelling frame.

  Args:
    df: DataFrame with at least `game_id` and `starting_qb` columns. The part
      of `game_id` before the first underscore is parsed as the season year.
      The frame passed in is not modified.
    min_season: Earliest season (inclusive) to keep.
    min_games: Minimum number of rows a `starting_qb` must have, within the
      kept seasons, to stay in the result.

  Returns:
    A new DataFrame with a `season` column added, `spread_line` left-merged
    in from the module-level `games_df` on (`game_id`, `starting_qb`), and
    every missing value replaced by 0.
  """
  # assign() returns a new frame, so the caller's DataFrame (by default the
  # module-level `df`) does not gain a `season` column as a side effect.
  season = df['game_id'].str.split('_').str[0].astype(int)
  df = df.assign(season=season)
  df = df[df['season'] >= min_season].copy()

  # Keep only quarterbacks with enough rows to estimate a group effect.
  df = (
      df
      .groupby(['starting_qb'])
      .filter(lambda x: x['game_id'].count() >= min_games)
      .reset_index(drop=True)
  )

  df = (
      df
      .merge(games_df[['game_id', 'starting_qb', 'spread_line']],
             how='left',
             on=['game_id', 'starting_qb'])
  )

  # NOTE(review): this zero-fills every column, including `spread_line` for
  # rows with no match in games_df -- confirm 0 is the intended default.
  df = df.fillna(0)

  return df

In [6]:
# Build the quarterback frame with the function's defaults
# (module-level `df`, min_season=2016, min_games=6).
qb_df = prepare_qb_data()

In [17]:
# Regression of dk_points on the betting lines and rolling stats, with a
# group-specific intercept for each starting_qb.
qb_model = bambi.Model(
    'dk_points ~ total_line + spread_line + rolling_dk_points'
    ' + rolling_pass_tds + rolling_rush_tds + (1|starting_qb)',
    qb_df,
)

# Prior for the group-specific terms: Normal(0, sigma) where sigma itself
# has a HalfNormal(10) hyperprior.
group_specific_sd = bambi.Prior("HalfNormal", sigma=10)
group_specific_prior = bambi.Prior(
    "Normal",
    mu=0,
    sigma=group_specific_sd,
)
qb_model.set_priors(group_specific=group_specific_prior)

# Sample the posterior with Bambi's default sampler settings.
qb_results = qb_model.fit()

In [None]:
# Print the complete ArviZ summary table as plain text; to_string() renders
# every row instead of the abbreviated DataFrame repr.
print(az.summary(qb_results).to_string())

In [None]:
# Ridge plot of the QB model's fitted results. The figure is made very tall
# (10 x 32), presumably so the per-quarterback rows stay legible.
az.plot_forest(
    qb_results,
    figsize=(10, 32),
    kind='ridgeplot'
)

In [21]:
# Draw posterior predictive samples (kind='pps') from the fitted QB model.
# No new data is passed, so presumably predictions are for the rows in qb_df.
# NOTE(review): the following cell reads the samples from
# qb_results.posterior_predictive, which suggests predict() updates qb_results
# in place; if so, qb_posterior_predictive may simply be None -- confirm
# against the installed Bambi version.
qb_posterior_predictive = qb_model.predict(qb_results, kind='pps', draws=5000)


In [23]:
# Take index 0 along the first dimension of the dk_points samples
# (presumably the first chain) and tabulate it; each column then
# corresponds to one position along the remaining trailing dimension.
predictions = pd.DataFrame(
    qb_results.posterior_predictive["dk_points"][0].values
)

# Column-wise mean of the samples, stored alongside the QB rows.
qb_df['bayes_mean'] = predictions.mean(axis=0).to_numpy()

# Show the 2021 rows ranked by the predictive mean, highest first.
(
    qb_df
    .loc[
        qb_df['season'] == 2021,
        ['starting_qb', 'total_line', 'spread_line', 'bayes_mean'],
    ]
    .sort_values(by='bayes_mean', ascending=False)
)

In [None]:
# intercepts = []

# for i in range(50):
#   int_ = qb_results.posterior["1|starting_qb"].stack(draws=("chain", "draw"))[i].values.mean()

#   intercepts.append(int_)

In [60]:
def prepare_wr_data(df=wr_df, min_season=2016, min_games=12):
  """Build the wide-receiver modelling frame.

  Args:
    df: DataFrame with at least `season`, `player_id` and `game_id` columns.
      The second underscore-separated field of `game_id` is parsed as the
      week number. The frame passed in is not modified.
    min_season: Earliest season (inclusive) to keep.
    min_games: Minimum number of rows a `player_id` must have, within the
      kept seasons, to stay in the result.

  Returns:
    A new DataFrame with `week`, `position` and `depth_team` added (the last
    two left-merged from the module-level `roster_df`), restricted to rows
    whose `position` is 'WR'.
  """
  # Mask on the argument itself rather than the module-level `wr_df`, so the
  # function also works for frames other than the default one.
  df = df[df['season'] >= min_season].copy()
  df = df.groupby('player_id').filter(lambda x: x['game_id'].count() >= min_games)
  df = df.assign(week=df['game_id'].str.split('_').str[1].astype(int))

  df = (
      df
      .merge(roster_df[['player_id', 'position', 'depth_team', 'season', 'week']], how='left', on=['player_id', 'season', 'week'])
  )

  # Rows without a depth-chart match have a missing position and are dropped
  # here together with every non-WR row.
  df = df[df['position'] == 'WR'].copy()

  return df

In [61]:
# Build the wide-receiver frame with the function's defaults
# (module-level `wr_df`, min_season=2016, min_games=12).
wr_model_df = prepare_wr_data()

In [65]:
# Regression of rec_dk_pts on the betting lines and rolling receiving stats,
# with a group-specific intercept for each player_id. Default priors are used.
wr_model = bambi.Model(
    'rec_dk_pts ~ total_line + spread_line + rolling_rec_dk_pts'
    ' + rolling_targets  + rolling_target_share + (1|player_id)',
    wr_model_df,
)

# Sample the posterior with Bambi's default sampler settings.
wr_results = wr_model.fit()

In [None]:
# Print the complete ArviZ summary table for the WR model as plain text.
print(az.summary(wr_results).to_string())

In [None]:
# Ridge plot of the WR model's fitted results. The figure is made extremely
# tall (10 x 100), presumably because there is one row per player.
az.plot_forest(
    wr_results,
    figsize=(10, 100),
    kind='ridgeplot'
)

In [157]:
# wr_intercepts = []

# for intercept in range(1599):
#   int_ = wr_results.posterior['1|player_id'].stack(draws=('chain', 'draw'))[intercept].values.mean()
#   wr_intercepts.append(int_)

In [71]:
def prepare_te_data(df=wr_df, min_season=2016, min_games=12):
  """Build the tight-end modelling frame.

  Args:
    df: DataFrame with at least `season`, `player_id` and `game_id` columns.
      The second underscore-separated field of `game_id` is parsed as the
      week number. The frame passed in is not modified.
    min_season: Earliest season (inclusive) to keep.
    min_games: Minimum number of rows a `player_id` must have, within the
      kept seasons, to stay in the result.

  Returns:
    A new DataFrame with `week`, `position` and `depth_team` added (the last
    two left-merged from the module-level `roster_df`), restricted to rows
    whose `position` is 'TE'.
  """
  # Mask on the argument itself rather than the module-level `wr_df`, so the
  # function also works for frames other than the default one.
  df = df[df['season'] >= min_season].copy()
  df = df.groupby('player_id').filter(lambda x: x['game_id'].count() >= min_games)
  df = df.assign(week=df['game_id'].str.split('_').str[1].astype(int))

  df = (
      df
      .merge(roster_df[['player_id', 'position', 'depth_team', 'season', 'week']], how='left', on=['player_id', 'season', 'week'])
  )

  # Rows without a depth-chart match have a missing position and are dropped
  # here together with every non-TE row.
  df = df[df['position'] == 'TE'].copy()

  return df

In [72]:
# Build the tight-end frame with the function's defaults
# (module-level `wr_df`, min_season=2016, min_games=12).
te_model_df = prepare_te_data()

In [73]:
# Same formula as the WR model, fitted on the tight-end rows only:
# group-specific intercept per player_id, default priors.
te_model = bambi.Model(
    'rec_dk_pts ~ total_line + spread_line + rolling_rec_dk_pts'
    ' + rolling_targets  + rolling_target_share + (1|player_id)',
    te_model_df,
)

# Sample the posterior with Bambi's default sampler settings.
te_results = te_model.fit()

In [None]:
# Print the complete ArviZ summary table for the TE model as plain text.
print(az.summary(te_results).to_string())

In [81]:
def prepare_wr_te_data(df=wr_df, min_season=2016, min_games=12):
  """Build the combined wide-receiver / tight-end modelling frame.

  Args:
    df: DataFrame with at least `season`, `player_id` and `game_id` columns.
      The second underscore-separated field of `game_id` is parsed as the
      week number. The frame passed in is not modified.
    min_season: Earliest season (inclusive) to keep.
    min_games: Minimum number of rows a `player_id` must have, within the
      kept seasons, to stay in the result.

  Returns:
    A new DataFrame with `week`, `position` and `depth_team` added (the last
    two left-merged from the module-level `roster_df`), restricted to rows
    whose `position` is 'WR' or 'TE'.
  """
  # Mask on the argument itself rather than the module-level `wr_df`, so the
  # function also works for frames other than the default one.
  df = df[df['season'] >= min_season].copy()
  df = df.groupby('player_id').filter(lambda x: x['game_id'].count() >= min_games)
  df = df.assign(week=df['game_id'].str.split('_').str[1].astype(int))

  df = (
      df
      .merge(roster_df[['player_id', 'position', 'depth_team', 'season', 'week']], how='left', on=['player_id', 'season', 'week'])
  )

  # Rows without a depth-chart match have a missing position and are dropped
  # here together with every row that is neither WR nor TE.
  df = df[df['position'].isin(['WR','TE'])].copy()

  return df

In [82]:
# Build the combined WR/TE frame with the function's defaults
# (module-level `wr_df`, min_season=2016, min_games=12).
combined_model_df = prepare_wr_te_data()

In [87]:
# Joint WR/TE model. The leading `0 +` drops the common intercept, and
# `position` enters as a predictor alongside the group-specific intercept
# for each player_id.
combined_model = bambi.Model(
    'rec_dk_pts ~ 0 + total_line + spread_line + rolling_rec_dk_pts'
    ' + rolling_targets  + rolling_target_share + position + (1|player_id)',
    combined_model_df,
)

# Sample the posterior with Bambi's default sampler settings.
combined_results = combined_model.fit()

In [None]:
# Print the complete ArviZ summary table for the combined WR/TE model.
print(az.summary(combined_results).to_string())

In [None]:
# Plot the posterior of the combined WR/TE model with ArviZ's default
# plot_posterior settings.
az.plot_posterior(combined_results)

In [142]:
# Map every player_id that appears in the depth charts with position 'RB'
# to that position; only the keys are used below.
player_rb_dict = {
    player_id: position
    for player_id, position in zip(roster_df['player_id'], roster_df['position'])
    if position == 'RB'
}

# Keep rushing rows for those players, attach their depth-chart slot, keep a
# single row per (game_id, player_id), then drop rows with any missing value.
rb_model_df = (
    rb_df[rb_df['player_id'].isin(player_rb_dict.keys())]
    .copy()
    .merge(
        roster_df[['season', 'week', 'player_id', 'depth_team']],
        how='left',
        on=['season', 'week', 'player_id'],
    )
    .drop_duplicates(subset=['game_id', 'player_id'], keep='first')
    # depth_team is treated as categorical by the model formula.
    .assign(depth_team=lambda frame: frame['depth_team'].astype('category'))
    .dropna()
)

In [181]:
# Running-back model. The leading `0 +` drops the common intercept;
# depth_team enters as a categorical predictor alongside the group-specific
# intercept for each player_id.
rb_model = bambi.Model(
    'total_dk ~ 0 + total_line + spread_line + rolling_target_share'
    ' + rolling_targets + rolling_td + depth_team + rolling_rush_att'
    ' + rolling_rush_td + (1|player_id)',
    rb_model_df,
)

# Sample the posterior with Bambi's default sampler settings.
rb_results = rb_model.fit()

In [None]:
# Print the complete ArviZ summary table for the RB model as plain text.
print(az.summary(rb_results).to_string())