In [158]:
## Predictor columns kept for each position, keyed by position code.
## Each list is used as a column selector on that position's DataFrame.
## Within each list the entries are grouped by the suffix of the column name.
features = {
    'K': [
        # *_prev
        'PAT Attempts_prev',
        'PAT Made_prev',
        # *_prev_1yr
        'Fantasy Points_prev_1yr',
        'PAT Made_prev_1yr',
        'FG Attempts_prev_1yr',
        'FG Made_prev_1yr',
        # *_team
        'Points Against_momentum_team',
        'Rushing Yards Allowed_momentum_team',
        'Fantasy Points_prev_1yr_team',
        'Sacks_prev_1yr_team',
        'Safeties_prev_1yr_team',
        'Points Against_prev_1yr_team',
        'Total Yards Allowed_prev_1yr_team',
    ],
    'QB': [
        # *_prev
        'Fantasy Points_prev',
        'Pass Attempts_prev',
        'Pass Completions_prev',
        'Passing Yards_prev',
        'Touchdown Passes_prev',
        'Passes Intercepted_prev',
        'Rushing Attempts_prev',
        # *_momentum
        'Fantasy Points_momentum',
        'Pass Attempts_momentum',
        'Pass Completions_momentum',
        'Passing Yards_momentum',
        'Touchdown Passes_momentum',
        'Passes Intercepted_momentum',
        'Rushing Attempts_momentum',
        # *_prev_1yr
        'Fantasy Points_prev_1yr',
        'Pass Attempts_prev_1yr',
        'Pass Completions_prev_1yr',
        'Passing Yards_prev_1yr',
        'Touchdown Passes_prev_1yr',
        'Passes Intercepted_prev_1yr',
        'Passing Conversions_prev_1yr',
        'Rushing Attempts_prev_1yr',
        'Rushing Yards_prev_1yr',
        'Rushing Touchdowns_prev_1yr',
        'Receptions_prev_1yr',
        'Receiving Yards_prev_1yr',
        # *_team
        'Points Against_prev_team',
        'Passing Yards Allowed_prev_team',
        'Rushing Yards Allowed_prev_team',
        'Passing Yards Allowed_momentum_team',
        'Total Yards Allowed_momentum_team',
        'Sacks_prev_1yr_team',
    ],
    'RB': [
        # *_prev
        'Fantasy Points_prev',
        'Pass Attempts_prev',
        'Rushing Attempts_prev',
        'Rushing Yards_prev',
        'Rushing Touchdowns_prev',
        'Receptions_prev',
        'Receiving Yards_prev',
        'Touchdown Receptions_prev',
        'Fumbles Lost_prev',
        # *_momentum
        'Fantasy Points_momentum',
        'Rushing Attempts_momentum',
        'Rushing Yards_momentum',
        'Rushing Touchdowns_momentum',
        'Receptions_momentum',
        'Receiving Yards_momentum',
        'Touchdown Receptions_momentum',
        'Fumbles Lost_momentum',
        # *_prev_1yr
        'Fantasy Points_prev_1yr',
        'Pass Attempts_prev_1yr',
        'Passes Intercepted_prev_1yr',
        'Rushing Attempts_prev_1yr',
        'Rushing Yards_prev_1yr',
        'Rushing Touchdowns_prev_1yr',
        'Rushing Conversions_prev_1yr',
        'Receptions_prev_1yr',
        'Receiving Yards_prev_1yr',
        'Touchdown Receptions_prev_1yr',
        'Receiving Conversions_prev_1yr',
        'Fumbles Lost_prev_1yr',
        # *_team
        'Safeties_prev_team',
        'Safeties_prev_1yr_team',
    ],
    'TE': [
        # *_prev
        'Fantasy Points_prev',
        'Receptions_prev',
        'Receiving Yards_prev',
        'Touchdown Receptions_prev',
        # *_momentum
        'Fantasy Points_momentum',
        'Receptions_momentum',
        'Receiving Yards_momentum',
        'Touchdown Receptions_momentum',
        # *_prev_1yr
        'Fantasy Points_prev_1yr',
        'Receptions_prev_1yr',
        'Receiving Yards_prev_1yr',
        'Touchdown Receptions_prev_1yr',
        # *_team
        'Sacks_prev_team',
        'Points Against_momentum_team',
        'Points Against_prev_1yr_team',
    ],
    'WR': [
        # *_prev
        'Fantasy Points_prev',
        'Receptions_prev',
        'Receiving Yards_prev',
        'Touchdown Receptions_prev',
        # *_momentum
        'Fantasy Points_momentum',
        'Receptions_momentum',
        'Receiving Yards_momentum',
        'Touchdown Receptions_momentum',
        # *_prev_1yr
        'Fantasy Points_prev_1yr',
        'Rushing Attempts_prev_1yr',
        'Rushing Yards_prev_1yr',
        'Rushing Touchdowns_prev_1yr',
        'Rushing Conversions_prev_1yr',
        'Receptions_prev_1yr',
        'Receiving Yards_prev_1yr',
        'Touchdown Receptions_prev_1yr',
        'Fumbles Lost_prev_1yr',
        # *_team
        'Interceptions_momentum_team',
        'Passing Yards Allowed_momentum_team',
        'Fantasy Points_prev_1yr_team',
        'Interceptions_prev_1yr_team',
        'Blocked Kicks_prev_1yr_team',
        'Touchdowns_prev_1yr_team',
    ],
}

# Predictor columns for the 'DST' position, held in their own single-key
# dict and grouped here by column-name suffix.
features_DST = {
    'DST': [
        # *_prev
        'Fantasy Points_prev',
        'Sacks_prev',
        # *_momentum
        'Fantasy Points_momentum',
        'Sacks_momentum',
        'Fumble Recoveries_momentum',
        'Points Against_momentum',
        'Total Yards Allowed_momentum',
        # *_prev_1yr
        'Fantasy Points_prev_1yr',
        'Touchdowns_prev_1yr',
        'Points Against_prev_1yr',
        'Rushing Yards Allowed_prev_1yr',
        'Total Yards Allowed_prev_1yr',
    ],
}

In [159]:
# Fold the DST column list into the main per-position mapping (same list object, not a copy).
features.update(DST=features_DST['DST'])

In [37]:
# Regularization code per position, consumed by the model-fitting loop:
# 'r' selects Ridge and 'l' selects Lasso.
Reg_type = dict(WR='r', QB='l', RB='l', TE='r', K='r', DST='r')

In [38]:
# Degree handed to PolynomialFeatures for each position.
deg_poly = dict(WR=3, QB=5, RB=4, TE=3, K=3, DST=4)

# Regularization strength (the `alpha` argument of Ridge / Lasso) for each position.
alphas = dict(WR=1e-6, QB=1e-6, RB=1e-6, TE=1e-6, K=1e-3, DST=1e-6)


In [39]:
# all of the imports
import pandas as pd
import numpy as np
import pickle 
import patsy
import statsmodels.api as sm
import statsmodels.formula.api as smf
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.linear_model import LinearRegression
from sklearn import metrics
from sklearn.preprocessing import PolynomialFeatures
from sklearn.pipeline import make_pipeline
from sklearn.cross_validation import train_test_split
from sklearn.cross_validation import KFold
% matplotlib inline

import warnings
warnings.filterwarnings('ignore')

from sklearn.cross_validation import cross_val_score
from sklearn import feature_selection as f_select
from sklearn.linear_model import Lasso
from sklearn.linear_model import Ridge
from sklearn.metrics import mean_squared_error
from sklearn.metrics import r2_score

In [168]:
## Fit one polynomial-features + regularized linear model per position and
## record its MSE (on the training rows) and R^2 (on the test rows).
from scipy import stats  # not referenced in this cell; left in place in case other cells rely on `stats`

mse = dict()  # position -> mean squared error on the rows with year <= 2015
rsq = dict()  # position -> R^2 on the rows with year > 2015

for pos in ['QB', 'WR', 'RB', 'TE', 'K', 'DST']:

    # Load this position's table, discard rows with any missing value, and
    # order by (year, week). Building a new frame at each step instead of
    # mutating in place keeps the cell safe to re-run.
    df = pd.read_csv(pos+'_adj2.csv').dropna().reset_index(drop=True)
    df = df.sort_values(['year', 'week'])

    ## divide train and test set: year <= 2015 trains, year > 2015 tests
    df_train = df[df['year'] <= 2015].reset_index(drop=True)
    df_test = df[df['year'] > 2015].reset_index(drop=True)

    # The target is kept as a one-column DataFrame (double brackets), exactly
    # as before, so the fitted estimators see the same input shapes.
    y_train = df_train[['Fantasy Points']]
    X_train = df_train[features[pos]]

    y_test = df_test[['Fantasy Points']]
    X_test = df_test[features[pos]]

    # set optimal degree and alpha
    degree = deg_poly[pos]
    alpha_ = alphas[pos]

    # apply ridge or lasso
    reg_code = Reg_type[pos]
    if reg_code == 'r':
        regressor = Ridge(alpha=alpha_)
    elif reg_code == 'l':
        regressor = Lasso(alpha=alpha_)
    else:
        # Without this branch an unrecognised code would silently reuse the
        # estimator left over from the previous position (or raise a
        # NameError on the first iteration).
        raise ValueError(
            "Unknown regularization type %r for position %r; "
            "expected 'r' (Ridge) or 'l' (Lasso)" % (reg_code, pos)
        )
    est = make_pipeline(PolynomialFeatures(degree), regressor)

    est.fit(X_train, y_train)

    # NOTE(review): mse is measured on the training rows while rsq is measured
    # on the test rows, so the two numbers are not directly comparable.
    # Confirm this is intended; if not, score mse on (y_test, X_test) as well.
    mse[pos] = mean_squared_error(y_train, est.predict(X_train))
    rsq[pos] = r2_score(y_test, est.predict(X_test))