In [1]:
import os
from urllib.parse import quote

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn.metrics import mean_absolute_error
from sqlalchemy import create_engine

In [2]:
# Load the Postgres credentials from the environment.
# A missing variable raises KeyError immediately, before any connection
# attempt is made.
user = os.environ['DB_USER']
password = os.environ['DB_PASSWORD']
host = os.environ['DB_HOST']
database = os.environ['DB_NAME']
port = os.environ['DB_PORT']

# Percent-encode the user name and password before embedding them in the URL:
# characters such as '@', ':', '/' or '%' would otherwise be read as URL
# delimiters and corrupt the connection string. `safe=''` makes sure '/' is
# escaped as well; values without special characters are left unchanged.
# NOTE(review): assumes the environment holds the raw (not already
# percent-encoded) credentials -- confirm.
URI = (
    f"postgresql://{quote(user, safe='')}:{quote(password, safe='')}"
    f"@{host}:{port}/{database}"
)

In [3]:
# Every column of the lee_sharpe table, restricted to the 2012 season onward.
lee_sharpe_query = """
    select *
    from lee_sharpe
    where season >= 2012
"""

# The connection string itself is handed to pandas as the connection here.
ls = pd.read_sql(sql=lee_sharpe_query, con=URI)

In [4]:
# Quarterback rows of player_stats from 2012 onward; the database returns
# them ordered by season and then week.
query = """
    select *
    from player_stats
    where season >=2012 and position = 'QB'
    order by season asc, week asc
"""

# SQLAlchemy engine built from the connection string, used as the connection
# for this read.
engine = create_engine(URI)
df = pd.read_sql(sql=query, con=engine)

In [5]:
# Build a lookup of starting quarterbacks: (season, week) -> set of QB ids.
# It is used below to assign a starter flag to every row of the main
# dataframe, so that only games the quarterback started can be included.
#
# The away and home starter columns are stacked into a single `qb_id` column
# first, so no row-wise array concatenation is needed. Null ids are dropped
# so a missing value can never be matched as a starter, and each week's ids
# are stored as a set so the membership test below is a hash lookup rather
# than a linear scan of an array for every player row.
ls_dict = (
    ls
    .melt(
        id_vars=['season', 'week'],
        value_vars=['away_qb_id', 'home_qb_id'],
        value_name='qb_id',
    )
    .dropna(subset=['qb_id'])
    .groupby(['season', 'week'])['qb_id']
    .agg(set)
    .to_dict()
)

# 1 when the player is among that week's starters, 0 otherwise.
# A (season, week) present in the player data but absent from the lookup
# falls back to an empty collection (nobody flagged) instead of raising
# KeyError.
df['starter'] = [
    1 if p in ls_dict.get((s, w), ()) else 0
    for p, s, w in zip(df['player_id'], df['season'], df['week'])
]