In [58]:
import pandas as pd
import numpy as np

In [59]:
def kickerScore(player) -> float:
    score = 0

    score += player["FgMade_50"] * 5
    score += player["FgMade_40-49"] * 4
    score += player["FgMade_30-39"] * 3
    score += player["FgMade_20-29"] * 3
    score += player["FgMade_0-19"] * 3
    score += player["PatMade"] * 1
    score += player["FgMiss_0-19"] * -1
    score += player["FgMiss_20-29"] * -1
    score += player["FgMiss_30-39"] * -1

    return round(score, 2)

def skillScore(player) -> float:
    score = 0
    score += player["PassingYDS"] * 0.04
    score += player["PassingTD"] * 4
    score += player["PassingInt"] * -2 
    score += player["RushingYDS"] * 0.1
    score += player["RushingTD"] * 6
    score += player["ReceivingRec"] * 1
    score += player["ReceivingYDS"] * 0.1
    score += player["ReceivingTD"] * 6
    score += player["RetTD"] * 6
    score += player["FumTD"] * 6
    score += player["2PT"] * 2
    score += player["Fum"] * -2

    return round(score, 2)

def cleanSkill(df) -> pd.DataFrame:
    df = df.fillna(0)
    return df

# QBs

In [60]:
skill_columns = ['PlayerId', 'PassingYDS', 'PassingTD',
       'PassingInt', 'RushingYDS', 'RushingTD', 'ReceivingRec', 'ReceivingYDS',
       'ReceivingTD', 'RetTD', 'FumTD', '2PT', 'Fum', 'TouchCarries', 'TouchReceptions', 'Touches', 'TargetsReceptions',
       'Targets', 'ReceptionPercentage', 'RzTarget', 'RzTouch', 'RzG2G']
name_and_skill__columns = ['PlayerName', 'PlayerId', 'PassingYDS', 'PassingTD',
       'PassingInt', 'RushingYDS', 'RushingTD', 'ReceivingRec', 'ReceivingYDS',
       'ReceivingTD', 'RetTD', 'FumTD', '2PT', 'Fum', 'TouchCarries', 'TouchReceptions', 'Touches', 'TargetsReceptions',
       'Targets', 'ReceptionPercentage', 'RzTarget', 'RzTouch', 'RzG2G']

In [61]:
qb_2015 = pd.read_csv('https://raw.githubusercontent.com/hvpkod/NFL-Data/main/NFL-data-Players/2015/QB_season.csv').loc[:, skill_columns]
cleanSkill(qb_2015)

# map year - 1 -> year
# x[year - 1] and y[year] are partners

large_df = pd.DataFrame(columns=skill_columns)

for i in range(2015, 2023):
    main_df = pd.read_csv(f'https://raw.githubusercontent.com/hvpkod/NFL-Data/main/NFL-data-Players/{i}/QB_season.csv').loc[:, skill_columns]
    main_df = cleanSkill(main_df)

    score_df = pd.read_csv(f'https://raw.githubusercontent.com/hvpkod/NFL-Data/main/NFL-data-Players/{i + 1}/QB_season.csv').loc[:, skill_columns]
    score_df = cleanSkill(score_df)

    score_pairs = []
    for _, row in score_df.iterrows():
        score = skillScore(row)

        score_pairs.append([row["PlayerId"], score])
    score_df = pd.DataFrame(score_pairs, columns=["PlayerId", "Score"])
    main_df = pd.merge(main_df, score_df[['PlayerId', 'Score']], on='PlayerId', how='inner')
    large_df = pd.concat([large_df, main_df], axis=0, ignore_index=True)

large_df = large_df[
    (large_df['Score'] > 50) &
    (large_df['PassingYDS'] > 0.0)
]
X_df = large_df.drop(columns=["PlayerId", "Score"])
y_df = large_df["Score"]
X = X_df.to_numpy()
y = y_df.to_numpy()

np.save('../../assets/qb/X.npy', X)
np.save('../../assets/qb/y.npy', y)

  large_df = pd.concat([large_df, main_df], axis=0, ignore_index=True)


In [62]:
# get future (testX)
main_df = pd.read_csv('https://raw.githubusercontent.com/hvpkod/NFL-Data/main/NFL-data-Players/2023/QB_season.csv').loc[:, name_and_skill__columns]
main_df = cleanSkill(main_df)
main_df = main_df[
    (main_df['PassingYDS'] > 0.0)
]
X_df = main_df.reset_index().drop(columns=["index"])
map_df = X_df[['PlayerName', 'PlayerId']]
X_df = X_df.drop(columns=["PlayerName", "PlayerId"])
X_df

np.save('../../assets/qb/finalX.npy', X_df.to_numpy())
map_df.to_csv('../../assets/qb/map.csv')

# RB

In [63]:
# map year - 1 -> year
# x[year - 1] and y[year] are partners

large_df = pd.DataFrame(columns=skill_columns)

for i in range(2015, 2023):
    main_df = pd.read_csv(f'https://raw.githubusercontent.com/hvpkod/NFL-Data/main/NFL-data-Players/{i}/RB_season.csv').loc[:, skill_columns]
    main_df = cleanSkill(main_df)

    score_df = pd.read_csv(f'https://raw.githubusercontent.com/hvpkod/NFL-Data/main/NFL-data-Players/{i + 1}/RB_season.csv').loc[:, skill_columns]
    score_df = cleanSkill(score_df)

    score_pairs = []
    for _, row in score_df.iterrows():
        score = skillScore(row)

        score_pairs.append([row["PlayerId"], score])
    score_df = pd.DataFrame(score_pairs, columns=["PlayerId", "Score"])
    main_df = pd.merge(main_df, score_df[['PlayerId', 'Score']], on='PlayerId', how='inner')
    large_df = pd.concat([large_df, main_df], axis=0, ignore_index=True)

large_df = large_df[
    (large_df['Score'] > 25) &
    (large_df['TouchCarries'] > 0) &
    (large_df['Touches'] > 0) &
    (large_df['RushingYDS'] > 0.0)
]
X_df = large_df.drop(columns=["PlayerId", "Score"])
y_df = large_df["Score"]
X = X_df.to_numpy()
y = y_df.to_numpy()

np.save('../../assets/rb/X.npy', X)
np.save('../../assets/rb/y.npy', y)

  large_df = pd.concat([large_df, main_df], axis=0, ignore_index=True)


In [64]:
# get future (testX)
main_df = pd.read_csv('https://raw.githubusercontent.com/hvpkod/NFL-Data/main/NFL-data-Players/2023/RB_season.csv').loc[:, name_and_skill__columns]
large_df = cleanSkill(main_df)
main_df = large_df[
    (large_df['TouchCarries'] > 0) &
    (large_df['Touches'] > 0) &
    (large_df['RushingYDS'] > 0.0)
]
X_df = main_df.reset_index().drop(columns=["index"])
map_df = X_df[['PlayerName', 'PlayerId']]
X_df = X_df.drop(columns=["PlayerName", "PlayerId"])
X_df

np.save('../../assets/rb/finalX.npy', X_df.to_numpy())
map_df.to_csv('../../assets/rb/map.csv')

# WR

In [65]:
# map year - 1 -> year
# x[year - 1] and y[year] are partners

large_df = pd.DataFrame(columns=skill_columns)

for i in range(2015, 2023):
    main_df = pd.read_csv(f'https://raw.githubusercontent.com/hvpkod/NFL-Data/main/NFL-data-Players/{i}/WR_season.csv').loc[:, skill_columns]
    main_df = cleanSkill(main_df)

    score_df = pd.read_csv(f'https://raw.githubusercontent.com/hvpkod/NFL-Data/main/NFL-data-Players/{i + 1}/WR_season.csv').loc[:, skill_columns]
    score_df = cleanSkill(score_df)

    score_pairs = []
    for _, row in score_df.iterrows():
        score = skillScore(row)

        score_pairs.append([row["PlayerId"], score])
    score_df = pd.DataFrame(score_pairs, columns=["PlayerId", "Score"])
    main_df = pd.merge(main_df, score_df[['PlayerId', 'Score']], on='PlayerId', how='inner')
    large_df = pd.concat([large_df, main_df], axis=0, ignore_index=True)

large_df = large_df[
    (large_df['Score'] > 25) &
    (large_df['Touches'] > 0)
]
X_df = large_df.drop(columns=["PlayerId", "Score"])
y_df = large_df["Score"]
X = X_df.to_numpy()
y = y_df.to_numpy()

np.save('../../assets/wr/X.npy', X)
np.save('../../assets/wr/y.npy', y)

  large_df = pd.concat([large_df, main_df], axis=0, ignore_index=True)


In [66]:
# get future (testX)
main_df = pd.read_csv('https://raw.githubusercontent.com/hvpkod/NFL-Data/main/NFL-data-Players/2023/WR_season.csv').loc[:, name_and_skill__columns]
large_df = cleanSkill(main_df)
main_df = large_df[
    (large_df['Touches'] > 0)
]
X_df = main_df.reset_index().drop(columns=["index"])
map_df = X_df[['PlayerName', 'PlayerId']]
X_df = X_df.drop(columns=["PlayerName", "PlayerId"])
X_df

np.save('../../assets/wr/finalX.npy', X_df.to_numpy())
map_df.to_csv('../../assets/wr/map.csv')

# TE

In [67]:
# map year - 1 -> year
# x[year - 1] and y[year] are partners

large_df = pd.DataFrame(columns=skill_columns)

for i in range(2015, 2023):
    main_df = pd.read_csv(f'https://raw.githubusercontent.com/hvpkod/NFL-Data/main/NFL-data-Players/{i}/TE_season.csv').loc[:, skill_columns]
    main_df = cleanSkill(main_df)

    score_df = pd.read_csv(f'https://raw.githubusercontent.com/hvpkod/NFL-Data/main/NFL-data-Players/{i + 1}/TE_season.csv').loc[:, skill_columns]
    score_df = cleanSkill(score_df)

    score_pairs = []
    for _, row in score_df.iterrows():
        score = skillScore(row)

        score_pairs.append([row["PlayerId"], score])
    score_df = pd.DataFrame(score_pairs, columns=["PlayerId", "Score"])
    main_df = pd.merge(main_df, score_df[['PlayerId', 'Score']], on='PlayerId', how='inner')
    large_df = pd.concat([large_df, main_df], axis=0, ignore_index=True)

large_df = large_df[
    (large_df['Score'] > 25) &
    (large_df['ReceivingYDS'] > 0)
]
X_df = large_df.drop(columns=["PlayerId", "Score"])
y_df = large_df["Score"]
X = X_df.to_numpy()
y = y_df.to_numpy()

np.save('../../assets/te/X.npy', X)
np.save('../../assets/te/y.npy', y)

  large_df = pd.concat([large_df, main_df], axis=0, ignore_index=True)


In [68]:
# get future (testX)
main_df = pd.read_csv('https://raw.githubusercontent.com/hvpkod/NFL-Data/main/NFL-data-Players/2023/TE_season.csv').loc[:, name_and_skill__columns]
large_df = cleanSkill(main_df)
main_df = large_df[
    (large_df['ReceivingYDS'] > 0)
]
X_df = main_df.reset_index().drop(columns=["index"])
map_df = X_df[['PlayerName', 'PlayerId']]
X_df = X_df.drop(columns=["PlayerName", "PlayerId"])
X_df

np.save('../../assets/te/finalX.npy', X_df.to_numpy())
map_df.to_csv('../../assets/te/map.csv')

# K

In [69]:
k_columns = ['PlayerId', 'PatMade', 'PatMissed',
       'FgMade_0-19', 'FgMade_20-29', 'FgMade_30-39', 'FgMade_40-49',
       'FgMade_50', 'FgMiss_0-19', 'FgMiss_20-29', 'FgMiss_30-39']
name_and_k_columns = ['PlayerName', 'PlayerId', 'PatMade', 'PatMissed',
       'FgMade_0-19', 'FgMade_20-29', 'FgMade_30-39', 'FgMade_40-49',
       'FgMade_50', 'FgMiss_0-19', 'FgMiss_20-29', 'FgMiss_30-39']

In [70]:
# map year - 1 -> year
# x[year - 1] and y[year] are partners

large_df = pd.DataFrame(columns=k_columns)

for i in range(2015, 2023):
    main_df = pd.read_csv(f'https://raw.githubusercontent.com/hvpkod/NFL-Data/main/NFL-data-Players/{i}/K_season.csv').loc[:, k_columns]
    main_df = cleanSkill(main_df)

    score_df = pd.read_csv(f'https://raw.githubusercontent.com/hvpkod/NFL-Data/main/NFL-data-Players/{i + 1}/K_season.csv').loc[:, k_columns]
    score_df = cleanSkill(score_df)

    score_pairs = []
    for _, row in score_df.iterrows():
        score = kickerScore(row)

        score_pairs.append([row["PlayerId"], score])
    score_df = pd.DataFrame(score_pairs, columns=["PlayerId", "Score"])
    main_df = pd.merge(main_df, score_df[['PlayerId', 'Score']], on='PlayerId', how='inner')
    large_df = pd.concat([large_df, main_df], axis=0, ignore_index=True)

large_df = large_df[
    (large_df['Score'] > 10) &
    (large_df['PatMade'] > 1)
]
X_df = large_df.drop(columns=["PlayerId", "Score"])
y_df = large_df["Score"]
X = X_df.to_numpy()
y = y_df.to_numpy()

np.save('../../assets/k/X.npy', X)
np.save('../../assets/k/y.npy', y)

  large_df = pd.concat([large_df, main_df], axis=0, ignore_index=True)


In [71]:
# get future (testX)
main_df = pd.read_csv('https://raw.githubusercontent.com/hvpkod/NFL-Data/main/NFL-data-Players/2023/K_season.csv').loc[:, name_and_k_columns]
large_df = cleanSkill(main_df)
main_df = large_df[
    (large_df['PatMade'] > 0)
]
X_df = main_df.reset_index().drop(columns=["index"])
map_df = X_df[['PlayerName', 'PlayerId']]
X_df = X_df.drop(columns=["PlayerName", "PlayerId"])
X_df

np.save('../../assets/k/finalX.npy', X_df.to_numpy())
map_df.to_csv('../../assets/k/map.csv')