### Imports and Styling

In [1]:
#Pandas for df, matplotlib for graphs, numpy for regression line, sklearn.metrics for r2 score
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from sklearn.metrics import r2_score

#Some styling got the graphs
%matplotlib inline
import matplotlib.style as style
style.use('seaborn')

### Import and merge data

In [2]:
#Read the shot-quality table (CSV) and the season-stats table (Excel workbook)
shots_df = pd.read_csv('data/19_20_shot_quality.csv')
season_df = pd.read_excel('data/19_20_SEASON_STATS.xlsx')
#Join the two tables on their shared "Team" column; with an inner join,
#teams that do not appear in both tables are dropped from the result
merged = shots_df.merge(season_df, how="inner", on="Team")

### Function to create graphs

In [3]:
def make_graph(X,Y,SAVE_AS="do_not",FIGSIZE=None):
    """Scatter-plot two columns of ``merged`` with a fitted straight line.

    Reads the columns from the notebook-level ``merged`` DataFrame, fits a
    degree-1 polynomial with ``np.polyfit``, and puts the R2 score of that
    fit (rounded to 2 decimals) in the plot title.

    Parameters
    ----------
    X : str
        Name of the column in ``merged`` used for the x axis.
    Y : str
        Name of the column in ``merged`` used for the y axis.
    SAVE_AS : str, optional
        File name without extension. Unless left at the sentinel
        ``"do_not"``, the figure is written to ``data/{SAVE_AS}.png``
        at 500 dpi.
    FIGSIZE : tuple of (float, float), optional
        Figure width and height in inches, passed to ``plt.subplots``.
        ``None`` keeps matplotlib's default size.

    Returns
    -------
    None
    """
    #set up the variables
    x=merged[X]
    y=merged[Y]
    #A degree-1 fit returns the slope and the intercept, in that order
    m, b = np.polyfit(x,y,1)
    y_pred = m*x+b
    r2=round(r2_score(y,y_pred),2)
    #Make the graph
    #Only one figure is created here; a separate plt.figure() call beforehand
    #would leave an empty, unused figure open
    fig, ax = plt.subplots(figsize=FIGSIZE)
    ax.scatter(x,y,c="#ff3300",alpha=0.7)
    ax.plot(x,y_pred)
    ax.set_xlabel(X)
    ax.set_ylabel(Y)
    ax.set_title(f"R2 score = {r2}")
    ax.spines['right'].set_visible(False)
    ax.spines['top'].set_visible(False)
    #To save the image if desired
    if SAVE_AS != "do_not":
        fig.savefig(f"data/{SAVE_AS}.png",dpi=500)
    

### Create the graphs

In [None]:
#Each entry is (x column, y column, file name); every pair is plotted and saved
#Above Percent was the best predictor, but still only had an r2 score of .42
#NOTE(review): the original note "There was virtually no difference between"
#stopped mid-sentence - complete it or remove it
graph_specs = [
    ('PTS','abovePercent',"PTSvABP"),
    ('PTS%','abovePercent',"PTS%vABP"),
    ('PTS','belowPercent',"PTSvBEP"),
    ('PTS','forPercent',"PTSvFOP"),
    ('PTS','againstPercent',"PTSvAGP"),
]
for x_col, y_col, file_name in graph_specs:
    make_graph(x_col, y_col, file_name)