# Calculate Z-scores for players

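This takes the list of players that we're trying to calculate scores for and creates z-scores for every one of them. The league mean and standard deviation behind each z-score are computed only from players who meet an arbitrarily chosen minimum of innings pitched (for pitchers) or plate appearances (for hitters).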

Notes for this year's sheet:

- Z-score mean/stdev calculated using only players above the minimum IP (70) for pitching or PA (350) for batting.
- Reliever value was artificially decremented as usual, since relievers are normally over-valued. Each pitcher's total score is multiplied by (projected innings) / (maximum projected innings among all pitchers), so the multiplier never exceeds 1.0.

In [1]:
import pandas as pd
import numpy as np
import os
import sqlalchemy
import psycopg2

# lift the column display limit to an arbitrarily high value so that wide
# frames are shown without eliding columns
pd.set_option("display.max_columns", 150)

  """)


## SqlAlchemy Connection Information

These are used to get and return a connection to the postgres DB so that we can query for player stats and write them back to the table.

In [2]:
# connection information for the database
# credentials are only ever read from the environment (None when unset)
POSTGRES_USER = os.environ.get("POSTGRES_USER")
POSTGRES_PASSWORD = os.environ.get("POSTGRES_PASSWORD")
# host, port and database name can be overridden through the environment;
# when a variable is unset the previous hard-coded value is used unchanged
POSTGRES_IP = os.environ.get("POSTGRES_IP", "192.168.0.118")
POSTGRES_PORT = int(os.environ.get("POSTGRES_PORT", "5432"))
POSTGRES_DB = os.environ.get("POSTGRES_DB", "postgres")

def get_sqlalchemy_engine():
    """
    Create and return a SQLAlchemy engine for the postgres database.

    The connection URL is assembled from the module-level POSTGRES_USER,
    POSTGRES_PASSWORD, POSTGRES_IP, POSTGRES_PORT and POSTGRES_DB settings.

    Returns:
        The engine object produced by ``sqlalchemy.create_engine``.
    """
    # imported locally so the function does not depend on a file-level import
    from urllib.parse import quote

    # Percent-encode the credentials so characters such as "@", ":" or "/"
    # cannot be mistaken for URL delimiters. str() keeps an unset (None)
    # value formatting the same way it did with plain str.format().
    user = quote(str(POSTGRES_USER), safe="")
    password = quote(str(POSTGRES_PASSWORD), safe="")

    # "postgresql" is the dialect name; the legacy "postgres" alias is no
    # longer accepted by SQLAlchemy 1.4 and later.
    return sqlalchemy.create_engine(
        "postgresql://{user}:{password}@{host}:{port}/{db}".format(
            user=user,
            password=password,
            host=POSTGRES_IP,
            port=POSTGRES_PORT,
            db=POSTGRES_DB,
        )
    )

In [3]:
# build the engine and open a connection to postgres; `conn` is the handle
# passed to the pandas reads and writes that follow
engine = get_sqlalchemy_engine()
conn = engine.connect()

## Calculate Z-Scores

For each of the players, we calculate a z-score based on how they compare to the rest of the league. There are several columns that we want to do this for.

- For each z-score, the league mean and standard deviation are computed only from players above an arbitrary minimum, so that part-time players do not pull the league average down.
- After each column's z-score is calculated, the z-scores are summed and the sum is z-scored again to make a single number. This is done so that we can compare pitchers and batters in the same chart.

In [4]:
# read the pitcher (dfp) and batter (dfb) projection tables from postgres.
# The SQL is wrapped in sqlalchemy.text() so it is passed to the connection
# as an executable statement rather than a bare string.
dfp = pd.read_sql(
    sqlalchemy.text("select * from fantasy.pitchers_projections_depth_charts"),
    conn,
)
dfb = pd.read_sql(
    sqlalchemy.text("select * from fantasy.batters_projections_depth_charts"),
    conn,
)

In [6]:
# derived batting columns that are scored later on
# strikeouts per at-bat
dfb['k_pct'] = dfb['so'].div(dfb['ab'])
# at-bats * OBP * SLG -- presumably a runs-created style estimate; TODO confirm
dfb['rc'] = dfb['ab'].mul(dfb['obp']).mul(dfb['slg'])

In [78]:
# qualification thresholds: only players strictly above these values form the
# baseline population for the league mean / stdev

MINIMUM_INNINGS_PITCHED = 70
MINIMUM_PLATE_APPEARANCES = 350
min_pa = dfb['pa'].gt(MINIMUM_PLATE_APPEARANCES)
min_ip = dfp['ip'].gt(MINIMUM_INNINGS_PITCHED)

# stats to z-score, keyed by column name
#   dir:    +1 when a higher value is better, -1 when a lower value is better
#   weight: multiplier applied to that stat's z-score
dfb_score_cols = {
    "pa": dict(dir=1, weight=1.3),
    "k_pct": dict(dir=-1, weight=1.0),
    "hr": dict(dir=1, weight=0.9),
    "rc": dict(dir=1, weight=1.4),
    "woba": dict(dir=1, weight=1.3),
    "slg": dict(dir=1, weight=1.0),
}
dfp_score_cols = {
    "ip": dict(dir=1, weight=1.3),
    "era": dict(dir=-1, weight=1.0),
    "hr": dict(dir=-1, weight=0.9),
    "so": dict(dir=1, weight=1.0),
    "whip": dict(dir=-1, weight=1.5),
    "k-9": dict(dir=1, weight=1.3),
}
# multipliers applied to the final batter / pitcher z-scores
dfb_weight = 1.0
dfp_weight = 0.78

# names of the per-stat score columns, in the same order as the tables above
dfb_score_col_names = [stat + "_score" for stat in dfb_score_cols]
dfp_score_col_names = [stat + "_score" for stat in dfp_score_cols]

In [65]:
# The league mean and stdev are taken from qualified players only (min_pa /
# min_ip); every player's own projection is then scored against that baseline,
# flipped by "dir" and scaled by "weight".

for frame, score_cols, qualified in (
    (dfb, dfb_score_cols, min_pa),
    (dfp, dfp_score_cols, min_ip),
):
    for stat, settings in score_cols.items():
        baseline = frame[stat][qualified]
        frame[stat + "_score"] = (
            (frame[stat] - baseline.mean()) / baseline.std(ddof=0)
            * settings["dir"]
            * settings["weight"]
        )

In [80]:
# batters: add up the weighted per-stat scores, standardise the sum against
# the qualified (min_pa) batters, apply the batter weight and rank best-first
dfb['total_score'] = dfb[dfb_score_col_names].sum(axis=1)
batter_baseline = dfb.loc[min_pa, 'total_score']
batter_z = (dfb['total_score'] - batter_baseline.mean()) / batter_baseline.std(ddof=0)
dfb['total_z_score'] = batter_z * dfb_weight
dfb['total_z_score_rank'] = dfb['total_z_score'].rank(ascending=False)

# pitchers: same steps, except the summed score is first scaled by each
# pitcher's share of the largest projected innings total, which shrinks the
# score of low-inning pitchers (relievers)
dfp['total_score'] = dfp[dfp_score_col_names].sum(axis=1)
dfp['reliever_decrement'] = dfp['ip'].div(dfp['ip'].max())
dfp['total_score'] = dfp['total_score'].mul(dfp['reliever_decrement'])
pitcher_baseline = dfp.loc[min_ip, 'total_score']
pitcher_z = (dfp['total_score'] - pitcher_baseline.mean()) / pitcher_baseline.std(ddof=0)
dfp['total_z_score'] = pitcher_z * dfp_weight
dfp['total_z_score_rank'] = dfp['total_z_score'].rank(ascending=False)

# best rank first, sorted in place
for frame in (dfb, dfp):
    frame.sort_values(by='total_z_score_rank', inplace=True)


## Top Batter Results

In [67]:
# preview the first 50 batters; dfb was sorted by total_z_score_rank above
dfb.head(50)

Unnamed: 0,index,name,g,pa,ab,h,2b,3b,hr,r,rbi,bb,so,hbp,sb,cs,avg,obp,slg,ops,woba,fld,bsr,war,adp,k_pct,rc,pa_score,k_pct_score,hr_score,rc_score,woba_score,slg_score,total_score,total_z_score,total_z_score_rank
0,0,Mike Trout,157,658,517,153,28,4,40,114,101,124,135,10,23,6,0.295,0.437,0.595,1.032,0.427,0.4,3.6,9.2,1.1,0.261122,134.427755,1.502723,-0.260102,2.360006,4.546168,4.937847,3.314984,16.401628,3.028083,1.0
4,4,Mookie Betts,162,679,591,178,43,4,29,115,95,76,92,5,27,7,0.301,0.382,0.532,0.913,0.386,16.9,5.3,7.4,1.9,0.155668,120.105384,1.809519,1.383807,1.14756,3.522164,2.921623,2.048771,12.833444,2.369322,2.0
3,3,J.D. Martinez,155,651,576,169,35,2,39,98,117,66,163,4,4,2,0.293,0.366,0.565,0.931,0.389,-0.1,-1.8,4.0,6.1,0.282986,119.11104,1.400458,-0.600942,2.249784,3.451071,3.069152,2.712025,12.281548,2.267431,3.0
10,10,Nolan Arenado,155,651,578,166,38,4,36,97,112,62,114,3,3,2,0.287,0.357,0.55,0.907,0.379,7.0,-0.5,4.9,7.1,0.197232,113.4903,1.400458,0.735876,1.919117,3.049206,2.57739,2.410546,12.092592,2.232545,4.0
1,1,Juan Soto,152,637,539,158,30,3,32,98,101,92,122,2,7,3,0.294,0.397,0.538,0.935,0.395,-4.7,0.4,4.7,35.3,0.226345,115.122854,1.195927,0.282031,1.478227,3.165928,3.364209,2.169362,11.655685,2.151883,5.0
8,8,Jose Ramirez,155,651,562,162,41,4,28,100,94,77,74,5,25,8,0.289,0.377,0.522,0.899,0.38,3.8,3.7,6.3,4.2,0.131673,110.598228,1.400458,1.757875,1.037338,2.842432,2.626566,1.847784,11.512453,2.12544,6.0
12,12,Anthony Rizzo,155,651,548,153,33,2,28,88,98,78,89,19,8,4,0.279,0.384,0.501,0.885,0.375,4.9,-1.4,4.3,37.7,0.162409,105.426432,1.400458,1.278731,1.037338,2.472665,2.380685,1.425713,9.995589,1.845395,7.0
5,5,Christian Yelich,155,651,567,169,35,4,27,98,94,72,136,6,17,5,0.297,0.38,0.519,0.898,0.383,2.3,2.7,5.2,6.8,0.239859,111.82374,1.400458,0.071365,0.927116,2.930052,2.774095,1.787489,9.890574,1.826007,8.0
2,2,Bryce Harper,148,623,500,133,27,1,35,97,100,112,147,4,12,5,0.266,0.401,0.533,0.934,0.392,-3.5,0.8,4.8,16.9,0.294,106.8665,0.991396,-0.772637,1.808895,2.575625,3.21668,2.068869,9.888828,1.825684,9.0
6,6,Giancarlo Stanton,145,609,530,138,25,1,44,94,113,67,179,7,3,1,0.261,0.349,0.563,0.912,0.381,1.6,-0.2,4.2,21.5,0.337736,104.13811,0.786865,-1.454433,2.800896,2.380554,2.675742,2.671828,9.861452,1.82063,10.0


## Top Pitcher Results

In [81]:
## Top Pitchers
# preview the first 50 pitchers; dfp was sorted by total_z_score_rank above
dfp.head(50)

Unnamed: 0,index,name,w,l,sv,hld,era,gs,g,ip,h,er,hr,so,bb,whip,k-9,bb-9,fip,war,ip_score,era_score,hr_score,so_score,whip_score,k-9_score,total_score,reliever_decrement,total_z_score,total_z_score_rank
14,14,Max Scherzer,16,8,0,0,3.06,32,32,210.0,162,71,25,271,54,1.03,11.61,2.32,3.04,5.7,2.468381,2.149044,-1.041024,3.237561,4.143111,3.181959,14.072023,0.995261,3.011031,1.0
5,5,Jacob deGrom,14,9,0,0,2.84,32,32,211.0,174,67,20,249,50,1.06,10.59,2.14,2.82,6.0,2.503261,2.544001,-0.207681,2.743113,3.727421,2.247249,13.557363,1.0,2.897049,2.0
1,1,Chris Sale,15,6,0,0,2.7,29,29,183.0,141,55,18,238,38,0.97,11.67,1.85,2.63,6.0,1.526646,2.795336,0.125656,2.495889,4.974489,3.236942,13.143875,0.867299,2.805473,3.0
27,27,Justin Verlander,16,8,0,0,3.2,32,32,204.0,164,72,26,243,49,1.04,10.73,2.18,3.27,5.2,2.259107,1.897708,-1.207692,2.608264,4.004547,2.375543,11.541446,0.966825,2.450581,4.0
22,22,Corey Kluber,16,9,0,0,3.27,32,32,211.0,187,77,23,220,43,1.09,9.37,1.82,3.22,5.3,2.503261,1.77204,-0.707687,2.091341,3.311732,1.129262,10.099949,1.0,2.13133,5.0
25,25,Gerrit Cole,14,8,0,0,3.35,32,32,198.0,167,73,22,231,57,1.14,10.51,2.61,3.25,4.8,2.049832,1.62842,-0.541018,2.338565,2.618916,2.173938,9.635987,0.938389,2.028576,6.0
34,34,Blake Snell,15,10,0,0,3.15,32,32,190.0,153,66,18,224,75,1.2,10.63,3.55,3.3,4.0,1.770799,1.987471,0.125656,2.181241,1.787537,2.283904,9.127752,0.900474,1.916016,7.0
40,40,Carlos Carrasco,15,9,0,0,3.41,32,32,194.0,175,73,23,212,45,1.13,9.83,2.08,3.33,4.5,1.910316,1.520704,-0.707687,1.911542,2.757479,1.550798,8.222614,0.919431,1.715554,8.0
55,55,Trevor Bauer,14,9,0,0,3.47,32,32,191.0,166,74,20,215,67,1.22,10.09,3.15,3.42,4.3,1.805678,1.412989,-0.207681,1.978967,1.510411,1.789058,7.503694,0.905213,1.556334,9.0
46,46,Aaron Nola,13,9,0,0,3.39,31,31,189.0,170,71,20,200,51,1.17,9.49,2.44,3.36,4.2,1.73592,1.556609,-0.207681,1.641844,2.203227,1.239228,7.317387,0.895735,1.515072,10.0


## Write to the Database

Prior to writing the Excel files, write the scores back to the database for safekeeping.

In [83]:
# Persist both score tables in the "fantasy" schema, replacing any earlier
# copy, and grant read access to every database role. The grant statements
# are wrapped in sqlalchemy.text() so they are passed as executable
# statements rather than bare strings.
# NOTE(review): on SQLAlchemy 2.x connections do not autocommit -- confirm
# whether an explicit conn.commit() is needed for these writes to persist.
dfb.to_sql("batters_scores", conn, schema="fantasy", if_exists="replace")
result = conn.execute(
    sqlalchemy.text("grant select on fantasy.batters_scores to public")
)
dfp.to_sql("pitchers_scores", conn, schema="fantasy", if_exists="replace")
result = conn.execute(
    sqlalchemy.text("grant select on fantasy.pitchers_scores to public")
)

## Create Draft Sheet

This sheet includes a number of important pieces of information for drafting specifically, so that players can look up by eligibility as well as important stats.

- Name
- Eligibility
- Positions
- Combined scores

In [84]:
# Build the draft sheet: one row per entry in fantasy.players, carrying that
# player's combined z-score when one exists.
# - batter and pitcher scores are stacked with UNION ALL and tagged with
#   catg 'b' / 'p'
# - the join matches on player name AND on category, where a player is treated
#   as a pitcher when the position text contains a "P"
# - LEFT JOIN keeps players without a score (catg and score are then NULL)
draft_sheet_query = """
select
	p.fullname,
	p.averagedraftposition as espn_avg_draft_pos,
	p.eligibility,
	p.position,
	p.injurystatus,
	sc.catg,
	sc.score
from fantasy.players p
	left join (
		select name, 'b' as catg, total_z_score as score
		from fantasy.batters_scores
		union all
		select name, 'p' as catg, total_z_score as score
		from fantasy.pitchers_scores
	) sc
		on p.fullname = sc.name
		and case when position like '%P%' then 'p' else 'b' end = sc.catg
"""
# text() marks the string as an executable SQL statement for the connection
dfd = pd.read_sql(sqlalchemy.text(draft_sheet_query), conn)

In [85]:
# order the draft sheet best score first, then number the rows:
# method='first' breaks ties by current row order, and na_option='bottom'
# ranks players without a score last
dfd.sort_values(by='score', inplace=True, ascending=False)
dfd['rank'] = dfd['score'].rank(ascending=False, method='first', na_option='bottom')

## Draft Sheet Preview

This is a quick look at how the players are going to appear in the final draft sheet. It is useful for comparing overall pitcher vs. batter weights (i.e. verifying that nobody is ranked higher than Trout).

In [86]:
# preview the first 100 rows of the draft sheet (sorted by score, descending)
dfd.head(100)

Unnamed: 0,fullname,espn_avg_draft_pos,eligibility,position,injurystatus,catg,score,rank
0,Mike Trout,1.50,OF|UTIL,OF,ACTIVE,b,3.028083,1.0
540,Max Scherzer,5.21,P|SP,SP,ACTIVE,p,3.011031,2.0
541,Jacob deGrom,11.97,P|SP,SP,ACTIVE,p,2.897049,3.0
542,Chris Sale,9.21,P|SP,SP,ACTIVE,p,2.805473,4.0
543,Justin Verlander,18.85,P|SP,SP,ACTIVE,p,2.450581,5.0
1,Mookie Betts,2.52,OF|UTIL,OF,ACTIVE,b,2.369322,6.0
2,J.D. Martinez,5.86,OF|UTIL,OF|DH,ACTIVE,b,2.267431,7.0
3,Nolan Arenado,6.07,3B|1B/3B|UTIL,3B,ACTIVE,b,2.232545,8.0
4,Juan Soto,34.25,OF|UTIL,OF,ACTIVE,b,2.151883,9.0
544,Corey Kluber,19.43,P|SP,SP,ACTIVE,p,2.131330,10.0


## Write Excel files for distribution

In [87]:
# write one workbook with a sheet per frame; leaving the with-block saves
# and closes the file
with pd.ExcelWriter('fantasy2019.xlsx') as writer:
    for sheet_name, frame in (
        ('batters', dfb),
        ('pitchers', dfp),
        ('draft_sheet', dfd),
    ):
        frame.to_excel(writer, sheet_name=sheet_name)