### Imports

In [1]:
import pandas as pd
import numpy as np
#import matplotlib.pyplot as plt
#import seaborn as sns
import os
import warnings
warnings.filterwarnings('ignore')

### Load Preprocessed Data

In [67]:
# Read the match-level table written by the preprocessing step
cleaned_csv_path = '../data/cleaned_data_for_FeatureDev.csv'
df = pd.read_csv(cleaned_csv_path)

In [None]:
df.info()

In [None]:
df.head()

### Creating Target Feature(s): % Pts Won by Player (Also Versions Just Serve or Return % Pts Won)

In [68]:
# Target feature: the winner's share (%) of all points played in the match.
# Points won by the winner = points won on own serve + points won on return
# (the loser's serve points that the loser did not win).
w_serve_pts_won = df["w_1stWon"] + df["w_2ndWon"]
w_return_pts_won = df["l_svpt"] - (df["l_1stWon"] + df["l_2ndWon"])
match_pts_played = df["w_svpt"] + df["l_svpt"]
df["w_pts_won%"] = (((w_serve_pts_won + w_return_pts_won) / match_pts_played) * 100).round(2)
#df["w_pts_won%"]                                                      

In [69]:
# Loser's share (%) of all points played: the complement of the winner's share.
# Rounded to 2 decimals like every other percentage column, because the float
# subtraction otherwise leaves binary residue (e.g. 100 - 99.9 evaluates to
# 0.09999999999999432), which would leak into saved CSVs and downstream features.
df["l_pts_won%"] = (100 - df["w_pts_won%"]).round(2)
#df["l_pts_won%"]

In [70]:
# Total points played in the match (both players' serve points combined). Kept per match so that
# backward-looking workload/fatigue features can be derived from it later, alongside match time.
df["tot_pts"] = df["l_svpt"].add(df["w_svpt"])
#df["tot_pts"]

In [71]:
# Winner: serve points won as a percentage of the winner's serve points played
w_pts_won_on_serve = df["w_1stWon"] + df["w_2ndWon"]
df["w_sv_pts_won%"] = (w_pts_won_on_serve / df["w_svpt"] * 100).round(2)
#df["w_sv_pts_won%"] 

In [72]:
# Winner: return points won as a percentage of the loser's serve points played.
# Return points won = the loser's serve points minus those the loser won.
w_pts_won_on_return = df["l_svpt"] - (df["l_1stWon"] + df["l_2ndWon"])
df["w_ret_pts_won%"] = ((w_pts_won_on_return / df["l_svpt"]) * 100).round(2)
#df["w_ret_pts_won%"]

In [73]:
# Loser: serve points won as a percentage of the loser's serve points played
l_pts_won_on_serve = df["l_1stWon"] + df["l_2ndWon"]
df["l_sv_pts_won%"] = (l_pts_won_on_serve / df["l_svpt"] * 100).round(2)
#df["l_sv_pts_won%"] 

In [74]:
# Loser: return points won as a percentage of the winner's serve points played.
# Return points won = the winner's serve points minus those the winner won.
l_pts_won_on_return = df["w_svpt"] - (df["w_1stWon"] + df["w_2ndWon"])
df["l_ret_pts_won%"] = ((l_pts_won_on_return / df["w_svpt"]) * 100).round(2)
#df["l_ret_pts_won%"]

### Creating Predictive Features 1: Opponent Differential-Based Features

We will make more match-specific differential features later, once we've generated backward-looking player stats. For now, we will make some match-specific differentials from natively available player metrics (but, importantly, NOT from stats from the actual match we are trying to predict, or from subsequent matches).

In [75]:
# ATP ranking-points differential, expressed from each side's point of view.
# A player with no recorded ranking points is treated as having 0 points.
for pts_col in ("w_rank_pts", "l_rank_pts"):
    df[pts_col] = df[pts_col].fillna(0)

df["w_rank_pts_diff"] = df["w_rank_pts"] - df["l_rank_pts"]
df["l_rank_pts_diff"] = -df["w_rank_pts_diff"]  # mirror image for the loser

In [76]:
# ATP ranking differential from each side's point of view. The sign is flipped so that, as with
# the ranking-points differential, a positive number means "ranked better than the opponent".
max_winners = df['w_rank'].max()
max_losers = df['l_rank'].max()
max_sample = max(max_winners, max_losers)  # worst (numerically largest) ranking seen on either side

# Players without a ranking are placed one spot below the worst ranking in the sample
for rank_col in ('w_rank', 'l_rank'):
    df[rank_col] = df[rank_col].fillna(max_sample + 1)

df["w_rank_diff"] = -(df["w_rank"] - df["l_rank"])
df["l_rank_diff"] = -df["w_rank_diff"]  # mirror image for the loser

In [77]:
# Natural log of each player's ranking, and the differential between the two logs. The working
# assumption is that a one-place gap separates players more the closer they are to the top.
# As with the raw ranking differential, the sign is flipped so that positive = better ranked.
for side in ("w", "l"):
    df[f"{side}_log_rank"] = np.log(df[f"{side}_rank"]).round(2)

df["w_log_rank_diff"] = -(df["w_log_rank"] - df["l_log_rank"])
df["l_log_rank_diff"] = -(df["w_log_rank_diff"])  # mirror image for the loser

In [78]:
# Height differential in cm: winner minus loser, then the mirror image for the loser
df["w_ht_diff"] = df["w_ht"].sub(df["l_ht"])
df["l_ht_diff"] = -df["w_ht_diff"]

In [79]:
# Age differential in years: winner minus loser, then the mirror image for the loser
df["w_age_diff"] = df["w_age"].sub(df["l_age"])
df["l_age_diff"] = -df["w_age_diff"]

In [80]:
# Handedness match-up flags (1 = true, 0 = false): set when the player is left-handed ('L')
# and the opponent is right-handed ('R'). Any other combination yields 0.
winner_is_lefty = df['w_hd'] == 'L'
winner_is_righty = df['w_hd'] == 'R'
loser_is_lefty = df['l_hd'] == 'L'
loser_is_righty = df['l_hd'] == 'R'

df['w_L_opp_R'] = np.where(winner_is_lefty & loser_is_righty, 1, 0)
df['l_L_opp_R'] = np.where(winner_is_righty & loser_is_lefty, 1, 0)

# a small number of low-match # players in the sample are unknown (U) for handed, even after investigation on ATP site.

In [81]:
# Numerically encode handedness for both players: left-handed -> 2, right-handed -> 1.
# Unknown ('U') is encoded as 1, i.e. grouped with right-handers.
# NOTE(review): earlier comments here said unknowns convert to 0, but the code has always
# assigned 1 — confirm which encoding is intended before relying on this column.
hand_codes = (("L", 2), ("R", 1), ("U", 1))
for hand_col in ("w_hd", "l_hd"):
    for letter, code in hand_codes:
        df.loc[(df[hand_col] == letter), hand_col] = code
    # Column still has object dtype after the replacements; coerce it to a numeric dtype
    df[hand_col] = pd.to_numeric(df[hand_col])

In [82]:
# Home-court-advantage flags (1 = true, 0 = false): set when the player's country code matches
# the tournament's country and the opponent's does not.
winner_at_home = df['t_country'] == df['w_ioc']
winner_away = df['t_country'] != df['w_ioc']
loser_at_home = df['t_country'] == df['l_ioc']
loser_away = df['t_country'] != df['l_ioc']

df['w_HCA_opp_N'] = np.where(winner_at_home & loser_away, 1, 0)
df['l_HCA_opp_N'] = np.where(winner_away & loser_at_home, 1, 0)

In [83]:
# Keep only the columns needed downstream, in a fixed order: tournament/match info, winner stats,
# loser stats, target percentages, then winner- and loser-perspective differentials and flags.
# NOTE: the order matters — later cells drop one side's columns and relabel the remainder
# positionally, so reordering this list changes which label lands on which column.
df = df[['t_id','t_date','tour_wk','t_name','t_country','t_surf','t_lvl','t_draw_size','m_num','t_round','t_round_num','m_best_of','m_score','m_time(m)','tot_pts','w_id','w_name','w_rank','w_rank_pts','w_log_rank','w_ioc','w_ent','w_hd','w_ht', 'w_age','w_svpt','w_1stWon','w_2ndWon','w_SvGms','w_ace','w_bpSaved','w_bpFaced','l_id','l_name','l_rank','l_rank_pts','l_log_rank','l_ioc','l_ent','l_hd','l_ht', 'l_age','l_svpt','l_1stWon','l_2ndWon','l_SvGms','l_ace','l_bpSaved','l_bpFaced','w_pts_won%','w_sv_pts_won%','w_ret_pts_won%','l_pts_won%','l_sv_pts_won%','l_ret_pts_won%','w_rank_diff','w_rank_pts_diff','w_log_rank_diff','w_ht_diff','w_age_diff','w_L_opp_R','w_HCA_opp_N','l_rank_diff','l_rank_pts_diff','l_log_rank_diff','l_ht_diff','l_age_diff','l_L_opp_R','l_HCA_opp_N']]

In [None]:
df.info()

In [None]:
#Save current df
#df.to_csv('../data/test_df_mid.csv', index=False)

### Predictive Features 2: Windowed/Rolling Backward-Looking, Player-Specific Features Not Explicitly Compared to Current Opponent

Splitting out the two players from each match and rearranging the df such that each player per match has their own row. This makes the next series of grouped, sorted rolling calculations much easier to accomplish; we will revert to match-level columns later for additional comparative calculations derived from these new rolling calculations.

In [84]:
# Winner-perspective frame: remove the loser-only columns. The loser's id, name, serve points,
# aces and break-point counts are not in this list and therefore stay (as opponent info).
loser_only_cols = [
    "l_ret_pts_won%", "l_sv_pts_won%", "l_pts_won%", "l_L_opp_R", "l_age_diff",
    "l_rank", "l_rank_pts", "l_rank_pts_diff", "l_rank_diff", "l_ht_diff",
    "l_1stWon", "l_2ndWon", "l_SvGms", "l_ent", "l_hd", "l_ht", "l_ioc", "l_age",
    "l_log_rank", "l_log_rank_diff", "l_HCA_opp_N",
]
df_winners = df.drop(columns=loser_only_cols)
#df_winners.info()

In [85]:
# Outcome flag for every row of the winner-perspective frame: 1 = this player won the match
df_winners = df_winners.assign(m_outcome=1)

In [None]:
df_winners.info()

In [86]:
# Re-label the winner-perspective frame with role-neutral names: the winner becomes the
# "player" (p_*) and the loser the "opponent" (opp_*); who won is tracked by 'm_outcome'.
# Columns are renamed BY NAME rather than by position, so a change in upstream column order
# cannot silently attach the wrong label to a column. Columns not listed keep their names.
winner_rename_map = {
    # match-level columns whose names change
    "t_round_num": "t_rd_num",
    "tot_pts": "m_tot_pts",
    # the player (here: the match winner)
    "w_id": "p_id",
    "w_name": "p_name",
    "w_rank": "p_rank",
    "w_rank_pts": "p_rank_pts",
    "w_log_rank": "p_log_rank",
    "w_ioc": "p_country",
    "w_ent": "p_ent",
    "w_hd": "p_hand",
    "w_ht": "p_ht",
    "w_age": "p_age",
    "w_svpt": "p_svpt",
    "w_1stWon": "p_1stWon",
    "w_2ndWon": "p_2ndWon",
    "w_SvGms": "p_SvGms",
    "w_ace": "p_ace",
    "w_bpSaved": "p_bpSaved",
    "w_bpFaced": "p_bpFaced",
    # the opponent (here: the match loser)
    "l_id": "opp_id",
    "l_name": "opp_name",
    "l_svpt": "opp_svpt",
    "l_ace": "opp_ace",
    "l_bpSaved": "opp_bpSaved",
    "l_bpFaced": "opp_bpFaced",
    # targets
    "w_pts_won%": "p_pts_won%",
    "w_sv_pts_won%": "p_sv_pts_won%",
    "w_ret_pts_won%": "p_ret_pts_won%",
    # player-vs-opponent differentials and flags
    "w_rank_diff": "p_opp_rank_diff",
    "w_rank_pts_diff": "p_opp_rank_pts_diff",
    "w_log_rank_diff": "p_opp_log_rank_diff",
    "w_ht_diff": "p_opp_ht_diff",
    "w_age_diff": "p_opp_age_diff",
    "w_L_opp_R": "p_L_opp_R",
    "w_HCA_opp_N": "p_HCA_opp_N",
}
df_winners = df_winners.rename(columns=winner_rename_map)

# rename() silently ignores keys that are not present, so fail loudly if an expected column is
# absent. The check runs on the renamed frame, which keeps this cell safe to re-run.
missing_winner_cols = set(winner_rename_map.values()) - set(df_winners.columns)
if missing_winner_cols:
    raise KeyError(f"df_winners is missing expected columns: {sorted(missing_winner_cols)}")
#df_winners.head(30)

In [87]:
# Loser-perspective frame: remove the winner-only columns. The winner's id, name, serve points,
# aces and break-point counts are not in this list and therefore stay (as opponent info).
winner_only_cols = [
    "w_ret_pts_won%", "w_sv_pts_won%", "w_pts_won%", "w_L_opp_R", "w_age_diff",
    "w_rank", "w_rank_pts", "w_rank_pts_diff", "w_rank_diff", "w_ht_diff",
    "w_1stWon", "w_2ndWon", "w_SvGms", "w_ent", "w_hd", "w_ht", "w_ioc", "w_age",
    "w_log_rank", "w_log_rank_diff", "w_HCA_opp_N",
]
df_losers = df.drop(columns=winner_only_cols)
#df_losers.info()

In [88]:
# Outcome flag for every row of the loser-perspective frame: 0 = this player lost the match
df_losers = df_losers.assign(m_outcome=0)

In [None]:
df_losers.info()

In [89]:
# Re-label the loser-perspective frame with role-neutral names: the loser becomes the
# "player" (p_*) and the winner the "opponent" (opp_*); who won is tracked by 'm_outcome'.
# Columns are renamed BY NAME rather than by position, so a change in upstream column order
# cannot silently attach the wrong label to a column. Columns not listed keep their names.
loser_rename_map = {
    # match-level columns whose names change
    "t_round_num": "t_rd_num",
    "tot_pts": "m_tot_pts",
    # the opponent (here: the match winner)
    "w_id": "opp_id",
    "w_name": "opp_name",
    "w_svpt": "opp_svpt",
    "w_ace": "opp_ace",
    "w_bpSaved": "opp_bpSaved",
    "w_bpFaced": "opp_bpFaced",
    # the player (here: the match loser)
    "l_id": "p_id",
    "l_name": "p_name",
    "l_rank": "p_rank",
    "l_rank_pts": "p_rank_pts",
    "l_log_rank": "p_log_rank",
    "l_ioc": "p_country",
    "l_ent": "p_ent",
    "l_hd": "p_hand",
    "l_ht": "p_ht",
    "l_age": "p_age",
    "l_svpt": "p_svpt",
    "l_1stWon": "p_1stWon",
    "l_2ndWon": "p_2ndWon",
    "l_SvGms": "p_SvGms",
    "l_ace": "p_ace",
    "l_bpSaved": "p_bpSaved",
    "l_bpFaced": "p_bpFaced",
    # targets
    "l_pts_won%": "p_pts_won%",
    "l_sv_pts_won%": "p_sv_pts_won%",
    "l_ret_pts_won%": "p_ret_pts_won%",
    # player-vs-opponent differentials and flags
    "l_rank_diff": "p_opp_rank_diff",
    "l_rank_pts_diff": "p_opp_rank_pts_diff",
    "l_log_rank_diff": "p_opp_log_rank_diff",
    "l_ht_diff": "p_opp_ht_diff",
    "l_age_diff": "p_opp_age_diff",
    "l_L_opp_R": "p_L_opp_R",
    "l_HCA_opp_N": "p_HCA_opp_N",
}
df_losers = df_losers.rename(columns=loser_rename_map)

# rename() silently ignores keys that are not present, so fail loudly if an expected column is
# absent. The check runs on the renamed frame, which keeps this cell safe to re-run.
missing_loser_cols = set(loser_rename_map.values()) - set(df_losers.columns)
if missing_loser_cols:
    raise KeyError(f"df_losers is missing expected columns: {sorted(missing_loser_cols)}")
#df_losers.head(30)

In [None]:
df_winners.info()

In [None]:
df_losers.info()

In [None]:
#Save current df
#df_winners.to_csv('../data/test_winners.csv', index=False)
#df_losers.to_csv('../data/test_losers.csv', index=False)

In [90]:
# Stack the winner- and loser-perspective frames into one long table with a fresh 0..n-1 index:
# one row per player per match, with 'm_outcome' recording which side the row came from.
player_frames = [df_winners, df_losers]
df_player = pd.concat(player_frames, ignore_index=True)
#df_player.head(20)

In [None]:
#Save current df
#df_player.to_csv('../data/test_df2.csv', index=False)

In [91]:
# Order rows by player, then tournament week, then round number — all descending. Keeping each
# player's matches together and in sequence is what makes the grouped rolling calculations
# below possible to troubleshoot and validate by eye.
player_sort_keys = ['p_id', 'tour_wk', 't_rd_num']
df_player = df_player.sort_values(by=player_sort_keys, ascending=False)
#df_player

In [92]:
# Save the sorted long-format frame (one row per player per match) to disk, without the index column
df_player.to_csv('../data/test_df.csv', index=False)

#### % Total Pts Won Features: Recent and Long-Term Performance

We are going to take a time-decaying version of % total points won, over the last 60 surface-specific matches relative to the match at hand. I played with various integration windows and weightings in early experiments, evaluating correlations to target in EDA and contribution to models with just a few features, and converged on what's here now (though differences are pretty small between around 20-100 matches and with a range of weighting factors).

In [93]:
# The idea for this first part of the operation: looking back from a given match, per player and
# per surface, average the player's previous matches in consecutive groups of 10
# (matches 1-10 back, 11-20 back, 21-30 back, ...).
# NOTE(review): the markdown above mentions the last 60 matches, while this comment originally
# said "previous hundred matches" — confirm the intended look-back depth.
#
# Some care is needed for the "runway problem" (players without enough history yet):
#   * For the most recent group (1-10), the average is allowed to cover as few as one match.
#   * For older groups, the average needs at least 5 matches. Example: a player with exactly 15
#     previous matches on hard courts gets a full 10-match sample for the most recent group and a
#     5-match sample for the second group.
#   * If fewer than 5 matches are available at a group's nominal position, the group is stepped
#     back toward the present one match at a time until it covers 5 matches. With only 14 previous
#     matches, the second group covers matches -10 to -14 instead of just -11 to -14; with 13 it
#     covers -9 to -13, and so on. The trade-off for avoiding thin data at the end of a sample is
#     some overlap in the matches counted by adjacent groups.
#   * If the player has fewer than 5 previous matches in total, the group falls back to the
#     average of whatever previous matches exist.

# The very first match for a player on a given surface in the sample is left as NaN. We may end up
# simply removing these rows at a later step, because there is no prior data to project from.

# The frame was sorted in descending order above; reverse it so that, within each player, rows run
# in ascending tour_wk / t_rd_num order (presumably oldest to newest — confirm), which is the order
# the rolling/shift calculations below look back over.
# NOTE(review): this reversal is not idempotent — re-running this cell flips the order again.
df_player = df_player.iloc[::-1]

def _lagged_block_mean(frame, value_col, block_start, group_cols=('p_id', 't_surf'),
                       window=10, min_block_size=5):
    """Mean of `value_col` over a block of `window` earlier rows, per group.

    For each row, and within its `group_cols` group (in the frame's current row order), the
    nominal block covers the rows `block_start` .. `block_start + window - 1` positions earlier.
    The current row itself is never included.

    Fallbacks when a group has too little history at that position, tried in order:
      1. Accept the nominal block as long as it holds at least `min_block_size` observations.
      2. Otherwise slide the block toward the present one row at a time (shift
         `block_start - 1`, ..., 1) and take the first position holding at least
         `min_block_size` observations.
      3. Otherwise use the mean of whatever earlier rows exist (at least one).
    The first row of every group has no earlier rows and stays NaN.

    `min_periods` only decides whether a window's mean is reported or masked as NaN; it never
    changes the mean itself. One pass with the lowest acceptable `min_periods` is therefore
    equivalent to filling step by step from a stricter `min_periods` down to that value.

    Parameters
    ----------
    frame : pandas.DataFrame
        Must contain `value_col` and `group_cols`, already ordered oldest to newest within
        each group.
    value_col : str
        Column to average.
    block_start : int
        How many rows back the most recent row of the nominal block lies (1 = previous row).
    group_cols : sequence of str
        Columns defining the groups the look-back is confined to.
    window : int
        Number of rows in a block.
    min_block_size : int
        Minimum observations required in fallback steps 1 and 2.

    Returns
    -------
    pandas.Series
        Block means rounded to 2 decimals, aligned to `frame.index`.
    """
    grouped = frame.groupby(list(group_cols))[value_col]

    def _rolled(min_periods, shift):
        # Rolling mean ending at each row, rounded, then pushed `shift` rows later in the group
        return grouped.transform(
            lambda s: s.rolling(window=window, min_periods=min_periods).mean().round(2).shift(shift)
        )

    # Steps 1 and 2: nominal position first, then slide toward the present
    block_mean = _rolled(min_block_size, block_start)
    for shift in range(block_start - 1, 0, -1):
        block_mean = block_mean.fillna(_rolled(min_block_size, shift))

    # Step 3: whatever earlier rows exist. The shift never drops below 1, because a shift of 0
    # would include the current match, i.e. information not yet available at prediction time.
    return block_mean.fillna(_rolled(1, 1))


# Mean % of total points won over the previous matches 1-10, 11-20, 21-30 and 31-40,
# per player and surface
for block_start in (1, 11, 21, 31):
    df_player[f'p_pts_won%_{block_start}to{block_start + 9}'] = _lagged_block_mean(
        df_player, 'p_pts_won%', block_start
    )

# --- p_pts_won%_41to50 -------------------------------------------------------
# Within each (p_id, t_surf) group, take the mean of p_pts_won% over a 10-row
# rolling window (rounded to 2 d.p.) and lag it by `lag` rows, so that with
# lag=41 a row receives the mean of the 41st..50th rows before it in its group.
# NOTE(review): this relies on df_player being ordered oldest-first at this
# point (it is reversed back with iloc[::-1] after these features) - confirm.
#
# Rows without enough history are filled by a cascade of progressively looser
# settings, applied in order; each step only fills values that are still NaN:
#   1. lag 41, requiring 10 down to 5 observations in the window;
#   2. at least 5 observations, with the lag relaxed from 40 down to 1;
#   3. lag 1, requiring 4 down to 1 observations.
# The lag never reaches 0: a zero shift would let the current match (the one
# being predicted) leak into its own feature.
pts_by_player_surf = df_player.groupby(['p_id', 't_surf'])['p_pts_won%']

cascade_41to50 = (
    [(min_obs, 41) for min_obs in range(10, 4, -1)]
    + [(5, lag) for lag in range(40, 0, -1)]
    + [(min_obs, 1) for min_obs in range(4, 0, -1)]
)

filled_41to50 = None
for min_obs, lag in cascade_41to50:
    # Bind the loop values as defaults so the lambda does not depend on late binding.
    candidate = pts_by_player_surf.transform(
        lambda x, min_obs=min_obs, lag=lag: (
            x.rolling(window=10, min_periods=min_obs).mean().round(2).shift(lag)
        )
    )
    # The first step seeds the column; later steps only fill remaining gaps.
    filled_41to50 = candidate if filled_41to50 is None else filled_41to50.fillna(candidate)

df_player['p_pts_won%_41to50'] = filled_41to50

# --- p_pts_won%_51to60 -------------------------------------------------------
# Within each (p_id, t_surf) group, take the mean of p_pts_won% over a 10-row
# rolling window (rounded to 2 d.p.) and lag it by `lag` rows, so that with
# lag=51 a row receives the mean of the 51st..60th rows before it in its group.
# NOTE(review): this relies on df_player being ordered oldest-first at this
# point (it is reversed back at the end of this block) - confirm.
#
# Rows without enough history are filled by a cascade of progressively looser
# settings, applied in order; each step only fills values that are still NaN:
#   1. lag 51, requiring 10 down to 5 observations in the window;
#   2. at least 5 observations, with the lag relaxed from 50 down to 1;
#   3. lag 1, requiring 4 down to 1 observations.
# The lag never reaches 0: a zero shift would let the current match (the one
# being predicted) leak into its own feature.
pts_by_player_surf = df_player.groupby(['p_id', 't_surf'])['p_pts_won%']

cascade_51to60 = (
    [(min_obs, 51) for min_obs in range(10, 4, -1)]
    + [(5, lag) for lag in range(50, 0, -1)]
    + [(min_obs, 1) for min_obs in range(4, 0, -1)]
)

filled_51to60 = None
for min_obs, lag in cascade_51to60:
    # Bind the loop values as defaults so the lambda does not depend on late binding.
    candidate = pts_by_player_surf.transform(
        lambda x, min_obs=min_obs, lag=lag: (
            x.rolling(window=10, min_periods=min_obs).mean().round(2).shift(lag)
        )
    )
    # The first step seeds the column; later steps only fill remaining gaps.
    filled_51to60 = candidate if filled_51to60 is None else filled_51to60.fillna(candidate)

df_player['p_pts_won%_51to60'] = filled_51to60

# Flip the row order back now that the lagged features have been computed.
df_player = df_player.iloc[::-1]

In [94]:
# Combine the six by-10s "% total pts won" bins built above into one
# recency-weighted score per row. The weights step down linearly from 6 for the
# 1to10 bin to 1 for the 51to60 bin; dividing by their total (21) turns the sum
# into a weighted mean, which is rounded to 2 d.p. A NaN in any bin makes the
# result NaN for that row.
# Author's note: decay-weighted versions performed better than unweighted ones.
decay_weights = {
    'p_pts_won%_1to10': 6,
    'p_pts_won%_11to20': 5,
    'p_pts_won%_21to30': 4,
    'p_pts_won%_31to40': 3,
    'p_pts_won%_41to50': 2,
    'p_pts_won%_51to60': 1,
}

# Accumulate in dict order (nearest bin first) so the arithmetic order is fixed.
weighted_total = sum(df_player[bin_col] * weight for bin_col, weight in decay_weights.items())
df_player["p_pts_won%_l60_decay"] = (weighted_total / sum(decay_weights.values())).round(2)

In [None]:
# Display the last 30 rows of df_player as a visual spot check of the frame,
# including the p_pts_won%_l60_decay column added in the previous cell.
df_player.tail(30)

In [None]:
# For comparison (currently disabled): an unweighted mean of the five nearest 10-match bins, i.e. the last 50
# surface-specific matches. Note the decay-weighted feature above uses six bins (the last 60 matches).

#df_player["p_pts_won%_l50"] = ((df_player['p_pts_won%_1to10'] + df_player['p_pts_won%_11to20']  + df_player['p_pts_won%_21to30']  
#+ df_player['p_pts_won%_31to40'] + df_player['p_pts_won%_41to50'])/5).round(2)

In [96]:
# Drop the six transient by-10s bin columns (% total pts won); they were only
# needed as inputs to the decay-weighted p_pts_won%_l60_decay feature.
# The result is reassigned instead of using inplace=True, so the cell builds a
# new frame rather than mutating one that came from an iloc[::-1] reversal.
transient_bin_cols = [
    "p_pts_won%_1to10",
    "p_pts_won%_11to20",
    "p_pts_won%_21to30",
    "p_pts_won%_31to40",
    "p_pts_won%_41to50",
    "p_pts_won%_51to60",
]
df_player = df_player.drop(columns=transient_bin_cols)
df_player

Unnamed: 0,t_id,t_date,tour_wk,t_name,t_country,t_surf,t_lvl,t_draw_size,m_num,t_round,...,p_ret_pts_won%,p_opp_rank_diff,p_opp_rank_pts_diff,p_opp_log_rank_diff,p_opp_ht_diff,p_opp_age_diff,p_L_opp_R,p_HCA_opp_N,m_outcome,p_pts_won%_l60_decay
19984,2019-560,20190826,2019_24,US Open,USA,Hard,4,128,2059,R128,...,38.41,-1280.0,-393.0,-2.34,-8.0,-20.95,0,1,0,
20367,2019-M014,20191014,2019_29,Moscow,RUS,Hard,1,32,2446,R32,...,31.94,-993.0,-57.0,-1.10,-8.0,-2.48,0,0,0,
18809,2019-M004,20190225,2019_07,Acapulco,MEX,Hard,1,32,545,R32,...,17.50,-2059.0,-753.0,-3.40,7.0,-6.63,0,0,0,
2257,2019-7696,20191105,2019_33,NextGen Finals,ITA,Hard,3,8,2781,F,...,46.34,-77.0,-1179.0,-1.66,5.0,-2.49,0,1,1,52.80
2259,2019-7696,20191105,2019_33,NextGen Finals,ITA,Hard,3,8,2783,SF,...,34.48,-35.0,-305.0,-0.46,5.0,-1.96,0,1,1,52.67
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
12825,2014-414,20140714,2014_19,Hamburg,GER,Clay,1,48,16430,R32,...,46.51,-266.0,-1572.0,-2.71,15.0,-14.82,0,1,1,45.88
12809,2014-414,20140714,2014_19,Hamburg,GER,Clay,1,48,16414,R64,...,57.69,-234.0,-702.0,-1.72,8.0,-10.04,0,1,1,39.65
31024,2014-321,20140707,2014_18,Stuttgart,GER,Clay,1,28,16360,R32,...,31.40,-237.0,-737.0,-1.78,2.0,-11.74,0,1,0,35.46
30686,2014-308,20140428,2014_13,Munich,GER,Clay,1,28,15713,R32,...,29.79,-699.0,-675.0,-2.45,15.0,-15.91,0,1,0,36.84


In [95]:
# Checkpoint: write the current player-level frame to disk, leaving the row index out of the file
df_player.to_csv(path_or_buf='../data/test_df2.csv', index=False)

In [97]:
# Serve-point performance over the last 60 surface-specific matches, in buckets of 10
# ("p_sv_pts_won%_x_to_y"). The rolling windows below look back along the row order, so the
# frame is reversed first.
# NOTE(review): presumably df_player is newest-first at this point, so the reversed frame is
# oldest-first -- confirm. Re-running this cell on its own flips the order again.
df_player = df_player.iloc[::-1]

# Bucket 1-10: per (player, surface), the mean serve-points-won % over a 10-row rolling window,
# shifted by 1 row so the current match never contributes to its own feature.
# Fallback ladder: require a full window first (min_periods=10), then relax to 9, 8, ..., 1 and
# use each relaxed result only where the value is still missing.
sv_pts_by_player_surf = df_player.groupby(['p_id','t_surf'])['p_sv_pts_won%']
sv_pts_1to10 = None
for min_obs in range(10, 0, -1):
    attempt = sv_pts_by_player_surf.transform(
        lambda x: x.rolling(window=10, min_periods=min_obs).mean().round(2).shift(1)
    )
    sv_pts_1to10 = attempt if sv_pts_1to10 is None else sv_pts_1to10.fillna(attempt)
df_player['p_sv_pts_won%_1to10'] = sv_pts_1to10

# Bucket 11-20: per (player, surface), the mean serve-points-won % over a 10-row rolling window
# shifted back by 11 rows. Where that is unavailable, fall back through the ladder below, taking
# each step's value only where the feature is still missing:
#   1. shift 11, relaxing min_periods 10 -> 5;
#   2. min_periods 5, pulling the window closer one row at a time (shift 10 -> 1); a shift of 0 is
#      never used, because that would count the match being predicted;
#   3. shift 1, relaxing min_periods 4 -> 1 to cover rows with very little history.
sv_pts_by_player_surf = df_player.groupby(['p_id','t_surf'])['p_sv_pts_won%']
ladder_11to20 = (
    [(min_obs, 11) for min_obs in range(10, 4, -1)]
    + [(5, lag) for lag in range(10, 0, -1)]
    + [(min_obs, 1) for min_obs in range(4, 0, -1)]
)
sv_pts_11to20 = None
for min_obs, lag in ladder_11to20:
    attempt = sv_pts_by_player_surf.transform(
        lambda x: x.rolling(window=10, min_periods=min_obs).mean().round(2).shift(lag)
    )
    sv_pts_11to20 = attempt if sv_pts_11to20 is None else sv_pts_11to20.fillna(attempt)
df_player['p_sv_pts_won%_11to20'] = sv_pts_11to20

# Bucket 21-30: per (player, surface), the mean serve-points-won % over a 10-row rolling window
# shifted back by 21 rows. Where that is unavailable, fall back through the ladder below, taking
# each step's value only where the feature is still missing:
#   1. shift 21, relaxing min_periods 10 -> 5;
#   2. min_periods 5, pulling the window closer one row at a time (shift 20 -> 1); a shift of 0 is
#      never used, because that would count the match being predicted;
#   3. shift 1, relaxing min_periods 4 -> 1 to cover rows with very little history.
sv_pts_by_player_surf = df_player.groupby(['p_id','t_surf'])['p_sv_pts_won%']
ladder_21to30 = (
    [(min_obs, 21) for min_obs in range(10, 4, -1)]
    + [(5, lag) for lag in range(20, 0, -1)]
    + [(min_obs, 1) for min_obs in range(4, 0, -1)]
)
sv_pts_21to30 = None
for min_obs, lag in ladder_21to30:
    attempt = sv_pts_by_player_surf.transform(
        lambda x: x.rolling(window=10, min_periods=min_obs).mean().round(2).shift(lag)
    )
    sv_pts_21to30 = attempt if sv_pts_21to30 is None else sv_pts_21to30.fillna(attempt)
df_player['p_sv_pts_won%_21to30'] = sv_pts_21to30

# Bucket 31-40: per (player, surface), the mean serve-points-won % over a 10-row rolling window
# shifted back by 31 rows. Where that is unavailable, fall back through the ladder below, taking
# each step's value only where the feature is still missing:
#   1. shift 31, relaxing min_periods 10 -> 5;
#   2. min_periods 5, pulling the window closer one row at a time (shift 30 -> 1); a shift of 0 is
#      never used, because that would count the match being predicted;
#   3. shift 1, relaxing min_periods 4 -> 1 to cover rows with very little history.
sv_pts_by_player_surf = df_player.groupby(['p_id','t_surf'])['p_sv_pts_won%']
ladder_31to40 = (
    [(min_obs, 31) for min_obs in range(10, 4, -1)]
    + [(5, lag) for lag in range(30, 0, -1)]
    + [(min_obs, 1) for min_obs in range(4, 0, -1)]
)
sv_pts_31to40 = None
for min_obs, lag in ladder_31to40:
    attempt = sv_pts_by_player_surf.transform(
        lambda x: x.rolling(window=10, min_periods=min_obs).mean().round(2).shift(lag)
    )
    sv_pts_31to40 = attempt if sv_pts_31to40 is None else sv_pts_31to40.fillna(attempt)
df_player['p_sv_pts_won%_31to40'] = sv_pts_31to40

df_player['p_sv_pts_won%_41to50'] = df_player.groupby(['p_id','t_surf'])['p_sv_pts_won%'].transform(lambda x: x.rolling(window=10, min_periods = 10).mean().round(2).shift(41))
df_player['p_sv_pts_won%_41to50'] = df_player['p_sv_pts_won%_41to50'].fillna(df_player.groupby(['p_id','t_surf'])['p_sv_pts_won%'].transform(lambda x: x.rolling(window=10, min_periods = 9).mean().round(2).shift(41)))
df_player['p_sv_pts_won%_41to50'] = df_player['p_sv_pts_won%_41to50'].fillna(df_player.groupby(['p_id','t_surf'])['p_sv_pts_won%'].transform(lambda x: x.rolling(window=10, min_periods = 8).mean().round(2).shift(41)))
df_player['p_sv_pts_won%_41to50'] = df_player['p_sv_pts_won%_41to50'].fillna(df_player.groupby(['p_id','t_surf'])['p_sv_pts_won%'].transform(lambda x: x.rolling(window=10, min_periods = 7).mean().round(2).shift(41)))
df_player['p_sv_pts_won%_41to50'] = df_player['p_sv_pts_won%_41to50'].fillna(df_player.groupby(['p_id','t_surf'])['p_sv_pts_won%'].transform(lambda x: x.rolling(window=10, min_periods = 6).mean().round(2).shift(41)))
df_player['p_sv_pts_won%_41to50'] = df_player['p_sv_pts_won%_41to50'].fillna(df_player.groupby(['p_id','t_surf'])['p_sv_pts_won%'].transform(lambda x: x.rolling(window=10, min_periods = 5).mean().round(2).shift(41)))
df_player['p_sv_pts_won%_41to50'] = df_player['p_sv_pts_won%_41to50'].fillna(df_player.groupby(['p_id','t_surf'])['p_sv_pts_won%'].transform(lambda x: x.rolling(window=10, min_periods = 5).mean().round(2).shift(40)))
df_player['p_sv_pts_won%_41to50'] = df_player['p_sv_pts_won%_41to50'].fillna(df_player.groupby(['p_id','t_surf'])['p_sv_pts_won%'].transform(lambda x: x.rolling(window=10, min_periods = 5).mean().round(2).shift(39)))
df_player['p_sv_pts_won%_41to50'] = df_player['p_sv_pts_won%_41to50'].fillna(df_player.groupby(['p_id','t_surf'])['p_sv_pts_won%'].transform(lambda x: x.rolling(window=10, min_periods = 5).mean().round(2).shift(38)))
df_player['p_sv_pts_won%_41to50'] = df_player['p_sv_pts_won%_41to50'].fillna(df_player.groupby(['p_id','t_surf'])['p_sv_pts_won%'].transform(lambda x: x.rolling(window=10, min_periods = 5).mean().round(2).shift(37)))
df_player['p_sv_pts_won%_41to50'] = df_player['p_sv_pts_won%_41to50'].fillna(df_player.groupby(['p_id','t_surf'])['p_sv_pts_won%'].transform(lambda x: x.rolling(window=10, min_periods = 5).mean().round(2).shift(36)))
df_player['p_sv_pts_won%_41to50'] = df_player['p_sv_pts_won%_41to50'].fillna(df_player.groupby(['p_id','t_surf'])['p_sv_pts_won%'].transform(lambda x: x.rolling(window=10, min_periods = 5).mean().round(2).shift(35)))
df_player['p_sv_pts_won%_41to50'] = df_player['p_sv_pts_won%_41to50'].fillna(df_player.groupby(['p_id','t_surf'])['p_sv_pts_won%'].transform(lambda x: x.rolling(window=10, min_periods = 5).mean().round(2).shift(34)))
df_player['p_sv_pts_won%_41to50'] = df_player['p_sv_pts_won%_41to50'].fillna(df_player.groupby(['p_id','t_surf'])['p_sv_pts_won%'].transform(lambda x: x.rolling(window=10, min_periods = 5).mean().round(2).shift(33)))
df_player['p_sv_pts_won%_41to50'] = df_player['p_sv_pts_won%_41to50'].fillna(df_player.groupby(['p_id','t_surf'])['p_sv_pts_won%'].transform(lambda x: x.rolling(window=10, min_periods = 5).mean().round(2).shift(32)))
df_player['p_sv_pts_won%_41to50'] = df_player['p_sv_pts_won%_41to50'].fillna(df_player.groupby(['p_id','t_surf'])['p_sv_pts_won%'].transform(lambda x: x.rolling(window=10, min_periods = 5).mean().round(2).shift(31)))
df_player['p_sv_pts_won%_41to50'] = df_player['p_sv_pts_won%_41to50'].fillna(df_player.groupby(['p_id','t_surf'])['p_sv_pts_won%'].transform(lambda x: x.rolling(window=10, min_periods = 5).mean().round(2).shift(30)))
df_player['p_sv_pts_won%_41to50'] = df_player['p_sv_pts_won%_41to50'].fillna(df_player.groupby(['p_id','t_surf'])['p_sv_pts_won%'].transform(lambda x: x.rolling(window=10, min_periods = 5).mean().round(2).shift(29)))
df_player['p_sv_pts_won%_41to50'] = df_player['p_sv_pts_won%_41to50'].fillna(df_player.groupby(['p_id','t_surf'])['p_sv_pts_won%'].transform(lambda x: x.rolling(window=10, min_periods = 5).mean().round(2).shift(28)))
df_player['p_sv_pts_won%_41to50'] = df_player['p_sv_pts_won%_41to50'].fillna(df_player.groupby(['p_id','t_surf'])['p_sv_pts_won%'].transform(lambda x: x.rolling(window=10, min_periods = 5).mean().round(2).shift(27)))
df_player['p_sv_pts_won%_41to50'] = df_player['p_sv_pts_won%_41to50'].fillna(df_player.groupby(['p_id','t_surf'])['p_sv_pts_won%'].transform(lambda x: x.rolling(window=10, min_periods = 5).mean().round(2).shift(26)))
df_player['p_sv_pts_won%_41to50'] = df_player['p_sv_pts_won%_41to50'].fillna(df_player.groupby(['p_id','t_surf'])['p_sv_pts_won%'].transform(lambda x: x.rolling(window=10, min_periods = 5).mean().round(2).shift(25)))
df_player['p_sv_pts_won%_41to50'] = df_player['p_sv_pts_won%_41to50'].fillna(df_player.groupby(['p_id','t_surf'])['p_sv_pts_won%'].transform(lambda x: x.rolling(window=10, min_periods = 5).mean().round(2).shift(24)))
df_player['p_sv_pts_won%_41to50'] = df_player['p_sv_pts_won%_41to50'].fillna(df_player.groupby(['p_id','t_surf'])['p_sv_pts_won%'].transform(lambda x: x.rolling(window=10, min_periods = 5).mean().round(2).shift(23)))
df_player['p_sv_pts_won%_41to50'] = df_player['p_sv_pts_won%_41to50'].fillna(df_player.groupby(['p_id','t_surf'])['p_sv_pts_won%'].transform(lambda x: x.rolling(window=10, min_periods = 5).mean().round(2).shift(22)))
df_player['p_sv_pts_won%_41to50'] = df_player['p_sv_pts_won%_41to50'].fillna(df_player.groupby(['p_id','t_surf'])['p_sv_pts_won%'].transform(lambda x: x.rolling(window=10, min_periods = 5).mean().round(2).shift(21)))
df_player['p_sv_pts_won%_41to50'] = df_player['p_sv_pts_won%_41to50'].fillna(df_player.groupby(['p_id','t_surf'])['p_sv_pts_won%'].transform(lambda x: x.rolling(window=10, min_periods = 5).mean().round(2).shift(20)))
df_player['p_sv_pts_won%_41to50'] = df_player['p_sv_pts_won%_41to50'].fillna(df_player.groupby(['p_id','t_surf'])['p_sv_pts_won%'].transform(lambda x: x.rolling(window=10, min_periods = 5).mean().round(2).shift(19)))
df_player['p_sv_pts_won%_41to50'] = df_player['p_sv_pts_won%_41to50'].fillna(df_player.groupby(['p_id','t_surf'])['p_sv_pts_won%'].transform(lambda x: x.rolling(window=10, min_periods = 5).mean().round(2).shift(18)))
df_player['p_sv_pts_won%_41to50'] = df_player['p_sv_pts_won%_41to50'].fillna(df_player.groupby(['p_id','t_surf'])['p_sv_pts_won%'].transform(lambda x: x.rolling(window=10, min_periods = 5).mean().round(2).shift(17)))
df_player['p_sv_pts_won%_41to50'] = df_player['p_sv_pts_won%_41to50'].fillna(df_player.groupby(['p_id','t_surf'])['p_sv_pts_won%'].transform(lambda x: x.rolling(window=10, min_periods = 5).mean().round(2).shift(16)))
# Keep back-filling p_sv_pts_won%_41to50 wherever it is still NaN.
# Every step recomputes the per-(p_id, t_surf) 10-row rolling mean of p_sv_pts_won%
# (rounded to 2 dp) with a given (min_periods, shift) and fills ONLY the remaining gaps,
# so earlier steps always take precedence over later ones.
sv_by_player_surface = df_player.groupby(['p_id', 't_surf'])['p_sv_pts_won%']
sv_41to50_fallbacks = (
    # min_periods stays at 5 while the window is slid closer to the current row: shift 15 -> 1
    [(5, lag) for lag in range(15, 0, -1)]
    # you can't go to a zero shift, because this would mean counting matches that haven't
    # happened yet; instead lower the period requirement 4 -> 1 at shift 1
    + [(min_obs, 1) for min_obs in range(4, 0, -1)]
)
sv_41to50 = df_player['p_sv_pts_won%_41to50']
for min_obs, lag in sv_41to50_fallbacks:
    # The lambda is invoked immediately inside transform(), so reading the loop variables is safe.
    candidate = sv_by_player_surface.transform(
        lambda x: x.rolling(window=10, min_periods=min_obs).mean().round(2).shift(lag)
    )
    sv_41to50 = sv_41to50.fillna(candidate)
df_player['p_sv_pts_won%_41to50'] = sv_41to50

# p_sv_pts_won%_51to60: per-(p_id, t_surf) mean of p_sv_pts_won% over the 10 rows that lie
# 51-60 rows back within the group (rolling window of 10, shifted by 51), rounded to 2 dp.
# Where that is unavailable, fall back step by step; each step fills ONLY the cells that are
# still NaN, so earlier steps always take precedence over later ones.
sv_by_player_surface = df_player.groupby(['p_id', 't_surf'])['p_sv_pts_won%']
sv_51to60_fallbacks = (
    # 1) target lag (shift 51), relaxing the required observations 10 -> 5
    [(min_obs, 51) for min_obs in range(10, 4, -1)]
    # 2) min_periods fixed at 5, sliding the window closer to the current row: shift 50 -> 1
    + [(5, lag) for lag in range(50, 0, -1)]
    # 3) you can't go to a zero shift, because this would mean counting matches that haven't
    #    happened yet; instead lower the period requirement 4 -> 1 at shift 1
    + [(min_obs, 1) for min_obs in range(4, 0, -1)]
)
sv_51to60 = None
for min_obs, lag in sv_51to60_fallbacks:
    # The lambda is invoked immediately inside transform(), so reading the loop variables is safe.
    candidate = sv_by_player_surface.transform(
        lambda x: x.rolling(window=10, min_periods=min_obs).mean().round(2).shift(lag)
    )
    sv_51to60 = candidate if sv_51to60 is None else sv_51to60.fillna(candidate)
df_player['p_sv_pts_won%_51to60'] = sv_51to60

# Flip the row order once the backward-looking features are built.
# NOTE(review): assumes the frame was reversed earlier in this cell so the rolling/shift
# calls above look backward in time - confirm.
df_player = df_player.iloc[::-1]

In [98]:
# Collapse the six by-10s serve-points-won % buckets built above into a single
# decay-weighted score per row: the most recent bucket (1to10) carries weight 6 and the
# weight drops by 1 per older bucket down to 1 for 51to60. The weighted sum is divided by
# the total weight (21) and rounded to 2 dp. A NaN in any bucket makes the result NaN.
# Author's note: decay-weighted versions unequivocally perform better than unweighted versions.
sv_decay_weights = {
    'p_sv_pts_won%_1to10': 6,
    'p_sv_pts_won%_11to20': 5,
    'p_sv_pts_won%_21to30': 4,
    'p_sv_pts_won%_31to40': 3,
    'p_sv_pts_won%_41to50': 2,
    'p_sv_pts_won%_51to60': 1,
}
sv_weighted_total = None
for bucket_col, weight in sv_decay_weights.items():
    weighted_bucket = df_player[bucket_col] * weight
    sv_weighted_total = (
        weighted_bucket if sv_weighted_total is None else sv_weighted_total + weighted_bucket
    )
df_player["p_sv_pts_won%_l60_decay"] = (sv_weighted_total / 21).round(2)

In [None]:
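# For the sake of comparison, generate an unweighted version of the same (note: this averages only the five most recent buckets, 1to10 through 41to50; the 51to60 bucket is not included)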

#df_player["p_sv_pts_won%_l50"] = ((df_player['p_sv_pts_won%_1to10'] + df_player['p_sv_pts_won%_11to20']  + df_player['p_sv_pts_won%_21to30']  
#+ df_player['p_sv_pts_won%_31to40'] + df_player['p_sv_pts_won%_41to50'])/5).round(2)

In [99]:
# The per-bucket serve columns were only inputs to p_sv_pts_won%_l60_decay; remove them
# from df_player (in place) and display the resulting frame.
sv_bucket_cols = [
    "p_sv_pts_won%_1to10",
    "p_sv_pts_won%_11to20",
    "p_sv_pts_won%_21to30",
    "p_sv_pts_won%_31to40",
    "p_sv_pts_won%_41to50",
    "p_sv_pts_won%_51to60",
]
df_player.drop(columns=sv_bucket_cols, inplace=True)
df_player

Unnamed: 0,t_id,t_date,tour_wk,t_name,t_country,t_surf,t_lvl,t_draw_size,m_num,t_round,...,p_opp_rank_diff,p_opp_rank_pts_diff,p_opp_log_rank_diff,p_opp_ht_diff,p_opp_age_diff,p_L_opp_R,p_HCA_opp_N,m_outcome,p_pts_won%_l60_decay,p_sv_pts_won%_l60_decay
19984,2019-560,20190826,2019_24,US Open,USA,Hard,4,128,2059,R128,...,-1280.0,-393.0,-2.34,-8.0,-20.95,0,1,0,,
20367,2019-M014,20191014,2019_29,Moscow,RUS,Hard,1,32,2446,R32,...,-993.0,-57.0,-1.10,-8.0,-2.48,0,0,0,,
18809,2019-M004,20190225,2019_07,Acapulco,MEX,Hard,1,32,545,R32,...,-2059.0,-753.0,-3.40,7.0,-6.63,0,0,0,,
2257,2019-7696,20191105,2019_33,NextGen Finals,ITA,Hard,3,8,2781,F,...,-77.0,-1179.0,-1.66,5.0,-2.49,0,1,1,52.80,68.07
2259,2019-7696,20191105,2019_33,NextGen Finals,ITA,Hard,3,8,2783,SF,...,-35.0,-305.0,-0.46,5.0,-1.96,0,1,1,52.67,67.87
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
12825,2014-414,20140714,2014_19,Hamburg,GER,Clay,1,48,16430,R32,...,-266.0,-1572.0,-2.71,15.0,-14.82,0,1,1,45.88,52.04
12809,2014-414,20140714,2014_19,Hamburg,GER,Clay,1,48,16414,R64,...,-234.0,-702.0,-1.72,8.0,-10.04,0,1,1,39.65,45.14
31024,2014-321,20140707,2014_18,Stuttgart,GER,Clay,1,28,16360,R32,...,-237.0,-737.0,-1.78,2.0,-11.74,0,1,0,35.46,35.98
30686,2014-308,20140428,2014_13,Munich,GER,Clay,1,28,15713,R32,...,-699.0,-675.0,-2.45,15.0,-15.91,0,1,0,36.84,33.33


In [None]:
# Spot-check the last rows of df_player (tail() shows 5 rows by default)
df_player.tail()

In [100]:
# performance integration over the last 50 surface-specific matches by 10s, but just for return pts ("p_ret_pts_won%_x_to_y")
# Reverse the row order before building the rolling features.
# NOTE(review): presumably this puts each player's matches oldest-first so that
# rolling()/shift() look backward in time - confirm against df_player's sort order.
df_player = df_player.iloc[::-1]

# p_ret_pts_won%_1to10: per-(p_id, t_surf) mean of p_ret_pts_won% over the previous 10 rows
# (rolling window of 10, shifted by 1 so the current row is excluded), rounded to 2 dp.
# The required number of observations is relaxed 10 -> 1; each step fills only the cells
# that are still NaN.
# NOTE(review): min_periods only gates whether a value is emitted, so this ladder should
# equal a single min_periods=1 pass; kept step by step to mirror the original logic.
ret_by_player_surface = df_player.groupby(['p_id', 't_surf'])['p_ret_pts_won%']
ret_1to10 = None
for min_obs in range(10, 0, -1):
    # The lambda is invoked immediately inside transform(), so reading the loop variable is safe.
    candidate = ret_by_player_surface.transform(
        lambda x: x.rolling(window=10, min_periods=min_obs).mean().round(2).shift(1)
    )
    ret_1to10 = candidate if ret_1to10 is None else ret_1to10.fillna(candidate)
df_player['p_ret_pts_won%_1to10'] = ret_1to10

# p_ret_pts_won%_11to20: per-(p_id, t_surf) mean of p_ret_pts_won% over the 10 rows that lie
# 11-20 rows back within the group (rolling window of 10, shifted by 11), rounded to 2 dp.
# Where that is unavailable, fall back step by step; each step fills ONLY the cells that are
# still NaN, so earlier steps always take precedence over later ones.
ret_by_player_surface = df_player.groupby(['p_id', 't_surf'])['p_ret_pts_won%']
ret_11to20_fallbacks = (
    # 1) target lag (shift 11), relaxing the required observations 10 -> 5
    [(min_obs, 11) for min_obs in range(10, 4, -1)]
    # 2) min_periods fixed at 5, sliding the window closer to the current row: shift 10 -> 1
    + [(5, lag) for lag in range(10, 0, -1)]
    # 3) you can't go to a zero shift, because this would mean counting matches that haven't
    #    happened yet; instead lower the period requirement 4 -> 1 at shift 1
    + [(min_obs, 1) for min_obs in range(4, 0, -1)]
)
ret_11to20 = None
for min_obs, lag in ret_11to20_fallbacks:
    # The lambda is invoked immediately inside transform(), so reading the loop variables is safe.
    candidate = ret_by_player_surface.transform(
        lambda x: x.rolling(window=10, min_periods=min_obs).mean().round(2).shift(lag)
    )
    ret_11to20 = candidate if ret_11to20 is None else ret_11to20.fillna(candidate)
df_player['p_ret_pts_won%_11to20'] = ret_11to20

# p_ret_pts_won%_21to30: per-(p_id, t_surf) mean of p_ret_pts_won% over the 10 rows that lie
# 21-30 rows back within the group (rolling window of 10, shifted by 21), rounded to 2 dp.
# Where that is unavailable, fall back step by step; each step fills ONLY the cells that are
# still NaN, so earlier steps always take precedence over later ones.
ret_by_player_surface = df_player.groupby(['p_id', 't_surf'])['p_ret_pts_won%']
ret_21to30_fallbacks = (
    # 1) target lag (shift 21), relaxing the required observations 10 -> 5
    [(min_obs, 21) for min_obs in range(10, 4, -1)]
    # 2) min_periods fixed at 5, sliding the window closer to the current row: shift 20 -> 1
    + [(5, lag) for lag in range(20, 0, -1)]
    # 3) you can't go to a zero shift, because this would mean counting matches that haven't
    #    happened yet; instead lower the period requirement 4 -> 1 at shift 1
    + [(min_obs, 1) for min_obs in range(4, 0, -1)]
)
ret_21to30 = None
for min_obs, lag in ret_21to30_fallbacks:
    # The lambda is invoked immediately inside transform(), so reading the loop variables is safe.
    candidate = ret_by_player_surface.transform(
        lambda x: x.rolling(window=10, min_periods=min_obs).mean().round(2).shift(lag)
    )
    ret_21to30 = candidate if ret_21to30 is None else ret_21to30.fillna(candidate)
df_player['p_ret_pts_won%_21to30'] = ret_21to30

# 'p_ret_pts_won%_31to40': mean of 'p_ret_pts_won%' over the 10-row window that
# ends 31 rows before the current row, computed within each ('p_id', 't_surf')
# group and rounded to 2 decimals. Where that window is unavailable, the value
# is taken from the first fallback below that yields a number.
# NOTE(review): assumes rows are in chronological order within each group - confirm.
_ret_31to40_groups = df_player.groupby(['p_id', 't_surf'])['p_ret_pts_won%']

# (min_periods, shift) candidates in priority order; earlier entries win.
_ret_31to40_schedule = (
    # 1) Ideal lag of 31, accepting windows with 10 down to 5 observations.
    [(_min_obs, 31) for _min_obs in range(10, 4, -1)]
    # 2) Still requiring 5 observations, slide the window toward the current row.
    #    The lag stops at 1: shift(0) would put the current row's own value in
    #    the window. Note that for short histories this column therefore holds
    #    the mean of a more recent window than "31 to 40".
    + [(5, _lag) for _lag in range(30, 0, -1)]
    # 3) Lag pinned at 1, relax the observation floor 4 -> 1 so rows with fewer
    #    than five prior observations in their group still get a value. The
    #    first row of each group has no prior row and stays NaN.
    + [(_min_obs, 1) for _min_obs in range(4, 0, -1)]
)

_ret_31to40 = None
for _min_obs, _lag in _ret_31to40_schedule:
    # Defaults bind the current loop values into the per-group function.
    _candidate = _ret_31to40_groups.transform(
        lambda x, min_obs=_min_obs, lag=_lag: (
            x.rolling(window=10, min_periods=min_obs).mean().round(2).shift(lag)
        )
    )
    # First candidate seeds the column; later ones only fill remaining NaNs.
    _ret_31to40 = _candidate if _ret_31to40 is None else _ret_31to40.fillna(_candidate)

df_player['p_ret_pts_won%_31to40'] = _ret_31to40

# 'p_ret_pts_won%_41to50': mean of 'p_ret_pts_won%' over the 10-row window that
# ends 41 rows before the current row, computed within each ('p_id', 't_surf')
# group and rounded to 2 decimals. Where that window is unavailable, the value
# is taken from the first fallback below that yields a number.
# NOTE(review): assumes rows are in chronological order within each group - confirm.
_ret_41to50_groups = df_player.groupby(['p_id', 't_surf'])['p_ret_pts_won%']

# (min_periods, shift) candidates in priority order; earlier entries win.
_ret_41to50_schedule = (
    # 1) Ideal lag of 41, accepting windows with 10 down to 5 observations.
    [(_min_obs, 41) for _min_obs in range(10, 4, -1)]
    # 2) Still requiring 5 observations, slide the window toward the current row.
    #    The lag stops at 1: shift(0) would put the current row's own value in
    #    the window. Note that for short histories this column therefore holds
    #    the mean of a more recent window than "41 to 50".
    + [(5, _lag) for _lag in range(40, 0, -1)]
    # 3) Lag pinned at 1, relax the observation floor 4 -> 1 so rows with fewer
    #    than five prior observations in their group still get a value. The
    #    first row of each group has no prior row and stays NaN.
    + [(_min_obs, 1) for _min_obs in range(4, 0, -1)]
)

_ret_41to50 = None
for _min_obs, _lag in _ret_41to50_schedule:
    # Defaults bind the current loop values into the per-group function.
    _candidate = _ret_41to50_groups.transform(
        lambda x, min_obs=_min_obs, lag=_lag: (
            x.rolling(window=10, min_periods=min_obs).mean().round(2).shift(lag)
        )
    )
    # First candidate seeds the column; later ones only fill remaining NaNs.
    _ret_41to50 = _candidate if _ret_41to50 is None else _ret_41to50.fillna(_candidate)

df_player['p_ret_pts_won%_41to50'] = _ret_41to50

# 'p_ret_pts_won%_51to60': mean of 'p_ret_pts_won%' over the 10-row window that
# ends 51 rows before the current row, computed within each ('p_id', 't_surf')
# group and rounded to 2 decimals, with fallbacks for rows lacking that window.
# NOTE(review): assumes rows are in chronological order within each group - confirm.
_ret_51to60_groups = df_player.groupby(['p_id', 't_surf'])['p_ret_pts_won%']

# Seed the column with the strict version: a full 10-observation window, lag 51.
df_player['p_ret_pts_won%_51to60'] = _ret_51to60_groups.transform(
    lambda x: x.rolling(window=10, min_periods=10).mean().round(2).shift(51)
)

# Same lag, but accept windows holding 9 down to 5 observations.
for _min_obs in range(9, 4, -1):
    df_player['p_ret_pts_won%_51to60'] = df_player['p_ret_pts_won%_51to60'].fillna(
        _ret_51to60_groups.transform(
            lambda x: x.rolling(window=10, min_periods=_min_obs).mean().round(2).shift(51)
        )
    )

# Still requiring 5 observations, slide the window toward the current row.
# This loop stops at a lag of 3, matching the statements it replaces.
for _lag in range(50, 2, -1):
    df_player['p_ret_pts_won%_51to60'] = df_player['p_ret_pts_won%_51to60'].fillna(
        _ret_51to60_groups.transform(
            lambda x: x.rolling(window=10, min_periods=5).mean().round(2).shift(_lag)
        )
    )
df_player['p_ret_pts_won%_51to60'] = df_player['p_ret_pts_won%_51to60'].fillna(df_player.groupby(['p_id','t_surf'])['p_ret_pts_won%'].transform(lambda x: x.rolling(window=10, min_periods = 5).mean().round(2).shift(2)))
df_player['p_ret_pts_won%_51to60'] = df_player['p_ret_pts_won%_51to60'].fillna(df_player.groupby(['p_id','t_surf'])['p_ret_pts_won%'].transform(lambda x: x.rolling(window=10, min_periods = 5).mean().round(2).shift(1))) #you can't go to a zero shift, because this would mean counting matches that haven't happened yet
df_player['p_ret_pts_won%_51to60'] = df_player['p_ret_pts_won%_51to60'].fillna(df_player.groupby(['p_id','t_surf'])['p_ret_pts_won%'].transform(lambda x: x.rolling(window=10, min_periods = 4).mean().round(2).shift(1))) #given the above, we will lower the period requirement stepping back down to 1 to cover the last few matches in the sample
df_player['p_ret_pts_won%_51to60'] = df_player['p_ret_pts_won%_51to60'].fillna(df_player.groupby(['p_id','t_surf'])['p_ret_pts_won%'].transform(lambda x: x.rolling(window=10, min_periods = 3).mean().round(2).shift(1))) #given the above, we will lower the period requirement stepping back down to 1 to cover the last few matches in the sample
df_player['p_ret_pts_won%_51to60'] = df_player['p_ret_pts_won%_51to60'].fillna(df_player.groupby(['p_id','t_surf'])['p_ret_pts_won%'].transform(lambda x: x.rolling(window=10, min_periods = 2).mean().round(2).shift(1))) #given the above, we will lower the period requirement stepping back down to 1 to cover the last few matches in the sample
df_player['p_ret_pts_won%_51to60'] = df_player['p_ret_pts_won%_51to60'].fillna(df_player.groupby(['p_id','t_surf'])['p_ret_pts_won%'].transform(lambda x: x.rolling(window=10, min_periods = 1).mean().round(2).shift(1))) #given the above, we will lower the period requirement stepping back down to 1 to cover the last few matches in the sample

df_player = df_player.iloc[::-1]

In [101]:
# Decay-weighted return-points-won % over (up to) the last 60 matches.
# The six 10-match bucket averages built earlier in the notebook are blended with linearly
# decreasing weights -- 6 for the most recent bucket ('1to10') down to 1 for the oldest
# ('51to60') -- and divided by the weight total (6+5+4+3+2+1 = 21), then rounded to 2 dp.
# A row comes out NaN whenever any one of its six buckets is NaN.
df_player["p_ret_pts_won%_l60_decay"] = (
    df_player['p_ret_pts_won%_1to10'].mul(6)
    .add(df_player['p_ret_pts_won%_11to20'].mul(5))
    .add(df_player['p_ret_pts_won%_21to30'].mul(4))
    .add(df_player['p_ret_pts_won%_31to40'].mul(3))
    .add(df_player['p_ret_pts_won%_41to50'].mul(2))
    .add(df_player['p_ret_pts_won%_51to60'].mul(1))
    .div(21)
    .round(2)
)


In [None]:
# For the sake of comparison, generate an unweighted version of the same

#df_player["p_ret_pts_won%_l50"] = ((df_player['p_ret_pts_won%_1to10'] + df_player['p_ret_pts_won%_11to20']  + df_player['p_ret_pts_won%_21to30']  
#+ df_player['p_ret_pts_won%_31to40'] + df_player['p_ret_pts_won%_41to50'])/5).round(2)

In [102]:
# Drop the transient per-bucket columns now that the decay-weighted return feature is built.
# A non-inplace drop with errors="ignore" keeps this cell safe to re-run: the previous
# in-place drop raised KeyError on a second execution because the columns were already gone.
# NOTE: errors="ignore" also means a misspelled column name is skipped silently, so keep this
# list in sync with the bucket names used in the cell that creates them.
df_player = df_player.drop(
    columns=[
        'p_ret_pts_won%_1to10',
        "p_ret_pts_won%_11to20",
        "p_ret_pts_won%_21to30",
        "p_ret_pts_won%_31to40",
        "p_ret_pts_won%_41to50",
        "p_ret_pts_won%_51to60",
    ],
    errors="ignore",
)
df_player

Unnamed: 0,t_id,t_date,tour_wk,t_name,t_country,t_surf,t_lvl,t_draw_size,m_num,t_round,...,p_opp_rank_pts_diff,p_opp_log_rank_diff,p_opp_ht_diff,p_opp_age_diff,p_L_opp_R,p_HCA_opp_N,m_outcome,p_pts_won%_l60_decay,p_sv_pts_won%_l60_decay,p_ret_pts_won%_l60_decay
19984,2019-560,20190826,2019_24,US Open,USA,Hard,4,128,2059,R128,...,-393.0,-2.34,-8.0,-20.95,0,1,0,,,
20367,2019-M014,20191014,2019_29,Moscow,RUS,Hard,1,32,2446,R32,...,-57.0,-1.10,-8.0,-2.48,0,0,0,,,
18809,2019-M004,20190225,2019_07,Acapulco,MEX,Hard,1,32,545,R32,...,-753.0,-3.40,7.0,-6.63,0,0,0,,,
2257,2019-7696,20191105,2019_33,NextGen Finals,ITA,Hard,3,8,2781,F,...,-1179.0,-1.66,5.0,-2.49,0,1,1,52.80,68.07,39.28
2259,2019-7696,20191105,2019_33,NextGen Finals,ITA,Hard,3,8,2783,SF,...,-305.0,-0.46,5.0,-1.96,0,1,1,52.67,67.87,39.37
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
12825,2014-414,20140714,2014_19,Hamburg,GER,Clay,1,48,16430,R32,...,-1572.0,-2.71,15.0,-14.82,0,1,1,45.88,52.04,39.54
12809,2014-414,20140714,2014_19,Hamburg,GER,Clay,1,48,16414,R64,...,-702.0,-1.72,8.0,-10.04,0,1,1,39.65,45.14,33.49
31024,2014-321,20140707,2014_18,Stuttgart,GER,Clay,1,28,16360,R32,...,-737.0,-1.78,2.0,-11.74,0,1,0,35.46,35.98,34.54
30686,2014-308,20140428,2014_13,Munich,GER,Clay,1,28,15713,R32,...,-675.0,-2.45,15.0,-15.91,0,1,0,36.84,33.33,39.29


In [103]:
# Checkpoint: write the current player-level frame to disk (the row index is not written).
df_player.to_csv(path_or_buf='../data/test_df3.csv', index=False)

# Status note: verified good up to this point (9:53 AM, 04/26/22)

In [None]:
# Inspect the last 30 rows of the player-level frame.
df_player.tail(30)

In [None]:
# Print the frame summary (columns, dtypes, non-null counts) as a sanity check.
df_player.info()

In [104]:
# Surface-specific volatility of total-points-won %: within each (p_id, t_surf) group, the
# standard deviation of 'p_pts_won%' over a rolling window of up to 60 rows, rounded to 2 dp
# and shifted by one row so the current match never contributes to its own feature.
# The frame is flipped before the rolling pass and flipped back afterwards, restoring the
# incoming row order (presumably newest-first on the way in, oldest-first while rolling -- verify).
# NOTE(review): with rolling std's default ddof=1 a single observation yields NaN, so
# min_periods=1 effectively behaves like 2 here.
df_player = (
    df_player.iloc[::-1]
    .assign(**{
        'p_pts_won%_std_l60': lambda frame: frame.groupby(['p_id', 't_surf'])['p_pts_won%'].transform(
            lambda hist: hist.rolling(window=60, min_periods=1).std().round(2).shift(1)
        )
    })
    .iloc[::-1]
)

In [105]:
# Surface-specific volatility of serve-points-won %: within each (p_id, t_surf) group, the
# standard deviation of 'p_sv_pts_won%' over a rolling window of up to 60 rows, rounded to 2 dp
# and shifted by one row so the current match never contributes to its own feature.
# The frame is flipped before the rolling pass and flipped back afterwards, restoring the
# incoming row order (presumably newest-first on the way in, oldest-first while rolling -- verify).
# NOTE(review): with rolling std's default ddof=1 a single observation yields NaN, so
# min_periods=1 effectively behaves like 2 here.
df_player = (
    df_player.iloc[::-1]
    .assign(**{
        'p_sv_pts_won%_std_l60': lambda frame: frame.groupby(['p_id', 't_surf'])['p_sv_pts_won%'].transform(
            lambda hist: hist.rolling(window=60, min_periods=1).std().round(2).shift(1)
        )
    })
    .iloc[::-1]
)

In [106]:
# Surface-specific volatility of return-points-won %: within each (p_id, t_surf) group, the
# standard deviation of 'p_ret_pts_won%' over a rolling window of up to 60 rows, rounded to 2 dp
# and shifted by one row so the current match never contributes to its own feature.
# The frame is flipped before the rolling pass and flipped back afterwards, restoring the
# incoming row order (presumably newest-first on the way in, oldest-first while rolling -- verify).
# NOTE(review): with rolling std's default ddof=1 a single observation yields NaN, so
# min_periods=1 effectively behaves like 2 here.
df_player = (
    df_player.iloc[::-1]
    .assign(**{
        'p_ret_pts_won%_std_l60': lambda frame: frame.groupby(['p_id', 't_surf'])['p_ret_pts_won%'].transform(
            lambda hist: hist.rolling(window=60, min_periods=1).std().round(2).shift(1)
        )
    })
    .iloc[::-1]
)

In [107]:
# Preview the first 30 rows to eyeball the newly added *_std_l60 columns.
df_player.head(30)

Unnamed: 0,t_id,t_date,tour_wk,t_name,t_country,t_surf,t_lvl,t_draw_size,m_num,t_round,...,p_opp_age_diff,p_L_opp_R,p_HCA_opp_N,m_outcome,p_pts_won%_l60_decay,p_sv_pts_won%_l60_decay,p_ret_pts_won%_l60_decay,p_pts_won%_std_l60,p_sv_pts_won%_std_l60,p_ret_pts_won%_std_l60
19984,2019-560,20190826,2019_24,US Open,USA,Hard,4,128,2059,R128,...,-20.95,0,1,0,,,,,,
20367,2019-M014,20191014,2019_29,Moscow,RUS,Hard,1,32,2446,R32,...,-2.48,0,0,0,,,,,,
18809,2019-M004,20190225,2019_07,Acapulco,MEX,Hard,1,32,545,R32,...,-6.63,0,0,0,,,,,,
2257,2019-7696,20191105,2019_33,NextGen Finals,ITA,Hard,3,8,2781,F,...,-2.49,0,1,1,52.8,68.07,39.28,6.15,6.46,8.02
2259,2019-7696,20191105,2019_33,NextGen Finals,ITA,Hard,3,8,2783,SF,...,-1.96,0,1,1,52.67,67.87,39.37,6.42,6.68,8.36
20539,2019-7696,20191105,2019_33,NextGen Finals,ITA,Hard,3,8,2794,RR3,...,-3.14,0,1,0,52.59,67.65,39.5,6.74,6.99,8.5
2271,2019-7696,20191105,2019_33,NextGen Finals,ITA,Hard,3,8,2795,RR2,...,-2.93,0,1,1,52.26,67.46,39.06,6.0,7.08,7.39
2268,2019-7696,20191105,2019_33,NextGen Finals,ITA,Hard,3,8,2792,RR1,...,-3.57,0,1,1,52.22,67.25,39.24,6.39,7.21,7.64
20447,2019-0337,20191021,2019_30,Vienna,AUT,Hard,1,32,2528,R16,...,-14.96,0,0,0,52.35,67.22,39.53,6.74,7.78,7.65
2191,2019-0337,20191021,2019_30,Vienna,AUT,Hard,1,32,2541,R32,...,-17.83,0,0,1,52.17,67.24,39.29,7.14,8.52,8.02


In [108]:
# Per-match ace rate: 'p_ace' as a percentage of 'p_svpt', rounded to 2 dp.
# This is the raw input for the surface-specific rolling ace % buckets built in the next cell
# (which feed the decay-weighted ace % over up to the last 60 matches).
df_player["p_ace%"] = df_player["p_ace"].div(df_player["p_svpt"]).mul(100).round(2)

In [109]:
# Flip the frame so that, within each (p_id, t_surf) group, shift(k) looks k rows back
# (presumably oldest-first after the flip -- verify against the sort applied upstream).
df_player = df_player.iloc[::-1]


def _fallback_bucket_mean(frame, value_col, base_shift):
    """Lagged rolling 10-row mean per (p_id, t_surf) group, with progressive fallbacks.

    Replaces the former wall of copy-pasted ``fillna`` statements, which differed only in
    two integers (``min_periods`` and ``shift``). The attempts below are generated in the
    same order the hand-written statements ran, and the first non-null attempt wins for
    each row:

    1. lag ``base_shift`` with ``min_periods`` relaxed from 10 down to 5;
    2. ``min_periods=5`` with the lag walked back from ``base_shift - 1`` down to 1
       (never 0 -- a zero lag would include the match being predicted);
    3. lag 1 with ``min_periods`` relaxed from 4 down to 1.

    Parameters
    ----------
    frame : pandas.DataFrame
        Must contain ``p_id``, ``t_surf`` and ``value_col``; rows are expected to already be
        in the order the rolling window should walk.
    value_col : str
        Column to average.
    base_shift : int
        Preferred lag in rows (1 for the '1to10' bucket, 11 for '11to20', ...).

    Returns
    -------
    pandas.Series
        Bucket values indexed like ``frame``; NaN where every attempt was NaN.
    """
    attempts = [(min_obs, base_shift) for min_obs in range(10, 4, -1)]
    attempts += [(5, lag) for lag in range(base_shift - 1, 0, -1)]
    attempts += [(min_obs, 1) for min_obs in range(4, 0, -1)]

    # The grouping does not change between attempts, so build it once.
    grouped = frame.groupby(['p_id', 't_surf'])[value_col]

    bucket = None
    for min_obs, lag in attempts:
        candidate = grouped.transform(
            lambda x: x.rolling(window=10, min_periods=min_obs).mean().round(2).shift(lag)
        )
        bucket = candidate if bucket is None else bucket.fillna(candidate)
    return bucket


# Ace % averaged over matches 1-10, 11-20, 21-30 and 31-40 back (surface-specific).
for _bucket_col, _base_shift in (
    ('p_ace%_1to10', 1),
    ('p_ace%_11to20', 11),
    ('p_ace%_21to30', 21),
    ('p_ace%_31to40', 31),
):
    df_player[_bucket_col] = _fallback_bucket_mean(df_player, 'p_ace%', _base_shift)

# "41to50" bucket of surface-specific ace%: the 10-row rolling mean of p_ace%, shifted 41 rows within each
# (p_id, t_surf) group, so the window spans the 41st-50th rows back and never includes the current row.
# Rows whose history is too short fall through an ordered ladder of looser (min_periods, shift) settings;
# each rung only fills values that are still NaN after the rungs before it:
#   1. shift 41, min_periods 10 -> 5   accept a partially filled window at the target lag
#   2. min_periods 5, shift 40 -> 1    slide the window toward the present; the shift stops at 1 because a
#                                      zero shift would count matches that haven't happened yet
#   3. shift 1, min_periods 4 -> 1     accept very short histories
# NOTE(review): at a fixed shift, lowering min_periods only unmasks more rows (the window mean is unchanged),
# so the intermediate min_periods rungs appear redundant; they are kept so results match the unrolled version.
ace_grouped = df_player.groupby(['p_id','t_surf'])['p_ace%']
ace_41to50_ladder = (
    [(min_obs, 41) for min_obs in range(10, 4, -1)]
    + [(5, lag) for lag in range(40, 0, -1)]
    + [(min_obs, 1) for min_obs in range(4, 0, -1)]
)
ace_41to50 = None
for min_obs, lag in ace_41to50_ladder:
    # Bind the rung's settings as lambda defaults so each transform uses its own pair.
    rung = ace_grouped.transform(
        lambda x, m=min_obs, k=lag: x.rolling(window=10, min_periods=m).mean().round(2).shift(k)
    )
    ace_41to50 = rung if ace_41to50 is None else ace_41to50.fillna(rung)
df_player['p_ace%_41to50'] = ace_41to50

# "51to60" bucket of surface-specific ace%: the 10-row rolling mean of p_ace%, shifted 51 rows within each
# (p_id, t_surf) group, so the window spans the 51st-60th rows back and never includes the current row.
# Rows whose history is too short fall through an ordered ladder of looser (min_periods, shift) settings;
# each rung only fills values that are still NaN after the rungs before it:
#   1. shift 51, min_periods 10 -> 5   accept a partially filled window at the target lag
#   2. min_periods 5, shift 50 -> 1    slide the window toward the present; the shift stops at 1 because a
#                                      zero shift would count matches that haven't happened yet
#   3. shift 1, min_periods 4 -> 1     accept very short histories
# NOTE(review): at a fixed shift, lowering min_periods only unmasks more rows (the window mean is unchanged),
# so the intermediate min_periods rungs appear redundant; they are kept so results match the unrolled version.
ace_grouped = df_player.groupby(['p_id','t_surf'])['p_ace%']
ace_51to60_ladder = (
    [(min_obs, 51) for min_obs in range(10, 4, -1)]
    + [(5, lag) for lag in range(50, 0, -1)]
    + [(min_obs, 1) for min_obs in range(4, 0, -1)]
)
ace_51to60 = None
for min_obs, lag in ace_51to60_ladder:
    # Bind the rung's settings as lambda defaults so each transform uses its own pair.
    rung = ace_grouped.transform(
        lambda x, m=min_obs, k=lag: x.rolling(window=10, min_periods=m).mean().round(2).shift(k)
    )
    ace_51to60 = rung if ace_51to60 is None else ace_51to60.fillna(rung)
df_player['p_ace%_51to60'] = ace_51to60

# Reverse the row order of df_player now that the rolling/shift features above have been computed.
# NOTE(review): presumably this undoes a reversal applied at the top of this cell (not visible here), putting
# the frame back in the order shown in the displayed output (2019 rows first) - confirm.
df_player = df_player.iloc[::-1]

In [110]:
# Fold the six by-10s ace% buckets into a single recency-weighted average per row. Weights fall linearly
# from 6 (rows 1-10 back) to 1 (rows 51-60 back); the weighted sum is divided by the total weight (21) and
# rounded to 2 decimals. A NaN in any bucket makes the result NaN.
# Author's note: decay-weighted versions performed better than unweighted versions for prediction.
ace_decay_weights = {
    'p_ace%_1to10': 6,
    'p_ace%_11to20': 5,
    'p_ace%_21to30': 4,
    'p_ace%_31to40': 3,
    'p_ace%_41to50': 2,
    'p_ace%_51to60': 1,
}

# Accumulate left to right, most recent bucket first.
ace_weighted_total = None
for bucket_col, weight in ace_decay_weights.items():
    weighted_bucket = df_player[bucket_col] * weight
    ace_weighted_total = weighted_bucket if ace_weighted_total is None else ace_weighted_total + weighted_bucket

df_player["p_ace%_l60_decay"] = (ace_weighted_total / sum(ace_decay_weights.values())).round(2)

In [111]:
# The six by-10s bucket columns were only inputs to p_ace%_l60_decay; remove them from df_player in place.
ace_bucket_cols = [
    'p_ace%_1to10',
    "p_ace%_11to20",
    "p_ace%_21to30",
    "p_ace%_31to40",
    "p_ace%_41to50",
    "p_ace%_51to60",
]
df_player.drop(columns=ace_bucket_cols, inplace=True)
df_player

Unnamed: 0,t_id,t_date,tour_wk,t_name,t_country,t_surf,t_lvl,t_draw_size,m_num,t_round,...,p_HCA_opp_N,m_outcome,p_pts_won%_l60_decay,p_sv_pts_won%_l60_decay,p_ret_pts_won%_l60_decay,p_pts_won%_std_l60,p_sv_pts_won%_std_l60,p_ret_pts_won%_std_l60,p_ace%,p_ace%_l60_decay
19984,2019-560,20190826,2019_24,US Open,USA,Hard,4,128,2059,R128,...,1,0,,,,,,,1.09,
20367,2019-M014,20191014,2019_29,Moscow,RUS,Hard,1,32,2446,R32,...,0,0,,,,,,,4.82,
18809,2019-M004,20190225,2019_07,Acapulco,MEX,Hard,1,32,545,R32,...,0,0,,,,,,,3.70,
2257,2019-7696,20191105,2019_33,NextGen Finals,ITA,Hard,3,8,2781,F,...,1,1,52.80,68.07,39.28,6.15,6.46,8.02,3.85,7.23
2259,2019-7696,20191105,2019_33,NextGen Finals,ITA,Hard,3,8,2783,SF,...,1,1,52.67,67.87,39.37,6.42,6.68,8.36,7.69,7.30
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
12825,2014-414,20140714,2014_19,Hamburg,GER,Clay,1,48,16430,R32,...,1,1,45.88,52.04,39.54,13.85,19.04,12.79,0.00,4.53
12809,2014-414,20140714,2014_19,Hamburg,GER,Clay,1,48,16414,R64,...,1,1,39.65,45.14,33.49,7.40,16.07,5.08,6.82,3.76
31024,2014-321,20140707,2014_18,Stuttgart,GER,Clay,1,28,16360,R32,...,1,0,35.46,35.98,34.54,1.96,3.75,6.72,6.45,2.42
30686,2014-308,20140428,2014_13,Munich,GER,Clay,1,28,15713,R32,...,1,0,36.84,33.33,39.29,,,,2.27,2.56


In [112]:
# Per-match aced% (player as returner): opp_ace as a percentage of opp_svpt, rounded to 2 decimals.
# This is the single-match value only; the surface-specific look-back averages are derived from this
# column in the following cell.
opp_ace_share = df_player["opp_ace"].div(df_player["opp_svpt"])
df_player["p_aced%"] = opp_ace_share.mul(100).round(2)
df_player["p_aced%"]

19984     3.66
20367    15.28
18809     5.00
2257      4.88
2259      3.45
         ...  
12825     1.16
12809     1.92
31024     5.81
30686     4.26
33339     3.57
Name: p_aced%, Length: 36538, dtype: float64

In [113]:
# Reverse the row order before the rolling calculations below: rolling() and shift() work in row order
# within each (p_id, t_surf) group, so shift(1) refers to the row directly above the current one.
# NOTE(review): presumably this puts each player's matches oldest-first (the displayed frame lists 2019
# rows before 2014 rows) - confirm the sort order.
df_player = df_player.iloc[::-1]

# "1to10" bucket of surface-specific aced%: the 10-row rolling mean of p_aced%, shifted 1 row within each
# (p_id, t_surf) group, so the window spans the 10 rows before the current one and never the current row.
# The min_periods requirement is relaxed from 10 down to 1; each rung only fills values still NaN after the
# rungs before it, so short histories still receive a value.
# NOTE(review): at a fixed shift, lowering min_periods only unmasks more rows (the window mean is unchanged),
# so the rungs above min_periods=1 appear redundant; they are kept so results match the unrolled version.
aced_grouped = df_player.groupby(['p_id','t_surf'])['p_aced%']
aced_1to10 = None
for min_obs in range(10, 0, -1):
    # Bind min_obs as a lambda default so each transform uses its own setting.
    rung = aced_grouped.transform(
        lambda x, m=min_obs: x.rolling(window=10, min_periods=m).mean().round(2).shift(1)
    )
    aced_1to10 = rung if aced_1to10 is None else aced_1to10.fillna(rung)
df_player['p_aced%_1to10'] = aced_1to10

# "11to20" bucket of surface-specific aced%: the 10-row rolling mean of p_aced%, shifted 11 rows within each
# (p_id, t_surf) group, so the window spans the 11th-20th rows back and never includes the current row.
# Rows whose history is too short fall through an ordered ladder of looser (min_periods, shift) settings;
# each rung only fills values that are still NaN after the rungs before it:
#   1. shift 11, min_periods 10 -> 5   accept a partially filled window at the target lag
#   2. min_periods 5, shift 10 -> 1    slide the window toward the present; the shift stops at 1 because a
#                                      zero shift would count matches that haven't happened yet
#   3. shift 1, min_periods 4 -> 1     accept very short histories
# NOTE(review): at a fixed shift, lowering min_periods only unmasks more rows (the window mean is unchanged),
# so the intermediate min_periods rungs appear redundant; they are kept so results match the unrolled version.
aced_grouped = df_player.groupby(['p_id','t_surf'])['p_aced%']
aced_11to20_ladder = (
    [(min_obs, 11) for min_obs in range(10, 4, -1)]
    + [(5, lag) for lag in range(10, 0, -1)]
    + [(min_obs, 1) for min_obs in range(4, 0, -1)]
)
aced_11to20 = None
for min_obs, lag in aced_11to20_ladder:
    # Bind the rung's settings as lambda defaults so each transform uses its own pair.
    rung = aced_grouped.transform(
        lambda x, m=min_obs, k=lag: x.rolling(window=10, min_periods=m).mean().round(2).shift(k)
    )
    aced_11to20 = rung if aced_11to20 is None else aced_11to20.fillna(rung)
df_player['p_aced%_11to20'] = aced_11to20

# 'p_aced%_21to30': mean of 'p_aced%' (rounded to 2 decimals) over the 10-row window that ends
# 21 rows back -- i.e. rows 21-30 before the current one -- within each (p_id, t_surf) group.
# Rows without enough history are filled by progressively looser fallbacks, tried in this order:
#   1. keep the lag at 21 but accept windows holding 9, 8, 7, 6, then 5 observations;
#   2. keep min_periods at 5 and slide the window toward the present, lag 20 down to lag 1;
#   3. at lag 1, accept windows holding 4, 3, 2 and finally 1 observation.
# The lag never reaches 0: an unshifted window would include the current row itself, i.e. stats
# from the very match the feature is meant to help predict.
aced_by_player_surface = df_player.groupby(['p_id', 't_surf'])['p_aced%']  # built once, reused by every step
aced_21to30_steps = (
    [(min_obs, 21) for min_obs in range(10, 4, -1)]    # (10, 21), (9, 21), ... (5, 21)
    + [(5, lag) for lag in range(20, 0, -1)]           # (5, 20), (5, 19), ... (5, 1)
    + [(min_obs, 1) for min_obs in range(4, 0, -1)]    # (4, 1), (3, 1), (2, 1), (1, 1)
)

aced_21to30 = None
for min_obs, lag in aced_21to30_steps:
    candidate = aced_by_player_surface.transform(
        lambda x: x.rolling(window=10, min_periods=min_obs).mean().round(2).shift(lag)
    )
    # The first step seeds the feature; every later step only fills rows that are still NaN,
    # so a stricter (earlier) step always wins over a looser (later) one.
    aced_21to30 = candidate if aced_21to30 is None else aced_21to30.fillna(candidate)

df_player['p_aced%_21to30'] = aced_21to30

# 'p_aced%_31to40': mean of 'p_aced%' (rounded to 2 decimals) over the 10-row window that ends
# 31 rows back -- i.e. rows 31-40 before the current one -- within each (p_id, t_surf) group.
# Rows without enough history are filled by progressively looser fallbacks, tried in this order:
#   1. keep the lag at 31 but accept windows holding 9, 8, 7, 6, then 5 observations;
#   2. keep min_periods at 5 and slide the window toward the present, lag 30 down to lag 1;
#   3. at lag 1, accept windows holding 4, 3, 2 and finally 1 observation.
# The lag never reaches 0: an unshifted window would include the current row itself, i.e. stats
# from the very match the feature is meant to help predict.
aced_by_player_surface = df_player.groupby(['p_id', 't_surf'])['p_aced%']  # built once, reused by every step
aced_31to40_steps = (
    [(min_obs, 31) for min_obs in range(10, 4, -1)]    # (10, 31), (9, 31), ... (5, 31)
    + [(5, lag) for lag in range(30, 0, -1)]           # (5, 30), (5, 29), ... (5, 1)
    + [(min_obs, 1) for min_obs in range(4, 0, -1)]    # (4, 1), (3, 1), (2, 1), (1, 1)
)

aced_31to40 = None
for min_obs, lag in aced_31to40_steps:
    candidate = aced_by_player_surface.transform(
        lambda x: x.rolling(window=10, min_periods=min_obs).mean().round(2).shift(lag)
    )
    # The first step seeds the feature; every later step only fills rows that are still NaN,
    # so a stricter (earlier) step always wins over a looser (later) one.
    aced_31to40 = candidate if aced_31to40 is None else aced_31to40.fillna(candidate)

df_player['p_aced%_31to40'] = aced_31to40

# 'p_aced%_41to50': mean of 'p_aced%' (rounded to 2 decimals) over the 10-row window that ends
# 41 rows back -- i.e. rows 41-50 before the current one -- within each (p_id, t_surf) group.
# Rows without enough history are filled by progressively looser fallbacks, tried in this order:
#   1. keep the lag at 41 but accept windows holding 9, 8, 7, 6, then 5 observations;
#   2. keep min_periods at 5 and slide the window toward the present, lag 40 down to lag 1;
#   3. at lag 1, accept windows holding 4, 3, 2 and finally 1 observation.
# The lag stops at 1 and never reaches 0: an unshifted window would include the current row
# itself, i.e. stats from the very match the feature is meant to help predict.
aced_by_player_surface = df_player.groupby(['p_id', 't_surf'])['p_aced%']  # built once, reused by every step
aced_41to50_steps = (
    [(min_obs, 41) for min_obs in range(10, 4, -1)]    # (10, 41), (9, 41), ... (5, 41)
    + [(5, lag) for lag in range(40, 0, -1)]           # (5, 40), (5, 39), ... (5, 1)
    + [(min_obs, 1) for min_obs in range(4, 0, -1)]    # (4, 1), (3, 1), (2, 1), (1, 1)
)

aced_41to50 = None
for min_obs, lag in aced_41to50_steps:
    candidate = aced_by_player_surface.transform(
        lambda x: x.rolling(window=10, min_periods=min_obs).mean().round(2).shift(lag)
    )
    # The first step seeds the feature; every later step only fills rows that are still NaN,
    # so a stricter (earlier) step always wins over a looser (later) one.
    aced_41to50 = candidate if aced_41to50 is None else aced_41to50.fillna(candidate)

df_player['p_aced%_41to50'] = aced_41to50

# 'p_aced%_51to60': mean of 'p_aced%' (rounded to 2 decimals) over the 10-row window that ends
# 51 rows back -- i.e. rows 51-60 before the current one -- within each (p_id, t_surf) group.
# The first statement seeds the column with full 10-observation windows only. Every following
# statement uses fillna, so it only touches rows that are still NaN, with a looser fallback each time:
#   - first the lag stays at 51 while min_periods is relaxed from 9 down to 5;
#   - then min_periods stays at 5 while the window slides toward the present one row per
#     statement (shift 50, 49, 48, ...).
df_player['p_aced%_51to60'] = df_player.groupby(['p_id','t_surf'])['p_aced%'].transform(lambda x: x.rolling(window=10, min_periods = 10).mean().round(2).shift(51))
df_player['p_aced%_51to60'] = df_player['p_aced%_51to60'].fillna(df_player.groupby(['p_id','t_surf'])['p_aced%'].transform(lambda x: x.rolling(window=10, min_periods = 9).mean().round(2).shift(51)))
df_player['p_aced%_51to60'] = df_player['p_aced%_51to60'].fillna(df_player.groupby(['p_id','t_surf'])['p_aced%'].transform(lambda x: x.rolling(window=10, min_periods = 8).mean().round(2).shift(51)))
df_player['p_aced%_51to60'] = df_player['p_aced%_51to60'].fillna(df_player.groupby(['p_id','t_surf'])['p_aced%'].transform(lambda x: x.rolling(window=10, min_periods = 7).mean().round(2).shift(51)))
df_player['p_aced%_51to60'] = df_player['p_aced%_51to60'].fillna(df_player.groupby(['p_id','t_surf'])['p_aced%'].transform(lambda x: x.rolling(window=10, min_periods = 6).mean().round(2).shift(51)))
df_player['p_aced%_51to60'] = df_player['p_aced%_51to60'].fillna(df_player.groupby(['p_id','t_surf'])['p_aced%'].transform(lambda x: x.rolling(window=10, min_periods = 5).mean().round(2).shift(51)))
# From here on min_periods stays at 5 and only the lag shrinks.
df_player['p_aced%_51to60'] = df_player['p_aced%_51to60'].fillna(df_player.groupby(['p_id','t_surf'])['p_aced%'].transform(lambda x: x.rolling(window=10, min_periods = 5).mean().round(2).shift(50)))
df_player['p_aced%_51to60'] = df_player['p_aced%_51to60'].fillna(df_player.groupby(['p_id','t_surf'])['p_aced%'].transform(lambda x: x.rolling(window=10, min_periods = 5).mean().round(2).shift(49)))
df_player['p_aced%_51to60'] = df_player['p_aced%_51to60'].fillna(df_player.groupby(['p_id','t_surf'])['p_aced%'].transform(lambda x: x.rolling(window=10, min_periods = 5).mean().round(2).shift(48)))
df_player['p_aced%_51to60'] = df_player['p_aced%_51to60'].fillna(df_player.groupby(['p_id','t_surf'])['p_aced%'].transform(lambda x: x.rolling(window=10, min_periods = 5).mean().round(2).shift(47)))
df_player['p_aced%_51to60'] = df_player['p_aced%_51to60'].fillna(df_player.groupby(['p_id','t_surf'])['p_aced%'].transform(lambda x: x.rolling(window=10, min_periods = 5).mean().round(2).shift(46)))
df_player['p_aced%_51to60'] = df_player['p_aced%_51to60'].fillna(df_player.groupby(['p_id','t_surf'])['p_aced%'].transform(lambda x: x.rolling(window=10, min_periods = 5).mean().round(2).shift(45)))
df_player['p_aced%_51to60'] = df_player['p_aced%_51to60'].fillna(df_player.groupby(['p_id','t_surf'])['p_aced%'].transform(lambda x: x.rolling(window=10, min_periods = 5).mean().round(2).shift(44)))
df_player['p_aced%_51to60'] = df_player['p_aced%_51to60'].fillna(df_player.groupby(['p_id','t_surf'])['p_aced%'].transform(lambda x: x.rolling(window=10, min_periods = 5).mean().round(2).shift(43)))
df_player['p_aced%_51to60'] = df_player['p_aced%_51to60'].fillna(df_player.groupby(['p_id','t_surf'])['p_aced%'].transform(lambda x: x.rolling(window=10, min_periods = 5).mean().round(2).shift(42)))
df_player['p_aced%_51to60'] = df_player['p_aced%_51to60'].fillna(df_player.groupby(['p_id','t_surf'])['p_aced%'].transform(lambda x: x.rolling(window=10, min_periods = 5).mean().round(2).shift(41)))
df_player['p_aced%_51to60'] = df_player['p_aced%_51to60'].fillna(df_player.groupby(['p_id','t_surf'])['p_aced%'].transform(lambda x: x.rolling(window=10, min_periods = 5).mean().round(2).shift(40)))
df_player['p_aced%_51to60'] = df_player['p_aced%_51to60'].fillna(df_player.groupby(['p_id','t_surf'])['p_aced%'].transform(lambda x: x.rolling(window=10, min_periods = 5).mean().round(2).shift(39)))
df_player['p_aced%_51to60'] = df_player['p_aced%_51to60'].fillna(df_player.groupby(['p_id','t_surf'])['p_aced%'].transform(lambda x: x.rolling(window=10, min_periods = 5).mean().round(2).shift(38)))
df_player['p_aced%_51to60'] = df_player['p_aced%_51to60'].fillna(df_player.groupby(['p_id','t_surf'])['p_aced%'].transform(lambda x: x.rolling(window=10, min_periods = 5).mean().round(2).shift(37)))
df_player['p_aced%_51to60'] = df_player['p_aced%_51to60'].fillna(df_player.groupby(['p_id','t_surf'])['p_aced%'].transform(lambda x: x.rolling(window=10, min_periods = 5).mean().round(2).shift(36)))
df_player['p_aced%_51to60'] = df_player['p_aced%_51to60'].fillna(df_player.groupby(['p_id','t_surf'])['p_aced%'].transform(lambda x: x.rolling(window=10, min_periods = 5).mean().round(2).shift(35)))
df_player['p_aced%_51to60'] = df_player['p_aced%_51to60'].fillna(df_player.groupby(['p_id','t_surf'])['p_aced%'].transform(lambda x: x.rolling(window=10, min_periods = 5).mean().round(2).shift(34)))
df_player['p_aced%_51to60'] = df_player['p_aced%_51to60'].fillna(df_player.groupby(['p_id','t_surf'])['p_aced%'].transform(lambda x: x.rolling(window=10, min_periods = 5).mean().round(2).shift(33)))
df_player['p_aced%_51to60'] = df_player['p_aced%_51to60'].fillna(df_player.groupby(['p_id','t_surf'])['p_aced%'].transform(lambda x: x.rolling(window=10, min_periods = 5).mean().round(2).shift(32)))
df_player['p_aced%_51to60'] = df_player['p_aced%_51to60'].fillna(df_player.groupby(['p_id','t_surf'])['p_aced%'].transform(lambda x: x.rolling(window=10, min_periods = 5).mean().round(2).shift(31)))
df_player['p_aced%_51to60'] = df_player['p_aced%_51to60'].fillna(df_player.groupby(['p_id','t_surf'])['p_aced%'].transform(lambda x: x.rolling(window=10, min_periods = 5).mean().round(2).shift(30)))
df_player['p_aced%_51to60'] = df_player['p_aced%_51to60'].fillna(df_player.groupby(['p_id','t_surf'])['p_aced%'].transform(lambda x: x.rolling(window=10, min_periods = 5).mean().round(2).shift(29)))
df_player['p_aced%_51to60'] = df_player['p_aced%_51to60'].fillna(df_player.groupby(['p_id','t_surf'])['p_aced%'].transform(lambda x: x.rolling(window=10, min_periods = 5).mean().round(2).shift(28)))
df_player['p_aced%_51to60'] = df_player['p_aced%_51to60'].fillna(df_player.groupby(['p_id','t_surf'])['p_aced%'].transform(lambda x: x.rolling(window=10, min_periods = 5).mean().round(2).shift(27)))
df_player['p_aced%_51to60'] = df_player['p_aced%_51to60'].fillna(df_player.groupby(['p_id','t_surf'])['p_aced%'].transform(lambda x: x.rolling(window=10, min_periods = 5).mean().round(2).shift(26)))
df_player['p_aced%_51to60'] = df_player['p_aced%_51to60'].fillna(df_player.groupby(['p_id','t_surf'])['p_aced%'].transform(lambda x: x.rolling(window=10, min_periods = 5).mean().round(2).shift(25)))
df_player['p_aced%_51to60'] = df_player['p_aced%_51to60'].fillna(df_player.groupby(['p_id','t_surf'])['p_aced%'].transform(lambda x: x.rolling(window=10, min_periods = 5).mean().round(2).shift(24)))
df_player['p_aced%_51to60'] = df_player['p_aced%_51to60'].fillna(df_player.groupby(['p_id','t_surf'])['p_aced%'].transform(lambda x: x.rolling(window=10, min_periods = 5).mean().round(2).shift(23)))
df_player['p_aced%_51to60'] = df_player['p_aced%_51to60'].fillna(df_player.groupby(['p_id','t_surf'])['p_aced%'].transform(lambda x: x.rolling(window=10, min_periods = 5).mean().round(2).shift(22)))
df_player['p_aced%_51to60'] = df_player['p_aced%_51to60'].fillna(df_player.groupby(['p_id','t_surf'])['p_aced%'].transform(lambda x: x.rolling(window=10, min_periods = 5).mean().round(2).shift(21)))
df_player['p_aced%_51to60'] = df_player['p_aced%_51to60'].fillna(df_player.groupby(['p_id','t_surf'])['p_aced%'].transform(lambda x: x.rolling(window=10, min_periods = 5).mean().round(2).shift(20)))
df_player['p_aced%_51to60'] = df_player['p_aced%_51to60'].fillna(df_player.groupby(['p_id','t_surf'])['p_aced%'].transform(lambda x: x.rolling(window=10, min_periods = 5).mean().round(2).shift(19)))
df_player['p_aced%_51to60'] = df_player['p_aced%_51to60'].fillna(df_player.groupby(['p_id','t_surf'])['p_aced%'].transform(lambda x: x.rolling(window=10, min_periods = 5).mean().round(2).shift(18)))
df_player['p_aced%_51to60'] = df_player['p_aced%_51to60'].fillna(df_player.groupby(['p_id','t_surf'])['p_aced%'].transform(lambda x: x.rolling(window=10, min_periods = 5).mean().round(2).shift(17)))
df_player['p_aced%_51to60'] = df_player['p_aced%_51to60'].fillna(df_player.groupby(['p_id','t_surf'])['p_aced%'].transform(lambda x: x.rolling(window=10, min_periods = 5).mean().round(2).shift(16)))
df_player['p_aced%_51to60'] = df_player['p_aced%_51to60'].fillna(df_player.groupby(['p_id','t_surf'])['p_aced%'].transform(lambda x: x.rolling(window=10, min_periods = 5).mean().round(2).shift(15)))
df_player['p_aced%_51to60'] = df_player['p_aced%_51to60'].fillna(df_player.groupby(['p_id','t_surf'])['p_aced%'].transform(lambda x: x.rolling(window=10, min_periods = 5).mean().round(2).shift(14)))
df_player['p_aced%_51to60'] = df_player['p_aced%_51to60'].fillna(df_player.groupby(['p_id','t_surf'])['p_aced%'].transform(lambda x: x.rolling(window=10, min_periods = 5).mean().round(2).shift(13)))
df_player['p_aced%_51to60'] = df_player['p_aced%_51to60'].fillna(df_player.groupby(['p_id','t_surf'])['p_aced%'].transform(lambda x: x.rolling(window=10, min_periods = 5).mean().round(2).shift(12)))
df_player['p_aced%_51to60'] = df_player['p_aced%_51to60'].fillna(df_player.groupby(['p_id','t_surf'])['p_aced%'].transform(lambda x: x.rolling(window=10, min_periods = 5).mean().round(2).shift(11))) #at shift(11) the window still ends 11 rows back; NOTE(review): the zero-shift caveat previously attached here looks copy-pasted and would only apply to a shift(1) step
df_player['p_aced%_51to60'] = df_player['p_aced%_51to60'].fillna(df_player.groupby(['p_id','t_surf'])['p_aced%'].transform(lambda x: x.rolling(window=10, min_periods = 5).mean().round(2).shift(10)))
df_player['p_aced%_51to60'] = df_player['p_aced%_51to60'].fillna(df_player.groupby(['p_id','t_surf'])['p_aced%'].transform(lambda x: x.rolling(window=10, min_periods = 5).mean().round(2).shift(9)))
df_player['p_aced%_51to60'] = df_player['p_aced%_51to60'].fillna(df_player.groupby(['p_id','t_surf'])['p_aced%'].transform(lambda x: x.rolling(window=10, min_periods = 5).mean().round(2).shift(8)))
df_player['p_aced%_51to60'] = df_player['p_aced%_51to60'].fillna(df_player.groupby(['p_id','t_surf'])['p_aced%'].transform(lambda x: x.rolling(window=10, min_periods = 5).mean().round(2).shift(7)))
df_player['p_aced%_51to60'] = df_player['p_aced%_51to60'].fillna(df_player.groupby(['p_id','t_surf'])['p_aced%'].transform(lambda x: x.rolling(window=10, min_periods = 5).mean().round(2).shift(6)))
df_player['p_aced%_51to60'] = df_player['p_aced%_51to60'].fillna(df_player.groupby(['p_id','t_surf'])['p_aced%'].transform(lambda x: x.rolling(window=10, min_periods = 5).mean().round(2).shift(5)))
df_player['p_aced%_51to60'] = df_player['p_aced%_51to60'].fillna(df_player.groupby(['p_id','t_surf'])['p_aced%'].transform(lambda x: x.rolling(window=10, min_periods = 5).mean().round(2).shift(4)))
df_player['p_aced%_51to60'] = df_player['p_aced%_51to60'].fillna(df_player.groupby(['p_id','t_surf'])['p_aced%'].transform(lambda x: x.rolling(window=10, min_periods = 5).mean().round(2).shift(3)))
df_player['p_aced%_51to60'] = df_player['p_aced%_51to60'].fillna(df_player.groupby(['p_id','t_surf'])['p_aced%'].transform(lambda x: x.rolling(window=10, min_periods = 5).mean().round(2).shift(2)))
df_player['p_aced%_51to60'] = df_player['p_aced%_51to60'].fillna(df_player.groupby(['p_id','t_surf'])['p_aced%'].transform(lambda x: x.rolling(window=10, min_periods = 5).mean().round(2).shift(1))) #you can't go to a zero shift, because this would mean counting matches that haven't happened yet
df_player['p_aced%_51to60'] = df_player['p_aced%_51to60'].fillna(df_player.groupby(['p_id','t_surf'])['p_aced%'].transform(lambda x: x.rolling(window=10, min_periods = 4).mean().round(2).shift(1))) #given the above, we will lower the period requirement stepping back down to 1 to cover the last few matches in the sample
df_player['p_aced%_51to60'] = df_player['p_aced%_51to60'].fillna(df_player.groupby(['p_id','t_surf'])['p_aced%'].transform(lambda x: x.rolling(window=10, min_periods = 3).mean().round(2).shift(1))) #given the above, we will lower the period requirement stepping back down to 1 to cover the last few matches in the sample
df_player['p_aced%_51to60'] = df_player['p_aced%_51to60'].fillna(df_player.groupby(['p_id','t_surf'])['p_aced%'].transform(lambda x: x.rolling(window=10, min_periods = 2).mean().round(2).shift(1))) #given the above, we will lower the period requirement stepping back down to 1 to cover the last few matches in the sample
df_player['p_aced%_51to60'] = df_player['p_aced%_51to60'].fillna(df_player.groupby(['p_id','t_surf'])['p_aced%'].transform(lambda x: x.rolling(window=10, min_periods = 1).mean().round(2).shift(1))) #given the above, we will lower the period requirement stepping back down to 1 to cover the last few matches in the sample

df_player = df_player.iloc[::-1]

In [114]:
# Combine the six 10-match aced% buckets (aced% is tracked for the player as a
# returner) into a single decay-weighted figure: the most recent bucket gets weight 6,
# the oldest weight 1, and the total is divided by the weight sum (6+5+4+3+2+1 = 21).
# The author reports that decay-weighted versions outperform unweighted ones.
# Any NaN bucket makes the whole row NaN.

df_player["p_aced%_l60_decay"] = (
    df_player['p_aced%_1to10'].mul(6)
    .add(df_player['p_aced%_11to20'].mul(5))
    .add(df_player['p_aced%_21to30'].mul(4))
    .add(df_player['p_aced%_31to40'].mul(3))
    .add(df_player['p_aced%_41to50'].mul(2))
    .add(df_player['p_aced%_51to60'].mul(1))
    .div(21)
    .round(2)
)

In [115]:
# The per-bucket columns were only scaffolding for the decay-weighted value above;
# remove them from df_player in place, then display the frame.
df_player.drop(
    columns=["p_aced%_" + bucket for bucket in ("1to10", "11to20", "21to30", "31to40", "41to50", "51to60")],
    inplace=True,
)
df_player

Unnamed: 0,t_id,t_date,tour_wk,t_name,t_country,t_surf,t_lvl,t_draw_size,m_num,t_round,...,p_pts_won%_l60_decay,p_sv_pts_won%_l60_decay,p_ret_pts_won%_l60_decay,p_pts_won%_std_l60,p_sv_pts_won%_std_l60,p_ret_pts_won%_std_l60,p_ace%,p_ace%_l60_decay,p_aced%,p_aced%_l60_decay
19984,2019-560,20190826,2019_24,US Open,USA,Hard,4,128,2059,R128,...,,,,,,,1.09,,3.66,
20367,2019-M014,20191014,2019_29,Moscow,RUS,Hard,1,32,2446,R32,...,,,,,,,4.82,,15.28,
18809,2019-M004,20190225,2019_07,Acapulco,MEX,Hard,1,32,545,R32,...,,,,,,,3.70,,5.00,
2257,2019-7696,20191105,2019_33,NextGen Finals,ITA,Hard,3,8,2781,F,...,52.80,68.07,39.28,6.15,6.46,8.02,3.85,7.23,4.88,9.32
2259,2019-7696,20191105,2019_33,NextGen Finals,ITA,Hard,3,8,2783,SF,...,52.67,67.87,39.37,6.42,6.68,8.36,7.69,7.30,3.45,9.26
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
12825,2014-414,20140714,2014_19,Hamburg,GER,Clay,1,48,16430,R32,...,45.88,52.04,39.54,13.85,19.04,12.79,0.00,4.53,1.16,3.89
12809,2014-414,20140714,2014_19,Hamburg,GER,Clay,1,48,16414,R64,...,39.65,45.14,33.49,7.40,16.07,5.08,6.82,3.76,1.92,4.55
31024,2014-321,20140707,2014_18,Stuttgart,GER,Clay,1,28,16360,R32,...,35.46,35.98,34.54,1.96,3.75,6.72,6.45,2.42,5.81,3.92
30686,2014-308,20140428,2014_13,Munich,GER,Clay,1,28,15713,R32,...,36.84,33.33,39.29,,,,2.27,2.56,4.26,3.57


In [116]:
# Break-point save %: break points saved as a percentage of break points faced in the match.
# The author tried a weighted version, but the unweighted mean worked better because
# break points are rare events.
df_player["p_bp_save%"] = df_player["p_bpSaved"].div(df_player["p_bpFaced"]).mul(100).round(2)

# Surface-specific mean over up to 60 matches per player. The frame is reversed for
# the rolling computation and reversed back afterwards; shift(1) keeps the current
# row's own value out of its window, and min_periods=1 lets one observation suffice.
df_player = df_player.iloc[::-1]
df_player['p_bp_save%_l60'] = (
    df_player
    .groupby(['p_id','t_surf'])['p_bp_save%']
    .transform(lambda saves: saves.rolling(window=60, min_periods=1).mean().round(2).shift(1))
)
df_player = df_player.iloc[::-1]

In [117]:
# Break-point conversion %: 100 * (1 - opponent's break points saved / opponent's
# break points faced) for the match.
# The rolling window below is 60 matches (surface-specific), matching the `_l60` column name.
# The author tried a weighted version, but the unweighted mean worked better because
# break points are rare events.
df_player["p_bp_convert%"] = (
    df_player["opp_bpSaved"]
    .div(df_player["opp_bpFaced"])
    .rsub(1)
    .mul(100)
    .round(2)
)

# Reverse, compute the shifted rolling mean per player and surface, reverse back.
# shift(1) keeps the current row's own value out of its window.
df_player = df_player.iloc[::-1]
df_player['p_bp_convert%_l60'] = (
    df_player
    .groupby(['p_id','t_surf'])['p_bp_convert%']
    .transform(lambda conv: conv.rolling(window=60, min_periods=1).mean().round(2).shift(1))
)
df_player = df_player.iloc[::-1]

In [50]:
# Sanity check: list the columns, non-null counts and dtypes of the player-level frame.
df_player.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 36538 entries, 33339 to 19984
Data columns (total 63 columns):
 #   Column                    Non-Null Count  Dtype  
---  ------                    --------------  -----  
 0   t_id                      36538 non-null  object 
 1   t_date                    36538 non-null  int64  
 2   tour_wk                   36538 non-null  object 
 3   t_name                    36538 non-null  object 
 4   t_country                 36538 non-null  object 
 5   t_surf                    36538 non-null  object 
 6   t_lvl                     36538 non-null  int64  
 7   t_draw_size               36538 non-null  int64  
 8   m_num                     36538 non-null  int64  
 9   t_round                   36538 non-null  object 
 10  t_rd_num                  36488 non-null  float64
 11  m_best_of                 36538 non-null  int64  
 12  m_score                   36538 non-null  object 
 13  m_time(m)                 36536 non-null  float64
 14  m_

In [118]:
# Keep only the columns listed here, in this order. Anything not listed (for example
# the raw per-match 'p_bp_save%' and 'p_bp_convert%' columns) is dropped from df_player.
df_player = df_player.loc[:, [
    # tournament / match context
    't_id', 't_date', 'tour_wk', 't_name', 't_country', 't_surf', 't_lvl', 't_draw_size',
    'm_num', 't_round', 't_rd_num', 'm_best_of', 'm_score', 'm_time(m)', 'm_tot_pts',
    # player descriptors
    'p_id', 'p_name', 'p_rank', 'p_rank_pts', 'p_log_rank', 'p_country', 'p_ent', 'p_hand',
    'p_ht', 'p_age',
    # raw serve counts
    'p_svpt', 'p_1stWon', 'p_2ndWon', 'p_SvGms',
    # opponent identity
    'opp_id', 'opp_name',
    # points-won percentages and their rolling versions
    'p_pts_won%', 'p_pts_won%_l60_decay',
    'p_sv_pts_won%', 'p_sv_pts_won%_l60_decay',
    'p_ret_pts_won%', 'p_ret_pts_won%_l60_decay',
    'p_pts_won%_std_l60', 'p_sv_pts_won%_std_l60', 'p_ret_pts_won%_std_l60',
    # ace and break-point features
    'p_ace%', 'p_ace%_l60_decay', 'p_aced%_l60_decay', 'p_bp_save%_l60', 'p_bp_convert%_l60',
    # player-vs-opponent differentials and flags
    'p_opp_rank_diff', 'p_opp_rank_pts_diff', 'p_opp_log_rank_diff', 'p_opp_ht_diff',
    'p_opp_age_diff', 'p_L_opp_R', 'p_HCA_opp_N',
    # outcome flag
    'm_outcome',
]]

In [119]:
# Checkpoint: write the current player-level frame to CSV without the index column.
df_player.to_csv(path_or_buf='../data/test_df4.csv', index=False)

#good to here (10:09 AM 04/26/2022)

#### "Stamina": Total tour-level matches played in the sample, and "Fatigue": Recent time on court and total pts played. For "Fatigue", decay-fxn features are created (will revisit the decay parameters after initial modeling).

#### "Body Battery" is a feature combining "Stamina" and "Fatigue" features

In [120]:
# "Stamina": how many rows (matches) the player has in the sample before the current one.
# The count runs across surface, tournament and year, but the rolling window caps it at 300.
#
# The frame is reversed for the rolling count and reversed back afterwards. shift(1)
# excludes the current row, so a player's first row comes back NaN; that NaN is filled
# with 1 (not 0). As a result the first two rows of a player both carry 1.
# p_matches is later used under a cube root as a divisor, so it must not be 0.
df_player = df_player.iloc[::-1]
df_player['p_matches'] = (
    df_player
    .groupby('p_id')['p_id']
    .transform(lambda ids: ids.rolling(window=300, min_periods=1).count().shift(1))
    .fillna(1)
)
df_player = df_player.iloc[::-1]

In [121]:
# Minutes on court in the player's previous match of the same tournament week.
# This is a short-term fatigue measure, so the lookup is confined to (player, tour_wk)
# and never reaches into another tournament.
# shift(-1) reads the next row inside the group; the frame is displayed most-recent-first,
# so that row is the earlier match.
# Where there is no such row (first match of the week) the value is NaN and is replaced by
# 60 minutes, a baseline the author found empirically to be the best assumption.
# Might revisit later, especially for qualifiers (they already played a few matches on-site).
df_player['p_m_time_last'] = (
    df_player
    .groupby(['p_id','tour_wk'])['m_time(m)']
    .shift(-1)
    .fillna(60)
)

In [122]:
# Cumulative fatigue: minutes on court over up to the 5 previous matches within the same
# (player, tour_wk) group. Missing lags are counted as 60 minutes, and that placeholder
# is weighted like any real value.

# Lags 2 to 5 (lag 1 is the existing 'p_m_time_last' column).
df_player['p_m_time_2ago'] = df_player.groupby(['p_id','tour_wk'])['m_time(m)'].shift(-2).fillna(60)
df_player['p_m_time_3ago'] = df_player.groupby(['p_id','tour_wk'])['m_time(m)'].shift(-3).fillna(60)
df_player['p_m_time_4ago'] = df_player.groupby(['p_id','tour_wk'])['m_time(m)'].shift(-4).fillna(60)
df_player['p_m_time_5ago'] = df_player.groupby(['p_id','tour_wk'])['m_time(m)'].shift(-5).fillna(60)

# Decayed load: weights fall 1.0, 0.8, 0.6, 0.4, 0.2 from the most recent lag to the oldest.
df_player['p_tot_time_l5_decay'] = (
    df_player['p_m_time_last'].mul(1)
    .add(df_player['p_m_time_2ago'].mul(.8))
    .add(df_player['p_m_time_3ago'].mul(.6))
    .add(df_player['p_m_time_4ago'].mul(.4))
    .add(df_player['p_m_time_5ago'].mul(.2))
)

# Undecayed load: plain sum of the five lags. The author found empirically that no decay
# performs slightly better than decay.
df_player['p_tot_time_l5'] = (
    df_player['p_m_time_last']
    .add(df_player['p_m_time_2ago'])
    .add(df_player['p_m_time_3ago'])
    .add(df_player['p_m_time_4ago'])
    .add(df_player['p_m_time_5ago'])
)

# The lag 2-5 columns were only intermediates.
df_player = df_player.drop(columns=['p_m_time_2ago','p_m_time_3ago','p_m_time_4ago','p_m_time_5ago'])

In [None]:
# Uses rolling fxn to compute total time on court (min) for a player for the last 'window=' matches, with at least 'min_periods' matches required to generate a non-NaN value.
# Requires inversion of the df for a backward looking computation. After computation, df is flipped back "rightside up".
#Shift of 1 on the inverted df ensures that the data from the current row is NOT included in the computation

# as this is intented to be a measure of cumulative fatigue, we DO NOT want it to spill beyond the current tournament (hence the two-level groupby)

#df_player = df_player.iloc[::-1]
#df_player['p_match_time_last3'] = df_player.groupby(['p_id','tour_week'])['m_time'].transform(lambda x: x.rolling(window=3, min_periods = 1).sum().shift(1))
#df_player = df_player.iloc[::-1]
#df_player['p_match_time_last3']

# if this is the first match in the tournament for the player, we will put an hour on the player's legs as a baseline assumption
# Might revisit later, especially for qualifiers (they already played a few matches on-site)
#df_player['p_match_time_last3'] = df_player['p_match_time_last3'].fillna(60) 


In [123]:
# Points played in the player's previous match of the same tournament week.
# This is a short-term fatigue measure, so the lookup is confined to (player, tour_wk)
# and never reaches into another tournament.
# shift(-1) reads the next row inside the group; the frame is displayed most-recent-first,
# so that row is the earlier match.
# Where there is no such row (first match of the week) the value is NaN and is replaced by
# 100 points, which the author takes as the average number of points played in an hour.
# Might revisit later, especially for qualifiers (they already played a few matches on-site).
df_player['p_tot_pts_last'] = (
    df_player
    .groupby(['p_id','tour_wk'])['m_tot_pts']
    .shift(-1)
    .fillna(100)
)

In [124]:
# Cumulative fatigue: points played over up to the 5 previous matches within the same
# (player, tour_wk) group. Missing lags are counted as 100 points, and that placeholder
# is weighted like any real value.

# Lags 2 to 5 (lag 1 is the existing 'p_tot_pts_last' column).
df_player['p_tot_pts_2ago'] = df_player.groupby(['p_id','tour_wk'])['m_tot_pts'].shift(-2).fillna(100)
df_player['p_tot_pts_3ago'] = df_player.groupby(['p_id','tour_wk'])['m_tot_pts'].shift(-3).fillna(100)
df_player['p_tot_pts_4ago'] = df_player.groupby(['p_id','tour_wk'])['m_tot_pts'].shift(-4).fillna(100)
df_player['p_tot_pts_5ago'] = df_player.groupby(['p_id','tour_wk'])['m_tot_pts'].shift(-5).fillna(100)

# Decayed load: weights 0.9, 0.7, 0.5, 0.3, 0.2 from the most recent lag to the oldest.
# NOTE(review): these differ from the 1.0/0.8/0.6/0.4/0.2 weights used for minutes on
# court -- confirm this is intentional.
df_player['p_tot_pts_l5_decay'] = (
    df_player['p_tot_pts_last'].mul(.9)
    .add(df_player['p_tot_pts_2ago'].mul(.7))
    .add(df_player['p_tot_pts_3ago'].mul(.5))
    .add(df_player['p_tot_pts_4ago'].mul(.3))
    .add(df_player['p_tot_pts_5ago'].mul(.2))
)

# Undecayed load: plain sum of the five lags.
df_player['p_tot_pts_l5'] = (
    df_player['p_tot_pts_last']
    .add(df_player['p_tot_pts_2ago'])
    .add(df_player['p_tot_pts_3ago'])
    .add(df_player['p_tot_pts_4ago'])
    .add(df_player['p_tot_pts_5ago'])
)

# The lag 2-5 columns were only intermediates.
df_player = df_player.drop(columns=['p_tot_pts_2ago','p_tot_pts_3ago','p_tot_pts_4ago','p_tot_pts_5ago'])

In [None]:
# Uses rolling fxn to compute total points played for a player for the last 'window=' matches, with at least 'min_periods' matches required to generate a non-NaN value.
# Requires inversion of the df for a backward looking computation. After computation, df is flipped back "rightside up".
#Shift of 1 on the inverted df ensures that the data from the current row is NOT included in the computation

# as this is intented to be a measure of cumulative fatigue, we DO NOT want it to spill beyond the current tournament (hence the two-level groupby)

#df_player = df_player.iloc[::-1]
#df_player['p_total_pts_last3'] = df_player.groupby(['p_id','tour_week'])['m_total_pts'].transform(lambda x: x.rolling(window=3, min_periods = 1).sum().shift(1))
#df_player = df_player.iloc[::-1]
#df_player['p_total_pts_last3']

# if this is the first match in the tournament for the player, we will put 100 pts (average # of pts played in an hour) on the player's legs as a baseline assumption
# Might revisit later, especially for qualifiers (they already played a few matches on-site)
#df_player['p_total_pts_last3'] = df_player['p_total_pts_last3'].fillna(100)

In [125]:
# "Body battery": combine "Stamina" and "Fatigue" by dividing the undecayed minutes played
# over the last 5 matches (p_tot_time_l5) by the CUBE ROOT of matches played (p_matches,
# which is computed with a 300-match rolling window).
# The author tested this empirically as slightly better than dividing by the square root
# in linear modeling; earlier variants divided p_tot_time_l5_decay by sqrt or cbrt instead.
# The power can be tuned with feedback from EDA/modeling, and age could be factored in later.
# Note the direction: at the same recent workload, a player with more matches played gets
# a LOWER value. Keep that in mind when calculating the differential.

df_player["p_stamina_adj_fatigue"] = (
    df_player["p_tot_time_l5"]
    .div(np.cbrt(df_player["p_matches"]))
    .round(2)
)
df_player.head(50)

Unnamed: 0,t_id,t_date,tour_wk,t_name,t_country,t_surf,t_lvl,t_draw_size,m_num,t_round,...,p_HCA_opp_N,m_outcome,p_matches,p_m_time_last,p_tot_time_l5_decay,p_tot_time_l5,p_tot_pts_last,p_tot_pts_l5_decay,p_tot_pts_l5,p_stamina_adj_fatigue
19984,2019-560,20190826,2019_24,US Open,USA,Hard,4,128,2059,R128,...,1,0,1.0,60.0,180.0,300.0,100.0,260.0,500.0,300.0
20367,2019-M014,20191014,2019_29,Moscow,RUS,Hard,1,32,2446,R32,...,0,0,1.0,60.0,180.0,300.0,100.0,260.0,500.0,300.0
18809,2019-M004,20190225,2019_07,Acapulco,MEX,Hard,1,32,545,R32,...,0,0,1.0,60.0,180.0,300.0,100.0,260.0,500.0,300.0
2257,2019-7696,20191105,2019_33,NextGen Finals,ITA,Hard,3,8,2781,F,...,1,1,19.0,75.0,237.4,380.0,123.0,323.1,603.0,142.41
2259,2019-7696,20191105,2019_33,NextGen Finals,ITA,Hard,3,8,2783,SF,...,1,1,18.0,103.0,235.4,365.0,152.0,318.4,580.0,139.27
20539,2019-7696,20191105,2019_33,NextGen Finals,ITA,Hard,3,8,2794,RR3,...,1,0,17.0,56.0,196.8,322.0,88.0,277.2,528.0,125.23
2271,2019-7696,20191105,2019_33,NextGen Finals,ITA,Hard,3,8,2795,RR2,...,1,1,16.0,86.0,206.0,326.0,140.0,296.0,540.0,129.37
2268,2019-7696,20191105,2019_33,NextGen Finals,ITA,Hard,3,8,2792,RR1,...,1,1,15.0,60.0,180.0,300.0,100.0,260.0,500.0,121.64
20447,2019-0337,20191021,2019_30,Vienna,AUT,Hard,1,32,2528,R16,...,0,0,14.0,90.0,210.0,330.0,136.0,292.4,536.0,136.92
2191,2019-0337,20191021,2019_30,Vienna,AUT,Hard,1,32,2541,R32,...,0,1,13.0,60.0,180.0,300.0,100.0,260.0,500.0,127.59


In [None]:
# Eyeball the first 30 rows to check the newly added stamina/fatigue columns.
df_player.head(30)

#### Head-To-Head Match Record Calculations 

In [126]:
# Head-to-head wins: running total of m_outcome for the player against this specific
# opponent on this surface, excluding the current row (shift(1)). The 2000-row window
# is simply large enough to act as "all earlier meetings".
# The frame is reversed for the rolling sum and reversed back afterwards. A first
# meeting has no earlier rows, so its NaN is replaced by 0 wins.
df_player = df_player.iloc[::-1]
df_player['p_H2H_w'] = (
    df_player
    .groupby(['p_id','opp_id','t_surf'])['m_outcome']
    .transform(lambda results: results.rolling(window=2000, min_periods=1).sum().shift(1))
    .fillna(0)
)
df_player = df_player.iloc[::-1]
df_player

Unnamed: 0,t_id,t_date,tour_wk,t_name,t_country,t_surf,t_lvl,t_draw_size,m_num,t_round,...,m_outcome,p_matches,p_m_time_last,p_tot_time_l5_decay,p_tot_time_l5,p_tot_pts_last,p_tot_pts_l5_decay,p_tot_pts_l5,p_stamina_adj_fatigue,p_H2H_w
19984,2019-560,20190826,2019_24,US Open,USA,Hard,4,128,2059,R128,...,0,1.0,60.0,180.0,300.0,100.0,260.0,500.0,300.00,0.0
20367,2019-M014,20191014,2019_29,Moscow,RUS,Hard,1,32,2446,R32,...,0,1.0,60.0,180.0,300.0,100.0,260.0,500.0,300.00,0.0
18809,2019-M004,20190225,2019_07,Acapulco,MEX,Hard,1,32,545,R32,...,0,1.0,60.0,180.0,300.0,100.0,260.0,500.0,300.00,0.0
2257,2019-7696,20191105,2019_33,NextGen Finals,ITA,Hard,3,8,2781,F,...,1,19.0,75.0,237.4,380.0,123.0,323.1,603.0,142.41,0.0
2259,2019-7696,20191105,2019_33,NextGen Finals,ITA,Hard,3,8,2783,SF,...,1,18.0,103.0,235.4,365.0,152.0,318.4,580.0,139.27,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
12825,2014-414,20140714,2014_19,Hamburg,GER,Clay,1,48,16430,R32,...,1,4.0,58.0,178.0,298.0,96.0,256.4,496.0,187.73,0.0
12809,2014-414,20140714,2014_19,Hamburg,GER,Clay,1,48,16414,R64,...,1,3.0,60.0,180.0,300.0,100.0,260.0,500.0,208.01,0.0
31024,2014-321,20140707,2014_18,Stuttgart,GER,Clay,1,28,16360,R32,...,0,2.0,60.0,180.0,300.0,100.0,260.0,500.0,238.11,0.0
30686,2014-308,20140428,2014_13,Munich,GER,Clay,1,28,15713,R32,...,0,1.0,60.0,180.0,300.0,100.0,260.0,500.0,300.00,0.0


In [127]:
# Reorder the columns by theme into a new frame, df_player_1a. Only the columns listed
# here are carried over.
df_player_1a = df_player.loc[:, [
    # tournament / match context
    't_id', 't_date', 'tour_wk', 't_name', 't_country', 't_surf', 't_lvl', 't_draw_size',
    'm_num', 't_round', 't_rd_num', 'm_best_of', 'm_score', 'm_time(m)', 'm_tot_pts',
    # player descriptors
    'p_id', 'p_name', 'p_rank', 'p_rank_pts', 'p_log_rank', 'p_matches', 'p_country',
    'p_ent', 'p_hand', 'p_ht', 'p_age',
    # raw serve counts
    'p_svpt', 'p_1stWon', 'p_2ndWon', 'p_SvGms',
    # opponent identity
    'opp_id', 'opp_name',
    # points-won percentages and their rolling versions
    'p_pts_won%', 'p_pts_won%_l60_decay',
    'p_sv_pts_won%', 'p_sv_pts_won%_l60_decay',
    'p_ret_pts_won%', 'p_ret_pts_won%_l60_decay',
    'p_pts_won%_std_l60', 'p_sv_pts_won%_std_l60', 'p_ret_pts_won%_std_l60',
    # ace and break-point features
    'p_ace%', 'p_ace%_l60_decay', 'p_aced%_l60_decay', 'p_bp_save%_l60', 'p_bp_convert%_l60',
    # fatigue / stamina features
    'p_m_time_last', 'p_tot_time_l5_decay', 'p_tot_time_l5',
    'p_tot_pts_last', 'p_tot_pts_l5_decay', 'p_tot_pts_l5',
    'p_stamina_adj_fatigue',
    # head-to-head
    'p_H2H_w',
    # player-vs-opponent differentials and flags
    'p_opp_rank_diff', 'p_opp_rank_pts_diff', 'p_opp_log_rank_diff', 'p_opp_ht_diff',
    'p_opp_age_diff', 'p_L_opp_R', 'p_HCA_opp_N',
    # outcome flag
    'm_outcome',
]]

In [None]:
# Sanity check: list the columns, non-null counts and dtypes after the column reorganisation.
df_player_1a.info()

In [128]:
# Checkpoint: write the reorganised player-level frame to CSV without the index column.
df_player_1a.to_csv(path_or_buf='../data/df_fulltest.csv', index=False)

#good to here 10:15 AM 04/26/22

### Calculate Some More Match-Specific Differentials of Interest Based on Rolling Stats Accumulated Above

to do this, we are going to re-form the match-specific structure of the original dataframe

In [129]:
# The raw serve counts are no longer needed now that the percentage features exist.
df_player_1a = df_player_1a.drop(columns=["p_svpt", "p_1stWon", "p_2ndWon", "p_SvGms"])

In [130]:
# Temporarily separate the player-level rows by match result:
# m_outcome == 1 goes to the winners frame, m_outcome == 0 to the losers frame.
df_winners2 = df_player_1a.loc[df_player_1a['m_outcome'].eq(1)]
df_losers2 = df_player_1a.loc[df_player_1a['m_outcome'].eq(0)]

In [None]:
# Sanity check: list the columns, non-null counts and dtypes of the winners-only frame.
df_winners2.info()

In [131]:
# Rebuild one row per match: left-join each winner row to the loser row with the same
# match number (m_num). Columns present in both frames get pandas' default suffixes:
# `_x` for the winner (left) side and `_y` for the loser (right) side.
# NOTE(review): this relies on m_num identifying a match uniquely across the whole
# sample -- confirm.
df_player2 = pd.merge(df_winners2, df_losers2, how='left', on='m_num')

In [None]:
#df_player2.head()

In [132]:
# Tournament and match-level fields appear on both sides of the merge; drop the
# loser-side (`_y`) copies and keep the winner-side ones.
df_player2 = df_player2.drop(
    columns=[
        shared + '_y'
        for shared in (
            't_id', 't_name', 't_country', 't_surf', 't_draw_size', 't_lvl', 't_date',
            'm_score', 'm_best_of', 't_round', 'm_time(m)', 'tour_wk', 't_rd_num', 'm_tot_pts',
        )
    ]
)

In [133]:
# Strip the `_x` suffix from the remaining tournament and match-level columns (in place).
# The extra 'round_num_x' entry is kept from the original mapping; rename ignores keys
# that are not present in the frame.
df_player2.rename(
    columns={
        **{
            shared + '_x': shared
            for shared in (
                't_id', 't_name', 't_country', 't_surf', 't_draw_size', 't_rd_num', 't_lvl',
                't_date', 'm_score', 'm_best_of', 't_round', 'm_time(m)', 'tour_wk', 'm_tot_pts',
            )
        },
        'round_num_x': 't_rd_num',
    },
    inplace=True,
)

In [134]:
# Checkpoint: write the match-level (winner + loser) frame to CSV without the index column.
# Author's note: considered OK up to this point as of 10:20 AM 04/26/22.
df_player2.to_csv(path_or_buf='../data/df_fulltest_4.csv', index=False)

In [None]:
#df_player2.head(5)

In [415]:
# Sanity check: summarise the match-level frame (row count, column count, dtypes, memory).
# NOTE(review): the saved output below ends at a column named l_H2H_diff, which looks
# like it comes from a later run of the notebook -- re-run in order to refresh it.
df_player2.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 18269 entries, 0 to 18268
Columns: 151 entries, t_id to l_H2H_diff
dtypes: float64(119), int32(4), int64(15), object(13)
memory usage: 20.9+ MB


#### % Pts Won Opponent Differentials: Recent and Long-Term (Total, Serving and Returning)

Just to be clear, unless specified, these are NOT Head to Head Differentials. They are subtractions for a given stat accrued across periods against ALL players (with other specified filters, such as surface-specificity) for one player minus those accrued by the other player (of course, these rolling windows could include some H2H data just by chance). 

In [135]:
# Decay-weighted % of total points won over the previous 60 matches: winner (`_x`)
# minus loser (`_y`). The loser's differential is the same number with the sign flipped.
# The underlying rolling stats are surface-specific (see the calculations above).

df_player2["w_pts_won%_l60_decay_diff"] = (
    df_player2["p_pts_won%_l60_decay_x"]
    .sub(df_player2["p_pts_won%_l60_decay_y"])
)
df_player2["l_pts_won%_l60_decay_diff"] = df_player2["w_pts_won%_l60_decay_diff"].mul(-1)

In [136]:
# Decay-weighted % of serve points won over the previous 60 matches: winner (`_x`)
# minus loser (`_y`). The loser's differential is the same number with the sign flipped.
# The underlying rolling stats are surface-specific (see the calculations above).

df_player2["w_sv_pts_won%_l60_decay_diff"] = (
    df_player2["p_sv_pts_won%_l60_decay_x"]
    .sub(df_player2["p_sv_pts_won%_l60_decay_y"])
)
df_player2["l_sv_pts_won%_l60_decay_diff"] = df_player2["w_sv_pts_won%_l60_decay_diff"].mul(-1)

In [137]:
# Decay-weighted % of return points won over the previous 60 matches: winner (`_x`)
# minus loser (`_y`). The loser's differential is the same number with the sign flipped.
# The underlying rolling stats are surface-specific (see the calculations above).

df_player2["w_ret_pts_won%_l60_decay_diff"] = (
    df_player2["p_ret_pts_won%_l60_decay_x"]
    .sub(df_player2["p_ret_pts_won%_l60_decay_y"])
)
df_player2["l_ret_pts_won%_l60_decay_diff"] = df_player2["w_ret_pts_won%_l60_decay_diff"].mul(-1)

In [138]:
# Serve-versus-return matchup, using the decay-weighted, surface-specific 60-match stats.
# `_x` columns belong to the winner, `_y` columns to the loser.

# Winner's % serve points won minus loser's % return points won.
df_player2["w_sv_pts_won%_l60_decay_l_ret_pts_won%_l60_decay_diff"] = (
    df_player2["p_sv_pts_won%_l60_decay_x"]
    .sub(df_player2["p_ret_pts_won%_l60_decay_y"])
)

# Loser's % serve points won minus winner's % return points won.
df_player2["l_sv_pts_won%_l60_decay_w_ret_pts_won%_l60_decay_diff"] = (
    df_player2["p_sv_pts_won%_l60_decay_y"]
    .sub(df_player2["p_ret_pts_won%_l60_decay_x"])
)

In [139]:
# Return-versus-serve matchup, using the decay-weighted, surface-specific 60-match stats.
# `_x` columns belong to the winner, `_y` columns to the loser.

# Winner's % return points won minus loser's % serve points won.
df_player2["w_ret_pts_won%_l60_decay_l_sv_pts_won%_l60_decay_diff"] = (
    df_player2["p_ret_pts_won%_l60_decay_x"]
    .sub(df_player2["p_sv_pts_won%_l60_decay_y"])
)

# Loser's % return points won minus winner's % serve points won.
df_player2["l_ret_pts_won%_l60_decay_w_sv_pts_won%_l60_decay_diff"] = (
    df_player2["p_ret_pts_won%_l60_decay_y"]
    .sub(df_player2["p_sv_pts_won%_l60_decay_x"])
)

In [140]:
# Variability (std) of % total points won over the previous 60 matches, no decay:
# winner (`_x`) minus loser (`_y`). The loser's differential is the sign-flipped value.
# The underlying rolling stats are surface-specific (see the calculations above).

df_player2["w_pts_won%_std_l60_diff"] = (
    df_player2["p_pts_won%_std_l60_x"]
    .sub(df_player2["p_pts_won%_std_l60_y"])
)
df_player2["l_pts_won%_std_l60_diff"] = df_player2["w_pts_won%_std_l60_diff"].mul(-1)

In [141]:
# Variability (std) of % serve points won over the previous 60 matches, no decay:
# winner (`_x`) minus loser (`_y`). The loser's differential is the sign-flipped value.
# The underlying rolling stats are surface-specific (see the calculations above).

df_player2["w_sv_pts_won%_std_l60_diff"] = (
    df_player2["p_sv_pts_won%_std_l60_x"]
    .sub(df_player2["p_sv_pts_won%_std_l60_y"])
)
df_player2["l_sv_pts_won%_std_l60_diff"] = df_player2["w_sv_pts_won%_std_l60_diff"].mul(-1)

In [142]:
# Variability (std) of % return points won over the previous 60 matches, no decay:
# winner (`_x`) minus loser (`_y`). The loser's differential is the sign-flipped value.
# The underlying rolling stats are surface-specific (see the calculations above).

df_player2["w_ret_pts_won%_std_l60_diff"] = (
    df_player2["p_ret_pts_won%_std_l60_x"]
    .sub(df_player2["p_ret_pts_won%_std_l60_y"])
)
df_player2["l_ret_pts_won%_std_l60_diff"] = df_player2["w_ret_pts_won%_std_l60_diff"].mul(-1)

In [143]:
# Decay-weighted mean ace% over the previous 60 matches: winner (`_x`) minus loser (`_y`).
# The loser's differential is the sign-flipped value.
# The underlying rolling stats are surface-specific (see the calculations above).

df_player2["w_ace%_l60_decay_diff"] = (
    df_player2["p_ace%_l60_decay_x"]
    .sub(df_player2["p_ace%_l60_decay_y"])
)
df_player2["l_ace%_l60_decay_diff"] = df_player2["w_ace%_l60_decay_diff"].mul(-1)

In [144]:
# Decay-weighted mean aced% (as returner) over the previous 60 matches: winner (`_x`)
# minus loser (`_y`). The loser's differential is the sign-flipped value.
# The underlying rolling stats are surface-specific (see the calculations above).

df_player2["w_aced%_l60_decay_diff"] = (
    df_player2["p_aced%_l60_decay_x"]
    .sub(df_player2["p_aced%_l60_decay_y"])
)
df_player2["l_aced%_l60_decay_diff"] = df_player2["w_aced%_l60_decay_diff"].mul(-1)

In [145]:
# Ace-versus-aced matchup, using the decay-weighted, surface-specific 60-match stats.
# `_x` columns belong to the winner, `_y` columns to the loser.

# Winner's mean ace% minus loser's mean aced% (as returner).
df_player2["w_ace%_l60_decay_l_aced%_l60_decay_diff"] = (
    df_player2["p_ace%_l60_decay_x"]
    .sub(df_player2["p_aced%_l60_decay_y"])
)

# Loser's mean ace% minus winner's mean aced% (as returner).
df_player2["l_ace%_l60_decay_w_aced%_l60_decay_diff"] = (
    df_player2["p_ace%_l60_decay_y"]
    .sub(df_player2["p_aced%_l60_decay_x"])
)

In [146]:
# Aced-versus-ace matchup, using the decay-weighted, surface-specific 60-match stats.
# `_x` columns belong to the winner, `_y` columns to the loser.

# Winner's mean aced% (as returner) minus loser's mean ace%.
df_player2["w_aced%_l60_decay_l_ace%_l60_decay_diff"] = (
    df_player2["p_aced%_l60_decay_x"] - df_player2["p_ace%_l60_decay_y"]
)

# Loser's mean aced% (as returner) minus winner's mean ace%.
# Fix: this previously repeated the winner-side expression (aced%_x - ace%_y), which
# made the loser column an exact copy of the winner column. It now uses the loser's
# aced% (`_y`) and the winner's ace% (`_x`), as the column name describes.
df_player2["l_aced%_l60_decay_w_ace%_l60_decay_diff"] = (
    df_player2["p_aced%_l60_decay_y"] - df_player2["p_ace%_l60_decay_x"]
)

In [147]:
# Mean break-point save % over the previous 60 matches, no decay: winner (`_x`) minus
# loser (`_y`). The loser's differential is the sign-flipped value.
# The underlying rolling stats are surface-specific (see the calculations above).
df_player2["w_bp_save%_l60_diff"] = (
    df_player2["p_bp_save%_l60_x"]
    .sub(df_player2["p_bp_save%_l60_y"])
)
df_player2["l_bp_save%_l60_diff"] = df_player2["w_bp_save%_l60_diff"].mul(-1)

In [148]:
# Mean break-point conversion % (as returner) over the previous 60 matches, no decay:
# winner (`_x`) minus loser (`_y`). The loser's differential is the sign-flipped value.
# The underlying rolling stats are surface-specific (see the calculations above).
df_player2["w_bp_convert%_l60_diff"] = (
    df_player2["p_bp_convert%_l60_x"]
    .sub(df_player2["p_bp_convert%_l60_y"])
)
df_player2["l_bp_convert%_l60_diff"] = df_player2["w_bp_convert%_l60_diff"].mul(-1)

In [149]:
# Serve-vs-return break-point matchup, both perspectives. Inputs are surface-specific means over
# the previous 60 matches with no decay weighting (see the calculations above).

# WINNER (_x) bp saved % minus LOSER (_y) bp converted % (as returner).
df_player2["w_bp_save%_l60_l_bp_convert%_l60_diff"] = df_player2["p_bp_save%_l60_x"].sub(
    df_player2["p_bp_convert%_l60_y"]
)

# LOSER (_y) bp saved % minus WINNER (_x) bp converted % (as returner).
df_player2["l_bp_save%_l60_w_bp_convert%_l60_diff"] = df_player2["p_bp_save%_l60_y"].sub(
    df_player2["p_bp_convert%_l60_x"]
)


In [150]:
# Return-vs-serve break-point matchup, both perspectives. Inputs are surface-specific means over
# the previous 60 matches with no decay weighting (see the calculations above).

# WINNER (_x) bp converted % (as returner) minus LOSER (_y) bp saved %.
df_player2["w_bp_convert%_l60_l_bp_save%_l60_diff"] = np.subtract(
    df_player2["p_bp_convert%_l60_x"], df_player2["p_bp_save%_l60_y"]
)

# LOSER (_y) bp converted % (as returner) minus WINNER (_x) bp saved %.
df_player2["l_bp_convert%_l60_w_bp_save%_l60_diff"] = np.subtract(
    df_player2["p_bp_convert%_l60_y"], df_player2["p_bp_save%_l60_x"]
)

In [None]:
# Inspect df_player2 (columns, dtypes, non-null counts) after adding the differential columns above.
df_player2.info()

#### "Fatigue" and "Battery" Opponent Differentials: Time and Pts Played-Based Features ("Fatigue" short term, "Battery" long term)

In [151]:
# Minutes played in each player's previous match (tournament-specific): winner (_x) minus
# loser (_y). The loser-perspective value is the sign-flipped winner value.
df_player2["w_time_last_diff"] = df_player2["p_m_time_last_x"].sub(df_player2["p_m_time_last_y"])
df_player2["l_time_last_diff"] = df_player2["w_time_last_diff"].mul(-1)

In [152]:
# Decay-adjusted minutes played over the last 5 (within-tournament) matches: winner (_x) minus
# loser (_y). See above for how the underlying totals are computed.
# The loser-perspective value is the sign-flipped winner value.
df_player2["w_tot_time_l5_decay_diff"] = np.subtract(
    df_player2["p_tot_time_l5_decay_x"], df_player2["p_tot_time_l5_decay_y"]
)
df_player2["l_tot_time_l5_decay_diff"] = np.negative(df_player2["w_tot_time_l5_decay_diff"])

In [153]:
# NON decay-adjusted minutes played over the last 5 (within-tournament) matches: winner (_x)
# minus loser (_y). See above for how the underlying totals are computed.
# The loser-perspective value is the sign-flipped winner value.
df_player2["w_tot_time_l5_diff"] = df_player2["p_tot_time_l5_x"].sub(df_player2["p_tot_time_l5_y"])
df_player2["l_tot_time_l5_diff"] = df_player2["w_tot_time_l5_diff"].mul(-1)

In [154]:
# Points played in each player's previous match (tournament-specific): winner (_x) minus
# loser (_y). The loser-perspective value is the sign-flipped winner value.
df_player2["w_pts_last_diff"] = np.subtract(
    df_player2["p_tot_pts_last_x"], df_player2["p_tot_pts_last_y"]
)
df_player2["l_pts_last_diff"] = np.negative(df_player2["w_pts_last_diff"])

In [155]:
# Decay-adjusted points played over the last 5 (within-tournament) matches: winner (_x) minus
# loser (_y). See above for how the underlying totals are computed.
# The loser-perspective value is the sign-flipped winner value.
df_player2["w_tot_pts_l5_decay_diff"] = df_player2["p_tot_pts_l5_decay_x"].sub(
    df_player2["p_tot_pts_l5_decay_y"]
)
df_player2["l_tot_pts_l5_decay_diff"] = df_player2["w_tot_pts_l5_decay_diff"].mul(-1)

In [156]:
# NON decay-adjusted points played over the last 5 (within-tournament) matches: winner (_x)
# minus loser (_y). See above for how the underlying totals are computed.
# The loser-perspective value is the sign-flipped winner value.
df_player2["w_tot_pts_l5_diff"] = np.subtract(
    df_player2["p_tot_pts_l5_x"], df_player2["p_tot_pts_l5_y"]
)
df_player2["l_tot_pts_l5_diff"] = np.negative(df_player2["w_tot_pts_l5_diff"])

In [157]:
# "Battery size": total matches played, accumulated across surfaces and tournaments in the whole
# sample but capped at 150. Winner (_x) minus loser (_y); the loser-perspective value is the
# sign-flipped winner value.
df_player2["w_matches_diff"] = df_player2["p_matches_x"].sub(df_player2["p_matches_y"])
df_player2["l_matches_diff"] = df_player2["w_matches_diff"].mul(-1)

In [158]:
# Difference in the "body battery" (stamina-adjusted fatigue) metric computed in the previous
# section: winner (_x) minus loser (_y). Note that an advantage shows up as a NEGATIVE number.
# The loser-perspective value is the sign-flipped winner value.
df_player2["w_stam_adj_fatigue_diff"] = np.subtract(
    df_player2["p_stamina_adj_fatigue_x"], df_player2["p_stamina_adj_fatigue_y"]
)
df_player2["l_stam_adj_fatigue_diff"] = np.negative(df_player2["w_stam_adj_fatigue_diff"])

In [159]:
# Difference in the prior head-to-head record between the two players (surface-specific, but
# spanning the entire sample): winner (_x) minus loser (_y). The loser-perspective value is the
# sign-flipped winner value.
df_player2["w_H2H_diff"] = df_player2["p_H2H_w_x"].sub(df_player2["p_H2H_w_y"])
df_player2["l_H2H_diff"] = df_player2["w_H2H_diff"].mul(-1)

In [160]:
# Checkpoint the winner/loser (wide) frame to disk before it is split back into one row per player.
df_player2.to_csv(path_or_buf='../data/df_fulltest_5.csv', index=False)

In [None]:
# List the column labels of df_player2; the drop / positional-rename cells below depend on them.
df_player2.columns

In [161]:
# Summary (row count, column count, dtypes, memory) of the full wide frame before it is split.
df_player2.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 18269 entries, 0 to 18268
Columns: 151 entries, t_id to l_H2H_diff
dtypes: float64(119), int32(4), int64(15), object(13)
memory usage: 20.9+ MB


#### Now that we've done this second wave of differentials, we want to go back to a player-by-player organization 

In [162]:
# Build the winner-perspective frame: drop the loser-side (_y) player columns, both opp_id /
# opp_name pairs, both m_outcome columns, and every loser-perspective (l_*) differential.
# NOTE: "p_pts_won%_l60_decay_y" is not in this list, so it survives the drop; the positional
# rename further down labels the column in that slot "p_opp_pts_won%_l60_decay".
# NOTE(review): the later set_axis rename is positional, so the order of the surviving columns
# matters - keep this list and that rename list in sync.
df_winners3 = df_player2.drop(["opp_id_x", "opp_name_x", "m_outcome_x", "m_outcome_y", "p_id_y", "p_name_y", "p_rank_y", "p_rank_pts_y", "p_log_rank_y", "p_matches_y", "p_country_y", "p_ent_y", "p_hand_y", "p_ht_y", "p_age_y", "opp_id_y", "opp_name_y", "p_pts_won%_y", "p_sv_pts_won%_y", "p_sv_pts_won%_l60_decay_y", "p_ret_pts_won%_y", "p_ret_pts_won%_l60_decay_y", "p_pts_won%_std_l60_y", "p_sv_pts_won%_std_l60_y", "p_ret_pts_won%_std_l60_y", "p_ace%_y", "p_ace%_l60_decay_y", "p_aced%_l60_decay_y", "p_bp_save%_l60_y", "p_bp_convert%_l60_y", "p_m_time_last_y", "p_tot_time_l5_decay_y", "p_tot_time_l5_y", "p_tot_pts_last_y", "p_tot_pts_l5_decay_y", "p_tot_pts_l5_y", "p_stamina_adj_fatigue_y", "p_H2H_w_y", "p_opp_rank_diff_y", "p_opp_rank_pts_diff_y", "p_opp_log_rank_diff_y", "p_opp_ht_diff_y", "p_opp_age_diff_y", "p_L_opp_R_y","p_HCA_opp_N_y", "l_pts_won%_l60_decay_diff", "l_sv_pts_won%_l60_decay_diff","l_ret_pts_won%_l60_decay_diff", "l_sv_pts_won%_l60_decay_w_ret_pts_won%_l60_decay_diff", "l_ret_pts_won%_l60_decay_w_sv_pts_won%_l60_decay_diff", "l_pts_won%_std_l60_diff", "l_sv_pts_won%_std_l60_diff", "l_ret_pts_won%_std_l60_diff", "l_ace%_l60_decay_diff", "l_aced%_l60_decay_diff", "l_ace%_l60_decay_w_aced%_l60_decay_diff", "l_aced%_l60_decay_w_ace%_l60_decay_diff", "l_bp_save%_l60_diff", "l_bp_convert%_l60_diff", "l_bp_save%_l60_w_bp_convert%_l60_diff", "l_bp_convert%_l60_w_bp_save%_l60_diff", "l_time_last_diff", "l_tot_time_l5_decay_diff", "l_tot_time_l5_diff", "l_pts_last_diff", "l_tot_pts_l5_decay_diff", "l_tot_pts_l5_diff", "l_matches_diff", "l_stam_adj_fatigue_diff" ,"l_H2H_diff"], axis = 1)

In [163]:
# Check which columns remain (and in what order) in the winner-perspective frame after the drop.
df_winners3.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 18269 entries, 0 to 18268
Data columns (total 81 columns):
 #   Column                                                 Non-Null Count  Dtype  
---  ------                                                 --------------  -----  
 0   t_id                                                   18269 non-null  object 
 1   t_date                                                 18269 non-null  int64  
 2   tour_wk                                                18269 non-null  object 
 3   t_name                                                 18269 non-null  object 
 4   t_country                                              18269 non-null  object 
 5   t_surf                                                 18269 non-null  object 
 6   t_lvl                                                  18269 non-null  int64  
 7   t_draw_size                                            18269 non-null  int64  
 8   m_num                                         

In [164]:
# Outcome flag for the winner-perspective rows: 1 = winner.
# Plain assignment appends "m_outcome" as the LAST column, which the positional rename below relies on.
df_winners3["m_outcome"] = 1

In [None]:
#df_winners3.to_csv('../data/df_winners.csv', index=False)

In [165]:
# Rename the columns to neutral player ("p_") / opponent ("opp") labels so the winner and loser
# frames share one schema and can be concatenated.
# set_axis is POSITIONAL: the i-th label below replaces the i-th existing column, so this list
# must match the current column order of df_winners3 exactly (same length, same order).
df_winners3 = df_winners3.set_axis(["t_id", "t_date", "tour_wk", "t_name", "t_country", "t_surf", "t_lvl", "t_draw_size", "m_num", "t_round", "t_rd_num", "m_best_of", "m_score","m_time(m)", "m_tot_pts", "p_id", "p_name","p_rank", "p_rank_pts", "p_log_rank", "p_matches", "p_country", "p_ent", "p_hand", "p_ht", "p_age", "p_pts_won%", "p_pts_won%_l60_decay","p_sv_pts_won%", "p_sv_pts_won%_l60_decay", "p_ret_pts_won%", "p_ret_pts_won%_l60_decay", "p_pts_won%_std_l60",'p_sv_pts_won%_std_l60','p_ret_pts_won%_std_l60', "p_ace%", "p_ace%_l60_decay", "p_aced%_l60_decay", "p_bp_save%_l60", "p_bp_convert%_l60", "p_m_time_last", "p_tot_time_l5_decay", "p_tot_time_l5", "p_tot_pts_last", "p_tot_pts_l5_decay", "p_tot_pts_l5", "p_stamina_adj_fatigue", "p_H2H_w", "p_opp_rank_diff", "p_opp_rank_pts_diff","p_opp_log_rank_diff","p_opp_ht_diff","p_opp_age_diff","p_L_opp_R","p_HCA_opp_N", "p_opp_pts_won%_l60_decay", "p_pts_won%_l60_decay_diff", "p_sv_pts_won%_l60_decay_diff", "p_ret_pts_won%_l60_decay_diff", "p_sv_pts_won%_l60_decay_opp_ret_pts_won%_l60_decay_diff", "p_ret_pts_won%_l60_decay_opp_sv_pts_won%_l60_decay_diff", "p_pts_won%_std_l60_diff", "p_sv_pts_won%_std_l60_diff", "p_ret_pts_won%_std_l60_diff", "p_ace%_l60_decay_diff", "p_aced%_l60_decay_diff", "p_ace%_l60_decay_opp_aced%_l60_decay_diff", "p_aced%_l60_decay_opp_ace%_l60_decay_diff", "p_bp_save%_l60_diff", "p_bp_convert%_l60_diff", "p_bp_save%_l60_opp_bp_convert%_l60_diff", "p_bp_convert%_l60_opp_bp_save%_l60_diff", "p_time_last_diff", "p_tot_time_l5_decay_diff", "p_tot_time_l5_diff", "p_pts_last_diff", "p_tot_pts_l5_decay_diff", "p_tot_pts_l5_diff", "p_matches_diff", "p_stam_adj_fatigue_diff", "p_H2H_diff", "m_outcome"], axis=1)
#df_winners.head(30)

In [None]:
# Verify the winner-perspective frame after the positional rename (labels should line up with dtypes).
df_winners3.info()

In [166]:
# Build the loser-perspective frame: drop the winner-side (_x) player columns, both opp_id /
# opp_name pairs, both m_outcome columns, and every winner-perspective (w_*) differential.
# NOTE: "p_pts_won%_l60_decay_x" is not in this list, so it survives the drop; the positional
# rename further down labels the column in that slot "p_opp_pts_won%_l60_decay".
# NOTE(review): the later set_axis rename is positional, so the order of the surviving columns
# matters - keep this list and that rename list in sync.
df_losers3 = df_player2.drop(["opp_id_y","opp_name_y","m_outcome_x", "m_outcome_y", "p_id_x","p_name_x", "p_rank_x", "p_rank_pts_x", "p_log_rank_x", "p_matches_x", "p_country_x", "p_ent_x", "p_hand_x", "p_ht_x", "p_age_x", "opp_id_x", "opp_name_x", "p_pts_won%_x", "p_sv_pts_won%_x", "p_sv_pts_won%_l60_decay_x", "p_ret_pts_won%_x", "p_ret_pts_won%_l60_decay_x", 'p_pts_won%_std_l60_x','p_sv_pts_won%_std_l60_x','p_ret_pts_won%_std_l60_x', "p_ace%_x","p_ace%_l60_decay_x", "p_aced%_l60_decay_x", "p_bp_save%_l60_x", "p_bp_convert%_l60_x", "p_m_time_last_x", "p_tot_time_l5_decay_x", "p_tot_time_l5_x", "p_tot_pts_last_x", "p_tot_pts_l5_decay_x", "p_tot_pts_l5_x", "p_stamina_adj_fatigue_x", "p_H2H_w_x", "p_opp_rank_diff_x", "p_opp_rank_pts_diff_x", "p_opp_log_rank_diff_x", "p_opp_ht_diff_x", "p_opp_age_diff_x", "p_L_opp_R_x","p_HCA_opp_N_x", "w_pts_won%_l60_decay_diff", "w_sv_pts_won%_l60_decay_diff","w_ret_pts_won%_l60_decay_diff", "w_sv_pts_won%_l60_decay_l_ret_pts_won%_l60_decay_diff", "w_ret_pts_won%_l60_decay_l_sv_pts_won%_l60_decay_diff", "w_pts_won%_std_l60_diff", "w_sv_pts_won%_std_l60_diff", "w_ret_pts_won%_std_l60_diff", "w_ace%_l60_decay_diff", "w_aced%_l60_decay_diff", "w_ace%_l60_decay_l_aced%_l60_decay_diff", "w_aced%_l60_decay_l_ace%_l60_decay_diff", "w_bp_save%_l60_diff", "w_bp_convert%_l60_diff", "w_bp_save%_l60_l_bp_convert%_l60_diff", "w_bp_convert%_l60_l_bp_save%_l60_diff", "w_time_last_diff", "w_tot_time_l5_decay_diff", "w_tot_time_l5_diff", "w_pts_last_diff", "w_tot_pts_l5_decay_diff", "w_tot_pts_l5_diff", "w_matches_diff", "w_stam_adj_fatigue_diff" ,"w_H2H_diff" ], axis = 1)
#df_losers3.info()

In [167]:
# Outcome flag for the loser-perspective rows: 0 = loser.
# Plain assignment appends "m_outcome" as the LAST column, which the positional rename below relies on.
df_losers3["m_outcome"] = 0

In [None]:
#df_losers3.to_csv('../data/df_losers.csv', index=False)

In [168]:
# Check which columns remain (and in what order) in the loser-perspective frame before renaming.
df_losers3.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 18269 entries, 0 to 18268
Data columns (total 82 columns):
 #   Column                                                 Non-Null Count  Dtype  
---  ------                                                 --------------  -----  
 0   t_id                                                   18269 non-null  object 
 1   t_date                                                 18269 non-null  int64  
 2   tour_wk                                                18269 non-null  object 
 3   t_name                                                 18269 non-null  object 
 4   t_country                                              18269 non-null  object 
 5   t_surf                                                 18269 non-null  object 
 6   t_lvl                                                  18269 non-null  int64  
 7   t_draw_size                                            18269 non-null  int64  
 8   m_num                                         

In [169]:
# Rename the columns to neutral player ("p_") / opponent ("opp") labels so the winner and loser
# frames share one schema and can be concatenated.
# set_axis is POSITIONAL: the i-th label below replaces the i-th existing column, so this list
# must match the current column order of df_losers3 exactly (same length, same order).
# Here "p_opp_pts_won%_l60_decay" sits right after "m_tot_pts" (it sits after "p_HCA_opp_N" in the
# winners' list); pd.concat aligns columns by label, so the differing position is harmless.
df_losers3 = df_losers3.set_axis(["t_id", "t_date", "tour_wk", "t_name", "t_country", "t_surf", "t_lvl", "t_draw_size", "m_num", "t_round", "t_rd_num", "m_best_of", "m_score","m_time(m)", "m_tot_pts", "p_opp_pts_won%_l60_decay", "p_id", "p_name","p_rank", "p_rank_pts", "p_log_rank", "p_matches", "p_country", "p_ent", "p_hand", "p_ht", "p_age", "p_pts_won%", "p_pts_won%_l60_decay", "p_sv_pts_won%", "p_sv_pts_won%_l60_decay", "p_ret_pts_won%", "p_ret_pts_won%_l60_decay",'p_pts_won%_std_l60', 'p_sv_pts_won%_std_l60', 'p_ret_pts_won%_std_l60', "p_ace%", "p_ace%_l60_decay", "p_aced%_l60_decay", "p_bp_save%_l60","p_bp_convert%_l60", "p_m_time_last", "p_tot_time_l5_decay", "p_tot_time_l5", "p_tot_pts_last", "p_tot_pts_l5_decay", "p_tot_pts_l5", "p_stamina_adj_fatigue", "p_H2H_w", "p_opp_rank_diff", "p_opp_rank_pts_diff", "p_opp_log_rank_diff", "p_opp_ht_diff", "p_opp_age_diff", "p_L_opp_R", "p_HCA_opp_N", "p_pts_won%_l60_decay_diff", "p_sv_pts_won%_l60_decay_diff", "p_ret_pts_won%_l60_decay_diff", "p_sv_pts_won%_l60_decay_opp_ret_pts_won%_l60_decay_diff", "p_ret_pts_won%_l60_decay_opp_sv_pts_won%_l60_decay_diff", "p_pts_won%_std_l60_diff", "p_sv_pts_won%_std_l60_diff", "p_ret_pts_won%_std_l60_diff", "p_ace%_l60_decay_diff", "p_aced%_l60_decay_diff", "p_ace%_l60_decay_opp_aced%_l60_decay_diff", "p_aced%_l60_decay_opp_ace%_l60_decay_diff", "p_bp_save%_l60_diff","p_bp_convert%_l60_diff", "p_bp_save%_l60_opp_bp_convert%_l60_diff", "p_bp_convert%_l60_opp_bp_save%_l60_diff", "p_time_last_diff", "p_tot_time_l5_decay_diff", "p_tot_time_l5_diff", "p_pts_last_diff", "p_tot_pts_l5_decay_diff", "p_tot_pts_l5_diff", "p_matches_diff", "p_stam_adj_fatigue_diff", "p_H2H_diff", "m_outcome"], axis=1)

In [None]:
# Verify the loser-perspective frame after the positional rename (labels should line up with dtypes).
df_losers3.info()

In [170]:
# Stack the winner-perspective rows on top of the loser-perspective rows to get one row per
# player per match (no separate winner/loser columns). Columns are aligned by label and the
# result gets a fresh 0..n-1 index.
df_player4 = pd.concat(objs=[df_winners3, df_losers3], axis=0, ignore_index=True)

df_player4.head(n=20)

Unnamed: 0,t_id,t_date,tour_wk,t_name,t_country,t_surf,t_lvl,t_draw_size,m_num,t_round,...,p_time_last_diff,p_tot_time_l5_decay_diff,p_tot_time_l5_diff,p_pts_last_diff,p_tot_pts_l5_decay_diff,p_tot_pts_l5_diff,p_matches_diff,p_stam_adj_fatigue_diff,p_H2H_diff,m_outcome
0,2019-7696,20191105,2019_33,NextGen Finals,ITA,Hard,3,8,2781,F,...,2.0,16.8,11.0,9.0,39.9,49.0,-80.0,62.64,0.0,1
1,2019-7696,20191105,2019_33,NextGen Finals,ITA,Hard,3,8,2783,SF,...,45.0,29.2,28.0,53.0,36.6,44.0,-18.0,37.21,0.0,1
2,2019-7696,20191105,2019_33,NextGen Finals,ITA,Hard,3,8,2795,RR2,...,-15.0,-15.0,-15.0,13.0,11.7,13.0,-5.0,5.77,0.0,1
3,2019-7696,20191105,2019_33,NextGen Finals,ITA,Hard,3,8,2792,RR1,...,0.0,0.0,0.0,0.0,0.0,0.0,-106.0,60.99,1.0,1
4,2019-0337,20191021,2019_30,Vienna,AUT,Hard,1,32,2541,R32,...,0.0,0.0,0.0,0.0,0.0,0.0,-287.0,82.78,0.0,1
5,2019-7485,20191014,2019_29,Antwerp,BEL,Hard,1,32,2407,QF,...,-18.0,-42.0,-48.0,-22.0,-48.5,-63.0,-106.0,65.95,0.0,1
6,2019-7485,20191014,2019_29,Antwerp,BEL,Hard,1,32,2411,R16,...,9.0,9.0,9.0,11.0,9.9,11.0,-290.0,98.62,0.0,1
7,2019-7485,20191014,2019_29,Antwerp,BEL,Hard,1,32,2419,R32,...,0.0,0.0,0.0,0.0,0.0,0.0,4.0,-31.22,0.0,1
8,2019-0439,20190715,2019_18,Umag,CRO,Clay,1,32,1705,R32,...,0.0,0.0,0.0,0.0,0.0,0.0,-10.0,53.8,0.0,1
9,2019-M009,20190513,2019_15,Rome Masters,ITA,Clay,2,64,1139,R64,...,0.0,0.0,0.0,0.0,0.0,0.0,-242.0,190.1,0.0,1


In [171]:
# Order rows by player id, then tournament week, then round number - each key descending -
# and display the sorted frame.
df_player4 = df_player4.sort_values(
    by=['p_id', 'tour_wk', 't_rd_num'],
    ascending=[False, False, False],
)
df_player4

Unnamed: 0,t_id,t_date,tour_wk,t_name,t_country,t_surf,t_lvl,t_draw_size,m_num,t_round,...,p_time_last_diff,p_tot_time_l5_decay_diff,p_tot_time_l5_diff,p_pts_last_diff,p_tot_pts_l5_decay_diff,p_tot_pts_l5_diff,p_matches_diff,p_stam_adj_fatigue_diff,p_H2H_diff,m_outcome
34850,2019-560,20190826,2019_24,US Open,USA,Hard,4,128,2059,R128,...,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,-225.0,250.75,-0.0,0
18294,2019-M014,20191014,2019_29,Moscow,RUS,Hard,1,32,2446,R32,...,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,-0.00,-0.0,0
19382,2019-M004,20190225,2019_07,Acapulco,MEX,Hard,1,32,545,R32,...,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,-25.0,198.73,-0.0,0
0,2019-7696,20191105,2019_33,NextGen Finals,ITA,Hard,3,8,2781,F,...,2.0,16.8,11.0,9.0,39.9,49.0,-80.0,62.64,0.0,1
1,2019-7696,20191105,2019_33,NextGen Finals,ITA,Hard,3,8,2783,SF,...,45.0,29.2,28.0,53.0,36.6,44.0,-18.0,37.21,0.0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
18267,2014-414,20140714,2014_19,Hamburg,GER,Clay,1,48,16430,R32,...,-2.0,-2.0,-2.0,-4.0,-3.6,-4.0,-111.0,126.04,0.0,1
18268,2014-414,20140714,2014_19,Hamburg,GER,Clay,1,48,16414,R64,...,0.0,0.0,0.0,0.0,0.0,0.0,-108.0,145.59,0.0,1
30990,2014-321,20140707,2014_18,Stuttgart,GER,Clay,1,28,16360,R32,...,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,-94.0,172.59,-0.0,0
35726,2014-308,20140428,2014_13,Munich,GER,Clay,1,28,15713,R32,...,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,-81.0,230.95,-0.0,0


In [174]:
# Checkpoint the sorted player-by-player frame to disk before the Strength-of-Schedule features are added.
df_player4.to_csv(path_or_buf='../data/df_fulltest_6.csv', index=False)

### "Strength of Schedule" (SOS) Calculation, and SOS-Adjusted Versions of Features

The idea here is that, for a given player in a given match to be predicted, we already have a set of decay-weighted metrics over that player's previous 60 matches. This is useful, but it lacks the context of how good the opponents encountered during that stretch were at the time each match was played. So we can take a decay-weighted average of how THOSE OPPONENTS had themselves performed, on a decay-weighted basis, leading into their match with our player of interest. We can then evaluate how well our player of interest performed (for example, % pts won) relative to what their opponents yielded on average leading into that match (to continue the example, % pts yielded). This is tricky to reason about and requires constant care not to introduce data leakage. I think it will be a massive improvement over the large improvements already seen with the decay-weighted longer-term past performance without SOS normalization. How to weight the l60 metrics by the delta between past performance and the schedule-implied expectation of past performance will be largely an empirical question.

In [175]:
# Decay-weighted "Strength of Schedule" (SOS) inputs for each player/match row: the mean, over
# blocks of 10 previous same-surface opponents, of THOSE OPPONENTS' own decay-weighted % pts won
# over their last 60 matches going into the match against the player of interest
# ('p_opp_pts_won%_l60_decay'). See the notes at the start of this section for the reasoning.


def _sos_window_mean(opp_strength, lag, window=10, relaxed_min=5):
    """Mean opponent strength over a `window`-match block ending `lag` matches before each row.

    Parameters
    ----------
    opp_strength : pd.Series
        One (player, surface) group's 'p_opp_pts_won%_l60_decay' values, ordered oldest -> newest.
    lag : int
        How many rows to shift the rolling mean back (1 -> previous 1-10 matches,
        11 -> previous 11-20 matches, ...). Always >= 1, so the current match never
        contributes to its own feature.
    window : int
        Rolling block size in matches.
    relaxed_min : int
        Fewest observations accepted in the block while there are still nearer lags to try.

    Returns
    -------
    pd.Series
        Same index as `opp_strength`, rounded to 2 decimals. Fallback order when the target
        block has too little history: the same block at lag-1, lag-2, ..., 2 (still requiring
        `relaxed_min` observations), then the block at lag 1 with any number of observations
        (>= 1). Rows with no earlier match in the group stay NaN. The nearer fallbacks overlap
        the more recent blocks by design.

    Notes
    -----
    `min_periods` only decides whether a rolling result is reported or masked as NaN; it never
    changes the value. The former fillna cascades (min_periods=10, 9, ... at a fixed shift)
    therefore equal one pass at the lowest min_periods, which is what is computed here.
    """
    block_mean = opp_strength.rolling(window=window, min_periods=relaxed_min).mean().round(2)
    sos = block_mean.shift(lag)
    for nearer_lag in range(lag - 1, 1, -1):
        sos = sos.fillna(block_mean.shift(nearer_lag))
    # Last resort: most recent block with whatever history exists. Never shift by 0, because
    # that would count the match being predicted.
    any_history = opp_strength.rolling(window=window, min_periods=1).mean().round(2)
    return sos.fillna(any_history.shift(1))


# The frame was sorted in descending order in the sort cell above; reverse it so the rolling
# windows look backwards in time.
# NOTE: this reversal is not idempotent - re-running this cell without first re-running the
# sort cell flips the order again and silently corrupts the features.
df_player4 = df_player4.iloc[::-1]

# Previous 1-10 same-surface matches.
df_player4['p_SOS_l60_decay_1to10'] = df_player4.groupby(['p_id','t_surf'])['p_opp_pts_won%_l60_decay'].transform(
    lambda x: _sos_window_mean(x, lag=1)
)

# Previous 11-20 same-surface matches (with the nearer-block fallbacks described in the helper).
df_player4['p_SOS_l60_decay_11to20'] = df_player4.groupby(['p_id','t_surf'])['p_opp_pts_won%_l60_decay'].transform(
    lambda x: _sos_window_mean(x, lag=11)
)

df_player4['p_SOS_l60_decay_21to30'] = df_player4.groupby(['p_id','t_surf'])['p_opp_pts_won%_l60_decay'].transform(lambda x: x.rolling(window=10, min_periods = 10).mean().round(2).shift(21))
df_player4['p_SOS_l60_decay_21to30'] = df_player4['p_SOS_l60_decay_21to30'].fillna(df_player4.groupby(['p_id','t_surf'])['p_opp_pts_won%_l60_decay'].transform(lambda x: x.rolling(window=10, min_periods = 9).mean().round(2).shift(21)))
df_player4['p_SOS_l60_decay_21to30'] = df_player4['p_SOS_l60_decay_21to30'].fillna(df_player4.groupby(['p_id','t_surf'])['p_opp_pts_won%_l60_decay'].transform(lambda x: x.rolling(window=10, min_periods = 8).mean().round(2).shift(21)))
df_player4['p_SOS_l60_decay_21to30'] = df_player4['p_SOS_l60_decay_21to30'].fillna(df_player4.groupby(['p_id','t_surf'])['p_opp_pts_won%_l60_decay'].transform(lambda x: x.rolling(window=10, min_periods = 7).mean().round(2).shift(21)))
df_player4['p_SOS_l60_decay_21to30'] = df_player4['p_SOS_l60_decay_21to30'].fillna(df_player4.groupby(['p_id','t_surf'])['p_opp_pts_won%_l60_decay'].transform(lambda x: x.rolling(window=10, min_periods = 6).mean().round(2).shift(21)))
df_player4['p_SOS_l60_decay_21to30'] = df_player4['p_SOS_l60_decay_21to30'].fillna(df_player4.groupby(['p_id','t_surf'])['p_opp_pts_won%_l60_decay'].transform(lambda x: x.rolling(window=10, min_periods = 5).mean().round(2).shift(21)))
df_player4['p_SOS_l60_decay_21to30'] = df_player4['p_SOS_l60_decay_21to30'].fillna(df_player4.groupby(['p_id','t_surf'])['p_opp_pts_won%_l60_decay'].transform(lambda x: x.rolling(window=10, min_periods = 5).mean().round(2).shift(20)))
df_player4['p_SOS_l60_decay_21to30'] = df_player4['p_SOS_l60_decay_21to30'].fillna(df_player4.groupby(['p_id','t_surf'])['p_opp_pts_won%_l60_decay'].transform(lambda x: x.rolling(window=10, min_periods = 5).mean().round(2).shift(19)))
df_player4['p_SOS_l60_decay_21to30'] = df_player4['p_SOS_l60_decay_21to30'].fillna(df_player4.groupby(['p_id','t_surf'])['p_opp_pts_won%_l60_decay'].transform(lambda x: x.rolling(window=10, min_periods = 5).mean().round(2).shift(18)))
df_player4['p_SOS_l60_decay_21to30'] = df_player4['p_SOS_l60_decay_21to30'].fillna(df_player4.groupby(['p_id','t_surf'])['p_opp_pts_won%_l60_decay'].transform(lambda x: x.rolling(window=10, min_periods = 5).mean().round(2).shift(17)))
df_player4['p_SOS_l60_decay_21to30'] = df_player4['p_SOS_l60_decay_21to30'].fillna(df_player4.groupby(['p_id','t_surf'])['p_opp_pts_won%_l60_decay'].transform(lambda x: x.rolling(window=10, min_periods = 5).mean().round(2).shift(16)))
# Remaining fallback steps for 'p_SOS_l60_decay_21to30'.
# Each step computes, per (p_id, t_surf) group, a 10-row rolling mean of
# 'p_opp_pts_won%_l60_decay' (rounded to 2 dp), shifts it back by `lag` rows,
# and uses it to fill only the rows of the target column that are still NaN.
# Steps are applied in order, so earlier (longer-lag) values take priority.
# NOTE(review): assumes rows are already in chronological order within each group -- confirm upstream.
_sos_col = 'p_SOS_l60_decay_21to30'
_sos_grouped = df_player4.groupby(['p_id','t_surf'])['p_opp_pts_won%_l60_decay']  # built once; source and key columns are not modified below
_sos_schedule = (
    # (min_periods, shift): walk the lag from 15 down to 1 with at least 5 observations.
    # The lag never reaches 0: an unshifted rolling window would include the current row itself.
    [(5, _lag) for _lag in range(15, 0, -1)]
    # With the lag fixed at 1, relax the observation requirement from 4 down to 1.
    + [(_min_obs, 1) for _min_obs in range(4, 0, -1)]
)
for _min_obs, _lag in _sos_schedule:
    df_player4[_sos_col] = df_player4[_sos_col].fillna(
        _sos_grouped.transform(
            # Bind the loop values as defaults so the lambda does not depend on late binding.
            lambda x, m=_min_obs, k=_lag: x.rolling(window=10, min_periods=m).mean().round(2).shift(k)
        )
    )

# Build 'p_SOS_l60_decay_31to40': per (p_id, t_surf) group, the 10-row rolling mean of
# 'p_opp_pts_won%_l60_decay' (rounded to 2 dp) shifted back 31 rows, i.e. the window
# covering rows 31-40 before the current one.
# Where that is NaN, progressively weaker fallbacks are applied in order; each fallback
# fills only the rows that are still NaN, so earlier steps take priority.
# NOTE(review): assumes rows are already in chronological order within each group -- confirm upstream.
# NOTE(review): at a fixed shift, lowering min_periods only turns NaNs into values, so the
# min_periods 10..6 steps at shift 31 look subsumed by the min_periods=5 step; kept as-is
# to preserve the original sequence -- confirm before simplifying.
_sos_col = 'p_SOS_l60_decay_31to40'
_sos_grouped = df_player4.groupby(['p_id','t_surf'])['p_opp_pts_won%_l60_decay']  # built once; source and key columns are not modified below
_sos_schedule = (
    # (min_periods, shift): full window first, then relax min_periods 10 -> 5 at the nominal lag of 31.
    [(_min_obs, 31) for _min_obs in range(10, 4, -1)]
    # Then walk the lag from 30 down to 1 with at least 5 observations.
    # The lag never reaches 0: an unshifted rolling window would include the current row itself.
    + [(5, _lag) for _lag in range(30, 0, -1)]
    # Finally, with the lag fixed at 1, relax the observation requirement from 4 down to 1.
    + [(_min_obs, 1) for _min_obs in range(4, 0, -1)]
)
for _sos_first, (_min_obs, _lag) in zip([True] + [False] * (len(_sos_schedule) - 1), _sos_schedule):
    _sos_candidate = _sos_grouped.transform(
        # Bind the loop values as defaults so the lambda does not depend on late binding.
        lambda x, m=_min_obs, k=_lag: x.rolling(window=10, min_periods=m).mean().round(2).shift(k)
    )
    if _sos_first:
        # The first step (re)creates the column outright, so re-running this block starts clean.
        df_player4[_sos_col] = _sos_candidate
    else:
        df_player4[_sos_col] = df_player4[_sos_col].fillna(_sos_candidate)

# Build 'p_SOS_l60_decay_41to50': per (p_id, t_surf) group, the 10-row rolling mean of
# 'p_opp_pts_won%_l60_decay' (rounded to 2 dp) shifted back 41 rows, i.e. the window
# covering rows 41-50 before the current one.
# Where that is NaN, progressively weaker fallbacks are applied in order; each fallback
# fills only the rows that are still NaN, so earlier steps take priority.
# NOTE(review): assumes rows are already in chronological order within each group -- confirm upstream.
# NOTE(review): at a fixed shift, lowering min_periods only turns NaNs into values, so the
# min_periods 10..6 steps at shift 41 look subsumed by the min_periods=5 step; kept as-is
# to preserve the original sequence -- confirm before simplifying.
_sos_col = 'p_SOS_l60_decay_41to50'
_sos_grouped = df_player4.groupby(['p_id','t_surf'])['p_opp_pts_won%_l60_decay']  # built once; source and key columns are not modified below
_sos_schedule = (
    # (min_periods, shift): full window first, then relax min_periods 10 -> 5 at the nominal lag of 41.
    [(_min_obs, 41) for _min_obs in range(10, 4, -1)]
    # Then walk the lag from 40 down to 1 with at least 5 observations.
    # The lag never reaches 0: an unshifted rolling window would include the current row itself.
    + [(5, _lag) for _lag in range(40, 0, -1)]
    # Finally, with the lag fixed at 1, relax the observation requirement from 4 down to 1.
    + [(_min_obs, 1) for _min_obs in range(4, 0, -1)]
)
for _sos_first, (_min_obs, _lag) in zip([True] + [False] * (len(_sos_schedule) - 1), _sos_schedule):
    _sos_candidate = _sos_grouped.transform(
        # Bind the loop values as defaults so the lambda does not depend on late binding.
        lambda x, m=_min_obs, k=_lag: x.rolling(window=10, min_periods=m).mean().round(2).shift(k)
    )
    if _sos_first:
        # The first step (re)creates the column outright, so re-running this block starts clean.
        df_player4[_sos_col] = _sos_candidate
    else:
        df_player4[_sos_col] = df_player4[_sos_col].fillna(_sos_candidate)

# Start building 'p_SOS_l60_decay_51to60': per (p_id, t_surf) group, the 10-row rolling mean of
# 'p_opp_pts_won%_l60_decay' (rounded to 2 dp) shifted back 51 rows, i.e. the window
# covering rows 51-60 before the current one.
# Where that is NaN, progressively weaker fallbacks are applied in order; each fallback
# fills only the rows that are still NaN, so earlier steps take priority.
# This loop covers the steps down to a lag of 18; any further fallback statements for this
# column follow after it unchanged.
# NOTE(review): assumes rows are already in chronological order within each group -- confirm upstream.
_sos_col = 'p_SOS_l60_decay_51to60'
_sos_grouped = df_player4.groupby(['p_id','t_surf'])['p_opp_pts_won%_l60_decay']  # built once; source and key columns are not modified below
_sos_schedule = (
    # (min_periods, shift): full window first, then relax min_periods 10 -> 5 at the nominal lag of 51.
    [(_min_obs, 51) for _min_obs in range(10, 4, -1)]
    # Then walk the lag from 50 down to 18 with at least 5 observations.
    + [(5, _lag) for _lag in range(50, 17, -1)]
)
for _sos_first, (_min_obs, _lag) in zip([True] + [False] * (len(_sos_schedule) - 1), _sos_schedule):
    _sos_candidate = _sos_grouped.transform(
        # Bind the loop values as defaults so the lambda does not depend on late binding.
        lambda x, m=_min_obs, k=_lag: x.rolling(window=10, min_periods=m).mean().round(2).shift(k)
    )
    if _sos_first:
        # The first step (re)creates the column outright, so re-running this block starts clean.
        df_player4[_sos_col] = _sos_candidate
    else:
        df_player4[_sos_col] = df_player4[_sos_col].fillna(_sos_candidate)
df_player4['p_SOS_l60_decay_51to60'] = df_player4['p_SOS_l60_decay_51to60'].fillna(df_player4.groupby(['p_id','t_surf'])['p_opp_pts_won%_l60_decay'].transform(lambda x: x.rolling(window=10, min_periods = 5).mean().round(2).shift(17)))
df_player4['p_SOS_l60_decay_51to60'] = df_player4['p_SOS_l60_decay_51to60'].fillna(df_player4.groupby(['p_id','t_surf'])['p_opp_pts_won%_l60_decay'].transform(lambda x: x.rolling(window=10, min_periods = 5).mean().round(2).shift(16)))
df_player4['p_SOS_l60_decay_51to60'] = df_player4['p_SOS_l60_decay_51to60'].fillna(df_player4.groupby(['p_id','t_surf'])['p_opp_pts_won%_l60_decay'].transform(lambda x: x.rolling(window=10, min_periods = 5).mean().round(2).shift(15)))
df_player4['p_SOS_l60_decay_51to60'] = df_player4['p_SOS_l60_decay_51to60'].fillna(df_player4.groupby(['p_id','t_surf'])['p_opp_pts_won%_l60_decay'].transform(lambda x: x.rolling(window=10, min_periods = 5).mean().round(2).shift(14)))
df_player4['p_SOS_l60_decay_51to60'] = df_player4['p_SOS_l60_decay_51to60'].fillna(df_player4.groupby(['p_id','t_surf'])['p_opp_pts_won%_l60_decay'].transform(lambda x: x.rolling(window=10, min_periods = 5).mean().round(2).shift(13)))
df_player4['p_SOS_l60_decay_51to60'] = df_player4['p_SOS_l60_decay_51to60'].fillna(df_player4.groupby(['p_id','t_surf'])['p_opp_pts_won%_l60_decay'].transform(lambda x: x.rolling(window=10, min_periods = 5).mean().round(2).shift(12)))
df_player4['p_SOS_l60_decay_51to60'] = df_player4['p_SOS_l60_decay_51to60'].fillna(df_player4.groupby(['p_id','t_surf'])['p_opp_pts_won%_l60_decay'].transform(lambda x: x.rolling(window=10, min_periods = 5).mean().round(2).shift(11)))
df_player4['p_SOS_l60_decay_51to60'] = df_player4['p_SOS_l60_decay_51to60'].fillna(df_player4.groupby(['p_id','t_surf'])['p_opp_pts_won%_l60_decay'].transform(lambda x: x.rolling(window=10, min_periods = 5).mean().round(2).shift(10)))
df_player4['p_SOS_l60_decay_51to60'] = df_player4['p_SOS_l60_decay_51to60'].fillna(df_player4.groupby(['p_id','t_surf'])['p_opp_pts_won%_l60_decay'].transform(lambda x: x.rolling(window=10, min_periods = 5).mean().round(2).shift(9)))
df_player4['p_SOS_l60_decay_51to60'] = df_player4['p_SOS_l60_decay_51to60'].fillna(df_player4.groupby(['p_id','t_surf'])['p_opp_pts_won%_l60_decay'].transform(lambda x: x.rolling(window=10, min_periods = 5).mean().round(2).shift(8)))
df_player4['p_SOS_l60_decay_51to60'] = df_player4['p_SOS_l60_decay_51to60'].fillna(df_player4.groupby(['p_id','t_surf'])['p_opp_pts_won%_l60_decay'].transform(lambda x: x.rolling(window=10, min_periods = 5).mean().round(2).shift(7)))
df_player4['p_SOS_l60_decay_51to60'] = df_player4['p_SOS_l60_decay_51to60'].fillna(df_player4.groupby(['p_id','t_surf'])['p_opp_pts_won%_l60_decay'].transform(lambda x: x.rolling(window=10, min_periods = 5).mean().round(2).shift(6)))
df_player4['p_SOS_l60_decay_51to60'] = df_player4['p_SOS_l60_decay_51to60'].fillna(df_player4.groupby(['p_id','t_surf'])['p_opp_pts_won%_l60_decay'].transform(lambda x: x.rolling(window=10, min_periods = 5).mean().round(2).shift(5)))
df_player4['p_SOS_l60_decay_51to60'] = df_player4['p_SOS_l60_decay_51to60'].fillna(df_player4.groupby(['p_id','t_surf'])['p_opp_pts_won%_l60_decay'].transform(lambda x: x.rolling(window=10, min_periods = 5).mean().round(2).shift(4)))
df_player4['p_SOS_l60_decay_51to60'] = df_player4['p_SOS_l60_decay_51to60'].fillna(df_player4.groupby(['p_id','t_surf'])['p_opp_pts_won%_l60_decay'].transform(lambda x: x.rolling(window=10, min_periods = 5).mean().round(2).shift(3)))
df_player4['p_SOS_l60_decay_51to60'] = df_player4['p_SOS_l60_decay_51to60'].fillna(df_player4.groupby(['p_id','t_surf'])['p_opp_pts_won%_l60_decay'].transform(lambda x: x.rolling(window=10, min_periods = 5).mean().round(2).shift(2)))
df_player4['p_SOS_l60_decay_51to60'] = df_player4['p_SOS_l60_decay_51to60'].fillna(df_player4.groupby(['p_id','t_surf'])['p_opp_pts_won%_l60_decay'].transform(lambda x: x.rolling(window=10, min_periods = 5).mean().round(2).shift(1))) #you can't go to a zero shift, because this would mean counting matches that haven't happened yet
df_player4['p_SOS_l60_decay_51to60'] = df_player4['p_SOS_l60_decay_51to60'].fillna(df_player4.groupby(['p_id','t_surf'])['p_opp_pts_won%_l60_decay'].transform(lambda x: x.rolling(window=10, min_periods = 4).mean().round(2).shift(1))) #given the above, we will lower the period requirement stepping back down to 1 to cover the last few matches in the sample
df_player4['p_SOS_l60_decay_51to60'] = df_player4['p_SOS_l60_decay_51to60'].fillna(df_player4.groupby(['p_id','t_surf'])['p_opp_pts_won%_l60_decay'].transform(lambda x: x.rolling(window=10, min_periods = 3).mean().round(2).shift(1))) #given the above, we will lower the period requirement stepping back down to 1 to cover the last few matches in the sample
df_player4['p_SOS_l60_decay_51to60'] = df_player4['p_SOS_l60_decay_51to60'].fillna(df_player4.groupby(['p_id','t_surf'])['p_opp_pts_won%_l60_decay'].transform(lambda x: x.rolling(window=10, min_periods = 2).mean().round(2).shift(1))) #given the above, we will lower the period requirement stepping back down to 1 to cover the last few matches in the sample
df_player4['p_SOS_l60_decay_51to60'] = df_player4['p_SOS_l60_decay_51to60'].fillna(df_player4.groupby(['p_id','t_surf'])['p_opp_pts_won%_l60_decay'].transform(lambda x: x.rolling(window=10, min_periods = 1).mean().round(2).shift(1))) #given the above, we will lower the period requirement stepping back down to 1 to cover the last few matches in the sample

df_player4 = df_player4.iloc[::-1]

In [177]:
# Decay-weights the SOS buckets and frames the result as the expected points %
# given up by opponents over the last 60 surface-specific matches.
# The 1-10 bucket gets weight 6, stepping down to weight 1 for the 51-60 bucket.
_sos_bucket_weights = [
    ('p_SOS_l60_decay_1to10', 6),
    ('p_SOS_l60_decay_11to20', 5),
    ('p_SOS_l60_decay_21to30', 4),
    ('p_SOS_l60_decay_31to40', 3),
    ('p_SOS_l60_decay_41to50', 2),
    ('p_SOS_l60_decay_51to60', 1),
]
_weighted_sos = sum(df_player4[bucket] * weight for bucket, weight in _sos_bucket_weights)
_weight_total = sum(weight for _, weight in _sos_bucket_weights)  # 21

df_player4["p_expected_opp_yield_pts%"] = (100 - (_weighted_sos / _weight_total)).round(2)


In [179]:
# The six per-bucket SOS columns were only needed to build the weighted value
# above, so remove them from the frame.
_transient_sos_cols = [
    "p_SOS_l60_decay_1to10",
    "p_SOS_l60_decay_11to20",
    "p_SOS_l60_decay_21to30",
    "p_SOS_l60_decay_31to40",
    "p_SOS_l60_decay_41to50",
    "p_SOS_l60_decay_51to60",
]
df_player4.drop(columns=_transient_sos_cols, inplace=True)
df_player4

Unnamed: 0,t_id,t_date,tour_wk,t_name,t_country,t_surf,t_lvl,t_draw_size,m_num,t_round,...,p_tot_time_l5_decay_diff,p_tot_time_l5_diff,p_pts_last_diff,p_tot_pts_l5_decay_diff,p_tot_pts_l5_diff,p_matches_diff,p_stam_adj_fatigue_diff,p_H2H_diff,m_outcome,p_expected_opp_yield_pts%
34850,2019-560,20190826,2019_24,US Open,USA,Hard,4,128,2059,R128,...,-0.0,-0.0,-0.0,-0.0,-0.0,-225.0,250.75,-0.0,0,
18294,2019-M014,20191014,2019_29,Moscow,RUS,Hard,1,32,2446,R32,...,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,-0.00,-0.0,0,
19382,2019-M004,20190225,2019_07,Acapulco,MEX,Hard,1,32,545,R32,...,-0.0,-0.0,-0.0,-0.0,-0.0,-25.0,198.73,-0.0,0,
0,2019-7696,20191105,2019_33,NextGen Finals,ITA,Hard,3,8,2781,F,...,16.8,11.0,9.0,39.9,49.0,-80.0,62.64,0.0,1,50.14
1,2019-7696,20191105,2019_33,NextGen Finals,ITA,Hard,3,8,2783,SF,...,29.2,28.0,53.0,36.6,44.0,-18.0,37.21,0.0,1,50.22
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
18267,2014-414,20140714,2014_19,Hamburg,GER,Clay,1,48,16430,R32,...,-2.0,-2.0,-4.0,-3.6,-4.0,-111.0,126.04,0.0,1,51.01
18268,2014-414,20140714,2014_19,Hamburg,GER,Clay,1,48,16414,R64,...,0.0,0.0,0.0,0.0,0.0,-108.0,145.59,0.0,1,51.51
30990,2014-321,20140707,2014_18,Stuttgart,GER,Clay,1,28,16360,R32,...,-0.0,-0.0,-0.0,-0.0,-0.0,-94.0,172.59,-0.0,0,52.33
35726,2014-308,20140428,2014_13,Munich,GER,Clay,1,28,15713,R32,...,-0.0,-0.0,-0.0,-0.0,-0.0,-81.0,230.95,-0.0,0,50.24


In [382]:
# Builds an "SOS-adjusted" version of p_pts_won%_l60_decay: the player's decayed
# points-won % is moved further in the direction of (actual - expected), where
# "expected" is the opponent yield implied by strength of schedule.
# The weight on that gap was tuned empirically with simple modeling feedback;
# 0.25, 0.5, 0.75, 1 and 1.5 were tried and 1 performed best.
SOS_ADJ_WEIGHT = 1

_actual_pts_won = df_player4["p_pts_won%_l60_decay"]
_expected_pts_won = df_player4["p_expected_opp_yield_pts%"]
df_player4["p_SOS_adj_pts_won%_l60_decay"] = _actual_pts_won + (SOS_ADJ_WEIGHT * (_actual_pts_won - _expected_pts_won))

In [350]:
# Inspect column dtypes and non-null counts of df_player4.
df_player4.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 36538 entries, 34850 to 26087
Data columns (total 84 columns):
 #   Column                                                   Non-Null Count  Dtype  
---  ------                                                   --------------  -----  
 0   t_id                                                     36538 non-null  object 
 1   t_date                                                   36538 non-null  int64  
 2   tour_wk                                                  36538 non-null  object 
 3   t_name                                                   36538 non-null  object 
 4   t_country                                                36538 non-null  object 
 5   t_surf                                                   36538 non-null  object 
 6   t_lvl                                                    36538 non-null  int64  
 7   t_draw_size                                              36538 non-null  int64  
 8   m_num                 

In [188]:
# Checkpoint the player-level frame to disk (row index is not written).
_fulltest_csv_path = '../data/df_fulltest_7.csv'
df_player4.to_csv(_fulltest_csv_path, index=False)

### Proxy For Speed of Conditions

For now, we will use ace rate per tournament from the previous year as a proxy for court speed. Conditions, of course, are dictated by a number of factors, including the balls, altitude, watering frequency (clay), and sand incorporation in the mix for hard courts. Also, indoor conditions tend to be faster than outdoor. This is challenging to model because the condition variables are seldom all the same from year to year at a given venue. Plus, even the weather at the time of a match will make a considerable difference. Ideally, a model of conditions would be generated as close to real time as possible before a given match we want to predict. Once a sufficient number of matches have been played in a given tournament, priors on court speed can be updated as well. But for now, we will stick with the previous year's ace rate as an admittedly crude proxy for speed of conditions.

In [None]:
#df = pd.read_csv('../data/.csv')

In [383]:
# Keep only the columns needed to build a per-tournament ace-rate table.
_ace_table_cols = ["p_name", "t_name", "t_id", "p_ace%"]
t_ace_perc = df_player4.loc[:, _ace_table_cols]
t_ace_perc.head()

Unnamed: 0,p_name,t_name,t_id,p_ace%
34850,Zachary Svajda,US Open,2019-560,1.09
18294,Alibek Kachmazov,Moscow,2019-M014,4.82
19382,Emilio Nava,Acapulco,2019-M004,3.7
0,Jannik Sinner,NextGen Finals,2019-7696,3.85
1,Jannik Sinner,NextGen Finals,2019-7696,7.69


In [384]:
# Row count / null check before the outlier players are removed.
t_ace_perc.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 36538 entries, 34850 to 26087
Data columns (total 4 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   p_name  36538 non-null  object 
 1   t_name  36538 non-null  object 
 2   t_id    36538 non-null  object 
 3   p_ace%  36538 non-null  float64
dtypes: float64(1), object(3)
memory usage: 1.4+ MB


In [385]:
# Before computing by-tourney, by-year means, remove the rows of the three most
# extreme ace outliers in tennis history. Their absence or presence, especially
# if they go very deep in the tourney, makes a big difference to ace stats at
# the individual-tourney level.
for _ace_outlier in ("Karlovic", "Isner", "Opelka"):
    _is_outlier = t_ace_perc['p_name'].str.contains(_ace_outlier)
    t_ace_perc = t_ace_perc[~_is_outlier]
t_ace_perc.info()


<class 'pandas.core.frame.DataFrame'>
Int64Index: 35812 entries, 34850 to 26087
Data columns (total 4 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   p_name  35812 non-null  object 
 1   t_name  35812 non-null  object 
 2   t_id    35812 non-null  object 
 3   p_ace%  35812 non-null  float64
dtypes: float64(1), object(3)
memory usage: 1.4+ MB


In [386]:
# Computes mean ace % per tourney per year (with the 6'7"-and-above outliers
# removed above).
# 'p_ace%' is selected explicitly before aggregating: the frame still carries
# the string column 'p_name', and pandas >= 2.0 raises a TypeError instead of
# silently dropping non-numeric columns in GroupBy.mean(). The result is the
# same single-column frame indexed by (t_id, t_name) that older pandas produced.
t_ace_perc = t_ace_perc.groupby(['t_id', 't_name'])[['p_ace%']].mean().round(2)
t_ace_perc.head(20)

Unnamed: 0_level_0,Unnamed: 1_level_0,p_ace%
t_id,t_name,Unnamed: 2_level_1
2012-1536,Madrid Masters,8.01
2012-1720,Bangkok,5.26
2012-2276,Zagreb,8.24
2012-301,Auckland,6.55
2012-308,Munich,7.43
2012-314,Gstaad,6.85
2012-316,Bastad,4.55
2012-319,Kitzbuhel,5.28
2012-321,Stuttgart,5.58
2012-328,Basel,9.18


In [387]:
# Order rows by tournament name, then t_id, both descending, so that within a
# tournament name the rows run from the highest t_id to the lowest.
_sort_levels = ['t_name', 't_id']
t_ace_perc = t_ace_perc.sort_values(by=_sort_levels, ascending=[False, False])
t_ace_perc

Unnamed: 0_level_0,Unnamed: 1_level_0,p_ace%
t_id,t_name,Unnamed: 2_level_1
2019-9164,Zhuhai,6.46
2015-2276,Zagreb,7.94
2014-2276,Zagreb,8.76
2013-2276,Zagreb,8.28
2012-2276,Zagreb,8.24
...,...,...
2016-M004,Acapulco,8.10
2015-807,Acapulco,5.89
2014-807,Acapulco,7.50
2013-807,Acapulco,4.70


In [388]:
# The aggregated value is now a tournament-level stat, so give it a t_ prefix.
t_ace_perc = t_ace_perc.rename(columns={'p_ace%': 't_ace%'})

In [389]:
# For each tourney in the sample, use the ace % of the previous edition of the
# same tourney (where available) as the speed-of-conditions proxy.
# shift(-1) takes the value from the next row down within each t_name group;
# the last row of each group has no following row and is left as NaN.
# NOTE(review): "next row down" is the prior edition only if the rows are
# ordered newest-first within each t_name - confirm the preceding sort ran.
_ace_by_tourney = t_ace_perc.groupby('t_name')['t_ace%']
t_ace_perc["t_ace%_last"] = _ace_by_tourney.shift(-1)
t_ace_perc

Unnamed: 0_level_0,Unnamed: 1_level_0,t_ace%,t_ace%_last
t_id,t_name,Unnamed: 2_level_1,Unnamed: 3_level_1
2019-9164,Zhuhai,6.46,
2015-2276,Zagreb,7.94,8.76
2014-2276,Zagreb,8.76,8.28
2013-2276,Zagreb,8.28,8.24
2012-2276,Zagreb,8.24,
...,...,...,...
2016-M004,Acapulco,8.10,5.89
2015-807,Acapulco,5.89,7.50
2014-807,Acapulco,7.50,4.70
2013-807,Acapulco,4.70,4.66


In [390]:
# Left-join the prior-edition ace % onto the main frame by t_id, so every
# player/match row carries its tournament's value (NaN where none exists).
_prior_edition_ace = t_ace_perc['t_ace%_last']
df_player5 = pd.merge(df_player4, _prior_edition_ace, how='left', on='t_id')

For tournaments without a prior year to assess conditions from (mostly tournaments from the first year of the sample (2014), which we won't actually make predictions on), we will just use the overall sample mean for the tournament's surface (hard or clay).


In [None]:
# Inspect df_player5 after the t_ace%_last merge.
df_player5.info()

In [391]:
# Columns needed to compute a per-surface ace % used to fill missing t_ace%_last.
_surface_table_cols = ["p_name", "t_name", "t_id", "t_surf", "p_ace%"]
surface_ace_perc_means = df_player5.loc[:, _surface_table_cols]
surface_ace_perc_means.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 36538 entries, 0 to 36537
Data columns (total 5 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   p_name  36538 non-null  object 
 1   t_name  36538 non-null  object 
 2   t_id    36538 non-null  object 
 3   t_surf  36538 non-null  object 
 4   p_ace%  36538 non-null  float64
dtypes: float64(1), object(4)
memory usage: 1.7+ MB


In [392]:
# As with the by-tourney means above, remove the extreme ace outliers before
# computing the by-surface averages used for filling in the NaNs.
for _ace_outlier in ("Karlovic", "Isner", "Opelka"):
    _is_outlier = surface_ace_perc_means['p_name'].str.contains(_ace_outlier)
    surface_ace_perc_means = surface_ace_perc_means[~_is_outlier]
surface_ace_perc_means.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 35812 entries, 0 to 36537
Data columns (total 5 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   p_name  35812 non-null  object 
 1   t_name  35812 non-null  object 
 2   t_id    35812 non-null  object 
 3   t_surf  35812 non-null  object 
 4   p_ace%  35812 non-null  float64
dtypes: float64(1), object(4)
memory usage: 1.6+ MB


In [393]:
# Computes the mean ace % across all matches played on each surface in the
# sample (clay or hard court). Used to fill in NaNs with surface specificity.
# 'p_ace%' is selected explicitly: the frame still holds the string columns
# p_name / t_name / t_id, and pandas >= 2.0 raises a TypeError instead of
# silently dropping non-numeric columns in GroupBy.mean().
surface_ace_perc_means = (
    surface_ace_perc_means
    .groupby('t_surf')[['p_ace%']]
    .mean()
    .round(2)
    .rename(columns={'p_ace%': 't_ace%'})
)
surface_ace_perc_means

Unnamed: 0_level_0,t_ace%
t_surf,Unnamed: 1_level_1
Clay,5.46
Hard,8.46


In [394]:
# Look the values up by surface label rather than by position: integer keys on
# a string-labelled index rely on a deprecated positional fallback and on the
# alphabetical order of the groups.
surface_ace_perc_means.loc["Clay", "t_ace%"], surface_ace_perc_means.loc["Hard", "t_ace%"]

(5.46, 8.46)

In [395]:
# Tournaments with no prior-edition value fall back to the sample-wide mean ace %
# of their surface. The mean is fetched by surface label (not by row position),
# so the right value is used regardless of how many surfaces are in the table or
# how they sort.
for _surface in ("Hard", "Clay"):
    _needs_fill = df_player5["t_ace%_last"].isnull() & (df_player5["t_surf"] == _surface)
    df_player5.loc[_needs_fill, "t_ace%_last"] = surface_ace_perc_means.loc[_surface, "t_ace%"]

In [396]:
# Numerically encode surface from here on (handedness should have been
# converted earlier): Hard court -> 2, Clay court -> 1.
for _surface_label, _surface_code in (("Hard", 2), ("Clay", 1)):
    df_player5.loc[df_player5["t_surf"] == _surface_label, "t_surf"] = _surface_code

# The column was object-typed while it held strings; convert it to a numeric dtype.
df_player5["t_surf"] = pd.to_numeric(df_player5["t_surf"])

In [397]:
# Sanity check: only the numeric surface codes should remain.
df_player5["t_surf"].unique()

array([2, 1], dtype=int64)

In [None]:
# Inspect dtypes after encoding t_surf.
df_player5.info()

In [398]:
# The per-match player ace % comes from the match itself, so remove it to make
# sure it cannot accidentally be included in predictions.
df_player5 = df_player5.drop(columns=["p_ace%"])

In [None]:
# Confirm p_ace% is no longer among the columns.
df_player5.info()

In [None]:
# Renaming ace avoidance columns to be more intutitve at a glance
#df_player5.rename(columns = {'p_aced%_l60_decay':'p_ace_avoid%_l60_decay'}, inplace=True)
#df_player5.rename(columns = {'p_aced%_l60_decay_diff':'p_ace_avoid%_l60_decay_diff'}, inplace=True)

In [399]:
# Select (and order) the 82 columns carried forward. The order matters:
# later cells relabel columns by position.
_player6_columns = [
    # t_* / m_* columns
    "t_id", "t_date", "tour_wk", "t_name", "t_country", "t_surf", "t_lvl", "t_draw_size",
    "t_round", "t_rd_num", "t_ace%_last",
    "m_num", "m_best_of", "m_score", "m_outcome", "m_time(m)", "m_tot_pts",
    # p_* player columns
    "p_id", "p_name", "p_rank", "p_rank_pts", "p_log_rank", "p_matches", "p_country",
    "p_ent", "p_hand", "p_ht", "p_age",
    "p_pts_won%", "p_pts_won%_l60_decay", "p_SOS_adj_pts_won%_l60_decay",
    "p_sv_pts_won%", "p_sv_pts_won%_l60_decay", "p_ret_pts_won%", "p_ret_pts_won%_l60_decay",
    "p_pts_won%_std_l60", "p_sv_pts_won%_std_l60", "p_ret_pts_won%_std_l60",
    "p_ace%_l60_decay", "p_aced%_l60_decay", "p_bp_save%_l60", "p_bp_convert%_l60",
    "p_m_time_last", "p_tot_time_l5_decay", "p_tot_time_l5", "p_tot_pts_last",
    "p_tot_pts_l5_decay", "p_tot_pts_l5", "p_stamina_adj_fatigue", "p_H2H_w",
    # player-vs-opponent columns
    "p_opp_rank_diff", "p_opp_rank_pts_diff", "p_opp_log_rank_diff", "p_opp_ht_diff",
    "p_opp_age_diff", "p_L_opp_R", "p_HCA_opp_N",
    "p_pts_won%_l60_decay_diff", "p_sv_pts_won%_l60_decay_diff", "p_ret_pts_won%_l60_decay_diff",
    "p_sv_pts_won%_l60_decay_opp_ret_pts_won%_l60_decay_diff",
    "p_ret_pts_won%_l60_decay_opp_sv_pts_won%_l60_decay_diff",
    "p_pts_won%_std_l60_diff", "p_sv_pts_won%_std_l60_diff", "p_ret_pts_won%_std_l60_diff",
    "p_ace%_l60_decay_diff", "p_aced%_l60_decay_diff",
    "p_ace%_l60_decay_opp_aced%_l60_decay_diff", "p_aced%_l60_decay_opp_ace%_l60_decay_diff",
    "p_bp_save%_l60_diff", "p_bp_convert%_l60_diff",
    "p_bp_save%_l60_opp_bp_convert%_l60_diff", "p_bp_convert%_l60_opp_bp_save%_l60_diff",
    "p_time_last_diff", "p_tot_time_l5_decay_diff", "p_tot_time_l5_diff",
    "p_pts_last_diff", "p_tot_pts_l5_decay_diff", "p_tot_pts_l5_diff",
    "p_matches_diff", "p_stam_adj_fatigue_diff", "p_H2H_diff",
]
df_player6 = df_player5[_player6_columns]

In [206]:
# Verify the selected column set and dtypes.
df_player6.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 36538 entries, 0 to 36537
Data columns (total 82 columns):
 #   Column                                                   Non-Null Count  Dtype  
---  ------                                                   --------------  -----  
 0   t_id                                                     36538 non-null  object 
 1   t_date                                                   36538 non-null  int64  
 2   tour_wk                                                  36538 non-null  object 
 3   t_name                                                   36538 non-null  object 
 4   t_country                                                36538 non-null  object 
 5   t_surf                                                   36538 non-null  int64  
 6   t_lvl                                                    36538 non-null  int64  
 7   t_draw_size                                              36538 non-null  int64  
 8   t_round                   

Now we need to go back to by-match organization one more time to calculate differentials in SOS-adjusted statistics.

In [400]:
# Temporarily split the player-level rows by match outcome
# (m_outcome == 1 -> winner's row, m_outcome == 0 -> loser's row).
_outcome = df_player6['m_outcome']
df_winners3 = df_player6.loc[_outcome == 1]
df_losers3 = df_player6.loc[_outcome == 0]

In [401]:
# Left-join each winner row to the loser row of the same match, keyed on m_num.
# Columns present in both frames get pandas' default suffixes:
# _x for the winner side (left) and _y for the loser side (right).
df_player7 = pd.merge(df_winners3, df_losers3, how='left', on='m_num')

In [402]:
# Drop the loser-side (_y) copies of the tournament/match columns; the
# winner-side (_x) copies of the same columns are kept.
_duplicate_match_cols = [
    't_id_y', 't_name_y', 't_country_y', 't_surf_y', 't_draw_size_y', 't_lvl_y', 't_date_y',
    'm_score_y', 'm_best_of_y', 't_round_y', 'm_time(m)_y', 'tour_wk_y', 't_rd_num_y', 'm_tot_pts_y',
]
df_player7 = df_player7.drop(columns=_duplicate_match_cols)

In [403]:
# Strip the _x suffix from the surviving tournament/match columns.
_shared_match_cols = [
    't_id', 't_name', 't_country', 't_ace%_last', 't_surf', 't_draw_size', 't_rd_num',
    't_lvl', 't_date', 'm_score', 'm_best_of', 't_round', 'm_time(m)', 'tour_wk', 'm_tot_pts',
]
_unsuffix_map = {name + '_x': name for name in _shared_match_cols}
# Carried over from the original mapping; rename() ignores keys that are not
# present in the frame.
_unsuffix_map['round_num_x'] = 't_rd_num'
df_player7 = df_player7.rename(columns=_unsuffix_map)

In [404]:
# Differential in SOS-ADJUSTED % total pts won (previous 60 matches, WITH the
# decay function): winner relative to loser, and the mirror image for the loser.
# Remember that this is surface-specific (see the calculations above).
_winner_sos_adj = df_player7["p_SOS_adj_pts_won%_l60_decay_x"]
_loser_sos_adj = df_player7["p_SOS_adj_pts_won%_l60_decay_y"]

df_player7["w_SOS_adj_pts_won%_l60_decay_diff"] = _winner_sos_adj - _loser_sos_adj
df_player7["l_SOS_adj_pts_won%_l60_decay_diff"] = -df_player7["w_SOS_adj_pts_won%_l60_decay_diff"]

Now back to player-level organization for output to EDA

In [405]:
# Build the winners frame by dropping everything that belongs to the loser.
# The loser-side columns are derived from the merge suffix instead of a
# hand-maintained list of names, so the drop cannot drift out of sync with the
# columns selected upstream. At this point every column still ending in "_y"
# is a loser-side column (the shared tournament/match "_y" copies were already
# dropped), including m_outcome_y and t_ace%_last_y.
_loser_side_cols = [col for col in df_player7.columns if col.endswith("_y")]
df_winners4 = df_player7.drop(
    columns=_loser_side_cols + ["m_outcome_x", "l_SOS_adj_pts_won%_l60_decay_diff"]
)

In [406]:
# Re-add the outcome flag as the last column: 1 = Winner
df_winners4 = df_winners4.assign(m_outcome=1)

In [232]:
# Inspect the winners frame before its columns are renamed.
df_winners4.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 18269 entries, 0 to 18268
Data columns (total 83 columns):
 #   Column                                                     Non-Null Count  Dtype  
---  ------                                                     --------------  -----  
 0   t_id                                                       18269 non-null  object 
 1   t_date                                                     18269 non-null  int64  
 2   tour_wk                                                    18269 non-null  object 
 3   t_name                                                     18269 non-null  object 
 4   t_country                                                  18269 non-null  object 
 5   t_surf                                                     18269 non-null  int64  
 6   t_lvl                                                      18269 non-null  int64  
 7   t_draw_size                                                18269 non-null  int64  
 8   t_roun

In [407]:
# Rename columns to remove the winner/loser markers so winners and losers can be
# re-concatenated. Columns are renamed BY NAME rather than by position: the
# trailing "_x" merge suffix is stripped and the winner differential takes the
# generic p_ prefix. A positional set_axis() with a hard-coded name list would
# silently mislabel columns if their order ever changed upstream.
_winner_renames = {col: col[:-2] for col in df_winners4.columns if col.endswith("_x")}
_winner_renames["w_SOS_adj_pts_won%_l60_decay_diff"] = "p_SOS_adj_pts_won%_l60_decay_diff"
df_winners4 = df_winners4.rename(columns=_winner_renames)

# Stripping suffixes must never create two columns with the same name.
assert df_winners4.columns.is_unique, "duplicate column names after renaming df_winners4"

In [236]:
# Confirm the renamed winners columns.
df_winners4.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 18269 entries, 0 to 18268
Data columns (total 83 columns):
 #   Column                                                   Non-Null Count  Dtype  
---  ------                                                   --------------  -----  
 0   t_id                                                     18269 non-null  object 
 1   t_date                                                   18269 non-null  int64  
 2   tour_wk                                                  18269 non-null  object 
 3   t_name                                                   18269 non-null  object 
 4   t_country                                                18269 non-null  object 
 5   t_surf                                                   18269 non-null  int64  
 6   t_lvl                                                    18269 non-null  int64  
 7   t_draw_size                                              18269 non-null  int64  
 8   t_round                   

In [211]:
# Summary of the match-level (winner + loser) frame.
df_player7.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 18269 entries, 0 to 18268
Columns: 149 entries, t_id to p_H2H_diff_y
dtypes: float64(121), int32(4), int64(14), object(10)
memory usage: 20.6+ MB


In [408]:
# Build the loser-side frame from df_player7 by discarding the columns that
# belong to the other player (the *_x columns — presumably the winner's side,
# verify against the merge that produced df_player7), both merged outcome
# flags, the duplicated t_ace%_last_y, and the w_* SOS differential.
df_losers4 = df_player7.drop(
    columns=[
        # outcome flags and duplicated tournament column
        "m_outcome_x", "m_outcome_y", "t_ace%_last_y",
        # identity / profile
        "p_id_x", "p_name_x", "p_rank_x", "p_rank_pts_x", "p_log_rank_x",
        "p_matches_x", "p_country_x", "p_ent_x", "p_hand_x", "p_ht_x",
        "p_age_x",
        # points-won percentages and their l60 variants
        "p_pts_won%_x", "p_pts_won%_l60_decay_x",
        "p_SOS_adj_pts_won%_l60_decay_x",
        "p_sv_pts_won%_x", "p_sv_pts_won%_l60_decay_x",
        "p_ret_pts_won%_x", "p_ret_pts_won%_l60_decay_x",
        "p_pts_won%_std_l60_x", "p_sv_pts_won%_std_l60_x",
        "p_ret_pts_won%_std_l60_x",
        # ace / break-point
        "p_ace%_l60_decay_x", "p_aced%_l60_decay_x",
        "p_bp_save%_l60_x", "p_bp_convert%_l60_x",
        # time / points-played / fatigue
        "p_m_time_last_x", "p_tot_time_l5_decay_x", "p_tot_time_l5_x",
        "p_tot_pts_last_x", "p_tot_pts_l5_decay_x", "p_tot_pts_l5_x",
        "p_stamina_adj_fatigue_x",
        # head-to-head and opponent-relative
        "p_H2H_w_x",
        "p_opp_rank_diff_x", "p_opp_rank_pts_diff_x",
        "p_opp_log_rank_diff_x", "p_opp_ht_diff_x", "p_opp_age_diff_x",
        "p_L_opp_R_x", "p_HCA_opp_N_x",
        # differentials
        "p_pts_won%_l60_decay_diff_x",
        "p_sv_pts_won%_l60_decay_diff_x",
        "p_ret_pts_won%_l60_decay_diff_x",
        "p_sv_pts_won%_l60_decay_opp_ret_pts_won%_l60_decay_diff_x",
        "p_ret_pts_won%_l60_decay_opp_sv_pts_won%_l60_decay_diff_x",
        "p_pts_won%_std_l60_diff_x",
        "p_sv_pts_won%_std_l60_diff_x",
        "p_ret_pts_won%_std_l60_diff_x",
        "p_ace%_l60_decay_diff_x",
        "p_aced%_l60_decay_diff_x",
        "p_ace%_l60_decay_opp_aced%_l60_decay_diff_x",
        "p_aced%_l60_decay_opp_ace%_l60_decay_diff_x",
        "p_bp_save%_l60_diff_x",
        "p_bp_convert%_l60_diff_x",
        "p_bp_save%_l60_opp_bp_convert%_l60_diff_x",
        "p_bp_convert%_l60_opp_bp_save%_l60_diff_x",
        "p_time_last_diff_x",
        "p_tot_time_l5_decay_diff_x",
        "p_tot_time_l5_diff_x",
        "p_pts_last_diff_x",
        "p_tot_pts_l5_decay_diff_x",
        "p_tot_pts_l5_diff_x",
        "p_matches_diff_x",
        "p_stam_adj_fatigue_diff_x",
        "p_H2H_diff_x",
        "w_SOS_adj_pts_won%_l60_decay_diff",
    ]
)

In [409]:
# Label every row of the loser-side frame with outcome 0 (0 = Loser).
df_losers4 = df_losers4.assign(m_outcome=0)

In [245]:
# Structural check of df_losers4: prints column names, non-null counts and dtypes.
# NOTE(review): execution count 245 is out of sequence with the neighbouring
# cells (409, 410), so this printed summary may not reflect the current frame.
df_losers4.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 18269 entries, 0 to 18268
Data columns (total 83 columns):
 #   Column                                                     Non-Null Count  Dtype  
---  ------                                                     --------------  -----  
 0   t_id                                                       18269 non-null  object 
 1   t_date                                                     18269 non-null  int64  
 2   tour_wk                                                    18269 non-null  object 
 3   t_name                                                     18269 non-null  object 
 4   t_country                                                  18269 non-null  object 
 5   t_surf                                                     18269 non-null  int64  
 6   t_lvl                                                      18269 non-null  int64  
 7   t_draw_size                                                18269 non-null  int64  
 8   t_roun

In [410]:
# Give the loser-side frame neutral column names so it can be stacked with
# df_winners4. The relabel is positional: the i-th existing column receives the
# i-th name below, so this order has to mirror the frame's current column order.
df_losers4 = df_losers4.set_axis(
    [
        # t_* / tour_wk columns
        "t_id", "t_date", "tour_wk", "t_name", "t_country", "t_surf",
        "t_lvl", "t_draw_size", "t_round", "t_rd_num", "t_ace%_last",
        # m_* columns
        "m_num", "m_best_of", "m_score", "m_time(m)", "m_tot_pts",
        # p_* identity / profile columns
        "p_id", "p_name", "p_rank", "p_rank_pts", "p_log_rank", "p_matches",
        "p_country", "p_ent", "p_hand", "p_ht", "p_age",
        # p_* points-won percentages and their l60 variants
        "p_pts_won%", "p_pts_won%_l60_decay", "p_SOS_adj_pts_won%_l60_decay",
        "p_sv_pts_won%", "p_sv_pts_won%_l60_decay",
        "p_ret_pts_won%", "p_ret_pts_won%_l60_decay",
        "p_pts_won%_std_l60", "p_sv_pts_won%_std_l60", "p_ret_pts_won%_std_l60",
        # p_* ace / break-point columns
        "p_ace%_l60_decay", "p_aced%_l60_decay",
        "p_bp_save%_l60", "p_bp_convert%_l60",
        # p_* time / points-played / fatigue columns
        "p_m_time_last", "p_tot_time_l5_decay", "p_tot_time_l5",
        "p_tot_pts_last", "p_tot_pts_l5_decay", "p_tot_pts_l5",
        "p_stamina_adj_fatigue",
        # p_* head-to-head and opponent-relative columns
        "p_H2H_w",
        "p_opp_rank_diff", "p_opp_rank_pts_diff", "p_opp_log_rank_diff",
        "p_opp_ht_diff", "p_opp_age_diff",
        "p_L_opp_R", "p_HCA_opp_N",
        # *_diff columns
        "p_pts_won%_l60_decay_diff",
        "p_sv_pts_won%_l60_decay_diff",
        "p_ret_pts_won%_l60_decay_diff",
        "p_sv_pts_won%_l60_decay_opp_ret_pts_won%_l60_decay_diff",
        "p_ret_pts_won%_l60_decay_opp_sv_pts_won%_l60_decay_diff",
        "p_pts_won%_std_l60_diff",
        "p_sv_pts_won%_std_l60_diff",
        "p_ret_pts_won%_std_l60_diff",
        "p_ace%_l60_decay_diff",
        "p_aced%_l60_decay_diff",
        "p_ace%_l60_decay_opp_aced%_l60_decay_diff",
        "p_aced%_l60_decay_opp_ace%_l60_decay_diff",
        "p_bp_save%_l60_diff",
        "p_bp_convert%_l60_diff",
        "p_bp_save%_l60_opp_bp_convert%_l60_diff",
        "p_bp_convert%_l60_opp_bp_save%_l60_diff",
        "p_time_last_diff",
        "p_tot_time_l5_decay_diff",
        "p_tot_time_l5_diff",
        "p_pts_last_diff",
        "p_tot_pts_l5_decay_diff",
        "p_tot_pts_l5_diff",
        "p_matches_diff",
        "p_stam_adj_fatigue_diff",
        "p_H2H_diff",
        "p_SOS_adj_pts_won%_l60_decay_diff",
        # outcome label
        "m_outcome",
    ],
    axis="columns",
)

In [411]:
# Stack the winner-side and loser-side frames row-wise into one long frame that
# shares a single set of column names; the row index is renumbered from 0.
df_player8 = pd.concat(
    objs=[df_winners4, df_losers4],
    axis=0,
    ignore_index=True,
)

df_player8.head(20)

Unnamed: 0,t_id,t_date,tour_wk,t_name,t_country,t_surf,t_lvl,t_draw_size,t_round,t_rd_num,...,p_tot_time_l5_decay_diff,p_tot_time_l5_diff,p_pts_last_diff,p_tot_pts_l5_decay_diff,p_tot_pts_l5_diff,p_matches_diff,p_stam_adj_fatigue_diff,p_H2H_diff,p_SOS_adj_pts_won%_l60_decay_diff,m_outcome
0,2019-7696,20191105,2019_33,NextGen Finals,ITA,2,3,8,F,5.0,...,16.8,11.0,9.0,39.9,49.0,-80.0,62.64,0.0,-0.62,1
1,2019-7696,20191105,2019_33,NextGen Finals,ITA,2,3,8,SF,4.0,...,29.2,28.0,53.0,36.6,44.0,-18.0,37.21,0.0,3.21,1
2,2019-7696,20191105,2019_33,NextGen Finals,ITA,2,3,8,RR2,2.0,...,-15.0,-15.0,13.0,11.7,13.0,-5.0,5.77,0.0,9.0,1
3,2019-7696,20191105,2019_33,NextGen Finals,ITA,2,3,8,RR1,1.0,...,0.0,0.0,0.0,0.0,0.0,-106.0,60.99,1.0,5.04,1
4,2019-0337,20191021,2019_30,Vienna,AUT,2,1,32,R32,1.0,...,0.0,0.0,0.0,0.0,0.0,-287.0,82.78,0.0,3.15,1
5,2019-7485,20191014,2019_29,Antwerp,BEL,2,1,32,QF,3.0,...,-42.0,-48.0,-22.0,-48.5,-63.0,-106.0,65.95,0.0,6.96,1
6,2019-7485,20191014,2019_29,Antwerp,BEL,2,1,32,R16,2.0,...,9.0,9.0,11.0,9.9,11.0,-290.0,98.62,0.0,-5.31,1
7,2019-7485,20191014,2019_29,Antwerp,BEL,2,1,32,R32,1.0,...,0.0,0.0,0.0,0.0,0.0,4.0,-31.22,0.0,0.86,1
8,2019-0439,20190715,2019_18,Umag,CRO,1,1,32,R32,1.0,...,0.0,0.0,0.0,0.0,0.0,-10.0,53.8,0.0,1.05,1
9,2019-M009,20190513,2019_15,Rome Masters,ITA,1,2,64,R64,1.0,...,0.0,0.0,0.0,0.0,0.0,-242.0,190.1,0.0,-5.38,1


In [412]:
# Order rows by player id, then tour week, then round number — all descending.
# tour_wk is compared as a string (e.g. "2019_07"); this presumably matches
# chronological order because the week part is zero-padded — verify upstream.
df_player8 = df_player8.sort_values(
    by=["p_id", "tour_wk", "t_rd_num"],
    ascending=False,
)
df_player8

Unnamed: 0,t_id,t_date,tour_wk,t_name,t_country,t_surf,t_lvl,t_draw_size,t_round,t_rd_num,...,p_tot_time_l5_decay_diff,p_tot_time_l5_diff,p_pts_last_diff,p_tot_pts_l5_decay_diff,p_tot_pts_l5_diff,p_matches_diff,p_stam_adj_fatigue_diff,p_H2H_diff,p_SOS_adj_pts_won%_l60_decay_diff,m_outcome
34850,2019-560,20190826,2019_24,US Open,USA,2,4,128,R128,1.0,...,-0.0,-0.0,-0.0,-0.0,-0.0,-225.0,250.75,-0.0,,0
18294,2019-M014,20191014,2019_29,Moscow,RUS,2,1,32,R32,1.0,...,-0.0,-0.0,-0.0,-0.0,-0.0,-0.0,-0.00,-0.0,,0
19382,2019-M004,20190225,2019_07,Acapulco,MEX,2,1,32,R32,1.0,...,-0.0,-0.0,-0.0,-0.0,-0.0,-25.0,198.73,-0.0,,0
0,2019-7696,20191105,2019_33,NextGen Finals,ITA,2,3,8,F,5.0,...,16.8,11.0,9.0,39.9,49.0,-80.0,62.64,0.0,-0.62,1
1,2019-7696,20191105,2019_33,NextGen Finals,ITA,2,3,8,SF,4.0,...,29.2,28.0,53.0,36.6,44.0,-18.0,37.21,0.0,3.21,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
18267,2014-414,20140714,2014_19,Hamburg,GER,1,1,48,R32,2.0,...,-2.0,-2.0,-4.0,-3.6,-4.0,-111.0,126.04,0.0,-11.09,1
18268,2014-414,20140714,2014_19,Hamburg,GER,1,1,48,R64,1.0,...,0.0,0.0,0.0,0.0,0.0,-108.0,145.59,0.0,-22.60,1
30990,2014-321,20140707,2014_18,Stuttgart,GER,1,1,28,R32,1.0,...,-0.0,-0.0,-0.0,-0.0,-0.0,-94.0,172.59,-0.0,-31.83,0
35726,2014-308,20140428,2014_13,Munich,GER,1,1,28,R32,1.0,...,-0.0,-0.0,-0.0,-0.0,-0.0,-81.0,230.95,-0.0,-18.55,0


In [413]:
# Final column selection/ordering for export. Relative to df_player8 this moves
# m_outcome next to the other m_* columns and places the SOS-adjusted
# differential beside p_pts_won%_l60_decay_diff.
df_player9 = df_player8[
    [
        # t_* / tour_wk columns
        "t_id", "t_date", "tour_wk", "t_name", "t_country", "t_surf",
        "t_lvl", "t_draw_size", "t_round", "t_rd_num", "t_ace%_last",
        # m_* columns (including the outcome label)
        "m_num", "m_best_of", "m_score", "m_outcome", "m_time(m)", "m_tot_pts",
        # p_* identity / profile columns
        "p_id", "p_name", "p_rank", "p_rank_pts", "p_log_rank", "p_matches",
        "p_country", "p_ent", "p_hand", "p_ht", "p_age",
        # p_* points-won percentages and their l60 variants
        "p_pts_won%", "p_pts_won%_l60_decay", "p_SOS_adj_pts_won%_l60_decay",
        "p_sv_pts_won%", "p_sv_pts_won%_l60_decay",
        "p_ret_pts_won%", "p_ret_pts_won%_l60_decay",
        "p_pts_won%_std_l60", "p_sv_pts_won%_std_l60", "p_ret_pts_won%_std_l60",
        # p_* ace / break-point columns
        "p_ace%_l60_decay", "p_aced%_l60_decay",
        "p_bp_save%_l60", "p_bp_convert%_l60",
        # p_* time / points-played / fatigue columns
        "p_m_time_last", "p_tot_time_l5_decay", "p_tot_time_l5",
        "p_tot_pts_last", "p_tot_pts_l5_decay", "p_tot_pts_l5",
        "p_stamina_adj_fatigue",
        # p_* head-to-head and opponent-relative columns
        "p_H2H_w",
        "p_opp_rank_diff", "p_opp_rank_pts_diff", "p_opp_log_rank_diff",
        "p_opp_ht_diff", "p_opp_age_diff",
        "p_L_opp_R", "p_HCA_opp_N",
        # *_diff columns
        "p_pts_won%_l60_decay_diff",
        "p_SOS_adj_pts_won%_l60_decay_diff",
        "p_sv_pts_won%_l60_decay_diff",
        "p_ret_pts_won%_l60_decay_diff",
        "p_sv_pts_won%_l60_decay_opp_ret_pts_won%_l60_decay_diff",
        "p_ret_pts_won%_l60_decay_opp_sv_pts_won%_l60_decay_diff",
        "p_pts_won%_std_l60_diff",
        "p_sv_pts_won%_std_l60_diff",
        "p_ret_pts_won%_std_l60_diff",
        "p_ace%_l60_decay_diff",
        "p_aced%_l60_decay_diff",
        "p_ace%_l60_decay_opp_aced%_l60_decay_diff",
        "p_aced%_l60_decay_opp_ace%_l60_decay_diff",
        "p_bp_save%_l60_diff",
        "p_bp_convert%_l60_diff",
        "p_bp_save%_l60_opp_bp_convert%_l60_diff",
        "p_bp_convert%_l60_opp_bp_save%_l60_diff",
        "p_time_last_diff",
        "p_tot_time_l5_decay_diff",
        "p_tot_time_l5_diff",
        "p_pts_last_diff",
        "p_tot_pts_l5_decay_diff",
        "p_tot_pts_l5_diff",
        "p_matches_diff",
        "p_stam_adj_fatigue_diff",
        "p_H2H_diff",
    ]
]

In [214]:
#Save current df
#df_winners4.to_csv('../data/df_rematch4.csv', index=False)

#### Save Data

In [414]:
# Persist the final per-player frame as the input file for the EDA step.
# The row index is not written to the CSV.
df_player9.to_csv(
    path_or_buf="../data/df_forEDA.csv",
    index=False,
)