In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

## Importing data: one FanGraphs batting-projection CSV per projection system.
## The files are read in the same order as the names on the left-hand side.
atc, steamer, thebat, thebatx = (
    pd.read_csv(csv_path)
    for csv_path in (
        'fangraphs_bats_atc.csv',
        'fangraphs_bats_steamer.csv',
        'fangraphs_bats_thebat.csv',
        'fangraphs_bats_thebatx.csv',
    )
)

In [None]:
## Merging dataframes together to create one data frame, joined on the player 'id'.
##
## `suffixes` has to be a pair (left_suffix, right_suffix). The previous value
## ('_z') was not a tuple but the plain string '_z': older pandas versions warned
## and silently unpacked it into ('_', 'z'), while newer pandas versions reject it
## with a TypeError. The pair is spelled out explicitly here so the merge runs
## everywhere AND still produces the column names the later cells rely on
## ('G_x', 'G_y', 'G_', 'Gz', ...).
new_df = atc.merge(steamer, how='outer', on='id', suffixes=('_x', '_y'))

## For the stat columns used later, nothing collides in this merge (the atc/steamer
## copies already carry '_x'/'_y'), so thebat's columns come through unsuffixed.
new_df = new_df.merge(thebat, how='outer', on='id', suffixes=('_', 'z'))

## Here thebat's unsuffixed columns collide with thebatx's columns:
## the left (thebat) copy gets '_' and the right (thebatx) copy gets 'z'.
new_df = new_df.merge(thebatx, how='outer', on='id', suffixes=('_', 'z'))

## Show only a preview instead of dumping the whole merged frame.
new_df.head()

In [None]:
## Dropping null and duplicate values:
## first remove every row that has a missing value in any column,
## then keep only the first remaining row for each player 'id'.
new_df = new_df.dropna().drop_duplicates(subset=['id'], keep='first')

## Taking the merged dataframes and getting the average result for each player and category

## Creating get average function
def get_average(df, col_name, new_col_name):
    """
    Store the row-wise mean of several columns in a new column of ``df``.

    Args:
    df (pandas.DataFrame): The frame to update; it is modified in place.
    col_name (list of str): Labels of the columns whose values are averaged.
    new_col_name (str): Label of the column that receives the averages.

    Returns:
    None (the result is written into df).
    """
    ## Select the source columns and average across them, giving one value per row
    df[new_col_name] = df[col_name].mean(axis='columns')

## Executing average functions: every New_* column is the mean of the listed
## per-source columns. The pairs are processed top to bottom, so the New_*
## columns are appended to new_df in exactly this order.
for averaged_col, source_cols in (
    ('New_G', ['G_x', 'G_y', 'G_', 'Gz']),
    ('New_AB', ['AB_x', 'AB_y', 'AB_', 'ABz']),
    ('New_PA', ['PA_x', 'PA_y']),  ## Two files missing the PA column
    ('New_H', ['H_x', 'H_y', 'H_', 'Hz']),
    ('New_2B', ['2B_x', '2B_y', '2B_', '2Bz']),
    ('New_3B', ['3B_x', '3B_y', '3B_', '3Bz']),
    ('New_HR', ['HR_x', 'HR_y', 'HR_', 'HRz']),
    ('New_R', ['R_x', 'R_y', 'R_', 'Rz']),
    ('New_RBI', ['RBI_x', 'RBI_y', 'RBI_', 'RBIz']),
    ('New_BB', ['BB_x', 'BB_y', 'BB_', 'BBz']),
    ('New_SO', ['SO_x', 'SO_y', 'SO_', 'SOz']),
    ('New_SB', ['SB_x', 'SB_y', 'SB_', 'SBz']),
    ('New_AVG', ['AVG_x', 'AVG_y', 'AVG_', 'AVGz']),
    ('New_K%', ['K%_x', 'K%_y', 'K%_', 'K%z']),
    ('New_ADP', ['ADP_x', 'ADP_y', 'ADP_', 'ADPz']),
):
    get_average(new_df, source_cols, averaged_col)

## Dropping the columns we just used to get the averages, together with the
## duplicated player-name columns ('PlayerName_x' is not listed, so it stays).
## 'ADP_' is one of the columns averaged into New_ADP, but it was missing from
## this list and therefore stayed in new_df (and in the exported csv); it is
## now dropped along with the other source columns.
new_df = new_df.drop(columns=['G_x', 'AB_x', 'PA_x', 'H_x', '2B_x', '3B_x', 'HR_x',
       'R_x', 'RBI_x', 'BB_x', 'SO_x', 'SB_x', 'AVG_x', 'K%_x', 'ADP_x',
       'PlayerName_y', 'G_y', 'AB_y', 'PA_y', 'H_y', '2B_y', '3B_y', 'HR_y',
       'R_y', 'RBI_y', 'BB_y', 'SO_y', 'SB_y', 'AVG_y', 'K%_y', 'ADP_y',
       'PlayerName_', 'G_', 'AB_', 'H_', '2B_', '3B_', 'HR_', 'R_', 'RBI_',
       'BB_', 'SO_', 'SB_', 'AVG_', 'K%_', 'ADP_', 'PlayerNamez', 'Gz', 'ABz',
       'Hz', '2Bz', '3Bz', 'HRz', 'Rz', 'RBIz', 'BBz', 'SOz', 'SBz', 'AVGz',
       'K%z', 'ADPz'])


In [None]:
##Creating rank functions

def get_rank_desc(df, col_name, new_col_name):
    """
    Adds a new column to a pandas DataFrame containing the rank of the specified column in descending order.

    The largest value receives rank 1. Ties are resolved with pandas' default
    ranking method, so tied values share the average of the ranks they cover.

    Args:
    df (pandas.DataFrame): The DataFrame to modify.
    col_name (str): The name of the column to rank.
    new_col_name (str): The name of the new column to create.

    Returns:
    None (modifies df in place).
    """
    ## Uses the .rank function with the ascending=False parameter.
    ## Note: the docstring above must sit at the same indentation as this body;
    ## it used to be indented deeper, which made the function an IndentationError.
    rank_col = df[col_name].rank(ascending=False)
    df[new_col_name] = rank_col
    
def get_rank_asc(df, col_name, new_col_name):
    """
    Rank one column from smallest to largest and store the ranks in a new column of ``df``.

    Args:
    df (pandas.DataFrame): The frame to update; it is modified in place.
    col_name (str): Label of the column whose values are ranked.
    new_col_name (str): Label of the column that receives the ranks.

    Returns:
    None (the result is written into df).
    """
    ## ascending=True means the smallest value gets rank 1
    df[new_col_name] = df[col_name].rank(ascending=True)

## Executing desc rank functions: in these categories the highest value gets rank 1.
## The pairs are processed top to bottom, so the Rank_* columns are appended in this order.
for source_col, rank_col in (
    ('New_H', 'Rank_H'),
    ('New_2B', 'Rank_2B'),
    ('New_HR', 'Rank_HR'),
    ('New_RBI', 'Rank_RBI'),
    ('New_BB', 'Rank_BB'),
    ('New_SB', 'Rank_SB'),
    ('New_AVG', 'Rank_AVG'),
    ('New_R', 'Rank_R'),
):
    get_rank_desc(new_df, source_col, rank_col)

## Executing asc rank functions: for K% the lowest value gets rank 1.
get_rank_asc(new_df, 'New_K%', 'Rank_K%')

## Using a different rank parameter for 3B to weight correctly:
## method='min' is passed here instead of relying on the default used by get_rank_desc.
rank_3b = new_df['New_3B'].rank(method='min', ascending=False)
new_df['Rank_3B'] = rank_3b

new_df.head(20)

In [None]:
## Getting total rank from all categories: add up every per-category rank
## so each player ends up with a single aggregated score.
category_rank_cols = [
    'Rank_H', 'Rank_2B', 'Rank_3B', 'Rank_HR', 'Rank_RBI',
    'Rank_BB', 'Rank_SB', 'Rank_AVG', 'Rank_K%', 'Rank_R',
]
new_df['Total_Rank'] = new_df[category_rank_cols].sum(axis=1)

## Creating Rank column based on the aggregated total: the smallest Total_Rank gets rank 1.
overall_rank = new_df['Total_Rank'].rank(ascending=True)
new_df['Rank'] = overall_rank

In [None]:
## Exporting the new dataframe to csv (the dataframe index is written as the first column)
rankings_csv = 'AngeloRankings.csv'
new_df.to_csv(rankings_csv, encoding='utf-8')