In [None]:
import pandas as pd
from sklearn.preprocessing import MinMaxScaler, LabelEncoder

%matplotlib inline

In [None]:
# Read the 2019 marathon results CSV into a DataFrame and preview the first rows.
RESULTS_CSV = '../Resources/marathon_results_2019.csv'
df = pd.read_csv(RESULTS_CSV)
df.head()

In [None]:
# Narrow the frame to the columns used below: "Age", "M/F", every split time,
# "Pace", and "Official Time".
selected_columns = [
    'Age', 'M/F',
    '5K', '10K', '15K', '20K', 'Half', '25K', '30K', '35K', '40K',
    'Pace', 'Official Time',
]
df = df[selected_columns]
df.head()

In [None]:
# Parse the split times, "Pace", and "Official Time" into timedeltas by applying
# pandas.to_timedelta() to each of those columns.
time_columns = [
    '5K', '10K', '15K', '20K', 'Half',
    '25K', '30K', '35K', '40K',
    'Pace', 'Official Time',
]
parsed_times = df[time_columns].apply(pd.to_timedelta)
df[time_columns] = parsed_times
df.head()

In [None]:
# Replace each timedelta column with its duration expressed in seconds.
def _as_seconds(column):
    """Return the total number of seconds for every timedelta in ``column``."""
    return column.dt.total_seconds()


df[time_columns] = df[time_columns].apply(_as_seconds)
df.head()

In [None]:
# Keep only the rows whose value is non-zero at every split.
# Only the split columns are tested; 'Age', 'M/F', 'Pace' and 'Official Time'
# are not part of the filter.
split_columns = ['5K', '10K', '15K', '20K', 'Half', '25K', '30K', '35K', '40K']

# One boolean per row: True when none of that row's split values equals 0.
has_every_split = (df[split_columns] != 0).all(axis=1)

# .copy() yields an independent frame, so the column assignments made in later
# cells never write into a selection of the unfiltered data (this guards
# against SettingWithCopyWarning on pandas versions without copy-on-write).
df = df[has_every_split].copy()

df.head()

In [None]:
# Encode the 'M/F' labels as integer codes with LabelEncoder.
# The fitted encoder is bound to a name so the label-to-integer mapping can be
# inspected afterwards through `mf_encoder.classes_`.
mf_encoder = LabelEncoder()
df['M/F'] = mf_encoder.fit_transform(df['M/F'])
df.head()

In [None]:
# Ensure 'Age' holds numeric values; an entry that cannot be parsed raises an error.
age_as_number = pd.to_numeric(df['Age'], errors='raise')
df['Age'] = age_as_number

In [None]:
# 'Pace' is expected to be perfectly correlated with 'Official Time', so it is
# left out of the X data later on (the column itself stays in df for analysis).
# Draw 'Pace' against 'Official Time' as a scatter plot to check that expectation.
df.plot.scatter(x='Pace', y='Official Time')

In [None]:
# Build the training set 'X': every column of df apart from 'Pace'.
X = df.drop(columns=['Pace'])

In [None]:
# Fit a MinMaxScaler on X and rescale every column with it, then show the result.
scaler = MinMaxScaler()
X_scaled = scaler.fit_transform(X)
X_scaled