In [28]:
from google.colab import files
uploaded = files.upload()
# download dataset onto device and then select file from "Choose Files" button

In [29]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [30]:
import pandas as pd
import numpy as np
from numpy import inf
# import io
# df = pd.read_csv(io.BytesIO(uploaded['Calisthenics Data - Sheet1.csv']))
df = pd.read_csv('Calisthenics Data - Sheet1.csv')

In [31]:
## Removing the values as detailed in the "Meeting Questions" file
## (for now based on Omari's feedback):

# remove half reps
df = df[(pd.isna(df['HR/FR'])) | (df['HR/FR'] != 'HR')]

# remove zeros
df = df[df['Reps'] > 0]

# remove freeform
df = df[df['FF/PR/PB'] != 'FF']

# remove untimed
df = df[((df['Exercise'] != 3) & (df['Time Format'] != 'UNTIMED')) | (df['Exercise'] == 3)]

In [32]:
# split into exercises
e1 = df[df['Exercise'] == 1]
e2 = df[df['Exercise'] == 2]
e3 = df[df['Exercise'] == 3]
e4 = df[df['Exercise'] == 4]
e5 = df[df['Exercise'] == 5]

In [33]:
# standardize the reps for each exercise to have a mean of 0 and a standard
# deviation (sigma) of 1
e1['Scaled'] = (e1['Reps'] - np.mean(e1['Reps']))/np.std(e1['Reps'])
e2['Scaled'] = (e2['Reps'] - np.mean(e2['Reps']))/np.std(e2['Reps'])
e3['Scaled'] = (e3['Reps'] - np.mean(e3['Reps']))/np.std(e3['Reps'])
e4['Scaled'] = (e4['Reps'] - np.mean(e4['Reps']))/np.std(e4['Reps'])
e5['Scaled'] = (e5['Reps'] - np.mean(e5['Reps']))/np.std(e5['Reps'])

## (Note that by standardizing the data, it becomes normally distributed with
## the center being 0 and the resulting values in the 'Scaled' column are how
## many standard deviations away from the center a particular observation is)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  e1['Scaled'] = (e1['Reps'] - np.mean(e1['Reps']))/np.std(e1['Reps'])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  e2['Scaled'] = (e2['Reps'] - np.mean(e2['Reps']))/np.std(e2['Reps'])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  e3['Scaled'] = (e3['Reps'] - np.mean(e3['Reps']))/np.std(e3['Reps']

Similar to SAT scoring, where the highest possible scores correspond to the 99th percentile, I define an exceptional number of reps for a calisthenics exercise as being in the 99th percentile.

In [34]:
## Since the goal is to have the average person score around a 500 and an
## exceptional score being a 1000 (based on how I define exceptional),

# calculate the standard deviation for the 99th percentile to correspond to 1000
# as an exceptional score and the mean to be 500
Z = 2.326 #(the z-score for the 99th percentile z-score for normal distribution)
mean = 500
X = 1000
sigma = (X - mean) / Z

# scale the normally distributed data for each exercise accordingly
e1['Scaled'] = e1['Scaled'] * sigma + 500
e2['Scaled'] = e2['Scaled'] * sigma + 500
e3['Scaled'] = e3['Scaled'] * sigma + 500
e4['Scaled'] = e4['Scaled'] * sigma + 500
e5['Scaled'] = e5['Scaled'] * sigma + 500

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  e1['Scaled'] = e1['Scaled'] * sigma + 500
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  e2['Scaled'] = e2['Scaled'] * sigma + 500
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  e3['Scaled'] = e3['Scaled'] * sigma + 500
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .l

In [35]:
## Dividing each score by the number of reps to get the score value of each rep:

val1 = e1['Scaled']/e1['Reps']
val1[val1 == inf] = 0
scale1 = np.mean(val1)

val2 = e2['Scaled']/e2['Reps']
val2[val2 == inf] = 0
scale2 = np.mean(val2)

val3 = e3['Scaled']/e3['Reps']
val3[val3 == inf] = 0
scale3 = np.mean(val3)

val4 = e4['Scaled']/e4['Reps']
val4[val4 == inf] = 0
scale4 = np.mean(val4)

val5 = e5['Scaled']/e5['Reps']
val5[val5 == inf] = 0
scale5 = np.mean(val5)

In [36]:
print("five weights (one for each exercise):",
      scale1, scale2, scale3, scale4, scale5)

five weights (one for each exercise): 11.674819867301057 8.840234420335218 26.350668561436695 6.944608578038166 5.085105812655719
