In [1]:
from collections import Counter

import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline

### Variable List
df --> DataFrame (1778,10) with original data

df1 --> DataFrame (1612,7) with unused_cols dropped, and null values from Formatted Result dropped

unused_cols -->  DataFrame (1778,3) columns Pukie, Notes and Description

no_work --> DataFrame (63,7) rows where no work was performed - warm-ups and cooldowns, not needed for performance calcs

lifts --> DataFrame (45,7) lifts only: work completed but no time recorded. These will be used to calculate volume abilities.



In [2]:
# Load the original data from data.csv and report its dimensions.
df = pd.read_csv('data.csv')
df.shape

(1778, 10)

In [3]:
# Set aside the three columns that are dropped from the working frame.
unused_cols = df.loc[:, ['Pukie', 'Notes', 'Description']]
unused_cols.shape

(1778, 3)

In [4]:
# Working frame: the original data without the three unused columns.
df1 = df.drop(columns=['Pukie', 'Notes', 'Description'])

In [5]:
# Summarise missingness per column: 'top'/'freq' show whether null or
# non-null is the more common state and how many rows are in that state.
df1.isna().describe()

Unnamed: 0,Date,Workout,Result,Prescribed,Work performed,Work time,Formatted Result
count,1778,1778,1778,1778,1778,1778,1778
unique,1,1,2,1,1,2,2
top,False,False,False,False,False,True,False
freq,1778,1778,1679,1778,1778,975,1612


In [6]:
# Discard rows that have no 'Formatted Result'; other columns may still be null.
df1 = df1.dropna(axis='index', how='any', subset=['Formatted Result'])
df1.shape

(1612, 7)

In [7]:
# Rows whose result is exactly 'Completed' and whose 'Work performed' is 0.
# Both masks are built from df1 -- the frame being indexed -- rather than from
# the longer original df, so the selection does not depend on pandas silently
# aligning a boolean Series with a different index.
no_work = df1.loc[(df1['Formatted Result'] == 'Completed') & (df1['Work performed'] == 0), :]
no_work.shape

(63, 7)

In [8]:
# Rows with no recorded work time whose formatted result begins with
# 'Completed |' or ends with '| Completed'.
lifts = df1.loc[
    df1['Work time'].isna()
    & (
        df1['Formatted Result'].str.startswith('Completed |')
        | df1['Formatted Result'].str.endswith('| Completed')
    )
]

In [9]:
# Row/column count of the lift-only selection.
lifts.shape

(45, 7)

In [10]:
# Character length of each workout name. The vectorised .str accessor replaces
# a per-row Python lambda and yields NaN (instead of raising) for a missing name.
df1['workout_name_length'] = df1['Workout'].str.len()

In [11]:
# Mean name length per workout, restricted to names shorter than 10
# characters. Only the resulting index (the workout names) is used afterwards.
bench = (
    df1[df1['workout_name_length'] < 10]
    .groupby('Workout')['workout_name_length']
    .mean()
)

In [12]:
# Sorted list of the distinct workout names shorter than 10 characters.
# Computed straight from df1 instead of from the previous value of `bench`, so
# re-running this cell no longer fails: once `bench` is a list, `bench.index`
# is a method and cannot be iterated. The result matches the groupby index
# (unique group keys in sorted order).
bench = sorted(df1.loc[df1['workout_name_length'] < 10, 'Workout'].unique())

In [30]:
# Distinct workout names in order of first appearance, plus one
# space-separated string of all of them for word counting.
workouts = df1['Workout'].unique().tolist()
work_string = ' '.join(workouts)

In [33]:
# Display the list of unique workout names.
workouts

['5 RFT: Power Cleans, Walking Lunges, and Thrusters',
 'Cindy',
 '21-15-9: Push Press and Box Jumps',
 'Deadlift 4x3',
 'Split Jerk 1 Rep',
 'Row : 3x 1 km, rest 3 mins',
 'CrossFit Games Open 17.5',
 'Every 1:30 for 10:30: Front Squat',
 'AMRAP 5 mins: Squat Cleans and AbMat Sit-ups',
 'AMRAP 5 mins: Hanging Knee Raises and Push Press',
 'AMRAP 5 mins: Wall Balls and Pull-ups',
 'AMRAP 10 mins: Lateral Burpee (Over Barbell)s and Power Snatches',
 'Back Squat : 1 Rep Max',
 '5 RFT: 200 m, Dumbbell Walking Lunges, and Dumbbell Thrusters',
 'Every 1 min for 10 mins: Hang Snatch High Pulls and Hang Squat Snatches',
 'Lifting: Weighted Strict Pull-ups and Weighted Strict Bar Dips',
 'CrossFit Games Open 16.4 / 17.4',
 'Chipper: Pistols (Alternating Legs)s, Wall Balls, Burpees, and Hang Power Snatches',
 'Thruster 5-5-5-3-3-3',
 '"Tabata" - AbMat Sit-ups : 8 x 20 secs / 10 secs',
 '4 RFT: Power Cleans, Front Squats, and Burpees',
 'Every 2 mins for 10 mins: Deadlift',
 'CrossFit Games Open

In [32]:
# Frequency of each whitespace-separated token across the unique workout names.
# Tokens keep their punctuation, so e.g. '(Alternating)s' and '(Alternating)s,'
# are counted separately. Counter is imported in the notebook's import cell.
work_dict = Counter(work_string.split())
work_dict

Counter({'"9/25/84"': 1,
         '"Death': 5,
         '"FGB': 5,
         '"Tabata"': 38,
         '"Team': 1,
         '#1': 1,
         '#2': 2,
         '#3': 2,
         '&': 33,
         '(16': 1,
         '(18': 6,
         '(2': 2,
         '(2)': 5,
         '(20': 4,
         '(24': 3,
         '(3-2-1863)': 1,
         '(4': 1,
         '(4)': 1,
         '(7)': 1,
         '(Alternating': 4,
         '(Alternating)s': 6,
         '(Alternating)s,': 3,
         '(American)': 2,
         '(American)s': 19,
         '(American)s,': 8,
         '(Assisted': 1,
         '(Bar': 1,
         '(Calories)s': 6,
         '(Calories)s,': 3,
         '(Chest': 1,
         '(Chin': 1,
         '(Deadlifts,': 1,
         '(GHD': 1,
         '(Ghd)s': 1,
         '(Grace': 1,
         '(Hanging)s': 2,
         '(Heavy': 1,
         '(Height)s,': 1,
         '(JCD)': 1,
         '(Kettlebell)': 1,
         '(Kettlebell)s,': 1,
         '(Left': 1,
         '(Legless)s': 2,
         '(Over

In [37]:
# Load the movement lookup table; its 'movement' column is matched against
# the workout names in a later cell.
movements = pd.read_csv('movements.csv')
movements

Unnamed: 0.1,Unnamed: 0,movement,frequency,move_class
0,0,airdyne bike,4,Monostructural
1,1,airdyne calories,7,Monostructural
2,2,assault bike,1,Monostructural
3,3,assault bike calories,3,Monostructural
4,4,"back stroke, swimming",1,Monostructural
5,5,bear crawl,4,Monostructural
6,6,bike,3,Monostructural
7,7,bike ride (mountain bike),1,Monostructural
8,8,"crawl, swimming",1,Monostructural
9,9,double under,153,Monostructural


In [39]:
# For every workout name, collect the movements it mentions.
# The result is a list parallel to `workouts`; each entry is a list holding
#   * the workout's own name, when it is one of the short names in `bench`, then
#   * every entry of movements['movement'] that occurs as a substring of the
#     lower-cased workout name.
# Matching is plain substring containment, so a short movement is also reported
# whenever a longer one contains it (e.g. 'press' alongside 'push press').

# Loop-invariant: the movement vocabulary is the same for every workout.
movement_names = movements['movement'].tolist()

move_in_work = []
for w in workouts:
    w_lower = w.lower()  # lower-case once per workout, not once per movement
    moves = [w] if w in bench else []
    moves.extend(m for m in movement_names if m in w_lower)
    move_in_work.append(moves)

move_in_work

[['lunge', 'walking lunge', 'clean', 'power clean', 'thruster'],
 ['Cindy'],
 ['box jump', 'press', 'push press'],
 ['deadlift'],
 ['jerk', 'split jerk'],
 ['row'],
 [],
 ['front squat'],
 ['abmat sit-up', 'sit-up', 'clean', 'squat clean'],
 ['hanging knee raise', 'press', 'push press'],
 ['pull-up', 'wall ball'],
 ['burpee', 'lateral burpee (over barbell)', 'power snatch', 'snatch'],
 ['back squat'],
 ['lunge',
  'walking lunge',
  'dumbbell thruster',
  'dumbbell walking lunge',
  'thruster'],
 ['hang snatch',
  'hang snatch high pull',
  'hang squat snatch',
  'snatch',
  'snatch high pull',
  'squat snatch'],
 ['bar dip',
  'dip',
  'pull-up',
  'strict bar dip',
  'strict pull-up',
  'weighted strict bar dip',
  'weighted strict pull-up'],
 [],
 ['burpee',
  'pistol',
  'pistols (alternating legs)',
  'hang power snatch',
  'power snatch',
  'snatch',
  'wall ball'],
 ['thruster'],
 ['abmat sit-up', 'sit-up'],
 ['burpee', 'clean', 'front squat', 'power clean'],
 ['deadlift'],
 [],