In [1]:
import re
import os

import pandas as pd
import numpy as np

# Combined Gravitus export: a single CSV file holding every exported table.
GRAVITUS_DATA_PATH="./user_data_1739476833.csv"
# Sample export from the Strong app, used as the reference for the target format.
STRONG_DATA_PATH="./strong.csv"

# Directory that receives one CSV file per Gravitus table once the export is split.
GRAVITUS_OUTPUT_DIR = "./gravitus_data"

### Inspect the data exported from strong

The data was generated by starting a workout, adding some random exercises and then exporting the data from the app.

In [2]:
# Load the sample Strong export; its columns are the format the Gravitus data gets converted to.
strong = pd.read_csv(STRONG_DATA_PATH)
strong.head()

Unnamed: 0,Date,Workout Name,Duration,Exercise Name,Set Order,Weight,Reps,Distance,Seconds,Notes,Workout Notes,RPE
0,2025-08-03 13:04:33,Midday Workout,5m,Back Extension,1,20.0,10.0,0,0.0,,,
1,2025-08-03 13:04:33,Midday Workout,5m,Back Extension,Rest Timer,0.0,0.0,0,120.0,,,
2,2025-08-03 13:04:33,Midday Workout,5m,Back Extension,2,20.0,10.0,0,0.0,,,
3,2025-08-03 13:04:33,Midday Workout,5m,Back Extension,Rest Timer,0.0,0.0,0,120.0,,,
4,2025-08-03 13:04:33,Midday Workout,5m,Bench Press (Dumbbell),1,5.0,20.0,0,0.0,,,



I can see that the data rows follow a repeating sequence: an exercise set, followed by the rest after that set.
I'm not sure if the data of how much I rested after each set will be important to me, I'll see if I can skip it.

The data is split into 12 different columns:
1. `Date` - this seems to be a timestamp fixed to the start of the workout, does not change as we progress with the exercises
2. `Workout Name` - pretty self explanatory, my guess is any workout can be identified by the combination of `Date` and `Workout Name`
3. `Duration` - this refers to the entirety of the workout. In my sample data it is set in minutes, I wonder what happens when we exceed 60.
4. `Exercise Name` - this column might pose some challenges, as I will have to map the names from Gravitus to the names used by Strong
5. `Set Order` - seems to be increasing as we progress, nothing out of the ordinary
6. `Weight`
7. `Reps`
8. `Distance` - this won't apply for most of the exercises
9. `Seconds` - for regular exercises this applies only to the Rest Timers, probably the value is set also for some isometrics like holding a plank
10. `Notes` - I have been adding some notes to my exercises, but do not rely on them heavily, so I probably won't bother with transferring this column
11. `Workout Notes`
12. `RPE` - this probably stands for the Rating of Perceived Exertion

### Clean the gravitus data

All of the data is exported to a single csv file. We have to split it first, to get the separate tables.

In [3]:
# A table starts with a header line made of the table name followed by eleven commas
# (i.e. the name plus eleven empty fields). Every following line belongs to that table
# until the next header line.
# NOTE(review): a data row whose only non-empty field is the first one would also match
# this pattern — confirm the export never contains such rows.
table_name_pattern = re.compile("^(.*),,,,,,,,,,,$")
os.makedirs(GRAVITUS_OUTPUT_DIR, exist_ok=True)

table_fd = None  # file of the table currently being written, if any
try:
    with open(GRAVITUS_DATA_PATH, 'r') as gravitus_fd:
        # Stream the export line by line instead of loading it fully into memory.
        for line in gravitus_fd:
            match = table_name_pattern.match(line)
            if match:
                # A new table begins: finish the previous one and open the next file.
                if table_fd is not None:
                    table_fd.close()
                file_name = match.group(1).replace(" ", "_").lower()
                table_fd = open(os.path.join(GRAVITUS_OUTPUT_DIR, f"{file_name}.csv"), "w")
            elif table_fd is None:
                # Data before any table header means the file is not in the expected layout.
                raise ValueError(
                    f"{GRAVITUS_DATA_PATH} does not start with a table header line: {line!r}"
                )
            else:
                table_fd.write(line)
finally:
    # Close the last table file even if reading or writing failed part-way.
    if table_fd is not None:
        table_fd.close()


### Explore gravitus data
We got a couple of tables exported. The ones that instantly strike me are `workouts.csv` and `workout_sets.csv`

In [4]:
# Show the per-table CSV files produced by the split (os.listdir returns them in arbitrary order).
os.listdir(GRAVITUS_OUTPUT_DIR)

['workout_template_group.csv',
 'workouts.csv',
 'workout_templates.csv',
 'exercise_videos.csv',
 'workout_sets.csv',
 'profile.csv',
 'exercise_notes.csv',
 'exercise_tips.csv',
 'workout_group_notes.csv',
 'workout_videos.csv',
 'workout_template_set_goals.csv']

In [5]:
# Load the per-workout table.
# NOTE(review): the trailing "Unnamed: N" columns presumably come from the padding commas
# of the combined export — confirm. They disappear once the needed columns are selected.
workouts = pd.read_csv(os.path.join(GRAVITUS_OUTPUT_DIR, "workouts.csv"))
workouts.head()

Unnamed: 0,created,updated,started_at,duration_seconds,is_private,description,title,bodyweight,gym,template,Unnamed: 10,Unnamed: 11
0,2025-08-01 16:05:12.077875+00:00,2025-08-01 16:05:12.077890+00:00,2025-08-01 15:09:22+00:00,3348.0,False,,,177.03,,,,
1,2025-07-28 13:20:26.335508+00:00,2025-07-28 13:20:26.335523+00:00,2025-07-28 12:35:22+00:00,2701.0,False,,,,,,,
2,2025-07-25 13:28:58.184440+00:00,2025-07-25 13:28:58.184453+00:00,2025-07-25 12:36:53+00:00,3122.0,False,,,,,,,
3,2025-07-04 15:56:43.656359+00:00,2025-07-04 15:56:43.656373+00:00,2025-07-04 15:06:53+00:00,2987.0,False,,,,,,,
4,2025-07-01 12:57:28.046756+00:00,2025-07-01 12:57:28.046773+00:00,2025-07-01 12:17:43+00:00,2382.0,False,,,,,,,


So there are a couple of columns here. The `title` column seems to have a lot of missing values. The duration is also stored in seconds instead of minutes. Let's look at the workout sets.

In [6]:
# Load the per-set table; each row is one set and refers to its workout through the `workout` column.
workout_sets = pd.read_csv(os.path.join(GRAVITUS_OUTPUT_DIR, "workout_sets.csv"))
workout_sets.head()

Unnamed: 0,group_number,set_number,seconds,reps,weight,one_rep_equivalent,is_pr,rpe,exercise,workout,Unnamed: 10,Unnamed: 11
0,0,0,,10.0,61.73,82.31,False,,Dumbbell Bench Press,2025-03-26 10:20:28+00:00 None,,
1,0,0,,7.0,,,False,,3D Lunge Warmup,2021-07-02 13:20:41+00:00 FBW A,,
2,0,0,,7.0,,,False,,3D Lunge Warmup,2021-07-09 08:14:26+00:00 FBW A,,
3,0,0,,7.0,22.05,26.46,False,,Weighted Pull-Up,2025-06-18 17:05:46+00:00 None,,
4,0,0,,7.0,,,False,,3D Lunge Warmup,2021-06-08 07:33:23+00:00 FBW B,,


Okay, we have the exercise name, reps, weight and workout. The sets are numbered differently here. The combination of `set_number` and `group_number` seems to give us the ordering (`group_number` refers to the numbering of exercises in a workout, my guess is for a superset, both exercises get the same group number).

The `workout` column looks like a concatenation of `started_at` and `title` columns in the workouts table. When the title is unset, we default to `"None"`.

Let's try to merge these two tables.

In [7]:
# Build the merge key on the workouts table without mutating a slice in place
# (in-place edits on a column subset trigger pandas' SettingWithCopyWarning).
workouts = (
    # keep only the columns we need
    workouts[["started_at", "duration_seconds", "title"]]
    # workout_sets.workout uses the literal text "None" for untitled workouts, so mirror that here
    .fillna({"title": "None"})
    # key for merging the tables: "<started_at> <title>"
    .assign(workout=lambda frame: frame["started_at"] + " " + frame["title"])
)
workouts.head()

Unnamed: 0,started_at,duration_seconds,title,workout
0,2025-08-01 15:09:22+00:00,3348.0,,2025-08-01 15:09:22+00:00 None
1,2025-07-28 12:35:22+00:00,2701.0,,2025-07-28 12:35:22+00:00 None
2,2025-07-25 12:36:53+00:00,3122.0,,2025-07-25 12:36:53+00:00 None
3,2025-07-04 15:06:53+00:00,2987.0,,2025-07-04 15:06:53+00:00 None
4,2025-07-01 12:17:43+00:00,2382.0,,2025-07-01 12:17:43+00:00 None


In [8]:
# Narrow the per-set table down to the columns used for the conversion.
wanted_set_columns = [
    "group_number",
    "set_number",
    "reps",
    "weight",
    "exercise",
    "workout",
]
workout_sets = workout_sets[wanted_set_columns]
workout_sets.head()

Unnamed: 0,group_number,set_number,reps,weight,exercise,workout
0,0,0,10.0,61.73,Dumbbell Bench Press,2025-03-26 10:20:28+00:00 None
1,0,0,7.0,,3D Lunge Warmup,2021-07-02 13:20:41+00:00 FBW A
2,0,0,7.0,,3D Lunge Warmup,2021-07-09 08:14:26+00:00 FBW A
3,0,0,7.0,22.05,Weighted Pull-Up,2025-06-18 17:05:46+00:00 None
4,0,0,7.0,,3D Lunge Warmup,2021-06-08 07:33:23+00:00 FBW B


In [9]:
# Attach the workout-level data to every set with an inner join on the `workout` key.
# validate="many_to_one" makes pandas raise if a key occurs more than once in `workouts`;
# otherwise duplicated rows could offset dropped rows and slip past the length check below.
merged = (
    pd.merge(workout_sets, workouts, how="inner", on="workout", validate="many_to_one")
    # the key has served its purpose once the tables are joined
    .drop(columns="workout")
    # chronological order, then exercise order within a workout, then set order
    .sort_values(["started_at", "group_number", "set_number"])
)

# An inner join silently drops sets whose workout key has no match, so check nothing was lost.
assert len(merged) == len(workout_sets), "Something went wrong, we lost some data during merging"
merged.head()

Unnamed: 0,group_number,set_number,reps,weight,exercise,started_at,duration_seconds,title
52,0,0,7.0,,3D Lunge Warmup,2021-03-04 10:09:18+00:00,3420.0,FBW A
318,1,0,6.0,88.18,Squat,2021-03-04 10:09:18+00:00,3420.0,FBW A
454,1,1,5.0,110.23,Squat,2021-03-04 10:09:18+00:00,3420.0,FBW A
559,1,2,5.0,110.23,Squat,2021-03-04 10:09:18+00:00,3420.0,FBW A
733,2,0,12.0,44.09,Barbell Hip Thrust,2021-03-04 10:09:18+00:00,3420.0,FBW A


In [10]:
# Display the Strong sample again so it can be compared with the merged Gravitus frame.
strong.head()

Unnamed: 0,Date,Workout Name,Duration,Exercise Name,Set Order,Weight,Reps,Distance,Seconds,Notes,Workout Notes,RPE
0,2025-08-03 13:04:33,Midday Workout,5m,Back Extension,1,20.0,10.0,0,0.0,,,
1,2025-08-03 13:04:33,Midday Workout,5m,Back Extension,Rest Timer,0.0,0.0,0,120.0,,,
2,2025-08-03 13:04:33,Midday Workout,5m,Back Extension,2,20.0,10.0,0,0.0,,,
3,2025-08-03 13:04:33,Midday Workout,5m,Back Extension,Rest Timer,0.0,0.0,0,120.0,,,
4,2025-08-03 13:04:33,Midday Workout,5m,Bench Press (Dumbbell),1,5.0,20.0,0,0.0,,,


Seems like we have all that is necessary. Now to transform the data to match the Strong format. First, let's map the exercises.

In [11]:
# Most frequent Gravitus exercise names; every distinct name needs a counterpart in the Strong mapping.
merged.exercise.value_counts().head()

exercise
Bench Press             120
Dumbbell Bench Press    104
Squat                    64
Chinup                   64
Neutral Grip Pull-Up     63
Name: count, dtype: int64

In [12]:
# Gravitus exercise name -> exercise name to use in the Strong import.
# Keys must match the Gravitus export exactly (including its spelling quirks);
# several Gravitus variants intentionally collapse into one Strong exercise.
excs_mapping = {
    "Bench Press": "Bench Press (Barbell)",
    "Dumbbell Bench Press": "Bench Press (Dumbbell)",
    "Chinup": "Chin Up",
    "Squat": "Squat (Barbell)",
    "Neutral Grip Pull-Up": "Chin Up",
    "Dumbbell Shoulder Press": "Overhead Press (Dumbbell)",
    "Deadlift": "Deadlift (Barbell)",
    "Overhead Press": "Overhead Press (Barbell)",
    "Lat Pulldown": "Lat Pulldown (Cable)",
    "Trap Bar Deadlift": "Trap Bar Deadlift",
    "Reverse Barbell Lunge": "Lunge (Barbell)",
    "Barbell Hip Thrust": "Hip Thrust (Barbell)",
    "3D Lunge Warmup": "3D Lunge Warmup",
    "Weighted Pull-Up": "Pull Up",
    "Farmer's Walk": "Farmer's Walk",
    "Reverse Dumbbell Lunge": "Lunge (Dumbbell)",
    "Seated Row": "Seated Row (Cable)",
    "Barbell Row": "Bent Over Row (Barbell)",
    "Machine Seated Row": "Seated Row (Machine)",
    "Face Pull": "Face Pull (Cable)",
    "Overhead One Arm Farmers Walk": "Overhead One Arm Farmers Walk",
    "Push-Up": "Push Up",
    "Hanging Knee Raise": "Hanging Knee Raise",
    "Weighted Chinup": "Chin Up",
    "Dip": "Chest Dip",
    "Leg Press": "Leg Press",
    "Machine Fly": "Chest Fly",
    "Leg Extension": "Leg Extension",
    "Inverted Bodyweight Row": "Inverted Row (Bodyweight)",
    "Ring Push Up": "Ring Push Up",
    "Weighted Ring Dips": "Ring Dip",
    "Dumbbell Romanian Deadlift": "Romanian Deadlift (Dumbbell)",
    "Reverse-Grip Lat Pulldown": "Lat Pulldown - Underhand (Cable)",
    "Chest Press Machine": "Chest Press (Machine)",
    "One-Arm Dumbbell Row": "Bent Over One Arm Row (Dumbbell)",
    "Ring Dips": "Ring Dip",
    "Chest Supported Dumbbell Row": "Chest Supported Dumbbell Row",
    "Goblet Squat": "Goblet Squat (Kettlebell)",
    "Back Extension": "Back Extension",
    "Slant Board Hamstring Stretch": "Slant Board Hamstring Stretch",
    "Slant Board Calf Raise": "Slant Board Calf Raise",
    "Shoulder Press Machine": "Shoulder Press (Machine)",
    "Incline Bench Press": "Incline Bench Press (Barbell)",
    "Close-Grip Lat Pulldown": "Lat Pulldown (Cable)",
    # The key keeps the source spelling ("Dumbell"); the value uses the same Strong
    # name as "Dumbbell Shoulder Press" so both end up as one exercise.
    "Standing Strict Overhead Press (Dumbell)": "Overhead Press (Dumbbell)",
    "Glute Bridge": "Glute Bridge",
    "Machine Row": "Seated Row (Machine)",
    "Seated Hip Adduction": "Hip Adductor (Machine)",
    "Cable Fly": "Chest Fly",
    "Seated Hip Abduction": "Hip Abductor (Machine)",
    "Big Toe Mobilization": "Mobilization",
    "Cable Curl": "Bicep Curl (Cable)",
    "Loaded Butterfly Stretch": "Loaded Butterfly Stretch",
    "Pushdown": "Triceps Pushdown (Cable - Straight Bar)",
    "Slant Board Calf Stretch": "Slant Board Calf Stretch",
    "Single Leg Box Step Up": "Step-up",
    "Skull Crusher": "Skullcrusher (Dumbbell)",
    "Barbell Curl": "Bicep Curl (Barbell)",
    "Couch Stretch": "Couch Stretch",
    "Seated Leg Curl": "Seated Leg Curl (Machine)",
}


In [14]:
# Gravitus exported the weights in pounds; convert with the full factor, because dividing
# by 2.2 is off by enough to change the rounded value (110.23 lb -> 50.1 kg instead of 50.0 kg).
LB_PER_KG = 2.20462

# Fail early and list every Gravitus exercise without a Strong name at once,
# instead of stopping at the first unknown one.
unmapped = sorted(
    merged.loc[~merged.exercise.isin(list(excs_mapping)), "exercise"].astype(str).unique()
)
if unmapped:
    raise KeyError(f"No Strong mapping for exercises: {unmapped}")

# Workouts without a recorded duration are exported as 0 minutes.
merged = merged.fillna({"duration_seconds": 0})
data = {
    # the Strong sample timestamps carry no UTC offset, so strip the "+00:00" suffix
    "Date": merged.started_at.str.removesuffix("+00:00"),
    # NOTE(review): untitled workouts carry the literal title "None" at this point — confirm
    # that this is the name wanted in Strong.
    "Workout Name": merged.title,
    # NOTE(review): the Strong sample only shows minute values ("5m"); the format for
    # workouts longer than 60 minutes is unverified — confirm before importing.
    "Duration": (merged.duration_seconds.astype(int) // 60).astype(str) + "m",
    "Exercise Name": merged.exercise.map(excs_mapping),
    # Gravitus numbers sets from 0, the Strong sample numbers them from 1
    "Set Order": merged.set_number + 1,
    "Weight": (merged.weight / LB_PER_KG).round(decimals=1),
    "Reps": merged.reps,
    # the remaining Strong columns have no Gravitus source here and get neutral defaults
    "Distance": pd.Series(0, index=merged.index),
    "Seconds": pd.Series(0., index=merged.index),
    "Notes": pd.Series(np.nan, index=merged.index),
    "Workout Notes": pd.Series(np.nan, index=merged.index),
    "RPE": pd.Series(np.nan, index=merged.index),
}
pd.DataFrame(data).to_csv('output.csv', index=False)
