In [3]:
# Import libraries to be safe
import csv
import random
import math
import copy
import pandas as pd
import numpy as np
import pytz
import gpxpy
import geopandas as gpd
from shapely.geometry import Point
from datetime import timedelta
from pytz import timezone
from gpxcsv import gpxtolist
from geopy.distance import geodesic

In [4]:
def create_activity_gdf(gpx_file):

    # Load the gpx file into a list using gpxcsv
    gpx_list = gpxtolist(gpx_file)

    # Initialize a geopandas data frame 
    gpx_df = pd.DataFrame(gpx_list)

    # Convert time column into datetime object using the datetime package
    gpx_df.time = pd.to_datetime(gpx_df['time'], format='%Y-%m-%dT%H:%M:%SZ')

    # Looking into the gpx file, Track-1 took place in Fayetteville in January of 2015, meaning the timezone should be CST
    # Set the timezone to CST
    cst_zone = pytz.timezone('America/Chicago')
    gpx_df['datetime'] = gpx_df['time'].dt.tz_localize(cst_zone)

    # Creating a geometry object that uses lon and lat from the gpx file
    gpx_df['geometry'] = gpd.points_from_xy(gpx_df['lon'], gpx_df['lat'])

    # Calculate the timedelta between consecutive points using .shift()
    gpx_df['timedelta'] = gpx_df['time'] - gpx_df['time'].shift(1)

    # I asked chatGPT how I would calculate the distance in meters between each epoch and this is what it gave me:
    # Calculate distance between consecutive points using the geodesic function (in meters)
    gpx_df['distance'] = gpx_df.apply(
        lambda row: geodesic(
            (row['lat'], row['lon']),
            (gpx_df['lat'].shift(1).iloc[row.name], gpx_df['lon'].shift(1).iloc[row.name])).meters if row.name > 0 else 0,
        axis=1
    )

    # Building off this distance between epoch calculation I determined the pace in a similar way
    # Calculate the pace by converting the time from seconds to minutes and the distance from meters to miles
    gpx_df['pace'] = gpx_df.apply(
        lambda row: (row['timedelta'].total_seconds() / 60) / (row['distance'] / 1609.34) 
        # Error checking if distance is 0
        if row['distance'] > 0 else None,
        axis=1
    )
    # Only return the five necessary columns
    return gpx_df[['datetime', 'timedelta', 'geometry', 'distance', 'pace', 'hr']]




In [5]:
activity_gdf = create_activity_gdf('./Strava/Track-1.gpx')

print(activity_gdf)

                     datetime       timedelta                    geometry  \
0   2015-01-06 22:48:58-06:00             NaT  POINT (-94.16255 36.10295)   
1   2015-01-06 22:48:59-06:00 0 days 00:00:01  POINT (-94.16255 36.10294)   
2   2015-01-06 22:49:01-06:00 0 days 00:00:02  POINT (-94.16256 36.10292)   
3   2015-01-06 22:49:02-06:00 0 days 00:00:01  POINT (-94.16258 36.10289)   
4   2015-01-06 22:49:04-06:00 0 days 00:00:02  POINT (-94.16263 36.10284)   
..                        ...             ...                         ...   
480 2015-01-06 23:32:50-06:00 0 days 00:00:06  POINT (-94.16252 36.10601)   
481 2015-01-06 23:32:56-06:00 0 days 00:00:06   POINT (-94.16241 36.1062)   
482 2015-01-06 23:33:02-06:00 0 days 00:00:06  POINT (-94.16234 36.10641)   
483 2015-01-06 23:33:07-06:00 0 days 00:00:05  POINT (-94.16232 36.10659)   
484 2015-01-06 23:33:09-06:00 0 days 00:00:02  POINT (-94.16232 36.10666)   

      distance       pace     hr  
0     0.000000        NaN   72.0  
1    