### Route segmentation 

This notebook explains how a route (in .gpx format) is split into logical segments: ascent, descent and flat.

In [1]:
import gpxpy
import pandas as pd
import numpy as np
import folium
from folium import plugins
from pprint import pprint
import matplotlib.pyplot as plt
from math import radians, cos, sin, asin, sqrt
import warnings
warnings.filterwarnings('ignore')


In [2]:
def sign_equal(a, b):
    """
    Tell whether two numbers have the same sign.

    Zero counts as a sign of its own, so it only matches another zero.
    ex : sign_equal(0,-5) >>> False
         sign_equal(8, 5) >>> True

    Return True/False
    """
    sign_a, sign_b = np.sign(a), np.sign(b)
    return sign_a == sign_b

In [3]:
def haversine(lon1, lat1, lon2, lat2):
    """
    Great-circle distance, in metres, between two points on the earth.

    Both points are given as (longitude, latitude) in decimal degrees.
    The earth is modelled as a sphere with a radius of 6371 km.
    """
    # Work in radians from here on
    lam1, phi1, lam2, phi2 = (radians(angle) for angle in (lon1, lat1, lon2, lat2))

    delta_lam = lam2 - lam1
    delta_phi = phi2 - phi1

    # Haversine formula: the central angle between the two points
    half_chord_sq = sin(delta_phi / 2) ** 2 + cos(phi1) * cos(phi2) * sin(delta_lam / 2) ** 2
    central_angle = 2 * asin(sqrt(half_chord_sq))

    # Arc length in kilometres (radius 6371 km), then converted to metres
    earth_radius_km = 6371
    return earth_radius_km * central_angle * 1000

In [4]:
def segmentation(dataframe):
    """
    Label every row of a route with the number of its logical segment.

    Takes a dataframe from a .gpx with an altitude_gain column as an input.
    altitude_gain for n equals the difference in altitude between n and n-1.

    Segments (ascent, flat, descent) are derived by comparing, for each row,
    the sign of the altitude gain of n with that of n-1:

    * same sign: same segment (ex 8 then 5, same ascent segment)
    * change of sign: new segment (ex 8 then -5, from an ascent to a descent)
    * a zero right after a non-zero gain stays in the current segment
    * from two zeros in a row: flat segment; the first zero of the run is
      moved into that flat segment retroactively
    * a missing (NaN) gain on the first row carries no direction: row 1
      (or a flat run starting on row 1) stays in segment 0, so the start
      point is not left alone in a zero-length segment

    Rows are compared by position, so the dataframe index does not have to
    be a default 0..n-1 range.

    The "segment" column (float64, numbered from 0) is written into
    ``dataframe`` itself and that same dataframe is returned.
    """
    gains = dataframe["altitude_gain"].to_numpy(dtype=float)
    signs = np.sign(gains)
    row_count = len(gains)

    # The first row has no previous point; its gain is normally NaN.
    first_gain_missing = row_count > 0 and np.isnan(gains[0])

    # First row starts at segment zero.
    segments = [0.0] * row_count

    for i in range(1, row_count):
        current, previous = gains[i], gains[i - 1]

        # Default: stay in the segment of the previous row.
        segments[i] = segments[i - 1]

        # Nothing to compare row 1 with when the first gain is missing.
        if i == 1 and first_gain_missing:
            continue

        if current == 0 and previous == 0:
            # Second zero of a run (n-2 is not zero): open a flat segment and
            # pull n-1 into it, since it is no longer a lone zero.
            # Third zero onwards (n-2 is zero): the flat segment just goes on.
            run_starts_here = i >= 2 and gains[i - 2] != 0
            if run_starts_here and not (i == 2 and first_gain_missing):
                segments[i - 1] += 1
                segments[i] = segments[i - 1]
        elif current != 0 and signs[i] != signs[i - 1]:
            # Non-zero gain whose sign differs from the previous one.
            # NOTE(review): this also fires right after a lone zero, so
            # 8, 0, 5 ends up in two ascent segments - confirm this is wanted.
            segments[i] += 1

    # Positional assignment; float dtype kept to match the previous output.
    dataframe["segment"] = np.asarray(segments, dtype=float)
    return dataframe

#### Gpx Parser

The example file is the legendary ascent of Alpe d'Huez (France).

In [5]:
gpx_file = "datas/example.gpx"
# Parse inside a context manager so the file handle is closed as soon as
# parsing is done instead of staying open for the life of the kernel.
with open(gpx_file) as gpx_stream:
    parsed_gpx = gpxpy.parse(gpx_stream)

In [6]:
# Flatten tracks -> track segments -> points into one record per GPS point
data = [
    dict(
        latitude=gps_point.latitude,
        longitude=gps_point.longitude,
        elevation=gps_point.elevation,
    )
    for gpx_track in parsed_gpx.tracks
    for track_segment in gpx_track.segments
    for gps_point in track_segment.points
]

In [7]:
# One row per GPS point, with the latitude / longitude / elevation keys as columns
df = pd.DataFrame(data)
df.head(10)

Unnamed: 0,latitude,longitude,elevation
0,45.05476,6.03177,722.02
1,45.05475,6.03199,721.97
2,45.05489,6.0324,721.56
3,45.05511,6.03276,721.56
4,45.055505,6.03322,721.31
5,45.0559,6.03368,721.18
6,45.05637,6.0342,721.22
7,45.05684,6.03472,721.69
8,45.05743,6.035395,723.39
9,45.05802,6.03607,725.51


In [8]:
df.shape

(479, 3)

In [9]:
df.dtypes

latitude     float64
longitude    float64
elevation    float64
dtype: object

In [10]:
# Keep elevations at two decimals
df['elevation'] = df['elevation'].round(2)

In [12]:
# Work on the first 10 points only, to keep the walkthrough readable.
# .copy() gives an independent frame: head() returns a slice of the full
# route, and adding columns to a slice triggers SettingWithCopyWarning.
df = df.head(10).copy()
df

Unnamed: 0,latitude,longitude,elevation
0,45.05476,6.03177,722.02
1,45.05475,6.03199,721.97
2,45.05489,6.0324,721.56
3,45.05511,6.03276,721.56
4,45.055505,6.03322,721.31
5,45.0559,6.03368,721.18
6,45.05637,6.0342,721.22
7,45.05684,6.03472,721.69
8,45.05743,6.035395,723.39
9,45.05802,6.03607,725.51


#### Reducing the number of points to get slightly "coarser" segments

Reducing the number of points can lead to an erroneous distance calculation: the distance between two remaining points is measured as the crow flies, not along the actual route.


In [None]:
# reduced_df = df.iloc[::5, :]
# reduced_df = reduced_df.reset_index(drop=True)
# reduced_df.head(5)

In [None]:
# reduced_df.shape

### Compute route information

#### Distance

In [15]:
# Distance (in metres, from haversine) between each point and the previous one.
# The first point has no predecessor, so its distance is 0.
# Coordinates are read by position and the column is assigned in one go, so
# this does not depend on df having a default 0..n-1 index.
longitudes = df['longitude'].tolist()
latitudes = df['latitude'].tolist()
df['distance_to_last_point'] = [
    0.0 if i == 0
    else round(haversine(longitudes[i], latitudes[i], longitudes[i - 1], latitudes[i - 1]), 2)
    for i in range(len(longitudes))
]

df

Unnamed: 0,latitude,longitude,elevation,distance_to_last_point,total_distance
0,45.05476,6.03177,722.02,0.0,0.0
1,45.05475,6.03199,721.97,17.32,17.32
2,45.05489,6.0324,721.56,35.77,53.09
3,45.05511,6.03276,721.56,37.39,90.48
4,45.055505,6.03322,721.31,56.87,147.35
5,45.0559,6.03368,721.18,56.87,204.22
6,45.05637,6.0342,721.22,66.33,270.55
7,45.05684,6.03472,721.69,66.33,336.88
8,45.05743,6.035395,723.39,84.35,421.23
9,45.05802,6.03607,725.51,84.35,505.58


In [14]:
# Running sum of the point-to-point distances: distance covered since the
# first point of the route, in metres
df['total_distance'] = df['distance_to_last_point'].cumsum()
df.head(5)

Unnamed: 0,latitude,longitude,elevation,distance_to_last_point,total_distance
0,45.05476,6.03177,722.02,0.0,0.0
1,45.05475,6.03199,721.97,17.32,17.32
2,45.05489,6.0324,721.56,35.77,53.09
3,45.05511,6.03276,721.56,37.39,90.48
4,45.055505,6.03322,721.31,56.87,147.35


#### Altitude Gain

In [None]:
# Elevation difference between each point and the previous one, rounded to
# 2 decimals. The result has one entry fewer than df has rows (the first point
# has no predecessor). Consecutive values are paired by position, so this does
# not depend on df having a default 0..n-1 index.
elevations = df['elevation'].to_numpy()
altitude_gain = [
    round(current - previous, 2)
    for previous, current in zip(elevations[:-1], elevations[1:])
]

In [None]:
# The first point has no previous point, hence no gain: pad with NaN so the
# list has one entry per row of df. The length check keeps the cell safe to
# re-run (no second NaN) and leaves an empty route untouched.
if len(altitude_gain) < df.shape[0]:
    altitude_gain.insert(0, np.nan)

In [None]:
# Attach the gains to the route; the list is matched to the rows by position
df['altitude_gain'] = altitude_gain
df.head(5)

#### Visualization

In [None]:
# Elevation profile: elevation against distance covered since the start
plt.style.use('dark_background')
fig, ax = plt.subplots(figsize=(20, 10))
ax.plot(df['total_distance'], df['elevation'])
ax.set_title("Alpe d'huez")
# total_distance is in metres (haversine output); GPX elevations are in metres too
ax.set_xlabel("Distance from start (m)")
ax.set_ylabel("Elevation (m)")
plt.show()

It looks like the ascent of the Alpe d'huez...

#### Segmentation 

In [None]:
# Earlier, simpler approach, kept for reference: rows 0 and 1 go to segment 0,
# then a new segment starts on every change of sign, with no special handling
# of zeros. It is replaced by the segmentation() call below.
# for i in range(df.shape[0]):
#     if i == 0 or i == 1:
#         df.loc[i,"segment"] = 0
#     else:
#         if not sign_equal(df.loc[i-1,'altitude_gain'], df.loc[i,"altitude_gain"]):
#             df.loc[i,"segment"] = df.loc[i-1,"segment"] + 1 
#         else:
#             df.loc[i,"segment"] = df.loc[i-1,"segment"]

# Adds the "segment" column to df
df = segmentation(df)

In [None]:
df

In [None]:
# First and last row of every segment. stack() moves the 'first' / 'last'
# labels from the columns into a second index level, so rows are keyed by
# (segment, 'first' | 'last').
df_start_end_segments = df.groupby('segment').agg(['first', 'last']).stack()
df_start_end_segments

### Compute segment information

In [None]:
# First / last row of every segment, extracted once instead of on every
# loop iteration.
segment_starts = df_start_end_segments.xs('first', level=1)
segment_ends = df_start_end_segments.xs('last', level=1)
end_distances = segment_ends['total_distance'].to_numpy()
end_elevations = segment_ends['elevation'].to_numpy()

# Loop-local names are used (segment_distance / segment_gain) so that the
# notebook-level altitude_gain list is not overwritten by a scalar.
segments = []
for i in range(len(end_distances)):
    if i == 0:
        # First segment: measured from the very first point of the route
        segment_distance = end_distances[i]
        segment_gain = end_elevations[i] - segment_starts['elevation'].iloc[0]
    else:
        # Later segments start where the previous segment ended
        segment_distance = end_distances[i] - end_distances[i - 1]
        segment_gain = end_elevations[i] - end_elevations[i - 1]

    # Average slope in percent (elevation change relative to distance).
    # A zero-length segment has no defined slope: report NaN, not a 0-division.
    if segment_distance != 0:
        vertical_drop = (segment_gain * 100) / segment_distance
    else:
        vertical_drop = np.nan

    segments.append({
        "total_distance": round(segment_distance, 2),
        "altitude_gain": round(segment_gain, 2),
        "vertical_drop": round(vertical_drop, 2)
    })
segments

In [None]:
# One row per segment: total_distance, altitude_gain and vertical_drop
# (the slope in percent computed above)
segments_df = pd.DataFrame(segments)
segments_df