# Universal gpx parser

The aim of this project is quite basic: to create a standardized procedure that can parse a number of *gpx* files and output their contents as a **Pandas** DataFrame in a reasonable amount of time.

In [1]:
import os
import pathlib
import re
import time
import warnings
from pathlib import Path

import gpxpy
import gpxpy.gpx
import pandas as pd

In [27]:
#Defining a function to parse our gpx files while retaining the valuable data.

def parser(file):
    """Parse one gpx file into a flat summary record.

    Parameters
    ----------
    file : str or path-like
        Location of the gpx file; it is opened as UTF-8 text.

    Returns
    -------
    dict or None
        A dictionary with the keys ``name``, ``coords``, ``alt``,
        ``distance``, ``climb``, ``min_alt``, ``max_alt``, ``s_la``,
        ``s_lo``, ``f_la`` and ``f_lo``, or ``None`` when the file cannot be
        read or parsed, holds no track points, or lacks the elevation data
        needed for the integer fields. A warning naming the file is issued
        whenever ``None`` is returned.

    Notes
    -----
    * ``coords`` and ``alt`` are each wrapped in a one-element list, so a
      DataFrame cell built from them holds the whole sequence.
    * ``distance`` is the 3D length of *all* tracks divided by 1000
      (presumably kilometres, assuming gpxpy reports metres - TODO confirm),
      so it covers the same points as ``coords``, ``climb`` and the
      elevation extremes.
    * ``name`` is taken from the last track in the file.
    * Only ``Exception`` subclasses are turned into a skip; a
      ``KeyboardInterrupt`` still propagates so a long batch run can be
      stopped.
    """
    try:
        # The context manager closes the handle even when parsing fails.
        with open(file, 'r', encoding='utf-8') as gpx_file:
            gpx = gpxpy.parse(gpx_file)

        points = [point
                  for track in gpx.tracks
                  for segment in track.segments
                  for point in segment.points]
        if not points:
            warnings.warn(f'Skipping {file}: no track points found',
                          stacklevel=2)
            return None

        coords = [(point.latitude, point.longitude) for point in points]
        alt = [point.elevation for point in points]
        extremes = gpx.get_elevation_extremes()
        start_point, finish_point = coords[0], coords[-1]

        return {'name': gpx.tracks[-1].name,
                'coords': [coords],
                'alt': [alt],
                'distance': gpx.length_3d() / 1000,
                'climb': int(gpx.get_uphill_downhill()[0]),
                'min_alt': int(extremes[0]),
                'max_alt': int(extremes[1]),
                's_la': start_point[0],
                's_lo': start_point[1],
                'f_la': finish_point[0],
                'f_lo': finish_point[1]}
    except Exception as exc:
        # Best effort: one bad file must not abort a whole batch, but the
        # failure is reported instead of being silently discarded.
        warnings.warn(f'Skipping {file}: {exc!r}', stacklevel=2)
        return None

In [28]:
#Running our function and saving the output as a list of dictionaries.

# perf_counter is a monotonic clock, so the measured duration is not affected
# by system clock adjustments during the run.
start = time.perf_counter()

dict_list = []

directory = 'gpx' #Our very original gpx folder name.

# Sorted so the row order is the same on every run; the order in which glob
# yields directory entries is not guaranteed.
files = sorted(Path(directory).glob('*'))
for file in files:
    record = parser(file)
    # parser returns None for a file it could not parse; keep those out of the
    # list so that every entry is a complete record.
    if record is not None:
        dict_list.append(record)

stop = time.perf_counter()
duration = (stop - start) / 60
print('Minutes:', duration)

Minutes: 10.779968400796255


In [29]:
# Build the DataFrame from the parsed records: one row per dictionary,
# one column per dictionary key.
df = pd.DataFrame(data=dict_list)

In [30]:
# Summarise the frame: column names, non-null counts, dtypes and memory usage.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10000 entries, 0 to 9999
Data columns (total 11 columns):
 #   Column    Non-Null Count  Dtype  
---  ------    --------------  -----  
 0   name      10000 non-null  object 
 1   coords    10000 non-null  object 
 2   alt       10000 non-null  object 
 3   distance  10000 non-null  float64
 4   climb     10000 non-null  int64  
 5   min_alt   10000 non-null  int64  
 6   max_alt   10000 non-null  int64  
 7   s_la      10000 non-null  float64
 8   s_lo      10000 non-null  float64
 9   f_la      10000 non-null  float64
 10  f_lo      10000 non-null  float64
dtypes: float64(5), int64(3), object(3)
memory usage: 859.5+ KB


In [26]:
# Preview the first ten rows of the parsed data.
df.head(10)

Unnamed: 0,name,coords,alt,distance,climb,min_alt,max_alt,s_la,s_lo,f_la,f_lo
0,!. Mai Route,"[[(53.05482, 7.33192), (53.05385, 7.33212), (5...","[[4.0, 5.0, 5.0, 5.0, 4.0, 3.0, 3.0, 3.0, 3.0,...",50.670543,56,-1,8,53.05482,7.33192,53.05483,7.3299
1,!. Versuch mit Dieter,"[[(50.87778, 6.69063), (50.87723, 6.68634), (5...","[[84.0, 82.0, 85.0, 84.0, 83.0, 86.0, 87.0, 83...",2.885819,2,82,87,50.87778,6.69063,50.86786,6.68437
2,!3nádrže NMlýnů.. Nik-VNěm-Hust-Zaj-Mil-Pav-DV...,"[[(48.99251, 16.75447), (48.99248, 16.75414), ...","[[265.0, 265.0, 265.0, 266.0, 252.0, 251.0, 25...",71.425151,454,162,269,48.99251,16.75447,48.99189,16.75409
3,!hg,"[[(52.10393, 20.63397), (52.10457, 20.63537), ...","[[104.0, 106.0, 106.0, 105.0, 105.0, 105.0, 10...",50.486244,99,94,132,52.10393,20.63397,52.13123,20.65984
4,# 13 Boucle du moulin,"[[(45.85329, -72.39056), (45.85237, -72.3889),...","[[104.0, 102.0, 101.0, 100.0, 101.0, 103.0, 10...",82.124462,368,80,201,45.85329,-72.39056,45.85329,-72.39056
5,# 17 La rivière Bécancour,"[[(46.37204, -71.62193), (46.36304, -71.62854)...","[[137.0, 132.0, 132.0, 138.0, 140.0, 145.0, 14...",48.426866,74,110,145,46.37204,-71.62193,46.37204,-71.62193
6,# 4 Route panoramique,"[[(46.05596, -71.95895), (46.0565, -71.95861),...","[[131.0, 131.0, 131.0, 130.0, 132.0, 132.0, 13...",90.887461,938,122,529,46.05596,-71.95895,46.05596,-71.95895
7,# 5 La Route des antiquaires,"[[(46.20084, -72.13717), (46.20092, -72.13679)...","[[89.0, 89.0, 92.0, 92.0, 92.0, 91.0, 89.0, 87...",70.664303,160,74,119,46.20084,-72.13717,46.20084,-72.13717
8,# Danix64 > Emko29,"[[(48.95545, 20.53853), (48.9543, 20.53821), (...","[[474.0, 473.0, 473.0, 473.0, 472.0, 471.0, 47...",3.127121,1,448,474,48.95545,20.53853,48.94046,20.56766
9,# Časovka na Gretľu po ceste,"[[(48.94375, 20.56181), (48.94316, 20.56109), ...","[[453.0, 453.0, 454.0, 454.0, 454.0, 451.0, 45...",11.253011,451,450,836,48.94375,20.56181,48.87901,20.53264
