# Creating a parser function to manipulate gpx files

Our objecive is to create a function that can be fed *gpx* files and will compose a dataframe containing the most useful information of said route, as well as changing the name of the *gpx* file to one more easily accessed.

In [107]:
#Importing our libraries.

import pandas as pd
import time
import re
import pathlib
import os
import gpxpy
import gpxpy.gpx
import time
from pathlib import Path
import os
import random, string
from geopy.geocoders import Nominatim
import reverse_geocode

In [2]:
#Importing our list of names.

names = pd.read_csv('names.csv')
alpha = names['names'].tolist()

In [24]:
#Our parser function.

def parser(file, name):
    try:
        gpx_file = open(file, 'r') 
        gpx = gpxpy.parse(gpx_file) 
        coords = []
        for track in gpx.tracks:
            for segment in track.segments:        
                for point in segment.points:
                    coords.append(tuple([point.latitude, point.longitude]))
        parsed_file = {'name': name,
                       'original_name': track.name,
                        'source': gpx.creator,
                        'start': str(tuple([coords[0][0], coords[0][1]])),
                        'distance': track.length_3d()/1000, 
                        'climb': int(gpx.get_uphill_downhill()[0]),
                        'min_alt': int(gpx.get_elevation_extremes()[0]),
                        'max_alt': int(gpx.get_elevation_extremes()[1])}
        with open('generated_gpx/' + name + ".gpx", "w") as f:
            f.write(gpx.to_xml())
        return parsed_file
    except:
        gpx_file = open(file, 'r', encoding='utf-8') 
        gpx = gpxpy.parse(gpx_file) 
        coords = []
        for track in gpx.tracks:
            for segment in track.segments:        
                for point in segment.points:
                    coords.append(tuple([point.latitude, point.longitude]))
        parsed_file = {'name': name,
                       'original_name': track.name,
                        'source': gpx.creator,
                        'start': str(tuple([coords[0][0], coords[0][1]])),
                        'distance': track.length_3d()/1000, 
                        'climb': int(gpx.get_uphill_downhill()[0]),
                        'min_alt': int(gpx.get_elevation_extremes()[0]),
                        'max_alt': int(gpx.get_elevation_extremes()[1])}
        with open('generated_gpx/' + name + ".gpx", "w") as f:
            f.write(gpx.to_xml())
        return parsed_file

## Name generation

In [51]:
#Generating a list of names using alphanumeric characters.

alpha = []

for i in range(50000000):
    x = ''.join(random.choice(string.ascii_lowercase + string.digits) for _ in range(6))
    alpha.append(x)

In [121]:
#Dropping duplicate names.

alpha = set(alpha)

In [122]:
len(alpha)

49430049

In [61]:
#Storing our names as a dataframe for future use.

names = pd.DataFrame(set(alpha), columns =['names'])

In [63]:
#Storing it.

names.to_csv('names.csv', index=False)

## Final function

In [213]:
def gpx_creator():
    """
    Input: none, but all target gpx files must be in a folder named 'gpx'.
    
    Output: new gpx files created as per 'parser' function.
    
    """
    start = time.time() #Starting our fimer.

    directory = 'gpx' #The folder containing the gpx files.
    parsed_list = []
    
    files = Path(directory).glob('*') #Using all files in the folder as input.
    i = 0
    for file in files:
        name = alpha[i]
        parsed_list.append(parser(file, name)) #Applying the previous function to every file.
        i = i+1

    stop = time.time() #Stopping our timer.
    duration = (stop - start) / 60
    
    df = pd.DataFrame(parsed_list)
    return df #Returning the elapsed minutes.

In [17]:
start = time.time()

df = gpx_creator()

stop = time.time() #Stopping our timer.
duration = (stop - start) / 60

print('Minutes:', duration)

Minutes: 0.09244021972020468


In [18]:
df.head()

Unnamed: 0,name,original_name,source,start,distance,climb,min_alt,max_alt
0,54yu2w,besaide martxa 2011,Garmin Connect,"(43.0696788802743, -2.47713318094611)",[35.65579643643311],[1298],[193],[631]
1,00v5fe,Untitled,Garmin Connect,"(37.1465449780226, -3.56366956606507)",[28.30995863439158],[433],[680],[1054]
2,f4r9n2,Carretera costa Asterrika Baurdo,Garmin Connect,"(43.3455239608884, -2.48506112024188)",[34.621333051583576],[869],[6],[226]
3,lu6s5f,Circuito AlbuÃ±uelas,Garmin Connect,"(37.1134440042078, -3.61588572151959)",[71.29276829383188],[946],[503],[874]
4,rgxaz9,prova,Garmin Connect,"(41.6769059747458, 1.2808879930526)",[53.19353112278639],[677],[485],[767]


In [79]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10000 entries, 0 to 9999
Data columns (total 8 columns):
 #   Column         Non-Null Count  Dtype 
---  ------         --------------  ----- 
 0   name           10000 non-null  object
 1   original_name  10000 non-null  object
 2   source         10000 non-null  object
 3   start          10000 non-null  object
 4   distance       10000 non-null  object
 5   climb          10000 non-null  object
 6   min_alt        10000 non-null  object
 7   max_alt        10000 non-null  object
dtypes: object(8)
memory usage: 625.1+ KB


## TESTING

In [35]:
#Our parser function.

def parser(file, name):
    try:
        gpx_file = open(file, 'r') 
        gpx = gpxpy.parse(gpx_file) 
        coords = []
        for track in gpx.tracks:
            for segment in track.segments:        
                for point in segment.points:
                    coords.append(tuple([point.latitude, point.longitude]))
        parsed_file = {'name': name,
                       'original_name': track.name,
                        'source': gpx.creator,
                        'start': str(tuple([coords[0][0], coords[0][1]])),
                        'distance': track.length_3d()/1000, 
                        'climb': int(gpx.get_uphill_downhill()[0]),
                        'min_alt': int(gpx.get_elevation_extremes()[0]),
                        'max_alt': int(gpx.get_elevation_extremes()[1])}
        with open('generated_gpx/' + name + ".gpx", "w") as f:
            f.write(gpx.to_xml())
        return parsed_file
    except:
        gpx_file = open(file, 'r', encoding='utf-8') 
        gpx = gpxpy.parse(gpx_file) 
        coords = []
        for track in gpx.tracks:
            for segment in track.segments:        
                for point in segment.points:
                    coords.append(tuple([point.latitude, point.longitude]))
        parsed_file = {'name': name,
                       'original_name': track.name,
                        'source': gpx.creator,
                        'start': str(tuple([coords[0][0], coords[0][1]])),
                        'distance': track.length_3d()/1000, 
                        'climb': int(gpx.get_uphill_downhill()[0]),
                        'min_alt': int(gpx.get_elevation_extremes()[0]),
                        'max_alt': int(gpx.get_elevation_extremes()[1])}
        with open('generated_gpx/' + name + ".gpx", "w") as f:
            f.write(gpx.to_xml())
        return parsed_file

In [36]:
start = time.time()

df = gpx_creator()

stop = time.time() #Stopping our timer.
duration = (stop - start) / 60

print('Minutes:', duration)

Minutes: 0.08963996569315592


In [37]:
df.head()

Unnamed: 0,name,original_name,source,start,distance,climb,min_alt,max_alt
0,54yu2w,besaide martxa 2011,Garmin Connect,"(43.0696788802743, -2.47713318094611)",35.655796,1298,193,631
1,00v5fe,Untitled,Garmin Connect,"(37.1465449780226, -3.56366956606507)",28.309959,433,680,1054
2,f4r9n2,Carretera costa Asterrika Baurdo,Garmin Connect,"(43.3455239608884, -2.48506112024188)",34.621333,869,6,226
3,lu6s5f,Circuito AlbuÃ±uelas,Garmin Connect,"(37.1134440042078, -3.61588572151959)",71.292768,946,503,874
4,rgxaz9,prova,Garmin Connect,"(41.6769059747458, 1.2808879930526)",53.193531,677,485,767


## Assigning country to each route

For this purpose we'll be using *Nominatim*.

In [39]:
#Initializing an instance.

app = Nominatim(user_agent="tutorial")

In [45]:
#Testing a function I found online:

def get_address_by_location(latitude, longitude, language="en"):
    """This function returns an address as raw from a location
    will repeat until success"""
    # build coordinates string to pass to reverse() function
    coordinates = f"{latitude}, {longitude}"
    # sleep for a second to respect Usage Policy
    time.sleep(1)
    try:
        return app.reverse(coordinates, language=language).raw
    except:
        return get_address_by_location(latitude, longitude)

In [145]:
#Testing the function.

address = get_address_by_location(41.6769059747458, 1.2808879930526)

In [146]:
#Accessing the country.

address['address']['country']

'Spain'

In [93]:
#Incorporating the function into our parser:

#Our parser function.

def parser(file, name):
    try:
        gpx_file = open(file, 'r') 
        gpx = gpxpy.parse(gpx_file) 
        coords = []
        for track in gpx.tracks:
            for segment in track.segments:        
                for point in segment.points:
                    coords.append(tuple([point.latitude, point.longitude]))
        country = get_address_by_location(coords[0][0], coords[0][1])['address']['country']
        parsed_file = {'name': name,
                       'original_name': track.name,
                        'source': gpx.creator,
                        'country': country,
                        'start': str(tuple([coords[0][0], coords[0][1]])),
                        'distance': track.length_3d()/1000, 
                        'climb': int(gpx.get_uphill_downhill()[0]),
                        'min_alt': int(gpx.get_elevation_extremes()[0]),
                        'max_alt': int(gpx.get_elevation_extremes()[1])}
        with open('generated_gpx/' + name + ".gpx", "w") as f:
            f.write(gpx.to_xml())
        return parsed_file
    except:
        gpx_file = open(file, 'r', encoding='utf-8') 
        gpx = gpxpy.parse(gpx_file) 
        coords = []
        for track in gpx.tracks:
            for segment in track.segments:        
                for point in segment.points:
                    coords.append(tuple([point.latitude, point.longitude]))
        country = get_address_by_location(coords[0][0], coords[0][1])['address']['country']
        parsed_file = {'name': name,
                       'original_name': track.name,
                        'source': gpx.creator,
                        'country': country,
                        'start': str(tuple([coords[0][0], coords[0][1]])),
                        'distance': track.length_3d()/1000, 
                        'climb': int(gpx.get_uphill_downhill()[0]),
                        'min_alt': int(gpx.get_elevation_extremes()[0]),
                        'max_alt': int(gpx.get_elevation_extremes()[1])}
        with open('generated_gpx/' + name + ".gpx", "w") as f:
            f.write(gpx.to_xml())
        return parsed_file

In [94]:
#Testing our new parser function:

start = time.time()

df = gpx_creator()

stop = time.time() #Stopping our timer.
duration = (stop - start) / 60

print('Minutes:', duration)

Minutes: 1.9374013145764668


In [97]:
df.head(5)

Unnamed: 0,name,original_name,source,country,start,distance,climb,min_alt,max_alt
0,54yu2w,besaide martxa 2011,Garmin Connect,Spain,"(43.0696788802743, -2.47713318094611)",35.655796,1298,193,631
1,00v5fe,Untitled,Garmin Connect,Spain,"(37.1465449780226, -3.56366956606507)",28.309959,433,680,1054
2,f4r9n2,Carretera costa Asterrika Baurdo,Garmin Connect,Spain,"(43.3455239608884, -2.48506112024188)",34.621333,869,6,226
3,lu6s5f,Circuito AlbuÃ±uelas,Garmin Connect,Spain,"(37.1134440042078, -3.61588572151959)",71.292768,946,503,874
4,rgxaz9,prova,Garmin Connect,Spain,"(41.6769059747458, 1.2808879930526)",53.193531,677,485,767


The function works fine but it takes a very long time to process each file. This is unacceptable, I will have to find a better (faster) way of determining a route's country of origin.

In [148]:
from geopy.geocoders import Nominatim
geolocator = Nominatim(user_agent="http")
location = geolocator.reverse(tuple([51.5074,0.1278]))

print(location.address.split(',')[-1][1:])

United Kingdom


In [113]:
type(location.address)

str

In [114]:
split_string = location.address.split(',')

In [132]:
split_string[-1][1:]

'United Kingdom'

### Testing new parser function with a different geolocating method

In [139]:
def parser(file, name):
    try:
        gpx_file = open(file, 'r') 
        gpx = gpxpy.parse(gpx_file) 
        coords = []
        for track in gpx.tracks:
            for segment in track.segments:        
                for point in segment.points:
                    coords.append(tuple([point.latitude, point.longitude]))
        geolocator = Nominatim(user_agent="http")
        location = geolocator.reverse(coords[0])
        split_string = location.address.split(',')
        country = split_string[-1]
        parsed_file = {'name': name,
                       'original_name': track.name,
                        'source': gpx.creator,
                        'country': country,
                        'start': str(tuple([coords[0][0], coords[0][1]])),
                        'distance': track.length_3d()/1000, 
                        'climb': int(gpx.get_uphill_downhill()[0]),
                        'min_alt': int(gpx.get_elevation_extremes()[0]),
                        'max_alt': int(gpx.get_elevation_extremes()[1])}
        with open('generated_gpx/' + name + ".gpx", "w") as f:
            f.write(gpx.to_xml())
        return parsed_file
    except:
        gpx_file = open(file, 'r', encoding='utf-8') 
        gpx = gpxpy.parse(gpx_file) 
        coords = []
        for track in gpx.tracks:
            for segment in track.segments:        
                for point in segment.points:
                    coords.append(tuple([point.latitude, point.longitude]))
        geolocator = Nominatim(user_agent="http")
        location = geolocator.reverse(coords[0])
        split_string = location.address.split(',')
        country = split_string[-1]
        parsed_file = {'name': name,
                       'original_name': track.name,
                        'source': gpx.creator,
                        'country': country,
                        'start': str(tuple([coords[0][0], coords[0][1]])),
                        'distance': track.length_3d()/1000, 
                        'climb': int(gpx.get_uphill_downhill()[0]),
                        'min_alt': int(gpx.get_elevation_extremes()[0]),
                        'max_alt': int(gpx.get_elevation_extremes()[1])}
        with open('generated_gpx/' + name + ".gpx", "w") as f:
            f.write(gpx.to_xml())
        return parsed_file

In [140]:
#Testing our new parser function:

start = time.time()

df = gpx_creator()

stop = time.time() #Stopping our timer.
duration = (stop - start) / 60

print('Minutes:', duration)

Minutes: 0.8352776447931926


While this is much faster than the previous iteration, I believe it could be further optimized.

In [150]:
def parser(file, name):
    try:
        gpx_file = open(file, 'r') 
        gpx = gpxpy.parse(gpx_file) 
        coords = []
        for track in gpx.tracks:
            for segment in track.segments:        
                for point in segment.points:
                    coords.append(tuple([point.latitude, point.longitude]))
        geolocator = Nominatim(user_agent="http")
        country = geolocator.reverse(coords[0]).address.split(',')[-1]
        parsed_file = {'name': name,
                       'original_name': track.name,
                        'source': gpx.creator,
                        'country': country,
                        'start': str(tuple([coords[0][0], coords[0][1]])),
                        'distance': track.length_3d()/1000, 
                        'climb': int(gpx.get_uphill_downhill()[0]),
                        'min_alt': int(gpx.get_elevation_extremes()[0]),
                        'max_alt': int(gpx.get_elevation_extremes()[1])}
        with open('generated_gpx/' + name + ".gpx", "w") as f:
            f.write(gpx.to_xml())
        return parsed_file
    except:
        gpx_file = open(file, 'r', encoding='utf-8') 
        gpx = gpxpy.parse(gpx_file) 
        coords = []
        for track in gpx.tracks:
            for segment in track.segments:        
                for point in segment.points:
                    coords.append(tuple([point.latitude, point.longitude]))
        geolocator = Nominatim(user_agent="http")
        country = geolocator.reverse(coords[0]).address.split(',')[-1]
        parsed_file = {'name': name,
                       'original_name': track.name,
                        'source': gpx.creator,
                        'country': country,
                        'start': str(tuple([coords[0][0], coords[0][1]])),
                        'distance': track.length_3d()/1000, 
                        'climb': int(gpx.get_uphill_downhill()[0]),
                        'min_alt': int(gpx.get_elevation_extremes()[0]),
                        'max_alt': int(gpx.get_elevation_extremes()[1])}
        with open('generated_gpx/' + name + ".gpx", "w") as f:
            f.write(gpx.to_xml())
        return parsed_file

In [151]:
#Testing our new parser function:

start = time.time()

df = gpx_creator()

stop = time.time() #Stopping our timer.
duration = (stop - start) / 60

print('Minutes:', duration)

Minutes: 33.51064385175705


While the code of the function has been heavily optimized, the real bottleneck is the **Nominatim**. Unless a better method can be found, 0.5 seconds per file is the best that can be achieved.

### Third approach, performing the calculations on the machine

In [188]:
#This function uses a Shapely file to determine if a point is within a country, and returns it.

import requests

from shapely.geometry import mapping, shape
from shapely.prepared import prep
from shapely.geometry import Point


data = requests.get("https://raw.githubusercontent.com/datasets/geo-countries/master/data/countries.geojson").json()

countries = {}
for feature in data["features"]:
    geom = feature["geometry"]
    country = feature["properties"]["ADMIN"]
    countries[country] = prep(shape(geom))

print(len(countries))

def get_country(lon, lat):
    point = Point(lon, lat)
    for country, geom in countries.items():
        if geom.contains(point):
            return country

    return "Unknown"

print(get_country(10.0, 47.0))

255
Austria


In [195]:
#Implementing this function within our parser.

def parser(file, name):
    try:
        gpx_file = open(file, 'r') 
        gpx = gpxpy.parse(gpx_file) 
        coords = []
        for track in gpx.tracks:
            for segment in track.segments:        
                for point in segment.points:
                    coords.append(tuple([point.latitude, point.longitude]))
        country = get_country(coords[0][1], coords[0][0])
        parsed_file = {'name': name,
                       'original_name': track.name,
                        'source': gpx.creator,
                        'country': country,
                        'start': str(tuple([coords[0][0], coords[0][1]])),
                        'distance': track.length_3d()/1000, 
                        'climb': int(gpx.get_uphill_downhill()[0]),
                        'min_alt': int(gpx.get_elevation_extremes()[0]),
                        'max_alt': int(gpx.get_elevation_extremes()[1])}
        with open('generated_gpx/' + name + ".gpx", "w") as f:
            f.write(gpx.to_xml())
        return parsed_file
    except:
        gpx_file = open(file, 'r', encoding='utf-8') 
        gpx = gpxpy.parse(gpx_file) 
        coords = []
        for track in gpx.tracks:
            for segment in track.segments:        
                for point in segment.points:
                    coords.append(tuple([point.latitude, point.longitude]))
        country = get_country(coords[0][1], coords[0][0])
        parsed_file = {'name': name,
                       'original_name': track.name,
                        'source': gpx.creator,
                        'country': country,
                        'start': str(tuple([coords[0][0], coords[0][1]])),
                        'distance': track.length_3d()/1000, 
                        'climb': int(gpx.get_uphill_downhill()[0]),
                        'min_alt': int(gpx.get_elevation_extremes()[0]),
                        'max_alt': int(gpx.get_elevation_extremes()[1])}
        with open('generated_gpx/' + name + ".gpx", "w") as f:
            f.write(gpx.to_xml())
        return parsed_file

In [161]:
#Testing our new parser function:

start = time.time()

df = gpx_creator()

stop = time.time() #Stopping our timer.
duration = (stop - start) / 60

print('Minutes:', duration)

Minutes: 0.5423984885215759


In [163]:
df.head(5)

Unnamed: 0,name,original_name,source,country,start,distance,climb,min_alt,max_alt
0,54yu2w,jarapalos integral,Garmin Connect,Spain,"(36.6441969852895, -4.68569928780198)",64.815114,2249,118,1122
1,00v5fe,Guadalajara,Garmin Connect,Spain,"(40.3005968872458, -3.4378572832793)",131.904391,1180,569,941
2,f4r9n2,Subida a Oiz,Garmin Connect,Spain,"(43.1772534828633, -2.63386865146458)",33.586719,921,113,1009
3,lu6s5f,VilafrancadeBonAnyManacorPetraVilafrancadeBonAny,Garmin Connect,Spain,"(39.57054, 3.09186)",41.194597,315,42,221
4,rgxaz9,jarapalos integral,Garmin Connect,Spain,"(36.6441969852895, -4.68569928780198)",64.707138,2176,121,1138


Success! Now the function is almost as fast as the first one while also adding the country to a specific column. The speed is about 0.06 seconds per file, or about 55K per hour.

## Adding routes to folders named by country

It would be useful to have the resulting files in separate folders with the name of the country, and this could be accomplished quite easily.

In [193]:
#Implementing this function within our parser.

def parser(file, name):
    try:
        try:
            gpx_file = open(file, 'r') 
            gpx = gpxpy.parse(gpx_file) 
            coords = []
            for track in gpx.tracks:
                for segment in track.segments:        
                    for point in segment.points:
                        coords.append(tuple([point.latitude, point.longitude]))
            country = get_country(coords[0][1], coords[0][0])
            parsed_file = {'name': name,
                           'original_name': track.name,
                            'source': gpx.creator,
                            'country': country,
                            'start': str(tuple([coords[0][0], coords[0][1]])),
                            'distance': track.length_3d()/1000, 
                            'climb': int(gpx.get_uphill_downhill()[0]),
                            'min_alt': int(gpx.get_elevation_extremes()[0]),
                            'max_alt': int(gpx.get_elevation_extremes()[1])}
            path = "C:/Users/Usuario/Ironhack/Personal_projects/Port-Scrapper/generated_gpx/" + country
            if not os.path.exists(path):
                os.makedirs(path)
            with open(path + '/' + name + ".gpx", "w") as f:
                f.write(gpx.to_xml())
            return parsed_file
        except:
            gpx_file = open(file, 'r', encoding='utf-8') 
            gpx = gpxpy.parse(gpx_file) 
            coords = []
            for track in gpx.tracks:
                for segment in track.segments:        
                    for point in segment.points:
                        coords.append(tuple([point.latitude, point.longitude]))
            country = get_country(coords[0][1], coords[0][0])
            parsed_file = {'name': name,
                           'original_name': track.name,
                            'source': gpx.creator,
                            'country': country,
                            'start': str(tuple([coords[0][0], coords[0][1]])),
                            'distance': track.length_3d()/1000, 
                            'climb': int(gpx.get_uphill_downhill()[0]),
                            'min_alt': int(gpx.get_elevation_extremes()[0]),
                            'max_alt': int(gpx.get_elevation_extremes()[1])}
            path = "C:/Users/Usuario/Ironhack/Personal_projects/Port-Scrapper/generated_gpx/" + country
            if not os.path.exists(path):
                os.makedirs(path)
            with open(path + '/' + name + ".gpx", "w") as f:
                f.write(gpx.to_xml())
            return parsed_file
    except:
        pass

In [194]:
#Testing our new parser function:

start = time.time()

df = gpx_creator()

stop = time.time() #Stopping our timer.
duration = (stop - start) / 60

print('Minutes:', duration)

AttributeError: 'NoneType' object has no attribute 'keys'

In [209]:
def parser(file, name):
    try:
        try:
            gpx_file = open(file, 'r') 
            gpx = gpxpy.parse(gpx_file) 
            coords = []
            for track in gpx.tracks:
                for segment in track.segments:        
                    for point in segment.points:
                        coords.append(tuple([point.latitude, point.longitude]))
            country = get_country(coords[0][1], coords[0][0])
            parsed_file = {'name': name,
                           'original_name': track.name,
                            'source': gpx.creator,
                            'country': country,
                            'start': str(tuple([coords[0][0], coords[0][1]])),
                            'distance': track.length_3d()/1000, 
                            'climb': int(gpx.get_uphill_downhill()[0]),
                            'min_alt': int(gpx.get_elevation_extremes()[0]),
                            'max_alt': int(gpx.get_elevation_extremes()[1])}
            if country == 'Spain':
                with open('Spain/' + name + ".gpx", "w") as f:
                    f.write(gpx.to_xml())
            else:
                pass
        except:
            gpx_file = open(file, 'r', encoding='utf-8') 
            gpx = gpxpy.parse(gpx_file) 
            coords = []
            for track in gpx.tracks:
                for segment in track.segments:        
                    for point in segment.points:
                        coords.append(tuple([point.latitude, point.longitude]))
            country = get_country(coords[0][1], coords[0][0])
            parsed_file = {'name': name,
                           'original_name': track.name,
                            'source': gpx.creator,
                            'country': country,
                            'start': str(tuple([coords[0][0], coords[0][1]])),
                            'distance': track.length_3d()/1000, 
                            'climb': int(gpx.get_uphill_downhill()[0]),
                            'min_alt': int(gpx.get_elevation_extremes()[0]),
                            'max_alt': int(gpx.get_elevation_extremes()[1])}
            if country == 'Spain':
                with open('Spain/' + name + ".gpx", "w") as f:
                    f.write(gpx.to_xml())
            else:
                pass
            return parsed_file
    except:
        pass

In [210]:
#Testing our new parser function:

start = time.time()

df = gpx_creator()

stop = time.time() #Stopping our timer.
duration = (stop - start) / 60

print('Minutes:', duration)

Minutes: 11.744745826721191


In [309]:
def parser(file, name):
    try:
        gpx_file = open(file, 'r') 
        gpx = gpxpy.parse(gpx_file) 
        coords = []
        for track in gpx.tracks:
            for segment in track.segments:        
                for point in segment.points:
                    coords.append(tuple([point.latitude, point.longitude]))
        parsed_file = {'name': name,
                       'original_name': track.name,
                        'source': gpx.creator,
                        'start': str(tuple([coords[0][0], coords[0][1]])),
                        'distance': track.length_3d()/1000, 
                        'climb': int(gpx.get_uphill_downhill()[0]),
                        'min_alt': int(gpx.get_elevation_extremes()[0]),
                        'max_alt': int(gpx.get_elevation_extremes()[1])}
        if country == 'Spain':
            with open('Spain/' + name + ".gpx", "w") as f:
                f.write(gpx.to_xml())
        return parsed_file
    except:
        pass

In [316]:
#Parser function with country detection:

def parser(file, name):
    try:
        gpx_file = open(file, 'r') 
        gpx = gpxpy.parse(gpx_file) 
        coords = []
        for track in gpx.tracks:
            for segment in track.segments:        
                for point in segment.points:
                    coords.append(tuple([point.latitude, point.longitude]))
        country = get_country(coords[0][1], coords[0][0])
        parsed_file = {'name': name,
                       'original_name': track.name,
                        'source': gpx.creator,
                        'country': country,
                        'start': str(tuple([coords[0][0], coords[0][1]])),
                        'distance': track.length_3d()/1000, 
                        'climb': int(gpx.get_uphill_downhill()[0]),
                        'min_alt': int(gpx.get_elevation_extremes()[0]),
                        'max_alt': int(gpx.get_elevation_extremes()[1])}
        if country == 'Spain':
            with open('Spain/' + name + ".gpx", "w") as f:
                f.write(gpx.to_xml())
                return parsed_file
    except:
        pass

In [317]:
def gpx_creator():
    """
    Input: none, but all target gpx files must be in a folder named 'gpx'.
    
    Output: new gpx files created as per 'parser' function.
    
    """
    start = time.time() #Starting our timer.

    directory = 'gpx' #The folder containing the gpx files.
    parsed_list = []
    
    files = Path(directory).glob('*') #Using all files in the folder as input.
    i = 0
    for file in files:
        name = alpha[i]
        parsed = parser(file, name)
        if parsed != None:
            parsed_list.append(parsed)
            i = i+1

    stop = time.time() #Stopping our timer.
    duration = (stop - start) / 60
    
    df = pd.DataFrame(parsed_list)
    return df #Returning the elapsed minutes.

In [318]:
#Testing our new parser function:

start = time.time()

df = gpx_creator()

stop = time.time() #Stopping our timer.
duration = (stop - start) / 60

print('Minutes:', duration)

Minutes: 0.5918626586596171


In [319]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 484 entries, 0 to 483
Data columns (total 9 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   name           484 non-null    object 
 1   original_name  484 non-null    object 
 2   source         484 non-null    object 
 3   country        484 non-null    object 
 4   start          484 non-null    object 
 5   distance       484 non-null    float64
 6   climb          484 non-null    int64  
 7   min_alt        484 non-null    int64  
 8   max_alt        484 non-null    int64  
dtypes: float64(1), int64(3), object(5)
memory usage: 34.2+ KB


In [320]:
df.head()

Unnamed: 0,name,original_name,source,country,start,distance,climb,min_alt,max_alt
0,54yu2w,circuito btt Valderrobres,Garmin Connect,Spain,"(40.87018978782, 0.152738643810153)",30.639216,760,469,768
1,00v5fe,Enrteno 05/08/2014,Garmin Connect,Spain,"(41.4576401654631, 2.20031881704926)",46.500193,90,6,39
2,f4r9n2,Cerdanyola - Castellar,Garmin Connect,Spain,"(41.494322810322, 2.15276844799519)",34.469302,333,64,309
3,lu6s5f,Palomeras-AgiÃ±a-Aritxulegi-Gurutze.,Garmin Connect,Spain,"(43.310460364446, -1.90735302865505)",107.211654,1901,1,549
4,rgxaz9,Ronda - Alpandeire - Farajan - Juzcar - Ronda,Garmin Connect,Spain,"(36.7461060080677, -5.15955367125571)",48.672365,1040,612,1072
