In [47]:
import pandas as pd
import numpy as np
import gpxpy
from geopy.distance import geodesic
from pathlib import Path
from tqdm import tqdm
import gzip
import tempfile
from fitparse import FitFile
import os
import pickle

In [48]:
def parse_gpx(file_path):
    """Read a GPX file and summarise the track it contains.

    The file is opened as UTF-8 text (undecodable bytes are dropped) and
    handed to ``gpxpy.parse``. Every point of every segment of every track
    is collected, in file order.

    Parameters
    ----------
    file_path : str or path-like
        Location of the ``.gpx`` file.

    Returns
    -------
    dict
        ``points``      -- list of ``(latitude, longitude)`` tuples.
        ``elevations``  -- elevations of the points that carry one
                           (points without elevation are left out, so this
                           list can be shorter than ``points``).
        ``distance_km`` -- sum of geodesic distances between consecutive
                           points, rounded to 2 decimals.
        ``denivele_m``  -- sum of the positive differences between
                           consecutive elevations, rounded to 1 decimal.
    """
    with open(file_path, "r", encoding="utf-8", errors="ignore") as handle:
        gpx = gpxpy.parse(handle)

    # Flatten tracks -> segments -> points into a single ordered list.
    track_points = [
        pt
        for track in gpx.tracks
        for segment in track.segments
        for pt in segment.points
    ]

    points = [(pt.latitude, pt.longitude) for pt in track_points]
    elevations = [pt.elevation for pt in track_points if pt.elevation is not None]

    # Total distance: add up each consecutive leg.
    if len(points) > 1:
        distance = sum(geodesic(start, end).km for start, end in zip(points, points[1:]))
    else:
        distance = 0.0

    # Positive elevation gain: only climbs contribute, descents count as 0.
    if len(elevations) > 1:
        dplus = sum(max(after - before, 0) for before, after in zip(elevations, elevations[1:]))
    else:
        dplus = 0.0

    summary = {}
    summary["points"] = points
    summary["elevations"] = elevations
    summary["distance_km"] = round(distance, 2)
    summary["denivele_m"] = round(dplus, 1)
    return summary

In [49]:
def parse_fit_gz(file_path):
    """Read a gzip-compressed FIT activity and summarise its GPS track.

    Parameters
    ----------
    file_path : str or path-like
        Location of the ``.fit.gz`` file.

    Returns
    -------
    dict
        ``points``      -- list of ``(lat, lon)`` tuples, one per "record"
                           message that carries a usable position.
        ``elevations``  -- altitude values of those records (records without
                           an altitude are left out, so this list can be
                           shorter than ``points``).
        ``distance_km`` -- sum of geodesic distances between consecutive
                           points, rounded to 2 decimals.
        ``denivele_m``  -- sum of the positive differences between
                           consecutive elevations, rounded to 1 decimal.
    """
    with gzip.open(file_path, "rb") as f:
        raw_data = f.read()

    # FitFile accepts the raw FIT bytes directly, so no temporary file is
    # needed (the previous temp file was created with delete=False and never
    # removed, leaking one file per parsed activity).
    fitfile = FitFile(raw_data)

    # Raw positions are scaled by 180 / 2**31 to obtain degrees
    # (presumably the FIT "semicircle" encoding -- see the FIT SDK).
    to_degrees = 180 / 2**31

    points = []
    elevations = []

    for record in fitfile.get_messages("record"):
        data = {d.name: d.value for d in record}

        lat_raw = data.get("position_lat")
        lon_raw = data.get("position_long")
        # The position fields can be present but empty (None), e.g. before a
        # GPS fix; multiplying None would raise and abort the whole file.
        if lat_raw is None or lon_raw is None:
            continue

        points.append((lat_raw * to_degrees, lon_raw * to_degrees))

        # Prefer the enhanced field, fall back to the plain one when the
        # record only provides "altitude".
        ele = data.get("enhanced_altitude")
        if ele is None:
            ele = data.get("altitude")
        if ele is not None:
            elevations.append(ele)

    # Total distance and positive elevation gain over consecutive samples.
    distance = sum(geodesic(points[i], points[i+1]).km for i in range(len(points)-1)) if len(points) > 1 else 0.0
    dplus = sum(max(elevations[i+1]-elevations[i],0) for i in range(len(elevations)-1)) if len(elevations) > 1 else 0.0

    return {
        "points": points,
        "elevations": elevations,
        "distance_km": round(distance,2),
        "denivele_m": round(dplus,1)
    }

In [50]:
def parse_file(file_path):
    """Dispatch a track file to the parser matching its extension.

    The file name is compared case-insensitively: names ending in ``.gpx``
    go to ``parse_gpx`` and names ending in ``.fit.gz`` go to
    ``parse_fit_gz``.

    Parameters
    ----------
    file_path : pathlib.Path
        Path of the file to parse (its ``.name`` attribute is used).

    Returns
    -------
    Whatever the selected parser returns.

    Raises
    ------
    ValueError
        If the file name matches neither supported extension.
    """
    lowered = file_path.name.lower()

    if lowered.endswith(".gpx"):
        return parse_gpx(file_path)

    if lowered.endswith(".fit.gz"):
        return parse_fit_gz(file_path)

    raise ValueError(f"Format non supporté : {file_path}")

In [51]:
DATA_DIR = Path("Data/GPX")
# NOTE(review): the input lives under "Data/" while the pickle goes to "data/".
# On a case-sensitive filesystem these are two different directories --
# confirm this is intended.
OUTPUT_PATH = Path("data/dataset_points.pkl")
dataset = {}

# Materialise and sort the listing: the processing order becomes reproducible
# and tqdm can show a real total (a bare generator has no length).
track_files = sorted(path for path in DATA_DIR.iterdir() if path.is_file())

for file in tqdm(track_files, desc="Parsing tracks"):
    try:
        data = parse_file(file)
        # A track needs at least two points to have a distance.
        if len(data["points"]) < 2:
            continue
        dataset[file.name] = data
    except Exception as e:
        # Best-effort batch: report the failing file and keep going.
        print(f"❌ Erreur sur {file.name}: {e}")

# Save the parsed tracks; create the target directory first so the write
# does not fail when it is missing.
OUTPUT_PATH.parent.mkdir(parents=True, exist_ok=True)
with OUTPUT_PATH.open("wb") as f:
    pickle.dump(dataset, f)

print("✅ Parcours traités :", len(dataset))

2it [00:01,  1.74it/s]

✅ Parcours traités : 2





In [52]:
# Show the parsed dataset as the cell output.
# NOTE(review): this renders every stored point of every track; a per-track
# summary (point count, distance_km, denivele_m) may be easier to read.
dataset

{'11800683489.gpx': {'points': [(43.617533, 1.420904),
   (43.617531, 1.420901),
   (43.617529, 1.420898),
   (43.617534, 1.420901),
   (43.617537, 1.420902),
   (43.617538, 1.420905),
   (43.617537, 1.420908),
   (43.617541, 1.420908),
   (43.617544, 1.420909),
   (43.617545, 1.420907),
   (43.617546, 1.420904),
   (43.61755, 1.420866),
   (43.617554, 1.420849),
   (43.617566, 1.420825),
   (43.61759, 1.420806),
   (43.617614, 1.420787),
   (43.617646, 1.420762),
   (43.617673, 1.420734),
   (43.617705, 1.420703),
   (43.617732, 1.420671),
   (43.61776, 1.420639),
   (43.617791, 1.420604),
   (43.617823, 1.420567),
   (43.617853, 1.420526),
   (43.617882, 1.42049),
   (43.617911, 1.420455),
   (43.617936, 1.42042),
   (43.617962, 1.420387),
   (43.617986, 1.420348),
   (43.618011, 1.420317),
   (43.618033, 1.420287),
   (43.618055, 1.420252),
   (43.618081, 1.420214),
   (43.618102, 1.420173),
   (43.618123, 1.420128),
   (43.618146, 1.420084),
   (43.61817, 1.420036),
   (43.61819, 1