In [None]:
%pip install haversine
%pip install tqdm



In [None]:
import json
import linecache
from pprint import *

import numpy as np
from haversine import haversine
from tqdm.notebook import tqdm

In [None]:
class Endomondo_DataLoader():
    """Line-oriented reader for the Endomondo fitness data file.

    The file at ``datapath`` is read as text with one sample per line. Each
    line is turned into a dict by ``_parse_sample_line``. ``__len__`` and
    ``load_number_of_fitness_samples`` ignore blank lines.
    """

    def __init__(self, datapath):
        """Store the path of the data file; nothing is read at this point."""
        self.datapath = datapath

    @staticmethod
    def _parse_sample_line(line):
        """Decode one raw text line of the data file into a dict.

        NOTE(review): the blanket quote replacement also rewrites apostrophes
        that occur inside string values, which would then fail to parse.
        """
        # json only supports double quotations
        return json.loads(line.replace("\'", "\""))

    def __len__(self):
        """Return the number of non-blank lines (samples) in the data file.

        The file is scanned on every call. The result is also stored in
        ``self.line_count``.
        """
        # Same encoding as the sample reader so both agree on any input file.
        with open(self.datapath, "r", encoding='utf8') as file:
            self.line_count = sum(1 for line in file if line.strip())

        return self.line_count

    def get_available_datachannels(self):
        """Return the keys of the first sample in the file as a list.

        Raises:
            IndexError: if the file contains no sample.
        """
        demo_sample = self.load_number_of_fitness_samples(nr_maximum_sample=1)[0]
        return list(demo_sample.keys())

    def load_number_of_fitness_samples(self, nr_maximum_sample=None, start_idx=0):
        """Load a contiguous run of samples from the data file.

        Args:
            nr_maximum_sample: Maximum number of samples to return. ``None``
                (or a negative value) means no limit; ``0`` returns an empty
                list.
            start_idx: 0-based index of the first physical line to consider;
                earlier lines are skipped.

        Returns:
            A list of dicts, one per parsed sample, in file order.
        """
        sample_list = []

        # Negative limits keep their previous meaning of "no limit".
        limit = nr_maximum_sample
        if limit is not None and limit < 0:
            limit = None

        # A limit of zero must not fall through to "read everything".
        if limit == 0:
            return sample_list

        with open(self.datapath, encoding='utf8') as file:
            for index, line in enumerate(file):

                # start at start_idx first (if set else than zero)
                if index < start_idx:
                    continue

                # blank lines carry no sample (mirrors __len__)
                if not line.strip():
                    continue

                sample_list.append(self._parse_sample_line(line))

                # Count loaded samples rather than line indices, so skipped
                # blank lines do not shorten the result.
                if limit is not None and len(sample_list) >= limit:
                    break

        return sample_list

    def get_single_fitness_sample(self, idx):
        """Return the sample stored on line ``idx`` of the data file.

        Args:
            idx: 1-based line number, as expected by ``linecache.getline``.
                Note that this differs from the 0-based ``start_idx`` of
                ``load_number_of_fitness_samples``.

        Raises:
            json.JSONDecodeError: if the line is missing or blank
                (``linecache.getline`` returns an empty string for lines it
                cannot read) or is not parseable.
        """
        fitness_sample_raw = linecache.getline(self.datapath, idx)
        return self._parse_sample_line(fitness_sample_raw)

In [None]:
# Path of the Endomondo data file, and the loader instance that reads from it.
datafile = "/content/drive/MyDrive/endomondoHR_proper.json"
EndomondoData = Endomondo_DataLoader(datapath=datafile)

In [None]:
def calculate_mean_speed(sample):
    """Compute the speed between consecutive track points of one sample.

    For every pair of neighbouring points the 3D distance (haversine ground
    distance combined with the altitude difference) is divided by the elapsed
    time and rounded to 4 decimals.

    Args:
        sample: Mapping with equally long sequences under the keys
            'latitude', 'longitude', 'altitude' and 'timestamp'.
            Assumes altitude is in metres and timestamps are in seconds, as
            implied by the conversions below -- TODO confirm against the data.

    Returns:
        A list with one speed per track point (same length as the input
        series). The first entry is always 0.0 (starting velocity); an empty
        series yields an empty list. With the unit assumptions above and
        haversine's default kilometre output, values are in km/h.
    """
    lat = sample['latitude']
    lon = sample['longitude']
    alt = sample['altitude']
    timestamp = sample['timestamp']

    # No track points: return an empty series so lengths stay aligned.
    if len(lat) == 0:
        return []

    # The first point has no predecessor; use zero as the starting velocity so
    # the result has the same length as the input series.
    mean_speed = [0.0]

    for idx in range(len(lat) - 1):
        lat2, lat1 = lat[idx+1], lat[idx]
        lon2, lon1 = lon[idx+1], lon[idx]
        alt2, alt1 = alt[idx+1], alt[idx]
        elapsed_h = (timestamp[idx+1] - timestamp[idx])/3600  # sec to hour

        # Clamp to at least one second: guards against division by zero and
        # against non-increasing timestamps.
        if elapsed_h < 1/3600:
            elapsed_h = 1/3600

        flat_distance = haversine((lat2, lon2), (lat1, lon1))
        height_distance = (alt2-alt1)/1000  # in km
        total_distance = np.sqrt(flat_distance*flat_distance + height_distance*height_distance)

        mean_speed.append(round(total_distance/elapsed_h, 4))

    return mean_speed

In [None]:
# Number of samples expected in the data file; only used to size the progress bar.
EXPECTED_SAMPLE_COUNT = 167783

new_dataset = []

# Read the same file the loader above was configured with (`datafile`) instead
# of repeating the path literal.
with open(datafile, encoding='utf8') as file:
    for line in tqdm(file, total=EXPECTED_SAMPLE_COUNT, leave=True):
        # blank lines carry no sample and would make json.loads fail
        if not line.strip():
            continue

        # json only supports double quotations
        line = line.replace("\'", "\"")

        # parse the sample and attach the derived speed series to it
        sample = json.loads(line)
        sample["mean_speed"] = calculate_mean_speed(sample)

        new_dataset.append(sample)



  0%|          | 0/167783 [00:00<?, ?it/s][A[A

  0%|          | 17/167783 [00:00<16:42, 167.33it/s][A[A

  0%|          | 34/167783 [00:00<16:48, 166.38it/s][A[A

  0%|          | 51/167783 [00:00<16:55, 165.23it/s][A[A

  0%|          | 67/167783 [00:00<17:22, 160.94it/s][A[A

  0%|          | 85/167783 [00:00<17:03, 163.93it/s][A[A

  0%|          | 102/167783 [00:00<17:09, 162.86it/s][A[A

  0%|          | 117/167783 [00:00<17:47, 157.04it/s][A[A

  0%|          | 134/167783 [00:00<17:38, 158.44it/s][A[A

  0%|          | 151/167783 [00:00<17:29, 159.73it/s][A[A

  0%|          | 168/167783 [00:01<17:23, 160.61it/s][A[A

  0%|          | 185/167783 [00:01<17:09, 162.77it/s][A[A

  0%|          | 203/167783 [00:01<16:50, 165.86it/s][A[A

  0%|          | 220/167783 [00:01<16:51, 165.73it/s][A[A

  0%|          | 237/167783 [00:01<16:52, 165.49it/s][A[A

  0%|          | 255/167783 [00:01<16:41, 167.29it/s][A[A

  0%|          | 272/167783 [00:01<16:

KeyboardInterrupt: ignored