In [70]:
import sys
# Add the parent directory to the import path so the `src` package resolves
# when the notebook is run from its own folder.
sys.path.append("../")

import matplotlib.pyplot as plt
import pickle
import os
import time

from dotenv import load_dotenv
from importlib import reload
from tqdm.auto import tqdm

# Import the project modules and reload them so that edits made to the source
# files are picked up without restarting the kernel.
from src import strava_api
from src import strava_cleaner
from src import strava_visualizer
reload(strava_api)
reload(strava_cleaner)
reload(strava_visualizer)

# Import the classes *after* reloading so these names bind to the freshly
# reloaded definitions rather than stale ones.
from src.strava_api import StravaClient
from src.strava_cleaner import StravaCleaner
from src.strava_visualizer import StravaVisualizer

# Load variables from a local .env file into the process environment; the
# credentials read with os.getenv() later in the notebook come from here.
load_dotenv()

# Global matplotlib styling: larger tick labels and the ggplot theme.
plt.rc("xtick", labelsize=22)
plt.rc("ytick", labelsize=22)
plt.style.use("ggplot")

In [71]:
# Strava API credentials are read from environment variables (any that are
# unset come back as None from os.getenv).
strava_credentials = {
    "client_id": os.getenv("CLIENT_ID"),
    "client_secret": os.getenv("CLIENT_SECRET"),
    "authorization_code": os.getenv("AUTHORIZATION_CODE"),
    "refresh_token": os.getenv("REFRESH_TOKEN"),
}
client = StravaClient(**strava_credentials)

cleaner = StravaCleaner()

# The visualizer takes a Geoapify API key, also read from the environment.
visualizer = StravaVisualizer(
    api_key=os.getenv("GEOAPIFY_API_KEY"),
    override_max_zoom=True,
)

In [40]:
def wait(seconds: int = 902) -> None:
    """Block for ``seconds`` seconds while displaying a countdown progress bar.

    Used to sit out an API rate-limit window between batches of requests.
    The default of 902 seconds is just over 15 minutes (presumably chosen to
    outlast Strava's 15-minute rate-limit window with a small margin).

    Args:
        seconds: Number of whole seconds to wait. Fractional values are
            truncated; zero or negative values return immediately.

    Returns:
        None. The function only sleeps and updates the progress bar.
    """
    # Clamp so the progress bar total and the countdown label stay sensible.
    seconds = max(int(seconds), 0)

    def _label(remaining: int) -> str:
        # Format the remaining time as zero-padded MM:SS.
        mins, secs = divmod(remaining, 60)
        return f"Rate Limit Time Remaining: {mins:02d}:{secs:02d}"

    with tqdm(total=seconds) as pbar:
        # Show the full duration up front instead of a blank label during the
        # first second.
        pbar.set_description(_label(seconds))
        for elapsed in range(1, seconds + 1):
            time.sleep(1)
            # Compute the remaining time *after* the second has elapsed so the
            # countdown finishes on 00:00 rather than 00:01.
            pbar.set_description(_label(seconds - elapsed))
            pbar.update(1)

## Extract Strava Activities

In [5]:
# activities = client.extract_activities(file="../data/raw/activities.pkl")

In [6]:
# Load the cached raw activities from disk (presumably the file written by
# client.extract_activities) instead of requesting them from the API again.
# NOTE: pickle.load can execute arbitrary code embedded in the file; only load
# pickles that you created yourself.
with open("../data/raw/activities.pkl", "rb") as f:
    activities = pickle.load(f)

## Clean Strava Activities

In [7]:
# Clean the raw activities; the `file` argument points at the cleaned-data
# pickle path and the progress bar is disabled for this step.
activities_clean = cleaner.clean_activities(
    activities=activities,
    file="../data/clean/activities.pkl",
    progress_bar=False,
)

In [16]:
activities_clean.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 420 entries, 0 to 419
Data columns (total 23 columns):
 #   Column             Non-Null Count  Dtype              
---  ------             --------------  -----              
 0   activity_id        420 non-null    int64              
 1   name               420 non-null    object             
 2   type               420 non-null    object             
 3   distance           420 non-null    float64            
 4   moving_time        420 non-null    int64              
 5   elapsed_time       420 non-null    int64              
 6   elevation_gain     420 non-null    float64            
 7   start_date         420 non-null    datetime64[ns, UTC]
 8   start_date_local   420 non-null    datetime64[ns, UTC]
 9   flagged            420 non-null    bool               
 10  start_latitude     415 non-null    float64            
 11  start_longitude    415 non-null    float64            
 12  end_latitude       415 non-null    float64        

In [37]:
activities_clean

Unnamed: 0,activity_id,name,type,distance,moving_time,elapsed_time,elevation_gain,start_date,start_date_local,flagged,...,end_longitude,average_speed,max_speed,average_heartrate,max_heartrate,min_elevation,max_elevation,kilojoules,suffer_score,map_polyline
0,11832289523,SuperJames,Ride,70509.6,11434,12271,1390.0,2024-07-07 14:32:53+00:00,2024-07-07 08:32:53+00:00,False,...,-105.226865,6.167,22.210,146.7,176.0,1599.2,2605.8,2421.1,236.0,adcsFbcgaS_CDbA_Ea@cCyo@LIft@cCvDWzUwKvGsEfAi[...
1,11816599118,Superman,Ride,36762.5,7189,8112,817.0,2024-07-05 17:00:14+00:00,2024-07-05 11:00:14+00:00,False,...,-105.226841,5.114,19.094,148.7,175.0,1604.8,2349.4,1390.4,165.0,_dcsFxbgaSuBD|@uES_Ba@Ymo@l@Exs@g@lBwAtAZ|YEfG...
2,11808917002,Boulder Creek Path,Ride,22761.6,4758,6663,126.0,2024-07-04 16:20:50+00:00,2024-07-04 10:20:50+00:00,False,...,-105.226860,4.784,12.690,106.1,148.0,1594.8,1645.0,510.3,14.0,secsFf}faSJe@m@mBil@PuBTKd@@|KFIG@DpTG|P[bAw@b...
3,11777566872,Mt. Graham,Ride,64387.7,13979,14487,1830.0,2024-06-30 14:41:18+00:00,2024-06-30 07:41:18+00:00,False,...,-109.733802,4.606,19.332,142.5,174.0,1056.8,2796.6,2504.5,245.0,iqsfErqw|S~sBhtBjUhTlCxGk@lFsC~B_I]qCdAiBrEb@v...
4,11770056607,Mt. Lemmon,Ride,94977.9,17099,23078,2350.0,2024-06-29 13:35:31+00:00,2024-06-29 06:35:31+00:00,False,...,-110.754903,5.555,23.450,148.1,167.0,839.8,2787.4,3690.2,372.0,embdEfz~bTw_Au~@mZwDqTeKjBaD`RrBfCiEkHmIjD}I]c...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
415,3067067698,Lazy Sunday,Run,5227.1,2029,2033,36.1,2020-02-02 23:28:02+00:00,2020-02-02 15:28:02+00:00,False,...,-121.918445,2.576,4.100,,,145.3,177.1,,9.0,abneFdsbgV@ICWIOGi@Q[M]Ea@a@gAAM@G|@s@^OZSj@SP...
416,3062896560,Morning Musings,Run,8375.0,2769,2801,30.7,2020-02-01 17:15:59+00:00,2020-02-01 09:15:59+00:00,False,...,-121.914203,3.025,5.100,,,145.3,167.6,,49.0,caneFdtbgVg@sAWcAGKEAI]GK?IEWUs@?M@GVGRKlA{@PQ...
417,3055527801,Post CrossFit Haste,Run,5852.6,1899,1903,14.0,2020-01-30 04:23:37+00:00,2020-01-29 20:23:37+00:00,False,...,-122.263449,3.082,9.100,,,1.1,7.5,,50.0,uuweFfxgiV?MBKj@{AxBsEFSd@}@Tq@No@@cAJ_AGu@BI?...
418,3047107124,Sunday Funday,Run,4507.7,1364,1541,24.5,2020-01-27 04:00:37+00:00,2020-01-26 20:00:37+00:00,False,...,-121.914202,3.305,5.400,,,145.3,167.6,,43.0,aaneF`tbgVUe@AKEIa@kAIM[kAY}@C[HKVMt@Yj@Y|@[`B...


## Extract Strava Activity Streams

In [21]:
activity_streams_all = []

In [20]:
wait()

Rate Limit Time Remaining: 00:01: 100%|██████████| 902/902 [15:03<00:00,  1.00s/it]


In [22]:
# If you have more than 1000 activities and do not have an enhanced Strava Developer rate limit
# it will likely be easiest to just manually download your GPX files and proceed to visualization
#
# Streams are requested in fixed-size batches with a rate-limit wait between
# batches. Results are appended to `activity_streams_all`, which must already
# exist (it is initialised in its own cell); re-running this cell without
# resetting that list will append duplicates.
batch_size = 94
activity_ids = activities_clean["activity_id"].values
# Count the cleaned ids that are actually sliced below, not the raw activity
# list, so no empty trailing batches (and pointless waits) are issued if
# cleaning dropped any rows.
total = len(activity_ids)
for start in range(0, total, batch_size):
    # Clamp every batch end (including the first) so the progress message and
    # the slice agree even when fewer than `batch_size` activities remain.
    end = min(total, start + batch_size)
    print(f"Processing activities {start + 1} to {end}")
    activity_streams = client.extract_activity_streams(
        activity_ids=activity_ids[start:end],
        file=None
    )

    activity_streams_all.extend(activity_streams)

    # Only wait out the rate-limit window if another batch follows.
    if end < total:
        wait(902)


Processing activities 1 to 94


Rate Limit Time Remaining: 00:01: 100%|██████████| 902/902 [15:03<00:00,  1.00s/it]


Processing activities 95 to 188


Rate Limit Time Remaining: 00:01: 100%|██████████| 902/902 [15:03<00:00,  1.00s/it]


Processing activities 189 to 282


Rate Limit Time Remaining: 00:01: 100%|██████████| 902/902 [15:03<00:00,  1.00s/it]


Processing activities 283 to 376


Rate Limit Time Remaining: 00:01: 100%|██████████| 902/902 [15:03<00:00,  1.00s/it]


Processing activities 377 to 420


Rate Limit Time Remaining: 00:01: 100%|██████████| 902/902 [15:03<00:00,  1.00s/it]


In [23]:
# Persist the raw streams to disk so the slow, rate-limited extraction above
# does not have to be repeated in later sessions.
with open("../data/raw/streams.pkl", "wb") as f:
    pickle.dump(activity_streams_all, f)

In [None]:
# with open("../data/raw/streams.pkl", "rb") as f:
#     activity_streams_all = pickle.load(f)

## Clean Strava Activity Streams

In [25]:
# Clean every extracted stream, showing a progress bar; the `file` argument
# points at the cleaned-streams pickle path.
streams = cleaner.clean_activity_streams(
    activity_streams_all,
    progress_bar=True,
    file="../data/clean/activity_streams.pkl",
)

Processing stream 3038118287: 100%|██████████| 420/420 [00:01<00:00, 402.68it/s]


## Export GPX files from Strava Activity Streams

In [26]:
# Build GPX files from the cleaned activities and their cleaned streams,
# targeting the ../data/gpx/ directory that the heatmap step reads from.
# NOTE(review): the exact contents of the returned value are not visible from
# this notebook — confirm against StravaCleaner.create_gpx_files.
streams_clean = cleaner.create_gpx_files(
    activities=activities_clean,
    activity_streams=streams,
    save_dir="../data/gpx/"
)

Processing activity 11832289523: 100%|██████████| 420/420 [01:14<00:00,  5.62it/s]


## Generate Heatmap

In [78]:
# Heatmap of 2024 activities restricted to the Mt. Graham bounding box.
# Swap the active `lat_lon_bounds` line with one of the commented alternatives
# to render a different region.
# NOTE(review): the bounds tuples appear to be ordered
# (min_lat, min_lon, max_lat, max_lon) — confirm against StravaVisualizer.
heatmap = visualizer.generate_heatmap(
    gpx_dir="../data/gpx/",
    file="../img/graham_heatmap.png",
    zoom=15,  # Zoom level for the tiles. -1 auto zooms
    sigma=2,  # Width of heatmap track
    year_filter=2024,  # Activities for this year
    brightness_factor=10.0,  # Manually brighten tracks (1 is default brightness, higher is more bright)
    # lat_lon_bounds=(39.813811, -105.558014, 40.166281, -105.195465)  # Boulder
    # lat_lon_bounds=(37.788624, -122.392159, 37.895718, -122.219810)  # Oakland
    # lat_lon_bounds=(31.614796, -111.743317, 32.598891, -110.533447),  # Tucson
    lat_lon_bounds=(32.614653, -109.899330, 32.735740, -109.708271)  # Mt. Graham
)

Processing 9972386845.gpx: 100%|██████████| 415/415 [00:02<00:00, 206.57it/s]
Downloading tiles: 100%|██████████| 143/143 [01:33<00:00,  1.53it/s]


In [None]:
# Heatmap of 2024 activities restricted to the Tucson bounding box, rendered
# at a slightly lower zoom level than the Mt. Graham map.
# NOTE: this reassigns `heatmap`, replacing the result of the previous cell.
# NOTE(review): the bounds tuples appear to be ordered
# (min_lat, min_lon, max_lat, max_lon) — confirm against StravaVisualizer.
heatmap = visualizer.generate_heatmap(
    gpx_dir="../data/gpx/",
    file="../img/tucson_heatmap.png",
    zoom=14,  # Zoom level for the tiles. -1 auto zooms
    sigma=2,  # Width of heatmap track
    year_filter=2024,  # Activities for this year
    brightness_factor=10.0,  # Manually brighten tracks (1 is default brightness, higher is more bright)
    # lat_lon_bounds=(39.813811, -105.558014, 40.166281, -105.195465)  # Boulder
    # lat_lon_bounds=(37.788624, -122.392159, 37.895718, -122.219810)  # Oakland
    lat_lon_bounds=(31.614796, -111.743317, 32.598891, -110.533447),  # Tucson
    # lat_lon_bounds=(32.614653, -109.899330, 32.735740, -109.708271)  # Mt. Graham
)

In [None]:
# Some open-source map tile servers refuse requests until the URL has been
# opened in a browser at least once. Printing the problem URLs here lets you
# visit them manually, which sometimes fixes the issue on the next run.
# Looking at you, Wikimedia.
# NOTE(review): `bad_urls` is presumably the list of tile URLs that failed
# during the last generate_heatmap call — confirm against StravaVisualizer.
for url in visualizer.bad_urls:
    print(url)