In [1]:
import time
import pickle
import pandas as pd
from stravalib.client import Client
import webbrowser
import sys

sys.path.append('../config')
from utils import col_names

In [3]:
class StravaIntegration:
    """Small wrapper around ``stravalib``'s ``Client`` for OAuth and activity export.

    Stores the application credentials and one ``Client`` instance that every
    method of this class uses for its API calls.
    """

    def __init__(self, client_id, client_secret, redirect_uri):
        """Remember the app credentials and create an unauthenticated client.

        Args:
            client_id: Application client id passed to the OAuth calls.
            client_secret: Application client secret passed to the OAuth calls.
            redirect_uri: Redirect URI used when building the authorization URL.
        """
        self.client_id = client_id
        self.client_secret = client_secret
        self.redirect_uri = redirect_uri
        self.client = Client()

    def authenticate(self):
        """Run the interactive authorization flow and return the token response.

        Opens the authorization URL in the default web browser, prompts on
        stdin for the authorization code and exchanges it for a token.

        Returns:
            The value returned by ``exchange_code_for_token``.
        """
        url = self.client.authorization_url(client_id=self.client_id,
                                            redirect_uri=self.redirect_uri,
                                            scope=['read_all', 'profile:read_all', 'activity:read_all'])
        webbrowser.open(url)
        # A pasted code often carries stray spaces or a trailing newline; the
        # exchange would be attempted with those characters if left in place.
        code = input("Enter the authorization code: ").strip()
        return self.exchange_code_for_token(code)

    def exchange_code_for_token(self, code):
        """Exchange an authorization ``code`` for a token via the client.

        Args:
            code: Authorization code obtained from the redirect.

        Returns:
            Whatever ``Client.exchange_code_for_token`` returns.
        """
        return self.client.exchange_code_for_token(client_id=self.client_id,
                                                    client_secret=self.client_secret,
                                                    code=code)

    def refresh_token(self, access_token):
        """Return a usable token, refreshing it when it has expired.

        Args:
            access_token: Mapping with an ``'expires_at'`` entry (compared
                against ``time.time()``) and a ``'refresh_token'`` entry.

        Returns:
            ``access_token`` itself when it has not expired, otherwise the
            response of ``Client.refresh_access_token``.

        Raises:
            KeyError: If a required key is missing from ``access_token``.
        """
        if time.time() <= access_token['expires_at']:
            return access_token
        return self.client.refresh_access_token(client_id=self.client_id,
                                                client_secret=self.client_secret,
                                                refresh_token=access_token['refresh_token'])

    @staticmethod
    def _bearer_token(access_token):
        """Return the raw token string held by ``access_token``, or ``None``."""
        if isinstance(access_token, dict):
            return access_token.get('access_token')
        if isinstance(access_token, str):
            return access_token
        return None

    def get_activities_dataframe(self, access_token, limit=1000):
        """Fetch activities and return them as a DataFrame.

        Args:
            access_token: Token to authorize the request with: either a mapping
                holding an ``'access_token'`` entry or the raw token string.
                Any other value (for example ``None``) leaves the client's
                current credentials untouched.
            limit: Maximum number of activities, forwarded to
                ``Client.get_activities``.

        Returns:
            A DataFrame with an ``'id'`` column followed by one column per
            entry of ``col_names``; fields missing from an activity are ``None``.
        """
        # Apply the supplied token so the call does not depend on this client
        # having been authenticated earlier in the same session.
        token_value = self._bearer_token(access_token)
        if token_value:
            self.client.access_token = token_value

        rows = []
        for activity in self.client.get_activities(limit=limit):
            fields = activity.to_dict()
            rows.append([activity.id] + [fields.get(name) for name in col_names])

        # NOTE(review): if col_names itself contains 'id' the frame ends up with
        # two 'id' columns; layout kept as-is for existing consumers - confirm.
        return pd.DataFrame(rows, columns=['id'] + col_names)


In [4]:
# Load the app credentials from a local file instead of hardcoding them here.
# The file holds "<client_id>,<client_secret>"; the context manager closes the
# handle deterministically, and whitespace around either value is dropped.
with open('../config/client.secret') as secret_file:
    CLIENT_ID, CLIENT_SECRET = (part.strip() for part in secret_file.read().strip().split(','))
REDIRECT_URI = 'http://127.0.0.1:5000/authorization'

# Interactive step: opens the browser and prompts for the authorization code.
strava_integration = StravaIntegration(CLIENT_ID, CLIENT_SECRET, REDIRECT_URI)
access_token = strava_integration.authenticate()
# Returns the token unchanged unless it has already expired.
access_token = strava_integration.refresh_token(access_token)

df = strava_integration.get_activities_dataframe(access_token)

Enter the authorization code:  425ce38b85df2d8168fdcc9fe5528b596149c295


No rates present in response headers


In [5]:
# Preview the first five rows of the activities DataFrame.
df.head(n=5)

Unnamed: 0,id,id.1,achievement_count,athlete,athlete_count,average_speed,average_watts,comment_count,commute,device_watts,...,average_heartrate,max_heartrate,average_cadence,average_temp,instagram_primary_photo,partner_logo_url,partner_brand_tag,from_accepted_tag,segment_leaderboard_opt_out,perceived_exertion
0,11080730558,11080730558,0,"{'id': 28281639, 'city': None, 'country': None...",1,2.375,,0,False,,...,,,,,,,,False,,
1,11066396187,11066396187,0,"{'id': 28281639, 'city': None, 'country': None...",1,2.209,,0,False,,...,,,,,,,,False,,
2,11025382855,11025382855,0,"{'id': 28281639, 'city': None, 'country': None...",1,2.31,,0,False,,...,,,,,,,,False,,
3,11018518865,11018518865,0,"{'id': 28281639, 'city': None, 'country': None...",1,2.692,,0,False,,...,,,,,,,,False,,
4,11005657858,11005657858,0,"{'id': 28281639, 'city': None, 'country': None...",1,2.537,,0,False,,...,,,,,,,,False,,


In [6]:
# Persist the activities table as CSV, leaving the DataFrame index out of the file.
df.to_csv(path_or_buf='../data/raw/strava_activities.csv', index=False)
