First, we write a function to collect data from the Last.fm API.
The endpoint used is `user.getRecentTracks`, which returns every track the user has scrobbled.

In [1]:
import requests as rq

import os

# Last.fm credentials and identity shared by every request in this notebook.
# The key is read from the LASTFM_API_KEY environment variable so a real key
# never has to be saved inside the notebook; 'insertkey' stays as the
# placeholder fallback when the variable is not set.
API_KEY = os.environ.get('LASTFM_API_KEY', 'insertkey')
USER_AGENT = 'Ewaoluwa'  # sent as the HTTP user-agent header
user = 'ewaoluwa'        # sent as the 'user' parameter of every API call

def lastfm_get(payload):
    """Send a GET request to the Last.fm API and return the raw response.

    Args:
        payload: dict of query parameters for the call, for example
            ``{'method': 'user.getrecenttracks', 'page': 1}``. The dict is
            not modified; a copy is extended with the shared parameters.
            Any ``limit``, ``user``, ``api_key`` or ``format`` entries in it
            are overridden.

    Returns:
        The response object returned by ``rq.get``. No status check is done
        here, so callers must inspect ``status_code`` themselves.
    """
    headers = {'user-agent': USER_AGENT}
    url = 'https://ws.audioscrobbler.com/2.0/'

    # Build a fresh dict instead of writing into the caller's payload, so the
    # API key and the forced defaults never leak back into the caller's object.
    params = {
        **payload,
        'limit': 200,
        'user': user,
        'api_key': API_KEY,
        'format': 'json',
    }

    return rq.get(url, headers=headers, params=params)

import requests_cache
# Install the requests_cache cache for all later requests. The loop below
# checks `from_cache` on each response to decide whether it needs to sleep.
requests_cache.install_cache()

import time
# clear_output is part of the public IPython.display API; importing it from
# IPython.core.display is deprecated.
from IPython.display import clear_output

responses = []

page = 1
total_pages = 99999 # this is just a dummy number so the loop starts

while page <= total_pages:
    payload = {
        'method': 'user.getrecenttracks',
        'limit': 200,
        'user': user,
        'page': page
    }

    # print some output so we can see the status
    print("Requesting page {}/{}".format(page, total_pages))
    # clear the output to make things neater
    clear_output(wait = True)

    # make the API call
    response = lastfm_get(payload)

    # if we get an error, print the response and halt the loop
    if response.status_code != 200:
        print(response.text)
        break

    # decode the body once and reuse it for both pagination fields
    data = response.json()

    # a 200 response without the expected payload is treated like an error
    # instead of failing with a KeyError half way through the download
    if 'recenttracks' not in data:
        print(response.text)
        break

    # extract pagination info
    attrs = data['recenttracks']['@attr']
    page = int(attrs['page'])
    total_pages = int(attrs['totalPages'])

    # append response
    responses.append(response)

    # if it's not a cached result, sleep
    if not getattr(response, 'from_cache', False):
        time.sleep(0.25)

    # increment the page number
    page += 1

import pandas as pd

# Peek at the first page to see what a single response looks like.
r0 = responses[0]
r0_json = r0.json()
r0_tracks = r0_json['recenttracks']['track']
r0_df = pd.DataFrame(r0_tracks)

# One frame per downloaded page, stacked into a single frame. ignore_index
# gives every scrobble its own row label instead of repeating 0..199 per page.
frames = [pd.DataFrame(r.json()['recenttracks']['track']) for r in responses]
tracks = pd.concat(frames, ignore_index=True)

# Rows without a 'date' cannot be processed by the later cells, which read
# date['#text'] for every row, so they are removed here.
# NOTE(review): Last.fm appears to return the currently playing track without
# a date and with an extra '@attr' field -- confirm against the API docs.
tracks = (
    tracks
    .dropna(subset=['date'])
    .drop(columns=['@attr'], errors='ignore')
    .reset_index(drop=True)
)

print(tracks.columns)

tracks.head()

Index(['artist', 'streamable', 'image', 'mbid', 'album', 'name', 'url',
       'date'],
      dtype='object')


Unnamed: 0,artist,streamable,image,mbid,album,name,url,date
0,{'mbid': 'e520459c-dff4-491d-a6e4-c97be35e0044...,0,"[{'size': 'small', '#text': 'https://lastfm.fr...",dddbb3b9-7ed5-362c-983f-0d28b047dbf0,{'mbid': '16f3fcf2-4511-4c8c-93bb-b4c8910aa9db...,Novacane,https://www.last.fm/music/Frank+Ocean/_/Novacane,"{'uts': '1699143073', '#text': '05 Nov 2023, 0..."
1,{'mbid': 'e21857d5-3256-4547-afb3-4b6ded592596...,0,"[{'size': 'small', '#text': 'https://lastfm.fr...",e28c3490-8d2a-487a-8039-5540ed58d652,{'mbid': '0380808d-2211-4869-a287-cc94e0d18162...,Don't Get Lost in Heaven,https://www.last.fm/music/Gorillaz/_/Don%27t+G...,"{'uts': '1699142942', '#text': '05 Nov 2023, 0..."
2,{'mbid': 'a6c6897a-7415-4f8d-b5a5-3a5e05f3be67...,0,"[{'size': 'small', '#text': 'https://lastfm.fr...",11161115-4dd0-418b-b0b8-5a5c6571b09f,{'mbid': '136434d5-9ddf-4c62-8dcc-021ead11fe0c...,Goner,https://www.last.fm/music/twenty+one+pilots/_/...,"{'uts': '1699142704', '#text': '05 Nov 2023, 0..."
3,{'mbid': '01695167-968d-4afc-9f94-06c33ebabe16...,0,"[{'size': 'small', '#text': 'https://lastfm.fr...",,"{'mbid': '', '#text': 'Mr Blue'}",Mr Blue (Radio Mix),https://www.last.fm/music/Catherine+Feeny/_/Mr...,"{'uts': '1699142521', '#text': '05 Nov 2023, 0..."
4,{'mbid': 'e21857d5-3256-4547-afb3-4b6ded592596...,0,"[{'size': 'small', '#text': 'https://lastfm.fr...",5d0df10b-a90b-4515-a876-1b5abf378cba,{'mbid': '0380808d-2211-4869-a287-cc94e0d18162...,El Mañana,https://www.last.fm/music/Gorillaz/_/El+Ma%C3%...,"{'uts': '1699142291', '#text': '04 Nov 2023, 2..."


Cleaning the data by dropping columns that are not needed

In [2]:
# Drop the columns the rest of the notebook never uses. errors='ignore' keeps
# the cell safe to re-run once the columns are already gone.
tracks = tracks.drop(columns=['mbid', 'streamable'], errors='ignore')

Create a new DataFrame that contains only the distinct tracks. Each track's artist and name can then be used to fetch its top tags from another endpoint (`track.getTopTags`).

In [3]:
# Keep the first row of every distinct track URL. reset_index gives the result
# its own 0..n-1 row labels and returns a fresh frame, which avoids the
# SettingWithCopyWarning pandas may raise when later cells assign columns.
tracksdistinct = tracks.drop_duplicates(subset=["url"]).reset_index(drop=True)
tracksdistinct.describe()

Unnamed: 0,artist,image,album,name,url,date
count,6028,6028,6028,6028,6028,6028
unique,2269,2676,3069,5698,6028,5951
top,{'mbid': '164f0d73-1234-4e2c-8743-d77bf2191051...,"[{'size': 'small', '#text': 'https://lastfm.fr...","{'mbid': '', '#text': ''}",Intro,https://www.last.fm/music/Frank+Ocean/_/Novacane,"{'uts': '1623907212', '#text': '17 Jun 2021, 0..."
freq,171,675,510,6,1,23


In [7]:
# The track title is stored under 'name'; expose it as 'track' instead.
tracksdistinct = tracksdistinct.rename({'name': 'track'}, axis='columns')
tracksdistinct.head()

Unnamed: 0,artist,image,album,track,url,date
0,Frank Ocean,"[{'size': 'small', '#text': 'https://lastfm.fr...",Novacane,Novacane,https://www.last.fm/music/Frank+Ocean/_/Novacane,"05 Nov 2023, 00:11"
1,Gorillaz,"[{'size': 'small', '#text': 'https://lastfm.fr...",Demon Days,Don't Get Lost in Heaven,https://www.last.fm/music/Gorillaz/_/Don%27t+G...,"05 Nov 2023, 00:09"
2,twenty one pilots,"[{'size': 'small', '#text': 'https://lastfm.fr...",Blurryface,Goner,https://www.last.fm/music/twenty+one+pilots/_/...,"05 Nov 2023, 00:05"
3,Catherine Feeny,"[{'size': 'small', '#text': 'https://lastfm.fr...",Mr Blue,Mr Blue (Radio Mix),https://www.last.fm/music/Catherine+Feeny/_/Mr...,"05 Nov 2023, 00:02"
4,Gorillaz,"[{'size': 'small', '#text': 'https://lastfm.fr...",Demon Days,El Mañana,https://www.last.fm/music/Gorillaz/_/El+Ma%C3%...,"04 Nov 2023, 23:58"


In [5]:
# artist, album and date arrive as dicts; keep only their '#text' value.
# Values that are not dicts (already extracted by a previous run of this cell,
# or missing) are passed through unchanged, so the cell can be re-run safely.
tracksdistinct = tracksdistinct.assign(**{
    col: tracksdistinct[col].apply(
        lambda value: value['#text'] if isinstance(value, dict) else value
    )
    for col in ('artist', 'album', 'date')
})
tracksdistinct.head()

Unnamed: 0,artist,image,album,track,url,date
0,Frank Ocean,"[{'size': 'small', '#text': 'https://lastfm.fr...",Novacane,Novacane,https://www.last.fm/music/Frank+Ocean/_/Novacane,"05 Nov 2023, 00:11"
1,Gorillaz,"[{'size': 'small', '#text': 'https://lastfm.fr...",Demon Days,Don't Get Lost in Heaven,https://www.last.fm/music/Gorillaz/_/Don%27t+G...,"05 Nov 2023, 00:09"
2,twenty one pilots,"[{'size': 'small', '#text': 'https://lastfm.fr...",Blurryface,Goner,https://www.last.fm/music/twenty+one+pilots/_/...,"05 Nov 2023, 00:05"
3,Catherine Feeny,"[{'size': 'small', '#text': 'https://lastfm.fr...",Mr Blue,Mr Blue (Radio Mix),https://www.last.fm/music/Catherine+Feeny/_/Mr...,"05 Nov 2023, 00:02"
4,Gorillaz,"[{'size': 'small', '#text': 'https://lastfm.fr...",Demon Days,El Mañana,https://www.last.fm/music/Gorillaz/_/El+Ma%C3%...,"04 Nov 2023, 23:58"


In [9]:
def lookup_tags(artist,track):
    """Return the top three Last.fm tags of a track as one comma-separated string.

    Args:
        artist: artist name sent with the ``track.getTopTags`` call.
        track: track title sent with the same call.

    Returns:
        ``'tag1, tag2, tag3'`` (fewer entries when the track has fewer tags,
        ``''`` when it has none), or ``None`` when the request fails or the
        response body carries no ``toptags`` entry.
    """
    responser = lastfm_get({
        'method': 'track.getTopTags',
        'artist':  artist,
        'track': track
    })

    # rate limiting: pause after every request that was not served from the
    # cache, including failed ones, before deciding what to return
    if not getattr(responser, 'from_cache', False):
        time.sleep(0.25)

    # if there's an error, just return nothing
    if responser.status_code != 200:
        return None

    # a 200 response whose body lacks 'toptags' is treated as an error too,
    # so one odd response cannot abort a long-running apply with a KeyError
    body = responser.json()
    toptags = body.get('toptags') if isinstance(body, dict) else None
    if not isinstance(toptags, dict):
        return None

    tag_list = toptags.get('tag', [])
    # defensive: tolerate a single tag delivered as a bare object rather than
    # a list (not confirmed that the API ever does this)
    if isinstance(tag_list, dict):
        tag_list = [tag_list]

    # extract the top three tags and turn them into a string
    tags = [t['name'] for t in tag_list[:3]]
    return ', '.join(tags)

from tqdm import tqdm
tqdm.pandas()  # needed for the progress_apply call below

# Look up the tags of every distinct track, one API call per row, with a
# progress bar. Only the two columns the lookup needs are handed to apply.
artist_track = tracksdistinct[['artist', 'track']]
tracksdistinct['tags'] = artist_track.progress_apply(
    lambda pair: lookup_tags(pair['artist'], pair['track']),
    axis=1,
)

tracksdistinct.head()

100%|██████████| 6028/6028 [2:00:49<00:00,  1.20s/it]  


Unnamed: 0,artist,image,album,track,url,date,tags
0,Frank Ocean,"[{'size': 'small', '#text': 'https://lastfm.fr...",Novacane,Novacane,https://www.last.fm/music/Frank+Ocean/_/Novacane,"05 Nov 2023, 00:11","rnb, r&b, ofwgkta"
1,Gorillaz,"[{'size': 'small', '#text': 'https://lastfm.fr...",Demon Days,Don't Get Lost in Heaven,https://www.last.fm/music/Gorillaz/_/Don%27t+G...,"05 Nov 2023, 00:09","alternative, trip-hop, gorillaz"
2,twenty one pilots,"[{'size': 'small', '#text': 'https://lastfm.fr...",Blurryface,Goner,https://www.last.fm/music/twenty+one+pilots/_/...,"05 Nov 2023, 00:05","indie pop, alternative hip hop, electropop"
3,Catherine Feeny,"[{'size': 'small', '#text': 'https://lastfm.fr...",Mr Blue,Mr Blue (Radio Mix),https://www.last.fm/music/Catherine+Feeny/_/Mr...,"05 Nov 2023, 00:02",
4,Gorillaz,"[{'size': 'small', '#text': 'https://lastfm.fr...",Demon Days,El Mañana,https://www.last.fm/music/Gorillaz/_/El+Ma%C3%...,"04 Nov 2023, 23:58","alternative, electronic, trip-hop"


In [10]:
# Show the dtype of every column of the distinct-tracks frame.
tracksdistinct.dtypes

artist    object
image     object
album     object
track     object
url       object
date      object
tags      object
dtype: object

In [11]:
# Join the per-track columns of tracksdistinct (including tags) back onto every
# row of tracks, matching on 'url'. Columns present in both frames get the
# suffixes _x (from tracks) and _y (from tracksdistinct).
# validate='many_to_one' makes the merge fail loudly if a URL ever appears more
# than once in tracksdistinct, which would otherwise silently duplicate rows.
full_tracks = pd.merge(
    tracks,
    tracksdistinct,
    on='url',
    how='left',
    validate='many_to_one',
)
full_tracks.head()

                                            artist_x  \
0  {'mbid': 'e520459c-dff4-491d-a6e4-c97be35e0044...   
1  {'mbid': 'e21857d5-3256-4547-afb3-4b6ded592596...   
2  {'mbid': 'a6c6897a-7415-4f8d-b5a5-3a5e05f3be67...   
3  {'mbid': '01695167-968d-4afc-9f94-06c33ebabe16...   
4  {'mbid': 'e21857d5-3256-4547-afb3-4b6ded592596...   

                                             image_x  \
0  [{'size': 'small', '#text': 'https://lastfm.fr...   
1  [{'size': 'small', '#text': 'https://lastfm.fr...   
2  [{'size': 'small', '#text': 'https://lastfm.fr...   
3  [{'size': 'small', '#text': 'https://lastfm.fr...   
4  [{'size': 'small', '#text': 'https://lastfm.fr...   

                                             album_x  \
0  {'mbid': '16f3fcf2-4511-4c8c-93bb-b4c8910aa9db...   
1  {'mbid': '0380808d-2211-4869-a287-cc94e0d18162...   
2  {'mbid': '136434d5-9ddf-4c62-8dcc-021ead11fe0c...   
3                   {'mbid': '', '#text': 'Mr Blue'}   
4  {'mbid': '0380808d-2211-4869-a287-cc94e0d18

In [12]:
# Show the first rows of the merged frame, including the _x/_y column pairs.
full_tracks.head()

Unnamed: 0,artist_x,image_x,album_x,name,url,date_x,artist_y,image_y,album_y,track,date_y,tags
0,{'mbid': 'e520459c-dff4-491d-a6e4-c97be35e0044...,"[{'size': 'small', '#text': 'https://lastfm.fr...",{'mbid': '16f3fcf2-4511-4c8c-93bb-b4c8910aa9db...,Novacane,https://www.last.fm/music/Frank+Ocean/_/Novacane,"{'uts': '1699143073', '#text': '05 Nov 2023, 0...",Frank Ocean,"[{'size': 'small', '#text': 'https://lastfm.fr...",Novacane,Novacane,"05 Nov 2023, 00:11","rnb, r&b, ofwgkta"
1,{'mbid': 'e21857d5-3256-4547-afb3-4b6ded592596...,"[{'size': 'small', '#text': 'https://lastfm.fr...",{'mbid': '0380808d-2211-4869-a287-cc94e0d18162...,Don't Get Lost in Heaven,https://www.last.fm/music/Gorillaz/_/Don%27t+G...,"{'uts': '1699142942', '#text': '05 Nov 2023, 0...",Gorillaz,"[{'size': 'small', '#text': 'https://lastfm.fr...",Demon Days,Don't Get Lost in Heaven,"05 Nov 2023, 00:09","alternative, trip-hop, gorillaz"
2,{'mbid': 'a6c6897a-7415-4f8d-b5a5-3a5e05f3be67...,"[{'size': 'small', '#text': 'https://lastfm.fr...",{'mbid': '136434d5-9ddf-4c62-8dcc-021ead11fe0c...,Goner,https://www.last.fm/music/twenty+one+pilots/_/...,"{'uts': '1699142704', '#text': '05 Nov 2023, 0...",twenty one pilots,"[{'size': 'small', '#text': 'https://lastfm.fr...",Blurryface,Goner,"05 Nov 2023, 00:05","indie pop, alternative hip hop, electropop"
3,{'mbid': '01695167-968d-4afc-9f94-06c33ebabe16...,"[{'size': 'small', '#text': 'https://lastfm.fr...","{'mbid': '', '#text': 'Mr Blue'}",Mr Blue (Radio Mix),https://www.last.fm/music/Catherine+Feeny/_/Mr...,"{'uts': '1699142521', '#text': '05 Nov 2023, 0...",Catherine Feeny,"[{'size': 'small', '#text': 'https://lastfm.fr...",Mr Blue,Mr Blue (Radio Mix),"05 Nov 2023, 00:02",
4,{'mbid': 'e21857d5-3256-4547-afb3-4b6ded592596...,"[{'size': 'small', '#text': 'https://lastfm.fr...",{'mbid': '0380808d-2211-4869-a287-cc94e0d18162...,El Mañana,https://www.last.fm/music/Gorillaz/_/El+Ma%C3%...,"{'uts': '1699142291', '#text': '04 Nov 2023, 2...",Gorillaz,"[{'size': 'small', '#text': 'https://lastfm.fr...",Demon Days,El Mañana,"04 Nov 2023, 23:58","alternative, electronic, trip-hop"


In [13]:
# Keep one copy of each duplicated column: the text versions from
# tracksdistinct (_y) for artist, album and image, but the date from tracks
# (date_x), because date_y belongs to the single row kept per URL.
full_tracks = full_tracks.drop(
    columns=['artist_x', 'album_x', 'name', 'date_y', 'image_x']
)
# date_x is still a dict here; keep only its '#text' value. Non-dict values
# are passed through unchanged instead of raising a TypeError.
full_tracks['date_x'] = full_tracks['date_x'].apply(
    lambda x: x['#text'] if isinstance(x, dict) else x
)
full_tracks.head()

Unnamed: 0,url,date_x,artist_y,image_y,album_y,track,tags
0,https://www.last.fm/music/Frank+Ocean/_/Novacane,"05 Nov 2023, 00:11",Frank Ocean,"[{'size': 'small', '#text': 'https://lastfm.fr...",Novacane,Novacane,"rnb, r&b, ofwgkta"
1,https://www.last.fm/music/Gorillaz/_/Don%27t+G...,"05 Nov 2023, 00:09",Gorillaz,"[{'size': 'small', '#text': 'https://lastfm.fr...",Demon Days,Don't Get Lost in Heaven,"alternative, trip-hop, gorillaz"
2,https://www.last.fm/music/twenty+one+pilots/_/...,"05 Nov 2023, 00:05",twenty one pilots,"[{'size': 'small', '#text': 'https://lastfm.fr...",Blurryface,Goner,"indie pop, alternative hip hop, electropop"
3,https://www.last.fm/music/Catherine+Feeny/_/Mr...,"05 Nov 2023, 00:02",Catherine Feeny,"[{'size': 'small', '#text': 'https://lastfm.fr...",Mr Blue,Mr Blue (Radio Mix),
4,https://www.last.fm/music/Gorillaz/_/El+Ma%C3%...,"04 Nov 2023, 23:58",Gorillaz,"[{'size': 'small', '#text': 'https://lastfm.fr...",Demon Days,El Mañana,"alternative, electronic, trip-hop"


In [14]:
# Strip the merge suffixes now that only one copy of each column is left.
full_tracks = full_tracks.rename(
    {
        'artist_y': 'artist',
        'album_y': 'album',
        'date_x': 'date',
        'image_y': 'image',
    },
    axis='columns',
)

In [15]:
# Cast every column to str. This also stringifies the 'image' column;
# extract_medium_link later parses that text back with ast.literal_eval.
# NOTE(review): a missing tags value would presumably become a literal
# 'None'/'nan' string here -- confirm whether that is intended.
full_tracks=full_tracks.astype(str)

In [16]:
# Print the column dtypes after the cast to str.
print(full_tracks.dtypes)

url       object
date      object
artist    object
image     object
album     object
track     object
tags      object
dtype: object


In [17]:
import datetime
# Parse the '05 Nov 2023, 00:11' style strings in one vectorized call instead
# of a Python-level strptime per row. The explicit format keeps parsing strict,
# and the cell can be re-run on an already converted column.
full_tracks['date'] = pd.to_datetime(full_tracks['date'], format='%d %b %Y, %H:%M')


In [18]:
# Print the column dtypes after the date conversion.
print(full_tracks.dtypes)

url               object
date      datetime64[ns]
artist            object
image             object
album             object
track             object
tags              object
dtype: object


In [22]:
import ast
def extract_medium_link(row):
    """Return the URL of the 'medium' image from one cell of the image column.

    Args:
        row: either a list of image dicts, each with ``'size'`` and
            ``'#text'`` keys, or the string form of such a list.

    Returns:
        The ``'#text'`` value of the first entry whose ``'size'`` is
        ``'medium'``, or ``None`` when the value cannot be parsed, is not a
        list of dicts, or has no medium entry.
    """
    if isinstance(row, str):
        try:
            row = ast.literal_eval(row)  # Safely convert the string to a list
        except (ValueError, SyntaxError):
            return None  # the text is not a Python literal

    # Strings such as 'None' parse to values that cannot be iterated as image
    # entries; treat them as "no image" instead of raising a TypeError.
    if not isinstance(row, (list, tuple)):
        return None

    for item in row:
        if isinstance(item, dict) and item.get('size') == 'medium':
            return item.get('#text')

    return None  # Return None if a medium-sized image is not found

# Replace each image list with the URL of its medium-sized picture.
medium_links = full_tracks['image'].apply(extract_medium_link)
full_tracks = full_tracks.assign(image=medium_links)
full_tracks.head()

Unnamed: 0,url,date,artist,image,album,track,tags
0,https://www.last.fm/music/Frank+Ocean/_/Novacane,2023-11-05 00:11:00,Frank Ocean,https://lastfm.freetls.fastly.net/i/u/64s/7344...,Novacane,Novacane,"rnb, r&b, ofwgkta"
1,https://www.last.fm/music/Gorillaz/_/Don%27t+G...,2023-11-05 00:09:00,Gorillaz,https://lastfm.freetls.fastly.net/i/u/64s/2714...,Demon Days,Don't Get Lost in Heaven,"alternative, trip-hop, gorillaz"
2,https://www.last.fm/music/twenty+one+pilots/_/...,2023-11-05 00:05:00,twenty one pilots,https://lastfm.freetls.fastly.net/i/u/64s/dc5b...,Blurryface,Goner,"indie pop, alternative hip hop, electropop"
3,https://www.last.fm/music/Catherine+Feeny/_/Mr...,2023-11-05 00:02:00,Catherine Feeny,https://lastfm.freetls.fastly.net/i/u/64s/81e0...,Mr Blue,Mr Blue (Radio Mix),
4,https://www.last.fm/music/Gorillaz/_/El+Ma%C3%...,2023-11-04 23:58:00,Gorillaz,https://lastfm.freetls.fastly.net/i/u/64s/2714...,Demon Days,El Mañana,"alternative, electronic, trip-hop"


In [24]:
# Write the final table to a CSV file in the working directory; index=False
# leaves the row labels out of the file.
full_tracks.to_csv('Ewaoluwa.csv', index=False)