In [66]:
import spotipy
from spotipy.oauth2 import SpotifyClientCredentials
import spotipy.util as util
from dotenv import load_dotenv
import os
import pandas as pd
import config
import time

load_dotenv()

SPOTIPY_CLIENT_ID = os.getenv('SPOTIPY_CLIENT_ID')
SPOTIPY_CLIENT_SECRET = os.getenv('SPOTIPY_CLIENT_SECRET')
SPOTIPY_REDIRECT_URI = os.getenv('SPOTIPY_REDIRECT_URI')


auth_manager = SpotifyClientCredentials()
sp = spotipy.Spotify(auth_manager=auth_manager)

## Collect all songs released by the top artists
After collecting data for all artists, Spotipy was used to collect all Spotify songs for each artist. The following function collects data on all songs released by a given artist. Note that the maximum number of records in a specific query is 1000.

In [67]:
def get_songs(sp, artist):
    counter = 0
    
    # empty list to store data
    song_id = []
    song_name = []
    artist_id = []
    artist_name = []
    release_date = []
    song_popularity = []

    # keep executing api calls until the offset parameter reaches 1000
    while counter < 1000:
        # call api

        results = sp.search(q = f"artist:{artist}", type = "track", limit = 50, offset = 0)
        for idx, song in enumerate(results['tracks']['items']):
            try:
                if song['artists'][0]['name'] == artist:
                    song_id.append(song['id'])
                    song_name.append(song['name'])
                    artist_id.append(song['artists'][0]['id'])
                    artist_name.append(song['artists'][0]['name'])
                    release_date.append(song['album']['release_date'])
                    song_popularity.append(song['popularity'])
            except Exception as e:
                print(e)
        
        counter += 50
    
    songs_df = pd.DataFrame()
    songs_df['song_id'] = song_id
    songs_df['song_name'] = song_name
    songs_df['artist_id'] = artist_id
    songs_df['artist_name'] = artist_name
    songs_df['release_date'] = release_date
    songs_df['song_popularity'] = song_popularity
    
    return songs_df

In [69]:
# print(type(sp))
artists = pd.read_excel('./data/artists.xlsx')
artists = list(artists['artist'])
# print(artists)
df = pd.DataFrame()

for idx, artist in enumerate(artists):
    df_artist = get_songs(sp, artist)
        
    if idx == 0:
        df = df_artist
    else:
        df = pd.concat([df, df_artist])
    time.sleep(3)
df.to_excel('./data/songs.xlsx', index = False)
df

Unnamed: 0,song_id,song_name,artist_id,artist_name,release_date,song_popularity
0,5y7o08Grl3WRSy9uyUToM0,Menjadi Dia,0kPb52ySN2k9P6wEZPTUzm,Tiara Andini,2023-09-15,0
1,1HpR1ZX6sf6cQbKxSl7vF6,Merasa Indah,0kPb52ySN2k9P6wEZPTUzm,Tiara Andini,2023-09-15,0
2,3nHxZEe1dT7zs73DG6pNll,Usai,0kPb52ySN2k9P6wEZPTUzm,Tiara Andini,2022-11-25,80
3,53fKDMfQhWMSw7QKVDOTBP,Cintanya Aku,0kPb52ySN2k9P6wEZPTUzm,Tiara Andini,2021-01-22,81
4,3Ysf0Y3iu23mrDsVh1mdu0,Merasa Indah,0kPb52ySN2k9P6wEZPTUzm,Tiara Andini,2021-12-17,75
...,...,...,...,...,...,...
815,0teuIhlNd6Wxm1lFWkfm9J,Pasti Ku Bisa,6q87vizIEdEN4NvlR6mjfT,Sheila On 7,2011-03-15,47
816,2a49aqOHDWxsdbNjmTVdF3,Tunjukkan Padaku,6q87vizIEdEN4NvlR6mjfT,Sheila On 7,2002-03-26,44
817,0X92Y5xehewiMZyFTzMecP,Sephia,6q87vizIEdEN4NvlR6mjfT,Sheila On 7,2014-06-16,27
818,6vqPXMmtwipyeg4pkdQ2S1,Hingga Ujung Waktu,6q87vizIEdEN4NvlR6mjfT,Sheila On 7,2002-03-26,44
