In [49]:
import spotipy
from spotipy.oauth2 import SpotifyClientCredentials
import spotipy.util as util
from dotenv import load_dotenv
import os
import pandas as pd
import config
import time

load_dotenv()

SPOTIPY_CLIENT_ID = os.getenv('SPOTIPY_CLIENT_ID')
SPOTIPY_CLIENT_SECRET = os.getenv('SPOTIPY_CLIENT_SECRET')
SPOTIPY_REDIRECT_URI = os.getenv('SPOTIPY_REDIRECT_URI')


auth_manager = SpotifyClientCredentials()
sp = spotipy.Spotify(auth_manager=auth_manager)

## Collect top artists
Since song popularity depends on the artists themselves, it is important to collect basic information like their follower count and music genres. The following function is used to collect data for 1000 artists.

In [50]:
def get_top_artist_info(artist_id):
    """obtain artist info for the given artist"""
    try:
        artist_info = sp.artist(artist_id)
        return artist_info
    except:
        return float("NaN")

## Collect all tracks released by the top artists
After collecting data for all artists, Spotipy was used to collect all Spotify tracks for each artist. The following function collects data on all tracks released by a given artist. Note that the maximum number of records in a specific query is 1000.

In [51]:
def get_songs(sp, artist):
    counter = 0
    
    # empty list to store data
    track_id = []
    track_name = []
    artist_id = []
    artist_name = []
    release_date = []
    song_popularity = []

    # keep executing api calls until the offset parameter reaches 1000
    while counter < 1000:
        # call api

        results = sp.search(q = f"artist:{artist}", type = "track", limit = 50, offset = 0)
        for idx, song in enumerate(results['tracks']['items']):
            try:
                if song['artists'][0]['name'] == artist:
                    track_id.append(song['id'])
                    track_name.append(song['name'])
                    artist_id.append(song['artists'][0]['id'])
                    artist_name.append(song['artists'][0]['name'])
                    release_date.append(song['album']['release_date'])
                    song_popularity.append(song['popularity'])
            except Exception as e:
                print(e)
        
        counter += 50
    
    songs_df = pd.DataFrame()
    songs_df['track_id'] = track_id
    songs_df['track_name'] = track_name
    songs_df['artist_id'] = artist_id
    songs_df['artist_name'] = artist_name
    songs_df['release_date'] = release_date
    songs_df['song_popularity'] = song_popularity
    
    return songs_df

In [55]:
df = get_songs(sp=sp, artist="NewJeans")
df

Unnamed: 0,track_id,track_name,artist_id,artist_name,release_date,song_popularity
0,5sdQOyqq2IDhvmx2lHOpwd,Super Shy,6HvZYsbFfjnjFrWF950C9d,NewJeans,2023-07-07,94
1,56v8WEnGzLByGsDAXDiv4d,ETA,6HvZYsbFfjnjFrWF950C9d,NewJeans,2023-07-21,89
2,02wk5BttM0QL38ERjLPQJB,Cool With You,6HvZYsbFfjnjFrWF950C9d,NewJeans,2023-07-21,87
3,65FftemJ1DbbZ45DUfHJXE,OMG,6HvZYsbFfjnjFrWF950C9d,NewJeans,2023-01-02,88
4,6rdkCkjk6D12xRpdMXy0I2,New Jeans,6HvZYsbFfjnjFrWF950C9d,NewJeans,2023-07-07,86
...,...,...,...,...,...,...
355,5expoVGQPvXuwBBFuNGqBd,Hurt,6HvZYsbFfjnjFrWF950C9d,NewJeans,2022-08-01,79
356,5LMoKDVzW2kDneNu2UbspP,Zero,6HvZYsbFfjnjFrWF950C9d,NewJeans,2023-04-03,74
357,7CHDUDw89DCR8vvI0yTXGa,Beautiful Restriction,6HvZYsbFfjnjFrWF950C9d,NewJeans,2023-09-01,68
358,5702raF31K9rvD6KZ6sCTo,Ditto,6HvZYsbFfjnjFrWF950C9d,NewJeans,2023-01-02,72
