In [34]:
import spotipy
from spotipy.oauth2 import SpotifyClientCredentials
import spotipy.util as util
from dotenv import load_dotenv
import os
import pandas as pd
import config
import time

load_dotenv()

SPOTIPY_CLIENT_ID = os.getenv('SPOTIPY_CLIENT_ID')
SPOTIPY_CLIENT_SECRET = os.getenv('SPOTIPY_CLIENT_SECRET')
SPOTIPY_REDIRECT_URI = os.getenv('SPOTIPY_REDIRECT_URI')


auth_manager = SpotifyClientCredentials()
sp = spotipy.Spotify(auth_manager=auth_manager)

## Collect all song feature data for all songs of top artists
Spotipy is used to collect all song data for the songs collected in the previous step. The following function collects song data for the given song (each song is identified by a song id).

In [35]:
def get_song_features(song_id):
    """obtain song feature data for the given song"""
    try: 
        audio = sp.audio_features(song_id)[0]
        return audio
    except:
        return float('NaN')

In [36]:
get_song_features('1HpR1ZX6sf6cQbKxSl7vF6')

{'danceability': 0.386,
 'energy': 0.416,
 'key': 4,
 'loudness': -10.543,
 'mode': 1,
 'speechiness': 0.0306,
 'acousticness': 0.766,
 'instrumentalness': 1.07e-05,
 'liveness': 0.119,
 'valence': 0.345,
 'tempo': 122.536,
 'type': 'audio_features',
 'id': '1HpR1ZX6sf6cQbKxSl7vF6',
 'uri': 'spotify:track:1HpR1ZX6sf6cQbKxSl7vF6',
 'track_href': 'https://api.spotify.com/v1/tracks/1HpR1ZX6sf6cQbKxSl7vF6',
 'analysis_url': 'https://api.spotify.com/v1/audio-analysis/1HpR1ZX6sf6cQbKxSl7vF6',
 'duration_ms': 220250,
 'time_signature': 4}

In [37]:
songs_df = pd.read_excel('./data/songs.xlsx')
artists = pd.read_excel('./data/artists.xlsx')
artists = list(artists['artist'])

In [38]:
songs_ids = list(songs_df['song_id'].unique())
songs_lists = [songs_ids[i:i + 100] for i in range(0, len(songs_ids), 100)]

In [39]:
audio_features_df = pd.DataFrame()

for songs_list in songs_lists:
    # print(songs_list)
    subset = pd.DataFrame()
    audio_features = sp.audio_features(songs_list)
    subset['song_id'] = songs_list
    subset['audio_features'] = audio_features

    audio_features_df = pd.concat([audio_features_df, subset])
    if len(audio_features_df)%100000 == 0:
            print(f'{len(audio_features_df)} records collected')

audio_features_df

Unnamed: 0,song_id,audio_features
0,5y7o08Grl3WRSy9uyUToM0,"{'danceability': 0.472, 'energy': 0.283, 'key'..."
1,1HpR1ZX6sf6cQbKxSl7vF6,"{'danceability': 0.386, 'energy': 0.416, 'key'..."
2,3nHxZEe1dT7zs73DG6pNll,"{'danceability': 0.461, 'energy': 0.36, 'key':..."
3,53fKDMfQhWMSw7QKVDOTBP,"{'danceability': 0.516, 'energy': 0.338, 'key'..."
4,3Ysf0Y3iu23mrDsVh1mdu0,"{'danceability': 0.386, 'energy': 0.416, 'key'..."
...,...,...
23,0teuIhlNd6Wxm1lFWkfm9J,"{'danceability': 0.505, 'energy': 0.721, 'key'..."
24,2a49aqOHDWxsdbNjmTVdF3,"{'danceability': 0.388, 'energy': 0.746, 'key'..."
25,0X92Y5xehewiMZyFTzMecP,"{'danceability': 0.401, 'energy': 0.651, 'key'..."
26,6vqPXMmtwipyeg4pkdQ2S1,"{'danceability': 0.462, 'energy': 0.58, 'key':..."


In [40]:
artist_info_df = pd.DataFrame()
list_artist_info = []
for artist in artists:
    result = sp.search(artist, limit=1, type="artist")
    artist_info = result["artists"]["items"][0]
    # Display genres associated with the first search result
    list_artist_info.append(artist_info)

artist_info_df['artist'] = artists
artist_info_df['artist_info'] = list_artist_info

artist_info_df['artist_info']

0     {'external_urls': {'spotify': 'https://open.sp...
1     {'external_urls': {'spotify': 'https://open.sp...
2     {'external_urls': {'spotify': 'https://open.sp...
3     {'external_urls': {'spotify': 'https://open.sp...
4     {'external_urls': {'spotify': 'https://open.sp...
5     {'external_urls': {'spotify': 'https://open.sp...
6     {'external_urls': {'spotify': 'https://open.sp...
7     {'external_urls': {'spotify': 'https://open.sp...
8     {'external_urls': {'spotify': 'https://open.sp...
9     {'external_urls': {'spotify': 'https://open.sp...
10    {'external_urls': {'spotify': 'https://open.sp...
11    {'external_urls': {'spotify': 'https://open.sp...
12    {'external_urls': {'spotify': 'https://open.sp...
13    {'external_urls': {'spotify': 'https://open.sp...
14    {'external_urls': {'spotify': 'https://open.sp...
15    {'external_urls': {'spotify': 'https://open.sp...
16    {'external_urls': {'spotify': 'https://open.sp...
17    {'external_urls': {'spotify': 'https://ope

In [41]:
audio_features_df.to_excel('./data/audio_features.xlsx', index=False)
artist_info_df.to_excel('./data/artist_info.xlsx', index=False)