In [1]:
import pandas as pd
from time import sleep
import numpy as np
import spotipy
import json
from spotipy.oauth2 import SpotifyClientCredentials
import sys
from config import *

sp = spotipy.Spotify(auth_manager=SpotifyClientCredentials(client_id=Client_ID,
                                                           client_secret=Client_Secret))

In [2]:
hot_songs = pd.read_csv('~/Desktop/data_analytics/Week_6/Day_1/Afternoon/lab-not-hot-songs/hot_songs_updated.csv')

In [3]:
not_hot = pd.read_csv('~/Desktop/data_analytics/Week_6/Day_1/Afternoon/lab-not-hot-songs/not_hot_v2.csv')

In [4]:
not_hot.head()

Unnamed: 0.1,Unnamed: 0,artist,title
0,0,Bob Dylan,Like a Rolling Stone
1,1,The Rolling Stones,(I Can't Get No) Satisfaction
2,2,The Beach Boys,Good Vibrations
3,3,Nirvana,Smells Like Teen Spirit
4,4,Aretha Franklin,Respect


In [5]:
hot_songs.head()

Unnamed: 0.1,Unnamed: 0,title,artist
0,0,Rockin' Around The Christmas Tree,Brenda Lee
1,1,All I Want For Christmas Is You,Mariah Carey
2,2,Jingle Bell Rock,Bobby Helms
3,3,Last Christmas,Wham!
4,4,A Holly Jolly Christmas,Burl Ives


In [6]:
hot_songs = hot_songs.drop('Unnamed: 0', axis = 1)
not_hot = not_hot.drop('Unnamed: 0', axis = 1)

In [7]:
def search_song(title, artist, limit = 1):
    '''
    Function to search a given single song in the Spotify API. 
    Input: title = song name, artist = song singer
    Output: Song id from Spotify
    '''
    
    search_query = f"track:{title} artist:{artist}"
    id = sp.search(q = search_query, limit = limit)['tracks']['items'][0]['id']
    return id 

In [8]:
search_song("Like a Rolling Stone", "Bob Dylan")

'3AhXZa8sUQht0UEdBJgpGc'

## Hot songs

In [9]:
def add_id(df: pd.DataFrame) -> pd.DataFrame:
    '''
    Function to obtain the IDs of a given list of songs through the Spotify API.
    Input: pd.DataFrame
    Output: pd.DataFrae with the column "id" for each song
    Chunks used in order to prevent reaching the API request limit 
    '''
    
    chunks = 50
    list_of_ids = []

    for i in range(0, len(df), chunks):
        chunk = df.iloc[i:i+chunks]
        print("Collecting IDs for chunk...")
    
        for index, row in chunk.iterrows():
            title = row["title"]
            artist = row["artist"]
            try:
                id = search_song(title, artist, 1)
                list_of_ids.append(id)
            except:
                print("Song not found!")
                list_of_ids.append("")
        sleep(20)
        print("Sleep...")

    df["id"] = list_of_ids
    return df

In [10]:
add_id(hot_songs)

Collecting IDs for chunk...
Song not found!
Song not found!
Song not found!
Song not found!
Song not found!
Sleep...
Collecting IDs for chunk...
Song not found!
Song not found!
Song not found!
Song not found!
Song not found!
Song not found!
Song not found!
Song not found!
Song not found!
Song not found!
Song not found!
Sleep...


Unnamed: 0,title,artist,id
0,Rockin' Around The Christmas Tree,Brenda Lee,2EjXfH91m7f8HiJN1yQg97
1,All I Want For Christmas Is You,Mariah Carey,0bYg9bo50gSsH3LtXe2SQn
2,Jingle Bell Rock,Bobby Helms,7vQbuQcyTflfCIOu3Uzzya
3,Last Christmas,Wham!,2FRnf9qhLbvw8fu4IBXx78
4,A Holly Jolly Christmas,Burl Ives,77khP2fIVhSW23NwxrRluh
...,...,...,...
95,El Amor de Su Vida,Grupo Frontera & Grupo Firme,0O3U5iwTbiXCREMkvotJuN
96,Standing Next To You,Jung Kook,2KslE17cAJNHTsI2MI0jb2
97,Man Made A Bar,Morgan Wallen Featuring Eric Church,
98,Que Onda,Calle 24 x Chino Pacas x Fuerza Regida,


In [11]:
list_of_ids = hot_songs["id"].tolist()

In [12]:
filtered_ids = [id for id in list_of_ids if id]

In [13]:
filtered_ids

['2EjXfH91m7f8HiJN1yQg97',
 '0bYg9bo50gSsH3LtXe2SQn',
 '7vQbuQcyTflfCIOu3Uzzya',
 '2FRnf9qhLbvw8fu4IBXx78',
 '77khP2fIVhSW23NwxrRluh',
 '3gBhiIj76UJQ7jlrUR4qjc',
 '2uFaJJtFpPDc5Pa95XzTvg',
 '0oPdaY4dXtc3ZsaG17V972',
 '4xhsWYTOGcal8zt0J161CU',
 '5ASM6Qjiav2xPe7gRkQMsQ',
 '0lizgQ7Qw35od7CYaoMBZb',
 '4PS1e8f2LvuTFgUs1Cn3ON',
 '3YZE5qDV7u1ZD1gZc47ZeR',
 '5mM1jHHXhKc0ZYi0R8EOLn',
 '46pF1zFimM582ss1PrMy68',
 '1TH5fhztFZmUGWaCXmZ6ie',
 '3QiAAp20rPC3dcAtKtMaqQ',
 '1BxfuPKGuaTgP7aM0Bbdwr',
 '1foCxQtxBweJtZmdxhEHVO',
 '25leEEaz1gIpp7o21Fqyjo',
 '2pnPe4pJtq7689i5ydzvJJ',
 '3ZfRpDV74gUbBCmW8yb6ml',
 '3rUGC1vUpkDG9CZFHMur1t',
 '5xlS0QkVrSH7ssEbBgBzbM',
 '1SV1fxF65n9NhRHp3KlBuu',
 '3QIoEi8Enr9uHffwInGIsC',
 '2IGMVunIBsBLtEQyoI1Mu7',
 '2QpN1ZVw8eJO5f7WcvUA1k',
 '5aIVCx5tnk0ntmdiinnYvw',
 '4mybTd8gHkEpCmMxOFKHwv',
 '38xhBO2AKrJnjdjVnhJES6',
 '7dJYggqjKo71KI9sLzqCs8',
 '7K3BhSpAxZBznislvUMVtn',
 '7xapw9Oy21WpfEcib2ErSA',
 '3sDdyBHQ60Cs1opmIyRvhp',
 '6NQchnlpozM5o4n6WfDznd',
 '1OjmlSFuzYflWjSMTCyTJv',
 

In [14]:
def get_audio_features(list_of_ids: list) -> pd.DataFrame:
    '''
    Function to obtain the audio features of a given list of songs 
    Input: List with the song ids 
    Output: pd.DataFrame with the audio features for each id
    Chunks used in order to prevent reaching the API request limit
    '''
    
    chunks = 50
    audio_features = []

    for i in range(0, len(list_of_ids), chunks):
        chunk_ids = list_of_ids[i:i+chunks]
        try:
            features_chunk = sp.audio_features(tracks = chunk_ids)
            if features_chunk:
                audio_features.extend(features_chunk)  
        except:
                print("Error retrieving audio features for chunk!")
                
        sleep(20)
        print("Sleep...")
        
    audio_features = [af for af in audio_features if af is not None]
    audio_features_df = pd.DataFrame(audio_features)

    return audio_features_df

In [15]:
audio_features_df = get_audio_features(filtered_ids)

Sleep...
Sleep...


In [16]:
def add_audio_features(df: pd.DataFrame, audio_features_df: pd.DataFrame, key_column) -> pd.DataFrame:
    '''
    Function to merge the original df with the created df which has the song features.
    Input: df: pd.DataFrame -> original df, audio_features_df and the column on which both will merge 
    Output: The merged dataframe from the input ones
    '''

    merged_df = pd.merge(df, audio_features_df, how = 'inner', on = key_column)
    return merged_df

In [17]:
hot_features = add_audio_features(hot_songs, audio_features_df, 'id')

In [18]:
hot_features.head()

Unnamed: 0,title,artist,id,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,type,uri,track_href,analysis_url,duration_ms,time_signature
0,Rockin' Around The Christmas Tree,Brenda Lee,2EjXfH91m7f8HiJN1yQg97,0.589,0.472,8,-8.749,1,0.0502,0.614,0.0,0.505,0.898,67.196,audio_features,spotify:track:2EjXfH91m7f8HiJN1yQg97,https://api.spotify.com/v1/tracks/2EjXfH91m7f8...,https://api.spotify.com/v1/audio-analysis/2EjX...,126267,4
1,All I Want For Christmas Is You,Mariah Carey,0bYg9bo50gSsH3LtXe2SQn,0.336,0.627,7,-7.463,1,0.0384,0.164,0.0,0.0708,0.35,150.273,audio_features,spotify:track:0bYg9bo50gSsH3LtXe2SQn,https://api.spotify.com/v1/tracks/0bYg9bo50gSs...,https://api.spotify.com/v1/audio-analysis/0bYg...,241107,4
2,Jingle Bell Rock,Bobby Helms,7vQbuQcyTflfCIOu3Uzzya,0.754,0.424,2,-8.463,1,0.0363,0.643,0.0,0.0652,0.806,119.705,audio_features,spotify:track:7vQbuQcyTflfCIOu3Uzzya,https://api.spotify.com/v1/tracks/7vQbuQcyTflf...,https://api.spotify.com/v1/audio-analysis/7vQb...,130973,4
3,Last Christmas,Wham!,2FRnf9qhLbvw8fu4IBXx78,0.735,0.478,2,-12.472,1,0.0293,0.189,2e-06,0.355,0.947,107.682,audio_features,spotify:track:2FRnf9qhLbvw8fu4IBXx78,https://api.spotify.com/v1/tracks/2FRnf9qhLbvw...,https://api.spotify.com/v1/audio-analysis/2FRn...,262960,4
4,A Holly Jolly Christmas,Burl Ives,77khP2fIVhSW23NwxrRluh,0.683,0.375,0,-13.056,1,0.0303,0.579,0.0,0.076,0.888,140.467,audio_features,spotify:track:77khP2fIVhSW23NwxrRluh,https://api.spotify.com/v1/tracks/77khP2fIVhSW...,https://api.spotify.com/v1/audio-analysis/77kh...,135533,4


In [19]:
hot_features.shape

(84, 20)

In [20]:
hot_features.to_csv('hot_features.csv')

## Not so hot 

In [21]:
add_id(not_hot)

Collecting IDs for chunk...
Song not found!
Song not found!
Sleep...
Collecting IDs for chunk...
Song not found!
Song not found!
Sleep...
Collecting IDs for chunk...
Song not found!
Song not found!
Song not found!
Sleep...
Collecting IDs for chunk...
Sleep...
Collecting IDs for chunk...
Song not found!
Song not found!
Song not found!
Song not found!
Sleep...
Collecting IDs for chunk...
Song not found!
Song not found!
Song not found!
Sleep...
Collecting IDs for chunk...
Sleep...
Collecting IDs for chunk...
Song not found!
Song not found!
Sleep...
Collecting IDs for chunk...
Song not found!
Song not found!
Song not found!
Song not found!
Song not found!
Sleep...
Collecting IDs for chunk...
Song not found!
Song not found!
Song not found!
Song not found!
Sleep...
Collecting IDs for chunk...
Song not found!
Song not found!
Sleep...
Collecting IDs for chunk...
Song not found!
Song not found!
Song not found!
Song not found!
Sleep...
Collecting IDs for chunk...
Song not found!
Song not found!


Unnamed: 0,artist,title,id
0,Bob Dylan,Like a Rolling Stone,3AhXZa8sUQht0UEdBJgpGc
1,The Rolling Stones,(I Can't Get No) Satisfaction,2PzU4IB8Dr6mxV3lHuaG34
2,The Beach Boys,Good Vibrations,5t9KYe0Fhd5cW6UYT4qP8f
3,Nirvana,Smells Like Teen Spirit,5ghIJDpPoe3CfHMGu71E6T
4,Aretha Franklin,Respect,7s25THrKz86DM225dOYwnr
...,...,...,...
2993,Black Box Recorder,The Facts of Life,4eappNAqv5JsqWN1LJqpfY
2994,Bob Dylan,Just Like Tom Thumb's Blues,0V1771LhL3tG36pb55EZAH
2995,Sebadoh,On Fire,091SOmej015R3W0pUkc05v
2996,Phoenix,Long Distance Call,7dGubYmPavaI9YNansq9TW


In [22]:
list_of_ids_nothot = not_hot["id"].tolist()

filtered_ids_nothot = [id for id in list_of_ids if id]

filtered_ids_nothot

['2EjXfH91m7f8HiJN1yQg97',
 '0bYg9bo50gSsH3LtXe2SQn',
 '7vQbuQcyTflfCIOu3Uzzya',
 '2FRnf9qhLbvw8fu4IBXx78',
 '77khP2fIVhSW23NwxrRluh',
 '3gBhiIj76UJQ7jlrUR4qjc',
 '2uFaJJtFpPDc5Pa95XzTvg',
 '0oPdaY4dXtc3ZsaG17V972',
 '4xhsWYTOGcal8zt0J161CU',
 '5ASM6Qjiav2xPe7gRkQMsQ',
 '0lizgQ7Qw35od7CYaoMBZb',
 '4PS1e8f2LvuTFgUs1Cn3ON',
 '3YZE5qDV7u1ZD1gZc47ZeR',
 '5mM1jHHXhKc0ZYi0R8EOLn',
 '46pF1zFimM582ss1PrMy68',
 '1TH5fhztFZmUGWaCXmZ6ie',
 '3QiAAp20rPC3dcAtKtMaqQ',
 '1BxfuPKGuaTgP7aM0Bbdwr',
 '1foCxQtxBweJtZmdxhEHVO',
 '25leEEaz1gIpp7o21Fqyjo',
 '2pnPe4pJtq7689i5ydzvJJ',
 '3ZfRpDV74gUbBCmW8yb6ml',
 '3rUGC1vUpkDG9CZFHMur1t',
 '5xlS0QkVrSH7ssEbBgBzbM',
 '1SV1fxF65n9NhRHp3KlBuu',
 '3QIoEi8Enr9uHffwInGIsC',
 '2IGMVunIBsBLtEQyoI1Mu7',
 '2QpN1ZVw8eJO5f7WcvUA1k',
 '5aIVCx5tnk0ntmdiinnYvw',
 '4mybTd8gHkEpCmMxOFKHwv',
 '38xhBO2AKrJnjdjVnhJES6',
 '7dJYggqjKo71KI9sLzqCs8',
 '7K3BhSpAxZBznislvUMVtn',
 '7xapw9Oy21WpfEcib2ErSA',
 '3sDdyBHQ60Cs1opmIyRvhp',
 '6NQchnlpozM5o4n6WfDznd',
 '1OjmlSFuzYflWjSMTCyTJv',
 

In [23]:
audio_features_nothotdf = get_audio_features(filtered_ids_nothot)

Sleep...
Sleep...


In [24]:
not_hot_features = add_audio_features(not_hot, audio_features_nothotdf, 'id')

In [25]:
not_hot_features.head()

Unnamed: 0,artist,title,id,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,type,uri,track_href,analysis_url,duration_ms,time_signature
0,Nat King Cole,The Christmas Song,4PS1e8f2LvuTFgUs1Cn3ON,0.319,0.21,1,-15.231,1,0.0341,0.92,8.1e-05,0.138,0.209,78.696,audio_features,spotify:track:4PS1e8f2LvuTFgUs1Cn3ON,https://api.spotify.com/v1/tracks/4PS1e8f2LvuT...,https://api.spotify.com/v1/audio-analysis/4PS1...,192160,4


In [26]:
not_hot_features.shape

(1, 20)

In [27]:
not_hot_features.to_csv('not_hot_features.csv')

In [28]:
%%writefile functions_api.py

import pandas as pd
from time import sleep
import numpy as np
import spotipy
import json
from spotipy.oauth2 import SpotifyClientCredentials
import sys
from config import *

sp = spotipy.Spotify(auth_manager=SpotifyClientCredentials(client_id=Client_ID,
                                                           client_secret=Client_Secret))

def search_song(title, artist, limit = 1):
    '''
    Function to search a given single song in the Spotify API. 
    Input: title = song name, artist = song singer
    Output: song id from Spotify
    '''
    
    search_query = f"track:{title} artist:{artist}"
    id = sp.search(q = search_query, limit = limit)['tracks']['items'][0]['id']
    return id 

def add_id(df: pd.DataFrame) -> pd.DataFrame:
    '''
    Function to obtain the IDs of a given list of songs through the Spotify API.
    Input: pd.DataFrame
    Output: pd.DataFrae with the column "id" for each song
    Chunks used in order to prevent reaching the API request limit 
    '''
    
    chunks = 50
    list_of_ids = []

    for i in range(0, len(df), chunks):
        chunk = df.iloc[i:i+chunks]
        print("Collecting IDs for chunk...")
    
        for index, row in chunk.iterrows():
            title = row["title"]
            artist = row["artist"]
            try:
                id = search_song(title, artist, 1)
                list_of_ids.append(id)
            except:
                print("Song not found!")
                list_of_ids.append("")
        sleep(20)
        print("Sleep...")

    df["id"] = list_of_ids
    return df

def get_audio_features(list_of_ids: list) -> pd.DataFrame:
    '''
    Function to obtain the audio features of a given list of songs 
    Input: List with the song ids 
    Output: pd.DataFrame with the audio features for each id
    Chunks used in order to prevent reaching the API request limit
    '''
    
    chunks = 50
    audio_features = []

    for i in range(0, len(list_of_ids), chunks):
        chunk_ids = list_of_ids[i:i+chunks]
        try:
            features_chunk = sp.audio_features(tracks = chunk_ids)
            if features_chunk:
                audio_features.extend(features_chunk)  
        except:
                print("Error retrieving audio features for chunk!")
                
        sleep(20)
        print("Sleep...")
        
    audio_features = [af for af in audio_features if af is not None]
    audio_features_df = pd.DataFrame(audio_features)

    return audio_features_df

def add_audio_features(df: pd.DataFrame, audio_features_df: pd.DataFrame, key_column) -> pd.DataFrame:
    '''
    Function to merge the original df with the created df which has the song features.
    Input: df: pd.DataFrame -> original df, audio_features_df and the column on which both will merge 
    Output: The merged dataframe from the input ones
    '''

    merged_df = pd.merge(df, audio_features_df, how = 'inner', on = key_column)
    return merged_df

Writing functions_api.py
