In [1]:
from itertools import islice

import matplotlib.pyplot as plt
import pandas as pd
import plotly.express as px
import seaborn as sns
import spotipy
from spotipy.oauth2 import SpotifyOAuth

## Authentication

In [2]:
# The credentials file is read without a header row; the first column holds,
# in row order, the client id, the client secret and the redirect URI.
spotify_client_info = pd.read_csv('Spotify_auth_cred_flow.csv', header=None)
client_id, client_secret, redirect_uri = (
    spotify_client_info.iloc[row, 0] for row in range(3)
)

In [3]:
# OAuth scope requested when authorising this session.
scope = 'user-library-read'

# Authorization manager built from the credentials loaded above.
auth_manager = SpotifyOAuth(
    client_id=client_id,
    client_secret=client_secret,
    redirect_uri=redirect_uri,
    scope=scope,
)

## Get Playlist Songs

In [4]:
# Spotify API client; every request below goes through it and is authorised
# by `auth_manager`.
sp = spotipy.Spotify(auth_manager=auth_manager)

In [5]:
# Songs are pulled from three playlists, one per genre group.
# Despite the variable name, each entry is a full playlist share URL.
playlist_ids = [
    # 1: Rock, Metal
    'https://open.spotify.com/playlist/1fe3iAJDParVnIt1t7C8xw?si=74244980c18d4bfc',
    # 2: Disco, Funk, Rock Pop, Old Ballads
    'https://open.spotify.com/playlist/1A80GmIeSnZo29E0OBokMK?si=7dd80542d03a45ac',
    # 3: RnB, Soul, Slow pop
    'https://open.spotify.com/playlist/5aOKSZJxYaCg81yZDiZcqV?si=be4363b21f1c4496',
]

## Get Song Features

In [6]:
def _iter_playlist_tracks(client, playlist):
    """Yield the track dict of every usable entry in `playlist`, page by page.

    Entries whose track object is missing, or that carry no id, are skipped:
    there is nothing to hand to the audio-features lookup for them.
    """
    page = client.playlist(playlist)['tracks']
    while page:
        for item in page['items']:
            track = item.get('track')
            if track and track.get('id'):
                yield track
        # Only request another page when the current one links to it.
        page = client.next(page) if page.get('next') else None


def get_song_features(playlists=None, client=None, tracks_per_playlist=100):
    """Build a DataFrame of Spotify audio features for the tracks of playlists.

    Parameters
    ----------
    playlists : iterable of str, optional
        Playlist identifiers accepted by ``client.playlist``. Defaults to the
        notebook-level ``playlist_ids``.
    client : optional
        Object exposing ``playlist``, ``next`` and ``audio_features`` (a
        ``spotipy.Spotify`` instance). Defaults to the notebook-level ``sp``.
    tracks_per_playlist : int or None, default 100
        Maximum number of tracks read from each playlist. Playlists with
        fewer tracks contribute what they have; ``None`` reads every track,
        following result pages as needed.

    Returns
    -------
    pandas.DataFrame
        One row per track, with columns ``song_name``, ``artist`` and the
        audio-feature fields listed in ``feature_names`` below. Tracks for
        which the API returns no audio features are left out. The frame is
        empty (but keeps its columns) when nothing was collected.
    """
    if playlists is None:
        playlists = playlist_ids
    if client is None:
        client = sp

    feature_names = ['danceability', 'energy', 'key', 'loudness', 'mode',
                     'speechiness', 'acousticness', 'instrumentalness',
                     'liveness', 'valence', 'tempo', 'duration_ms',
                     'time_signature']
    # Audio features are requested in chunks instead of one call per track;
    # the Spotify endpoint accepts at most 100 ids per request.
    batch_size = 100

    records = []
    for playlist in playlists:
        # islice stops pulling from the generator once the cap is reached, so
        # no further page is requested than needed (None means "no cap").
        tracks = list(islice(_iter_playlist_tracks(client, playlist),
                             tracks_per_playlist))

        for start in range(0, len(tracks), batch_size):
            batch = tracks[start:start + batch_size]
            features = client.audio_features([track['id'] for track in batch])

            for track, feature in zip(batch, features):
                if feature is None:
                    # The API has no audio analysis for this track.
                    continue
                record = {
                    'song_name': track['name'],
                    # NOTE(review): this is the first artist of the track's
                    # album, not the track's own artist entry; confirm that
                    # is intended.
                    'artist': track['album']['artists'][0]['name'],
                }
                for name in feature_names:
                    record[name] = feature[name]
                records.append(record)

    # create dataframe
    return pd.DataFrame(records, columns=['song_name', 'artist'] + feature_names)

In [7]:
# Build the feature table for the playlists in `playlist_ids`; this queries the Spotify API through `sp`.
df = get_song_features()

In [8]:
# Sanity check: show the first row of df to confirm the song, artist and feature columns are filled in
df.iloc[0]

song_name           Feel Invincible
artist                      Skillet
danceability                  0.603
energy                        0.886
key                               5
loudness                     -3.777
mode                              0
speechiness                  0.0837
acousticness                0.00045
instrumentalness                0.0
liveness                       0.26
valence                       0.395
tempo                       126.025
duration_ms                  229933
time_signature                    4
Name: 0, dtype: object

In [9]:
# Persist the feature table as CSV. The DataFrame's row index is written as
# the first, unnamed column.
df.to_csv('songs_to_cluster.csv', index=True)