# Spotify API Experiments

https://developer.spotify.com/community

In [2]:
import spotipy
from spotipy.oauth2 import SpotifyClientCredentials
import pandas as pd
import numpy as np

# Spotify Credentials
import spot_creds

# Graphing
import matplotlib.pyplot as plt

# Scikit-Learn
from sklearn.feature_extraction.text import TfidfVectorizer

# Read the Spotify app credentials from the local spot_creds module
# so the id/secret values are not written into the notebook itself.
clid = spot_creds.client_id
secret = spot_creds.secret


In [3]:
#Authentication - without user
# Build a client-credentials manager from the id/secret read above and hand it
# to the spotipy client; `sp` is the client object the later cells call.
client_credentials_manager = SpotifyClientCredentials(client_id=clid, client_secret=secret)
sp = spotipy.Spotify(client_credentials_manager = client_credentials_manager)

## Get Songs from Playlist
Using my "Funky Songs" playlist (the "Tom's Funky Jazz Group" link is kept as a commented-out alternative).

In [None]:
# playlist_link = "https://open.spotify.com/playlist/1jAT4AEQab8P4Qg1olncPw" # Tom's Funky Jazz Group
playlist_link = "https://open.spotify.com/playlist/7eWWLoTfmLUcD0viBP6Hr0?si=e8b0760749404749" #Funky Songs

# The playlist id is the last path segment of the share link. Drop the query
# string first and ignore a trailing slash so neither ends up in the id.
playlist_URI = playlist_link.split("?")[0].rstrip("/").split("/")[-1]

# URIs of the tracks on the first page of the playlist. Entries without a
# "track" payload are skipped instead of raising a TypeError.
track_uris = [
    x["track"]["uri"]
    for x in sp.playlist_tracks(playlist_URI)["items"]
    if x.get("track")
]

In [None]:
# sp.playlist_tracks(playlist_URI)

In [None]:
# Keep the first playlist entry so its raw structure can be inspected below
# (this requests the playlist from the API again).
first_track = sp.playlist_tracks(playlist_URI)["items"][0]

In [None]:
first_track

In [None]:
def extract_audio_feat(track_uri, track_dict):
    """Add the Spotify audio features of one track to ``track_dict``.

    Parameters
    ----------
    track_uri : str
        Track URI passed to ``sp.audio_features``.
    track_dict : dict
        Record for the track. It is updated in place with one key per
        audio feature listed below.

    Returns
    -------
    dict
        The same ``track_dict`` object that was passed in.

    Notes
    -----
    When the API response holds no feature data for the track (an empty
    response or a ``None`` entry), every feature key is set to ``None``
    instead of raising a ``TypeError``.
    """
    audio_feat_list = ('acousticness',
                       'danceability',
                       'energy',
                       'instrumentalness',
                       'key',
                       'liveness',
                       'loudness',
                       'mode',
                       'speechiness',
                       'tempo',
                       'time_signature',
                       'valence')

    # The response is a list with one entry per requested track; only the
    # first entry is relevant because a single URI is requested.
    response = sp.audio_features(track_uri)
    audio_feat = response[0] if response else None

    for feat in audio_feat_list:
        track_dict[feat] = audio_feat[feat] if audio_feat else None

    return track_dict




In [None]:
# Collect every page of the playlist: a single playlist_tracks call returns
# only one page of items, so keep following the "next" link until it is empty.
page = sp.playlist_tracks(playlist_URI)
playlist_items = list(page["items"])
while page.get("next"):
    page = sp.next(page)
    playlist_items.extend(page["items"])

# Build one plain dict per track and create the DataFrame once at the end;
# concatenating inside the loop re-copies the whole frame on every iteration.
track_records = []
seen_uris = set()

for track in playlist_items:
    track_info = track.get("track")
    if not track_info:
        # Entry without a track payload: nothing to describe.
        continue

    #URI
    track_uri = track_info["uri"]
    if track_uri in seen_uris:
        # Make sure there are no duplicates (and skip their API calls).
        continue
    seen_uris.add(track_uri)

    #Main Artist
    main_artist = track_info["artists"][0]
    artist_uri = main_artist["uri"]
    artist_info = sp.artist(artist_uri)

    this_track = {
        'track_uri': track_uri,
        #Track name
        'track_name': track_info["name"],
        'artist_uri': artist_uri,
        #Name, popularity, genre
        'artist_name': main_artist["name"],
        'artist_pop': artist_info["popularity"],
        'artist_genres': artist_info["genres"],
        #Album
        'album': track_info["album"]["name"],
        #Track Metadata
        'track_pop': track_info["popularity"],
        'explicit': track_info['explicit'],
    }

    # Audio Features
    this_track = extract_audio_feat(track_uri, this_track)

    track_records.append(this_track)

# Convert to DataFrame: one row per unique track, in playlist order.
tracks_df = pd.DataFrame(track_records)

tracks_df.head()

In [None]:
# Show only the rows whose `explicit` flag is True.
tracks_df.loc[tracks_df.explicit==True]

In [None]:
# First listed genre of each track's main artist, or None when the artist has
# no genres. Picking it per row avoids the KeyError that selecting column 0 of
# an expanded frame raises when every genre list is empty.
first_genres = [
    genres[0] if genres else None
    for genres in tracks_df["artist_genres"]
]

# Two columns: track_name (taken from the index) and genre.
genres_df = pd.DataFrame(
    {"genre": first_genres}, index=tracks_df["track_name"]
).reset_index()
genres_df.head()

In [None]:
# Bar chart of how many tracks fall under each first-listed artist genre.
fig, ax = plt.subplots()
genres_df["genre"].value_counts().plot(kind='bar', rot=45, ax=ax)
ax.set(
    title="Tracks per first-listed artist genre",
    xlabel="Genre",
    ylabel="Number of tracks",
)
plt.show()

In [None]:
tracks_df.artist_name.value_counts()

## Audio Features

In [None]:
tracks_df.iloc[0]

In [None]:
# Look up the audio features for the track in the second row of tracks_df.
# The result is a list, which the next cell indexes with [0].
track_uri = tracks_df.iloc[1]['track_uri']
audio_feat = sp.audio_features(track_uri)
audio_feat

In [None]:
audio_feat[0]['danceability']

In [None]:
# sp.audio_analysis(track_uri)

In [None]:
tracks_df.describe()

## Features

### Metadata

In [None]:
def ohe_prep(df, column, new_name):
    '''
    One-hot encode a single column of a dataframe.
    ---
    Input:
    df (pandas dataframe): frame holding the column to encode
    column (str): name of the column to encode
    new_name (str): prefix for the generated columns; each output column is
        named "<new_name>|<category>"

    Output:
    pandas dataframe with one indicator column per distinct value of
    `column`, indexed 0..n-1 regardless of the index of `df`
    '''
    encoded = pd.get_dummies(df[column])
    # Prefix every category label so the columns stay identifiable once
    # they are combined with other feature frames.
    encoded.columns = [new_name + "|" + str(label) for label in encoded.columns]
    return encoded.reset_index(drop=True)



In [None]:
# TF-IDF implementation
# Each track's genre list is joined into one space-separated string, so the
# vectorizer weights the individual words that appear in the genre names.
tfidf = TfidfVectorizer()
genre_docs = tracks_df['artist_genres'].apply(lambda x: " ".join(x))
tfidf_matrix = tfidf.fit_transform(genre_docs)

# get_feature_names() was removed in scikit-learn 1.2; use the *_out variant
# when it exists and fall back to the old name on older releases.
if hasattr(tfidf, "get_feature_names_out"):
    genre_terms = tfidf.get_feature_names_out()
else:
    genre_terms = tfidf.get_feature_names()

# One row per track (fresh 0..n-1 index), one "genre|<term>" column per term.
genre_df = pd.DataFrame(
    tfidf_matrix.toarray(),
    columns=['genre' + "|" + str(term) for term in genre_terms],
)
# genre_df.drop(columns='genre|unknown') # Drop unknown genre
genre_df.iloc[0]

In [None]:
list(genre_df.columns)

In [None]:
# normalize popularity
# Divide both popularity columns by 100 (presumably rescaling a 0-100 score
# to 0-1 -- TODO confirm the source range).
# NOTE(review): the columns are overwritten in place, so running this cell a
# second time without rebuilding tracks_df divides the values by 100 again.
tracks_df['artist_pop'] = tracks_df['artist_pop']/100
tracks_df['track_pop'] = tracks_df['track_pop']/100
