In [7]:
import joblib
from sklearn.ensemble import RandomForestClassifier

import spotipy
from spotipy.oauth2 import SpotifyClientCredentials
import spotipy.util as util

from pathlib import Path
import os

from dotenv import load_dotenv

import pandas as pd
import numpy as np

In [2]:
load_dotenv()
env_path = ('../.env')
load_dotenv(dotenv_path=env_path)

SPOTIFY_CLIENT = os.getenv('SPOTIFY_CLIENT')
SPOTIFY_SECRET = os.getenv('SPOTIFY_SECRET')
USERNAME = os.getenv('SPOTIFY_USERNAME')
redirect_uri = 'http://localhost:8888/callback/'

os.environ['SPOTIPY_CLIENT_ID']= SPOTIFY_CLIENT
os.environ['SPOTIPY_CLIENT_SECRET']= USERNAME
os.environ['SPOTIPY_REDIRECT_URI']= redirect_uri

In [4]:
filename = '../models/finalized_model.sav'

loaded_model = joblib.load(filename)

In [18]:
#authorizations and connecting to spotify
scope = 'user-library-read user-top-read playlist-modify-public playlist-read-private'


credentials_manager = SpotifyClientCredentials(client_id=SPOTIFY_CLIENT, client_secret=SPOTIFY_SECRET) 
sp = spotipy.Spotify(client_credentials_manager=credentials_manager)
token = util.prompt_for_user_token(USERNAME, scope, SPOTIFY_CLIENT, SPOTIFY_SECRET, redirect_uri)

if token:
    sp = spotipy.Spotify(auth=token)
else:
    print("Can't get token for", USERNAME) 

In [8]:
## load raw library data

library = pd.read_csv('../data/raw/raw_library_data.csv')

library.head()

Unnamed: 0,id,acousticness,danceability,duration_ms,energy,instrumentalness,key,liveness,loudness,mode,...,added_time,song_name,short_pts,med_pts,long_pts,time_pts,total_pts,artist_short_pts,artist_med_pts,artist_long_pts
0,46lZpwebFWgqad5CX1iv6r,0.0637,0.919,158040,0.672,0.0144,10,0.0597,-3.404,0,...,2021-05-14 19:10:01+00:00,Entrégala,3.0,0.0,0.0,3.0,6.0,0.0,0.0,0.0
1,5Pc594FhDA2Fa2prE75GT0,0.0311,0.906,194607,0.474,5e-06,1,0.286,-6.124,1,...,2021-05-13 22:30:16+00:00,Pretty Please,3.0,0.0,0.0,3.0,6.0,0.0,0.0,0.0
2,0fea68AdmYNygeTGI4RC18,0.152,0.754,242573,0.646,1.8e-05,7,0.108,-5.795,1,...,2021-05-13 03:41:42+00:00,LA CANCIÓN,0.0,0.0,0.0,3.0,3.0,0.0,0.0,0.0
3,5ZicFGBDAi9J2YCVesboUp,0.162,0.405,368720,0.619,0.000334,11,0.0788,-8.92,0,...,2021-05-11 16:09:42+00:00,Professional,0.0,0.0,0.0,3.0,6.0,3.0,0.0,0.0
4,4pHQ9RYFhn3W0ha5KEuMnj,0.151,0.642,283933,0.79,0.000583,11,0.305,-6.8,0,...,2021-05-11 16:09:32+00:00,Adaptation,0.0,0.0,0.0,3.0,6.0,3.0,0.0,0.0


In [19]:
import random

rec_tracks = []

ids = []

for x in range(5):
    ids.append(random.choice(library.loc[library['total_pts'] >= 3]['id'].values.tolist()))


rec_tracks = sp.recommendations(seed_tracks=ids, limit=100)['tracks']

rec_track_ids = []
rec_track_names = []

for i in rec_tracks:
    rec_track_ids.append(i['id'])
    rec_track_names.append(i['name'])

rec_features = []
for i in range(0,len(rec_track_ids)):
    rec_audio_features = sp.audio_features(rec_track_ids[i])
    for track in rec_audio_features:
        rec_features.append(track)
        
rec_playlist_df = pd.DataFrame(rec_features, index = rec_track_ids)

In [20]:
rec_playlist_df.head()

Unnamed: 0,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,type,id,uri,track_href,analysis_url,duration_ms,time_signature
4CUTUsBGjHslu2Bd3AOXGg,0.766,0.3,11,-8.911,0,0.0375,0.585,0.00174,0.114,0.44,85.993,audio_features,4CUTUsBGjHslu2Bd3AOXGg,spotify:track:4CUTUsBGjHslu2Bd3AOXGg,https://api.spotify.com/v1/tracks/4CUTUsBGjHsl...,https://api.spotify.com/v1/audio-analysis/4CUT...,176599,4
74wpYGsVh2J13iAmmg1Uln,0.698,0.707,9,-3.778,0,0.114,0.284,0.0,0.177,0.413,155.009,audio_features,74wpYGsVh2J13iAmmg1Uln,spotify:track:74wpYGsVh2J13iAmmg1Uln,https://api.spotify.com/v1/tracks/74wpYGsVh2J1...,https://api.spotify.com/v1/audio-analysis/74wp...,247688,4
61TklWcG8PbT5hvUTzwLaY,0.858,0.65,8,-4.919,1,0.241,0.0066,2e-06,0.121,0.653,147.031,audio_features,61TklWcG8PbT5hvUTzwLaY,spotify:track:61TklWcG8PbT5hvUTzwLaY,https://api.spotify.com/v1/tracks/61TklWcG8PbT...,https://api.spotify.com/v1/audio-analysis/61Tk...,210545,4
644rGY6maSElnKVsAg1gJj,0.645,0.418,6,-10.065,0,0.29,0.558,0.0003,0.562,0.123,96.963,audio_features,644rGY6maSElnKVsAg1gJj,spotify:track:644rGY6maSElnKVsAg1gJj,https://api.spotify.com/v1/tracks/644rGY6maSEl...,https://api.spotify.com/v1/audio-analysis/644r...,140760,4
4RIi1gNmKDzSH04Vvws2DK,0.56,0.311,9,-8.508,1,0.038,0.934,6e-05,0.0706,0.452,180.05,audio_features,4RIi1gNmKDzSH04Vvws2DK,spotify:track:4RIi1gNmKDzSH04Vvws2DK,https://api.spotify.com/v1/tracks/4RIi1gNmKDzS...,https://api.spotify.com/v1/audio-analysis/4RIi...,161600,4


In [21]:
#keeping only relevant features

rec_tracks_df = rec_playlist_df[["id", "acousticness", "danceability", "duration_ms", 
                         "energy", "instrumentalness",  "key", "liveness",
                         "loudness", "speechiness", "tempo", "valence", 'uri']]

rec_tracks_df.head()

Unnamed: 0,id,acousticness,danceability,duration_ms,energy,instrumentalness,key,liveness,loudness,speechiness,tempo,valence,uri
4CUTUsBGjHslu2Bd3AOXGg,4CUTUsBGjHslu2Bd3AOXGg,0.585,0.766,176599,0.3,0.00174,11,0.114,-8.911,0.0375,85.993,0.44,spotify:track:4CUTUsBGjHslu2Bd3AOXGg
74wpYGsVh2J13iAmmg1Uln,74wpYGsVh2J13iAmmg1Uln,0.284,0.698,247688,0.707,0.0,9,0.177,-3.778,0.114,155.009,0.413,spotify:track:74wpYGsVh2J13iAmmg1Uln
61TklWcG8PbT5hvUTzwLaY,61TklWcG8PbT5hvUTzwLaY,0.0066,0.858,210545,0.65,2e-06,8,0.121,-4.919,0.241,147.031,0.653,spotify:track:61TklWcG8PbT5hvUTzwLaY
644rGY6maSElnKVsAg1gJj,644rGY6maSElnKVsAg1gJj,0.558,0.645,140760,0.418,0.0003,6,0.562,-10.065,0.29,96.963,0.123,spotify:track:644rGY6maSElnKVsAg1gJj
4RIi1gNmKDzSH04Vvws2DK,4RIi1gNmKDzSH04Vvws2DK,0.934,0.56,161600,0.311,6e-05,9,0.0706,-8.508,0.038,180.05,0.452,spotify:track:4RIi1gNmKDzSH04Vvws2DK


In [24]:
X_rec_tracks = rec_tracks_df.drop(columns = ['id', 'uri'])

X_rec_tracks.head()

Unnamed: 0,acousticness,danceability,duration_ms,energy,instrumentalness,key,liveness,loudness,speechiness,tempo,valence
4CUTUsBGjHslu2Bd3AOXGg,0.585,0.766,176599,0.3,0.00174,11,0.114,-8.911,0.0375,85.993,0.44
74wpYGsVh2J13iAmmg1Uln,0.284,0.698,247688,0.707,0.0,9,0.177,-3.778,0.114,155.009,0.413
61TklWcG8PbT5hvUTzwLaY,0.0066,0.858,210545,0.65,2e-06,8,0.121,-4.919,0.241,147.031,0.653
644rGY6maSElnKVsAg1gJj,0.558,0.645,140760,0.418,0.0003,6,0.562,-10.065,0.29,96.963,0.123
4RIi1gNmKDzSH04Vvws2DK,0.934,0.56,161600,0.311,6e-05,9,0.0706,-8.508,0.038,180.05,0.452


In [26]:
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import MinMaxScaler

pipeline = Pipeline([
    ('std_scaler', MinMaxScaler())
])

In [29]:
X_rec_scaled = pipeline.fit_transform(X_rec_tracks)

In [30]:
from sklearn.ensemble import RandomForestClassifier

rec_predict = loaded_model.predict(X_rec_scaled)

In [32]:
rec_tracks_df['predict'] = rec_predict

rec_tracks_df.head()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  rec_tracks_df['predict'] = rec_predict


Unnamed: 0,id,acousticness,danceability,duration_ms,energy,instrumentalness,key,liveness,loudness,speechiness,tempo,valence,uri,predict
4CUTUsBGjHslu2Bd3AOXGg,4CUTUsBGjHslu2Bd3AOXGg,0.585,0.766,176599,0.3,0.00174,11,0.114,-8.911,0.0375,85.993,0.44,spotify:track:4CUTUsBGjHslu2Bd3AOXGg,0
74wpYGsVh2J13iAmmg1Uln,74wpYGsVh2J13iAmmg1Uln,0.284,0.698,247688,0.707,0.0,9,0.177,-3.778,0.114,155.009,0.413,spotify:track:74wpYGsVh2J13iAmmg1Uln,1
61TklWcG8PbT5hvUTzwLaY,61TklWcG8PbT5hvUTzwLaY,0.0066,0.858,210545,0.65,2e-06,8,0.121,-4.919,0.241,147.031,0.653,spotify:track:61TklWcG8PbT5hvUTzwLaY,0
644rGY6maSElnKVsAg1gJj,644rGY6maSElnKVsAg1gJj,0.558,0.645,140760,0.418,0.0003,6,0.562,-10.065,0.29,96.963,0.123,spotify:track:644rGY6maSElnKVsAg1gJj,1
4RIi1gNmKDzSH04Vvws2DK,4RIi1gNmKDzSH04Vvws2DK,0.934,0.56,161600,0.311,6e-05,9,0.0706,-8.508,0.038,180.05,0.452,spotify:track:4RIi1gNmKDzSH04Vvws2DK,1


In [33]:
rec_tracks_df['predict'].value_counts()

0    66
1    34
Name: predict, dtype: int64

In [34]:
final_recs = rec_tracks_df.loc[rec_tracks_df['predict'] == 1]['id'].values.tolist()

In [36]:
# creating a playlist with these recs
recs_playlist = sp.user_playlist_create(USERNAME, name= 'Python Recs')

#adding songs to playlist
sp.user_playlist_add_tracks(USERNAME, recs_playlist['id'], final_recs)

{'snapshot_id': 'MixlZWNkYmIzODc2Yjg2MDE5NjA3Zjg1MDgwODk3M2NiMWQwMzVlOGVj'}