# Beecomender Model Implementation

This Jupyter Notebook has been created to provide an example of how Beecomender will create it's recomendation based on user's Playlists.

---

### Importing required Libraries

In [1]:
from spotipy.oauth2 import SpotifyOAuth,SpotifyClientCredentials
import spotipy.oauth2 as oauth2
import pandas as pd
import spotipy
import config
from sklearn.preprocessing import MinMaxScaler
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity


### Spotify's Credential Manager Authentication

In [2]:
client_credentials_manager = SpotifyClientCredentials(client_id = config.client_ID, client_secret = config.client_secret)
sp = spotipy.Spotify(client_credentials_manager = client_credentials_manager)

### Getting User's Playlist data

In [3]:
def extract_playlist_URI(playlist_URL):
    # Extract the playlist URI from the playlist URL
    playlist_URI = playlist_URL.split("/")[-1].split("?")[0]
    return playlist_URI

def get_tracks_info(playlist_URL):
    # Get the playlist URI
    playlist_URI = extract_playlist_URI(playlist_URL)
    
    # Get the tracks of the playlist
    playlist_tracks = sp.playlist_tracks(playlist_URI)
    
    tracks_data = []
    for track in playlist_tracks["items"]:
        playlist_data = {}
        
        # Get the URI of the track
        track_uri = track["track"]["uri"]
        playlist_data.update({'track_uri' : track_uri})
        
        # Get the name of the track
        playlist_data.update({'track_name' : track["track"]["name"]})
        
        # Get the main artist URI
        artist_uri = track["track"]["artists"][0]["uri"]
        playlist_data.update({ 'artist_uri' : artist_uri})
        
        # Get information about the main artist
        artist_info = sp.artist(artist_uri)
        
        # Get the name, popularity, and genres of the artist
        playlist_data.update({'artist_name' : track["track"]["artists"][0]["name"]})
        playlist_data.update({ 'artist_popularity' : artist_info["popularity"]})
        playlist_data.update({ 'genres' : artist_info["genres"]})
        
        # Get the album information
        playlist_data.update({ 'album_name' : track["track"]["album"]["name"]})
        playlist_data.update({ 'album_uri' : track["track"]["album"]["uri"]})
        playlist_data.update({ 'release_date' : track["track"]["album"]["release_date"]})
        
        # Get the popularity of the track
        playlist_data.update({'track_popularity' : track["track"]["popularity"]})
        
        # Get the audio features of the track
        audio_features = sp.audio_features(track_uri)
        playlist_data.update(audio_features[0])
        
        keys_to_delete = ['type', 'id', 'uri', 'track_href', 'analysis_url']
        
        # Remove unnecessary keys from the playlist data
        for key in keys_to_delete:
            if key in playlist_data:
                del playlist_data[key]
        
        # Add the playlist data to the tracks data list
        tracks_data.append(playlist_data)

    return tracks_data
        

In [4]:
URL = r'https://open.spotify.com/playlist/6jTDePjg26b34wa3nuz3HM'
#URL = r'https://open.spotify.com/playlist/3MwWplprtniJHPxuWYxRZf'
data = get_tracks_info(URL)
data = pd.DataFrame(data)
data

Unnamed: 0,track_uri,track_name,artist_uri,artist_name,artist_popularity,genres,album_name,album_uri,release_date,track_popularity,...,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,duration_ms,time_signature
0,spotify:track:4Gz4TnkcpYlMWMfqNyfkvC,Más Buena,spotify:artist:1Db5GsIoVWYktPoD2nnPZZ,Gloria Trevi,69,"[latin arena pop, latin pop, mexican pop]",Versus,spotify:album:2qHa6D9zffuZhKxi0xtpUB,2017-06-23,59,...,-4.435,0,0.0702,0.0155,0.000000,0.367,0.525,98.079,233960,4
1,spotify:track:3GoKDP0kBKnsj9SlK1oYVz,Que Ganas de No Verte Nunca Mas,spotify:artist:3mGyF5kXDjEkLlFypJ93en,Lupita D'Alessio,55,[latin pop],Coleccion de Oro,spotify:album:7EeC2bxAGvdygHdAuzYVEf,2011-06-27,0,...,-7.125,1,0.0524,0.2630,0.000000,0.197,0.172,129.946,235293,4
2,spotify:track:2kfSFdq2h0xLXq01em1zc7,La Gata Bajo la Lluvia,spotify:artist:2uyweLa0mvPZH6eRzDddeB,Rocío Dúrcal,69,"[cancion melodica, latin pop, ranchera]",Sus 16 Grandes Exitos,spotify:album:1QXxmsxolhkqiFtI1mpX4i,1983-03-05,70,...,-5.812,1,0.0331,0.7230,0.000000,0.576,0.464,88.140,219107,4
3,spotify:track:4FsVB8msXzmemjJdTKUoal,Ya Te Olvidé,spotify:artist:5B8ApeENp4bE4EE3LI8jK2,Yuridia,73,"[latin arena pop, latin pop, mexican pop]",Para Mí,spotify:album:0nN4sE3MnNnZQSCXcYFiF0,2011-12-13,75,...,-3.500,1,0.0448,0.3180,0.000000,0.199,0.530,191.964,211373,4
4,spotify:track:6Vy53iriyX8Vqx0V68Jzn2,Cómo Te Va Mi Amor,spotify:artist:44nb9BaqV2jVvxKCaXHwlP,Pandora,58,[],Locas Por La Música,spotify:album:5skXUuGQ0Pg7H07k6NywN5,1990-01-01,69,...,-11.224,1,0.0280,0.5130,0.000000,0.071,0.334,75.535,222413,4
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95,spotify:track:1nULBrlzWatdcjA2ZctIMv,Labios Rotos - En Vivo,spotify:artist:6IdtcAwaNVAggwd6sCKgTI,Zoé,72,"[latin alternative, latin rock, mexican rock, ...",MTV Unplugged Música De Fondo (Reissue/Live),spotify:album:5fl6q7SKOuaO2tHXeTaZeV,2011-01-01,78,...,-7.080,1,0.0281,0.0731,0.002920,0.708,0.288,109.054,243040,4
96,spotify:track:55DQStH29qHXa29EgJa7fb,Besos De Ceniza,spotify:artist:1FbygZnWsyUjzjTGLhWVlO,Timbiriche,63,"[latin pop, mexican pop]",Timbiriche 7,spotify:album:5srfo2XTGJPW9EAiky3TdM,2009-01-01,65,...,-13.961,0,0.0669,0.1720,0.000000,0.081,0.951,142.677,176133,4
97,spotify:track:2yIUUOe7UVhvxQmaLVqRKr,El Hombre Que Yo Amo,spotify:artist:6NHTDaYSUeMaroI5U0jytT,Myriam Hernandez,59,"[latin pop, pop chileno]",Myriam Hernandez,spotify:album:2ev6My86W8Ri5qXygB645A,1988,69,...,-9.393,0,0.0647,0.2000,0.000000,0.108,0.285,177.433,216093,4
98,spotify:track:3EllE1ODNugJPpyo3o47aq,La Nave del Olvido - Sinfónico,spotify:artist:4mN0qcMxWX8oToqfDPM5yV,José José,74,"[bolero, cancion melodica, ranchera]",Sinfónico,spotify:album:3OBMNQfTggseTBEN8Xh3v2,2018-02-23,36,...,-8.280,0,0.0344,0.6690,0.000087,0.163,0.265,134.502,233507,4


### Preprocessing the Data to 'vectorize' it.
Once we have the data from User's playlist, we need to preprocess it. To make the recomendations, we'll summarize the whole playlist as a single song.

**Dropping the unnecesary columns**

In [5]:
dropped_df = data.drop(['artist_name', 'artist_uri', 'track_name', 'album_uri', 'album_name'], axis = 1)
dropped_df

Unnamed: 0,track_uri,artist_popularity,genres,release_date,track_popularity,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,duration_ms,time_signature
0,spotify:track:4Gz4TnkcpYlMWMfqNyfkvC,69,"[latin arena pop, latin pop, mexican pop]",2017-06-23,59,0.603,0.879,11,-4.435,0,0.0702,0.0155,0.000000,0.367,0.525,98.079,233960,4
1,spotify:track:3GoKDP0kBKnsj9SlK1oYVz,55,[latin pop],2011-06-27,0,0.514,0.657,8,-7.125,1,0.0524,0.2630,0.000000,0.197,0.172,129.946,235293,4
2,spotify:track:2kfSFdq2h0xLXq01em1zc7,69,"[cancion melodica, latin pop, ranchera]",1983-03-05,70,0.499,0.648,7,-5.812,1,0.0331,0.7230,0.000000,0.576,0.464,88.140,219107,4
3,spotify:track:4FsVB8msXzmemjJdTKUoal,73,"[latin arena pop, latin pop, mexican pop]",2011-12-13,75,0.445,0.786,5,-3.500,1,0.0448,0.3180,0.000000,0.199,0.530,191.964,211373,4
4,spotify:track:6Vy53iriyX8Vqx0V68Jzn2,58,[],1990-01-01,69,0.343,0.436,3,-11.224,1,0.0280,0.5130,0.000000,0.071,0.334,75.535,222413,4
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95,spotify:track:1nULBrlzWatdcjA2ZctIMv,72,"[latin alternative, latin rock, mexican rock, ...",2011-01-01,78,0.579,0.758,11,-7.080,1,0.0281,0.0731,0.002920,0.708,0.288,109.054,243040,4
96,spotify:track:55DQStH29qHXa29EgJa7fb,63,"[latin pop, mexican pop]",2009-01-01,65,0.859,0.644,3,-13.961,0,0.0669,0.1720,0.000000,0.081,0.951,142.677,176133,4
97,spotify:track:2yIUUOe7UVhvxQmaLVqRKr,59,"[latin pop, pop chileno]",1988,69,0.309,0.461,8,-9.393,0,0.0647,0.2000,0.000000,0.108,0.285,177.433,216093,4
98,spotify:track:3EllE1ODNugJPpyo3o47aq,74,"[bolero, cancion melodica, ranchera]",2018-02-23,36,0.325,0.437,9,-8.280,0,0.0344,0.6690,0.000087,0.163,0.265,134.502,233507,4


In [6]:
def vectorize_genres(data_df):
    # Create a copy of the input dataframe
    df = data_df.copy()

    # Create an instance of TfidfVectorizer
    tfidf = TfidfVectorizer()

    # Apply TF-IDF vectorization on the 'genres' column
    tfidf_matrix = tfidf.fit_transform(df['genres'].apply(lambda x: " ".join(x)))

    # Convert the TF-IDF matrix to a DataFrame
    genre_df = pd.DataFrame(tfidf_matrix.toarray())

    # Set column names for the genre DataFrame
    genre_df.columns = ['genre' + "|" + i for i in tfidf.get_feature_names_out()]

    # Reset the index of the genre DataFrame
    genre_df.reset_index(drop=True, inplace=True)

    # Concatenate the original dataframe and the genre DataFrame horizontally
    final_df = pd.concat([df, genre_df], axis=1)
    
    # Return the final dataframe
    return final_df

final_df = vectorize_genres(dropped_df)
final_df

Unnamed: 0,track_uri,artist_popularity,genres,release_date,track_popularity,danceability,energy,key,loudness,mode,...,genre|musica,genre|pop,genre|puerto,genre|ranchera,genre|reggaeton,genre|regional,genre|rican,genre|rock,genre|spanish,genre|tejano
0,spotify:track:4Gz4TnkcpYlMWMfqNyfkvC,69,"[latin arena pop, latin pop, mexican pop]",2017-06-23,59,0.603,0.879,11,-4.435,0,...,0.0,0.715542,0.0,0.000000,0.0,0.0,0.0,0.000000,0.0,0.0
1,spotify:track:3GoKDP0kBKnsj9SlK1oYVz,55,[latin pop],2011-06-27,0,0.514,0.657,8,-7.125,1,...,0.0,0.696109,0.0,0.000000,0.0,0.0,0.0,0.000000,0.0,0.0
2,spotify:track:2kfSFdq2h0xLXq01em1zc7,69,"[cancion melodica, latin pop, ranchera]",1983-03-05,70,0.499,0.648,7,-5.812,1,...,0.0,0.259225,0.0,0.415720,0.0,0.0,0.0,0.000000,0.0,0.0
3,spotify:track:4FsVB8msXzmemjJdTKUoal,73,"[latin arena pop, latin pop, mexican pop]",2011-12-13,75,0.445,0.786,5,-3.500,1,...,0.0,0.715542,0.0,0.000000,0.0,0.0,0.0,0.000000,0.0,0.0
4,spotify:track:6Vy53iriyX8Vqx0V68Jzn2,58,[],1990-01-01,69,0.343,0.436,3,-11.224,1,...,0.0,0.000000,0.0,0.000000,0.0,0.0,0.0,0.000000,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95,spotify:track:1nULBrlzWatdcjA2ZctIMv,72,"[latin alternative, latin rock, mexican rock, ...",2011-01-01,78,0.579,0.758,11,-7.080,1,...,0.0,0.000000,0.0,0.000000,0.0,0.0,0.0,0.684148,0.0,0.0
96,spotify:track:55DQStH29qHXa29EgJa7fb,63,"[latin pop, mexican pop]",2009-01-01,65,0.859,0.644,3,-13.961,0,...,0.0,0.758531,0.0,0.000000,0.0,0.0,0.0,0.000000,0.0,0.0
97,spotify:track:2yIUUOe7UVhvxQmaLVqRKr,59,"[latin pop, pop chileno]",1988,69,0.309,0.461,8,-9.393,0,...,0.0,0.450045,0.0,0.000000,0.0,0.0,0.0,0.000000,0.0,0.0
98,spotify:track:3EllE1ODNugJPpyo3o47aq,74,"[bolero, cancion melodica, ranchera]",2018-02-23,36,0.325,0.437,9,-8.280,0,...,0.0,0.000000,0.0,0.355103,0.0,0.0,0.0,0.000000,0.0,0.0


In [7]:
preprocess_data = vectorize_genres(dropped_df)
preprocess_data

Unnamed: 0,track_uri,artist_popularity,genres,release_date,track_popularity,danceability,energy,key,loudness,mode,...,genre|musica,genre|pop,genre|puerto,genre|ranchera,genre|reggaeton,genre|regional,genre|rican,genre|rock,genre|spanish,genre|tejano
0,spotify:track:4Gz4TnkcpYlMWMfqNyfkvC,69,"[latin arena pop, latin pop, mexican pop]",2017-06-23,59,0.603,0.879,11,-4.435,0,...,0.0,0.715542,0.0,0.000000,0.0,0.0,0.0,0.000000,0.0,0.0
1,spotify:track:3GoKDP0kBKnsj9SlK1oYVz,55,[latin pop],2011-06-27,0,0.514,0.657,8,-7.125,1,...,0.0,0.696109,0.0,0.000000,0.0,0.0,0.0,0.000000,0.0,0.0
2,spotify:track:2kfSFdq2h0xLXq01em1zc7,69,"[cancion melodica, latin pop, ranchera]",1983-03-05,70,0.499,0.648,7,-5.812,1,...,0.0,0.259225,0.0,0.415720,0.0,0.0,0.0,0.000000,0.0,0.0
3,spotify:track:4FsVB8msXzmemjJdTKUoal,73,"[latin arena pop, latin pop, mexican pop]",2011-12-13,75,0.445,0.786,5,-3.500,1,...,0.0,0.715542,0.0,0.000000,0.0,0.0,0.0,0.000000,0.0,0.0
4,spotify:track:6Vy53iriyX8Vqx0V68Jzn2,58,[],1990-01-01,69,0.343,0.436,3,-11.224,1,...,0.0,0.000000,0.0,0.000000,0.0,0.0,0.0,0.000000,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95,spotify:track:1nULBrlzWatdcjA2ZctIMv,72,"[latin alternative, latin rock, mexican rock, ...",2011-01-01,78,0.579,0.758,11,-7.080,1,...,0.0,0.000000,0.0,0.000000,0.0,0.0,0.0,0.684148,0.0,0.0
96,spotify:track:55DQStH29qHXa29EgJa7fb,63,"[latin pop, mexican pop]",2009-01-01,65,0.859,0.644,3,-13.961,0,...,0.0,0.758531,0.0,0.000000,0.0,0.0,0.0,0.000000,0.0,0.0
97,spotify:track:2yIUUOe7UVhvxQmaLVqRKr,59,"[latin pop, pop chileno]",1988,69,0.309,0.461,8,-9.393,0,...,0.0,0.450045,0.0,0.000000,0.0,0.0,0.0,0.000000,0.0,0.0
98,spotify:track:3EllE1ODNugJPpyo3o47aq,74,"[bolero, cancion melodica, ranchera]",2018-02-23,36,0.325,0.437,9,-8.280,0,...,0.0,0.000000,0.0,0.355103,0.0,0.0,0.0,0.000000,0.0,0.0


In [8]:
def extract_decade(date_str):
    year = int(date_str.split('-')[0])
    decade = (year // 10) * 10
    return int(decade)

def preprocess_decade(data_df):
    # Make a copy of the original DataFrame
    df = data_df.copy()
    
    # Apply the function to the "release_date" column
    df['release_date'] = df['release_date'].apply(extract_decade)
    
    return df

In [9]:
# Get the preprocessed release Date
preprocess_data = preprocess_decade(preprocess_data)
preprocess_data['release_date']

0     2010
1     2010
2     1980
3     2010
4     1990
      ... 
95    2010
96    2000
97    1980
98    2010
99    2010
Name: release_date, Length: 100, dtype: int64

In [10]:
def data_normalization(data_df, columns_to_scale):
    df = data_df.copy()
    # Create a MinMaxScaler object
    scaler = MinMaxScaler()

    # Apply the MinMaxScaler to the selected columns
    df[columns_to_scale] = scaler.fit_transform(df[columns_to_scale])
    
    return df

In [11]:
def create_summarized_data(playlist_data):
    # Calculate the mean of each column
    summarized_data = playlist_data.mean(numeric_only = 'True').to_frame(name='mean_value')
    
    return summarized_data

In [12]:
summarized_data = create_summarized_data(preprocess_data).T
summarized_data

Unnamed: 0,artist_popularity,release_date,track_popularity,danceability,energy,key,loudness,mode,speechiness,acousticness,...,genre|musica,genre|pop,genre|puerto,genre|ranchera,genre|reggaeton,genre|regional,genre|rican,genre|rock,genre|spanish,genre|tejano
mean_value,66.61,1998.8,57.12,0.57752,0.59251,5.08,-7.31076,0.68,0.049826,0.371186,...,0.048486,0.42097,0.005709,0.143407,0.012852,0.031403,0.005709,0.041138,0.039845,0.031584


# Create the recomendations
Once we have the summarized playlist data, we'll use the metric of Cosine Similarity included in sklearn.metrics library. This will give us a parameter on how similar is the playlist to every song in the dataset.

---

**We load the whole database to generate the recommendations**

In [13]:
tracks_database = pd.read_csv(r'Data\model_data.csv')
tracks_database

Unnamed: 0,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,...,genre|ye,genre|yodeling,genre|york,genre|youth,genre|zambian,genre|zhongguo,genre|zilizopendwa,genre|zolo,genre|zouk,genre|zuliana
0,0.904,0.813,4,-7.105,0,0.1210,0.0311,0.006970,0.0471,0.810,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,0.884,0.677,1,-5.603,1,0.2830,0.0778,0.000000,0.0732,0.584,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.794,0.805,0,-6.554,1,0.2300,0.5380,0.122000,0.0952,0.658,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.622,0.669,9,-8.419,1,0.3290,0.0266,0.000003,0.1520,0.570,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.797,0.750,0,-9.369,1,0.2470,0.5330,0.108000,0.0950,0.740,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
34316,0.434,0.279,8,-11.947,1,0.0465,0.7700,0.042400,0.1330,0.157,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
34317,0.416,0.394,11,-9.269,1,0.0641,0.5130,0.001550,0.0988,0.131,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
34318,0.669,0.228,2,-12.119,1,0.0690,0.7920,0.065000,0.0944,0.402,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
34319,0.493,0.727,1,-5.031,1,0.2170,0.0873,0.000000,0.1290,0.289,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [14]:
def align_dataframes(playlist_database, tracks_database):
    #Set a copy of the dataframes
    playlist_data = playlist_database.copy()
    tracks_data = tracks_database.copy()
    
    # Find missing columns in playlist_data
    missing_columns = list(set(tracks_data.columns) - set(playlist_data.columns))

    # Create a DataFrame with missing columns and default value of zero
    missing_columns_df = pd.DataFrame(0, index=playlist_data.index, columns=missing_columns)

    # Concatenate missing_columns_df with playlist_data
    playlist_data = pd.concat([playlist_data, missing_columns_df], axis=1)

    # Find missing columns in tracks_database
    missing_columns = list(set(playlist_data.columns) - set(tracks_data.columns))

    # Create a DataFrame with missing columns and default value of zero
    missing_columns_df = pd.DataFrame(0, index=tracks_data.index, columns=missing_columns)

    # Concatenate missing_columns_df with tracks_database
    tracks_data = pd.concat([tracks_data, missing_columns_df], axis=1)

    return playlist_data, tracks_data


In [15]:
# Assuming you have the playlist_data and tracks_database DataFrames
aligned_playlist_data, aligned_tracks_database = align_dataframes(summarized_data, tracks_database)


In [16]:
aligned_playlist_data

Unnamed: 0,artist_popularity,release_date,track_popularity,danceability,energy,key,loudness,mode,speechiness,acousticness,...,genre|shimmer,genre|slam,genre|latina,genre|norske,genre|corrido,genre|indiana,genre|colombiana,genre|neo,genre|sabahan,genre|nantes
mean_value,66.61,1998.8,57.12,0.57752,0.59251,5.08,-7.31076,0.68,0.049826,0.371186,...,0,0,0,0,0,0,0,0,0,0


In [17]:
aligned_tracks_database

Unnamed: 0,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,...,genre|york,genre|youth,genre|zambian,genre|zhongguo,genre|zilizopendwa,genre|zolo,genre|zouk,genre|zuliana,genre|regional,genre|femenil
0,0.904,0.813,4,-7.105,0,0.1210,0.0311,0.006970,0.0471,0.810,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0
1,0.884,0.677,1,-5.603,1,0.2830,0.0778,0.000000,0.0732,0.584,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0
2,0.794,0.805,0,-6.554,1,0.2300,0.5380,0.122000,0.0952,0.658,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0
3,0.622,0.669,9,-8.419,1,0.3290,0.0266,0.000003,0.1520,0.570,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0
4,0.797,0.750,0,-9.369,1,0.2470,0.5330,0.108000,0.0950,0.740,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
34316,0.434,0.279,8,-11.947,1,0.0465,0.7700,0.042400,0.1330,0.157,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0
34317,0.416,0.394,11,-9.269,1,0.0641,0.5130,0.001550,0.0988,0.131,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0
34318,0.669,0.228,2,-12.119,1,0.0690,0.7920,0.065000,0.0944,0.402,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0
34319,0.493,0.727,1,-5.031,1,0.2170,0.0873,0.000000,0.1290,0.289,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0


In [18]:
def generate_recommendations(playlist_data, tracks_database, recommendation_num=30):
    tracks_data = tracks_database.copy()
    
    #Get only the nummeric features
    playlist_vector = playlist_data.drop('track_uri', axis=1).values
    track_vectors = tracks_data.drop('track_uri', axis=1).values
    
    # Normalize the features before making the recommendations
    scaler = MinMaxScaler()
    track_vectors = scaler.fit_transform(track_vectors)
    playlist_vector = scaler.transform(playlist_vector)

    # Calculate cosine similarity between the playlist and the complete song set
    similarity_scores = cosine_similarity(track_vectors, playlist_vector)[:, 0]
    tracks_data['similarity'] = similarity_scores

    top_recommendations = tracks_data.sort_values('similarity', ascending=False).head(recommendation_num)
    return top_recommendations.index.to_list(), top_recommendations['similarity']

def get_recommendations_info(recommendations_id, tracks_database):
    # Filter 'tracks_database' DataFrame based on indices from 'recommendations_id'
    track_uris = tracks_database.loc[recommendations_id, 'track_uri']
    
    # Initialize lists to store the track information
    track_names = []
    track_artists = []
    track_albums = []
    track_urls = []
    
    # Retrieve track information for each URI
    for track_uri in track_uris:
        track_info = sp.track(track_uri)

        # Extract the desired information
        name = track_info['name']
        artists = ', '.join([artist['name'] for artist in track_info['artists']])
        album = track_info['album']['name']
        url = track_info['external_urls']['spotify']

        # Append the information to the respective lists
        track_names.append(name)
        track_artists.append(artists)
        track_albums.append(album)
        track_urls.append(url)

    # Create a DataFrame to store the track information
    track_info_df = pd.DataFrame({
        'Name': track_names,
        'Artist': track_artists,
        'Album': track_albums,
        'URL': track_urls
    })

    return track_info_df

In [19]:
recommendations, similarity = generate_recommendations(aligned_playlist_data, aligned_tracks_database)

In [20]:
similarity

7699     0.512714
13290    0.503265
8159     0.497756
14281    0.487534
12472    0.485701
26513    0.484817
31846    0.482962
13948    0.482241
5208     0.478375
12439    0.478000
28842    0.477536
14253    0.474735
18536    0.473111
19042    0.472379
33228    0.471215
8066     0.470208
2163     0.470201
5324     0.469401
17598    0.468545
8060     0.467635
14289    0.466115
2159     0.465627
23060    0.464118
8105     0.463939
30848    0.463935
8184     0.463730
18449    0.463195
23138    0.463097
2158     0.463096
19944    0.463044
Name: similarity, dtype: float64

In [21]:
recommendations

[7699,
 13290,
 8159,
 14281,
 12472,
 26513,
 31846,
 13948,
 5208,
 12439,
 28842,
 14253,
 18536,
 19042,
 33228,
 8066,
 2163,
 5324,
 17598,
 8060,
 14289,
 2159,
 23060,
 8105,
 30848,
 8184,
 18449,
 23138,
 2158,
 19944]

In [22]:
'''
[7699,
 13290,
 8159,
 28842,
 26513,
 14281,
 31846,
 12472,
 13948,
 12439,
 5208,
 18536,
 33228,
 8066,
 26982,
 19042,
 17598,
 14253,
 8184,
 12046,
 23060,
 5324,
 8155,
 8105,
 2163,
 27186,
 19944,
 2804,
 26983,
 8060]
'''

'\n[7699,\n 13290,\n 8159,\n 28842,\n 26513,\n 14281,\n 31846,\n 12472,\n 13948,\n 12439,\n 5208,\n 18536,\n 33228,\n 8066,\n 26982,\n 19042,\n 17598,\n 14253,\n 8184,\n 12046,\n 23060,\n 5324,\n 8155,\n 8105,\n 2163,\n 27186,\n 19944,\n 2804,\n 26983,\n 8060]\n'

In [23]:
recommendations_info = get_recommendations_info(recommendations, tracks_database)
recommendations_info

Unnamed: 0,Name,Artist,Album,URL
0,Bring Me To Life,Evanescence,Fallen,https://open.spotify.com/track/0COqiPhxzoWICwF...
1,Can't Be Touched (feat. Mr. Magic & Trouble),"Roy Jones Jr., Mr Magic, Trouble","Body Head Bangerz, Vol. 1",https://open.spotify.com/track/3zmduBNsQ6BPDTZ...
2,Duality,Slipknot,Vol. 3: The Subliminal Verses,https://open.spotify.com/track/61mWefnWQOLf90g...
3,Highway to Hell,AC/DC,Highway to Hell,https://open.spotify.com/track/2zYzyRzz6pRmhPz...
4,In the End,Linkin Park,Hybrid Theory (Bonus Edition),https://open.spotify.com/track/60a0Rd6pjrkxjPb...
5,Sarcasm,Get Scared,Best Kind Of Mess,https://open.spotify.com/track/40i81AcCCxiVEFU...
6,La Planta,Caos,La Vida Gacha,https://open.spotify.com/track/2GggG2lQVYuus2a...
7,It's My Life,Bon Jovi,Crush,https://open.spotify.com/track/0v1XpBHnsbkCn7i...
8,Best of You,Foo Fighters,In Your Honor,https://open.spotify.com/track/5FZxsHWIvUsmSK1...
9,B.Y.O.B.,System Of A Down,Mezmerize,https://open.spotify.com/track/0EYOdF5FCkgOJJl...
