In [1]:
import pandas as pd
import numpy as np
import sklearn
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import OneHotEncoder
from sklearn import preprocessing
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.cluster import KMeans
from sklearn import metrics
from sklearn.feature_extraction.text import TfidfVectorizer
from scipy.spatial.distance import cdist



In [2]:
# USEFUL FUNCTIONS

# def function_name(var):
#     '''
#     Preprocess the genre data
#     ---
#     Input: 
#     Output: 
#     '''
#     return 

def clean_fake_lists(df,col, quotation_mark):
    """
    Clean columns containing fake lists which are actually just strings.
    ---
    Input:
    df (pandas dataframe): dataframe of interest
    col (string): column containing fake list
    quotation_marks (string): delimiter in column
    ---
    Output:
    df (pandas dataframe): cleaned dataframe
    """
    df[col] = df[col].str.replace("[","")
    df[col] = df[col].str.replace("]","")
    df[col] = df[col].str.replace(quotation_mark,"")
    df[col] = df[col].str.lower()
    return df

def genre_preprocess(df, col):
    '''
    Preprocess the genre data
    ---
    Input: 
    df (pandas dataframe): dataframe of interest
    col (string): column containing list to split
    
    Output: 
    df (pandas dataframe): dataframe containing split column as a list
    '''
    col_name = col + '_list'
    df[col_name] = df[col].apply(lambda x: x.split(" "))
    return df

def scale_columns(df, columns):
    """
    Perform z-score scaling using StandardScaler and normalization using MinMaxScaler.
    ---
    Input:
    df (pandas dataframe): dataframe of interest
    columns (list): list containing columns to scale
    ---
    Output:
    df (pandas dataframe): cleaned dataframe
    """
    for col in columns:
        standard_scaler = StandardScaler()
        col_name = col + '_zscaled'
        df[col_name] = pd.DataFrame(standard_scaler.fit_transform(df[[col]]))
        min_max_scaler = MinMaxScaler()
        col_name = col + '_norm'
        df[col_name] = pd.DataFrame(min_max_scaler.fit_transform(df[[col]]))
    return df

###################################################
"""
LMAO there are 3 ways to get a similar result to OHE

"""

def onehotencode_data(df, col):
    '''
    Perform one-hot encoding on specified column (col) and return a new dataframe.
    ---
    Input: 
    df (pandas dataframe): dataframe of interest
    col (string): column to perform one-hot encode on
        
    Output: 
    df_encode (pandas dataframe): dataframe containing one-hot encoded values.
    '''
    to_encode = df[[col]]
    # proper one-hot encoding
    le = preprocessing.LabelEncoder()
    # changing to correct category
    to_encode[col] = to_encode[col].astype('category')
    X_2 = to_encode.apply(le.fit_transform)
    # fitting and encoding data
    encoder = OneHotEncoder(handle_unknown='ignore')
    encoder.fit(X_2)
    onehotlabels = encoder.transform(X_2).toarray()
    labeled = pd.DataFrame(onehotlabels)
    # rename dataframe
    sorted_col_names = list(df[col].unique())
    sorted_col_names.sort()
    key_dict = {}
    for i in range(len(sorted_col_names)):
        if i not in key_dict:
            key_dict[i] = sorted_col_names[i]
    df_encode = df_encode.rename(columns=key_dict)
    return df_encode

def ohe_prep(df, col):
    '''
    Create ohe of specified column.
    ---
    Input: 
    df (pandas dataframe): dataframe of interest
    col (string): column containing records to calculate tf-idf
    
    Output: 
    tf_idf (pandas dataframe): dataframe containing ohe value
    '''
    tf_df = pd.get_dummies(df[col])
    feature_names = tf_df.columns
    tf_df.columns = [col + "|" + str(i) for i in feature_names]
    tf_df.reset_index(drop = True, inplace = True)    
    return tf_df

def tf_idf_transform(df, col):
    '''
    Create ohe of specified column.
    ---
    Input: 
    df (pandas dataframe): dataframe of interest
    col (string): column containing records to calculate tf-idf
    
    Output: 
    tf_idf (pandas dataframe): dataframe containing ohe value
    '''
    # TF-IDF implementation
    combined_df = genre_preprocess(df, col)
    new_col = col + '_list'
    tfidf = TfidfVectorizer()
    tfidf_matrix =  tfidf.fit_transform(combined_df[new_col].apply(lambda x: " ".join(x)))
    tfidf_matrix_df = pd.DataFrame(tfidf_matrix.toarray())
    tfidf_matrix_df.columns = ['col1' + "|" + i for i in tfidf.get_feature_names_out()]
    # genre_df.drop(columns='col1|unknown') # Drop unknown genre
    tfidf_matrix_df.reset_index(drop = True, inplace=True)
    return tfidf_matrix_df
###################################################

def generate_playlist_feature(complete_feature_set, playlist_df):
    '''
    Summarize a user's playlist into a single vector and generate feature set of songs not in playlist_df.
    ---
    Input: 
    complete_feature_set (pandas dataframe): Dataframe containing the features of all the spotify songs
    playlist_df (pandas dataframe): Dataframe containing playlist to sample from
        
    Output: 
    vectorized_playlist (pandas series): single vector feature that summarizes playist_df using sum(axis = 0)
    nonplaylist_features (pandas dataframe): Dataframe containing feature of songs not in the playlist
    '''
    
    # Find song features in the playlist
    playlist_features = complete_feature_set[complete_feature_set['track_id'].isin(playlist_df['track_id'].values)]
    # Find all non-playlist song features
    nonplaylist_features = complete_feature_set[~complete_feature_set['track_id'].isin(playlist_df['track_id'].values)]
    vectorized_playlist = playlist_features.drop(columns = "track_id")
    return vectorized_playlist.sum(axis = 0), nonplaylist_features

def generate_playlist_recos(df, vectorized_playlist, nonplaylist_features, top_songs):
    '''
    Generated recommendation based on cosine similarity of songs in a specific playlist.
    ---
    Input: 
    df (pandas dataframe): Dataframe containing all details of all the spotify songs
    vectorized_playlist (pandas series): single vector feature that summarizes playist_df using sum(axis = 0)
    nonplaylist_features (pandas dataframe): Dataframe containing feature of songs not in the playlist
    top_songs (int): Integer representing top x songs to generate
        
    Output: 
    recommendations: Top {top_songs} recommendations for that playlist
    '''
    non_playlist_df = df[df['track_id'].isin(nonplaylist_features['track_id'].values)]
    # Find cosine similarity between the playlist and the complete song set
    non_playlist_df['similarity'] = cosine_similarity(nonplaylist_features.drop('track_id', axis = 1).values, vectorized_playlist.values.reshape(1, -1))[:,0]
    recommendations = non_playlist_df.sort_values('similarity',ascending = False).head(top_songs)
    
    return recommendations

# Merging emotions into recommender system

In [8]:
# only contains features
scaled_df = pd.read_csv('data/scaled_600k.csv')
print(scaled_df.shape)
scaled_df.head(5)

(586672, 15)


Unnamed: 0,explicit,energy,mode,instrumentalness,liveness,valence,popularity_norm,duration_ms_norm,danceability_norm,key_norm,loudness_norm,speechiness_norm,acousticness_norm,tempo_norm,time_signature_norm
0,0,0.445,1,0.744,0.151,0.127,0.06,0.021994,0.650858,0.0,0.713748,0.46447,0.676707,0.425564,0.6
1,0,0.263,1,0.0,0.148,0.655,0.0,0.016885,0.701312,0.0,0.579173,0.985582,0.800201,0.414029,0.2
2,0,0.177,1,0.0218,0.212,0.457,0.0,0.031737,0.437941,0.090909,0.593796,0.052729,0.997992,0.529335,1.0
3,0,0.0946,1,0.918,0.104,0.397,0.0,0.030895,0.323915,0.636364,0.490073,0.051905,0.998996,0.689907,0.6
4,0,0.158,0,0.13,0.311,0.196,0.0,0.028434,0.405651,0.272727,0.659263,0.040165,0.992972,0.418945,0.8


In [11]:
scaled_df.columns

Index(['explicit', 'energy', 'mode', 'instrumentalness', 'liveness', 'valence',
       'popularity_norm', 'duration_ms_norm', 'danceability_norm', 'key_norm',
       'loudness_norm', 'speechiness_norm', 'acousticness_norm', 'tempo_norm',
       'time_signature_norm'],
      dtype='object')

In [9]:
artist_df = pd.read_csv('data/artist_cleaned_genre.csv')
artist_df = artist_df.drop(columns='Unnamed: 0')
print(artist_df.shape)
artist_df.head(5)

(305590, 5)


Unnamed: 0,id,followers,genres,name,popularity
0,1uNFoZAHBGtllmzznpCI3s,44606973.0,"['canadian pop', ' pop', ' post-teen pop']",Justin Bieber,100
1,06HL4z0CvFAxyc27GXpf02,38869193.0,"['pop', ' post-teen pop']",Taylor Swift,98
2,3TVXtAsR1Inumwj472S9r4,54416812.0,"['canadian hip hop', ' canadian pop', ' hip ho...",Drake,98
3,4q3ewBCX7sLwd24euuV69X,32244734.0,"['latin', ' reggaeton', ' trap latino']",Bad Bunny,98
4,3Nrfpe0tUJi4K4DXYWgMUX,31623813.0,"['k-pop', ' k-pop boy group']",BTS,96


In [25]:
# isolate singular artist
songs_df = pd.read_csv('data/tracks.csv')
songs_df['artist'] = songs_df['artists'].apply(lambda x: eval(x)[0] if len(x) > 0 else None)
songs_df.head()

Unnamed: 0,id,name,popularity,duration_ms,explicit,artists,id_artists,release_date,danceability,energy,...,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,time_signature,artist
0,35iwgR4jXetI318WEWsa1Q,Carve,6,126903,0,['Uli'],['45tIt06XoI0Iio4LBEVpls'],1922-02-22,0.645,0.445,...,-13.338,1,0.451,0.674,0.744,0.151,0.127,104.851,3,Uli
1,021ht4sdgPcrDgSk7JTbKY,Capítulo 2.16 - Banquero Anarquista,0,98200,0,['Fernando Pessoa'],['14jtPCOoNZwquk5wd9DxrY'],1922-06-01,0.695,0.263,...,-22.136,1,0.957,0.797,0.0,0.148,0.655,102.009,1,Fernando Pessoa
2,07A5yehtSnoedViJAZkNnc,Vivo para Quererte - Remasterizado,0,181640,0,['Ignacio Corsini'],['5LiOoJbxVSAMkBS2fUm3X2'],1922-03-21,0.434,0.177,...,-21.18,1,0.0512,0.994,0.0218,0.212,0.457,130.418,5,Ignacio Corsini
3,08FmqUhxtyLTn6pAh6bk45,El Prisionero - Remasterizado,0,176907,0,['Ignacio Corsini'],['5LiOoJbxVSAMkBS2fUm3X2'],1922-03-21,0.321,0.0946,...,-27.961,1,0.0504,0.995,0.918,0.104,0.397,169.98,3,Ignacio Corsini
4,08y9GfoqCWfOGsKdwojr5e,Lady of the Evening,0,163080,0,['Dick Haymes'],['3BiJGZsyX9sJchTqcSA7Su'],1922,0.402,0.158,...,-16.9,0,0.039,0.989,0.13,0.311,0.196,103.22,4,Dick Haymes


In [31]:
# merging songs and artist and creating a new identifier
scale_merge_songs_df = scaled_df.copy()
scale_merge_songs_df['id'] = songs_df['id']
scale_merge_songs_df['name'] = songs_df['name'].str.lower()
scale_merge_songs_df['artist'] = songs_df['artist'].str.lower()
scale_merge_songs_df['new_identifier'] = scale_merge_songs_df['name'] + '_' + scale_merge_songs_df['artist']
desired_order = ['id', 'name', 'artist', 'new_identifier', 'explicit', 'energy', 'mode', 'instrumentalness', 'liveness', 'valence',
       'popularity_norm', 'duration_ms_norm', 'danceability_norm', 'key_norm',
       'loudness_norm', 'speechiness_norm', 'acousticness_norm', 'tempo_norm',
       'time_signature_norm']
scale_merge_songs_df = scale_merge_songs_df[desired_order]
scale_merge_songs_df.head()

Unnamed: 0,id,name,artist,new_identifier,explicit,energy,mode,instrumentalness,liveness,valence,popularity_norm,duration_ms_norm,danceability_norm,key_norm,loudness_norm,speechiness_norm,acousticness_norm,tempo_norm,time_signature_norm
0,35iwgR4jXetI318WEWsa1Q,carve,uli,carve_uli,0,0.445,1,0.744,0.151,0.127,0.06,0.021994,0.650858,0.0,0.713748,0.46447,0.676707,0.425564,0.6
1,021ht4sdgPcrDgSk7JTbKY,capítulo 2.16 - banquero anarquista,fernando pessoa,capítulo 2.16 - banquero anarquista_fernando p...,0,0.263,1,0.0,0.148,0.655,0.0,0.016885,0.701312,0.0,0.579173,0.985582,0.800201,0.414029,0.2
2,07A5yehtSnoedViJAZkNnc,vivo para quererte - remasterizado,ignacio corsini,vivo para quererte - remasterizado_ignacio cor...,0,0.177,1,0.0218,0.212,0.457,0.0,0.031737,0.437941,0.090909,0.593796,0.052729,0.997992,0.529335,1.0
3,08FmqUhxtyLTn6pAh6bk45,el prisionero - remasterizado,ignacio corsini,el prisionero - remasterizado_ignacio corsini,0,0.0946,1,0.918,0.104,0.397,0.0,0.030895,0.323915,0.636364,0.490073,0.051905,0.998996,0.689907,0.6
4,08y9GfoqCWfOGsKdwojr5e,lady of the evening,dick haymes,lady of the evening_dick haymes,0,0.158,0,0.13,0.311,0.196,0.0,0.028434,0.405651,0.272727,0.659263,0.040165,0.992972,0.418945,0.8


In [34]:
multiLabel_df.columns

Index(['artist', 'genre', 'title', 'album', 'year', 'lyrics', 'labels',
       'new_identifier'],
      dtype='object')

In [36]:
# creating new_identifier for multilabel dataset as well
multiLabel_df = pd.read_csv('data/MultiLabel.csv')
multiLabel_df['title'] = multiLabel_df['title'].str.lower()
multiLabel_df['artist'] = multiLabel_df['artist'].str.lower()
multiLabel_df['new_identifier'] = multiLabel_df['title'] + '_' + multiLabel_df['artist']
desired_order = ['new_identifier','artist', 'genre', 'title', 'album', 'year', 'lyrics', 'labels',]
multiLabel_df= multiLabel_df[desired_order]
multiLabel_df.head(5)

Unnamed: 0,new_identifier,artist,genre,title,album,year,lyrics,labels
0,you know you’re right_nirvana,nirvana,Rock,you know you’re right,Nirvana,2002.0,I will never bother you\nI will never promise ...,"Calmness, Sadness"
1,here we go_damian marley,damian marley,Reggae,here we go,Stony Hill,2017.0,Here we go\nMy big ego is gonna get me in trou...,"Power, Tension"
2,jade_the mission uk,the mission uk,Rock,jade,Another Fall from Grace,2016.0,She came as Lolita dressed as Venus\nAnd adorn...,"Amazement, Calmness, Solemnity, Tenderness"
3,food for thought_ub40,ub40,Reggae,food for thought,Signing Off,1980.0,"Ivory Madonna, dying in the dust\nWaiting for ...","Joyful activation, Sadness, Tension"
4,i’ve been everywhere_johnny cash,johnny cash,Country,i’ve been everywhere,American II: Unchained,1996.0,I was totin' my pack along the dusty Winnemucc...,"Amazement, Calmness, Joyful activation"


In [38]:
merge_emotion_scaled_songs = pd.merge(scale_merge_songs_df, multiLabel_df, on='new_identifier', how='inner')
merge_emotion_scaled_songs.head()

Unnamed: 0,id,name,artist_x,new_identifier,explicit,energy,mode,instrumentalness,liveness,valence,...,acousticness_norm,tempo_norm,time_signature_norm,artist_y,genre,title,album,year,lyrics,labels
0,0Z44FoBocXlcWS70n2vtcB,if i were a carpenter,johnny cash,if i were a carpenter_johnny cash,0,0.365,1,0.00515,0.177,0.667,...,0.509036,0.356614,0.8,johnny cash,Country,if i were a carpenter,"Hello, I’m Johnny Cash",,If I were a carpenter\nAnd you were a lady\nWo...,"Nostalgia, Tenderness"
1,1UN6NjkwkztYxl6XzIp0pJ,if i were a carpenter,johnny cash,if i were a carpenter_johnny cash,0,0.366,1,0.00586,0.181,0.689,...,0.544177,0.357897,0.8,johnny cash,Country,if i were a carpenter,"Hello, I’m Johnny Cash",,If I were a carpenter\nAnd you were a lady\nWo...,"Nostalgia, Tenderness"
2,6WJ9cUB9mgx7CAzIghmPLP,if i were a carpenter,johnny cash,if i were a carpenter_johnny cash,0,0.365,1,0.00515,0.177,0.667,...,0.509036,0.356614,0.8,johnny cash,Country,if i were a carpenter,"Hello, I’m Johnny Cash",,If I were a carpenter\nAnd you were a lady\nWo...,"Nostalgia, Tenderness"
3,1XCtquImTMy9aYF7N6Asnq,folsom prison blues,johnny cash,folsom prison blues_johnny cash,0,0.54,1,7.5e-05,0.0868,0.79,...,0.693775,0.41293,0.8,johnny cash,Country,folsom prison blues,Johnny Cash With His Hot and Blue Guitar,1955.0,"I hear the train a comin', it's rolling 'round...","Calmness, Sadness, Tension"
4,0Avmi9t3sOcaGSs1DSbgDg,folsom prison blues,johnny cash,folsom prison blues_johnny cash,0,0.54,1,7.5e-05,0.0868,0.79,...,0.693775,0.41293,0.8,johnny cash,Country,folsom prison blues,Johnny Cash With His Hot and Blue Guitar,1955.0,"I hear the train a comin', it's rolling 'round...","Calmness, Sadness, Tension"


In [39]:
merge_emotion_scaled_songs.columns

Index(['id', 'name', 'artist_x', 'new_identifier', 'explicit', 'energy',
       'mode', 'instrumentalness', 'liveness', 'valence', 'popularity_norm',
       'duration_ms_norm', 'danceability_norm', 'key_norm', 'loudness_norm',
       'speechiness_norm', 'acousticness_norm', 'tempo_norm',
       'time_signature_norm', 'artist_y', 'genre', 'title', 'album', 'year',
       'lyrics', 'labels'],
      dtype='object')

In [41]:
desired_columns = ['id', 'name', 'artist_x', 'explicit', 'energy',
       'mode', 'instrumentalness', 'liveness', 'valence', 'popularity_norm',
       'duration_ms_norm', 'danceability_norm', 'key_norm', 'loudness_norm',
       'speechiness_norm', 'acousticness_norm', 'tempo_norm',
       'time_signature_norm', 'genre', 'labels']
working_df = merge_emotion_scaled_songs[desired_columns]
working_df.head()

Unnamed: 0,id,name,artist_x,explicit,energy,mode,instrumentalness,liveness,valence,popularity_norm,duration_ms_norm,danceability_norm,key_norm,loudness_norm,speechiness_norm,acousticness_norm,tempo_norm,time_signature_norm,genre,labels
0,0Z44FoBocXlcWS70n2vtcB,if i were a carpenter,johnny cash,0,0.365,1,0.00515,0.177,0.667,0.16,0.031486,0.503532,0.636364,0.719591,0.041813,0.509036,0.356614,0.8,Country,"Nostalgia, Tenderness"
1,1UN6NjkwkztYxl6XzIp0pJ,if i were a carpenter,johnny cash,0,0.366,1,0.00586,0.181,0.689,0.51,0.031547,0.481332,0.636364,0.715446,0.044696,0.544177,0.357897,0.8,Country,"Nostalgia, Tenderness"
2,6WJ9cUB9mgx7CAzIghmPLP,if i were a carpenter,johnny cash,0,0.365,1,0.00515,0.177,0.667,0.1,0.031486,0.503532,0.636364,0.719591,0.041813,0.509036,0.356614,0.8,Country,"Nostalgia, Tenderness"
3,1XCtquImTMy9aYF7N6Asnq,folsom prison blues,johnny cash,0,0.54,1,7.5e-05,0.0868,0.79,0.19,0.029408,0.760848,0.454545,0.790412,0.027394,0.693775,0.41293,0.8,Country,"Calmness, Sadness, Tension"
4,0Avmi9t3sOcaGSs1DSbgDg,folsom prison blues,johnny cash,0,0.54,1,7.5e-05,0.0868,0.79,0.55,0.029408,0.760848,0.454545,0.790412,0.027394,0.693775,0.41293,0.8,Country,"Calmness, Sadness, Tension"


In [46]:
# calculate tf-idf for genre
tfidif_genre = tf_idf_transform(working_df, 'genre')
tfidif_genre

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[col_name] = df[col].apply(lambda x: x.split(" "))


Unnamed: 0,col1|country,col1|hip,col1|hop,col1|reggae,col1|rock
0,1.0,0.0,0.0,0.0,0.0
1,1.0,0.0,0.0,0.0,0.0
2,1.0,0.0,0.0,0.0,0.0
3,1.0,0.0,0.0,0.0,0.0
4,1.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...
861,0.0,0.0,0.0,1.0,0.0
862,0.0,0.0,0.0,1.0,0.0
863,0.0,0.0,0.0,1.0,0.0
864,0.0,0.0,0.0,0.0,1.0


In [47]:
# calculate tf-idf for labels
tfidf_labels = tf_idf_transform(working_df, 'labels')
tfidf_labels

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[col_name] = df[col].apply(lambda x: x.split(" "))


Unnamed: 0,col1|activation,col1|amazement,col1|calmness,col1|joyful,col1|nostalgia,col1|power,col1|sadness,col1|solemnity,col1|tenderness,col1|tension
0,0.000000,0.000000,0.000000,0.000000,0.719851,0.0,0.000000,0.000000,0.694129,0.000000
1,0.000000,0.000000,0.000000,0.000000,0.719851,0.0,0.000000,0.000000,0.694129,0.000000
2,0.000000,0.000000,0.000000,0.000000,0.719851,0.0,0.000000,0.000000,0.694129,0.000000
3,0.000000,0.000000,0.741279,0.000000,0.000000,0.0,0.461773,0.000000,0.000000,0.487105
4,0.000000,0.000000,0.741279,0.000000,0.000000,0.0,0.461773,0.000000,0.000000,0.487105
...,...,...,...,...,...,...,...,...,...,...
861,0.480942,0.613796,0.000000,0.480942,0.000000,0.0,0.000000,0.000000,0.400804,0.000000
862,0.707107,0.000000,0.000000,0.707107,0.000000,0.0,0.000000,0.000000,0.000000,0.000000
863,0.401965,0.513003,0.492769,0.401965,0.000000,0.0,0.000000,0.413346,0.000000,0.000000
864,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.499744,0.672932,0.545361,0.000000


In [59]:
# renaming some items
working_df = working_df.rename(columns={'id':'track_id'})

# merging the dataframes
feature_of_interest = ['track_id', 'explicit', 'energy',
       'mode', 'instrumentalness', 'liveness', 'valence', 'popularity_norm',
       'duration_ms_norm', 'danceability_norm', 'key_norm', 'loudness_norm',
       'speechiness_norm', 'acousticness_norm', 'tempo_norm',
       'time_signature_norm']
feature_df = pd.concat([working_df[feature_of_interest],tfidf_labels,tfidif_genre], axis=1)
feature_df

Unnamed: 0,track_id,explicit,energy,mode,instrumentalness,liveness,valence,popularity_norm,duration_ms_norm,danceability_norm,...,col1|power,col1|sadness,col1|solemnity,col1|tenderness,col1|tension,col1|country,col1|hip,col1|hop,col1|reggae,col1|rock
0,0Z44FoBocXlcWS70n2vtcB,0,0.365,1,0.005150,0.1770,0.667,0.16,0.031486,0.503532,...,0.0,0.000000,0.000000,0.694129,0.000000,1.0,0.0,0.0,0.0,0.0
1,1UN6NjkwkztYxl6XzIp0pJ,0,0.366,1,0.005860,0.1810,0.689,0.51,0.031547,0.481332,...,0.0,0.000000,0.000000,0.694129,0.000000,1.0,0.0,0.0,0.0,0.0
2,6WJ9cUB9mgx7CAzIghmPLP,0,0.365,1,0.005150,0.1770,0.667,0.10,0.031486,0.503532,...,0.0,0.000000,0.000000,0.694129,0.000000,1.0,0.0,0.0,0.0,0.0
3,1XCtquImTMy9aYF7N6Asnq,0,0.540,1,0.000075,0.0868,0.790,0.19,0.029408,0.760848,...,0.0,0.461773,0.000000,0.000000,0.487105,1.0,0.0,0.0,0.0,0.0
4,0Avmi9t3sOcaGSs1DSbgDg,0,0.540,1,0.000075,0.0868,0.790,0.55,0.029408,0.760848,...,0.0,0.461773,0.000000,0.000000,0.487105,1.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
861,4YcoVkg7iEPFClectMFem5,0,0.720,0,0.000000,0.0692,0.562,0.37,0.036795,0.511604,...,0.0,0.000000,0.000000,0.400804,0.000000,0.0,0.0,0.0,1.0,0.0
862,3yf6pp33fEh7TbUXsN6nzG,0,0.749,1,0.000000,0.0822,0.832,0.48,0.030467,0.679112,...,0.0,0.000000,0.000000,0.000000,0.000000,0.0,0.0,0.0,1.0,0.0
863,6f6jGJxVGVy5FuCHgFrTmA,0,0.864,0,0.000000,0.2730,0.858,0.44,0.045246,0.730575,...,0.0,0.000000,0.413346,0.000000,0.000000,0.0,0.0,0.0,1.0,0.0
864,00xcGFYL3ZFFKiv9VstuVy,0,0.889,1,0.493000,0.8340,0.596,0.31,0.038207,0.231080,...,0.0,0.499744,0.672932,0.545361,0.000000,0.0,0.0,0.0,0.0,1.0


In [52]:
working_df['artist_x'].value_counts().head(20)

nirvana                     56
bob marley & the wailers    45
ub40                        37
the cure                    29
the rolling stones          26
johnny cash                 25
nickelback                  24
elvis presley               22
2pac                        21
steel pulse                 20
jimmy cliff                 19
soundgarden                 18
eminem                      17
50 cent                     16
the doors                   15
lil wayne                   15
the who                     15
the jam                     15
drake                       14
pearl jam                   14
Name: artist_x, dtype: int64

In [60]:
sample_playlist = working_df[working_df['artist_x'] == 'eminem'][7:11]
sample_playlist

Unnamed: 0,track_id,name,artist_x,explicit,energy,mode,instrumentalness,liveness,valence,popularity_norm,...,key_norm,loudness_norm,speechiness_norm,acousticness_norm,tempo_norm,time_signature_norm,genre,labels,genre_list,labels_list
611,48RrDBpOSSl1aLVCalGl5C,the monster,eminem,1,0.853,0,0.0,0.12,0.624,0.76,...,0.090909,0.861478,0.073635,0.052711,0.446662,0.8,Hip-Hop,"Amazement, Joyful activation, Nostalgia, Power...",[Hip-Hop],"[Amazement,, Joyful, activation,, Nostalgia,, ..."
612,3aUp8U4MNWctRXlY5MPysv,the monster,eminem,1,0.853,0,0.0,0.12,0.624,0.52,...,0.090909,0.861478,0.073635,0.052711,0.446662,0.8,Hip-Hop,"Amazement, Joyful activation, Nostalgia, Power...",[Hip-Hop],"[Amazement,, Joyful, activation,, Nostalgia,, ..."
618,6Xk7PnitV9jCRorWt2LiVZ,berzerk,eminem,1,0.872,0,0.0,0.26,0.684,0.68,...,1.0,0.855681,0.342945,0.021787,0.385923,0.8,Hip-Hop,"Amazement, Joyful activation, Power",[Hip-Hop],"[Amazement,, Joyful, activation,, Power]"
619,2IJI9wQqrZlCIXv3439Ign,berzerk,eminem,1,0.872,0,0.0,0.26,0.684,0.47,...,1.0,0.855681,0.342945,0.021787,0.385923,0.8,Hip-Hop,"Amazement, Joyful activation, Power",[Hip-Hop],"[Amazement,, Joyful, activation,, Power]"


In [61]:
single_vector, nonplaylist_features = generate_playlist_feature(feature_df, sample_playlist)
reccomendation = generate_playlist_recos(working_df, single_vector, nonplaylist_features, 10)
reccomendation

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  non_playlist_df['similarity'] = cosine_similarity(nonplaylist_features.drop('track_id', axis = 1).values, vectorized_playlist.values.reshape(1, -1))[:,0]


Unnamed: 0,track_id,name,artist_x,explicit,energy,mode,instrumentalness,liveness,valence,popularity_norm,...,loudness_norm,speechiness_norm,acousticness_norm,tempo_norm,time_signature_norm,genre,labels,genre_list,labels_list,similarity
392,5R1WTY85gge1Ggd5rTMUJK,tha shiznit,snoop dogg,1,0.691,0,0.0,0.298,0.681,0.6,...,0.847559,0.057981,0.001978,0.392855,0.8,Hip-Hop,"Joyful activation, Power",[Hip-Hop],"[Joyful, activation,, Power]",0.960521
639,0N3W5peJUQtI4eyR6GJT5O,king kunta,kendrick lamar,1,0.657,0,0.0,0.154,0.489,0.75,...,0.84788,0.100618,0.005914,0.434526,0.8,Hip-Hop,"Joyful activation, Nostalgia, Power, Tension",[Hip-Hop],"[Joyful, activation,, Nostalgia,, Power,, Tens...",0.960216
629,6Ius4TC0L3cN74HT7ENE6e,wet dreamz,j. cole,1,0.705,0,0.0,0.128,0.584,0.81,...,0.792263,0.374871,0.075402,0.713959,0.8,Hip-Hop,"Amazement, Joyful activation, Nostalgia, Tende...",[Hip-Hop],"[Amazement,, Joyful, activation,, Nostalgia,, ...",0.955098
506,7gKgd0P3dAAePiSQQBqrlf,break ya neck,busta rhymes,1,0.809,0,0.00175,0.0613,0.737,0.71,...,0.814381,0.258496,0.444779,0.336921,0.8,Hip-Hop,"Joyful activation, Power, Tension",[Hip-Hop],"[Joyful, activation,, Power,, Tension]",0.951078
517,3G0yz3DZn3lfraledmBCT0,p.i.m.p.,50 cent,1,0.772,0,4e-06,0.0368,0.848,0.76,...,0.871512,0.356334,0.052309,0.343866,0.8,Hip-Hop,"Amazement, Power",[Hip-Hop],"[Amazement,, Power]",0.948012
378,74kHlIr01X459gqsSdNilW,i get around,2pac,1,0.717,0,1.3e-05,0.298,0.675,0.58,...,0.850954,0.108136,0.120482,0.39055,0.8,Hip-Hop,"Joyful activation, Nostalgia, Power, Tenderness",[Hip-Hop],"[Joyful, activation,, Nostalgia,, Power,, Tend...",0.947938
518,2D1hlMwWWXpkc3CZJ5U351,p.i.m.p.,50 cent,1,0.758,0,1.3e-05,0.0401,0.889,0.67,...,0.871696,0.352214,0.044578,0.319396,0.8,Hip-Hop,"Amazement, Power",[Hip-Hop],"[Amazement,, Power]",0.946924
462,1NHwvBmrUje4L1dxfWnXCH,put your hands where my eyes could see,busta rhymes,1,0.623,0,4e-06,0.0753,0.715,0.63,...,0.815116,0.312049,0.008675,0.403838,0.8,Hip-Hop,"Joyful activation, Power",[Hip-Hop],"[Joyful, activation,, Power]",0.944757
531,1CYyIsAaTrnmJ8MO2rec5f,what up gangsta,50 cent,1,0.767,0,0.0,0.0996,0.344,0.53,...,0.881256,0.040886,0.002139,0.33492,0.8,Hip-Hop,"Joyful activation, Power, Tension",[Hip-Hop],"[Joyful, activation,, Power,, Tension]",0.941777
532,56gKqLyxcyyXm4aSByvn59,what up gangsta,50 cent,1,0.767,0,0.0,0.0995,0.344,0.49,...,0.881256,0.040886,0.002139,0.334916,0.8,Hip-Hop,"Joyful activation, Power, Tension",[Hip-Hop],"[Joyful, activation,, Power,, Tension]",0.94105


# Using emotions as a measure of recommender system performance