# 2. Data Wrangling

source: https://www.aicrowd.com/challenges/spotify-million-playlist-dataset-challenge

# 2.3 Import

In [1]:
import json
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import os
import glob, collections


# 2.4 Load Spotify music playlists

In [2]:
path = "spotify_million_playlist_dataset/data/"
file_name = "mpd.slice.0-999.json"

# Load the first single json file 
data = json.load(open(path + file_name))

# Read json as DataFrame
df = pd.DataFrame(data['playlists'])

df.info

<bound method DataFrame.info of                  name collaborative  pid  modified_at  num_tracks  num_albums  \
0          Throwbacks         false    0   1493424000          52          47   
1    Awesome Playlist         false    1   1506556800          39          23   
2             korean          false    2   1505692800          64          51   
3                 mat         false    3   1501027200         126         107   
4                 90s         false    4   1401667200          17          16   
..                ...           ...  ...          ...         ...         ...   
995               old         false  995   1507852800          41          40   
996              Daze         false  996   1479254400          17          17   
997               rap         false  997   1410307200         119          98   
998           Country         false  998   1507939200         108          75   
999   thinking of you         false  999   1507766400          44          43

In [3]:
df.head()

Unnamed: 0,name,collaborative,pid,modified_at,num_tracks,num_albums,num_followers,tracks,num_edits,duration_ms,num_artists,description
0,Throwbacks,False,0,1493424000,52,47,1,"[{'pos': 0, 'artist_name': 'Missy Elliott', 't...",6,11532414,37,
1,Awesome Playlist,False,1,1506556800,39,23,1,"[{'pos': 0, 'artist_name': 'Survivor', 'track_...",5,11656470,21,
2,korean,False,2,1505692800,64,51,1,"[{'pos': 0, 'artist_name': 'Hoody', 'track_uri...",18,14039958,31,
3,mat,False,3,1501027200,126,107,1,"[{'pos': 0, 'artist_name': 'Camille Saint-Saën...",4,28926058,86,
4,90s,False,4,1401667200,17,16,2,"[{'pos': 0, 'artist_name': 'The Smashing Pumpk...",7,4335282,16,


In [4]:
df.dtypes

name             object
collaborative    object
pid               int64
modified_at       int64
num_tracks        int64
num_albums        int64
num_followers     int64
tracks           object
num_edits         int64
duration_ms       int64
num_artists       int64
description      object
dtype: object

In [5]:
df.describe()

Unnamed: 0,pid,modified_at,num_tracks,num_albums,num_followers,num_edits,duration_ms,num_artists
count,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0
mean,499.5,1476247000.0,67.503,50.526,2.583,17.546,15740610.0,38.746
std,288.819436,37195670.0,55.00334,41.448997,32.813501,20.978751,12905750.0,31.245091
min,0.0,1322611000.0,5.0,2.0,1.0,2.0,1002971.0,3.0
25%,249.75,1459944000.0,25.0,19.0,1.0,4.0,5810390.0,16.0
50%,499.5,1491048000.0,50.0,38.0,1.0,10.0,11631160.0,30.0
75%,749.25,1505779000.0,92.5,70.0,1.25,22.0,21819150.0,53.0
max,999.0,1509494000.0,245.0,225.0,1038.0,178.0,57817130.0,175.0


#### Averagely, each playlist has around 68 tracks.

In [6]:
print("Number of playlists: " , len(df))

Number of playlists:  1000


In [7]:
df.isnull().sum()

name               0
collaborative      0
pid                0
modified_at        0
num_tracks         0
num_albums         0
num_followers      0
tracks             0
num_edits          0
duration_ms        0
num_artists        0
description      980
dtype: int64

In [8]:
df.columns

Index(['name', 'collaborative', 'pid', 'modified_at', 'num_tracks',
       'num_albums', 'num_followers', 'tracks', 'num_edits', 'duration_ms',
       'num_artists', 'description'],
      dtype='object')

# 2.5 Explore The Data

In [9]:
# Only include useful columns
df = df.loc[:, ['name', 'pid', 'num_tracks', 'tracks', 'num_albums', 'num_artists']]
df.head()

Unnamed: 0,name,pid,num_tracks,tracks,num_albums,num_artists
0,Throwbacks,0,52,"[{'pos': 0, 'artist_name': 'Missy Elliott', 't...",47,37
1,Awesome Playlist,1,39,"[{'pos': 0, 'artist_name': 'Survivor', 'track_...",23,21
2,korean,2,64,"[{'pos': 0, 'artist_name': 'Hoody', 'track_uri...",51,31
3,mat,3,126,"[{'pos': 0, 'artist_name': 'Camille Saint-Saën...",107,86
4,90s,4,17,"[{'pos': 0, 'artist_name': 'The Smashing Pumpk...",16,16


In [10]:
# Explore all the tracks in the first playlist
fst_playlist = pd.DataFrame(df.iloc[0]['tracks'])
fst_playlist['playlist_name'] = df.iloc[0]['name']
fst_playlist['playlist_pid'] = df.iloc[0]['pid']
fst_playlist.head(3)

Unnamed: 0,pos,artist_name,track_uri,artist_uri,track_name,album_uri,duration_ms,album_name,playlist_name,playlist_pid
0,0,Missy Elliott,spotify:track:0UaMYEvWZi0ZqiDOoHU3YI,spotify:artist:2wIVse2owClT7go1WT98tk,Lose Control (feat. Ciara & Fat Man Scoop),spotify:album:6vV5UrXcfyQD1wu4Qo2I9K,226863,The Cookbook,Throwbacks,0
1,1,Britney Spears,spotify:track:6I9VzXrHxO9rA9A5euc8Ak,spotify:artist:26dSoYclwsYLMAKD3tpOr4,Toxic,spotify:album:0z7pVBGOD7HCIB7S8eLkLI,198800,In The Zone,Throwbacks,0
2,2,Beyoncé,spotify:track:0WqIKmW4BTrj3eJFmnCKMv,spotify:artist:6vWDO969PvNqNYHIOW5v0m,Crazy In Love,spotify:album:25hVFAxTlDvXbx2X2QkUkE,235933,Dangerously In Love (Alben für die Ewigkeit),Throwbacks,0


In [11]:
columns = list(fst_playlist.columns)
print("first playlist length: {} \n".format(len(fst_playlist)))
columns


first playlist length: 52 



['pos',
 'artist_name',
 'track_uri',
 'artist_uri',
 'track_name',
 'album_uri',
 'duration_ms',
 'album_name',
 'playlist_name',
 'playlist_pid']

In [12]:
df_track = fst_playlist
col_name_lst =  df_track.columns

def create_tracks(df, df_track):
    n = len(df_track)
    
    for i in range(1, len(df)):
        # create df for next playlist
        playlist = pd.DataFrame(df.iloc[i]['tracks'])
        playlist['playlist_name'] = df.iloc[i]['name']
        playlist['playlist_pid'] = df.iloc[i]['pid']

        # append subset df of each playlist to the big df
        df_track = df_track.append(playlist)
        
    return df_track
        
df_track = create_tracks(df, df_track)

In [13]:
def reorder_columns(df_track, col_name_lst):
    df_track = df_track[['playlist_name', 'playlist_pid'] + columns[:-2]]
    return df_track

In [14]:
print("Total tracks: ", len(df_track))
df_track.tail(3)

Total tracks:  67503


Unnamed: 0,pos,artist_name,track_uri,artist_uri,track_name,album_uri,duration_ms,album_name,playlist_name,playlist_pid
41,41,Allan Rayman,spotify:track:2oM4BuruDnEvk59IvIXCwn,spotify:artist:6Yv6OBXD6ZQakEljaGaDAk,25.22,spotify:album:3CbNgBzI7r9o0F6VjH9sTY,189213,Roadhouse 01,thinking of you,999
42,42,Jon Jason,spotify:track:4Ri5TTUgjM96tbQZd5Ua7V,spotify:artist:77bNdkKYBBmc30CisCA6tE,Good Feeling,spotify:album:2dZ7oVNQBeLlpoUYfbEsJP,194720,Good Feeling,thinking of you,999
43,43,Grizfolk,spotify:track:5RVuBrXVLptAEbGJdSDzL5,spotify:artist:6Xa4nbrSTfbioA4lLShbjh,Cosmic Angel - Acoustic From Capitol Studios,spotify:album:7D6Y19tjm4DQNch39FeWpO,257194,Cosmic Angel,thinking of you,999


In [15]:
df_track = reorder_columns(df_track, columns)
df_track.tail()

Unnamed: 0,playlist_name,playlist_pid,pos,artist_name,track_uri,artist_uri,track_name,album_uri,duration_ms,album_name
39,thinking of you,999,39,James Arthur,spotify:track:5uCax9HTNlzGybIStD3vDh,spotify:artist:4IWBUUAFIplrNtaOHcJPRM,Say You Won't Let Go,spotify:album:7oiJYvEJHsmYtrgviAVIBD,211466,Back from the Edge
40,thinking of you,999,40,Big Words,spotify:track:0P1oO2gREMYUCoOkzYAyFu,spotify:artist:0sHN89qak07mnug3LVVjzP,The Answer,spotify:album:5jrsRHRAmetu5e7RRBoxj7,263679,"Hollywood, a Beautiful Coincidence"
41,thinking of you,999,41,Allan Rayman,spotify:track:2oM4BuruDnEvk59IvIXCwn,spotify:artist:6Yv6OBXD6ZQakEljaGaDAk,25.22,spotify:album:3CbNgBzI7r9o0F6VjH9sTY,189213,Roadhouse 01
42,thinking of you,999,42,Jon Jason,spotify:track:4Ri5TTUgjM96tbQZd5Ua7V,spotify:artist:77bNdkKYBBmc30CisCA6tE,Good Feeling,spotify:album:2dZ7oVNQBeLlpoUYfbEsJP,194720,Good Feeling
43,thinking of you,999,43,Grizfolk,spotify:track:5RVuBrXVLptAEbGJdSDzL5,spotify:artist:6Xa4nbrSTfbioA4lLShbjh,Cosmic Angel - Acoustic From Capitol Studios,spotify:album:7D6Y19tjm4DQNch39FeWpO,257194,Cosmic Angel


### Tracks spare dataframe
This data frame contains all the tracks from all the playlists, with a spare vector of 0 and 1 that indicates whether the track is in that playlist. The position of each index is corresponding to the position of the playlist.

In [16]:
df_track.head(3)

Unnamed: 0,playlist_name,playlist_pid,pos,artist_name,track_uri,artist_uri,track_name,album_uri,duration_ms,album_name
0,Throwbacks,0,0,Missy Elliott,spotify:track:0UaMYEvWZi0ZqiDOoHU3YI,spotify:artist:2wIVse2owClT7go1WT98tk,Lose Control (feat. Ciara & Fat Man Scoop),spotify:album:6vV5UrXcfyQD1wu4Qo2I9K,226863,The Cookbook
1,Throwbacks,0,1,Britney Spears,spotify:track:6I9VzXrHxO9rA9A5euc8Ak,spotify:artist:26dSoYclwsYLMAKD3tpOr4,Toxic,spotify:album:0z7pVBGOD7HCIB7S8eLkLI,198800,In The Zone
2,Throwbacks,0,2,Beyoncé,spotify:track:0WqIKmW4BTrj3eJFmnCKMv,spotify:artist:6vWDO969PvNqNYHIOW5v0m,Crazy In Love,spotify:album:25hVFAxTlDvXbx2X2QkUkE,235933,Dangerously In Love (Alben für die Ewigkeit)


In [17]:
id = df_track.iloc[3]['track_uri']
print(id)
df_track.loc[df_track['track_uri'] == id][['playlist_pid', 'track_uri']]

spotify:track:1AWQoqb9bSvzTjaLralEkT


Unnamed: 0,playlist_pid,track_uri
3,0,spotify:track:1AWQoqb9bSvzTjaLralEkT
45,38,spotify:track:1AWQoqb9bSvzTjaLralEkT
27,161,spotify:track:1AWQoqb9bSvzTjaLralEkT
24,211,spotify:track:1AWQoqb9bSvzTjaLralEkT
13,355,spotify:track:1AWQoqb9bSvzTjaLralEkT
5,416,spotify:track:1AWQoqb9bSvzTjaLralEkT
30,594,spotify:track:1AWQoqb9bSvzTjaLralEkT
154,815,spotify:track:1AWQoqb9bSvzTjaLralEkT
17,904,spotify:track:1AWQoqb9bSvzTjaLralEkT


## Create tracks dictionary with key as track_uri and value is playlist vector

In [18]:
track_dict = {}

track_dict

for i in range(len(df_track)):
    song_id = df_track.iloc[i]['track_uri']
    
    playlist_id = df_track.iloc[i]['playlist_pid']
    
    if song_id not in track_dict:
        track_dict[song_id] = []
    

    track_dict[song_id].append(playlist_id)
    
    
track_dict

{'spotify:track:0UaMYEvWZi0ZqiDOoHU3YI': [0, 123, 218, 342, 382, 844],
 'spotify:track:6I9VzXrHxO9rA9A5euc8Ak': [0,
  38,
  123,
  262,
  355,
  389,
  635,
  717,
  782,
  795,
  815,
  833,
  844],
 'spotify:track:0WqIKmW4BTrj3eJFmnCKMv': [0,
  38,
  50,
  123,
  161,
  180,
  205,
  284,
  314,
  316,
  355,
  359,
  367,
  380,
  416,
  499,
  516,
  572,
  747,
  779,
  782,
  808,
  815,
  825,
  839,
  844,
  920],
 'spotify:track:1AWQoqb9bSvzTjaLralEkT': [0,
  38,
  161,
  211,
  355,
  416,
  594,
  815,
  904],
 'spotify:track:1lzr43nnXAijIGYnCT8M8H': [0,
  105,
  115,
  121,
  123,
  237,
  355,
  403,
  427,
  452,
  471,
  560,
  575,
  643,
  656,
  668,
  697,
  721,
  734,
  736,
  848,
  913,
  980,
  990,
  992],
 'spotify:track:0XUfyU2QviPAs6bxSpXYG4': [0,
  5,
  38,
  53,
  106,
  123,
  150,
  161,
  207,
  211,
  218,
  342,
  355,
  380,
  403,
  426,
  439,
  516,
  526,
  542,
  572,
  642,
  661,
  663,
  687,
  701,
  721,
  734,
  747,
  782,
  788,
  825,
 

In [19]:
print('Numbers of tracks in all playlists (with duplicate -> df_track) : ', len(df_track))
print('Numbers of unique tracks in all playlist: ', len(track_dict))

Numbers of tracks in all playlists (with duplicate -> df_track) :  67503
Numbers of unique tracks in all playlist:  34443


### The Compressed dot product algorithm

In [20]:
# The Compressed dot product algorithm is only work for BINARY compressed vector
# Ex: compressed vector = [0, 3, 5, 6, 7] <=> Sparse vector = [1, 0, 0, 1, 0, 1, 1, 1]
# or compressed([1, 0, 0, 1, 0, 1, 1, 1]) = [0, 3, 5, 6, 7]


def compressed_dot_product(v1, v2):
    '''
    This function calculate the dot product of two sparse vector (with same dimensionality)
    @param: 
        vector1, vector2: list of the playlists that include that track
        
    @return: the dot product of 2 compressed vectors
    '''
    matched = 0
    n, m = len(v1), len(v2)
    #print(f"N = {n}, M = {m}")
    i , j = 0, 0
    
    while i < n and j < m: # create two pointers
        
        if v1[i] == v2[j]:
            matched += 1
            i += 1
            j += 1
        elif v1[i] < v2[j]:
            i += 1
        else:
            j += 1
        
        #print(f"matched = {matched}")
    
    return matched

v1 = [0,1,3,5, 10, 100, 999]
v2 = [0,5, 13, 207]
print(compressed_dot_product(v1, v2))
    

2


## Cosine simlilarity between 2 songs (based on the playlist vector)

In [21]:
from sklearn.metrics.pairwise import cosine_similarity
from math import sqrt


# v1, v2 are compressed lists that represents sparse vectors 
# len(compressed(x)) = ||x||^2

def compressed_cos_similarity(v1, v2):
    return compressed_dot_product(v1, v2) / sqrt(len(v1) * len(v2))

compressed_cos_similarity([0,2,4], [0,1,3])

0.3333333333333333

## Example of comparing the cosine similarity of two tracks:
Crazy In Love (spotify:track:0WqIKmW4BTrj3eJFmnCKMv) and Toxic (spotify:track:6I9VzXrHxO9rA9A5euc8Ak)

In [22]:
# Create two vectors of the two songs
crazy_in_love = track_dict['spotify:track:0WqIKmW4BTrj3eJFmnCKMv']
toxic = track_dict['spotify:track:6I9VzXrHxO9rA9A5euc8Ak']

similarity = compressed_cos_similarity(crazy_in_love, toxic)
print(f"The cosine similarity of 'crazy in love' and 'toxic' tracks is: {similarity}")

The cosine similarity of 'crazy in love' and 'toxic' tracks is: 0.3736323588785367


Cosine similarity gives a real number ranged [0, 1] indicating how similar two songs are. 0.0 means the songs are in completely different playlists, 1.0 means the songs are included in all the same playlists.

In [23]:
print(f"Length of playlist df: {len(df_track)}")
df_track.head(3)

Length of playlist df: 67503


Unnamed: 0,playlist_name,playlist_pid,pos,artist_name,track_uri,artist_uri,track_name,album_uri,duration_ms,album_name
0,Throwbacks,0,0,Missy Elliott,spotify:track:0UaMYEvWZi0ZqiDOoHU3YI,spotify:artist:2wIVse2owClT7go1WT98tk,Lose Control (feat. Ciara & Fat Man Scoop),spotify:album:6vV5UrXcfyQD1wu4Qo2I9K,226863,The Cookbook
1,Throwbacks,0,1,Britney Spears,spotify:track:6I9VzXrHxO9rA9A5euc8Ak,spotify:artist:26dSoYclwsYLMAKD3tpOr4,Toxic,spotify:album:0z7pVBGOD7HCIB7S8eLkLI,198800,In The Zone
2,Throwbacks,0,2,Beyoncé,spotify:track:0WqIKmW4BTrj3eJFmnCKMv,spotify:artist:6vWDO969PvNqNYHIOW5v0m,Crazy In Love,spotify:album:25hVFAxTlDvXbx2X2QkUkE,235933,Dangerously In Love (Alben für die Ewigkeit)


Recommendation score for each track

In [24]:
toxic_id = "spotify:track:6I9VzXrHxO9rA9A5euc8Ak"

def get_recs_for_track(track_id):
    '''
        @param: track_id: a string that contain one SINGLE track_id (ex: "spotify:track:6I9VzXrHxO9rA9A5euc8Ak")
        @return: dictionary with key = track_id, value = cosine score
    '''
    track_vector = track_dict[track_id]
    track_score_dict = {}
    
    for playlist_pid in track_vector:
        playlist = df_track.loc[df_track['playlist_pid'] == playlist_pid][['playlist_pid', 'track_uri']]
        track_list = (playlist['track_uri']).tolist()

        # Compare the song to ALL the tracks on each playlist
        for item_id in track_list:
            track_score_dict[item_id] = compressed_cos_similarity(track_dict[track_id], track_dict[item_id])
            
    return track_score_dict

# At this point, this is ok to compare with itself because I will exclude those in the end and sort them later.
x = get_recs_for_track(toxic_id)
print(x)
print('length = ', len(x))

{'spotify:track:0UaMYEvWZi0ZqiDOoHU3YI': 0.3396831102433787, 'spotify:track:6I9VzXrHxO9rA9A5euc8Ak': 1.0, 'spotify:track:0WqIKmW4BTrj3eJFmnCKMv': 0.3736323588785367, 'spotify:track:1AWQoqb9bSvzTjaLralEkT': 0.3698001308168194, 'spotify:track:1lzr43nnXAijIGYnCT8M8H': 0.16641005886756874, 'spotify:track:0XUfyU2QviPAs6bxSpXYG4': 0.23782574707724702, 'spotify:track:68vgtRHr7iZHpzGpon6Jlo': 0.09805806756909202, 'spotify:track:3BxWKCI06eQ5Od8TY2JBeA': 0.4193139346887673, 'spotify:track:7H6ev70Weq6DdpZyyTmUXk': 0.16012815380508713, 'spotify:track:2PpruBYCo4H7WOBJ7Q2EwM': 0.16641005886756874, 'spotify:track:2gam98EZKrF9XuOkU13ApN': 0.38461538461538464, 'spotify:track:4Y45aqo9QMa57rDsAJv40A': 0.16012815380508713, 'spotify:track:1HwpWwa6bnqqRhK8agG4RS': 0.16012815380508713, 'spotify:track:20ORwCJusz4KS2PbTPVNKo': 0.2773500981126146, 'spotify:track:7k6IzwMGpxnRghE7YosnXT': 0.10482848367219183, 'spotify:track:1Bv0Yl01xBDZD4OQP93fyl': 0.1386750490563073, 'spotify:track:4omisSlTk6Dsq2iQD7MA07': 0.167

In [25]:
def get_track_ids(playlist_record):
    '''
    @param: 
        playlist_record: a "list" that only contain tracks information(dict)
    @ return
        track_ids : a list contains all track ids in ONE playlist
    '''
    track_ids = list() 
    
    for track_dict in playlist_record:
        track_id = track_dict['track_uri']
        track_ids.append(track_id)
    
    return track_ids


def get_recs_for_playlist(playlist_record):
    '''
    @param: 
        playlist_record: a "list" that only contain tracks information(dict)
    @return:
        track_scores_dict: a "dictionary" that has all recommendation songs for all tracks in the playlist 
    '''
    track_scores_dict = {}
    track_ids = get_track_ids(playlist_record)

    for track_dict in playlist_record:
        track_id = track_dict['track_uri']
        single_track_score = get_recs_for_track(track_id)
        
        #print(f"Track_id = {track_id}, length = {len(single_track_score)}")
        
        for k,v in single_track_score.items():
            if k in track_ids:
                continue
            
            if k not in track_scores_dict:
                track_scores_dict[k] = v
            else:
                track_scores_dict[k] = max(track_scores_dict[k], v)
        
    return track_scores_dict


def sorted_cosine_tracks(rec_tracks_dict):
    '''
    The function sorts cosine similarity scores of the tracks in one playlist
    
    @param:
        rec_tracks_dict: a dict of playlist contain all recommendation songs, key = track_id (in diff playlist)
            value = cosine similarity score
    @return:
        recs_list: a list of sorted recommendation songs based on cosine score (max = 1, min = 0)
    '''
    recs_list = []

    for k in sorted(rec_tracks_dict, key=rec_tracks_dict.get, reverse=True):
        #recs_list.append([k, rec_tracks_dict[k]])
        recs_list.append(k)

    return recs_list
    
    
def get_n_recommendation_songs_id(playlist_record, n):
    '''
    @param: 
        playlist_record: a "list" that only contain tracks information(dict)
    @return 
        recommendation_songs: a list of "n" recommendation songs for a single playlist
    '''
    
    rec_tracks_dict = get_recs_for_playlist(playlist_record)
    sorted_tracks_list = sorted_cosine_tracks(rec_tracks_dict)
    
    # If n < length of recommendation, give n songs, else give the maximum number of songs in the cosine maxtrix
    rec_songs_id = sorted_tracks_list[: min(n, len(sorted_tracks_list))]

    return rec_songs_id
    
def get_track_name(track_id):
    return df_track[df_track['track_uri'] == track_id].iloc[0]['track_name']

In [26]:
def get_n_recommendation_songs_name(df_track, playlist_record, n):
    rec_songs_id = get_n_recommendation_songs_id(playlist_record, n)
    rec_songs_name = []
    
    for track_id in rec_songs_id:
        name = get_track_name(track_id)
        rec_songs_name.append(name)
    
    return rec_songs_name

### Testing a recommmendation system with cosine similarity for one playlist

In [35]:
n = 20
throwback_lst_pid = 0
country_pid = 456

playlist = df[df['pid'] == country_pid].iloc[0]['tracks']
playlist_name = df[df['pid'] == country_pid].iloc[0]['name']

rec_tracks_dict = get_recs_for_playlist(playlist)
rec_songs = get_n_recommendation_songs_id(playlist, n)

print(f"All recommendation songs for the `{playlist_name}` playlist is {len(rec_tracks_dict)}")
print(f"The {n} recommendation songs for is: \n{rec_songs}")

All recommendation songs for the `Country` playlist is 5753
The 20 recommendation songs for is: 
['spotify:track:1ipcb9qXpSHWhSUvdxJhsx', 'spotify:track:7puxIVNdj5nsBJk43zM3bH', 'spotify:track:7k1Xm1wy00hCKJDYJL5p1n', 'spotify:track:4pFNWAU8e8F32NQkMyLAZi', 'spotify:track:38hbQ1wHHBNXDpPGJRMY28', 'spotify:track:3xx0jE52PbtvwXgNaBUdch', 'spotify:track:3jSyE8r8vgg8fmwojoMzql', 'spotify:track:1ewLdzcpSQkU9wpFdqFauw', 'spotify:track:3rYOzxOIYjq2hXPRFjCQ2v', 'spotify:track:5pgx4Q33u1c1jlpE64WVGY', 'spotify:track:3ZbSAHswIsfLH3im5Dc0t7', 'spotify:track:3C6WblKJq7rpsPsIafytEV', 'spotify:track:5o9LteJdhkA3ndUDz4JVfV', 'spotify:track:7EAFj7eJcMF5koWSRJVZcL', 'spotify:track:3YM0vEH7taFj7OLW3dWfiu', 'spotify:track:28Ity8iD8yajjIi58syN83', 'spotify:track:6LcPSBPSYRTMG7brqZQ7aq', 'spotify:track:0Uiz1nSJmK5TxZ6zfTFX2Q', 'spotify:track:4G9eIyEcVwLoG7kYNFpSII', 'spotify:track:0ABHhxQTaluB94ohp2RLSr']


In [36]:
rec_songs_name = get_n_recommendation_songs_name(df_track, playlist, n)
rec_songs_name
    

['Forever And Ever, Amen',
 'Carrying Your Love With Me',
 "All My Ex's Live In Texas",
 'Love Without End, Amen',
 'Drive (For Daddy Gene)',
 'Deeper Than The Holler',
 "Already Callin' You Mine",
 'Fly',
 "Missin' You Crazy",
 'Tonight Looks Good on You',
 "Just Gettin' Started",
 '1994',
 'Make You Mine',
 'The Fighter',
 'Love Triangle',
 "Lovin' Lately",
 'Everybody We Know Does',
 'Outta My Head',
 'Ocean Front Property',
 'Feelin’ It']

# Modelling:
1. Matrix factorization models
2. Neighborhood=based collaborative fitering moels
3. autoencoder model + CNN