In [1]:
import pandas as pd
import math
import logging
import spotipy
from spotipy.oauth2 import SpotifyClientCredentials
from spotipy.oauth2 import SpotifyOAuth
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score
import pickle
from os.path import exists
import numpy as np
import re
import tsfresh
from tsfresh.feature_extraction import MinimalFCParameters

In [2]:
def get_song(name_song, name_artist):
    results = sp.search(q=name_song + ' '+name_artist, type='track')
    items = results['tracks']['items']
    if len(items) > 0:
        return items[0]
    else:
        return None

# Spotify Song Features
def get_audio_features(song_name, artist_name):

    song = get_song(song_name, artist_name)

    #if get request didnt get anything
    if(song is None):
        return None

    song_features = sp.audio_features([song['uri']])

    audio_feature_list = ['danceability', 'energy', 'key', 'loudness', 'mode', 'speechiness', 'acousticness', 
                          'instrumentalness', 'liveness', 'valence', 'tempo', 'time_signature']

    audio_feat = []
    
    audio_feat.append(song_features[0].get('danceability'))
    audio_feat.append(song_features[0].get('energy'))
    audio_feat.append(song_features[0].get('loudness'))
    audio_feat.append(song_features[0].get('speechiness'))
    audio_feat.append(song_features[0].get('acousticness'))
    audio_feat.append(song_features[0].get('instrumentalness'))
    audio_feat.append(song_features[0].get('liveness'))
    audio_feat.append(song_features[0].get('valence'))
    audio_feat.append(song_features[0].get('tempo'))

    return audio_feat

In [3]:
def find_diffs(feature1, feature2):
    diffs = 0
    for i in range(len(feature1)):
        diffs += abs(feature1[i] - feature2[i])
    print(diffs)
    return diffs

def find_diffs_sq(feature1, feature2):
    diffs = 0
    for i in range(len(feature1)):
        diffs += (feature1[i] - feature2[i])**2
    return diffs

def find_closest_song(features):
    
    minDistance = sum(abs(features))
    minIndex = 0
        
    for i in range(len(song_artist_features)):
        diffs = find_diffs(features, song_artist_features[i][2])
        if diffs < minDistance:
            minDistance = diffs
            minIndex = i
            
    return minIndex

def split_ele(x):
    a = np.array(re.findall(r"[-+]?\d*\.?\d+|[-+]?\d+", x))
    return a.astype(float)

In [4]:
def show_recommendations_for_song(song):
    results = sp.recommendations(seed_tracks=[song['id']], limit=5)
    print("Recommendations:")
    for track in results['tracks']:
        print("TRACK: ",track['name'], " - ",track['artists'][0]['name'])
        sp.add_to_queue(track['uri'])

In [5]:
def preprocess(file_name):
    num_features = 10
    num_nodes = 14
    num_samples = 10

    pose_landmark_subset = ['LEFT_EYE_OUTER', 'RIGHT_EYE_OUTER', 'LEFT_SHOULDER', 'RIGHT_SHOULDER', 
                            'LEFT_ELBOW', 'RIGHT_ELBOW', 'LEFT_WRIST', 'RIGHT_WRIST', 'LEFT_HIP', 'RIGHT_HIP',
                            'LEFT_KNEE', 'RIGHT_KNEE', 'LEFT_ANKLE', 'RIGHT_ANKLE']
    df_columns = ['LEFT_EYE_OUTER_POS', 'LEFT_EYE_OUTER_DIST', 'RIGHT_EYE_OUTER_POS', 'RIGHT_EYE_OUTER_DIST', 
        'LEFT_SHOULDER_POS', 'LEFT_SHOULDER_DIST', 'RIGHT_SHOULDER_POS', 'RIGHT_SHOULDER_DIST', 
        'LEFT_ELBOW_POS', 'LEFT_ELBOW_DIST', 'RIGHT_ELBOW_POS', 'RIGHT_ELBOW_DIST', 'LEFT_WRIST_POS', 
        'LEFT_WRIST_DIST', 'RIGHT_WRIST_POS', 'RIGHT_WRIST_DIST', 'LEFT_HIP_POS', 'LEFT_HIP_DIST', 
        'RIGHT_HIP_POS', 'RIGHT_HIP_DIST', 'LEFT_KNEE_POS', 'LEFT_KNEE_DIST', 'RIGHT_KNEE_POS', 
        'RIGHT_KNEE_DIST', 'LEFT_ANKLE_POS', 'LEFT_ANKLE_DIST', 'RIGHT_ANKLE_POS', 'RIGHT_ANKLE_DIST']
    df = pd.read_csv(file_name, sep = ',', usecols=[4, 7, 12, 13, 14, 15, 16, 17, 24, 25, 26, 27, 28, 29])

    splitDf = df

    x_data = pd.DataFrame(columns=df_columns, index=range(len(df)))
    y_data = pd.DataFrame(columns=df_columns, index=range(len(df)))

    for node in pose_landmark_subset:
        curr = df[node]
        vals = [split_ele(x) for x in curr]
        for row in range(len(vals)):
            colname_pos = node + "_POS"
            colname_dist = node + "_DIST"

            if(row == 0):
                x_data[colname_dist][row] = 0
                y_data[colname_dist][row] = 0
            else:
                x_data[colname_dist][row] = vals[row][0] - vals[row-1][0]
                y_data[colname_dist][row] = vals[row][1] - vals[row-1][1]

            x_data[colname_pos][row] = vals[row][0]
            y_data[colname_pos][row] = vals[row][1]

    return [x_data, y_data]

In [6]:
def parse(x_data, y_data):
    num_features = 10
    num_nodes = 14
    num_samples = 10

    df_columns = ['LEFT_EYE_OUTER_POS', 'LEFT_EYE_OUTER_DIST', 'RIGHT_EYE_OUTER_POS', 'RIGHT_EYE_OUTER_DIST', 
        'LEFT_SHOULDER_POS', 'LEFT_SHOULDER_DIST', 'RIGHT_SHOULDER_POS', 'RIGHT_SHOULDER_DIST', 'LEFT_ELBOW_POS', 
        'LEFT_ELBOW_DIST', 'RIGHT_ELBOW_POS', 'RIGHT_ELBOW_DIST', 'LEFT_WRIST_POS', 'LEFT_WRIST_DIST', 
        'RIGHT_WRIST_POS', 'RIGHT_WRIST_DIST', 'LEFT_HIP_POS', 'LEFT_HIP_DIST', 'RIGHT_HIP_POS', 'RIGHT_HIP_DIST', 
        'LEFT_KNEE_POS', 'LEFT_KNEE_DIST', 'RIGHT_KNEE_POS', 'RIGHT_KNEE_DIST', 'LEFT_ANKLE_POS', 
        'LEFT_ANKLE_DIST', 'RIGHT_ANKLE_POS', 'RIGHT_ANKLE_DIST']

    #MOTION FEATURE EXTRACTION
    curr_extracted_vector = pd.DataFrame()

    #LOOP THROUGH NODES
    for col in df_columns:
        col_x = x_data[col]
        col_y = y_data[col]

        #Every Node (Body Part) has this set of feaures

        xname = col + "_x"
        yname = col + "_y"

        # Timeseries features to extract
        # TODO: tune parameters for autocorrelation, approximate_entropy, c3, cid_ce
        settings = {
            xname: {
                "kurtosis": None, 
                "standard_deviation": None, 
                "autocorrelation": [{"lag": 10}],
                "approximate_entropy": [{"m": 20, "r": 0.05}],
                "c3": [{"lag": 10}],
                "cid_ce": [{"normalize": True}]
            }, 
            yname: {
                "kurtosis": None, 
                "standard_deviation": None, 
                "autocorrelation": [{"lag": 10}],
                "approximate_entropy": [{"m": 20, "r": 0.05}],
                "c3": [{"lag": 10}],
                "cid_ce": [{"normalize": True}]
            }
        }

        comb = pd.DataFrame(data=[col_x, col_y], index=[xname, yname]).T
        comb.rename_axis("time")
        comb["id"] = 1
        comb["time"] = comb.index

        curr_extracted = tsfresh.extract_features(comb, column_id = "id", column_sort="time", 
                                                  column_kind=None, column_value=None, 
                                                  kind_to_fc_parameters=settings, disable_progressbar=True)
        curr_extracted_vector = pd.concat([curr_extracted_vector, curr_extracted], axis=1)
    
    return curr_extracted_vector

In [7]:
song_artist_pairs = {
    '1':('thats_what_i_like','bruno_mars'),
    '2':('humble','kendrick_lamar'),
    '3':('skeletons','keshi'),
    '4':('slow_dancing_in_the_dark','joji'),
    '5':('lite_spots','kaytranada'),
    '6':('woman','doja_cat'),
    '7':('get_up','ciara'),
    '8':('throwin_elbows','excision'),
    '9':('power','little mix'),
    '10':('peaches','justin_bieber'),
    '11':('knife_talk','drake'),
    '12':('fool_around','yas'),
    '13':('levitating','dua_lipa'),
    '14':('feed_the_fire','lucky_daye'),
    '15':('easily','bruno_major'),
    '16':('good_4_u','olivia_rodrigo'),
    '17':('all_i_wanna_do','jay_park'),
    '18':('sad_girlz_luv_money','amaarae'),
    '19':('tik_tok','kesha'),
    '20':('ymca','village_people'),
    '21':('intuition_interlude','jamie_foxx'),
    '22':('kilby_girl','the_backseat_lovers'),
    '23':('party_rock_anthem','lmfao'),
    '24':('frozen','sabrina_claudio'),
    '25':('weve_never_met_but_can_we_have_a_cup_of_coffee_or_something','in_love_with_a_ghost'),
    '26':('piano_song','eryn_allen_kane'),
    '27':('tacones_rojos','sebastian_yatra'),
    '28':('face_to_face','rex_orange_county'),
    '29':('violin_concerto_no_1_in_a_minor','johann_sebastian_bach'),
    '30':('candy','baekhyun'),
    '31':('whisper','boombox_cartel'),
    '32':('in_the_name_of_love','martin_garrix'),
    '33':('brukka','jack_and_lewis'),
    '34':('bum_bum_tam_tam','mc_fioti'),
    '35':('partition','beyonce'),
    '36':('greedy','ariana_grande'),
    '37':('idontwannabeyouanymore','billie_eilish'),
    '38':('lost_in_japan','shawn_mendes'),
    '39':('the_saints','andy_mineo'),
    '40':('dynamite','bts')
}

In [8]:
# Instantiate Spotify Client
logger = logging.getLogger()
logging.basicConfig()
CLIENT_ID="9793440f0a5047c59c70bcfcf91ad589"
CLIENT_SECRET= "b66dc3a5f9f34207bebee32a25745368"
REDIRECT_URL="http://localhost/"
client_credentials_manager = SpotifyClientCredentials(client_id = CLIENT_ID, client_secret = CLIENT_SECRET)
oAuth = SpotifyOAuth(client_id = CLIENT_ID, client_secret = CLIENT_SECRET, redirect_uri = REDIRECT_URL, scope = 'user-modify-playback-state')
sp = spotipy.Spotify(auth_manager =oAuth)

In [9]:
song_artist_features = []

# Extract all features
for key in song_artist_pairs.keys():

    song_name = song_artist_pairs[key][0]
    artist_name = song_artist_pairs[key][1]

    try:
        audio_feat = get_audio_features(song_name, artist_name)
    except:
        oAuth = SpotifyOAuth(client_id = CLIENT_ID, client_secret = CLIENT_SECRET, redirect_uri = REDIRECT_URL, scope = 'user-modify-playback-state')
        sp = spotipy.Spotify(auth_manager =oAuth)
        audio_feat = get_audio_features(song_name, artist_name)
    if audio_feat:
        song_artist_features.append((song_name, artist_name, audio_feat))

In [10]:
for i in range(len(song_artist_features)):
    print(song_artist_features[i])

('thats_what_i_like', 'bruno_mars', [0.853, 0.56, -4.961, 0.0406, 0.013, 0, 0.0944, 0.86, 134.066])
('humble', 'kendrick_lamar', [0.908, 0.621, -6.638, 0.102, 0.000282, 5.39e-05, 0.0958, 0.421, 150.011])
('skeletons', 'keshi', [0.719, 0.265, -12.782, 0.0754, 0.296, 0.0865, 0.108, 0.273, 80.021])
('slow_dancing_in_the_dark', 'joji', [0.515, 0.479, -7.458, 0.0261, 0.544, 0.00598, 0.191, 0.284, 88.964])
('lite_spots', 'kaytranada', [0.884, 0.549, -11.683, 0.471, 0.0346, 0.0698, 0.112, 0.394, 120.461])
('woman', 'doja_cat', [0.824, 0.764, -4.175, 0.0854, 0.0888, 0.00294, 0.117, 0.881, 107.998])
('get_up', 'ciara', [0.964, 0.595, -6.887, 0.109, 0.0248, 5.14e-06, 0.0405, 0.629, 128.593])
('throwin_elbows', 'excision', [0.574, 0.94, -3.038, 0.569, 0.00317, 0.0431, 0.113, 0.362, 149.988])
('power', 'little mix', [0.566, 0.855, -4.016, 0.211, 0.0372, 0.000157, 0.181, 0.517, 172.966])
('peaches', 'justin_bieber', [0.677, 0.696, -6.181, 0.119, 0.321, 0, 0.42, 0.464, 90.03])
('knife_talk', 'drake'

In [18]:
data = pd.read_csv("parser_output/extracted_features_split_3_28_upsample.csv")
data = data.dropna()

y = data[['danceability', 'energy', 'loudness', 'speechiness', 'acousticness', 'instrumentalness', 'liveness', 'valence', 'tempo']]
x = data[data.columns[1:len(data.columns)-12]]

X_train, X_test, y_train, y_test = train_test_split(x, y, test_size=0.7)

In [19]:
#lINEAR REGRESSION - 
reg_model = LinearRegression().fit(X_train, y_train)
ypred_train = reg_model.predict(X_train)
ypred_test = reg_model.predict(X_test)
print("LINEAR REGRESSION")
print("Train Score: ")
print(r2_score(y_train, ypred_train))
print("Test Score: ")
print(r2_score(y_test, ypred_test))

LINEAR REGRESSION
Train Score: 
0.8660095266081365
Test Score: 
-0.07230363024733669


In [20]:
#SAVED MODEL
filename = 'tempModel.sav'
model = pickle.load(open(filename, 'rb'))

In [21]:
x_data

Unnamed: 0,LEFT_EYE_OUTER_POS,LEFT_EYE_OUTER_DIST,RIGHT_EYE_OUTER_POS,RIGHT_EYE_OUTER_DIST,LEFT_SHOULDER_POS,LEFT_SHOULDER_DIST,RIGHT_SHOULDER_POS,RIGHT_SHOULDER_DIST,LEFT_ELBOW_POS,LEFT_ELBOW_DIST,...,RIGHT_HIP_POS,RIGHT_HIP_DIST,LEFT_KNEE_POS,LEFT_KNEE_DIST,RIGHT_KNEE_POS,RIGHT_KNEE_DIST,LEFT_ANKLE_POS,LEFT_ANKLE_DIST,RIGHT_ANKLE_POS,RIGHT_ANKLE_DIST
0,183.498932,0,177.297028,0,205.585052,0,168.519928,0,234.106415,0,...,184.400208,0,203.231873,0,186.451874,0,204.082458,0,188.130295,0
1,183.498932,0.0,177.297028,0.0,205.585052,0.0,168.519928,0.0,234.106415,0.0,...,184.400208,0.0,203.231873,0.0,186.451874,0.0,204.082458,0.0,188.130295,0.0
2,183.498932,0.0,177.297028,0.0,205.585052,0.0,168.519928,0.0,234.106415,0.0,...,184.400208,0.0,203.231873,0.0,186.451874,0.0,204.082458,0.0,188.130295,0.0
3,180.424942,-3.07399,182.649109,5.352081,177.454956,-28.130096,206.357498,37.83757,166.003998,-68.102417,...,196.937943,12.537735,183.392578,-19.839294,192.414459,5.962585,188.453201,-15.629257,198.052338,9.922043
4,180.424942,0.0,182.649109,0.0,177.454956,0.0,206.357498,0.0,166.003998,0.0,...,196.937943,0.0,183.392578,0.0,192.414459,0.0,188.453201,0.0,198.052338,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
350,139.713257,-1.824188,133.097275,-1.618881,175.585587,-0.715012,131.022461,0.190018,198.137772,-0.704773,...,143.1698,0.347214,143.884995,-1.102722,138.416458,-0.266907,137.015961,-1.612961,164.95314,5.548477
351,139.713257,0.0,133.097275,0.0,175.585587,0.0,131.022461,0.0,198.137772,0.0,...,143.1698,0.0,143.884995,0.0,138.416458,0.0,137.015961,0.0,164.95314,0.0
352,141.130569,1.417313,133.878082,0.780807,175.536606,-0.048981,131.994324,0.971863,195.916931,-2.22084,...,146.857788,3.687988,149.011795,5.126801,146.332962,7.916504,138.017654,1.001694,171.310425,6.357285
353,141.130569,0.0,133.878082,0.0,175.536606,0.0,131.994324,0.0,195.916931,0.0,...,146.857788,0.0,149.011795,0.0,146.332962,0.0,138.017654,0.0,171.310425,0.0


In [44]:
#HANDLE MOTION DATA
#[x_data, y_data] = preprocess("recorded.csv")
[x_data, y_data] = preprocess("data/7_woman_doja_cat.csv")
extracted_features = parse(x_data, y_data)
predicted_song = model.predict(extracted_features)

Feature names must be in the same order as they were in fit.



In [45]:
maxDiffs = find_closest_song(predicted_song[0])
print(song_artist_features[maxDiffs][0], song_artist_features[maxDiffs][1])

get_up ciara


In [39]:
song = get_song(song_artist_features[maxDiffs][0], song_artist_features[maxDiffs][1])
if song:
    show_recommendations_for_song(song)

ERROR:spotipy.client:HTTP Error for POST to https://api.spotify.com/v1/me/player/queue?uri=spotify:track:543jStmHR1VrSfpxmjUIn6 with Params: {} returned 404 due to Player command failed: No active device found


Recommendations:
TRACK:  フィクション  -  sumika


SpotifyException: http status: 404, code:-1 - https://api.spotify.com/v1/me/player/queue?uri=spotify:track:543jStmHR1VrSfpxmjUIn6:
 Player command failed: No active device found, reason: NO_ACTIVE_DEVICE