In [137]:
import math
import operator
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import KFold
from sklearn.preprocessing import MinMaxScaler
from sklearn.utils import shuffle
from sklearn.metrics import accuracy_score

df = pd.read_csv("C:/Users/pradn/AnacondaProjects/Spotify Analysis/data/Data Fix Genre.csv")
df = shuffle(df)
df.head()

Unnamed: 0,No,Playlist,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,duration_ms
475,476,9,0.624,0.876,9,-3.374,1,0.1,0.0735,0.0,0.327,0.781,99.943,170827
516,517,10,0.833,0.515,11,-5.0,0,0.0462,0.347,0.00156,0.116,0.4,97.035,257333
367,368,7,0.411,0.449,9,-15.659,0,0.0318,0.548,0.609,0.0814,0.579,144.0,250000
320,321,6,0.859,0.75,8,-8.325,0,0.303,0.0193,0.0,0.0538,0.857,98.691,345717
104,105,2,0.506,0.886,11,-3.225,1,0.0655,0.0916,0.0,0.0734,0.595,143.935,226867


In [138]:
data = df.drop(columns=['No', 'Playlist'])
data.head()

Unnamed: 0,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,duration_ms
475,0.624,0.876,9,-3.374,1,0.1,0.0735,0.0,0.327,0.781,99.943,170827
516,0.833,0.515,11,-5.0,0,0.0462,0.347,0.00156,0.116,0.4,97.035,257333
367,0.411,0.449,9,-15.659,0,0.0318,0.548,0.609,0.0814,0.579,144.0,250000
320,0.859,0.75,8,-8.325,0,0.303,0.0193,0.0,0.0538,0.857,98.691,345717
104,0.506,0.886,11,-3.225,1,0.0655,0.0916,0.0,0.0734,0.595,143.935,226867


In [139]:
target = df.Playlist
target.head()

475     9
516    10
367     7
320     6
104     2
Name: Playlist, dtype: int64

In [140]:
min_max_scaler = MinMaxScaler()

In [141]:
def euclideanDistance(instance1, instance2, length):
    distance = 0
    for x in range(length):
        distance += pow((instance1[x] - instance2[x]), 2)
    return math.sqrt(distance)

def getNeighbors(trainingSet, testInstance, trainingTarget, k):
    distances = []
    length = len(testInstance)-1
    for x in range(len(trainingSet)):
        dist = euclideanDistance(testInstance, trainingSet[x], length)
        distances.append((trainingTarget[x], dist))
    distances.sort(key=operator.itemgetter(1))
    neighbors = []
    for x in range(k):
        neighbors.append(distances[x][0])
    return neighbors

def getResponse(neighbors):
    max = 0
    result = 0
    classVotes = {}
    for x in range(len(neighbors)):
        response = neighbors[x]
        if response in classVotes:
            classVotes[response] += 1
        else:
            classVotes[response] = 1
    for key, value in classVotes.items():
        if(value > max):
            result = key
            max = value
    return result
def getAccuracy(testSet, predictions):
    correct = 0
    for x in range(len(testSet)):
        if testSet[x][-1] == predictions[x]:
            correct += 1
    return (correct/float(len(testSet))) * 100.0

In [142]:
X = data.values
y = target.values
kf = KFold(n_splits=10)
k = 10

# KNN
fold = 0
accuracy_total = 0

for train_index, test_index in kf.split(X):
    X_train, X_test = X[train_index], X[test_index]
    y_train, y_test = y[train_index], y[test_index]
    
    X_train_scale = min_max_scaler.fit_transform(X_train)
    X_test_scale = min_max_scaler.transform(X_test)
    
    predictions=[]
    
    fold += 1
    print('KFold ' + str(fold))
    print('======================================')
          
    for i in range(len(X_test_scale)):
        neighbors = getNeighbors(X_train_scale, X_test_scale[i], y_train, k)
        result = getResponse(neighbors)
        predictions.append(result)
        
        print('> ' + str(i + 1) + ' predicted = ' + repr(result) + ', actual = ' + repr(y_test[i]))
        
    accuracy = accuracy_score(y_test, predictions)
    accuracy_total += accuracy
    
    print('---------------------------------------')
    print('KFold ' + str(fold) + ', Accuracy ' + str(accuracy))
    print('')

print('======================================')
print('Average Accuracy = ' + str(accuracy_total/fold))
print('======================================')

KFold 1
> 1 predicted = 2, actual = 9
> 2 predicted = 10, actual = 10
> 3 predicted = 7, actual = 7
> 4 predicted = 11, actual = 6
> 5 predicted = 3, actual = 2
> 6 predicted = 12, actual = 12
> 7 predicted = 10, actual = 11
> 8 predicted = 10, actual = 11
> 9 predicted = 2, actual = 2
> 10 predicted = 7, actual = 7
> 11 predicted = 5, actual = 5
> 12 predicted = 13, actual = 13
> 13 predicted = 0, actual = 13
> 14 predicted = 6, actual = 10
> 15 predicted = 7, actual = 7
> 16 predicted = 1, actual = 1
> 17 predicted = 2, actual = 4
> 18 predicted = 5, actual = 5
> 19 predicted = 6, actual = 4
> 20 predicted = 0, actual = 7
> 21 predicted = 10, actual = 10
> 22 predicted = 6, actual = 11
> 23 predicted = 1, actual = 1
> 24 predicted = 8, actual = 8
> 25 predicted = 13, actual = 13
> 26 predicted = 6, actual = 6
> 27 predicted = 3, actual = 4
> 28 predicted = 9, actual = 10
> 29 predicted = 7, actual = 7
> 30 predicted = 13, actual = 13
> 31 predicted = 11, actual = 9
> 32 predicted = 0

> 3 predicted = 3, actual = 3
> 4 predicted = 13, actual = 7
> 5 predicted = 11, actual = 9
> 6 predicted = 13, actual = 13
> 7 predicted = 3, actual = 12
> 8 predicted = 4, actual = 9
> 9 predicted = 13, actual = 0
> 10 predicted = 8, actual = 3
> 11 predicted = 12, actual = 3
> 12 predicted = 3, actual = 3
> 13 predicted = 6, actual = 6
> 14 predicted = 9, actual = 3
> 15 predicted = 12, actual = 12
> 16 predicted = 11, actual = 10
> 17 predicted = 3, actual = 2
> 18 predicted = 0, actual = 0
> 19 predicted = 2, actual = 9
> 20 predicted = 2, actual = 3
> 21 predicted = 10, actual = 10
> 22 predicted = 2, actual = 4
> 23 predicted = 2, actual = 2
> 24 predicted = 0, actual = 0
> 25 predicted = 12, actual = 12
> 26 predicted = 3, actual = 8
> 27 predicted = 4, actual = 4
> 28 predicted = 11, actual = 6
> 29 predicted = 2, actual = 2
> 30 predicted = 10, actual = 8
> 31 predicted = 11, actual = 10
> 32 predicted = 12, actual = 12
> 33 predicted = 4, actual = 4
> 34 predicted = 12, actu

> 67 predicted = 4, actual = 9
> 68 predicted = 6, actual = 6
> 69 predicted = 8, actual = 8
> 70 predicted = 10, actual = 2
---------------------------------------
KFold 8, Accuracy 0.45714285714285713

KFold 9
> 1 predicted = 9, actual = 0
> 2 predicted = 11, actual = 11
> 3 predicted = 1, actual = 1
> 4 predicted = 10, actual = 0
> 5 predicted = 13, actual = 0
> 6 predicted = 12, actual = 8
> 7 predicted = 3, actual = 6
> 8 predicted = 1, actual = 1
> 9 predicted = 7, actual = 7
> 10 predicted = 3, actual = 3
> 11 predicted = 10, actual = 11
> 12 predicted = 7, actual = 7
> 13 predicted = 3, actual = 9
> 14 predicted = 1, actual = 7
> 15 predicted = 5, actual = 5
> 16 predicted = 13, actual = 13
> 17 predicted = 13, actual = 7
> 18 predicted = 1, actual = 1
> 19 predicted = 10, actual = 10
> 20 predicted = 3, actual = 3
> 21 predicted = 11, actual = 6
> 22 predicted = 6, actual = 10
> 23 predicted = 13, actual = 13
> 24 predicted = 1, actual = 7
> 25 predicted = 3, actual = 3
> 26 p