In [6]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics import confusion_matrix, classification_report
from sklearn.linear_model import Perceptron
from sklearn.preprocessing import MinMaxScaler

# Feature columns retained from the raw feature files; `title` is free text,
# the remaining entries are used as numeric inputs further down.
feature_columns = ["loudness", "title", "tempo", "time_signature", "key", "mode", "duration"]


def _read_selected_features(csv_path, columns):
    """Read the CSV at `csv_path` and return a frame restricted to `columns`.

    The selection goes through the `pd.DataFrame(..., columns=...)` constructor,
    so a requested column that is absent from the file is created and filled
    with NaN rather than raising.
    """
    return pd.DataFrame(pd.read_csv(csv_path), columns=columns)


# Training split.
X_train = _read_selected_features("dataset/train_features.csv", feature_columns)
y_train = pd.read_csv("dataset/train_labels.csv")

# Validation split (used as the held-out set in the cells below).
X_test = _read_selected_features("dataset/valid_features.csv", feature_columns)
y_test = pd.read_csv("dataset/valid_labels.csv")

# Genre names in the order that fixes their integer codes (1-based).
genre_names = [
    "classic pop and rock",
    "dance and electronica",
    "folk",
    "jazz and blues",
    "metal",
    "pop",
    "punk",
    "soul and reggae",
]

# Nested mapping in the form `DataFrame.replace` expects:
# {column name: {old value: new value}}.
cleanup_nums = {
    "genre": {genre: code for code, genre in enumerate(genre_names, start=1)}
}

# Encode the `genre` column of both label frames in place.
for label_frame in (y_train, y_test):
    label_frame.replace(cleanup_nums, inplace=True)


# Turn the free-text `title` column into TF-IDF features and append them to the
# remaining feature columns. The vectorizer is fitted on the training titles
# only and merely applied to the validation titles, so the vocabulary and IDF
# weights come exclusively from the training split.
tfidf_vect = TfidfVectorizer()
X_title_cv_train = tfidf_vect.fit_transform(X_train["title"])
X_title_cv_test = tfidf_vect.transform(X_test["title"])

# `get_feature_names()` was deprecated in scikit-learn 1.0 and removed in 1.2.
# Prefer its replacement and fall back only on installations that predate it.
if hasattr(tfidf_vect, "get_feature_names_out"):
    title_terms = tfidf_vect.get_feature_names_out()
else:
    title_terms = tfidf_vect.get_feature_names()

# Give the TF-IDF frames the same index as the frames they are joined to:
# `pd.concat(axis=1)` aligns on index labels, so a non-default index would
# otherwise misalign rows and introduce NaNs.
df_title_cv_train = pd.DataFrame(
    X_title_cv_train.toarray(), columns=title_terms, index=X_train.index
)
df_title_cv_test = pd.DataFrame(
    X_title_cv_test.toarray(), columns=title_terms, index=X_test.index
)

# Drop the raw text column *before* concatenating. Dropping by label afterwards
# would also remove a TF-IDF term column that happens to be named "title".
# `columns=` is used because pandas 2.0 no longer accepts `axis` positionally.
X_train = pd.concat([X_train.drop(columns=["title"]), df_title_cv_train], axis=1)
X_test = pd.concat([X_test.drop(columns=["title"]), df_title_cv_test], axis=1)

# Min-max scale every feature column. The per-column minimum and maximum are
# learned from the training matrix only and then reused unchanged for the
# validation matrix, so validation values can fall outside the training range.
scaler = MinMaxScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Display (n_samples, n_features) of the scaled training matrix.
X_train.shape

(7678, 6354)

In [2]:
# Train a linear Perceptron on the scaled training features; `random_state`
# is fixed so repeated runs give the same result.
genre_train = y_train["genre"]
genre_true = y_test["genre"]

lgr = Perceptron(tol=1e-3, random_state=0).fit(X_train, genre_train)
lgr_pred = lgr.predict(X_test)

# Let's see how the model performed on the validation split: print each metric
# under its heading, computed from the true and predicted genre codes.
evaluation_sections = (
    ("Classificaion report: \n", classification_report),
    ("Confusion matrix: \n", confusion_matrix),
)
for heading, metric_fn in evaluation_sections:
    print(heading)
    print(metric_fn(genre_true, lgr_pred))

Classificaion report: 

              precision    recall  f1-score   support

           1       0.23      0.40      0.29        55
           2       0.00      0.00      0.00        45
           3       0.30      0.47      0.37        64
           4       0.21      0.07      0.10        44
           5       0.61      0.33      0.43        66
           6       0.72      0.53      0.61        74
           7       0.25      0.70      0.37        44
           8       0.29      0.09      0.13        58

    accuracy                           0.34       450
   macro avg       0.33      0.32      0.29       450
weighted avg       0.36      0.34      0.31       450

Confusion matrix: 

[[22  1 17  2  1  1 10  1]
 [13  0  8  4  1  3 12  4]
 [20  1 30  2  0  2  7  2]
 [13  0 19  3  2  0  6  1]
 [ 8  1  3  0 22  2 26  4]
 [ 6  1  9  1  1 39 17  0]
 [ 4  2  3  1  0  3 31  0]
 [11  2 11  1  9  4 15  5]]
