In [4]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics import confusion_matrix, classification_report
from sklearn.naive_bayes import MultinomialNB
from sklearn.preprocessing import MinMaxScaler

X_train = pd.read_csv("dataset/train_features.csv")
y_train = pd.read_csv("dataset/train_labels.csv")
X_test = pd.read_csv("dataset/valid_features.csv")
y_test = pd.read_csv("dataset/valid_labels.csv")

X_train = pd.DataFrame(X_train, columns=["loudness","title","tempo", "time_signature","key","mode","duration"])
X_test = pd.DataFrame(X_test, columns=["loudness","title","tempo", "time_signature","key","mode","duration"])

cleanup_nums = {"genre": 
                {"classic pop and rock": 1, 
                 "dance and electronica": 2,
                 "folk":3,
                 "jazz and blues":4,
                 "metal":5,
                 "pop":6,
                 "punk":7,
                 "soul and reggae":8
                }}
y_train.replace(cleanup_nums, inplace=True)
y_test.replace(cleanup_nums, inplace=True)


tfidf_vect = TfidfVectorizer()
X_title_cv_train = tfidf_vect.fit_transform(X_train['title'])
df_title_cv_train = pd.DataFrame(X_title_cv_train.toarray(), columns=tfidf_vect.get_feature_names())
X_train = pd.concat([X_train, df_title_cv_train], axis=1)
X_train = X_train.drop(["title"], 1)

X_title_cv_test = tfidf_vect.transform(X_test['title'])
df_title_cv_test = pd.DataFrame(X_title_cv_test.toarray(), columns=tfidf_vect.get_feature_names())
X_test = pd.concat([X_test, df_title_cv_test], axis=1)
X_test = X_test.drop(["title"], 1)

scaler = MinMaxScaler()
scaler.fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

array([[0.67829633, 0.33661613, 0.42857143, ..., 0.        , 0.        ,
        0.        ],
       [0.76121432, 0.47822839, 0.14285714, ..., 0.        , 0.        ,
        0.        ],
       [0.84900317, 0.45218072, 0.57142857, ..., 0.        , 0.        ,
        0.        ],
       ...,
       [0.68393181, 0.79811964, 0.14285714, ..., 0.        , 0.        ,
        0.        ],
       [0.66130494, 0.57790986, 0.57142857, ..., 0.        , 0.        ,
        0.        ],
       [0.89776846, 0.37162696, 0.14285714, ..., 0.        , 0.        ,
        0.        ]])

In [2]:
mnb=MultinomialNB(alpha = 1.0)
y_train["genre"] = y_train["genre"].astype('int')
mnb.fit(X_train, y_train["genre"])
pred_mnb=mnb.predict(X_test)

# let's how our model performed
print("Classificaion report: \n")
print(classification_report(y_test["genre"], pred_mnb))
print("Confusion matrix: \n")
print(confusion_matrix(y_test["genre"], pred_mnb))

print("\n\n\n")

def evaluate(pred, true):
    CM = metrics.confusion_matrix(true, pred) # Confusion Matrix
    Acc = metrics.accuracy_score(true, pred) # Accuracy
    precf1 = metrics.precision_recall_fscore_support(true, pred) # Precision, Recall and F1-score
    return CM, Acc, precf1

CM, Acc, precf1 = evaluate(pred_mnb, y_test["genre"])

correct = 0
for i in range(len(y_test)):
    correct = correct + int(pred_mnb[i]==y_test["genre"][i])
    
print("Confusion Matrix:\n{}\naccuracy: {}\naccuracy by sklearn.metric: {}\nprecision: {}\nrecall: {}\nF1: {}\n".format(
                                                CM,
                                                correct / len(y_test), 
                                                Acc,
                                                precf1[0],
                                                precf1[1],
                                                precf1[2]))

Classificaion report: 

              precision    recall  f1-score   support

           1       0.15      0.67      0.24        55
           2       0.00      0.00      0.00        45
           3       0.30      0.53      0.38        64
           4       0.00      0.00      0.00        44
           5       0.48      0.15      0.23        66
           6       0.91      0.43      0.59        74
           7       0.52      0.32      0.39        44
           8       0.80      0.07      0.13        58

    accuracy                           0.29       450
   macro avg       0.39      0.27      0.25       450
weighted avg       0.43      0.29      0.27       450

Confusion matrix: 

[[37  0 17  0  1  0  0  0]
 [33  0 11  0  0  0  1  0]
 [27  0 34  0  3  0  0  0]
 [31  0 11  0  1  0  0  1]
 [38  0 18  0 10  0  0  0]
 [27  0  4  0  0 32 11  0]
 [21  0  6  0  0  3 14  0]
 [34  0 13  0  6  0  1  4]]






  _warn_prf(average, modifier, msg_start, len(result))


NameError: name 'metrics' is not defined