In [None]:
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn.metrics import accuracy_score
from sklearn.metrics import auc
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
from sklearn.metrics import roc_auc_score
from sklearn.metrics import roc_curve
from sklearn.model_selection import train_test_split
from sklearn.multiclass import OneVsRestClassifier
from sklearn.naive_bayes import BernoulliNB
from sklearn.naive_bayes import GaussianNB
from sklearn.naive_bayes import MultinomialNB
from sklearn.preprocessing import label_binarize


In [None]:
# Load the two inputs used by every experiment below: `x` is passed to the
# run_* functions as the feature matrix and `y` as the target labels.
# NOTE(review): the file name suggests `x` holds PCA-reduced features — confirm.
x, y = (
    pd.read_csv(csv_path)
    for csv_path in ('ml-25m/pca_final.csv', 'ml-25m/rating_only.csv')
)

In [None]:
def run_GaussianNB(x, y, output_dir='Naive'):
    """Train and evaluate a Gaussian naive Bayes classifier on a hold-out split.

    The data is split 80/20 with a fixed seed. A ``GaussianNB`` model is fitted
    on the training part only and scored on the test part: the classification
    report is printed and written to ``reportGaussianNB.txt`` and the confusion
    matrix is saved as ``confusion_matrixGaussianNB.png``. A one-vs-rest
    ``GaussianNB`` is then fitted on the same training part to draw one ROC
    curve per class, saved as ``roc_curveGaussianNB.png``.

    Parameters
    ----------
    x : array-like or DataFrame
        Feature matrix, one row per sample.
    y : array-like or DataFrame
        Class label for each row of ``x``.
    output_dir : str, default 'Naive'
        Directory that receives the report and the figures; created if missing.

    Returns
    -------
    None
        Results are printed, displayed and written to ``output_dir``.
    """
    os.makedirs(output_dir, exist_ok=True)

    x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=42)

    model = GaussianNB()
    # Fit on the training split only: fitting on all of (x, y) would let the
    # model see the test rows and inflate every metric reported below.
    model.fit(x_train, y_train)
    y_pred = model.predict(x_test)

    report = classification_report(y_test, y_pred)
    print(report)
    with open(os.path.join(output_dir, 'reportGaussianNB.txt'), 'w') as f:
        print(report, file=f)

    # NOTE(review): the tick labels are hard-coded; this assumes exactly three
    # classes whose sorted order is bad / good / ok — confirm against the data.
    tick_labels = ['bad', 'good', 'ok']
    cm = confusion_matrix(y_test, y_pred)
    # Draw on an explicit Axes instead of overlaying the heatmap on a
    # plt.matshow image bound to figure number 1.
    fig, ax = plt.subplots(figsize=(10, 10))
    sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
                xticklabels=tick_labels, yticklabels=tick_labels, ax=ax)
    ax.set_xlabel('Predicted label')
    ax.set_ylabel('True label')
    ax.set_title('Confusion Matrix for GaussianNB')
    fig.savefig(os.path.join(output_dir, 'confusion_matrixGaussianNB.png'))
    plt.show()

    # One-vs-rest ROC curves. The existing split is reused (binarizing the
    # already-split targets) so the curves describe the same test rows.
    # NOTE: with only two classes label_binarize returns a single column, so
    # the per-class indexing below assumes three or more classes.
    classes = np.unique(y)
    n_classes = len(classes)
    y_train_bin = label_binarize(y_train, classes=classes)
    y_test_bin = label_binarize(y_test, classes=classes)

    clf = OneVsRestClassifier(GaussianNB())
    y_score = clf.fit(x_train, y_train_bin).predict_proba(x_test)

    fig, ax = plt.subplots(figsize=(10, 10))
    ax.plot([0, 1], [0, 1], 'k--')  # chance-level diagonal
    ax.axis(xmin=0, xmax=1, ymin=0, ymax=1.05)
    ax.set_xlabel('False Positive Rate')
    ax.set_ylabel('True Positive Rate')
    ax.set_title('Receiver operating characteristic for GaussianNB')
    for i in range(n_classes):
        fpr, tpr, _ = roc_curve(y_test_bin[:, i], y_score[:, i])
        ax.plot(fpr, tpr, label='ROC curve for class %s (area = %0.2f)' % (classes[i], auc(fpr, tpr)))
    ax.legend(loc="lower right")
    fig.savefig(os.path.join(output_dir, 'roc_curveGaussianNB.png'))
    plt.show()

In [None]:
# Evaluate Gaussian naive Bayes on the features and labels loaded above.
run_GaussianNB(x=x, y=y)

In [None]:
def run_MultinomialNB(x, y, output_dir='Naive'):
    """Train and evaluate a multinomial naive Bayes classifier on a hold-out split.

    The data is split 80/20 with a fixed seed. A ``MultinomialNB`` model is
    fitted on the training part only and scored on the test part: the
    classification report is printed and written to
    ``reportMultinomialNB.txt`` and the confusion matrix is saved as
    ``confusion_matrixMultinomialNB.png``. A one-vs-rest ``MultinomialNB`` is
    then fitted on the same training part to draw one ROC curve per class,
    saved as ``roc_curveMultinomialNB.png``.

    Parameters
    ----------
    x : array-like or DataFrame
        Feature matrix, one row per sample.
    y : array-like or DataFrame
        Class label for each row of ``x``.
    output_dir : str, default 'Naive'
        Directory that receives the report and the figures; created if missing.

    Returns
    -------
    None
        Results are printed, displayed and written to ``output_dir``.
    """
    os.makedirs(output_dir, exist_ok=True)

    x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=42)

    model = MultinomialNB()
    # Fit on the training split only: fitting on all of (x, y) would let the
    # model see the test rows and inflate every metric reported below.
    model.fit(x_train, y_train)
    y_pred = model.predict(x_test)

    report = classification_report(y_test, y_pred)
    print(report)
    with open(os.path.join(output_dir, 'reportMultinomialNB.txt'), 'w') as f:
        print(report, file=f)

    # NOTE(review): the tick labels are hard-coded; this assumes exactly three
    # classes whose sorted order is bad / good / ok — confirm against the data.
    tick_labels = ['bad', 'good', 'ok']
    cm = confusion_matrix(y_test, y_pred)
    # Draw on an explicit Axes instead of overlaying the heatmap on a
    # plt.matshow image bound to figure number 1.
    fig, ax = plt.subplots(figsize=(10, 10))
    sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
                xticklabels=tick_labels, yticklabels=tick_labels, ax=ax)
    ax.set_xlabel('Predicted label')
    ax.set_ylabel('True label')
    ax.set_title('Confusion Matrix for MultinomialNB')
    fig.savefig(os.path.join(output_dir, 'confusion_matrixMultinomialNB.png'))
    plt.show()

    # One-vs-rest ROC curves. The existing split is reused (binarizing the
    # already-split targets) so the curves describe the same test rows.
    # NOTE: with only two classes label_binarize returns a single column, so
    # the per-class indexing below assumes three or more classes.
    classes = np.unique(y)
    n_classes = len(classes)
    y_train_bin = label_binarize(y_train, classes=classes)
    y_test_bin = label_binarize(y_test, classes=classes)

    clf = OneVsRestClassifier(MultinomialNB())
    y_score = clf.fit(x_train, y_train_bin).predict_proba(x_test)

    fig, ax = plt.subplots(figsize=(10, 10))
    ax.plot([0, 1], [0, 1], 'k--')  # chance-level diagonal
    ax.axis(xmin=0, xmax=1, ymin=0, ymax=1.05)
    ax.set_xlabel('False Positive Rate')
    ax.set_ylabel('True Positive Rate')
    ax.set_title('Receiver operating characteristic for MultinomialNB')
    for i in range(n_classes):
        fpr, tpr, _ = roc_curve(y_test_bin[:, i], y_score[:, i])
        ax.plot(fpr, tpr, label='ROC curve for class %s (area = %0.2f)' % (classes[i], auc(fpr, tpr)))
    ax.legend(loc="lower right")
    fig.savefig(os.path.join(output_dir, 'roc_curveMultinomialNB.png'))
    plt.show()

In [None]:
# The features are passed as absolute values — presumably because
# MultinomialNB rejects negative inputs (TODO confirm).
# NOTE(review): taking the absolute value folds negative and positive feature
# values together; shifting or min-max scaling may be a better fit.
run_MultinomialNB(x.abs(), y)

In [None]:
run_BernoulliNB(x, y)

In [None]:
def run_BernoulliNB(x, y):
    x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=42)
    model = BernoulliNB()
    model.fit(x, y)
    y_pred = model.predict(x_test)
    print(classification_report(y_test, y_pred))

    with open('Naive\\reportBernoulliNB.txt', 'w') as f:
        print(classification_report(y_test, y_pred), file=f)

    plt.figure(figsize=(10,10))
    plt.matshow(confusion_matrix(y_test, y_pred),fignum=1)
    sns.heatmap(confusion_matrix(y_test, y_pred), annot=True, fmt='d', cmap='Blues', xticklabels=['bad', 'good', 'ok'], yticklabels=['bad', 'good', 'ok'])
    plt.title('Confusion Matrix for BernoulliNB')
    plt.savefig('Naive\confusion_matrixBernoulliNB.png')
    plt.show()

    unique, counts = np.unique(y, return_counts=True)

    y = label_binarize(y, classes=unique)
    n_classes = len(unique)

    x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=42)
    
    clf = OneVsRestClassifier(BernoulliNB())
    y_score = clf.fit(x_train, y_train).predict_proba(x_test)

    fpr = dict()
    tpr = dict()
    roc_auc = dict()
    for i in range(n_classes):
        fpr[i], tpr[i], _ = roc_curve(y_test[:, i], y_score[:, i])
        roc_auc[i] = auc(fpr[i], tpr[i])


    fig, ax = plt.subplots(figsize=(10,10))
    ax.plot([0, 1], [0, 1], 'k--')
    ax.axis(xmin=0, xmax=1, ymin=0, ymax=1.05)
    ax.set_xlabel('False Positive Rate')
    ax.set_ylabel('True Positive Rate')
    ax.set_title('Receiver operating characteristic for BernoulliNB')
    # Plot of a ROC curve for a specific class
    for i in range(n_classes):
        
        ax.plot(fpr[i], tpr[i], label='ROC curve for class %s (area = %0.2f)' % (unique[i], roc_auc[i]))
        
    ax.legend(loc="lower right")
    fig.savefig('Naive\\roc_curveBernoulliNB.png')
    plt.show()