In [2]:
import warnings

import numpy as np
from prettytable import PrettyTable

from MultinomialNBClassifier import multinomial_nb_classifier
from NBClassifier import nb_classifier
from LogisticRegressionClassifer import lr_classifer
from SGDClassifier import sgd_classifier
from Parser import get_vocabulary, bag_of_words, bernoulli

In [3]:
def percent(v):
    """Render a fraction as a percentage string with two decimal places.

    The value is multiplied by 100 and formatted in fixed-point notation,
    followed by a percent sign (e.g. ``0.5`` becomes ``"50.00%"``).
    """
    scaled = v * 100
    return "{:.2f}%".format(scaled)

In [4]:
def add_row(name, data_mode, table, res_arr, hyper_param=""):
    """Append one classifier result line to ``table``.

    ``res_arr`` must unpack into exactly four values, taken in the order
    accuracy, precision, recall, f1; each one is rendered through
    ``percent``. The resulting row is: name, data mode, the four formatted
    metrics, and finally the hyper-parameter text (empty by default).
    """
    accuracy, precision, recall, f1 = res_arr
    formatted = [percent(metric) for metric in (accuracy, precision, recall, f1)]
    row = [name, data_mode]
    row.extend(formatted)
    row.append(hyper_param)
    table.add_row(row)

In [5]:
# `np.warnings` was only the standard-library `warnings` module leaking through
# NumPy's namespace and is not available in recent NumPy releases, so the
# filter is installed through the stdlib module directly. The second argument
# is a regex matched against the start of the warning message.
warnings.filterwarnings("ignore", "overflow")

# One instance of each classifier is created up front and re-trained for every
# dataset / data mode below.
# NOTE(review): this assumes each `train` call starts from scratch rather than
# continuing from the previous fit -- confirm in the classifier modules.
nb = nb_classifier()
mnb = multinomial_nb_classifier()
lr = lr_classifer()
sgd = sgd_classifier()

for i in range(1, 4):
    train_dir = f"dataset{i}/train"
    test_dir = f"dataset{i}/test"

    table = PrettyTable(['classifier', 'data mode', 'accuracy', 'precision', 'recall', 'f1', 'hyper-parameter'])
    table.title = f"dataset{i}"

    # The vocabulary comes from the training split only and is reused to
    # encode the test split, in both representations.
    vocabulary = get_vocabulary(train_dir)
    bow_train_data, bow_train_classes = bag_of_words(train_dir, vocabulary)
    bow_test_data, bow_test_classes = bag_of_words(test_dir, vocabulary)
    bnl_train_data, bnl_train_classes = bernoulli(train_dir, vocabulary)
    bnl_test_data, bnl_test_classes = bernoulli(test_dir, vocabulary)

    # nb: evaluated on the bernoulli representation only.
    nb.train(bnl_train_data, bnl_train_classes)
    add_row("nb", "bernoulli", table, nb.test(bnl_test_data, bnl_test_classes))

    # mnb: evaluated on the bag-of-words representation only.
    mnb.train(bow_train_data, bow_train_classes)
    add_row("mnb", "bag_of_words", table, mnb.test(bow_test_data, bow_test_classes))

    # lr: the value returned by `train` is reported as "alpha" in the
    # hyper-parameter column.
    alpha = lr.train(bow_train_data, bow_train_classes)
    add_row("lr", "bag_of_words", table, lr.test(bow_test_data, bow_test_classes), f"alpha:{alpha}")
    alpha = lr.train(bnl_train_data, bnl_train_classes)
    add_row("lr", "bernoulli", table, lr.test(bnl_test_data, bnl_test_classes), f"alpha:{alpha}")

    # sgd: the value returned by `train` is shown as-is in the
    # hyper-parameter column.
    hyper_param = sgd.train(bow_train_data, bow_train_classes)
    add_row("sgd", "bag_of_words", table, sgd.test(bow_test_data, bow_test_classes), hyper_param)
    hyper_param = sgd.train(bnl_train_data, bnl_train_classes)
    add_row("sgd", "bernoulli", table, sgd.test(bnl_test_data, bnl_test_classes), hyper_param)

    print(table)

+------------------------------------------------------------------------------------------------------------+
|                                                  dataset1                                                  |
+------------+--------------+----------+-----------+--------+--------+---------------------------------------+
| classifier |  data mode   | accuracy | precision | recall |   f1   |            hyper-parameter            |
+------------+--------------+----------+-----------+--------+--------+---------------------------------------+
|     nb     |  bernoulli   |  76.36%  |  100.00%  | 13.08% | 23.13% |                                       |
|    mnb     | bag_of_words |  94.14%  |   93.22%  | 84.62% | 88.71% |                                       |
|     lr     | bag_of_words |  93.93%  |   87.97%  | 90.00% | 88.97% |               alpha:0.5               |
|     lr     |  bernoulli   |  96.23%  |   93.08%  | 93.08% | 93.08% |               alpha:0.25              |
|