In [None]:
import os
import pickle

from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.dummy import DummyClassifier
from sklearn.naive_bayes import BernoulliNB
from sklearn.svm import SVC

from sklearn.metrics import classification_report

from run_binary_classifier import _load_comments, run

In [None]:
# Train and test CSVs sit side by side in the `data` directory, three levels
# above the working directory; only the file name differs between them.
train_comments_path, test_comments_path = (
    os.path.join('..', '..', '..', 'data', csv_name)
    for csv_name in ('train_binary.csv', 'test_clean_binary.csv')
)

## Dummy classifier

In [None]:
# Grid handed to `run` together with `clf`. Keys use the `<step>__<parameter>`
# form — presumably naming steps of the pipeline assembled inside
# run_binary_classifier (confirm there). Every list holds a single value, so
# the grid describes exactly one configuration.
param_grid = {
    'bag_of_words__stop_words': ['english'],
    'bag_of_words__ngram_range': [(1, 2)],
    'bag_of_words__max_features': [500],
    'dim_reduct__n_components': [300],
    'normalizer__norm': ['l2'],
    'classifier__strategy': ['uniform']
}

# Baseline classifier. The 'uniform' strategy draws predictions at random, so
# the seed is pinned to keep the reported baseline identical across
# "Restart & Run All" executions.
clf = DummyClassifier(random_state=42)

#### Train phase

In [None]:
# Train the dummy baseline on the training CSV through the project's `run` helper.
# NOTE(review): `run` presumably searches `param_grid` and returns the fitted
# estimator (the result is pickled and later used for `.predict`) — confirm in
# run_binary_classifier.
trained_clf = run(param_grid, clf, comments_file=train_comments_path)

#### Saving the model

In [None]:
# Create the output directory first: open(..., 'wb') raises FileNotFoundError
# when ./saved_models does not exist yet (e.g. on a fresh checkout).
os.makedirs('./saved_models', exist_ok=True)

# Persist the trained dummy model so it can be reloaded without retraining.
with open('./saved_models/dummy_trained_binary.pkl', 'wb') as saved_model:
    pickle.dump(trained_clf, file=saved_model)

#### Loading the model and testing it

In [None]:
# Reload the pickled dummy model. Unpickling can execute arbitrary code, so
# this is only safe because the file is produced by this notebook's save cell.
with open('./saved_models/dummy_trained_binary.pkl', 'rb') as saved_model:
    loaded_clf = pickle.load(saved_model)

# The file handle is not needed past this point, so evaluation runs outside the
# `with` block instead of keeping the file open while loading data and predicting.
X_test, y_test = _load_comments(test_comments_path)
y_test_predict = loaded_clf.predict(X_test)

# Per-class precision / recall / F1 for the predictions on the test CSV.
print(classification_report(y_test, y_test_predict))

## Logistic regression

In [None]:
# Logistic regression with library defaults; the grid below varies only its
# inverse regularisation strength C.
clf = LogisticRegression()

# Grid handed to `run` together with `clf`. Keys use the `<step>__<parameter>`
# form — presumably naming steps of the pipeline assembled inside
# run_binary_classifier (confirm there).
param_grid = {
    # text vectorisation
    'bag_of_words__stop_words': ['english'],
    'bag_of_words__ngram_range': [(1, 2)],
    'bag_of_words__max_features': [500],
    # dimensionality reduction and normalisation
    'dim_reduct__n_components': [300],
    'normalizer__norm': ['l2'],
    # classifier: two candidate values, so two configurations in total
    'classifier__C': [5., 10.]
}

#### Train phase 

In [None]:
# Train the logistic-regression model on the training CSV through the project's
# `run` helper.
# NOTE(review): `run` presumably searches `param_grid` (two values of C) and
# returns the fitted estimator — confirm in run_binary_classifier.
trained_clf = run(param_grid, clf, comments_file=train_comments_path)

#### Saving the model

In [None]:
# Create the output directory first: open(..., 'wb') raises FileNotFoundError
# when ./saved_models does not exist yet (e.g. on a fresh checkout).
os.makedirs('./saved_models', exist_ok=True)

# Persist the trained logistic-regression model for later evaluation.
with open('./saved_models/log_reg_trained_binary.pkl', 'wb') as saved_model:
    pickle.dump(trained_clf, file=saved_model)

#### Loading the model and testing it

In [None]:
# Reload the pickled logistic-regression model. Unpickling can execute
# arbitrary code, so this is only safe because the file is produced by this
# notebook's save cell.
with open('./saved_models/log_reg_trained_binary.pkl', 'rb') as saved_model:
    loaded_clf = pickle.load(saved_model)

# The file handle is not needed past this point, so evaluation runs outside the
# `with` block instead of keeping the file open while loading data and predicting.
X_test, y_test = _load_comments(test_comments_path)
y_test_predict = loaded_clf.predict(X_test)

# Per-class precision / recall / F1 for the predictions on the test CSV.
print(classification_report(y_test, y_test_predict))

## Decision tree

In [None]:
# Grid handed to `run` together with `clf`. Keys use the `<step>__<parameter>`
# form — presumably naming steps of the pipeline assembled inside
# run_binary_classifier (confirm there). Only the tree depth takes more than
# one value (three candidates).
param_grid = {
    'bag_of_words__stop_words': ['english'],
    'bag_of_words__ngram_range': [(1, 2)],
    'bag_of_words__max_features': [500],
    'dim_reduct__n_components': [300],
    'normalizer__norm': ['l2'],
    'classifier__max_depth': [5, 10, 15]
}

# scikit-learn permutes the features at every split, so an unseeded tree can
# differ between runs when several splits are equally good. Pin the seed so the
# saved model and its report are reproducible.
clf = DecisionTreeClassifier(random_state=42)

#### Train phase

In [None]:
# Train the decision tree on the training CSV through the project's `run` helper.
# NOTE(review): `run` presumably searches `param_grid` (three max_depth values)
# and returns the fitted estimator — confirm in run_binary_classifier.
trained_clf = run(param_grid, clf, comments_file=train_comments_path)

#### Saving the model

In [None]:
# Create the output directory first: open(..., 'wb') raises FileNotFoundError
# when ./saved_models does not exist yet (e.g. on a fresh checkout).
os.makedirs('./saved_models', exist_ok=True)

# Persist the trained decision tree for later evaluation.
with open('./saved_models/dec_tree_trained_binary.pkl', 'wb') as saved_model:
    pickle.dump(trained_clf, file=saved_model)

#### Loading the model and testing it

In [None]:
# Reload the pickled decision tree. Unpickling can execute arbitrary code, so
# this is only safe because the file is produced by this notebook's save cell.
with open('./saved_models/dec_tree_trained_binary.pkl', 'rb') as saved_model:
    loaded_clf = pickle.load(saved_model)

# The file handle is not needed past this point, so evaluation runs outside the
# `with` block instead of keeping the file open while loading data and predicting.
X_test, y_test = _load_comments(test_comments_path)
y_test_predict = loaded_clf.predict(X_test)

# Per-class precision / recall / F1 for the predictions on the test CSV.
print(classification_report(y_test, y_test_predict))

## Naive Bayes classifier

In [None]:
# Bernoulli naive Bayes; smoothing and binarisation threshold are fixed by the
# grid below rather than searched.
clf = BernoulliNB()

# Grid handed to `run` together with `clf`. Keys use the `<step>__<parameter>`
# form — presumably naming steps of the pipeline assembled inside
# run_binary_classifier (confirm there). Every list holds a single value, so
# the grid describes exactly one configuration.
param_grid = {
    # text vectorisation
    'bag_of_words__stop_words': ['english'],
    'bag_of_words__ngram_range': [(1, 2)],
    'bag_of_words__max_features': [500],
    # dimensionality reduction and normalisation
    'dim_reduct__n_components': [300],
    'normalizer__norm': ['l2'],
    # classifier
    # NOTE(review): binarize=0.0 thresholds every feature at zero; if the
    # preceding steps emit signed real values, only their sign survives —
    # confirm this is intended.
    'classifier__alpha': [1.0],
    'classifier__binarize': [0.0]
}

#### Train phase

In [None]:
# Train the naive Bayes model on the training CSV through the project's `run` helper.
# NOTE(review): `run` presumably applies `param_grid` and returns the fitted
# estimator — confirm in run_binary_classifier.
trained_clf = run(param_grid, clf, comments_file=train_comments_path)

#### Saving the model

In [None]:
# Create the output directory first: open(..., 'wb') raises FileNotFoundError
# when ./saved_models does not exist yet (e.g. on a fresh checkout).
os.makedirs('./saved_models', exist_ok=True)

# Persist the trained naive Bayes model for later evaluation.
with open('./saved_models/naiveB_trained_binary.pkl', 'wb') as saved_model:
    pickle.dump(trained_clf, file=saved_model)

#### Loading the model and testing it

In [None]:
# Reload the pickled naive Bayes model. Unpickling can execute arbitrary code,
# so this is only safe because the file is produced by this notebook's save cell.
with open('./saved_models/naiveB_trained_binary.pkl', 'rb') as saved_model:
    loaded_clf = pickle.load(saved_model)

# The file handle is not needed past this point, so evaluation runs outside the
# `with` block instead of keeping the file open while loading data and predicting.
X_test, y_test = _load_comments(test_comments_path)
y_test_predict = loaded_clf.predict(X_test)

# Per-class precision / recall / F1 for the predictions on the test CSV.
print(classification_report(y_test, y_test_predict))

## SVM

In [None]:
# Support vector classifier with library defaults; the grid below contains no
# `classifier__` entry, so none of its hyper-parameters are searched.
clf = SVC()

# Grid handed to `run` together with `clf`. Keys use the `<step>__<parameter>`
# form — presumably naming steps of the pipeline assembled inside
# run_binary_classifier (confirm there).
param_grid = {
    # text vectorisation; lowercasing is the only option with two candidates
    'bag_of_words__stop_words': ['english'],
    'bag_of_words__ngram_range': [(1, 2)],
    'bag_of_words__max_features': [500],
    'bag_of_words__lowercase': [True, False],
    # dimensionality reduction (100 components here) and normalisation
    'dim_reduct__n_components': [100],
    'normalizer__norm': ['l2']
}

#### Train phase

In [None]:
# Train the SVM on the training CSV through the project's `run` helper.
# NOTE(review): `run` presumably searches `param_grid` (lowercase on/off) and
# returns the fitted estimator — confirm in run_binary_classifier.
trained_clf = run(param_grid, clf, comments_file=train_comments_path)

#### Saving the model

In [None]:
# Create the output directory first: open(..., 'wb') raises FileNotFoundError
# when ./saved_models does not exist yet (e.g. on a fresh checkout).
os.makedirs('./saved_models', exist_ok=True)

# Persist the trained SVM for later evaluation.
with open('./saved_models/SVM_trained_binary.pkl', 'wb') as saved_model:
    pickle.dump(trained_clf, file=saved_model)

#### Loading the model and testing it

In [None]:
# Reload the pickled SVM. Unpickling can execute arbitrary code, so this is
# only safe because the file is produced by this notebook's save cell.
with open('./saved_models/SVM_trained_binary.pkl', 'rb') as saved_model:
    loaded_clf = pickle.load(saved_model)

# The file handle is not needed past this point, so evaluation runs outside the
# `with` block instead of keeping the file open while loading data and predicting.
X_test, y_test = _load_comments(test_comments_path)
y_test_predict = loaded_clf.predict(X_test)

# Per-class precision / recall / F1 for the predictions on the test CSV.
print(classification_report(y_test, y_test_predict))