In [1]:
import os
import pickle

import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import BernoulliNB
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import classification_report

import run_binary_classifier
import run_multilabel_classifier

In [2]:
train_binary = os.path.join('../', 'data/train_binary.csv')
test_binary = os.path.join('../', 'data/test_clean_binary.csv')

train_multilabel = os.path.join('../', 'data/train.csv')
test_multilabel = os.path.join('../', 'data/test_final.csv')

## Logistic regression

In [3]:
binary_param_grid = {
        'bag_of_words__stop_words': ['english'],
        'bag_of_words__ngram_range': [(1, 2)],
        'bag_of_words__max_features': [500],
        'dim_reduct__n_components': [300],
        'normalizer__norm': ['l2'],
        'classifier__C': [5., 10.]
}


multilabel_param_grid  = [{
        'estimator__bag_of_words__stop_words': ['english'],
        'estimator__bag_of_words__ngram_range': [(1, 2)],
        'estimator__bag_of_words__max_features': [500],
        'estimator__dim_reduct__n_components': [300],
        'estimator__normalizer__norm': ['l2'],
        'estimator__classifier__C': [5., 10.]
}]

#### Train binary

In [4]:
binary_clf = run_binary_classifier.run(binary_param_grid, LogisticRegression(), comments_file=train_binary)

with open('./saved_models/log_reg_joint_binary.pkl', 'wb') as saved_model:
	pickle.dump(binary_clf, file=saved_model)



Fitting 5 folds for each of 2 candidates, totalling 10 fits
             precision    recall  f1-score   support

          0       0.80      0.75      0.77        16
          1       0.73      0.79      0.76        14

avg / total       0.77      0.77      0.77        30

{'bag_of_words__max_features': 500, 'bag_of_words__ngram_range': (1, 2), 'bag_of_words__stop_words': 'english', 'classifier__C': 10.0, 'dim_reduct__n_components': 300, 'normalizer__norm': 'l2'}


[Parallel(n_jobs=1)]: Done  10 out of  10 | elapsed:    0.2s finished


#### Train multilabel

In [5]:
multilabel_clf = run_multilabel_classifier.run(multilabel_param_grid, LogisticRegression(), comments_file=train_multilabel)
with open('./saved_models/log_reg_joint_multilabel.pkl', 'wb') as saved_model:
	pickle.dump(binary_clf, file=saved_model)

  str(classes[c]))
  str(classes[c]))


Fitting 5 folds for each of 2 candidates, totalling 10 fits


  str(classes[c]))
  str(classes[c]))
  str(classes[c]))
  str(classes[c]))
  str(classes[c]))
  str(classes[c]))
  str(classes[c]))
  str(classes[c]))
  str(classes[c]))
  str(classes[c]))


             precision    recall  f1-score   support

          0       1.00      1.00      1.00        30
          1       0.00      0.00      0.00         3
          2       0.80      0.44      0.57        18
          3       0.00      0.00      0.00         0
          4       0.42      0.45      0.43        11
          5       0.00      0.00      0.00         1

avg / total       0.78      0.68      0.72        63

{'estimator__bag_of_words__max_features': 500,
 'estimator__bag_of_words__ngram_range': (1, 2),
 'estimator__bag_of_words__stop_words': 'english',
 'estimator__classifier__C': 5.0,
 'estimator__dim_reduct__n_components': 300,
 'estimator__normalizer__norm': 'l2'}


[Parallel(n_jobs=1)]: Done  10 out of  10 | elapsed:    1.0s finished
  str(classes[c]))
  'precision', 'predicted', average, warn_for)
  'recall', 'true', average, warn_for)


#### Predict binary

In [6]:
X_binary_test, y_binary_test = run_binary_classifier.load_comments(test_binary)
y_binary_test_predict = binary_clf.predict(X_binary_test)

print(classification_report(y_binary_test, y_binary_test_predict))

             precision    recall  f1-score   support

          0       0.69      0.86      0.77        50
          1       0.82      0.62      0.70        50

avg / total       0.75      0.74      0.74       100



#### Predict multilabel

In [7]:
X_multilabel_test, y_multilabel_test = run_multilabel_classifier.load_comments(test_multilabel)
y_multilabel_test_predict = multilabel_clf.predict(X_multilabel_test)

print(classification_report(y_multilabel_test, y_multilabel_test_predict))

             precision    recall  f1-score   support

          0       1.00      1.00      1.00       100
          1       0.00      0.00      0.00        11
          2       0.80      0.40      0.53        70
          3       0.00      0.00      0.00         3
          4       0.63      0.39      0.48        56
          5       0.00      0.00      0.00        11

avg / total       0.76      0.60      0.66       251



  'precision', 'predicted', average, warn_for)


### Final joint prediction

In [8]:
final_predictions = np.full_like(y_multilabel_test_predict, -1)


non_toxic_indices = np.argwhere(y_binary_test_predict == 0).flatten()
toxic_indices = np.argwhere(y_binary_test_predict == 1).flatten()

# place binary classifier's prediction of clean comments
final_predictions[non_toxic_indices] = np.array([0, 0, 0, 0, 0, 0])

multilabel_toxic_predictions = y_multilabel_test_predict[toxic_indices]
final_predictions[toxic_indices] = multilabel_toxic_predictions

print(classification_report(y_multilabel_test, final_predictions))

             precision    recall  f1-score   support

          0       1.00      0.38      0.55       100
          1       0.00      0.00      0.00        11
          2       0.75      0.13      0.22        70
          3       0.00      0.00      0.00         3
          4       0.53      0.14      0.23        56
          5       0.00      0.00      0.00        11

avg / total       0.73      0.22      0.33       251



  'precision', 'predicted', average, warn_for)


## Naive Bayes

In [9]:
binary_param_grid = {
        'bag_of_words__stop_words': ['english'],
        'bag_of_words__ngram_range': [(1, 2)],
        'bag_of_words__max_features': [500],
        'dim_reduct__n_components': [300],
        'normalizer__norm': ['l2'],
        'classifier__alpha': [1.0],
        'classifier__binarize': [0.0]
}

multilabel_param_grid = [{
        'estimator__bag_of_words__stop_words': ['english'],
        'estimator__bag_of_words__ngram_range': [(1, 2)],
        'estimator__bag_of_words__max_features': [500],
        'estimator__dim_reduct__n_components': [300],
        'estimator__normalizer__norm': ['l2'],
        'estimator__classifier__alpha': [1.0],
        'estimator__classifier__binarize': [0.0]
}]

#### Train binary

In [10]:
binary_clf = run_binary_classifier.run(binary_param_grid, BernoulliNB(), comments_file=train_binary)

with open('./saved_models/naiveB_joint_binary.pkl', 'wb') as saved_model:
	pickle.dump(binary_clf, file=saved_model)

Fitting 5 folds for each of 1 candidates, totalling 5 fits
             precision    recall  f1-score   support

          0       0.69      0.73      0.71        15
          1       0.71      0.67      0.69        15

avg / total       0.70      0.70      0.70        30

{'bag_of_words__max_features': 500, 'bag_of_words__ngram_range': (1, 2), 'bag_of_words__stop_words': 'english', 'classifier__alpha': 1.0, 'classifier__binarize': 0.0, 'dim_reduct__n_components': 300, 'normalizer__norm': 'l2'}


[Parallel(n_jobs=1)]: Done   5 out of   5 | elapsed:    0.0s finished


#### Train multilabel

In [11]:
multilabel_clf = run_multilabel_classifier.run(multilabel_param_grid, BernoulliNB(), comments_file=train_multilabel)
with open('./saved_models/naiveB_joint_binary.pkl', 'wb') as saved_model:
	pickle.dump(binary_clf, file=saved_model)

  str(classes[c]))
  str(classes[c]))
  str(classes[c]))


Fitting 5 folds for each of 1 candidates, totalling 5 fits


  str(classes[c]))
  str(classes[c]))
[Parallel(n_jobs=1)]: Done   5 out of   5 | elapsed:    0.4s finished


             precision    recall  f1-score   support

          0       1.00      1.00      1.00        30
          1       0.33      0.25      0.29         4
          2       0.52      0.92      0.67        13
          3       0.00      0.00      0.00         0
          4       0.52      0.87      0.65        15
          5       0.00      0.00      0.00         2

avg / total       0.72      0.88      0.77        64

{'estimator__bag_of_words__max_features': 500,
 'estimator__bag_of_words__ngram_range': (1, 2),
 'estimator__bag_of_words__stop_words': 'english',
 'estimator__classifier__alpha': 1.0,
 'estimator__classifier__binarize': 0.0,
 'estimator__dim_reduct__n_components': 300,
 'estimator__normalizer__norm': 'l2'}


  str(classes[c]))
  'precision', 'predicted', average, warn_for)
  'recall', 'true', average, warn_for)


#### Predict binary

In [12]:
X_binary_test, y_binary_test = run_binary_classifier.load_comments(test_binary)
y_binary_test_predict = binary_clf.predict(X_binary_test)

print(classification_report(y_binary_test, y_binary_test_predict))

             precision    recall  f1-score   support

          0       0.72      0.45      0.55        51
          1       0.59      0.82      0.68        49

avg / total       0.65      0.63      0.62       100



#### Predict multilabel

In [13]:
X_multilabel_test, y_multilabel_test = run_multilabel_classifier.load_comments(test_multilabel)
y_multilabel_test_predict = multilabel_clf.predict(X_multilabel_test)

print(classification_report(y_multilabel_test, y_multilabel_test_predict))

             precision    recall  f1-score   support

          0       1.00      1.00      1.00       100
          1       0.00      0.00      0.00         3
          2       0.63      0.75      0.69        64
          3       0.33      0.33      0.33         3
          4       0.59      0.77      0.67        57
          5       0.00      0.00      0.00        12

avg / total       0.73      0.81      0.77       239



## Final joint prediction

In [14]:
final_predictions = np.full_like(y_multilabel_test_predict, -1)


non_toxic_indices = np.argwhere(y_binary_test_predict == 0).flatten()
toxic_indices = np.argwhere(y_binary_test_predict == 1).flatten()

# place binary classifier's prediction of clean comments
final_predictions[non_toxic_indices] = np.array([0, 0, 0, 0, 0, 0])

multilabel_toxic_predictions = y_multilabel_test_predict[toxic_indices]
final_predictions[toxic_indices] = multilabel_toxic_predictions

print(classification_report(y_multilabel_test, final_predictions))

             precision    recall  f1-score   support

          0       1.00      0.68      0.81       100
          1       0.00      0.00      0.00         3
          2       0.64      0.56      0.60        64
          3       0.50      0.33      0.40         3
          4       0.62      0.61      0.62        57
          5       0.00      0.00      0.00        12

avg / total       0.75      0.59      0.65       239



## Decision tree

In [15]:
binary_param_grid = {
        'bag_of_words__stop_words': ['english'],
        'bag_of_words__ngram_range': [(1, 2)],
        'bag_of_words__max_features': [500],
        'dim_reduct__n_components': [300],
        'normalizer__norm': ['l2'],
        'classifier__max_depth': [5, 10, 15]
}

multilabel_param_grid = [{
        'estimator__bag_of_words__stop_words': ['english'],
        'estimator__bag_of_words__ngram_range': [(1, 2)],
        'estimator__bag_of_words__max_features': [500],
        'estimator__dim_reduct__n_components': [300],
        'estimator__normalizer__norm': ['l2'],
        'estimator__classifier__max_depth': [5, 10, 15]
}]

#### Train binary

In [16]:
binary_clf = run_binary_classifier.run(binary_param_grid, DecisionTreeClassifier(), comments_file=train_binary)

with open('./saved_models/dec_tree_joint_binary.pkl', 'wb') as saved_model:
	pickle.dump(binary_clf, file=saved_model)



Fitting 5 folds for each of 3 candidates, totalling 15 fits
             precision    recall  f1-score   support

          0       0.50      0.35      0.41        17
          1       0.39      0.54      0.45        13

avg / total       0.45      0.43      0.43        30

{'bag_of_words__max_features': 500, 'bag_of_words__ngram_range': (1, 2), 'bag_of_words__stop_words': 'english', 'classifier__max_depth': 15, 'dim_reduct__n_components': 300, 'normalizer__norm': 'l2'}


[Parallel(n_jobs=1)]: Done  15 out of  15 | elapsed:    0.3s finished


#### Train multilabel

In [17]:
multilabel_clf = run_multilabel_classifier.run(multilabel_param_grid, DecisionTreeClassifier(), comments_file=train_multilabel)
with open('./saved_models/dec_tree_joint_binary.pkl', 'wb') as saved_model:
	pickle.dump(binary_clf, file=saved_model)

  str(classes[c]))
  str(classes[c]))


Fitting 5 folds for each of 3 candidates, totalling 15 fits


  str(classes[c]))
  str(classes[c]))
  str(classes[c]))
  str(classes[c]))
  str(classes[c]))
  str(classes[c]))
  str(classes[c]))
  str(classes[c]))
  str(classes[c]))
  str(classes[c]))
  str(classes[c]))
  str(classes[c]))
  str(classes[c]))
  str(classes[c]))
  str(classes[c]))
  str(classes[c]))


             precision    recall  f1-score   support

          0       1.00      1.00      1.00        30
          1       0.33      0.12      0.18         8
          2       0.89      0.73      0.80        22
          3       0.00      0.00      0.00         1
          4       0.60      0.25      0.35        24
          5       0.00      0.00      0.00         3

avg / total       0.76      0.60      0.65        88

{'estimator__bag_of_words__max_features': 500,
 'estimator__bag_of_words__ngram_range': (1, 2),
 'estimator__bag_of_words__stop_words': 'english',
 'estimator__classifier__max_depth': 5,
 'estimator__dim_reduct__n_components': 300,
 'estimator__normalizer__norm': 'l2'}


[Parallel(n_jobs=1)]: Done  15 out of  15 | elapsed:    1.6s finished
  str(classes[c]))
  'precision', 'predicted', average, warn_for)


#### Predict binary

In [18]:
X_binary_test, y_binary_test = run_binary_classifier.load_comments(test_binary)
y_binary_test_predict = binary_clf.predict(X_binary_test)

print(classification_report(y_binary_test, y_binary_test_predict))

             precision    recall  f1-score   support

          0       0.52      0.43      0.47        51
          1       0.50      0.59      0.54        49

avg / total       0.51      0.51      0.51       100



#### Predict multilabel

In [19]:
X_multilabel_test, y_multilabel_test = run_multilabel_classifier.load_comments(test_multilabel)
y_multilabel_test_predict = multilabel_clf.predict(X_multilabel_test)

print(classification_report(y_multilabel_test, y_multilabel_test_predict))

             precision    recall  f1-score   support

          0       1.00      1.00      1.00       100
          1       0.21      0.30      0.25        10
          2       0.78      0.67      0.72        63
          3       0.00      0.00      0.00         4
          4       0.65      0.60      0.63        60
          5       0.25      0.17      0.20        12

avg / total       0.78      0.73      0.75       249



## Final joint prediction

In [20]:
final_predictions = np.full_like(y_multilabel_test_predict, -1)


non_toxic_indices = np.argwhere(y_binary_test_predict == 0).flatten()
toxic_indices = np.argwhere(y_binary_test_predict == 1).flatten()

# place binary classifier's prediction of clean comments
final_predictions[non_toxic_indices] = np.array([0, 0, 0, 0, 0, 0])

multilabel_toxic_predictions = y_multilabel_test_predict[toxic_indices]
final_predictions[toxic_indices] = multilabel_toxic_predictions

print(classification_report(y_multilabel_test, final_predictions))

             precision    recall  f1-score   support

          0       1.00      0.58      0.73       100
          1       0.12      0.10      0.11        10
          2       0.88      0.37      0.52        63
          3       0.00      0.00      0.00         4
          4       0.61      0.28      0.39        60
          5       0.25      0.17      0.20        12

avg / total       0.79      0.41      0.53       249

