In [1]:
import ipdb
import sys
import csv

import numpy as np
import torch
import torch.nn as nn
from collections import Counter, defaultdict

from mlearn.data.fileio import *
from mlearn.data import clean
from mlearn.data import loaders
from mlearn.utils.metrics import Metrics
from mlearn.utils.early_stopping import EarlyStopping
from mlearn.utils.pipeline import select_vectorizer

from sklearn.svm import LinearSVC
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import KFold, StratifiedKFold, GridSearchCV

In [2]:
torch.random.manual_seed(42)
np.random.seed(42)
datadir = '/Users/zeerakw/Documents/PhD/projects/active/MTL_abuse/data/'
c = clean.Cleaner(processes = ['lower', 'url', 'hashtag'])
p = clean.Preprocessors(liwc_dir = '/Users/zeerakw/Documents/PhD/projects/active/Generalisable_abuse/data/')

In [3]:
def train_model(train, dev, dataset, gridsearch, metrics):
    model = LinearSVC()

    trainX, trainY = train
    model = GridSearchCV(model, gridsearch, 'f1_macro', n_jobs = -1, cv = 5, refit = True)
    model.fit(trainX, trainY)
    print(model.best_params_)
    print(model.cv_results_)
    
    devX, devY = dev
    preds = model.predict(devX)
    metrics.compute(preds, devY)
    
    print(metrics.last_display())
    
    return model

In [4]:
def evaluate_single_task(model, vect, data, metrics):
    testX, testY = vect.transform([Counter(doc.text) for doc in data]), [doc.label for doc in data]
    
    preds = model.predict(testX)
    metrics.compute(preds, testY)
    print(metrics.scores)

In [5]:
def vectorize(data, vect, vect_type = 'count'):
    if vect_type in ['count', 'tfidf']:
        data = [" ".join(doc.text) for doc in data]
    elif vect_type == 'dict':
        data = [Counter(doc.text) for doc in data]
    
    if vect.fitted:
        vectorized = vect.transform(data)
    else:
        vect.fit(data)
        vectorized = vect.transform(data)
        vect.fitted = True
    return vectorized

# Load datasets

In [6]:
m = Metrics(['f1-score'], 'f1-score')
# BPE
tok = c.bpe_tokenize
exp = p.select_experiment('word')
davidson_bpe = loaders.davidson(cleaners = tok, data_path = datadir, preprocessor = exp, stratify = 'label')
wulczyn_bpe = loaders.wulczyn(cleaners = tok, data_path = datadir, preprocessor = exp, stratify = 'label')
waseem_bpe = loaders.waseem(cleaners = tok, data_path = datadir, preprocessor = exp, stratify = 'label')
waseem_hovy_bpe = loaders.waseem_hovy(cleaners = tok, data_path = datadir, preprocessor = exp, stratify = 'label')
oraby_sarcasm_bpe = loaders.oraby_sarcasm(cleaners = tok, data_path = datadir, preprocessor = exp, stratify = 'label')
oraby_factfeel_bpe = loaders.oraby_fact_feel(cleaners = tok, data_path = datadir, preprocessor = exp, stratify = 'label')
hoover_bpe = loaders.hoover(cleaners = tok, data_path = datadir, preprocessor = exp, stratify = 'label')

Loading Davidson et al. (train): 24783it [00:04, 5401.58it/s]
Loading Wulczyn et al. (train): 95692it [01:23, 1139.19it/s]
Loading Wulczyn et al. (dev): 32128it [00:25, 1257.56it/s]
Loading Wulczyn et al. (test): 31866it [00:30, 1043.23it/s]
Loading Waseem (train): 6908it [00:03, 2086.80it/s]
Loading Waseem-Hovy (train): 16906it [00:06, 2619.20it/s]
Loading Oraby et al. (Sarcasm) (train): 9386it [00:05, 1647.60it/s]
Loading Oraby et al. (Fact-feel) (train): 8433it [00:05, 1418.10it/s]
Loading Oraby et al. (Fact-feel) (dev): 1169it [00:00, 1282.40it/s]
Loading Oraby et al. (Fact-feel) (test): 586it [00:00, 1088.15it/s]
Loading Hoover et al. (train): 34987it [00:06, 5070.63it/s]


In [7]:
# LIWC
tok = c.tokenize
exp = p.select_experiment('liwc')

davidson_liwc = loaders.davidson(cleaners = tok, data_path = datadir, preprocessor = exp, stratify = 'label')
waseem_liwc = loaders.waseem(cleaners = tok, data_path = datadir, preprocessor = exp, stratify = 'label')
waseem_hovy_liwc = loaders.waseem_hovy(cleaners = tok, data_path = datadir, preprocessor = exp, stratify = 'label')
oraby_sarcasm_liwc = loaders.oraby_sarcasm(cleaners = tok, data_path = datadir, preprocessor = exp, stratify = 'label')
oraby_factfeel_liwc = loaders.oraby_fact_feel(cleaners = tok, data_path = datadir, preprocessor = exp, stratify = 'label')
hoover_liwc = loaders.hoover(cleaners = tok, data_path = datadir, preprocessor = exp, stratify = 'label')
wulczyn_liwc = loaders.wulczyn(cleaners = tok, data_path = datadir, preprocessor = exp, stratify = 'label')

Loading Davidson et al. (train): 24783it [04:52, 84.64it/s] 
Loading Waseem (train): 6908it [01:29, 76.85it/s] 
Loading Waseem-Hovy (train): 16906it [04:02, 69.76it/s]
Loading Oraby et al. (Sarcasm) (train): 9386it [04:13, 37.10it/s]
Loading Oraby et al. (Fact-feel) (train): 8433it [04:29, 31.33it/s]
Loading Oraby et al. (Fact-feel) (dev): 1169it [00:45, 25.73it/s]
Loading Oraby et al. (Fact-feel) (test): 586it [00:18, 32.14it/s]
Loading Hoover et al. (train): 34987it [09:38, 60.44it/s] 
Loading Wulczyn et al. (train): 95692it [56:56, 28.01it/s]
Loading Wulczyn et al. (dev): 32128it [23:33, 22.73it/s]
Loading Wulczyn et al. (test): 31866it [27:20, 19.43it/s]


## Process datasets

In [8]:
davidson_bpe.build_token_vocab(davidson_bpe.data)
davidson_bpe.build_label_vocab(davidson_bpe.data)
davidson_bpe.process_labels(davidson_bpe.data)
davidson_bpe.process_labels(davidson_bpe.dev)
davidson_bpe.process_labels(davidson_bpe.test)
wulczyn_bpe.build_token_vocab(wulczyn_bpe.data)
wulczyn_bpe.build_label_vocab(wulczyn_bpe.data)
wulczyn_bpe.process_labels(wulczyn_bpe.data)
wulczyn_bpe.process_labels(wulczyn_bpe.dev)
wulczyn_bpe.process_labels(wulczyn_bpe.test)
waseem_bpe.build_token_vocab(waseem_bpe.data)
waseem_bpe.build_label_vocab(waseem_bpe.data)
waseem_bpe.process_labels(waseem_bpe.data)
waseem_bpe.process_labelss(waseem_bpe.dev)
waseem_bpe.process_labels(waseem_bpe.test)
waseem_hovy_bpe.build_token_vocab(waseem_hovy_bpe.data)
waseem_hovy_bpe.build_label_vocab(waseem_hovy_bpe.data)
waseem_hovy_bpe.process_labels(waseem_hovy_bpe.data)
waseem_hovy_bpe.process_labels(waseem_hovy_bpe.dev)
waseem_hovy_bpe.process_labels(waseem_hovy_bpe.test)
oraby_sarcasm_bpe.build_token_vocab(oraby_sarcasm_bpe.data)
oraby_sarcasm_bpe.build_label_vocab(oraby_sarcasm_bpe.data)
oraby_sarcasm_bpe.process_labels(oraby_sarcasm_bpe.data)
oraby_sarcasm_bpe.process_labels(oraby_sarcasm_bpe.dev)
oraby_sarcasm_bpe.process_labels(oraby_sarcasm_bpe.test)
oraby_factfeel_bpe.build_token_vocab(oraby_factfeel_bpe.data)
oraby_factfeel_bpe.build_label_vocab(oraby_factfeel_bpe.data)
oraby_factfeel_bpe.process_labels(oraby_factfeel_bpe.data)
oraby_factfeel_bpe.process_labels(oraby_factfeel_bpe.dev)
oraby_factfeel_bpe.process_labels(oraby_factfeel_bpe.test)
hoover_bpe.build_token_vocab(hoover_bpe.data)
hoover_bpe.build_label_vocab(hoover_bpe.data)
hoover_bpe.process_labels(hoover_bpe.data)
hoover_bpe.process_labels(hoover_bpe.dev)
hoover_bpe.process_labels(hoover_bpe.test)

Building vocabulary (Davidson et al.): 100%|██████████| 19826/19826 [00:00<00:00, 56046.48it/s]
Encoding vocabulary: 100%|██████████| 23428/23428 [00:00<00:00, 249741.79it/s]
Encode label vocab (Davidson et al.): 100%|██████████| 3/3 [00:00<00:00, 3198.50it/s]
Building vocabulary (Wulczyn et al.): 100%|██████████| 95692/95692 [00:03<00:00, 26772.89it/s]
Encoding vocabulary: 100%|██████████| 95362/95362 [00:00<00:00, 589947.34it/s]
Encode label vocab (Wulczyn et al.): 100%|██████████| 2/2 [00:00<00:00, 3081.78it/s]
Building vocabulary (Waseem): 100%|██████████| 5526/5526 [00:00<00:00, 35444.28it/s]
Encoding vocabulary: 100%|██████████| 11919/11919 [00:00<00:00, 622564.25it/s]
Encode label vocab (Waseem): 100%|██████████| 4/4 [00:00<00:00, 342.61it/s]
Building vocabulary (Waseem-Hovy): 100%|██████████| 13524/13524 [00:00<00:00, 66072.57it/s]
Encoding vocabulary: 100%|██████████| 19905/19905 [00:00<00:00, 412178.71it/s]
Encode label vocab (Waseem-Hovy): 100%|██████████| 3/3 [00:00<00:00, 

In [9]:
davidson_liwc.build_token_vocab(davidson_liwc.data)
davidson_liwc.build_label_vocab(davidson_liwc.data)
davidson_liwc.process_labels(davidson_liwc.data)
davidson_liwc.process_labels(davidson_liwc.dev)
davidson_liwc.process_labels(davidson_liwc.test)
wulczyn_liwc.build_token_vocab(wulczyn_liwc.data)
wulczyn_liwc.build_label_vocab(wulczyn_liwc.data)
wulczyn_liwc.process_labels(wulczyn_liwc.data)
wulczyn_liwc.process_labels(wulczyn_liwc.dev)
wulczyn_liwc.process_labels(wulczyn_liwc.test)
waseem_liwc.build_token_vocab(waseem_liwc.data)
waseem_liwc.build_label_vocab(waseem_liwc.data)
waseem_liwc.process_labels(waseem_liwc.data)
waseem_liwc.process_labels(waseem_liwc.dev)
waseem_liwc.process_labels(waseem_liwc.test)
waseem_hovy_liwc.build_token_vocab(waseem_hovy_liwc.data)
waseem_hovy_liwc.build_label_vocab(waseem_hovy_liwc.data)
waseem_hovy_liwc.process_labels(waseem_hovy_liwc.data)
waseem_hovy_liwc.process_labels(waseem_hovy_liwc.dev)
waseem_hovy_liwc.process_labels(waseem_hovy_liwc.test)
oraby_sarcasm_liwc.build_token_vocab(oraby_sarcasm_liwc.data)
oraby_sarcasm_liwc.build_label_vocab(oraby_sarcasm_liwc.data)
oraby_sarcasm_liwc.process_labels(oraby_sarcasm_liwc.data)
oraby_sarcasm_liwc.process_labels(oraby_sarcasm_liwc.dev)
oraby_sarcasm_liwc.process_labels(oraby_sarcasm_liwc.test)
oraby_factfeel_liwc.build_token_vocab(oraby_factfeel_liwc.data)
oraby_factfeel_liwc.build_label_vocab(oraby_factfeel_liwc.data)
oraby_factfeel_liwc.process_labels(oraby_factfeel_liwc.data)
oraby_factfeel_liwc.process_labels(oraby_factfeel_liwc.dev)
oraby_factfeel_liwc.process_labels(oraby_factfeel_liwc.test)
hoover_liwc.build_token_vocab(hoover_liwc.data)
hoover_liwc.build_label_vocab(hoover_liwc.data)
hoover_liwc.process_labels(hoover_liwc.data)
hoover_liwc.process_labels(hoover_liwc.dev)
hoover_liwc.process_labels(hoover_liwc.test)

Building vocabulary (Davidson et al.): 100%|██████████| 19826/19826 [00:00<00:00, 52012.56it/s]
Encoding vocabulary: 100%|██████████| 840/840 [00:00<00:00, 314938.35it/s]
Encode label vocab (Davidson et al.): 100%|██████████| 3/3 [00:00<00:00, 451.53it/s]
Building vocabulary (Wulczyn et al.): 100%|██████████| 95692/95692 [00:02<00:00, 37544.23it/s]
Encoding vocabulary: 100%|██████████| 1005/1005 [00:00<00:00, 258494.85it/s]
Encode label vocab (Wulczyn et al.): 100%|██████████| 2/2 [00:00<00:00, 464.23it/s]
Building vocabulary (Waseem): 100%|██████████| 5526/5526 [00:00<00:00, 87341.16it/s]
Encoding vocabulary: 100%|██████████| 741/741 [00:00<00:00, 303958.85it/s]
Encode label vocab (Waseem): 100%|██████████| 4/4 [00:00<00:00, 3209.72it/s]
Building vocabulary (Waseem-Hovy): 100%|██████████| 13524/13524 [00:00<00:00, 73891.88it/s]
Encoding vocabulary: 100%|██████████| 831/831 [00:00<00:00, 231731.04it/s]
Encode label vocab (Waseem-Hovy): 100%|██████████| 3/3 [00:00<00:00, 2286.55it/s]
Bu

In [52]:
wulczyn_bpe.vocab_size()

95364

In [54]:
wulczyn_liwc.vocab_size()

1007

In [57]:
len(wulczyn_bpe)

95692

In [56]:
waseem_bpe.vocab_size()

11921

In [55]:
waseem_liwc.vocab_size()

743

In [58]:
len(waseem_bpe)

5526

In [95]:
waseem_bpe.token_counts.most_common(2187)[-1]

('▁owns', 6)

In [80]:
6302 - waseem_bpe.vocab_size()

-5619

In [79]:
waseem_liwc.token_counts.most_common(473)[-1]

('RELATIV_COGPROC_POWER_CAUSE_MOTION_WORK_DRIVES_ACHIEVE', 6)

## Train BPE models
### Davidson

In [10]:
davidson_bpe_vect = select_vectorizer('dict')
davidson_tr_bpe = vectorize(davidson_bpe.data, davidson_bpe_vect, 'dict')
davidson_de_bpe = vectorize(davidson_bpe.dev, davidson_bpe_vect, 'dict')
davidson_te_bpe = vectorize(davidson_bpe.test, davidson_bpe_vect, 'dict')
wulczyn_tr_bpe = vectorize(wulczyn_bpe.data, davidson_bpe_vect, 'dict')
wulczyn_de_bpe = vectorize(wulczyn_bpe.dev, davidson_bpe_vect, 'dict')
wulczyn_te_bpe = vectorize(wulczyn_bpe.test, davidson_bpe_vect, 'dict')
waseem_tr_bpe = vectorize(waseem_bpe.data, davidson_bpe_vect, 'dict')
waseem_de_bpe = vectorize(waseem_bpe.dev, davidson_bpe_vect, 'dict')
waseem_te_bpe = vectorize(waseem_bpe.test, davidson_bpe_vect, 'dict')
waseem_hovy_tr_bpe = vectorize(waseem_hovy_bpe.data, davidson_bpe_vect, 'dict')
waseem_hovy_de_bpe = vectorize(waseem_hovy_bpe.dev, davidson_bpe_vect, 'dict')
waseem_hovy_te_bpe = vectorize(waseem_hovy_bpe.test, davidson_bpe_vect, 'dict')
oraby_sarcasm_tr_bpe = vectorize(oraby_sarcasm_bpe.data, davidson_bpe_vect, 'dict')
oraby_sarcasm_de_bpe = vectorize(oraby_sarcasm_bpe.dev, davidson_bpe_vect, 'dict')
oraby_sarcasm_te_bpe = vectorize(oraby_sarcasm_bpe.test, davidson_bpe_vect, 'dict')
oraby_factfeel_tr_bpe = vectorize(oraby_factfeel_bpe.data, davidson_bpe_vect, 'dict')
oraby_factfeel_de_bpe = vectorize(oraby_factfeel_bpe.dev, davidson_bpe_vect, 'dict')
oraby_factfeel_te_bpe = vectorize(oraby_factfeel_bpe.test, davidson_bpe_vect, 'dict')
hoover_tr_bpe = vectorize(hoover_bpe.data, davidson_bpe_vect, 'dict')
hoover_de_bpe = vectorize(hoover_bpe.dev, davidson_bpe_vect, 'dict')
hoover_te_bpe = vectorize(hoover_bpe.test, davidson_bpe_vect, 'dict')

In [11]:
bpe_svm_grid = {'penalty': ['l2', 'l1'], 'C': [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0]}
davidson_bpe_metrics = Metrics(['f1-score', 'accuracy', 'precision', 'recall'], 'f1-score')
davidson_bpe_model = train_model((davidson_tr_bpe, [doc.label for doc in davidson_bpe.data]), 
                                  (davidson_de_bpe, [doc.label for doc in davidson_bpe.dev]),
                                   davidson_bpe, bpe_svm_grid, davidson_bpe_metrics)
print(davidson_bpe_metrics.scores)



{'C': 0.2, 'penalty': 'l2'}
{'mean_fit_time': array([22.72827902,  0.04482808, 17.61110091,  0.03571968, 15.286622  ,
        0.04548588, 16.19527402,  0.04840336, 19.3002368 ,  0.0461421 ,
       16.71334071,  0.05543785, 15.7507853 ,  0.04619956, 15.58869324,
        0.04392338, 15.70662107,  0.04838548, 17.86204   ,  0.07479815]), 'std_fit_time': array([0.79183028, 0.01479371, 2.30140179, 0.00718663, 1.07034963,
       0.01719994, 0.43899644, 0.01479094, 1.18980813, 0.01618741,
       0.11597973, 0.01577857, 0.74227268, 0.01520801, 0.5562177 ,
       0.01295592, 1.58669093, 0.01555656, 1.22319282, 0.04772338]), 'mean_score_time': array([0.01098132, 0.        , 0.01151137, 0.        , 0.00900064,
       0.        , 0.01206031, 0.        , 0.02295728, 0.        ,
       0.01744089, 0.        , 0.0145968 , 0.        , 0.02173038,
       0.        , 0.01324134, 0.        , 0.02190542, 0.        ]), 'std_score_time': array([0.0013537 , 0.        , 0.0045867 , 0.        , 0.00164269,
    



### Waseem

In [12]:
waseem_bpe_vect = select_vectorizer('dict')
waseem_tr_bpe = vectorize(waseem_bpe.data, waseem_bpe_vect, 'dict')
waseem_de_bpe = vectorize(waseem_bpe.dev, waseem_bpe_vect, 'dict')
waseem_te_bpe = vectorize(waseem_bpe.test, waseem_bpe_vect, 'dict')
davidson_tr_bpe = vectorize(davidson_bpe.data, waseem_bpe_vect, 'dict')
davidson_de_bpe = vectorize(davidson_bpe.dev, waseem_bpe_vect, 'dict')
davidson_te_bpe = vectorize(davidson_bpe.test, waseem_bpe_vect, 'dict')
wulczyn_tr_bpe = vectorize(wulczyn_bpe.data, waseem_bpe_vect, 'dict')
wulczyn_de_bpe = vectorize(wulczyn_bpe.dev, waseem_bpe_vect, 'dict')
wulczyn_te_bpe = vectorize(wulczyn_bpe.test, waseem_bpe_vect, 'dict')
waseem_hovy_tr_bpe = vectorize(waseem_hovy_bpe.data, waseem_bpe_vect, 'dict')
waseem_hovy_de_bpe = vectorize(waseem_hovy_bpe.dev, waseem_bpe_vect, 'dict')
waseem_hovy_te_bpe = vectorize(waseem_hovy_bpe.test, waseem_bpe_vect, 'dict')
oraby_sarcasm_tr_bpe = vectorize(oraby_sarcasm_bpe.data, waseem_bpe_vect, 'dict')
oraby_sarcasm_de_bpe = vectorize(oraby_sarcasm_bpe.dev, waseem_bpe_vect, 'dict')
oraby_sarcasm_te_bpe = vectorize(oraby_sarcasm_bpe.test, waseem_bpe_vect, 'dict')
oraby_factfeel_tr_bpe = vectorize(oraby_factfeel_bpe.data, waseem_bpe_vect, 'dict')
oraby_factfeel_de_bpe = vectorize(oraby_factfeel_bpe.dev, waseem_bpe_vect, 'dict')
oraby_factfeel_te_bpe = vectorize(oraby_factfeel_bpe.test, waseem_bpe_vect, 'dict')
hoover_tr_bpe = vectorize(hoover_bpe.data, waseem_bpe_vect, 'dict')
hoover_de_bpe = vectorize(hoover_bpe.dev, waseem_bpe_vect, 'dict')
hoover_te_bpe = vectorize(hoover_bpe.test, waseem_bpe_vect, 'dict')

In [13]:
bpe_svm_grid = {'penalty': ['l2', 'l1'], 'C': [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0]}
waseem_bpe_metrics = Metrics(['f1-score', 'accuracy', 'precision', 'recall'], 'f1-score')
waseem_bpe_model = train_model((waseem_tr_bpe, [doc.label for doc in waseem_bpe.data]), 
                                  (waseem_de_bpe, [doc.label for doc in waseem_bpe.dev]),
                                   waseem_bpe, bpe_svm_grid, waseem_bpe_metrics)
print(waseem_bpe_metrics.scores)



{'C': 0.8, 'penalty': 'l2'}
{'mean_fit_time': array([1.30778685, 0.02879815, 2.23712902, 0.00925546, 2.50268359,
       0.01449647, 2.52570167, 0.01211638, 2.70203409, 0.01552944,
       2.62534957, 0.00706468, 2.56128745, 0.01694307, 2.45539341,
       0.01387458, 2.28486614, 0.00907626, 2.49457498, 0.0175561 ]), 'std_fit_time': array([0.15493408, 0.01432054, 0.17670812, 0.0020671 , 0.15924242,
       0.00465892, 0.20369552, 0.00584023, 0.07083737, 0.00630077,
       0.12527852, 0.00141643, 0.05914997, 0.01449252, 0.07914115,
       0.00723131, 0.13164848, 0.00519906, 0.29901841, 0.00687235]), 'mean_score_time': array([0.00958505, 0.        , 0.00563717, 0.        , 0.0038332 ,
       0.        , 0.00690093, 0.        , 0.00681233, 0.        ,
       0.00674181, 0.        , 0.00535169, 0.        , 0.00458436,
       0.        , 0.0060441 , 0.        , 0.00691104, 0.        ]), 'std_score_time': array([0.00685461, 0.        , 0.00319003, 0.        , 0.00042838,
       0.        , 0.003



### Wulczyn

In [14]:
wulczyn_bpe_vect = select_vectorizer('dict')
wulczyn_tr_bpe = vectorize(wulczyn_bpe.data, wulczyn_bpe_vect, 'dict')
wulczyn_de_bpe = vectorize(wulczyn_bpe.dev, wulczyn_bpe_vect, 'dict')
wulczyn_te_bpe = vectorize(wulczyn_bpe.test, wulczyn_bpe_vect, 'dict')
waseem_tr_bpe = vectorize(waseem_bpe.data, wulczyn_bpe_vect, 'dict')
waseem_de_bpe = vectorize(waseem_bpe.dev, wulczyn_bpe_vect, 'dict')
waseem_te_bpe = vectorize(waseem_bpe.test, wulczyn_bpe_vect, 'dict')
davidson_tr_bpe = vectorize(davidson_bpe.data, wulczyn_bpe_vect, 'dict')
davidson_de_bpe = vectorize(davidson_bpe.dev, wulczyn_bpe_vect, 'dict')
davidson_te_bpe = vectorize(davidson_bpe.test, wulczyn_bpe_vect, 'dict')
waseem_hovy_tr_bpe = vectorize(waseem_hovy_bpe.data, wulczyn_bpe_vect, 'dict')
waseem_hovy_de_bpe = vectorize(waseem_hovy_bpe.dev, wulczyn_bpe_vect, 'dict')
waseem_hovy_te_bpe = vectorize(waseem_hovy_bpe.test, wulczyn_bpe_vect, 'dict')
oraby_sarcasm_tr_bpe = vectorize(oraby_sarcasm_bpe.data, wulczyn_bpe_vect, 'dict')
oraby_sarcasm_de_bpe = vectorize(oraby_sarcasm_bpe.dev, wulczyn_bpe_vect, 'dict')
oraby_sarcasm_te_bpe = vectorize(oraby_sarcasm_bpe.test, wulczyn_bpe_vect, 'dict')
oraby_factfeel_tr_bpe = vectorize(oraby_factfeel_bpe.data, wulczyn_bpe_vect, 'dict')
oraby_factfeel_de_bpe = vectorize(oraby_factfeel_bpe.dev, wulczyn_bpe_vect, 'dict')
oraby_factfeel_te_bpe = vectorize(oraby_factfeel_bpe.test, wulczyn_bpe_vect, 'dict')
hoover_tr_bpe = vectorize(hoover_bpe.data, wulczyn_bpe_vect, 'dict')
hoover_de_bpe = vectorize(hoover_bpe.dev, wulczyn_bpe_vect, 'dict')
hoover_te_bpe = vectorize(hoover_bpe.test, wulczyn_bpe_vect, 'dict')

In [15]:
bpe_svm_grid = {'penalty': ['l2', 'l1'], 'C': [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0]}
wulczyn_bpe_metrics = Metrics(['f1-score', 'accuracy', 'precision', 'recall'], 'f1-score')
wulczyn_bpe_model = train_model((wulczyn_tr_bpe, [doc.label for doc in wulczyn_bpe.data]), 
                                  (wulczyn_de_bpe, [doc.label for doc in wulczyn_bpe.dev]),
                                   wulczyn_bpe, bpe_svm_grid, wulczyn_bpe_metrics)
print(wulczyn_bpe_metrics.scores)



{'C': 0.1, 'penalty': 'l2'}
{'mean_fit_time': array([49.72019706,  0.37321825, 55.86017551,  0.43571887, 61.89705043,
        0.67213583, 47.26648231,  0.51847668, 57.84532781,  0.4317853 ,
       48.30527329,  0.36120477, 54.11490011,  0.27214465, 55.15331717,
        0.27982488, 43.53611708,  0.37079339, 40.35864658,  0.39567986]), 'std_fit_time': array([2.02115302, 0.0684996 , 7.47653176, 0.05466971, 1.38904204,
       0.16870616, 4.79589775, 0.08606609, 6.30138053, 0.05686856,
       4.22285014, 0.06808495, 3.75265569, 0.01344239, 5.26072371,
       0.01548505, 1.88928589, 0.01841447, 9.63919465, 0.02441604]), 'mean_score_time': array([0.07469573, 0.        , 0.079667  , 0.        , 0.06472502,
       0.        , 0.0829308 , 0.        , 0.10434556, 0.        ,
       0.071737  , 0.        , 0.12530117, 0.        , 0.07414017,
       0.        , 0.07934136, 0.        , 0.06405759, 0.        ]), 'std_score_time': array([0.01631106, 0.        , 0.00936127, 0.        , 0.00938919,
    



### Evaluate Single task BPE models

In [16]:
davidson_bpe_eval_metrics = Metrics(['f1-score', 'accuracy', 'precision', 'recall'], 'f1-score')
evaluate_single_task(davidson_bpe_model, davidson_bpe_vect, davidson_bpe.test, davidson_bpe_eval_metrics)

waseem_bpe_eval_metrics = Metrics(['f1-score', 'accuracy', 'precision', 'recall'], 'f1-score')
evaluate_single_task(waseem_bpe_model, waseem_bpe_vect, waseem_bpe.test, waseem_bpe_eval_metrics)

wulczyn_bpe_eval_metrics = Metrics(['f1-score', 'accuracy', 'precision', 'recall'], 'f1-score')
evaluate_single_task(wulczyn_bpe_model, wulczyn_bpe_vect, wulczyn_bpe.test, wulczyn_bpe_eval_metrics)

{'f1-score': [0.701282448896403], 'accuracy': [0.8922952803549818], 'precision': [0.7356355520089006], 'recall': [0.6839996439570558], 'loss': []}
{'f1-score': [0.5845064355933921], 'accuracy': [0.9132947976878613], 'precision': [0.7041701685318706], 'recall': [0.5378510378510378], 'loss': []}
{'f1-score': [0.8704438660314124], 'accuracy': [0.9582313437519613], 'precision': [0.9006247137343721], 'recall': [0.8454710064739721], 'loss': []}


## Train LIWC models
### Davidson

In [17]:
davidson_liwc_vect = select_vectorizer('dict')
davidson_tr_liwc = vectorize(davidson_liwc.data, davidson_liwc_vect, 'dict')
davidson_de_liwc = vectorize(davidson_liwc.dev, davidson_liwc_vect, 'dict')
davidson_te_liwc = vectorize(davidson_liwc.test, davidson_liwc_vect, 'dict')
wulczyn_tr_liwc = vectorize(wulczyn_liwc.data, davidson_liwc_vect, 'dict')
wulczyn_de_liwc = vectorize(wulczyn_liwc.dev, davidson_liwc_vect, 'dict')
wulczyn_te_liwc = vectorize(wulczyn_liwc.test, davidson_liwc_vect, 'dict')
waseem_tr_liwc = vectorize(waseem_liwc.data, davidson_liwc_vect, 'dict')
waseem_de_liwc = vectorize(waseem_liwc.dev, davidson_liwc_vect, 'dict')
waseem_te_liwc = vectorize(waseem_liwc.test, davidson_liwc_vect, 'dict')
waseem_hovy_tr_liwc = vectorize(waseem_hovy_liwc.data, davidson_liwc_vect, 'dict')
waseem_hovy_de_liwc = vectorize(waseem_hovy_liwc.dev, davidson_liwc_vect, 'dict')
waseem_hovy_te_liwc = vectorize(waseem_hovy_liwc.test, davidson_liwc_vect, 'dict')
oraby_sarcasm_tr_liwc = vectorize(oraby_sarcasm_liwc.data, davidson_liwc_vect, 'dict')
oraby_sarcasm_de_liwc = vectorize(oraby_sarcasm_liwc.dev, davidson_liwc_vect, 'dict')
oraby_sarcasm_te_liwc = vectorize(oraby_sarcasm_liwc.test, davidson_liwc_vect, 'dict')
oraby_factfeel_tr_liwc = vectorize(oraby_factfeel_liwc.data, davidson_liwc_vect, 'dict')
oraby_factfeel_de_liwc = vectorize(oraby_factfeel_liwc.dev, davidson_liwc_vect, 'dict')
oraby_factfeel_te_liwc = vectorize(oraby_factfeel_liwc.test, davidson_liwc_vect, 'dict')
hoover_tr_liwc = vectorize(hoover_liwc.data, davidson_liwc_vect, 'dict')
hoover_de_liwc = vectorize(hoover_liwc.dev, davidson_liwc_vect, 'dict')
hoover_te_liwc = vectorize(hoover_liwc.test, davidson_liwc_vect, 'dict')

In [18]:
davidson_liwc_metrics = Metrics(['f1-score', 'accuracy', 'precision', 'recall'], 'f1-score')
davidson_liwc_model = train_model((davidson_tr_liwc, [doc.label for doc in davidson_liwc.data]), 
                                 (davidson_de_liwc, [doc.label for doc in davidson_liwc.dev]),
                                  davidson_liwc, bpe_svm_grid, davidson_liwc_metrics)
print(davidson_liwc_metrics.scores)



{'C': 0.3, 'penalty': 'l2'}
{'mean_fit_time': array([2.09791523e+01, 3.71622086e-02, 2.01150269e+01, 5.27858734e-02,
       1.82899623e+01, 3.36939812e-02, 3.15752122e+01, 1.14541006e-01,
       3.32538730e+01, 2.88573265e-02, 2.99026863e+01, 5.67827225e-02,
       3.05260980e+01, 4.45348740e-02, 2.73552652e+01, 5.23555756e-02,
       3.00645124e+01, 8.44983101e-02, 2.36143081e+01, 3.39894295e-02]), 'std_fit_time': array([1.04336747e+00, 5.10454891e-03, 4.69969305e-01, 3.73912903e-02,
       1.91691734e+00, 8.10286654e-03, 6.04228196e+00, 9.08852147e-02,
       2.78724994e+00, 6.87970063e-03, 5.76091285e-01, 1.83003835e-02,
       9.51659722e-01, 1.42013674e-02, 1.34225990e+00, 2.24340390e-02,
       7.82876370e-01, 1.87383348e-02, 4.37100611e+00, 7.01483779e-03]), 'mean_score_time': array([0.01812716, 0.        , 0.01134281, 0.        , 0.01086574,
       0.        , 0.03187895, 0.        , 0.01165051, 0.        ,
       0.01091318, 0.        , 0.01725526, 0.        , 0.00959272,
    



### Waseem

In [19]:
waseem_liwc_vect = select_vectorizer('dict')
waseem_tr_liwc = vectorize(waseem_liwc.data, waseem_liwc_vect, 'dict')
waseem_de_liwc = vectorize(waseem_liwc.dev, waseem_liwc_vect, 'dict')
waseem_te_liwc = vectorize(waseem_liwc.test, waseem_liwc_vect, 'dict')
davidson_tr_liwc = vectorize(davidson_liwc.data, waseem_liwc_vect, 'dict')
davidson_de_liwc = vectorize(davidson_liwc.dev, waseem_liwc_vect, 'dict')
davidson_te_liwc = vectorize(davidson_liwc.test, waseem_liwc_vect, 'dict')
wulczyn_tr_liwc = vectorize(wulczyn_liwc.data, waseem_liwc_vect, 'dict')
wulczyn_de_liwc = vectorize(wulczyn_liwc.dev, waseem_liwc_vect, 'dict')
wulczyn_te_liwc = vectorize(wulczyn_liwc.test, waseem_liwc_vect, 'dict')
waseem_hovy_tr_liwc = vectorize(waseem_hovy_liwc.data, waseem_liwc_vect, 'dict')
waseem_hovy_de_liwc = vectorize(waseem_hovy_liwc.dev, waseem_liwc_vect, 'dict')
waseem_hovy_te_liwc = vectorize(waseem_hovy_liwc.test, waseem_liwc_vect, 'dict')
oraby_sarcasm_tr_liwc = vectorize(oraby_sarcasm_liwc.data, waseem_liwc_vect, 'dict')
oraby_sarcasm_de_liwc = vectorize(oraby_sarcasm_liwc.dev, waseem_liwc_vect, 'dict')
oraby_sarcasm_te_liwc = vectorize(oraby_sarcasm_liwc.test, waseem_liwc_vect, 'dict')
oraby_factfeel_tr_liwc = vectorize(oraby_factfeel_liwc.data, waseem_liwc_vect, 'dict')
oraby_factfeel_de_liwc = vectorize(oraby_factfeel_liwc.dev, waseem_liwc_vect, 'dict')
oraby_factfeel_te_liwc = vectorize(oraby_factfeel_liwc.test, waseem_liwc_vect, 'dict')
hoover_tr_liwc = vectorize(hoover_liwc.data, waseem_liwc_vect, 'dict')
hoover_de_liwc = vectorize(hoover_liwc.dev, waseem_liwc_vect, 'dict')
hoover_te_liwc = vectorize(hoover_liwc.test, waseem_liwc_vect, 'dict')

In [20]:
liwc_svm_grid = {'penalty': ['l2', 'l1'], 'C': [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0]}
waseem_liwc_metrics = Metrics(['f1-score', 'accuracy', 'precision', 'recall'], 'f1-score')
waseem_liwc_model = train_model((waseem_tr_liwc, [doc.label for doc in waseem_liwc.data]), 
                                  (waseem_de_liwc, [doc.label for doc in waseem_liwc.dev]),
                                   waseem_liwc, liwc_svm_grid, waseem_liwc_metrics)
print(waseem_liwc_metrics.scores)



{'C': 0.8, 'penalty': 'l2'}
{'mean_fit_time': array([4.22403383, 0.0098484 , 2.63490648, 0.00996604, 2.44947295,
       0.01109676, 2.6258316 , 0.03366647, 2.66835661, 0.00905681,
       2.26226349, 0.0108592 , 2.41505313, 0.01221161, 2.30807238,
       0.00887079, 2.10340519, 0.01001945, 2.29293976, 0.01352162]), 'std_fit_time': array([0.67254439, 0.0049201 , 0.13917457, 0.00331552, 0.08768305,
       0.00449528, 0.05959656, 0.0331939 , 0.21161145, 0.00315538,
       0.02735701, 0.00443825, 0.1008624 , 0.00675816, 0.13813594,
       0.00395959, 0.13666417, 0.00372464, 0.0942535 , 0.01066036]), 'mean_score_time': array([0.00491123, 0.        , 0.00428467, 0.        , 0.00748925,
       0.        , 0.00498242, 0.        , 0.00416541, 0.        ,
       0.00403895, 0.        , 0.01017261, 0.        , 0.00621419,
       0.        , 0.00391016, 0.        , 0.00327144, 0.        ]), 'std_score_time': array([0.00138119, 0.        , 0.00085122, 0.        , 0.00718145,
       0.        , 0.001



### Wulczyn

In [21]:
wulczyn_liwc_vect = select_vectorizer('dict')
waseem_tr_liwc = vectorize(waseem_liwc.data, wulczyn_liwc_vect, 'dict')
waseem_de_liwc = vectorize(waseem_liwc.dev, wulczyn_liwc_vect, 'dict')
waseem_te_liwc = vectorize(waseem_liwc.test, wulczyn_liwc_vect, 'dict')
wulczyn_tr_liwc = vectorize(wulczyn_liwc.data, wulczyn_liwc_vect, 'dict')
wulczyn_de_liwc = vectorize(wulczyn_liwc.dev, wulczyn_liwc_vect, 'dict')
wulczyn_te_liwc = vectorize(wulczyn_liwc.test, wulczyn_liwc_vect, 'dict')
davidson_tr_liwc = vectorize(davidson_liwc.data, wulczyn_liwc_vect, 'dict')
davidson_de_liwc = vectorize(davidson_liwc.dev, wulczyn_liwc_vect, 'dict')
davidson_te_liwc = vectorize(davidson_liwc.test, wulczyn_liwc_vect, 'dict')
waseem_hovy_tr_liwc = vectorize(waseem_hovy_liwc.data, wulczyn_liwc_vect, 'dict')
waseem_hovy_de_liwc = vectorize(waseem_hovy_liwc.dev, wulczyn_liwc_vect, 'dict')
waseem_hovy_te_liwc = vectorize(waseem_hovy_liwc.test, wulczyn_liwc_vect, 'dict')
oraby_sarcasm_tr_liwc = vectorize(oraby_sarcasm_liwc.data, wulczyn_liwc_vect, 'dict')
oraby_sarcasm_de_liwc = vectorize(oraby_sarcasm_liwc.dev, wulczyn_liwc_vect, 'dict')
oraby_sarcasm_te_liwc = vectorize(oraby_sarcasm_liwc.test, wulczyn_liwc_vect, 'dict')
oraby_factfeel_tr_liwc = vectorize(oraby_factfeel_liwc.data, wulczyn_liwc_vect, 'dict')
oraby_factfeel_de_liwc = vectorize(oraby_factfeel_liwc.dev, wulczyn_liwc_vect, 'dict')
oraby_factfeel_te_liwc = vectorize(oraby_factfeel_liwc.test, wulczyn_liwc_vect, 'dict')
hoover_tr_liwc = vectorize(hoover_liwc.data, wulczyn_liwc_vect, 'dict')
hoover_de_liwc = vectorize(hoover_liwc.dev, wulczyn_liwc_vect, 'dict')
hoover_te_liwc = vectorize(hoover_liwc.test, wulczyn_liwc_vect, 'dict')

In [22]:
liwc_svm_grid = {'penalty': ['l2', 'l1'], 'C': [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0]}
wulczyn_liwc_metrics = Metrics(['f1-score', 'accuracy', 'precision', 'recall'], 'f1-score')
wulczyn_liwc_model = train_model((wulczyn_tr_liwc, [doc.label for doc in wulczyn_liwc.data]), 
                                  (wulczyn_de_liwc, [doc.label for doc in wulczyn_liwc.dev]),
                                   wulczyn_liwc, liwc_svm_grid, wulczyn_liwc_metrics)
print(wulczyn_liwc_metrics.scores)



{'C': 1.0, 'penalty': 'l2'}
{'mean_fit_time': array([ 54.94813437,   0.25237861,  61.59437184,   0.3416223 ,
        70.34237223,   0.35101914,  79.62076797,   0.25146661,
       108.38874164,   0.59970965, 105.02918854,   0.60522113,
        92.63073702,   0.29760838,  86.17313061,   0.24022584,
        75.61475205,   0.23624535,  69.19804459,   0.25574999]), 'std_fit_time': array([5.74220091e-01, 5.50749719e-02, 3.04116780e+00, 6.09563249e-02,
       3.28490313e+00, 6.52832545e-02, 3.27101833e+00, 3.04943491e-02,
       6.82171272e+00, 2.01033528e-01, 4.39220907e+00, 2.37830079e-01,
       7.38677448e+00, 5.09113545e-02, 6.75260926e-01, 1.69361620e-02,
       3.15747405e+00, 2.47168872e-02, 1.54810445e+01, 9.33257468e-03]), 'mean_score_time': array([0.04317298, 0.        , 0.06609054, 0.        , 0.0540226 ,
       0.        , 0.07556028, 0.        , 0.08801513, 0.        ,
       0.09679732, 0.        , 0.06182199, 0.        , 0.05958343,
       0.        , 0.05049233, 0.        , 0



## Evaluate single task liwc models

In [23]:
davidson_liwc_eval_metrics = Metrics(['f1-score', 'accuracy', 'precision', 'recall'], 'f1-score')
evaluate_single_task(davidson_liwc_model, davidson_liwc_vect, davidson_liwc.test, davidson_liwc_eval_metrics)

waseem_liwc_eval_metrics = Metrics(['f1-score', 'accuracy', 'precision', 'recall'], 'f1-score')
evaluate_single_task(waseem_liwc_model, waseem_liwc_vect, waseem_liwc.test, waseem_liwc_eval_metrics)

wulczyn_liwc_eval_metrics = Metrics(['f1-score', 'accuracy', 'precision', 'recall'], 'f1-score')
evaluate_single_task(wulczyn_liwc_model, wulczyn_liwc_vect, wulczyn_liwc.test, wulczyn_liwc_eval_metrics)

{'f1-score': [0.6142350240280037], 'accuracy': [0.8898749495764421], 'precision': [0.7830107163000565], 'recall': [0.6338177447552448], 'loss': []}
{'f1-score': [0.29093294982337775], 'accuracy': [0.846820809248555], 'precision': [0.3409606656580938], 'recall': [0.2848094738026479], 'loss': []}
{'f1-score': [0.8566454911660655], 'accuracy': [0.9555011611121571], 'precision': [0.9050873520472182], 'recall': [0.820637548546594], 'loss': []}


# Ensemble Classifier

Linear SVM classifiers for each task and a LogisticRegression Classifier on top

In [24]:
def ensemble_train(classifiers, train, dev, dataset, grid_search, metrics):
    model = LogisticRegression(max_iter = 1000)
    
    trainY = [doc.label for doc in train]
    trainX = [Counter(doc.text) for doc in train]
    
    ensemble_tr = []
    
    for doc in trainX:
        doc_preds = {}
        for clf, vect in classifiers:
            d = vect.transform([doc])
            pred = clf.predict(d)
            doc_preds[clf.name] = pred
        ensemble_tr.append(doc_preds)
        
    vect = select_vectorizer('dict')
    vect.fit(ensemble_tr)
    
    trainX = vect.transform(ensemble_tr)
    model = GridSearchCV(model, grid_search, 'f1_macro', n_jobs = -1, cv = 5, refit = True)
    model.fit(trainX, trainY)
    
    print(model.best_params_)
    print(model.cv_results_)
    
    devY = [doc.label for doc in dev]
    devX = vect.transform([Counter(doc.text) for doc in dev])
    preds = model.predict(devX)
    metrics.compute(preds, devY)
    
    print(metrics.last_display())
    
    return model, vect

In [25]:
def evaluate_ensemble_model(ensemble_model, task_models, data, metrics):
    
    ensemble_clf, ensemble_vect = ensemble_model
    test, testY = [doc.text for doc in data], [doc.label for doc in data]

    testX = []
    for doc in test:
        doc_preds = {}
        for clf, vect in task_models:
            d = vect.transform([Counter(doc)])
            pred = clf.predict(d)
            doc_preds[clf.name] = pred[0]
        testX.append(doc_preds)
    
    testX = ensemble_vect.transform(testX)
    preds = ensemble_clf.predict(testX)
    metrics.compute(preds, testY)
    
    print(metrics.last_display())

## Ensemble BPE Models

In [26]:
davidson_ensemble_bpe_vect = select_vectorizer('dict')
davidson_ensemble_tr_bpe = vectorize(davidson_bpe.data, davidson_ensemble_bpe_vect, 'dict')
davidson_ensemble_de_bpe = vectorize(davidson_bpe.dev, davidson_ensemble_bpe_vect, 'dict')
davidson_ensemble_te_bpe = vectorize(davidson_bpe.test, davidson_ensemble_bpe_vect, 'dict')

wulczyn_ensemble_bpe_vect = select_vectorizer('dict')
wulczyn_ensemble_tr_bpe = vectorize(wulczyn_bpe.data, wulczyn_ensemble_bpe_vect, 'dict')
wulczyn_ensemble_de_bpe = vectorize(wulczyn_bpe.dev, wulczyn_ensemble_bpe_vect, 'dict')
wulczyn_ensemble_te_bpe = vectorize(wulczyn_bpe.test, wulczyn_ensemble_bpe_vect, 'dict')

waseem_ensemble_bpe_vect = select_vectorizer('dict')
waseem_ensemble_tr_bpe = vectorize(waseem_bpe.data, waseem_ensemble_bpe_vect, 'dict')
waseem_ensemble_de_bpe = vectorize(waseem_bpe.dev, waseem_ensemble_bpe_vect, 'dict')
waseem_ensemble_te_bpe = vectorize(waseem_bpe.test, waseem_ensemble_bpe_vect, 'dict')

waseem_hovy_ensemble_bpe_vect = select_vectorizer('dict')
waseem_hovy_ensemble_tr_bpe = vectorize(waseem_hovy_bpe.data, waseem_hovy_ensemble_bpe_vect, 'dict')
waseem_hovy_ensemble_de_bpe = vectorize(waseem_hovy_bpe.dev, waseem_hovy_ensemble_bpe_vect, 'dict')
waseem_hovy_ensemble_te_bpe = vectorize(waseem_hovy_bpe.test, waseem_hovy_ensemble_bpe_vect, 'dict')

oraby_sarcasm_ensemble_bpe_vect = select_vectorizer('dict')
oraby_sarcasm_ensemble_tr_bpe = vectorize(oraby_sarcasm_bpe.data, oraby_sarcasm_ensemble_bpe_vect, 'dict')
oraby_sarcasm_ensemble_de_bpe = vectorize(oraby_sarcasm_bpe.dev, oraby_sarcasm_ensemble_bpe_vect, 'dict')
oraby_sarcasm_ensemble_te_bpe = vectorize(oraby_sarcasm_bpe.test, oraby_sarcasm_ensemble_bpe_vect, 'dict')

oraby_factfeel_ensemble_bpe_vect = select_vectorizer('dict')
oraby_factfeel_ensemble_tr_bpe = vectorize(oraby_factfeel_bpe.data, oraby_factfeel_ensemble_bpe_vect, 'dict')
oraby_factfeel_ensemble_de_bpe = vectorize(oraby_factfeel_bpe.dev, oraby_factfeel_ensemble_bpe_vect, 'dict')
oraby_factfeel_ensemble_te_bpe = vectorize(oraby_factfeel_bpe.test, oraby_factfeel_ensemble_bpe_vect, 'dict')

hoover_ensemble_bpe_vect = select_vectorizer('dict')
hoover_ensemble_tr_bpe = vectorize(hoover_bpe.data, hoover_ensemble_bpe_vect, 'dict')
hoover_ensemble_de_bpe = vectorize(hoover_bpe.dev, hoover_ensemble_bpe_vect, 'dict')
hoover_ensemble_te_bpe = vectorize(hoover_bpe.test, hoover_ensemble_bpe_vect, 'dict')

In [27]:
grid_search = {'penalty': ['l2', 'l1'], 'C': [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0]}
davidson_ensemble_metrics_bpe = Metrics(['f1-score', 'accuracy', 'precision', 'recall'], 'f1-score')
wulczyn_ensemble_metrics_bpe = Metrics(['f1-score', 'accuracy', 'precision', 'recall'], 'f1-score')
waseem_ensemble_metrics_bpe = Metrics(['f1-score', 'accuracy', 'precision', 'recall'], 'f1-score')
waseem_hovy_ensemble_metrics_bpe = Metrics(['f1-score', 'accuracy', 'precision', 'recall'], 'f1-score')
oraby_sarcasm_ensemble_metrics_bpe = Metrics(['f1-score', 'accuracy', 'precision', 'recall'], 'f1-score')
oraby_factfeel_ensemble_metrics_bpe = Metrics(['f1-score', 'accuracy', 'precision', 'recall'], 'f1-score')
hoover_ensemble_metrics_bpe = Metrics(['f1-score', 'accuracy', 'precision', 'recall'], 'f1-score')

In [28]:
davidson_ensemble_model_bpe = train_model((davidson_ensemble_tr_bpe, [doc.label for doc in davidson_bpe.data]), 
                                      (davidson_ensemble_de_bpe, [doc.label for doc in davidson_bpe.dev]), 
                                      davidson_bpe, grid_search, davidson_ensemble_metrics_bpe)
setattr(davidson_ensemble_model_bpe, 'name', 'davidson')
wulczyn_ensemble_model_bpe = train_model((wulczyn_ensemble_tr_bpe, [doc.label for doc in wulczyn_bpe.data]), 
                                      (wulczyn_ensemble_de_bpe, [doc.label for doc in wulczyn_bpe.dev]), 
                                      wulczyn_bpe, grid_search, wulczyn_ensemble_metrics_bpe)
setattr(wulczyn_ensemble_model_bpe, 'name', 'wulczyn')
waseem_ensemble_model_bpe = train_model((waseem_ensemble_tr_bpe, [doc.label for doc in waseem_bpe.data]), 
                                    (waseem_ensemble_de_bpe, [doc.label for doc in waseem_bpe.dev]), 
                                    waseem_bpe, grid_search, waseem_ensemble_metrics_bpe)
setattr(waseem_ensemble_model_bpe, 'name', 'waseem')
waseem_hovy_ensemble_model_bpe = train_model((waseem_hovy_ensemble_tr_bpe, [doc.label for doc in waseem_hovy_bpe.data]), 
                                         (waseem_hovy_ensemble_de_bpe, [doc.label for doc in waseem_hovy_bpe.dev]), 
                                         waseem_hovy_bpe, grid_search, waseem_hovy_ensemble_metrics_bpe)
setattr(waseem_hovy_ensemble_model_bpe, 'name', 'waseem-hovy')
oraby_sarcasm_ensemble_model_bpe = train_model((oraby_sarcasm_ensemble_tr_bpe, [doc.label for doc in oraby_sarcasm_bpe.data]), 
                                           (oraby_sarcasm_ensemble_de_bpe, [doc.label for doc in oraby_sarcasm_bpe.dev]), 
                                           oraby_sarcasm_bpe, grid_search, oraby_sarcasm_ensemble_metrics_bpe)
setattr(oraby_sarcasm_ensemble_model_bpe, 'name', 'oraby-sarcasm')
oraby_factfeel_ensemble_model_bpe = train_model((oraby_factfeel_ensemble_tr_bpe, [doc.label for doc in oraby_factfeel_bpe.data]), 
                                            (oraby_factfeel_ensemble_de_bpe, [doc.label for doc in oraby_factfeel_bpe.dev]), 
                                            oraby_factfeel_bpe, grid_search, oraby_factfeel_ensemble_metrics_bpe)
setattr(oraby_factfeel_ensemble_model_bpe, 'name', 'oraby-factfeel')
hoover_ensemble_model_bpe = train_model((hoover_ensemble_tr_bpe, [doc.label for doc in hoover_bpe.data]), 
                                    (hoover_ensemble_de_bpe, [doc.label for doc in hoover_bpe.dev]), 
                                    hoover_bpe, grid_search, hoover_ensemble_metrics_bpe)
setattr(hoover_ensemble_model_bpe, 'name', 'hoover')



{'C': 0.2, 'penalty': 'l2'}
{'mean_fit_time': array([18.34312882,  0.0358686 , 18.10254712,  0.04098105, 16.54497986,
        0.03770423, 16.74513526,  0.04405651, 16.26372828,  0.04423418,
       14.91031075,  0.04024143, 15.58093524,  0.03702993, 14.30964665,
        0.03711805, 14.07926006,  0.04086337, 13.22344704,  0.03786292]), 'std_fit_time': array([0.72277652, 0.00476419, 0.43636205, 0.00991362, 0.44418092,
       0.00947297, 0.58377108, 0.01971872, 0.46643294, 0.01126637,
       0.56864651, 0.00836352, 1.34417944, 0.00593362, 0.98622413,
       0.00546213, 0.86382472, 0.01937218, 3.07613221, 0.00735063]), 'mean_score_time': array([0.01438422, 0.        , 0.01821699, 0.        , 0.0101243 ,
       0.        , 0.01566825, 0.        , 0.01396623, 0.        ,
       0.01846595, 0.        , 0.01419034, 0.        , 0.01690154,
       0.        , 0.01064782, 0.        , 0.01516294, 0.        ]), 'std_score_time': array([0.00281503, 0.        , 0.0063426 , 0.        , 0.00236304,
    



{'C': 0.1, 'penalty': 'l2'}
{'mean_fit_time': array([40.98641376,  0.35224195, 38.38948841,  0.32494707, 41.32799592,
        0.38122439, 59.39116926,  0.59649606, 51.3167325 ,  0.35279527,
       44.57777486,  0.41447296, 56.32493563,  0.33923974, 64.55004959,
        0.43928156, 46.18717618,  0.46907821, 39.65747242,  0.3639668 ]), 'std_fit_time': array([1.09361407, 0.02092041, 0.51550739, 0.02824659, 2.87353278,
       0.08734314, 8.18270332, 0.16368749, 5.77390944, 0.06668079,
       3.8257028 , 0.08200626, 6.43468776, 0.07858846, 1.94103981,
       0.10092275, 0.47259923, 0.0517796 , 7.57258609, 0.06014593]), 'mean_score_time': array([0.06328039, 0.        , 0.05927238, 0.        , 0.09384627,
       0.        , 0.12936873, 0.        , 0.07632408, 0.        ,
       0.08968706, 0.        , 0.09216232, 0.        , 0.06253033,
       0.        , 0.07403579, 0.        , 0.0797749 , 0.        ]), 'std_score_time': array([0.00991145, 0.        , 0.01031468, 0.        , 0.07407959,
    



{'C': 0.7, 'penalty': 'l2'}
{'mean_fit_time': array([1.3401361 , 0.01057439, 1.70602593, 0.01316895, 2.31351018,
       0.00789852, 2.70546608, 0.01278   , 2.49619007, 0.01289477,
       2.75268936, 0.01263342, 2.53998919, 0.02669792, 2.6152494 ,
       0.0198626 , 2.64963479, 0.01518421, 2.71759357, 0.01998296]), 'std_fit_time': array([0.11985454, 0.00480437, 0.12645751, 0.00729632, 0.32894664,
       0.00290581, 0.20759325, 0.00678382, 0.15913875, 0.00648274,
       0.16155683, 0.00339661, 0.22442123, 0.01938575, 0.16321424,
       0.00809528, 0.18612744, 0.00822035, 0.54889736, 0.01234854]), 'mean_score_time': array([0.00427117, 0.        , 0.01132984, 0.        , 0.00897093,
       0.        , 0.0073195 , 0.        , 0.00384302, 0.        ,
       0.00594144, 0.        , 0.00700107, 0.        , 0.00784898,
       0.        , 0.0116076 , 0.        , 0.00497384, 0.        ]), 'std_score_time': array([0.00087109, 0.        , 0.00571801, 0.        , 0.00858517,
       0.        , 0.003



{'C': 0.1, 'penalty': 'l2'}
{'mean_fit_time': array([3.47740479, 0.02514682, 5.09941201, 0.02272897, 5.26017942,
       0.02990451, 5.19005828, 0.02861223, 5.40120735, 0.01978874,
       4.80489597, 0.01834388, 4.63162122, 0.02018867, 4.56380849,
       0.02093811, 4.68187785, 0.01589489, 4.80463076, 0.0223598 ]), 'std_fit_time': array([0.54925158, 0.01330062, 0.20688678, 0.00465598, 0.25027343,
       0.01447299, 0.24791618, 0.0257264 , 0.32532648, 0.00622378,
       0.21810309, 0.0052147 , 0.3676677 , 0.00725772, 0.13182352,
       0.01109749, 0.12903366, 0.00446137, 0.78088746, 0.00595513]), 'mean_score_time': array([0.00608358, 0.        , 0.00798254, 0.        , 0.0049469 ,
       0.        , 0.00610738, 0.        , 0.00700145, 0.        ,
       0.00634937, 0.        , 0.00817637, 0.        , 0.00916538,
       0.        , 0.00648618, 0.        , 0.00952005, 0.        ]), 'std_score_time': array([0.00512415, 0.        , 0.00430441, 0.        , 0.00145561,
       0.        , 0.001



{'C': 0.1, 'penalty': 'l2'}
{'mean_fit_time': array([5.59000077, 0.02255635, 5.07571306, 0.02818694, 5.62914305,
       0.01868176, 5.4807333 , 0.02764387, 5.35579014, 0.02006836,
       6.34782462, 0.03740726, 6.46113119, 0.02059879, 4.99326277,
       0.01832771, 5.14000797, 0.02087131, 4.39990048, 0.03115444]), 'std_fit_time': array([0.36457046, 0.00326995, 0.31372987, 0.01200398, 0.17483111,
       0.00718445, 0.14227142, 0.00765953, 0.08710858, 0.00500694,
       1.16599375, 0.01825764, 1.04169413, 0.00716654, 0.28013817,
       0.00375059, 0.14709906, 0.00417787, 0.85253257, 0.00983682]), 'mean_score_time': array([0.00836644, 0.        , 0.00607581, 0.        , 0.01044397,
       0.        , 0.00641208, 0.        , 0.01012063, 0.        ,
       0.0084156 , 0.        , 0.00928321, 0.        , 0.00624275,
       0.        , 0.00905318, 0.        , 0.00809865, 0.        ]), 'std_score_time': array([0.00368597, 0.        , 0.00198275, 0.        , 0.00232154,
       0.        , 0.002



In [29]:
grid_search = {'penalty': ['l2', 'l1'], 'C': [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0]}
davidson_ensemble_clfs_metrics_bpe = Metrics(['f1-score', 'accuracy', 'precision', 'recall'], 'f1-score')
davidson_ensemble_clfs_bpe = [
        (wulczyn_ensemble_model_bpe, wulczyn_ensemble_bpe_vect), (waseem_ensemble_model_bpe, waseem_ensemble_bpe_vect), 
        (waseem_hovy_ensemble_model_bpe, waseem_hovy_ensemble_bpe_vect),
        (oraby_sarcasm_ensemble_model_bpe, oraby_sarcasm_ensemble_bpe_vect), 
        (oraby_factfeel_ensemble_model_bpe, oraby_factfeel_ensemble_bpe_vect),
        (hoover_ensemble_model_bpe, hoover_ensemble_bpe_vect)]
davidson_ensemble_trained_model_bpe = ensemble_train(davidson_ensemble_clfs_bpe, davidson_bpe.data, davidson_bpe.dev, 
                                                     davidson_bpe, grid_search, davidson_ensemble_clfs_metrics_bpe)



{'C': 0.4, 'penalty': 'l2'}
{'mean_fit_time': array([2.15162182, 0.01162229, 1.78843212, 0.01028261, 2.11871057,
       0.02895732, 1.9813983 , 0.02709394, 2.29530439, 0.02019944,
       2.12951365, 0.01846361, 2.33253489, 0.01308637, 2.19370031,
       0.01381121, 1.96858592, 0.01173062, 1.64468765, 0.01094999]), 'std_fit_time': array([1.06784435e-01, 1.34907574e-03, 2.74244795e-01, 4.06366031e-04,
       3.68755114e-01, 1.47842967e-02, 1.76147799e-01, 1.27088500e-02,
       2.14439168e-01, 8.43798540e-03, 1.29387358e-01, 6.47586750e-03,
       4.47281627e-01, 2.54234476e-03, 2.51494594e-01, 3.07188728e-03,
       1.17319108e-01, 3.31118272e-03, 2.98001666e-01, 1.64150999e-03]), 'mean_score_time': array([0.01255999, 0.        , 0.00795689, 0.        , 0.00942879,
       0.        , 0.00902719, 0.        , 0.00905695, 0.        ,
       0.00962548, 0.        , 0.00828052, 0.        , 0.01411538,
       0.        , 0.00803123, 0.        , 0.00891061, 0.        ]), 'std_score_time': arra

In [30]:
waseem_ensemble_clfs_metrics_bpe = Metrics(['f1-score', 'accuracy', 'precision', 'recall'], 'f1-score')
waseem_ensemble_clfs_bpe = [
        (wulczyn_ensemble_model_bpe, wulczyn_ensemble_bpe_vect), (davidson_ensemble_model_bpe, davidson_ensemble_bpe_vect), 
        (waseem_hovy_ensemble_model_bpe, waseem_hovy_ensemble_bpe_vect),
        (oraby_sarcasm_ensemble_model_bpe, oraby_sarcasm_ensemble_bpe_vect), (oraby_factfeel_ensemble_model_bpe, oraby_factfeel_ensemble_bpe_vect),
        (hoover_ensemble_model_bpe, hoover_ensemble_bpe_vect)]
waseem_ensemble_trained_model_bpe = ensemble_train(waseem_ensemble_clfs_bpe, waseem_bpe.data, waseem_bpe.dev, 
                                                   waseem_bpe, grid_search, waseem_ensemble_clfs_metrics_bpe)



{'C': 0.1, 'penalty': 'l2'}
{'mean_fit_time': array([1.40586576, 0.00449653, 1.32316055, 0.00343928, 1.69988661,
       0.00357604, 1.79268126, 0.00725918, 1.74285436, 0.00396323,
       1.81206803, 0.00433803, 1.55313368, 0.00432253, 1.77141738,
       0.00531659, 1.78974037, 0.01573462, 1.44362473, 0.00390868]), 'std_fit_time': array([1.09510427e-01, 2.40763654e-03, 1.09742283e-01, 7.04697888e-05,
       3.28774622e-01, 3.62842968e-04, 1.06795015e-01, 2.71257038e-03,
       1.59503828e-01, 5.07720031e-04, 6.79965189e-02, 1.97656697e-03,
       6.61817015e-02, 1.60738113e-03, 1.45874752e-01, 2.31732802e-03,
       2.59188381e-01, 1.19227855e-02, 8.94887238e-02, 1.68468487e-03]), 'mean_score_time': array([0.00344067, 0.        , 0.00927196, 0.        , 0.00438967,
       0.        , 0.00450039, 0.        , 0.00440259, 0.        ,
       0.0092648 , 0.        , 0.00421977, 0.        , 0.01117296,
       0.        , 0.00810843, 0.        , 0.00595512, 0.        ]), 'std_score_time': arra

In [31]:
wulczyn_ensemble_clfs_metrics_bpe = Metrics(['f1-score', 'accuracy', 'precision', 'recall'], 'f1-score')
wulczyn_ensemble_clfs_bpe = [
        (davidson_ensemble_model_bpe, davidson_ensemble_bpe_vect), (waseem_ensemble_model_bpe, waseem_ensemble_bpe_vect), 
        (waseem_hovy_ensemble_model_bpe, waseem_hovy_ensemble_bpe_vect),
        (oraby_sarcasm_ensemble_model_bpe, oraby_sarcasm_ensemble_bpe_vect), 
        (oraby_factfeel_ensemble_model_bpe, oraby_factfeel_ensemble_bpe_vect),
        (hoover_ensemble_model_bpe, hoover_ensemble_bpe_vect)]
wulczyn_ensemble_trained_model_bpe = ensemble_train(wulczyn_ensemble_clfs_bpe, wulczyn_bpe.data, wulczyn_bpe.dev, 
                                                wulczyn_bpe, grid_search, wulczyn_ensemble_clfs_metrics_bpe)



{'C': 0.2, 'penalty': 'l2'}
{'mean_fit_time': array([0.79154797, 0.0678988 , 0.71339769, 0.07875328, 0.63756642,
       0.06529994, 0.88209081, 0.11160207, 0.7546484 , 0.09267435,
       0.68538423, 0.07063217, 0.75816431, 0.0728653 , 0.63354006,
       0.06442552, 0.56785135, 0.08279533, 0.53684363, 0.05309205]), 'std_fit_time': array([0.13157922, 0.00412266, 0.11559632, 0.00938146, 0.05999268,
       0.02150903, 0.04666181, 0.03100776, 0.10115637, 0.02707972,
       0.07828293, 0.01642838, 0.132395  , 0.00991846, 0.08534771,
       0.02146351, 0.04786881, 0.01472882, 0.02962008, 0.00596926]), 'mean_score_time': array([0.04810281, 0.        , 0.03784504, 0.        , 0.04142275,
       0.        , 0.06174111, 0.        , 0.04984665, 0.        ,
       0.03693576, 0.        , 0.03759151, 0.        , 0.03965502,
       0.        , 0.03461962, 0.        , 0.03075895, 0.        ]), 'std_score_time': array([0.00709329, 0.        , 0.00792069, 0.        , 0.00910621,
       0.        , 0.011

## Evaluate BPE Ensemble Model

In [32]:
ensemble_davidson_bpe_eval_metrics = Metrics(['f1-score', 'accuracy', 'precision', 'recall'], 'f1-score')
davidson_ensemble_clfs_bpe = [(wulczyn_ensemble_model_bpe, wulczyn_ensemble_bpe_vect), 
                          (waseem_ensemble_model_bpe, waseem_ensemble_bpe_vect), 
                          (waseem_hovy_ensemble_model_bpe, waseem_hovy_ensemble_bpe_vect),
                          (oraby_sarcasm_ensemble_model_bpe, oraby_sarcasm_ensemble_bpe_vect), 
                          (oraby_factfeel_ensemble_model_bpe, oraby_factfeel_ensemble_bpe_vect),
                          (hoover_ensemble_model_bpe, hoover_ensemble_bpe_vect)]

evaluate_ensemble_model(davidson_ensemble_trained_model_bpe, davidson_ensemble_clfs_bpe, davidson_bpe.test, 
                        ensemble_davidson_bpe_eval_metrics)

0.40629303245450266


In [33]:
ensemble_waseem_bpe_eval_metrics = Metrics(['f1-score', 'accuracy', 'precision', 'recall'], 'f1-score')
waseem_ensemble_clfs_bpe = [(wulczyn_ensemble_model_bpe, wulczyn_ensemble_bpe_vect), 
                          (davidson_ensemble_model_bpe, davidson_ensemble_bpe_vect), 
                          (waseem_hovy_ensemble_model_bpe, waseem_hovy_ensemble_bpe_vect),
                          (oraby_sarcasm_ensemble_model_bpe, oraby_sarcasm_ensemble_bpe_vect), 
                          (oraby_factfeel_ensemble_model_bpe, oraby_factfeel_ensemble_bpe_vect),
                          (hoover_ensemble_model_bpe, hoover_ensemble_bpe_vect)]

evaluate_ensemble_model(waseem_ensemble_trained_model_bpe, waseem_ensemble_clfs_bpe, waseem_bpe.test, 
                        ensemble_waseem_bpe_eval_metrics)

0.29658263909597926


In [34]:
ensemble_wulczyn_bpe_eval_metrics = Metrics(['f1-score', 'accuracy', 'precision', 'recall'], 'f1-score')
wulczyn_ensemble_clfs_bpe = [(wulczyn_ensemble_model_bpe, wulczyn_ensemble_bpe_vect), 
                          (davidson_ensemble_model_bpe, davidson_ensemble_bpe_vect), 
                          (waseem_hovy_ensemble_model_bpe, waseem_hovy_ensemble_bpe_vect),
                          (oraby_sarcasm_ensemble_model_bpe, oraby_sarcasm_ensemble_bpe_vect), 
                          (oraby_factfeel_ensemble_model_bpe, oraby_factfeel_ensemble_bpe_vect),
                          (hoover_ensemble_model_bpe, hoover_ensemble_bpe_vect)]

evaluate_ensemble_model(wulczyn_ensemble_trained_model_bpe, wulczyn_ensemble_clfs_bpe, wulczyn_bpe.test, 
                        ensemble_wulczyn_bpe_eval_metrics)

0.598776911220475


## Ensemble LIWC model

In [35]:
davidson_ensemble_liwc_vect = select_vectorizer('dict')
davidson_ensemble_tr_liwc = vectorize(davidson_liwc.data, davidson_ensemble_liwc_vect, 'dict')
davidson_ensemble_de_liwc = vectorize(davidson_liwc.dev, davidson_ensemble_liwc_vect, 'dict')
davidson_ensemble_te_liwc = vectorize(davidson_liwc.test, davidson_ensemble_liwc_vect, 'dict')

wulczyn_ensemble_liwc_vect = select_vectorizer('dict')
wulczyn_ensemble_tr_liwc = vectorize(wulczyn_liwc.data, wulczyn_ensemble_liwc_vect, 'dict')
wulczyn_ensemble_de_liwc = vectorize(wulczyn_liwc.dev, wulczyn_ensemble_liwc_vect, 'dict')
wulczyn_ensemble_te_liwc = vectorize(wulczyn_liwc.test, wulczyn_ensemble_liwc_vect, 'dict')

waseem_ensemble_liwc_vect = select_vectorizer('dict')
waseem_ensemble_tr_liwc = vectorize(waseem_liwc.data, waseem_ensemble_liwc_vect, 'dict')
waseem_ensemble_de_liwc = vectorize(waseem_liwc.dev, waseem_ensemble_liwc_vect, 'dict')
waseem_ensemble_te_liwc = vectorize(waseem_liwc.test, waseem_ensemble_liwc_vect, 'dict')

waseem_hovy_ensemble_liwc_vect = select_vectorizer('dict')
waseem_hovy_ensemble_tr_liwc = vectorize(waseem_hovy_liwc.data, waseem_hovy_ensemble_liwc_vect, 'dict')
waseem_hovy_ensemble_de_liwc = vectorize(waseem_hovy_liwc.dev, waseem_hovy_ensemble_liwc_vect, 'dict')
waseem_hovy_ensemble_te_liwc = vectorize(waseem_hovy_liwc.test, waseem_hovy_ensemble_liwc_vect, 'dict')

oraby_sarcasm_ensemble_liwc_vect = select_vectorizer('dict')
oraby_sarcasm_ensemble_tr_liwc = vectorize(oraby_sarcasm_liwc.data, oraby_sarcasm_ensemble_liwc_vect, 'dict')
oraby_sarcasm_ensemble_de_liwc = vectorize(oraby_sarcasm_liwc.dev, oraby_sarcasm_ensemble_liwc_vect, 'dict')
oraby_sarcasm_ensemble_te_liwc = vectorize(oraby_sarcasm_liwc.test, oraby_sarcasm_ensemble_liwc_vect, 'dict')

oraby_factfeel_ensemble_liwc_vect = select_vectorizer('dict')
oraby_factfeel_ensemble_tr_liwc = vectorize(oraby_factfeel_liwc.data, oraby_factfeel_ensemble_liwc_vect, 'dict')
oraby_factfeel_ensemble_de_liwc = vectorize(oraby_factfeel_liwc.dev, oraby_factfeel_ensemble_liwc_vect, 'dict')
oraby_factfeel_ensemble_te_liwc = vectorize(oraby_factfeel_liwc.test, oraby_factfeel_ensemble_liwc_vect, 'dict')

hoover_ensemble_liwc_vect = select_vectorizer('dict')
hoover_ensemble_tr_liwc = vectorize(hoover_liwc.data, hoover_ensemble_liwc_vect, 'dict')
hoover_ensemble_de_liwc = vectorize(hoover_liwc.dev, hoover_ensemble_liwc_vect, 'dict')
hoover_ensemble_te_liwc = vectorize(hoover_liwc.test, hoover_ensemble_liwc_vect, 'dict')

In [36]:
grid_search = {'penalty': ['l2', 'l1'], 'C': [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0]}
davidson_ensemble_metrics_liwc = Metrics(['f1-score', 'accuracy', 'precision', 'recall'], 'f1-score')
wulczyn_ensemble_metrics_liwc = Metrics(['f1-score', 'accuracy', 'precision', 'recall'], 'f1-score')
waseem_ensemble_metrics_liwc = Metrics(['f1-score', 'accuracy', 'precision', 'recall'], 'f1-score')
waseem_hovy_ensemble_metrics_liwc = Metrics(['f1-score', 'accuracy', 'precision', 'recall'], 'f1-score')
oraby_sarcasm_ensemble_metrics_liwc = Metrics(['f1-score', 'accuracy', 'precision', 'recall'], 'f1-score')
oraby_factfeel_ensemble_metrics_liwc = Metrics(['f1-score', 'accuracy', 'precision', 'recall'], 'f1-score')
hoover_ensemble_metrics_liwc = Metrics(['f1-score', 'accuracy', 'precision', 'recall'], 'f1-score')

In [37]:
davidson_ensemble_model_liwc = train_model((davidson_ensemble_tr_liwc, [doc.label for doc in davidson_liwc.data]), 
                                      (davidson_ensemble_de_liwc, [doc.label for doc in davidson_liwc.dev]), 
                                      davidson_liwc, grid_search, davidson_ensemble_metrics_liwc)
setattr(davidson_ensemble_model_liwc, 'name', 'davidson')
wulczyn_ensemble_model_liwc = train_model((wulczyn_ensemble_tr_liwc, [doc.label for doc in wulczyn_liwc.data]), 
                                      (wulczyn_ensemble_de_liwc, [doc.label for doc in wulczyn_liwc.dev]), 
                                      wulczyn_liwc, grid_search, wulczyn_ensemble_metrics_liwc)
setattr(wulczyn_ensemble_model_liwc, 'name', 'wulczyn')
waseem_ensemble_model_liwc = train_model((waseem_ensemble_tr_liwc, [doc.label for doc in waseem_liwc.data]), 
                                    (waseem_ensemble_de_liwc, [doc.label for doc in waseem_liwc.dev]), 
                                    waseem_liwc, grid_search, waseem_ensemble_metrics_liwc)
setattr(waseem_ensemble_model_liwc, 'name', 'waseem')
waseem_hovy_ensemble_model_liwc = train_model((waseem_hovy_ensemble_tr_liwc, [doc.label for doc in waseem_hovy_liwc.data]), 
                                         (waseem_hovy_ensemble_de_liwc, [doc.label for doc in waseem_hovy_liwc.dev]), 
                                         waseem_hovy_liwc, grid_search, waseem_hovy_ensemble_metrics_liwc)
setattr(waseem_hovy_ensemble_model_liwc, 'name', 'waseem-hovy')
oraby_sarcasm_ensemble_model_liwc = train_model((oraby_sarcasm_ensemble_tr_liwc, [doc.label for doc in oraby_sarcasm_liwc.data]), 
                                           (oraby_sarcasm_ensemble_de_liwc, [doc.label for doc in oraby_sarcasm_liwc.dev]), 
                                           oraby_sarcasm_liwc, grid_search, oraby_sarcasm_ensemble_metrics_liwc)
setattr(oraby_sarcasm_ensemble_model_liwc, 'name', 'oraby-sarcasm')
oraby_factfeel_ensemble_model_liwc = train_model((oraby_factfeel_ensemble_tr_liwc, [doc.label for doc in oraby_factfeel_liwc.data]), 
                                            (oraby_factfeel_ensemble_de_liwc, [doc.label for doc in oraby_factfeel_liwc.dev]), 
                                            oraby_factfeel_liwc, grid_search, oraby_factfeel_ensemble_metrics_liwc)
setattr(oraby_factfeel_ensemble_model_liwc, 'name', 'oraby-factfeel')
hoover_ensemble_model_liwc = train_model((hoover_ensemble_tr_liwc, [doc.label for doc in hoover_liwc.data]), 
                                    (hoover_ensemble_de_liwc, [doc.label for doc in hoover_liwc.dev]), 
                                    hoover_liwc, grid_search, hoover_ensemble_metrics_liwc)
setattr(hoover_ensemble_model_liwc, 'name', 'hoover')



{'C': 0.3, 'penalty': 'l2'}
{'mean_fit_time': array([2.00897295e+01, 3.67469788e-02, 1.90670350e+01, 3.59298706e-02,
       1.92623750e+01, 5.51073074e-02, 2.30116646e+01, 3.69345188e-02,
       2.80258731e+01, 3.05057526e-02, 2.64864738e+01, 4.52500343e-02,
       3.38636648e+01, 3.68643761e-02, 3.07207834e+01, 8.49872112e-02,
       2.59679227e+01, 5.75111866e-02, 2.25238998e+01, 3.97895336e-02]), 'std_fit_time': array([0.8479245 , 0.00955964, 0.60964271, 0.00839907, 0.82004843,
       0.02710594, 2.10665941, 0.00908411, 2.20062562, 0.00361841,
       2.92370109, 0.01916699, 1.99806097, 0.00580098, 2.19198241,
       0.08048145, 1.15063901, 0.02879758, 2.10856497, 0.01079307]), 'mean_score_time': array([0.01555629, 0.        , 0.00929174, 0.        , 0.00961757,
       0.        , 0.01484947, 0.        , 0.01464653, 0.        ,
       0.01233764, 0.        , 0.01196117, 0.        , 0.01806006,
       0.        , 0.00748401, 0.        , 0.00977635, 0.        ]), 'std_score_time': arra



{'C': 1.0, 'penalty': 'l2'}
{'mean_fit_time': array([55.69628215,  0.25213537, 63.83984089,  0.1855238 , 76.62632122,
        0.60963187, 66.66269536,  0.2319716 , 74.40438781,  0.17893238,
       78.14436193,  0.23507853, 79.74303231,  0.24891424, 64.26930132,
        0.17939939, 58.90564299,  0.19378762, 48.84591475,  0.19888792]), 'std_fit_time': array([3.60094311e-01, 8.12271224e-02, 6.78500842e+00, 8.00881794e-03,
       3.72412371e+00, 9.90729372e-02, 1.44715462e+00, 7.96954517e-02,
       1.04831272e+00, 2.53863626e-02, 1.38385318e+00, 3.87474864e-02,
       1.52673991e+00, 5.87662993e-02, 7.73226544e+00, 6.52010909e-03,
       1.44151255e+00, 6.32561180e-03, 9.75767203e+00, 8.72757062e-03]), 'mean_score_time': array([0.05633082, 0.        , 0.05154085, 0.        , 0.04037571,
       0.        , 0.0466598 , 0.        , 0.0538826 , 0.        ,
       0.08990045, 0.        , 0.06643376, 0.        , 0.04421535,
       0.        , 0.05794539, 0.        , 0.03411369, 0.        ]), 's



{'C': 1.0, 'penalty': 'l2'}
{'mean_fit_time': array([2.24327564, 0.00959058, 1.90708361, 0.02920766, 1.76332822,
       0.00707002, 1.72876897, 0.01307216, 1.99371867, 0.00740643,
       1.76415563, 0.01031499, 1.95682993, 0.0083437 , 1.61717157,
       0.00824547, 1.94269567, 0.00879421, 1.65064902, 0.00705719]), 'std_fit_time': array([1.58063866e-01, 4.34642876e-03, 8.94567593e-02, 2.38243128e-02,
       4.06098251e-02, 8.08003602e-04, 2.59331844e-02, 5.47717356e-03,
       2.14102799e-01, 1.33197139e-03, 1.19650363e-01, 2.18250378e-03,
       1.64591770e-01, 1.29031737e-03, 1.74356782e-01, 1.41755048e-03,
       1.98131480e-02, 2.67305108e-03, 5.20964978e-01, 6.40758082e-05]), 'mean_score_time': array([0.00408907, 0.        , 0.00662804, 0.        , 0.00360541,
       0.        , 0.00662489, 0.        , 0.00485826, 0.        ,
       0.01092811, 0.        , 0.00480103, 0.        , 0.00349426,
       0.        , 0.00684981, 0.        , 0.00308299, 0.        ]), 'std_score_time': arra



{'C': 0.1, 'penalty': 'l2'}
{'mean_fit_time': array([12.97477813,  0.0381506 , 12.11027093,  0.02029305, 13.17171435,
        0.01959596, 12.92761273,  0.01848063, 15.04039054,  0.02043619,
       16.17210531,  0.03148255, 14.80920057,  0.02954097, 16.67775869,
        0.02005763, 14.50931048,  0.03337588, 11.07272887,  0.02152839]), 'std_fit_time': array([7.83911241e-01, 2.26141781e-02, 9.45556210e-01, 3.37605230e-03,
       2.84809301e-01, 1.70168293e-03, 2.05999042e-01, 1.61261343e-03,
       9.90879561e-01, 4.15567189e-03, 1.84463402e+00, 1.12988402e-02,
       3.61026279e-01, 5.65189140e-03, 6.86465136e-01, 5.91207895e-03,
       7.04708150e-01, 1.29484843e-02, 2.95533807e+00, 1.97838737e-03]), 'mean_score_time': array([0.00790663, 0.        , 0.01156502, 0.        , 0.00722289,
       0.        , 0.01064177, 0.        , 0.01203675, 0.        ,
       0.01151433, 0.        , 0.00800228, 0.        , 0.01036158,
       0.        , 0.00970321, 0.        , 0.00826025, 0.        ]), 's



{'C': 0.1, 'penalty': 'l2'}
{'mean_fit_time': array([4.58238087, 0.01713405, 4.1241251 , 0.01586914, 4.44014058,
       0.01344218, 4.01654706, 0.02592564, 4.64661512, 0.01388302,
       3.72182269, 0.03598547, 3.33433976, 0.01393118, 3.63953056,
       0.0145853 , 3.42683735, 0.01410494, 2.73689551, 0.01271825]), 'std_fit_time': array([0.44469162, 0.00480702, 0.64538645, 0.00134338, 0.38463339,
       0.00156036, 0.07961928, 0.01813588, 0.47561654, 0.00310987,
       0.17300679, 0.01926575, 0.03312228, 0.00233368, 0.2155407 ,
       0.00359683, 0.11027463, 0.00229637, 0.56844934, 0.0024293 ]), 'mean_score_time': array([0.00580153, 0.        , 0.00944586, 0.        , 0.00767999,
       0.        , 0.00779815, 0.        , 0.00642719, 0.        ,
       0.00517664, 0.        , 0.00557785, 0.        , 0.00406899,
       0.        , 0.00432558, 0.        , 0.00444021, 0.        ]), 'std_score_time': array([0.00176428, 0.        , 0.00549767, 0.        , 0.00274834,
       0.        , 0.005



{'C': 0.3, 'penalty': 'l2'}
{'mean_fit_time': array([3.53543921, 0.01672773, 3.68425088, 0.01531305, 3.88720894,
       0.01707225, 4.0938097 , 0.02293282, 3.68964963, 0.02548442,
       3.67218657, 0.01618829, 3.52622342, 0.01311178, 3.59285922,
       0.01498094, 3.63496761, 0.01583076, 2.88470087, 0.02061772]), 'std_fit_time': array([0.05924973, 0.00440816, 0.10835778, 0.00418002, 0.10944199,
       0.00500702, 0.18089942, 0.00728335, 0.13347695, 0.00834123,
       0.09958468, 0.00210687, 0.0835226 , 0.00134729, 0.05315364,
       0.00245   , 0.13521352, 0.00266028, 0.52479829, 0.00678749]), 'mean_score_time': array([0.006704  , 0.        , 0.00724583, 0.        , 0.00421119,
       0.        , 0.00822425, 0.        , 0.00916715, 0.        ,
       0.00677853, 0.        , 0.006635  , 0.        , 0.00571718,
       0.        , 0.00762825, 0.        , 0.00554271, 0.        ]), 'std_score_time': array([0.00187541, 0.        , 0.00195052, 0.        , 0.00086251,
       0.        , 0.003



In [38]:
grid_search = {'penalty': ['l2', 'l1'], 'C': [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0]}
davidson_ensemble_clfs_metrics_liwc = Metrics(['f1-score', 'accuracy', 'precision', 'recall'], 'f1-score')
davidson_ensemble_clfs_liwc = [
        (wulczyn_ensemble_model_liwc, wulczyn_ensemble_liwc_vect), (waseem_ensemble_model_liwc, waseem_ensemble_liwc_vect), 
        (waseem_hovy_ensemble_model_liwc, waseem_hovy_ensemble_liwc_vect),
        (oraby_sarcasm_ensemble_model_liwc, oraby_sarcasm_ensemble_liwc_vect), 
        (oraby_factfeel_ensemble_model_liwc, oraby_factfeel_ensemble_liwc_vect),
        (hoover_ensemble_model_liwc, hoover_ensemble_liwc_vect)]
davidson_ensemble_trained_model_liwc = ensemble_train(davidson_ensemble_clfs_liwc, davidson_liwc.data, davidson_liwc.dev, 
                                                     davidson_liwc, grid_search, davidson_ensemble_clfs_metrics_liwc)



{'C': 0.6, 'penalty': 'l2'}
{'mean_fit_time': array([2.71521821, 0.00884299, 1.63878584, 0.01243358, 1.67077003,
       0.0097342 , 1.67319827, 0.01149225, 1.84931822, 0.01427402,
       1.58491364, 0.01116638, 1.83908672, 0.01416445, 1.58663344,
       0.00993214, 1.71305051, 0.01251993, 2.03678055, 0.01204429]), 'std_fit_time': array([5.56530568e-01, 1.83900664e-03, 1.16168335e-01, 2.72942904e-03,
       1.58340243e-01, 2.72504255e-04, 1.69122161e-01, 2.65277629e-03,
       1.67813565e-01, 4.22508466e-03, 7.11079582e-02, 6.88417344e-04,
       1.42102219e-01, 7.02232197e-03, 2.01738138e-01, 7.92779550e-04,
       1.58956328e-01, 9.27544966e-04, 2.66941399e-01, 2.29668733e-03]), 'mean_score_time': array([0.01005726, 0.        , 0.00752864, 0.        , 0.01057458,
       0.        , 0.0072238 , 0.        , 0.00856991, 0.        ,
       0.00829821, 0.        , 0.00775118, 0.        , 0.01647544,
       0.        , 0.00905032, 0.        , 0.00711765, 0.        ]), 'std_score_time': arra

In [39]:
waseem_ensemble_clfs_metrics_liwc = Metrics(['f1-score', 'accuracy', 'precision', 'recall'], 'f1-score')
waseem_ensemble_clfs_liwc = [
        (wulczyn_ensemble_model_liwc, wulczyn_ensemble_liwc_vect), (davidson_ensemble_model_liwc, davidson_ensemble_liwc_vect), 
        (waseem_hovy_ensemble_model_liwc, waseem_hovy_ensemble_liwc_vect),
        (oraby_sarcasm_ensemble_model_liwc, oraby_sarcasm_ensemble_liwc_vect), (oraby_factfeel_ensemble_model_liwc, oraby_factfeel_ensemble_liwc_vect),
        (hoover_ensemble_model_liwc, hoover_ensemble_liwc_vect)]
waseem_ensemble_trained_model_liwc = ensemble_train(waseem_ensemble_clfs_liwc, waseem_liwc.data, waseem_liwc.dev, 
                                                   waseem_liwc, grid_search, waseem_ensemble_clfs_metrics_liwc)



{'C': 0.2, 'penalty': 'l2'}
{'mean_fit_time': array([1.28065548, 0.00341773, 1.47213044, 0.00445943, 1.31903071,
       0.00376592, 1.94360657, 0.00571036, 2.10430541, 0.00560899,
       1.81234384, 0.00396123, 1.87842498, 0.00823088, 1.93760147,
       0.00574808, 1.97921023, 0.00364795, 1.58411202, 0.00368776]), 'std_fit_time': array([1.66205880e-01, 5.96499023e-04, 2.25405212e-01, 6.34962533e-04,
       1.04349310e-01, 4.07858493e-04, 2.51809676e-01, 2.08628600e-03,
       3.02754728e-01, 2.74033927e-03, 1.88420599e-01, 7.84757766e-04,
       4.64068879e-01, 6.96980560e-03, 4.65556391e-01, 1.69444158e-03,
       2.21696353e-01, 6.35739028e-04, 1.51873515e-01, 5.10446875e-04]), 'mean_score_time': array([0.00371265, 0.        , 0.00320501, 0.        , 0.00434017,
       0.        , 0.00441012, 0.        , 0.00425396, 0.        ,
       0.00371695, 0.        , 0.00846291, 0.        , 0.00418224,
       0.        , 0.00356913, 0.        , 0.0062233 , 0.        ]), 'std_score_time': arra

In [40]:
wulczyn_ensemble_clfs_metrics_liwc = Metrics(['f1-score', 'accuracy', 'precision', 'recall'], 'f1-score')
wulczyn_ensemble_clfs_liwc = [
        (davidson_ensemble_model_liwc, davidson_ensemble_liwc_vect), (waseem_ensemble_model_liwc, waseem_ensemble_liwc_vect), 
        (waseem_hovy_ensemble_model_liwc, waseem_hovy_ensemble_liwc_vect),
        (oraby_sarcasm_ensemble_model_liwc, oraby_sarcasm_ensemble_liwc_vect), 
        (oraby_factfeel_ensemble_model_liwc, oraby_factfeel_ensemble_liwc_vect),
        (hoover_ensemble_model_liwc, hoover_ensemble_liwc_vect)]
wulczyn_ensemble_trained_model_liwc = ensemble_train(wulczyn_ensemble_clfs_liwc, wulczyn_liwc.data, wulczyn_liwc.dev, 
                                                wulczyn_liwc, grid_search, wulczyn_ensemble_clfs_metrics_liwc)



{'C': 0.4, 'penalty': 'l2'}
{'mean_fit_time': array([1.47911997, 0.17133179, 1.42732096, 0.16123824, 1.50186219,
       0.15921659, 1.40253744, 0.17264566, 1.38612885, 0.14518733,
       1.42165852, 0.1483336 , 1.42553535, 0.15395641, 1.47433209,
       0.15860672, 1.54477315, 0.18659639, 1.37909284, 0.15695429]), 'std_fit_time': array([0.19638486, 0.02045109, 0.10278429, 0.00756306, 0.20925964,
       0.02307717, 0.14652152, 0.01025018, 0.0924558 , 0.01005985,
       0.10996787, 0.01457419, 0.10015044, 0.02282814, 0.14133729,
       0.02146161, 0.11250888, 0.05178391, 0.14886074, 0.01389665]), 'mean_score_time': array([0.06440887, 0.        , 0.07727137, 0.        , 0.08180122,
       0.        , 0.08762841, 0.        , 0.07859769, 0.        ,
       0.08615327, 0.        , 0.08983965, 0.        , 0.07900591,
       0.        , 0.08240609, 0.        , 0.07797794, 0.        ]), 'std_score_time': array([0.02446146, 0.        , 0.01472852, 0.        , 0.00970192,
       0.        , 0.011

## Evaluate LIWC Ensemble Model

In [41]:
ensemble_davidson_liwc_eval_metrics = Metrics(['f1-score', 'accuracy', 'precision', 'recall'], 'f1-score')
davidson_ensemble_clfs_liwc = [(wulczyn_ensemble_model_liwc, wulczyn_ensemble_liwc_vect), 
                          (waseem_ensemble_model_liwc, waseem_ensemble_liwc_vect), 
                          (waseem_hovy_ensemble_model_liwc, waseem_hovy_ensemble_liwc_vect),
                          (oraby_sarcasm_ensemble_model_liwc, oraby_sarcasm_ensemble_liwc_vect), 
                          (oraby_factfeel_ensemble_model_liwc, oraby_factfeel_ensemble_liwc_vect),
                          (hoover_ensemble_model_liwc, hoover_ensemble_liwc_vect)]

evaluate_ensemble_model(davidson_ensemble_trained_model_liwc, davidson_ensemble_clfs_liwc, davidson_liwc.test, 
                        ensemble_davidson_liwc_eval_metrics)

0.5348507886950079


In [42]:
ensemble_waseem_liwc_eval_metrics = Metrics(['f1-score', 'accuracy', 'precision', 'recall'], 'f1-score')
waseem_ensemble_clfs_liwc = [(wulczyn_ensemble_model_liwc, wulczyn_ensemble_liwc_vect), 
                          (davidson_ensemble_model_liwc, davidson_ensemble_liwc_vect), 
                          (waseem_hovy_ensemble_model_liwc, waseem_hovy_ensemble_liwc_vect),
                          (oraby_sarcasm_ensemble_model_liwc, oraby_sarcasm_ensemble_liwc_vect), 
                          (oraby_factfeel_ensemble_model_liwc, oraby_factfeel_ensemble_liwc_vect),
                          (hoover_ensemble_model_liwc, hoover_ensemble_liwc_vect)]

evaluate_ensemble_model(waseem_ensemble_trained_model_liwc, waseem_ensemble_clfs_liwc, waseem_liwc.test, 
                        ensemble_waseem_liwc_eval_metrics)

0.24890292657025953


In [43]:
ensemble_wulczyn_liwc_eval_metrics = Metrics(['f1-score', 'accuracy', 'precision', 'recall'], 'f1-score')
wulczyn_ensemble_clfs_liwc = [(wulczyn_ensemble_model_liwc, wulczyn_ensemble_liwc_vect), 
                          (davidson_ensemble_model_liwc, davidson_ensemble_liwc_vect), 
                          (waseem_hovy_ensemble_model_liwc, waseem_hovy_ensemble_liwc_vect),
                          (oraby_sarcasm_ensemble_model_liwc, oraby_sarcasm_ensemble_liwc_vect), 
                          (oraby_factfeel_ensemble_model_liwc, oraby_factfeel_ensemble_liwc_vect),
                          (hoover_ensemble_model_liwc, hoover_ensemble_liwc_vect)]

evaluate_ensemble_model(wulczyn_ensemble_trained_model_liwc, wulczyn_ensemble_clfs_liwc, wulczyn_liwc.test, 
                        ensemble_wulczyn_liwc_eval_metrics)

0.6275307481403284


# All Evaluations

### Evaluate Single task BPE models

In [44]:
davidson_bpe_eval_metrics = Metrics(['f1-score', 'accuracy', 'precision', 'recall'], 'f1-score')
evaluate_single_task(davidson_bpe_model, davidson_bpe_vect, davidson_bpe.test, davidson_bpe_eval_metrics)

waseem_bpe_eval_metrics = Metrics(['f1-score', 'accuracy', 'precision', 'recall'], 'f1-score')
evaluate_single_task(waseem_bpe_model, waseem_bpe_vect, waseem_bpe.test, waseem_bpe_eval_metrics)

wulczyn_bpe_eval_metrics = Metrics(['f1-score', 'accuracy', 'precision', 'recall'], 'f1-score')
evaluate_single_task(wulczyn_bpe_model, wulczyn_bpe_vect, wulczyn_bpe.test, wulczyn_bpe_eval_metrics)

{'f1-score': [0.701282448896403], 'accuracy': [0.8922952803549818], 'precision': [0.7356355520089006], 'recall': [0.6839996439570558], 'loss': []}
{'f1-score': [0.5845064355933921], 'accuracy': [0.9132947976878613], 'precision': [0.7041701685318706], 'recall': [0.5378510378510378], 'loss': []}
{'f1-score': [0.8704438660314124], 'accuracy': [0.9582313437519613], 'precision': [0.9006247137343721], 'recall': [0.8454710064739721], 'loss': []}


### Evaluate single task liwc models

In [45]:
davidson_liwc_eval_metrics = Metrics(['f1-score', 'accuracy', 'precision', 'recall'], 'f1-score')
evaluate_single_task(davidson_liwc_model, davidson_liwc_vect, davidson_liwc.test, davidson_liwc_eval_metrics)

waseem_liwc_eval_metrics = Metrics(['f1-score', 'accuracy', 'precision', 'recall'], 'f1-score')
evaluate_single_task(waseem_liwc_model, waseem_liwc_vect, waseem_liwc.test, waseem_liwc_eval_metrics)

wulczyn_liwc_eval_metrics = Metrics(['f1-score', 'accuracy', 'precision', 'recall'], 'f1-score')
evaluate_single_task(wulczyn_liwc_model, wulczyn_liwc_vect, wulczyn_liwc.test, wulczyn_liwc_eval_metrics)

{'f1-score': [0.6142350240280037], 'accuracy': [0.8898749495764421], 'precision': [0.7830107163000565], 'recall': [0.6338177447552448], 'loss': []}
{'f1-score': [0.29093294982337775], 'accuracy': [0.846820809248555], 'precision': [0.3409606656580938], 'recall': [0.2848094738026479], 'loss': []}
{'f1-score': [0.8566454911660655], 'accuracy': [0.9555011611121571], 'precision': [0.9050873520472182], 'recall': [0.820637548546594], 'loss': []}


### Evaluate BPE Ensemble Model

In [46]:
ensemble_davidson_bpe_eval_metrics = Metrics(['f1-score', 'accuracy', 'precision', 'recall'], 'f1-score')
davidson_ensemble_clfs_bpe = [(wulczyn_ensemble_model_bpe, wulczyn_ensemble_bpe_vect), 
                          (waseem_ensemble_model_bpe, waseem_ensemble_bpe_vect), 
                          (waseem_hovy_ensemble_model_bpe, waseem_hovy_ensemble_bpe_vect),
                          (oraby_sarcasm_ensemble_model_bpe, oraby_sarcasm_ensemble_bpe_vect), 
                          (oraby_factfeel_ensemble_model_bpe, oraby_factfeel_ensemble_bpe_vect),
                          (hoover_ensemble_model_bpe, hoover_ensemble_bpe_vect)]

evaluate_ensemble_model(davidson_ensemble_trained_model_bpe, davidson_ensemble_clfs_bpe, davidson_bpe.test, 
                        ensemble_davidson_bpe_eval_metrics)

0.40629303245450266


In [47]:
ensemble_waseem_bpe_eval_metrics = Metrics(['f1-score', 'accuracy', 'precision', 'recall'], 'f1-score')
waseem_ensemble_clfs_bpe = [(wulczyn_ensemble_model_bpe, wulczyn_ensemble_bpe_vect), 
                          (davidson_ensemble_model_bpe, davidson_ensemble_bpe_vect), 
                          (waseem_hovy_ensemble_model_bpe, waseem_hovy_ensemble_bpe_vect),
                          (oraby_sarcasm_ensemble_model_bpe, oraby_sarcasm_ensemble_bpe_vect), 
                          (oraby_factfeel_ensemble_model_bpe, oraby_factfeel_ensemble_bpe_vect),
                          (hoover_ensemble_model_bpe, hoover_ensemble_bpe_vect)]

evaluate_ensemble_model(waseem_ensemble_trained_model_bpe, waseem_ensemble_clfs_bpe, waseem_bpe.test, 
                        ensemble_waseem_bpe_eval_metrics)

0.29658263909597926


In [48]:
ensemble_wulczyn_bpe_eval_metrics = Metrics(['f1-score', 'accuracy', 'precision', 'recall'], 'f1-score')
wulczyn_ensemble_clfs_bpe = [(wulczyn_ensemble_model_bpe, wulczyn_ensemble_bpe_vect), 
                          (davidson_ensemble_model_bpe, davidson_ensemble_bpe_vect), 
                          (waseem_hovy_ensemble_model_bpe, waseem_hovy_ensemble_bpe_vect),
                          (oraby_sarcasm_ensemble_model_bpe, oraby_sarcasm_ensemble_bpe_vect), 
                          (oraby_factfeel_ensemble_model_bpe, oraby_factfeel_ensemble_bpe_vect),
                          (hoover_ensemble_model_bpe, hoover_ensemble_bpe_vect)]

evaluate_ensemble_model(wulczyn_ensemble_trained_model_bpe, wulczyn_ensemble_clfs_bpe, wulczyn_bpe.test, 
                        ensemble_wulczyn_bpe_eval_metrics)

0.598776911220475


### Evaluate LIWC Ensemble Model

In [49]:
ensemble_davidson_liwc_eval_metrics = Metrics(['f1-score', 'accuracy', 'precision', 'recall'], 'f1-score')
davidson_ensemble_clfs_liwc = [(wulczyn_ensemble_model_liwc, wulczyn_ensemble_liwc_vect), 
                          (waseem_ensemble_model_liwc, waseem_ensemble_liwc_vect), 
                          (waseem_hovy_ensemble_model_liwc, waseem_hovy_ensemble_liwc_vect),
                          (oraby_sarcasm_ensemble_model_liwc, oraby_sarcasm_ensemble_liwc_vect), 
                          (oraby_factfeel_ensemble_model_liwc, oraby_factfeel_ensemble_liwc_vect),
                          (hoover_ensemble_model_liwc, hoover_ensemble_liwc_vect)]

evaluate_ensemble_model(davidson_ensemble_trained_model_liwc, davidson_ensemble_clfs_liwc, davidson_liwc.test, 
                        ensemble_davidson_liwc_eval_metrics)

0.5348507886950079


In [50]:
ensemble_waseem_liwc_eval_metrics = Metrics(['f1-score', 'accuracy', 'precision', 'recall'], 'f1-score')
waseem_ensemble_clfs_liwc = [(wulczyn_ensemble_model_liwc, wulczyn_ensemble_liwc_vect), 
                          (davidson_ensemble_model_liwc, davidson_ensemble_liwc_vect), 
                          (waseem_hovy_ensemble_model_liwc, waseem_hovy_ensemble_liwc_vect),
                          (oraby_sarcasm_ensemble_model_liwc, oraby_sarcasm_ensemble_liwc_vect), 
                          (oraby_factfeel_ensemble_model_liwc, oraby_factfeel_ensemble_liwc_vect),
                          (hoover_ensemble_model_liwc, hoover_ensemble_liwc_vect)]

evaluate_ensemble_model(waseem_ensemble_trained_model_liwc, waseem_ensemble_clfs_liwc, waseem_liwc.test, 
                        ensemble_waseem_liwc_eval_metrics)

0.24890292657025953


In [51]:
ensemble_wulczyn_liwc_eval_metrics = Metrics(['f1-score', 'accuracy', 'precision', 'recall'], 'f1-score')
wulczyn_ensemble_clfs_liwc = [(wulczyn_ensemble_model_liwc, wulczyn_ensemble_liwc_vect), 
                          (davidson_ensemble_model_liwc, davidson_ensemble_liwc_vect), 
                          (waseem_hovy_ensemble_model_liwc, waseem_hovy_ensemble_liwc_vect),
                          (oraby_sarcasm_ensemble_model_liwc, oraby_sarcasm_ensemble_liwc_vect), 
                          (oraby_factfeel_ensemble_model_liwc, oraby_factfeel_ensemble_liwc_vect),
                          (hoover_ensemble_model_liwc, hoover_ensemble_liwc_vect)]

evaluate_ensemble_model(wulczyn_ensemble_trained_model_liwc, wulczyn_ensemble_clfs_liwc, wulczyn_liwc.test, 
                        ensemble_wulczyn_liwc_eval_metrics)

0.6275307481403284
