## Data Loading

In [1]:
import gzip, json
def parse(path):
  g = gzip.open(path, 'r')
  for l in g:
    yield json.loads(l)

data = []

for review in parse("Software.json.gz"):
  data.append(review)

In [2]:
import numpy as np
from sklearn.model_selection import train_test_split

indices = np.arange(len(data))
indices_train, indices_test = train_test_split(indices, test_size=0.1, random_state=42)
indices_train, indices_val = train_test_split(indices_train, test_size=0.1/0.9, random_state=42)

In [3]:
data_np = np.array(data)
data_train = data_np[indices_train]
data_val = data_np[indices_val]
data_test = data_np[indices_test]

In [4]:
y_train = [0 if ('vote' not in d) or (d['vote']==0)  else 1 for d in data_train]
y_val = [0 if ('vote' not in d) or (d['vote']==0)  else 1 for d in data_val]
y_test = [0 if ('vote' not in d) or (d['vote']==0)  else 1 for d in data_test]

## Naive Bayes

## Non-text (review) features

In [5]:
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import OneHotEncoder

# fit the ID encoder on full data set
userID_str = np.array([[d['reviewerID']] for d in data])
userID_encoder = LabelEncoder()
userID_encoder.fit(userID_str.squeeze())
userID_one_hot_encoder = OneHotEncoder()
userID_one_hot_encoder.fit(userID_str)

itemID_str = np.array([[d['asin']] for d in data])
itemID_encoder = LabelEncoder()
itemID_encoder.fit(itemID_str.squeeze())
itemID_one_hot_encoder = OneHotEncoder()
itemID_one_hot_encoder.fit(itemID_str)

In [6]:
import textstat
import os
import pickle 

if os.path.isfile('readability_scores.pickle'):
    with open('readability_scores.pickle', 'rb') as handle:
        readability_scores = pickle.load(handle)
    
else:
    # Might take 2-3 mins, please be patient!
    readability_scores = {}
    for d in data:
        if 'reviewText' in d:
            readability_scores[d['reviewText']] = textstat.flesch_reading_ease(d['reviewText'])

In [7]:
def get_meta_features(d, length=False, rating=False, readability=False, verified=False, userID=False, itemID=False):
    feature_vec = []
    
    if length:
        if 'reviewText' not in d:
            feature_vec.append(0)
        else:
            feature_vec.append(len(d['reviewText']))
            
    if rating:
        feature_vec.append(d['overall'])
    
    if readability:
        if 'reviewText' not in d:
            feature_vec.append(100)
        else:
            feature_vec.append(readability_scores[d['reviewText']])
            
    if verified:
        if 'verified' not in d:
            feature_vec.append(0)
        elif d['verified']:
            feature_vec.append(1)
        else:
            feature_vec.append(0)
            
    if userID:
        feature_vec.extend(list(userID_one_hot_encoder.transform([[d["reviewerID"]]]).toarray()[0]))
                                          
    if itemID:
        feature_vec.extend(list(itemID_one_hot_encoder.transform([[d["asin"]]]).toarray()[0]))      
        
    return feature_vec
        

In [8]:
from sklearn.metrics import precision_recall_fscore_support
from sklearn.metrics import accuracy_score
def test_metrics(clf, X_t, y_t):
    y_pred = clf.predict(X_t)
    precision, recall, _, _ = precision_recall_fscore_support(y_t, y_pred, average='binary')
    accuracy = accuracy_score(y_t, y_pred)
    return accuracy, precision, recall, 2*precision*recall/(precision+recall)

In [9]:
X_train = [get_meta_features(d, length=True, rating=True, readability=True, verified=True, userID=False, itemID=False) for d in data_train]
X_val = [get_meta_features(d, length=True, rating=True, readability=True, verified=True, userID=False, itemID=False) for d in data_val]
X_test = [get_meta_features(d, length=True, rating=True, readability=True, verified=True, userID=False, itemID=False) for d in data_test]
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
scaler = scaler.fit(X_train)

In [10]:
class EarlyStopping():
    """
    Early stopping to stop the training when the loss does not improve after
    certain epochs.
    """
    def __init__(self, patience=5, min_delta=0):
        """
        :param patience: how many epochs to wait before stopping when loss is
               not improving
        :param min_delta: minimum difference between new loss and old loss for
               new loss to be considered as an improvement
        """
        self.patience = patience
        self.min_delta = min_delta
        self.counter = 0
        self.best_loss = None
        self.early_stop = False
    def __call__(self, val_loss):
        if self.best_loss == None:
            self.best_loss = val_loss
        elif self.best_loss - val_loss > self.min_delta:
            self.best_loss = val_loss
            # reset counter if validation loss improves
            self.counter = 0
        elif self.best_loss - val_loss < self.min_delta:
            self.counter += 1
            # print(f"INFO: Early stopping counter {self.counter} of {self.patience}")
            if self.counter >= self.patience:
                # print('INFO: Early stopping')
                self.early_stop = True

In [11]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
from torch.nn.functional import cross_entropy
from tqdm import tqdm
from torch.autograd import Variable
from math import ceil
from torch.optim.lr_scheduler import StepLR


class MLPClassifier(nn.Module):
    def __init__(self, input_size, hidden_size, output_size, dropout_prob=0.5, l2_reg=0.01, tol=1e-3, lr=1e-1):
        super(MLPClassifier, self).__init__()

        # Check if hidden_size is a tuple
        if isinstance(hidden_size, tuple):
            # Create a list of linear layers for each hidden size
            layers = []
            prev_size = input_size
            for size in hidden_size:
                layers.append(nn.Linear(prev_size, size))
                layers.append(nn.ReLU())
                layers.append(nn.Dropout(p=dropout_prob))
                prev_size = size
            self.hidden_layers = nn.Sequential(*layers)
        else:
            # Single hidden layer
            self.hidden_layers = nn.Sequential(
                nn.Linear(input_size, hidden_size),
                nn.ReLU(),
                nn.Dropout(p=dropout_prob)
            )

        self.fc_out = nn.Linear(prev_size, output_size)
        self.softmax = nn.Softmax(dim=1)
        self.l2_reg = l2_reg
        self.tol = tol
        self.class_weights = torch.tensor([0, 0], requires_grad=False)
        self.lr = lr

    def forward(self, x):
        # Input layer

        # Hidden layers
        for layer in self.hidden_layers:
            x = layer(x)

        # Output layer
        x = self.fc_out(x)  # No activation function for binary classification

        return self.softmax(x)
    
    def calculate_loss(self,x,y):
        device = torch.device('cuda')
        l2_reg_term = torch.nn.utils.parameters_to_vector(self.parameters()).norm(p=2)
        one_hot_encoded = torch.zeros(len(y), 2).to(device)
        one_hot_encoded.scatter_(1, y.unsqueeze(1), 1)
        y_pred = self.forward(x.float())
        return cross_entropy(y_pred, one_hot_encoded, weight=self.class_weights.to('cuda')) + self.l2_reg * l2_reg_term


    def fit(self, X_train, y_train, x_val, y_val, epochs=100, use_gpu=True):
        # Check if GPU is available and decide whether to use it
        use_gpu = use_gpu and torch.cuda.is_available()
        device = torch.device('cuda' if use_gpu else 'cpu')
        self.to(device)

        optimizer = optim.Adam(self.my_trainable_params(), lr=self.lr)
        lr_scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer=optimizer, mode='min', patience=3, factor=0.5, min_lr=1e-6)
        es = EarlyStopping(patience=10)



        print(self.class_weights)

        # x_train_tensor = torch.tensor(X_train).to(device)
        # y_train_tensor = torch.tensor(y_train, dtype=torch.long).to(device)

        x_train_tensor = torch.tensor(X_train) if not isinstance(X_train, torch.Tensor) else X_train
        x_train_tensor = x_train_tensor.to(device)
        
        y_train_tensor = torch.tensor(y_train, dtype=torch.long) if not isinstance(X_train, torch.Tensor) else y_train
        y_train_tensor = y_train_tensor.to(device)

        # Calculate class weights if not provided
        class_counts = torch.bincount(y_train_tensor)
        self.class_weights = 1.0 - (class_counts.float() / torch.sum(class_counts))
        self.class_weights.to(device)


        x_valid = torch.tensor(x_val).to(device)
        y_valid = torch.tensor(y_val).to(device)

        prev_loss = 1e12
        loss = 1e6

        epoch = 0
        while epoch < epochs:
            loss = self.calculate_loss(x_train_tensor, y_train_tensor)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            val_loss = self.calculate_loss(x_valid, y_valid)
            lr_scheduler.step(val_loss)
            es(val_loss)

            if es.early_stop:
                break

            epoch += 1

        print(f"Training stopped after {epoch} epochs.")

    def my_trainable_params(self):
        trainable_params = []
        for layer in self.hidden_layers:
            # print(type(layer))
            if isinstance(layer, nn.Linear):
                trainable_params.extend(layer.parameters())

        if isinstance(self.fc_out, nn.Linear):
            trainable_params.extend(self.fc_out.parameters())
        return trainable_params

    def predict(self, x, use_gpu=True):
        # Move inputs to GPU if available
        device = torch.device('cuda' if use_gpu else 'cpu')
        inputs = Variable(torch.FloatTensor(x)).to(device)
        prediction = self.forward(inputs)
        return torch.argmax(prediction.data.cpu(), 1).numpy() if use_gpu else torch.argmax(prediction.data, 1).numpy()

# Example usage:
# classifier = MLPClassifier(input_size=..., hidden_size=..., output_size=...)
# classifier.fit(X_train, y_train, epochs=..., batch_size=..., use_gpu=True)
# predictions = classifier.predict(X_test, use_gpu=True)


In [12]:
# Logistic regression
# from sklearn.linear_model import LogisticRegression

clf = MLPClassifier(input_size=len(X_train[0]),hidden_size=(100,10,),output_size=2)
clf.fit(scaler.transform(X_train), y_train, scaler.transform(X_val), y_val, epochs=100)
test_metrics(clf, scaler.transform(X_test), y_test)

tensor([0, 0])
Training stopped after 65 epochs.


(0.7287785129723141,
 0.5081853570095451,
 0.7463972431077694,
 0.6046762475809777)

In [13]:
np.unique(y_train, return_counts=True)

(array([0, 1]), array([265249, 102299]))

In [14]:
# Define the hyperparameter grid
param_grid = {
    'hidden_size': [(5,), (64,), (128,), (64, 32), (64, 32, 16)],
    'l2_reg': [0.001, 0.01, 0.1, 1, 10, 100],
    # 'tol': [1e-3, 1e-4, 1e-5]
}

In [15]:
from sklearn.model_selection import ParameterGrid



# Generate all possible combinations of hyperparameters
grid = ParameterGrid(param_grid)

scores_notext = []

# Iterate over the hyperparameter combinations
bar = tqdm(grid)
for params in bar:
    print("Testing hyperparameters:", params)

    # Create the model with the current hyperparameters
    model = MLPClassifier(input_size=len(X_train[0]), output_size=2, **params)

    # Train the model
    model.fit(scaler.transform(X_train), y_train, scaler.transform(X_val), y_val, epochs=100, use_gpu=True)
    metrics = test_metrics(model, scaler.transform(X_test), y_test)
    print(metrics)
    scores_notext.append({'params':params, 'scores':metrics})
    bar.set_postfix()


  0%|          | 0/30 [00:00<?, ?it/s]

Testing hyperparameters: {'hidden_size': (5,), 'l2_reg': 0.001}
tensor([0, 0])


  3%|▎         | 1/30 [00:01<00:29,  1.03s/it]

Training stopped after 47 epochs.
(0.7353082012885251, 0.5183727828561051, 0.6706610275689223, 0.5847645713115034)
Testing hyperparameters: {'hidden_size': (5,), 'l2_reg': 0.01}
tensor([0, 0])
Training stopped after 18 epochs.


  7%|▋         | 2/30 [00:01<00:26,  1.07it/s]

(0.736048232631029, 0.5205937158645505, 0.6345551378446115, 0.5719529843634181)
Testing hyperparameters: {'hidden_size': (5,), 'l2_reg': 0.1}
tensor([0, 0])


 10%|█         | 3/30 [00:03<00:29,  1.09s/it]

Training stopped after 100 epochs.
(0.2779035347379418, 0.2779035347379418, 1.0, 0.434936639869192)
Testing hyperparameters: {'hidden_size': (5,), 'l2_reg': 1}
tensor([0, 0])


 13%|█▎        | 4/30 [00:04<00:27,  1.05s/it]

Training stopped after 40 epochs.
(0.2779035347379418, 0.2779035347379418, 1.0, 0.434936639869192)
Testing hyperparameters: {'hidden_size': (5,), 'l2_reg': 10}
tensor([0, 0])


  _warn_prf(average, modifier, msg_start, len(result))
  return accuracy, precision, recall, 2*precision*recall/(precision+recall)
 17%|█▋        | 5/30 [00:05<00:26,  1.06s/it]

Training stopped after 69 epochs.
(0.7220964652620582, 0.0, 0.0, nan)
Testing hyperparameters: {'hidden_size': (5,), 'l2_reg': 100}
tensor([0, 0])


 20%|██        | 6/30 [00:06<00:26,  1.10s/it]

Training stopped after 95 epochs.
(0.2779035347379418, 0.2779035347379418, 1.0, 0.434936639869192)
Testing hyperparameters: {'hidden_size': (64,), 'l2_reg': 0.001}
tensor([0, 0])


 23%|██▎       | 7/30 [00:08<00:29,  1.26s/it]

Training stopped after 78 epochs.
(0.7349817168727146, 0.5153145695364238, 0.7800751879699248, 0.6206380857427717)
Testing hyperparameters: {'hidden_size': (64,), 'l2_reg': 0.01}
tensor([0, 0])


 27%|██▋       | 8/30 [00:09<00:28,  1.32s/it]

Training stopped after 69 epochs.
(0.7444062336757792, 0.5289924760988856, 0.7323778195488722, 0.6142880604368532)
Testing hyperparameters: {'hidden_size': (64,), 'l2_reg': 0.1}
tensor([0, 0])


  _warn_prf(average, modifier, msg_start, len(result))
  return accuracy, precision, recall, 2*precision*recall/(precision+recall)
 30%|███       | 9/30 [00:11<00:30,  1.43s/it]

Training stopped after 100 epochs.
(0.7220964652620582, 0.0, 0.0, nan)
Testing hyperparameters: {'hidden_size': (64,), 'l2_reg': 1}
tensor([0, 0])


  _warn_prf(average, modifier, msg_start, len(result))
  return accuracy, precision, recall, 2*precision*recall/(precision+recall)
 33%|███▎      | 10/30 [00:12<00:29,  1.48s/it]

Training stopped after 100 epochs.
(0.7220964652620582, 0.0, 0.0, nan)
Testing hyperparameters: {'hidden_size': (64,), 'l2_reg': 10}
tensor([0, 0])


  _warn_prf(average, modifier, msg_start, len(result))
  return accuracy, precision, recall, 2*precision*recall/(precision+recall)
 37%|███▋      | 11/30 [00:14<00:28,  1.52s/it]

Training stopped after 100 epochs.
(0.7220964652620582, 0.0, 0.0, nan)
Testing hyperparameters: {'hidden_size': (64,), 'l2_reg': 100}
tensor([0, 0])


  _warn_prf(average, modifier, msg_start, len(result))
  return accuracy, precision, recall, 2*precision*recall/(precision+recall)
 40%|████      | 12/30 [00:15<00:27,  1.56s/it]

Training stopped after 100 epochs.
(0.7220964652620582, 0.0, 0.0, nan)
Testing hyperparameters: {'hidden_size': (128,), 'l2_reg': 0.001}
tensor([0, 0])


 43%|████▎     | 13/30 [00:17<00:24,  1.46s/it]

Training stopped after 34 epochs.
(0.7327833884729236, 0.5126840609661586, 0.7771773182957393, 0.617812782118731)
Testing hyperparameters: {'hidden_size': (128,), 'l2_reg': 0.01}
tensor([0, 0])


 47%|████▋     | 14/30 [00:19<00:25,  1.57s/it]

Training stopped after 91 epochs.
(0.7456904057113007, 0.5307744719509425, 0.7321428571428571, 0.6154048716260697)
Testing hyperparameters: {'hidden_size': (128,), 'l2_reg': 0.1}
tensor([0, 0])


  _warn_prf(average, modifier, msg_start, len(result))
  return accuracy, precision, recall, 2*precision*recall/(precision+recall)
 50%|█████     | 15/30 [00:21<00:26,  1.74s/it]

Training stopped after 100 epochs.
(0.7220964652620582, 0.0, 0.0, nan)
Testing hyperparameters: {'hidden_size': (128,), 'l2_reg': 1}
tensor([0, 0])


  _warn_prf(average, modifier, msg_start, len(result))
  return accuracy, precision, recall, 2*precision*recall/(precision+recall)
 53%|█████▎    | 16/30 [00:23<00:25,  1.80s/it]

Training stopped after 100 epochs.
(0.7220964652620582, 0.0, 0.0, nan)
Testing hyperparameters: {'hidden_size': (128,), 'l2_reg': 10}
tensor([0, 0])


  _warn_prf(average, modifier, msg_start, len(result))
  return accuracy, precision, recall, 2*precision*recall/(precision+recall)
 57%|█████▋    | 17/30 [00:25<00:23,  1.84s/it]

Training stopped after 100 epochs.
(0.7220964652620582, 0.0, 0.0, nan)
Testing hyperparameters: {'hidden_size': (128,), 'l2_reg': 100}
tensor([0, 0])


 60%|██████    | 18/30 [00:26<00:22,  1.87s/it]

Training stopped after 100 epochs.
(0.27788176911022117, 0.27788781751300523, 0.999921679197995, 0.4349099827970908)
Testing hyperparameters: {'hidden_size': (64, 32), 'l2_reg': 0.001}
tensor([0, 0])


 63%|██████▎   | 19/30 [00:28<00:20,  1.89s/it]

Training stopped after 99 epochs.
(0.7316298102037263, 0.5111825980392157, 0.7840695488721805, 0.618879821958457)
Testing hyperparameters: {'hidden_size': (64, 32), 'l2_reg': 0.01}
tensor([0, 0])


 67%|██████▋   | 20/30 [00:30<00:19,  1.91s/it]

Training stopped after 98 epochs.
(0.7399660456207557, 0.5226133421473035, 0.7430294486215538, 0.6136282785162188)
Testing hyperparameters: {'hidden_size': (64, 32), 'l2_reg': 0.1}
tensor([0, 0])


 70%|███████   | 21/30 [00:32<00:17,  1.92s/it]

Training stopped after 100 epochs.
(0.2779035347379418, 0.2779035347379418, 1.0, 0.434936639869192)
Testing hyperparameters: {'hidden_size': (64, 32), 'l2_reg': 1}
tensor([0, 0])


  _warn_prf(average, modifier, msg_start, len(result))
  return accuracy, precision, recall, 2*precision*recall/(precision+recall)
 73%|███████▎  | 22/30 [00:34<00:15,  1.95s/it]

Training stopped after 100 epochs.
(0.7220964652620582, 0.0, 0.0, nan)
Testing hyperparameters: {'hidden_size': (64, 32), 'l2_reg': 10}
tensor([0, 0])


 77%|███████▋  | 23/30 [00:36<00:13,  1.94s/it]

Training stopped after 100 epochs.
(0.2779035347379418, 0.2779035347379418, 1.0, 0.434936639869192)
Testing hyperparameters: {'hidden_size': (64, 32), 'l2_reg': 100}
tensor([0, 0])


  _warn_prf(average, modifier, msg_start, len(result))
  return accuracy, precision, recall, 2*precision*recall/(precision+recall)
 80%|████████  | 24/30 [00:38<00:11,  1.95s/it]

Training stopped after 100 epochs.
(0.7220964652620582, 0.0, 0.0, nan)
Testing hyperparameters: {'hidden_size': (64, 32, 16), 'l2_reg': 0.001}
tensor([0, 0])


 83%|████████▎ | 25/30 [00:40<00:09,  1.91s/it]

Training stopped after 77 epochs.
(0.7272331534041442, 0.5059529815356675, 0.7854793233082706, 0.6154648665234735)
Testing hyperparameters: {'hidden_size': (64, 32, 16), 'l2_reg': 0.01}
tensor([0, 0])


 87%|████████▋ | 26/30 [00:42<00:07,  1.86s/it]

Training stopped after 73 epochs.
(0.7339804979975623, 0.5147679324894515, 0.7453007518796992, 0.6089460549049722)
Testing hyperparameters: {'hidden_size': (64, 32, 16), 'l2_reg': 0.1}
tensor([0, 0])


 90%|█████████ | 27/30 [00:44<00:05,  1.94s/it]

Training stopped after 100 epochs.
(0.2779035347379418, 0.2779035347379418, 1.0, 0.434936639869192)
Testing hyperparameters: {'hidden_size': (64, 32, 16), 'l2_reg': 1}
tensor([0, 0])


 93%|█████████▎| 28/30 [00:46<00:03,  1.90s/it]

Training stopped after 75 epochs.
(0.2779035347379418, 0.2779035347379418, 1.0, 0.434936639869192)
Testing hyperparameters: {'hidden_size': (64, 32, 16), 'l2_reg': 10}
tensor([0, 0])


  _warn_prf(average, modifier, msg_start, len(result))
  return accuracy, precision, recall, 2*precision*recall/(precision+recall)
 97%|█████████▋| 29/30 [00:48<00:01,  1.97s/it]

Training stopped after 100 epochs.
(0.7220964652620582, 0.0, 0.0, nan)
Testing hyperparameters: {'hidden_size': (64, 32, 16), 'l2_reg': 100}
tensor([0, 0])


100%|██████████| 30/30 [00:50<00:00,  1.69s/it]

Training stopped after 100 epochs.
(0.2779035347379418, 0.2779035347379418, 1.0, 0.434936639869192)





In [16]:
# with open('mlp_notext.json', 'w') as f:
#     json.dump(scores_notext, f)

## Text Features

In [17]:
train_documents = []
for d in data_train: 
    strs = []
    if 'summary' in d:
        strs.append(d['summary'])
    if 'reviewText' in d:
        strs.append(d['reviewText'])
    train_documents.append(" ".join(strs))

In [18]:
from sklearn.feature_extraction.text import CountVectorizer
bow_vectorizer = CountVectorizer(stop_words='english', max_features=300)
bow_vectorizer.fit(train_documents)

In [19]:
from sklearn.feature_extraction.text import TfidfVectorizer

tfidf_vectorizer = TfidfVectorizer(stop_words='english', max_features=300)
tfidf_vectorizer.fit(train_documents)

In [20]:
w2v_features = np.load('text-feature_word2vec.npz')['word2vec']
w2v_dict = {}

count = 0 
for d in data:
    if 'reviewText' in d:
        w2v_dict[d['reviewText']] = w2v_features[count, :]
    else:
        w2v_dict[''] =  w2v_features[count, :]
    count+=1

In [21]:
all_docs = []
for d in data: 
    strs = []
    if 'summary' in d:
        strs.append(d['summary'])
    if 'reviewText' in d:
        strs.append(d['reviewText'])
    all_docs.append(" ".join(strs))
    

In [22]:
tfidf_vecs = tfidf_vectorizer.transform(all_docs).toarray()

tfidf_dict = {}
count = 0
for d in data:
    if 'reviewText' in d:
        tfidf_dict[d['reviewText']] = tfidf_vecs[count, :]
    else:
        tfidf_dict[''] =  tfidf_vecs[count, :]
    count+=1

In [23]:
bow_vecs = bow_vectorizer.transform(all_docs).toarray()

bow_dict = {}
count = 0
for d in data:
    if 'reviewText' in d:
        bow_dict[d['reviewText']] = bow_vecs[count, :]
    else:
        bow_dict[''] =  bow_vecs[count, :]
    count+=1

In [24]:
def get_text_features(d, BoW=False, tfidf=False, w2v=False):
    
    feature_vec = []
    
    if 'reviewText' in d:
        reviewText = d['reviewText']
    else:
        reviewText = ''
    
    if BoW:
        feature_vec.extend(bow_dict[reviewText].tolist())
    if tfidf:
        feature_vec.extend(tfidf_dict[reviewText].ravel())
    if w2v:
        feature_vec.extend(w2v_dict[reviewText].tolist())
        
    return feature_vec
       

In [25]:
X_train = [get_text_features(d, BoW=True, tfidf=False, w2v=False) for d in data_train]
X_val = [get_text_features(d, BoW=True, tfidf=False, w2v=False) for d in data_val]
X_test = [get_text_features(d, BoW=True, tfidf=False, w2v=False) for d in data_test]

In [26]:
from sklearn.model_selection import ParameterGrid

# Generate all possible combinations of hyperparameters
grid = ParameterGrid(param_grid)

scores_bow = []

# Iterate over the hyperparameter combinations
bar = tqdm(grid)
X_train = torch.tensor(X_train).to('cuda')
y_train = torch.tensor(y_train).to('cuda')
for params in bar:
    print("Testing hyperparameters:", params)

    # Create the model with the current hyperparameters
    model = MLPClassifier(input_size=len(X_train[0]), output_size=2, **params)

    # Train the model
    model.fit(X_train, y_train, X_val, y_val, epochs=100, use_gpu=True)
    metrics = test_metrics(model, X_test, y_test)
    print(metrics)
    scores_bow.append({'params':params, 'scores':metrics})
    bar.set_postfix()

with open('mlp_bow.json', 'w') as f:
    json.dump(scores_bow, f)

  0%|          | 0/30 [00:00<?, ?it/s]

Testing hyperparameters: {'hidden_size': (5,), 'l2_reg': 0.001}
tensor([0, 0])
Training stopped after 77 epochs.


  3%|▎         | 1/30 [00:13<06:26, 13.33s/it]

(0.7243383249172907, 0.5029818771350819, 0.6803728070175439, 0.5783814374646293)
Testing hyperparameters: {'hidden_size': (5,), 'l2_reg': 0.01}
tensor([0, 0])
Training stopped after 55 epochs.


  7%|▋         | 2/30 [00:16<03:18,  7.09s/it]

(0.722401184050148, 0.5004193123277825, 0.6542919799498746, 0.5671033874142964)
Testing hyperparameters: {'hidden_size': (5,), 'l2_reg': 0.1}
tensor([0, 0])
Training stopped after 100 epochs.


 10%|█         | 3/30 [00:19<02:25,  5.37s/it]

(0.2779035347379418, 0.2779035347379418, 1.0, 0.434936639869192)
Testing hyperparameters: {'hidden_size': (5,), 'l2_reg': 1}
tensor([0, 0])
Training stopped after 100 epochs.


 13%|█▎        | 4/30 [00:22<01:59,  4.60s/it]

(0.2779035347379418, 0.2779035347379418, 1.0, 0.434936639869192)
Testing hyperparameters: {'hidden_size': (5,), 'l2_reg': 10}
tensor([0, 0])
Training stopped after 100 epochs.


 17%|█▋        | 5/30 [00:26<01:43,  4.15s/it]

(0.2779035347379418, 0.2779035347379418, 1.0, 0.434936639869192)
Testing hyperparameters: {'hidden_size': (5,), 'l2_reg': 100}
tensor([0, 0])
Training stopped after 100 epochs.


 20%|██        | 6/30 [00:29<01:31,  3.83s/it]

(0.2779035347379418, 0.2779035347379418, 1.0, 0.434936639869192)
Testing hyperparameters: {'hidden_size': (64,), 'l2_reg': 0.001}
tensor([0, 0])
Training stopped after 100 epochs.


 23%|██▎       | 7/30 [00:33<01:27,  3.82s/it]

(0.7389865923733241, 0.5218173639226271, 0.7268170426065163, 0.6074888714323122)
Testing hyperparameters: {'hidden_size': (64,), 'l2_reg': 0.01}
tensor([0, 0])
Training stopped after 100 epochs.


 27%|██▋       | 8/30 [00:36<01:23,  3.81s/it]

(0.7305197631899704, 0.5109637939826619, 0.706296992481203, 0.5929578853930368)
Testing hyperparameters: {'hidden_size': (64,), 'l2_reg': 0.1}
tensor([0, 0])
Training stopped after 100 epochs.


  _warn_prf(average, modifier, msg_start, len(result))
  return accuracy, precision, recall, 2*precision*recall/(precision+recall)
 30%|███       | 9/30 [00:40<01:19,  3.80s/it]

(0.7220964652620582, 0.0, 0.0, nan)
Testing hyperparameters: {'hidden_size': (64,), 'l2_reg': 1}
tensor([0, 0])
Training stopped after 100 epochs.


  _warn_prf(average, modifier, msg_start, len(result))
  return accuracy, precision, recall, 2*precision*recall/(precision+recall)
 33%|███▎      | 10/30 [00:44<01:14,  3.74s/it]

(0.7220964652620582, 0.0, 0.0, nan)
Testing hyperparameters: {'hidden_size': (64,), 'l2_reg': 10}
tensor([0, 0])
Training stopped after 42 epochs.


 37%|███▋      | 11/30 [00:47<01:05,  3.43s/it]

(0.27331098728887343, 0.27284345047923325, 0.9698464912280702, 0.42587656698708587)
Testing hyperparameters: {'hidden_size': (64,), 'l2_reg': 100}
tensor([0, 0])
Training stopped after 100 epochs.


 40%|████      | 12/30 [00:50<01:03,  3.54s/it]

(0.2779035347379418, 0.2779035347379418, 1.0, 0.434936639869192)
Testing hyperparameters: {'hidden_size': (128,), 'l2_reg': 0.001}
tensor([0, 0])
Training stopped after 100 epochs.


 43%|████▎     | 13/30 [00:55<01:04,  3.81s/it]

(0.728713216089152, 0.5080431791723992, 0.7519580200501254, 0.606391713509758)
Testing hyperparameters: {'hidden_size': (128,), 'l2_reg': 0.01}
tensor([0, 0])
Training stopped after 100 epochs.


 47%|████▋     | 14/30 [00:59<01:03,  3.98s/it]

(0.7329575134946892, 0.514277539341917, 0.7038690476190477, 0.5943193466256654)
Testing hyperparameters: {'hidden_size': (128,), 'l2_reg': 0.1}
tensor([0, 0])
Training stopped after 100 epochs.


 50%|█████     | 15/30 [01:04<01:01,  4.11s/it]

(0.47566602820825354, 0.3056909452872932, 0.6975250626566416, 0.4250871080139373)
Testing hyperparameters: {'hidden_size': (128,), 'l2_reg': 1}
tensor([0, 0])
Training stopped after 100 epochs.


  _warn_prf(average, modifier, msg_start, len(result))
  return accuracy, precision, recall, 2*precision*recall/(precision+recall)
 53%|█████▎    | 16/30 [01:08<00:58,  4.19s/it]

(0.7220964652620582, 0.0, 0.0, nan)
Testing hyperparameters: {'hidden_size': (128,), 'l2_reg': 10}
tensor([0, 0])
Training stopped after 100 epochs.


  _warn_prf(average, modifier, msg_start, len(result))
  return accuracy, precision, recall, 2*precision*recall/(precision+recall)
 57%|█████▋    | 17/30 [01:12<00:55,  4.26s/it]

(0.7220964652620582, 0.0, 0.0, nan)
Testing hyperparameters: {'hidden_size': (128,), 'l2_reg': 100}
tensor([0, 0])
Training stopped after 100 epochs.


 60%|██████    | 18/30 [01:17<00:51,  4.29s/it]

(0.2772940971617621, 0.2772691603452786, 0.9962406015037594, 0.433803969715572)
Testing hyperparameters: {'hidden_size': (64, 32), 'l2_reg': 0.001}
tensor([0, 0])
Training stopped after 97 epochs.


 63%|██████▎   | 19/30 [01:21<00:46,  4.24s/it]

(0.7286261535782692, 0.5080446208302049, 0.7419329573934837, 0.6031068950149615)
Testing hyperparameters: {'hidden_size': (64, 32), 'l2_reg': 0.01}
tensor([0, 0])
Training stopped after 100 epochs.


 67%|██████▋   | 20/30 [01:25<00:42,  4.21s/it]

(0.7286479192059899, 0.5085390070921986, 0.7019893483709273, 0.5898068634224987)
Testing hyperparameters: {'hidden_size': (64, 32), 'l2_reg': 0.1}
tensor([0, 0])
Training stopped after 100 epochs.


 70%|███████   | 21/30 [01:29<00:37,  4.19s/it]

(0.2779035347379418, 0.2779035347379418, 1.0, 0.434936639869192)
Testing hyperparameters: {'hidden_size': (64, 32), 'l2_reg': 1}
tensor([0, 0])
Training stopped after 100 epochs.


  _warn_prf(average, modifier, msg_start, len(result))
  return accuracy, precision, recall, 2*precision*recall/(precision+recall)
 73%|███████▎  | 22/30 [01:33<00:33,  4.17s/it]

(0.7220964652620582, 0.0, 0.0, nan)
Testing hyperparameters: {'hidden_size': (64, 32), 'l2_reg': 10}
tensor([0, 0])
Training stopped after 100 epochs.


 77%|███████▋  | 23/30 [01:38<00:29,  4.21s/it]

(0.2779035347379418, 0.2779035347379418, 1.0, 0.434936639869192)
Testing hyperparameters: {'hidden_size': (64, 32), 'l2_reg': 100}
tensor([0, 0])
Training stopped after 100 epochs.


 80%|████████  | 24/30 [01:42<00:25,  4.20s/it]

(0.2779035347379418, 0.2779035347379418, 1.0, 0.434936639869192)
Testing hyperparameters: {'hidden_size': (64, 32, 16), 'l2_reg': 0.001}
tensor([0, 0])
Training stopped after 100 epochs.


 83%|████████▎ | 25/30 [01:46<00:20,  4.18s/it]

(0.726514887689361, 0.5053866157193653, 0.7458489974937343, 0.6025117838727024)
Testing hyperparameters: {'hidden_size': (64, 32, 16), 'l2_reg': 0.01}
tensor([0, 0])
Training stopped after 75 epochs.


 87%|████████▋ | 26/30 [01:49<00:16,  4.00s/it]

(0.724534215566777, 0.5031659882406151, 0.6970551378446115, 0.5844496979248751)
Testing hyperparameters: {'hidden_size': (64, 32, 16), 'l2_reg': 0.1}
tensor([0, 0])
Training stopped after 100 epochs.


  _warn_prf(average, modifier, msg_start, len(result))
  return accuracy, precision, recall, 2*precision*recall/(precision+recall)
 90%|█████████ | 27/30 [01:54<00:12,  4.03s/it]

(0.7220964652620582, 0.0, 0.0, nan)
Testing hyperparameters: {'hidden_size': (64, 32, 16), 'l2_reg': 1}
tensor([0, 0])
Training stopped after 100 epochs.


  _warn_prf(average, modifier, msg_start, len(result))
  return accuracy, precision, recall, 2*precision*recall/(precision+recall)
 93%|█████████▎| 28/30 [01:58<00:08,  4.10s/it]

(0.7220964652620582, 0.0, 0.0, nan)
Testing hyperparameters: {'hidden_size': (64, 32, 16), 'l2_reg': 10}
tensor([0, 0])
Training stopped after 100 epochs.


 97%|█████████▋| 29/30 [02:02<00:04,  4.17s/it]

(0.2779035347379418, 0.2779035347379418, 1.0, 0.434936639869192)
Testing hyperparameters: {'hidden_size': (64, 32, 16), 'l2_reg': 100}
tensor([0, 0])
Training stopped after 100 epochs.


100%|██████████| 30/30 [02:06<00:00,  4.23s/it]

(0.2779035347379418, 0.2779035347379418, 1.0, 0.434936639869192)





In [27]:
X_train = [get_text_features(d, BoW=False, tfidf=True, w2v=False) for d in data_train]
X_val = [get_text_features(d, BoW=False, tfidf=True, w2v=False) for d in data_val]
X_test = [get_text_features(d, BoW=False, tfidf=True, w2v=False) for d in data_test]

In [28]:
from sklearn.model_selection import ParameterGrid

# Generate all possible combinations of hyperparameters
grid = ParameterGrid(param_grid)

scores_tfidf = []

# Iterate over the hyperparameter combinations
bar = tqdm(grid)
X_train = torch.tensor(X_train).to('cuda')
y_train = torch.tensor(y_train).to('cuda')
for params in bar:
    print("Testing hyperparameters:", params)

    # Create the model with the current hyperparameters
    model = MLPClassifier(input_size=len(X_train[0]), output_size=2, **params)

    # Train the model
    model.fit(X_train, y_train, X_val, y_val, epochs=100, use_gpu=True)
    metrics = test_metrics(model, X_test, y_test)
    print(metrics)
    scores_tfidf.append({'params':params, 'scores':metrics})
    bar.set_postfix()

with open('mlp_tfidf.json', 'w') as f:
    json.dump(scores_tfidf, f)

  y_train = torch.tensor(y_train).to('cuda')


Testing hyperparameters: {'hidden_size': (5,), 'l2_reg': 0.001}
tensor([0, 0])
Training stopped after 77 epochs.


  3%|▎         | 1/30 [00:28<13:45, 28.46s/it]

(0.7108654013581752, 0.4834869431643625, 0.5916353383458647, 0.5321217244294167)
Testing hyperparameters: {'hidden_size': (5,), 'l2_reg': 0.01}
tensor([0, 0])
Training stopped after 60 epochs.


  7%|▋         | 2/30 [00:33<06:43, 14.40s/it]

(0.6274812815601601, 0.40363555752604746, 0.7130325814536341, 0.5154715058177393)
Testing hyperparameters: {'hidden_size': (5,), 'l2_reg': 0.1}
tensor([0, 0])
Training stopped after 100 epochs.


  _warn_prf(average, modifier, msg_start, len(result))
  return accuracy, precision, recall, 2*precision*recall/(precision+recall)
 10%|█         | 3/30 [00:38<04:35, 10.19s/it]

(0.7220964652620582, 0.0, 0.0, nan)
Testing hyperparameters: {'hidden_size': (5,), 'l2_reg': 1}
tensor([0, 0])
Training stopped after 100 epochs.


  _warn_prf(average, modifier, msg_start, len(result))
  return accuracy, precision, recall, 2*precision*recall/(precision+recall)
 13%|█▎        | 4/30 [00:43<03:31,  8.14s/it]

(0.7220964652620582, 0.0, 0.0, nan)
Testing hyperparameters: {'hidden_size': (5,), 'l2_reg': 10}
tensor([0, 0])
Training stopped after 100 epochs.


  _warn_prf(average, modifier, msg_start, len(result))
  return accuracy, precision, recall, 2*precision*recall/(precision+recall)
 17%|█▋        | 5/30 [00:48<02:53,  6.95s/it]

(0.7220964652620582, 0.0, 0.0, nan)
Testing hyperparameters: {'hidden_size': (5,), 'l2_reg': 100}
tensor([0, 0])
Training stopped after 100 epochs.


  _warn_prf(average, modifier, msg_start, len(result))
  return accuracy, precision, recall, 2*precision*recall/(precision+recall)
 20%|██        | 6/30 [00:52<02:29,  6.22s/it]

(0.7220964652620582, 0.0, 0.0, nan)
Testing hyperparameters: {'hidden_size': (64,), 'l2_reg': 0.001}
tensor([0, 0])
Training stopped after 79 epochs.


 23%|██▎       | 7/30 [00:57<02:13,  5.80s/it]

(0.7139996517499565, 0.49064574532287264, 0.7640977443609023, 0.597574421168688)
Testing hyperparameters: {'hidden_size': (64,), 'l2_reg': 0.01}
tensor([0, 0])
Training stopped after 79 epochs.


 27%|██▋       | 8/30 [01:02<02:01,  5.53s/it]

(0.6777381159672645, 0.4542187078803127, 0.7918233082706767, 0.5772854450979272)
Testing hyperparameters: {'hidden_size': (64,), 'l2_reg': 0.1}
tensor([0, 0])
Training stopped after 100 epochs.


  _warn_prf(average, modifier, msg_start, len(result))
  return accuracy, precision, recall, 2*precision*recall/(precision+recall)
 30%|███       | 9/30 [01:07<01:53,  5.42s/it]

(0.7220964652620582, 0.0, 0.0, nan)
Testing hyperparameters: {'hidden_size': (64,), 'l2_reg': 1}
tensor([0, 0])
Training stopped after 100 epochs.


 33%|███▎      | 10/30 [01:13<01:47,  5.38s/it]

(0.2779035347379418, 0.2779035347379418, 1.0, 0.434936639869192)
Testing hyperparameters: {'hidden_size': (64,), 'l2_reg': 10}
tensor([0, 0])
Training stopped after 42 epochs.


 37%|███▋      | 11/30 [01:17<01:35,  5.01s/it]

(0.2779035347379418, 0.2779035347379418, 1.0, 0.434936639869192)
Testing hyperparameters: {'hidden_size': (64,), 'l2_reg': 100}
tensor([0, 0])
Training stopped after 42 epochs.


 40%|████      | 12/30 [01:21<01:25,  4.77s/it]

(0.3731716872714609, 0.2826187183033656, 0.8161810776942355, 0.41985455570999775)
Testing hyperparameters: {'hidden_size': (128,), 'l2_reg': 0.001}
tensor([0, 0])
Training stopped after 13 epochs.


 43%|████▎     | 13/30 [01:25<01:16,  4.47s/it]

(0.7074699634337455, 0.48308668076109934, 0.7516447368421053, 0.5881595881595881)
Testing hyperparameters: {'hidden_size': (128,), 'l2_reg': 0.01}
tensor([0, 0])
Training stopped after 94 epochs.


 47%|████▋     | 14/30 [01:30<01:17,  4.82s/it]

(0.6791528817691103, 0.4556569425091023, 0.793937969924812, 0.5790089961445095)
Testing hyperparameters: {'hidden_size': (128,), 'l2_reg': 0.1}
tensor([0, 0])
Training stopped after 100 epochs.


 50%|█████     | 15/30 [01:36<01:16,  5.09s/it]

(0.2779035347379418, 0.2779035347379418, 1.0, 0.434936639869192)
Testing hyperparameters: {'hidden_size': (128,), 'l2_reg': 1}
tensor([0, 0])
Training stopped after 100 epochs.


 53%|█████▎    | 16/30 [01:42<01:15,  5.36s/it]

(0.2779035347379418, 0.2779035347379418, 1.0, 0.434936639869192)
Testing hyperparameters: {'hidden_size': (128,), 'l2_reg': 10}
tensor([0, 0])
Training stopped after 100 epochs.


 57%|█████▋    | 17/30 [01:48<01:11,  5.52s/it]

(0.2779035347379418, 0.2779035347379418, 1.0, 0.434936639869192)
Testing hyperparameters: {'hidden_size': (128,), 'l2_reg': 100}
tensor([0, 0])
Training stopped after 100 epochs.


 60%|██████    | 18/30 [01:54<01:07,  5.59s/it]

(0.2779035347379418, 0.2779035347379418, 1.0, 0.434936639869192)
Testing hyperparameters: {'hidden_size': (64, 32), 'l2_reg': 0.001}
tensor([0, 0])
Training stopped after 100 epochs.


 63%|██████▎   | 19/30 [01:59<01:00,  5.52s/it]

(0.7166550583318823, 0.4937103753647982, 0.768483709273183, 0.6011886526560873)
Testing hyperparameters: {'hidden_size': (64, 32), 'l2_reg': 0.01}
tensor([0, 0])
Training stopped after 93 epochs.


 67%|██████▋   | 20/30 [02:05<00:54,  5.46s/it]

(0.6950417900052237, 0.4701646584417455, 0.7670739348370927, 0.5829935414744487)
Testing hyperparameters: {'hidden_size': (64, 32), 'l2_reg': 0.1}
tensor([0, 0])
Training stopped after 100 epochs.


 70%|███████   | 21/30 [02:10<00:50,  5.56s/it]

(0.2779035347379418, 0.2779035347379418, 1.0, 0.434936639869192)
Testing hyperparameters: {'hidden_size': (64, 32), 'l2_reg': 1}
tensor([0, 0])
Training stopped after 100 epochs.


  _warn_prf(average, modifier, msg_start, len(result))
  return accuracy, precision, recall, 2*precision*recall/(precision+recall)
 73%|███████▎  | 22/30 [02:16<00:45,  5.72s/it]

(0.7220964652620582, 0.0, 0.0, nan)
Testing hyperparameters: {'hidden_size': (64, 32), 'l2_reg': 10}
tensor([0, 0])
Training stopped after 100 epochs.


 77%|███████▋  | 23/30 [02:22<00:40,  5.80s/it]

(0.2779035347379418, 0.2779035347379418, 1.0, 0.434936639869192)
Testing hyperparameters: {'hidden_size': (64, 32), 'l2_reg': 100}
tensor([0, 0])
Training stopped after 100 epochs.


 80%|████████  | 24/30 [02:28<00:35,  5.84s/it]

(0.2779035347379418, 0.2779035347379418, 1.0, 0.434936639869192)
Testing hyperparameters: {'hidden_size': (64, 32, 16), 'l2_reg': 0.001}
tensor([0, 0])
Training stopped after 100 epochs.


 83%|████████▎ | 25/30 [02:34<00:29,  5.86s/it]

(0.7255354344419293, 0.5041640312038794, 0.7491384711779449, 0.6027095148078134)
Testing hyperparameters: {'hidden_size': (64, 32, 16), 'l2_reg': 0.01}
tensor([0, 0])
Training stopped after 100 epochs.


 87%|████████▋ | 26/30 [02:40<00:23,  5.92s/it]

(0.2779035347379418, 0.2779035347379418, 1.0, 0.434936639869192)
Testing hyperparameters: {'hidden_size': (64, 32, 16), 'l2_reg': 0.1}
tensor([0, 0])
Training stopped after 100 epochs.


 90%|█████████ | 27/30 [02:46<00:17,  5.95s/it]

(0.2779035347379418, 0.2779035347379418, 1.0, 0.434936639869192)
Testing hyperparameters: {'hidden_size': (64, 32, 16), 'l2_reg': 1}
tensor([0, 0])
Training stopped after 100 epochs.


  _warn_prf(average, modifier, msg_start, len(result))
  return accuracy, precision, recall, 2*precision*recall/(precision+recall)
 93%|█████████▎| 28/30 [02:52<00:11,  5.96s/it]

(0.7220964652620582, 0.0, 0.0, nan)
Testing hyperparameters: {'hidden_size': (64, 32, 16), 'l2_reg': 10}
tensor([0, 0])
Training stopped after 100 epochs.


  _warn_prf(average, modifier, msg_start, len(result))
  return accuracy, precision, recall, 2*precision*recall/(precision+recall)
 97%|█████████▋| 29/30 [02:58<00:05,  5.91s/it]

(0.7220964652620582, 0.0, 0.0, nan)
Testing hyperparameters: {'hidden_size': (64, 32, 16), 'l2_reg': 100}
tensor([0, 0])
Training stopped after 100 epochs.


100%|██████████| 30/30 [03:04<00:00,  6.15s/it]

(0.2779035347379418, 0.2779035347379418, 1.0, 0.434936639869192)





In [29]:
X_train = [get_text_features(d, BoW=False, tfidf=False, w2v=True) for d in data_train]
X_val = [get_text_features(d, BoW=False, tfidf=False, w2v=True) for d in data_val]
X_test = [get_text_features(d, BoW=False, tfidf=False, w2v=True) for d in data_test]

# Generate all possible combinations of hyperparameters
grid = ParameterGrid(param_grid)

scores_w2v = []

# Iterate over the hyperparameter combinations
bar = tqdm(grid)
X_train = torch.tensor(X_train).to('cuda')
y_train = torch.tensor(y_train).to('cuda')
for params in bar:
    print("Testing hyperparameters:", params)

    # Create the model with the current hyperparameters
    model = MLPClassifier(input_size=len(X_train[0]), output_size=2, **params)

    # Train the model
    model.fit(X_train, y_train, X_val, y_val, epochs=100, use_gpu=True)
    metrics = test_metrics(model, X_test, y_test)
    print(metrics)
    scores_w2v.append({'params':params, 'scores':metrics})
    bar.set_postfix()

with open('mlp_w2v.json', 'w') as f:
    json.dump(scores_w2v, f)

  y_train = torch.tensor(y_train).to('cuda')


Testing hyperparameters: {'hidden_size': (5,), 'l2_reg': 0.001}
tensor([0, 0])
Training stopped after 78 epochs.


  3%|▎         | 1/30 [00:13<06:17, 13.00s/it]

(0.5902620581577572, 0.390028686589927, 0.8412437343358395, 0.5329595355645421)
Testing hyperparameters: {'hidden_size': (5,), 'l2_reg': 0.01}
tensor([0, 0])
Training stopped after 100 epochs.


  _warn_prf(average, modifier, msg_start, len(result))
  return accuracy, precision, recall, 2*precision*recall/(precision+recall)
  7%|▋         | 2/30 [00:15<03:11,  6.83s/it]

(0.7220964652620582, 0.0, 0.0, nan)
Testing hyperparameters: {'hidden_size': (5,), 'l2_reg': 0.1}
tensor([0, 0])
Training stopped after 100 epochs.


  _warn_prf(average, modifier, msg_start, len(result))
  return accuracy, precision, recall, 2*precision*recall/(precision+recall)
 10%|█         | 3/30 [00:17<02:10,  4.84s/it]

(0.7220964652620582, 0.0, 0.0, nan)
Testing hyperparameters: {'hidden_size': (5,), 'l2_reg': 1}
tensor([0, 0])


 13%|█▎        | 4/30 [00:20<01:44,  4.02s/it]

Training stopped after 100 epochs.
(0.2779035347379418, 0.2779035347379418, 1.0, 0.434936639869192)
Testing hyperparameters: {'hidden_size': (5,), 'l2_reg': 10}
tensor([0, 0])
Training stopped after 100 epochs.


  _warn_prf(average, modifier, msg_start, len(result))
  return accuracy, precision, recall, 2*precision*recall/(precision+recall)
 17%|█▋        | 5/30 [00:23<01:28,  3.53s/it]

(0.7220964652620582, 0.0, 0.0, nan)
Testing hyperparameters: {'hidden_size': (5,), 'l2_reg': 100}
tensor([0, 0])
Training stopped after 100 epochs.


 20%|██        | 6/30 [00:26<01:18,  3.28s/it]

(0.2779035347379418, 0.2779035347379418, 1.0, 0.434936639869192)
Testing hyperparameters: {'hidden_size': (64,), 'l2_reg': 0.001}
tensor([0, 0])
Training stopped after 100 epochs.


 23%|██▎       | 7/30 [00:29<01:14,  3.25s/it]

(0.6547971443496431, 0.43556722513961826, 0.8185307017543859, 0.5685762472117948)
Testing hyperparameters: {'hidden_size': (64,), 'l2_reg': 0.01}
tensor([0, 0])
Training stopped after 100 epochs.


 27%|██▋       | 8/30 [00:32<01:10,  3.21s/it]

(0.2779035347379418, 0.2779035347379418, 1.0, 0.434936639869192)
Testing hyperparameters: {'hidden_size': (64,), 'l2_reg': 0.1}
tensor([0, 0])
Training stopped after 100 epochs.


 30%|███       | 9/30 [00:35<01:06,  3.16s/it]

(0.2779035347379418, 0.2779035347379418, 1.0, 0.434936639869192)
Testing hyperparameters: {'hidden_size': (64,), 'l2_reg': 1}
tensor([0, 0])
Training stopped after 100 epochs.


 33%|███▎      | 10/30 [00:38<01:02,  3.13s/it]

(0.2779035347379418, 0.2779035347379418, 1.0, 0.434936639869192)
Testing hyperparameters: {'hidden_size': (64,), 'l2_reg': 10}
tensor([0, 0])
Training stopped after 42 epochs.


 37%|███▋      | 11/30 [00:40<00:54,  2.89s/it]

(0.2779035347379418, 0.2779035347379418, 1.0, 0.434936639869192)
Testing hyperparameters: {'hidden_size': (64,), 'l2_reg': 100}
tensor([0, 0])
Training stopped after 42 epochs.


  _warn_prf(average, modifier, msg_start, len(result))
  return accuracy, precision, recall, 2*precision*recall/(precision+recall)
 40%|████      | 12/30 [00:43<00:48,  2.71s/it]

(0.7220964652620582, 0.0, 0.0, nan)
Testing hyperparameters: {'hidden_size': (128,), 'l2_reg': 0.001}
tensor([0, 0])
Training stopped after 100 epochs.


 43%|████▎     | 13/30 [00:47<00:51,  3.03s/it]

(0.6503787219223403, 0.4326423811276013, 0.8287907268170426, 0.5685121014317565)
Testing hyperparameters: {'hidden_size': (128,), 'l2_reg': 0.01}
tensor([0, 0])
Training stopped after 100 epochs.


  _warn_prf(average, modifier, msg_start, len(result))
  return accuracy, precision, recall, 2*precision*recall/(precision+recall)
 47%|████▋     | 14/30 [00:50<00:51,  3.22s/it]

(0.7220964652620582, 0.0, 0.0, nan)
Testing hyperparameters: {'hidden_size': (128,), 'l2_reg': 0.1}
tensor([0, 0])
Training stopped after 100 epochs.


  _warn_prf(average, modifier, msg_start, len(result))
  return accuracy, precision, recall, 2*precision*recall/(precision+recall)
 50%|█████     | 15/30 [00:54<00:50,  3.36s/it]

(0.7220964652620582, 0.0, 0.0, nan)
Testing hyperparameters: {'hidden_size': (128,), 'l2_reg': 1}
tensor([0, 0])
Training stopped after 100 epochs.


  _warn_prf(average, modifier, msg_start, len(result))
  return accuracy, precision, recall, 2*precision*recall/(precision+recall)
 53%|█████▎    | 16/30 [00:58<00:48,  3.46s/it]

(0.7220964652620582, 0.0, 0.0, nan)
Testing hyperparameters: {'hidden_size': (128,), 'l2_reg': 10}
tensor([0, 0])
Training stopped after 100 epochs.


  _warn_prf(average, modifier, msg_start, len(result))
  return accuracy, precision, recall, 2*precision*recall/(precision+recall)
 57%|█████▋    | 17/30 [01:01<00:45,  3.53s/it]

(0.7220964652620582, 0.0, 0.0, nan)
Testing hyperparameters: {'hidden_size': (128,), 'l2_reg': 100}
tensor([0, 0])
Training stopped after 100 epochs.


  _warn_prf(average, modifier, msg_start, len(result))
  return accuracy, precision, recall, 2*precision*recall/(precision+recall)
 60%|██████    | 18/30 [01:05<00:42,  3.58s/it]

(0.7220964652620582, 0.0, 0.0, nan)
Testing hyperparameters: {'hidden_size': (64, 32), 'l2_reg': 0.001}
tensor([0, 0])
Training stopped after 39 epochs.


 63%|██████▎   | 19/30 [01:07<00:35,  3.25s/it]

(0.588520807940101, 0.3930724465972053, 0.8834586466165414, 0.5440733148438442)
Testing hyperparameters: {'hidden_size': (64, 32), 'l2_reg': 0.01}
tensor([0, 0])
Training stopped after 100 epochs.


 67%|██████▋   | 20/30 [01:11<00:32,  3.29s/it]

(0.2779035347379418, 0.2779035347379418, 1.0, 0.434936639869192)
Testing hyperparameters: {'hidden_size': (64, 32), 'l2_reg': 0.1}
tensor([0, 0])
Training stopped after 100 epochs.


 70%|███████   | 21/30 [01:14<00:29,  3.32s/it]

(0.2779035347379418, 0.2779035347379418, 1.0, 0.434936639869192)
Testing hyperparameters: {'hidden_size': (64, 32), 'l2_reg': 1}
tensor([0, 0])
Training stopped after 100 epochs.


 73%|███████▎  | 22/30 [01:18<00:26,  3.35s/it]

(0.2779035347379418, 0.2779035347379418, 1.0, 0.434936639869192)
Testing hyperparameters: {'hidden_size': (64, 32), 'l2_reg': 10}
tensor([0, 0])
Training stopped after 100 epochs.


 77%|███████▋  | 23/30 [01:21<00:23,  3.37s/it]

(0.2779035347379418, 0.2779035347379418, 1.0, 0.434936639869192)
Testing hyperparameters: {'hidden_size': (64, 32), 'l2_reg': 100}
tensor([0, 0])
Training stopped after 100 epochs.


  _warn_prf(average, modifier, msg_start, len(result))
  return accuracy, precision, recall, 2*precision*recall/(precision+recall)
 80%|████████  | 24/30 [01:25<00:20,  3.39s/it]

(0.7220964652620582, 0.0, 0.0, nan)
Testing hyperparameters: {'hidden_size': (64, 32, 16), 'l2_reg': 0.001}
tensor([0, 0])
Training stopped after 41 epochs.


 83%|████████▎ | 25/30 [01:27<00:15,  3.13s/it]

(0.5332143479017935, 0.36267249018863146, 0.8974780701754386, 0.5165900279505905)
Testing hyperparameters: {'hidden_size': (64, 32, 16), 'l2_reg': 0.01}
tensor([0, 0])
Training stopped after 100 epochs.


 87%|████████▋ | 26/30 [01:31<00:13,  3.28s/it]

(0.2779035347379418, 0.2779035347379418, 1.0, 0.434936639869192)
Testing hyperparameters: {'hidden_size': (64, 32, 16), 'l2_reg': 0.1}
tensor([0, 0])
Training stopped after 100 epochs.


 90%|█████████ | 27/30 [01:34<00:10,  3.37s/it]

(0.2779035347379418, 0.2779035347379418, 1.0, 0.434936639869192)
Testing hyperparameters: {'hidden_size': (64, 32, 16), 'l2_reg': 1}
tensor([0, 0])
Training stopped after 100 epochs.


  _warn_prf(average, modifier, msg_start, len(result))
  return accuracy, precision, recall, 2*precision*recall/(precision+recall)
 93%|█████████▎| 28/30 [01:38<00:06,  3.43s/it]

(0.7220964652620582, 0.0, 0.0, nan)
Testing hyperparameters: {'hidden_size': (64, 32, 16), 'l2_reg': 10}
tensor([0, 0])
Training stopped after 100 epochs.


 97%|█████████▋| 29/30 [01:41<00:03,  3.49s/it]

(0.2779035347379418, 0.2779035347379418, 1.0, 0.434936639869192)
Testing hyperparameters: {'hidden_size': (64, 32, 16), 'l2_reg': 100}
tensor([0, 0])
Training stopped after 100 epochs.


100%|██████████| 30/30 [01:45<00:00,  3.52s/it]

(0.2779035347379418, 0.2779035347379418, 1.0, 0.434936639869192)





## Text + HandCrafted Features

In [30]:
def get_features(d, BoW=False, tfidf=False, w2v=False):
    a = get_text_features(d, BoW, tfidf, w2v)
    a.extend(get_meta_features(d, True, True, True, True))
    return a

In [31]:
X_train = [get_features(d, BoW=True, tfidf=False, w2v=False) for d in data_train]
X_val = [get_features(d, BoW=True, tfidf=False, w2v=False) for d in data_val]
X_test = [get_features(d, BoW=True, tfidf=False, w2v=False) for d in data_test]

# Generate all possible combinations of hyperparameters
grid = ParameterGrid(param_grid)

scores_bow_hand = []

# Iterate over the hyperparameter combinations
bar = tqdm(grid)
X_train = torch.tensor(X_train).to('cuda')
y_train = torch.tensor(y_train).to('cuda')
for params in bar:
    print("Testing hyperparameters:", params)

    # Create the model with the current hyperparameters
    model = MLPClassifier(input_size=len(X_train[0]), output_size=2, **params)

    # Train the model
    model.fit(X_train, y_train, X_val, y_val, epochs=100, use_gpu=True)
    metrics = test_metrics(model, X_test, y_test)
    print(metrics)
    scores_bow_hand.append({'params':params, 'scores':metrics})
    bar.set_postfix()

with open('mlp_bow_hand.json', 'w') as f:
    json.dump(scores_bow_hand, f)

  y_train = torch.tensor(y_train).to('cuda')


Testing hyperparameters: {'hidden_size': (5,), 'l2_reg': 0.001}
tensor([0, 0])
Training stopped after 55 epochs.


  3%|▎         | 1/30 [00:14<07:13, 14.94s/it]

(0.6376675953334494, 0.401663033007149, 0.6204573934837093, 0.4876427318334308)
Testing hyperparameters: {'hidden_size': (5,), 'l2_reg': 0.01}
tensor([0, 0])
Training stopped after 100 epochs.


  7%|▋         | 2/30 [00:17<03:41,  7.90s/it]

(0.6009925126240641, 0.33948765289637667, 0.4608395989974937, 0.39096345514950165)
Testing hyperparameters: {'hidden_size': (5,), 'l2_reg': 0.1}
tensor([0, 0])
Training stopped after 100 epochs.


 10%|█         | 3/30 [00:20<02:32,  5.65s/it]

(0.6541877067734634, 0.4269526128488481, 0.7141290726817042, 0.5344039385769546)
Testing hyperparameters: {'hidden_size': (5,), 'l2_reg': 1}
tensor([0, 0])
Training stopped after 100 epochs.


 13%|█▎        | 4/30 [00:24<02:01,  4.66s/it]

(0.46497910499738815, 0.32265425611770004, 0.8416353383458647, 0.466477112408569)
Testing hyperparameters: {'hidden_size': (5,), 'l2_reg': 10}
tensor([0, 0])
Training stopped after 100 epochs.


 17%|█▋        | 5/30 [00:26<01:41,  4.04s/it]

(0.7256007313250914, 0.7112860892388452, 0.021224937343358397, 0.04121986462848886)
Testing hyperparameters: {'hidden_size': (5,), 'l2_reg': 100}
tensor([0, 0])
Training stopped after 100 epochs.


 20%|██        | 6/30 [00:30<01:29,  3.73s/it]

(0.27786000348250045, 0.27787209960384834, 0.9998433583959899, 0.4348833248168966)
Testing hyperparameters: {'hidden_size': (64,), 'l2_reg': 0.001}
tensor([0, 0])
Training stopped after 29 epochs.


 23%|██▎       | 7/30 [00:32<01:17,  3.36s/it]

(0.7053586975448372, 0.4806267949816093, 0.7471021303258145, 0.5849455771884101)
Testing hyperparameters: {'hidden_size': (64,), 'l2_reg': 0.01}
tensor([0, 0])
Training stopped after 100 epochs.


 27%|██▋       | 8/30 [00:36<01:15,  3.41s/it]

(0.682395960299495, 0.4583903640843143, 0.7868890977443609, 0.5793115377962291)
Testing hyperparameters: {'hidden_size': (64,), 'l2_reg': 0.1}
tensor([0, 0])
Training stopped after 100 epochs.


 30%|███       | 9/30 [00:39<01:12,  3.44s/it]

(0.6954118056764758, 0.47064738555832214, 0.7698151629072681, 0.5841554736716986)
Testing hyperparameters: {'hidden_size': (64,), 'l2_reg': 1}
tensor([0, 0])
Training stopped after 100 epochs.


 33%|███▎      | 10/30 [00:43<01:09,  3.50s/it]

(0.2781647222705903, 0.2779761386397283, 1.0, 0.43502555366269163)
Testing hyperparameters: {'hidden_size': (64,), 'l2_reg': 10}
tensor([0, 0])
Training stopped after 100 epochs.


 37%|███▋      | 11/30 [00:46<01:07,  3.53s/it]

(0.5509968657496083, 0.3723075923459277, 0.8975563909774437, 0.5263037038737973)
Testing hyperparameters: {'hidden_size': (64,), 'l2_reg': 100}
tensor([0, 0])
Training stopped after 100 epochs.


 40%|████      | 12/30 [00:50<01:03,  3.54s/it]

(0.37227929653491204, 0.28753106657500926, 0.8517387218045113, 0.4299268630164065)
Testing hyperparameters: {'hidden_size': (128,), 'l2_reg': 0.001}
tensor([0, 0])
Training stopped after 30 epochs.


 43%|████▎     | 13/30 [00:53<00:56,  3.33s/it]

(0.7133249172906146, 0.4893515827300111, 0.725250626566416, 0.5843930453441041)
Testing hyperparameters: {'hidden_size': (128,), 'l2_reg': 0.01}
tensor([0, 0])
Training stopped after 83 epochs.


 47%|████▋     | 14/30 [00:57<00:55,  3.49s/it]

(0.6880985547623193, 0.4634431754353117, 0.775454260651629, 0.5801593812258291)
Testing hyperparameters: {'hidden_size': (128,), 'l2_reg': 0.1}
tensor([0, 0])
Training stopped after 56 epochs.


 50%|█████     | 15/30 [01:00<00:51,  3.46s/it]

(0.6171861396482674, 0.4106149395445442, 0.8670895989974937, 0.5573118550213944)
Testing hyperparameters: {'hidden_size': (128,), 'l2_reg': 1}
tensor([0, 0])
Training stopped after 100 epochs.


 53%|█████▎    | 16/30 [01:04<00:51,  3.67s/it]

(0.2787306285913286, 0.27813357731015553, 1.0, 0.43521832498210444)
Testing hyperparameters: {'hidden_size': (128,), 'l2_reg': 10}
tensor([0, 0])
Training stopped after 100 epochs.


 57%|█████▋    | 17/30 [01:08<00:49,  3.81s/it]

(0.2779035347379418, 0.2779035347379418, 1.0, 0.434936639869192)
Testing hyperparameters: {'hidden_size': (128,), 'l2_reg': 100}
tensor([0, 0])
Training stopped after 100 epochs.


 60%|██████    | 18/30 [01:13<00:46,  3.91s/it]

(0.2972096465262058, 0.25610335090831854, 0.8027098997493735, 0.3883153049046167)
Testing hyperparameters: {'hidden_size': (64, 32), 'l2_reg': 0.001}
tensor([0, 0])
Training stopped after 17 epochs.


 63%|██████▎   | 19/30 [01:15<00:38,  3.50s/it]

(0.6725578965697371, 0.4506333506854069, 0.8135964912280702, 0.5800111669458403)
Testing hyperparameters: {'hidden_size': (64, 32), 'l2_reg': 0.01}
tensor([0, 0])
Training stopped after 52 epochs.


 67%|██████▋   | 20/30 [01:18<00:34,  3.43s/it]

(0.682765975970747, 0.45775866099396884, 0.7668389724310777, 0.5732939075445735)
Testing hyperparameters: {'hidden_size': (64, 32), 'l2_reg': 0.1}
tensor([0, 0])
Training stopped after 100 epochs.


 70%|███████   | 21/30 [01:22<00:32,  3.60s/it]

(0.684899007487376, 0.4611219800718868, 0.7937813283208021, 0.5833597145077273)
Testing hyperparameters: {'hidden_size': (64, 32), 'l2_reg': 1}
tensor([0, 0])
Training stopped after 100 epochs.


 73%|███████▎  | 22/30 [01:26<00:29,  3.70s/it]

(0.27788176911022117, 0.27788781751300523, 0.999921679197995, 0.4349099827970908)
Testing hyperparameters: {'hidden_size': (64, 32), 'l2_reg': 10}
tensor([0, 0])
Training stopped after 100 epochs.


 77%|███████▋  | 23/30 [01:30<00:26,  3.78s/it]

(0.2779035347379418, 0.2779035347379418, 1.0, 0.434936639869192)
Testing hyperparameters: {'hidden_size': (64, 32), 'l2_reg': 100}
tensor([0, 0])
Training stopped after 100 epochs.


  _warn_prf(average, modifier, msg_start, len(result))
  return accuracy, precision, recall, 2*precision*recall/(precision+recall)
 80%|████████  | 24/30 [01:34<00:22,  3.80s/it]

(0.7220964652620582, 0.0, 0.0, nan)
Testing hyperparameters: {'hidden_size': (64, 32, 16), 'l2_reg': 0.001}
tensor([0, 0])
Training stopped after 70 epochs.


 83%|████████▎ | 25/30 [01:38<00:18,  3.70s/it]

(0.7027903534737941, 0.4780325920055476, 0.755874060150376, 0.5856722395849137)
Testing hyperparameters: {'hidden_size': (64, 32, 16), 'l2_reg': 0.01}
tensor([0, 0])
Training stopped after 100 epochs.


 87%|████████▋ | 26/30 [01:42<00:15,  3.81s/it]

(0.6977189622148703, 0.4732875405456974, 0.7770989974937343, 0.5882841219020515)
Testing hyperparameters: {'hidden_size': (64, 32, 16), 'l2_reg': 0.1}
tensor([0, 0])
Training stopped after 100 epochs.


 90%|█████████ | 27/30 [01:46<00:11,  3.90s/it]

(0.5288176911022114, 0.3656499636891794, 0.9464285714285714, 0.5275013095861708)
Testing hyperparameters: {'hidden_size': (64, 32, 16), 'l2_reg': 1}
tensor([0, 0])
Training stopped after 100 epochs.


 93%|█████████▎| 28/30 [01:50<00:07,  3.96s/it]

(0.2779035347379418, 0.2779035347379418, 1.0, 0.434936639869192)
Testing hyperparameters: {'hidden_size': (64, 32, 16), 'l2_reg': 10}
tensor([0, 0])
Training stopped after 100 epochs.


 97%|█████████▋| 29/30 [01:54<00:04,  4.02s/it]

(0.2779035347379418, 0.2779035347379418, 1.0, 0.434936639869192)
Testing hyperparameters: {'hidden_size': (64, 32, 16), 'l2_reg': 100}
tensor([0, 0])
Training stopped after 100 epochs.


100%|██████████| 30/30 [01:58<00:00,  3.95s/it]

(0.2779035347379418, 0.2779035347379418, 1.0, 0.434936639869192)





In [32]:
X_train = [get_features(d, BoW=False, tfidf=True, w2v=False) for d in data_train]
X_val = [get_features(d, BoW=False, tfidf=True, w2v=False) for d in data_val]
X_test = [get_features(d, BoW=False, tfidf=True, w2v=False) for d in data_test]

# Generate all possible combinations of hyperparameters
grid = ParameterGrid(param_grid)

scores_tfidf_hand = []

# Iterate over the hyperparameter combinations
bar = tqdm(grid)
X_train = torch.tensor(X_train).to('cuda')
y_train = torch.tensor(y_train).to('cuda')
for params in bar:
    print("Testing hyperparameters:", params)

    # Create the model with the current hyperparameters
    model = MLPClassifier(input_size=len(X_train[0]), output_size=2, **params)

    # Train the model
    model.fit(X_train, y_train, X_val, y_val, epochs=100, use_gpu=True)
    metrics = test_metrics(model, X_test, y_test)
    print(metrics)
    scores_tfidf_hand.append({'params':params, 'scores':metrics})
    bar.set_postfix()

with open('mlp_tfidf_hand.json', 'w') as f:
    json.dump(scores_tfidf_hand, f)

  y_train = torch.tensor(y_train).to('cuda')


Testing hyperparameters: {'hidden_size': (5,), 'l2_reg': 0.001}
tensor([0, 0])
Training stopped after 16 epochs.


  3%|▎         | 1/30 [00:27<13:30, 27.96s/it]

(0.49259968657496084, 0.3149136357253195, 0.7025375939849624, 0.4348880054300398)
Testing hyperparameters: {'hidden_size': (5,), 'l2_reg': 0.01}
tensor([0, 0])
Training stopped after 32 epochs.


  7%|▋         | 2/30 [00:32<06:30, 13.94s/it]

(0.6307678913459864, 0.3672991777356104, 0.45480889724310775, 0.4063965287983763)
Testing hyperparameters: {'hidden_size': (5,), 'l2_reg': 0.1}
tensor([0, 0])
Training stopped after 88 epochs.


 10%|█         | 3/30 [00:37<04:25,  9.83s/it]

(0.6325962040745255, 0.36642411642411643, 0.4417293233082707, 0.4005681818181819)
Testing hyperparameters: {'hidden_size': (5,), 'l2_reg': 1}
tensor([0, 0])
Training stopped after 100 epochs.


 13%|█▎        | 4/30 [00:41<03:24,  7.88s/it]

(0.585952463869058, 0.3325839087843263, 0.48660714285714285, 0.3951159019364686)
Testing hyperparameters: {'hidden_size': (5,), 'l2_reg': 10}
tensor([0, 0])
Training stopped after 100 epochs.


 17%|█▋        | 5/30 [00:46<02:49,  6.78s/it]

(0.7358088107261014, 0.5439821278972354, 0.3051378446115288, 0.39096838936276973)
Testing hyperparameters: {'hidden_size': (5,), 'l2_reg': 100}
tensor([0, 0])
Training stopped after 100 epochs.


 20%|██        | 6/30 [00:51<02:28,  6.19s/it]

(0.7221399965174996, 1.0, 0.00015664160401002505, 0.00031323414252153485)
Testing hyperparameters: {'hidden_size': (64,), 'l2_reg': 0.001}
tensor([0, 0])
Training stopped after 41 epochs.


 23%|██▎       | 7/30 [00:56<02:09,  5.63s/it]

(0.6966742120842765, 0.47218517812916744, 0.7764724310776943, 0.5872526951782964)
Testing hyperparameters: {'hidden_size': (64,), 'l2_reg': 0.01}
tensor([0, 0])
Training stopped after 100 epochs.


 27%|██▋       | 8/30 [01:01<02:02,  5.56s/it]

(0.7109742294967787, 0.4865717138802754, 0.7250939849624061, 0.5823557163075955)
Testing hyperparameters: {'hidden_size': (64,), 'l2_reg': 0.1}
tensor([0, 0])
Training stopped after 43 epochs.


 30%|███       | 9/30 [01:05<01:48,  5.16s/it]

(0.6907539613442452, 0.46232339089481944, 0.6919642857142857, 0.554300771692076)
Testing hyperparameters: {'hidden_size': (64,), 'l2_reg': 1}
tensor([0, 0])
Training stopped after 100 epochs.


 33%|███▎      | 10/30 [01:11<01:43,  5.16s/it]

(0.34313512101689014, 0.2955447521078466, 0.9855889724310777, 0.4547310604007444)
Testing hyperparameters: {'hidden_size': (64,), 'l2_reg': 10}
tensor([0, 0])
Training stopped after 100 epochs.


 37%|███▋      | 11/30 [01:16<01:38,  5.17s/it]

(0.30487114748389343, 0.2124276155900267, 0.5545112781954887, 0.3071783413237304)
Testing hyperparameters: {'hidden_size': (64,), 'l2_reg': 100}
tensor([0, 0])
Training stopped after 100 epochs.


 40%|████      | 12/30 [01:21<01:33,  5.18s/it]

(0.4207078182134773, 0.2629668937656202, 0.6015820802005013, 0.3659623126950473)
Testing hyperparameters: {'hidden_size': (128,), 'l2_reg': 0.001}
tensor([0, 0])
Training stopped after 20 epochs.


 43%|████▎     | 13/30 [01:25<01:22,  4.84s/it]

(0.6800888037611005, 0.45695807314897413, 0.8023966165413534, 0.5823007843583039)
Testing hyperparameters: {'hidden_size': (128,), 'l2_reg': 0.01}
tensor([0, 0])
Training stopped after 100 epochs.


 47%|████▋     | 14/30 [01:31<01:22,  5.18s/it]

(0.687314992164374, 0.4635126495570372, 0.7949561403508771, 0.5855881843881613)
Testing hyperparameters: {'hidden_size': (128,), 'l2_reg': 0.1}
tensor([0, 0])
Training stopped after 45 epochs.


 50%|█████     | 15/30 [01:36<01:17,  5.17s/it]

(0.6218875152359394, 0.4136728418210455, 0.8639567669172933, 0.5594664502713395)
Testing hyperparameters: {'hidden_size': (128,), 'l2_reg': 1}
tensor([0, 0])
Training stopped after 100 epochs.


 53%|█████▎    | 16/30 [01:42<01:16,  5.45s/it]

(0.2779035347379418, 0.2779035347379418, 1.0, 0.434936639869192)
Testing hyperparameters: {'hidden_size': (128,), 'l2_reg': 10}
tensor([0, 0])
Training stopped after 100 epochs.


 57%|█████▋    | 17/30 [01:48<01:13,  5.63s/it]

(0.27690231586278946, 0.2766933053845146, 0.9924812030075187, 0.4327425468701977)
Testing hyperparameters: {'hidden_size': (128,), 'l2_reg': 100}
tensor([0, 0])
Training stopped after 100 epochs.


 60%|██████    | 18/30 [01:54<01:08,  5.74s/it]

(0.7373106390388299, 0.5637657361795293, 0.24201127819548873, 0.33864869307907286)
Testing hyperparameters: {'hidden_size': (64, 32), 'l2_reg': 0.001}
tensor([0, 0])
Training stopped after 15 epochs.


 63%|██████▎   | 19/30 [01:58<00:57,  5.21s/it]

(0.6988725404840676, 0.4739768791766255, 0.7610432330827067, 0.5841474044906669)
Testing hyperparameters: {'hidden_size': (64, 32), 'l2_reg': 0.01}
tensor([0, 0])
Training stopped after 12 epochs.


 67%|██████▋   | 20/30 [02:02<00:48,  4.82s/it]

(0.6800888037611005, 0.45656674768205957, 0.7944862155388471, 0.5798890984965415)
Testing hyperparameters: {'hidden_size': (64, 32), 'l2_reg': 0.1}
tensor([0, 0])
Training stopped after 100 epochs.


 70%|███████   | 21/30 [02:08<00:45,  5.10s/it]

(0.6724708340588542, 0.4507005708354956, 0.8162593984962406, 0.5807422266800402)
Testing hyperparameters: {'hidden_size': (64, 32), 'l2_reg': 1}
tensor([0, 0])
Training stopped after 100 epochs.


 73%|███████▎  | 22/30 [02:14<00:42,  5.30s/it]

(0.2779035347379418, 0.2779035347379418, 1.0, 0.434936639869192)
Testing hyperparameters: {'hidden_size': (64, 32), 'l2_reg': 10}
tensor([0, 0])
Training stopped after 100 epochs.


 77%|███████▋  | 23/30 [02:19<00:38,  5.43s/it]

(0.2779035347379418, 0.2779035347379418, 1.0, 0.434936639869192)
Testing hyperparameters: {'hidden_size': (64, 32), 'l2_reg': 100}
tensor([0, 0])
Training stopped after 100 epochs.


 80%|████████  | 24/30 [02:25<00:32,  5.50s/it]

(0.2779035347379418, 0.2779035347379418, 1.0, 0.434936639869192)
Testing hyperparameters: {'hidden_size': (64, 32, 16), 'l2_reg': 0.001}
tensor([0, 0])
Training stopped after 76 epochs.


 83%|████████▎ | 25/30 [02:31<00:28,  5.65s/it]

(0.7018109002263625, 0.4769169803843868, 0.7540726817042607, 0.5842942104624347)
Testing hyperparameters: {'hidden_size': (64, 32, 16), 'l2_reg': 0.01}
tensor([0, 0])
Training stopped after 100 epochs.


 87%|████████▋ | 26/30 [02:38<00:23,  5.97s/it]

(0.7013973532996691, 0.47686018784369066, 0.7674655388471178, 0.5882282318335985)
Testing hyperparameters: {'hidden_size': (64, 32, 16), 'l2_reg': 0.1}
tensor([0, 0])
Training stopped after 100 epochs.


 90%|█████████ | 27/30 [02:44<00:18,  6.05s/it]

(0.5199155493644436, 0.3618325152461699, 0.9526159147869674, 0.5244593924498201)
Testing hyperparameters: {'hidden_size': (64, 32, 16), 'l2_reg': 1}
tensor([0, 0])
Training stopped after 100 epochs.


  _warn_prf(average, modifier, msg_start, len(result))
  return accuracy, precision, recall, 2*precision*recall/(precision+recall)
 93%|█████████▎| 28/30 [02:50<00:12,  6.06s/it]

(0.7220964652620582, 0.0, 0.0, nan)
Testing hyperparameters: {'hidden_size': (64, 32, 16), 'l2_reg': 10}
tensor([0, 0])
Training stopped after 100 epochs.


  _warn_prf(average, modifier, msg_start, len(result))
  return accuracy, precision, recall, 2*precision*recall/(precision+recall)
 97%|█████████▋| 29/30 [02:56<00:05,  6.00s/it]

(0.7220964652620582, 0.0, 0.0, nan)
Testing hyperparameters: {'hidden_size': (64, 32, 16), 'l2_reg': 100}
tensor([0, 0])
Training stopped after 100 epochs.


100%|██████████| 30/30 [03:02<00:00,  6.08s/it]

(0.2779035347379418, 0.2779035347379418, 1.0, 0.434936639869192)





In [33]:
X_train = [get_features(d, BoW=False, tfidf=False, w2v=True) for d in data_train]
X_val = [get_features(d, BoW=False, tfidf=False, w2v=True) for d in data_val]
X_test = [get_features(d, BoW=False, tfidf=False, w2v=True) for d in data_test]

# Generate all possible combinations of hyperparameters
grid = ParameterGrid(param_grid)

scores_w2v_hand = []

# Iterate over the hyperparameter combinations
bar = tqdm(grid)
X_train = torch.tensor(X_train).to('cuda')
y_train = torch.tensor(y_train).to('cuda')
for params in bar:
    print("Testing hyperparameters:", params)

    # Create the model with the current hyperparameters
    model = MLPClassifier(input_size=len(X_train[0]), output_size=2, **params)

    # Train the model
    model.fit(X_train, y_train, X_val, y_val, epochs=100, use_gpu=True)
    metrics = test_metrics(model, X_test, y_test)
    print(metrics)
    scores_w2v_hand.append({'params':params, 'scores':metrics})
    bar.set_postfix()

with open('mlp_w2v_hand.json', 'w') as f:
    json.dump(scores_w2v_hand, f)

  y_train = torch.tensor(y_train).to('cuda')


Testing hyperparameters: {'hidden_size': (5,), 'l2_reg': 0.001}
tensor([0, 0])
Training stopped after 32 epochs.


  3%|▎         | 1/30 [00:14<06:53, 14.24s/it]

(0.6775639909454989, 0.29782608695652174, 0.11802944862155389, 0.16905990576621047)
Testing hyperparameters: {'hidden_size': (5,), 'l2_reg': 0.01}
tensor([0, 0])
Training stopped after 25 epochs.


  7%|▋         | 2/30 [00:16<03:20,  7.15s/it]

(0.7244036218004527, 0.50416732190596, 0.5021929824561403, 0.5031782154908577)
Testing hyperparameters: {'hidden_size': (5,), 'l2_reg': 0.1}
tensor([0, 0])
Training stopped after 100 epochs.


 10%|█         | 3/30 [00:19<02:19,  5.17s/it]

(0.6720355215044402, 0.4374592125299108, 0.6300125313283208, 0.5163692386699191)
Testing hyperparameters: {'hidden_size': (5,), 'l2_reg': 1}
tensor([0, 0])
Training stopped after 100 epochs.


 13%|█▎        | 4/30 [00:22<01:51,  4.29s/it]

(0.5303412850426606, 0.3404679125018108, 0.7362938596491229, 0.4656265477959386)
Testing hyperparameters: {'hidden_size': (5,), 'l2_reg': 10}
tensor([0, 0])
Training stopped after 100 epochs.


 17%|█▋        | 5/30 [00:25<01:34,  3.78s/it]

(0.2769240814905102, 0.22360063785507717, 0.6479479949874687, 0.33246930697048244)
Testing hyperparameters: {'hidden_size': (5,), 'l2_reg': 100}
tensor([0, 0])
Training stopped after 100 epochs.


 20%|██        | 6/30 [00:28<01:26,  3.60s/it]

(0.7272113877764235, 0.6981450252951096, 0.03242481203007519, 0.06197140932564928)
Testing hyperparameters: {'hidden_size': (64,), 'l2_reg': 0.001}
tensor([0, 0])
Training stopped after 52 epochs.


 23%|██▎       | 7/30 [00:30<01:15,  3.28s/it]

(0.6907321957165244, 0.46645873097155627, 0.7847744360902256, 0.5851265730386289)
Testing hyperparameters: {'hidden_size': (64,), 'l2_reg': 0.01}
tensor([0, 0])
Training stopped after 100 epochs.


 27%|██▋       | 8/30 [00:34<01:11,  3.25s/it]

(0.692234024029253, 0.467626238791883, 0.7760808270676691, 0.5836032746333706)
Testing hyperparameters: {'hidden_size': (64,), 'l2_reg': 0.1}
tensor([0, 0])
Training stopped after 55 epochs.


 30%|███       | 9/30 [00:36<01:05,  3.13s/it]

(0.6308984851123106, 0.41901816776188633, 0.8489974937343359, 0.5611056472902325)
Testing hyperparameters: {'hidden_size': (64,), 'l2_reg': 1}
tensor([0, 0])
Training stopped after 100 epochs.


 33%|███▎      | 10/30 [00:40<01:03,  3.17s/it]

(0.2779035347379418, 0.2779035347379418, 1.0, 0.434936639869192)
Testing hyperparameters: {'hidden_size': (64,), 'l2_reg': 10}
tensor([0, 0])


 37%|███▋      | 11/30 [00:43<01:01,  3.22s/it]

Training stopped after 100 epochs.
(0.6859437576179697, 0.42643281070068206, 0.37703634085213034, 0.40021615330257304)
Testing hyperparameters: {'hidden_size': (64,), 'l2_reg': 100}
tensor([0, 0])
Training stopped after 100 epochs.


 40%|████      | 12/30 [00:46<00:57,  3.22s/it]

(0.7211170120146265, 0.46353322528363045, 0.022399749373433583, 0.04273440418378782)
Testing hyperparameters: {'hidden_size': (128,), 'l2_reg': 0.001}
tensor([0, 0])
Training stopped after 57 epochs.


 43%|████▎     | 13/30 [00:49<00:53,  3.17s/it]

(0.688707992338499, 0.4641956866772477, 0.7788220551378446, 0.5816905527932144)
Testing hyperparameters: {'hidden_size': (128,), 'l2_reg': 0.01}
tensor([0, 0])
Training stopped after 100 epochs.


  _warn_prf(average, modifier, msg_start, len(result))
  return accuracy, precision, recall, 2*precision*recall/(precision+recall)
 47%|████▋     | 14/30 [00:53<00:53,  3.37s/it]

(0.7220964652620582, 0.0, 0.0, nan)
Testing hyperparameters: {'hidden_size': (128,), 'l2_reg': 0.1}
tensor([0, 0])
Training stopped after 47 epochs.


 50%|█████     | 15/30 [00:56<00:48,  3.22s/it]

(0.5437924429740554, 0.3722554890219561, 0.9348370927318296, 0.5324768022840828)
Testing hyperparameters: {'hidden_size': (128,), 'l2_reg': 1}
tensor([0, 0])
Training stopped after 100 epochs.


 53%|█████▎    | 16/30 [01:00<00:47,  3.40s/it]

(0.3081795228974404, 0.2863018316664794, 0.9977286967418546, 0.4449295356512932)
Testing hyperparameters: {'hidden_size': (128,), 'l2_reg': 10}
tensor([0, 0])
Training stopped after 100 epochs.


 57%|█████▋    | 17/30 [01:04<00:45,  3.52s/it]

(0.27751175343896917, 0.27737814979509984, 0.9966322055137845, 0.43397449014391926)
Testing hyperparameters: {'hidden_size': (128,), 'l2_reg': 100}
tensor([0, 0])
Training stopped after 100 epochs.


 60%|██████    | 18/30 [01:07<00:43,  3.60s/it]

(0.28547797318474666, 0.24469276587079367, 0.7528978696741855, 0.3693472163522496)
Testing hyperparameters: {'hidden_size': (64, 32), 'l2_reg': 0.001}
tensor([0, 0])
Training stopped after 55 epochs.


 63%|██████▎   | 19/30 [01:10<00:37,  3.39s/it]

(0.6929087584885948, 0.46801507417831417, 0.7684053884711779, 0.581720079452136)
Testing hyperparameters: {'hidden_size': (64, 32), 'l2_reg': 0.01}
tensor([0, 0])
Training stopped after 12 epochs.


 67%|██████▋   | 20/30 [01:12<00:30,  3.02s/it]

(0.6722531777816472, 0.44995191887402747, 0.806234335839599, 0.5775683106098861)
Testing hyperparameters: {'hidden_size': (64, 32), 'l2_reg': 0.1}
tensor([0, 0])
Training stopped after 50 epochs.


 70%|███████   | 21/30 [01:15<00:26,  2.95s/it]

(0.6783910847988855, 0.4522541373406886, 0.7448308270676691, 0.5627884956799621)
Testing hyperparameters: {'hidden_size': (64, 32), 'l2_reg': 1}
tensor([0, 0])
Training stopped after 100 epochs.


 73%|███████▎  | 22/30 [01:19<00:25,  3.14s/it]

(0.2779035347379418, 0.2779035347379418, 1.0, 0.434936639869192)
Testing hyperparameters: {'hidden_size': (64, 32), 'l2_reg': 10}
tensor([0, 0])
Training stopped after 100 epochs.


 77%|███████▋  | 23/30 [01:22<00:22,  3.27s/it]

(0.2779035347379418, 0.2779035347379418, 1.0, 0.434936639869192)
Testing hyperparameters: {'hidden_size': (64, 32), 'l2_reg': 100}
tensor([0, 0])
Training stopped after 100 epochs.


  _warn_prf(average, modifier, msg_start, len(result))
  return accuracy, precision, recall, 2*precision*recall/(precision+recall)
 80%|████████  | 24/30 [01:26<00:20,  3.39s/it]

(0.7220964652620582, 0.0, 0.0, nan)
Testing hyperparameters: {'hidden_size': (64, 32, 16), 'l2_reg': 0.001}
tensor([0, 0])
Training stopped after 17 epochs.


 83%|████████▎ | 25/30 [01:28<00:15,  3.06s/it]

(0.7056851819606478, 0.48107999598514506, 0.7507832080200502, 0.5864072918578332)
Testing hyperparameters: {'hidden_size': (64, 32, 16), 'l2_reg': 0.01}
tensor([0, 0])
Training stopped after 100 epochs.


 87%|████████▋ | 26/30 [01:32<00:13,  3.28s/it]

(0.6994819780602473, 0.4748292068414167, 0.7675438596491229, 0.5867033855180053)
Testing hyperparameters: {'hidden_size': (64, 32, 16), 'l2_reg': 0.1}
tensor([0, 0])
Training stopped after 100 epochs.


 90%|█████████ | 27/30 [01:36<00:10,  3.42s/it]

(0.5219615183701898, 0.36267510977030376, 0.9509711779448622, 0.5250935195796484)
Testing hyperparameters: {'hidden_size': (64, 32, 16), 'l2_reg': 1}
tensor([0, 0])
Training stopped after 100 epochs.


 93%|█████████▎| 28/30 [01:40<00:07,  3.53s/it]

(0.2779035347379418, 0.2779035347379418, 1.0, 0.434936639869192)
Testing hyperparameters: {'hidden_size': (64, 32, 16), 'l2_reg': 10}
tensor([0, 0])
Training stopped after 100 epochs.


 97%|█████████▋| 29/30 [01:43<00:03,  3.60s/it]

(0.2779035347379418, 0.2779035347379418, 1.0, 0.434936639869192)
Testing hyperparameters: {'hidden_size': (64, 32, 16), 'l2_reg': 100}
tensor([0, 0])
Training stopped after 100 epochs.


100%|██████████| 30/30 [01:47<00:00,  3.59s/it]

(0.2779035347379418, 0.2779035347379418, 1.0, 0.434936639869192)



