## Data Loading

In [1]:
import gzip, json
def parse(path):
  g = gzip.open(path, 'r')
  for l in g:
    yield json.loads(l)

data = []

for review in parse("Software.json.gz"):
  data.append(review)

In [None]:
import numpy as np
from sklearn.model_selection import train_test_split

indices = np.arange(len(data))
indices_train, indices_test = train_test_split(indices, test_size=0.1, random_state=42)
indices_train, indices_val = train_test_split(indices_train, test_size=0.1/0.9, random_state=42)

In [None]:
data_np = np.array(data)
data_train = data_np[indices_train]
data_val = data_np[indices_val]
data_test = data_np[indices_test]

In [None]:
y_train = [0 if ('vote' not in d) or (d['vote']==0)  else 1 for d in data_train]
y_val = [0 if ('vote' not in d) or (d['vote']==0)  else 1 for d in data_val]
y_test = [0 if ('vote' not in d) or (d['vote']==0)  else 1 for d in data_test]

## Naive Bayes

## Non-text (review) features

In [None]:
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import OneHotEncoder

# fit the ID encoder on full data set
userID_str = np.array([[d['reviewerID']] for d in data])
userID_encoder = LabelEncoder()
userID_encoder.fit(userID_str.squeeze())
userID_one_hot_encoder = OneHotEncoder()
userID_one_hot_encoder.fit(userID_str)

itemID_str = np.array([[d['asin']] for d in data])
itemID_encoder = LabelEncoder()
itemID_encoder.fit(itemID_str.squeeze())
itemID_one_hot_encoder = OneHotEncoder()
itemID_one_hot_encoder.fit(itemID_str)

In [None]:
import textstat
import os
import pickle 

if os.path.isfile('readability_scores.pickle'):
    with open('readability_scores.pickle', 'rb') as handle:
        readability_scores = pickle.load(handle)
    
else:
    # Might take 2-3 mins, please be patient!
    readability_scores = {}
    for d in data:
        if 'reviewText' in d:
            readability_scores[d['reviewText']] = textstat.flesch_reading_ease(d['reviewText'])

In [None]:
def get_meta_features(d, length=False, rating=False, readability=False, verified=False, userID=False, itemID=False):
    feature_vec = []
    
    if length:
        if 'reviewText' not in d:
            feature_vec.append(0)
        else:
            feature_vec.append(len(d['reviewText']))
            
    if rating:
        feature_vec.append(d['overall'])
    
    if readability:
        if 'reviewText' not in d:
            feature_vec.append(100)
        else:
            feature_vec.append(readability_scores[d['reviewText']])
            
    if verified:
        if 'verified' not in d:
            feature_vec.append(0)
        elif d['verified']:
            feature_vec.append(1)
        else:
            feature_vec.append(0)
            
    if userID:
        feature_vec.extend(list(userID_one_hot_encoder.transform([[d["reviewerID"]]]).toarray()[0]))
                                          
    if itemID:
        feature_vec.extend(list(itemID_one_hot_encoder.transform([[d["asin"]]]).toarray()[0]))      
        
    return feature_vec
        

In [None]:
from sklearn.metrics import precision_recall_fscore_support
from sklearn.metrics import accuracy_score
def test_metrics(clf, X_t, y_t):
    y_pred = clf.predict(X_t)
    precision, recall, _, _ = precision_recall_fscore_support(y_t, y_pred, average='binary')
    accuracy = accuracy_score(y_t, y_pred)
    return accuracy, precision, recall, 2*precision*recall/(precision+recall)

In [None]:
X_train = [get_meta_features(d, length=True, rating=True, readability=True, verified=True, userID=False, itemID=False) for d in data_train]
X_val = [get_meta_features(d, length=True, rating=True, readability=True, verified=True, userID=False, itemID=False) for d in data_val]
X_test = [get_meta_features(d, length=True, rating=True, readability=True, verified=True, userID=False, itemID=False) for d in data_test]
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
scaler = scaler.fit(X_train)

In [None]:
class EarlyStopping():
    """
    Early stopping to stop the training when the loss does not improve after
    certain epochs.
    """
    def __init__(self, patience=5, min_delta=0):
        """
        :param patience: how many epochs to wait before stopping when loss is
               not improving
        :param min_delta: minimum difference between new loss and old loss for
               new loss to be considered as an improvement
        """
        self.patience = patience
        self.min_delta = min_delta
        self.counter = 0
        self.best_loss = None
        self.early_stop = False
    def __call__(self, val_loss):
        if self.best_loss == None:
            self.best_loss = val_loss
        elif self.best_loss - val_loss > self.min_delta:
            self.best_loss = val_loss
            # reset counter if validation loss improves
            self.counter = 0
        elif self.best_loss - val_loss < self.min_delta:
            self.counter += 1
            # print(f"INFO: Early stopping counter {self.counter} of {self.patience}")
            if self.counter >= self.patience:
                # print('INFO: Early stopping')
                self.early_stop = True

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
from torch.nn.functional import cross_entropy
from tqdm import tqdm
from torch.autograd import Variable
from math import ceil
from torch.optim.lr_scheduler import StepLR


class MLPClassifier(nn.Module):
    def __init__(self, input_size, hidden_size, output_size, dropout_prob=0.5, l2_reg=0.01, tol=1e-3, lr=1e-1):
        super(MLPClassifier, self).__init__()

        # Check if hidden_size is a tuple
        if isinstance(hidden_size, tuple):
            # Create a list of linear layers for each hidden size
            layers = []
            prev_size = input_size
            for size in hidden_size:
                layers.append(nn.Linear(prev_size, size))
                layers.append(nn.ReLU())
                layers.append(nn.Dropout(p=dropout_prob))
                prev_size = size
            self.hidden_layers = nn.Sequential(*layers)
        else:
            # Single hidden layer
            self.hidden_layers = nn.Sequential(
                nn.Linear(input_size, hidden_size),
                nn.ReLU(),
                nn.Dropout(p=dropout_prob)
            )

        self.fc_out = nn.Linear(prev_size, output_size)
        self.softmax = nn.Softmax(dim=1)
        self.l2_reg = l2_reg
        self.tol = tol
        self.class_weights = torch.tensor([0, 0], requires_grad=False)
        self.lr = lr

    def forward(self, x):
        # Input layer

        # Hidden layers
        for layer in self.hidden_layers:
            x = layer(x)

        # Output layer
        x = self.fc_out(x)  # No activation function for binary classification

        return self.softmax(x)
    
    def calculate_loss(self,x,y):
        device = torch.device('cuda')
        l2_reg_term = torch.nn.utils.parameters_to_vector(self.parameters()).norm(p=2)
        one_hot_encoded = torch.zeros(len(y), 2).to(device)
        one_hot_encoded.scatter_(1, y.unsqueeze(1), 1)
        y_pred = self.forward(x.float())
        return cross_entropy(y_pred, one_hot_encoded, weight=self.class_weights.to('cuda')) + self.l2_reg * l2_reg_term


    def fit(self, X_train, y_train, x_val, y_val, epochs=100, use_gpu=True):
        # Check if GPU is available and decide whether to use it
        use_gpu = use_gpu and torch.cuda.is_available()
        device = torch.device('cuda' if use_gpu else 'cpu')
        self.to(device)

        optimizer = optim.Adam(self.my_trainable_params(), lr=self.lr)
        lr_scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer=optimizer, mode='min', patience=3, factor=0.5, min_lr=1e-6)
        es = EarlyStopping(patience=10)



        print(self.class_weights)

        # x_train_tensor = torch.tensor(X_train).to(device)
        # y_train_tensor = torch.tensor(y_train, dtype=torch.long).to(device)

        x_train_tensor = torch.tensor(X_train) if not isinstance(X_train, torch.Tensor) else X_train
        x_train_tensor = x_train_tensor.to(device)
        
        y_train_tensor = torch.tensor(y_train, dtype=torch.long) if not isinstance(X_train, torch.Tensor) else y_train
        y_train_tensor = y_train_tensor.to(device)

        # Calculate class weights if not provided
        class_counts = torch.bincount(y_train_tensor)
        self.class_weights = 1.0 - (class_counts.float() / torch.sum(class_counts))
        self.class_weights.to(device)


        x_valid = torch.tensor(x_val).to(device)
        y_valid = torch.tensor(y_val).to(device)

        prev_loss = 1e12
        loss = 1e6

        epoch = 0
        while epoch < epochs:
            loss = self.calculate_loss(x_train_tensor, y_train_tensor)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            val_loss = self.calculate_loss(x_valid, y_valid)
            lr_scheduler.step(val_loss)
            es(val_loss)

            if es.early_stop:
                break

            epoch += 1

        print(f"Training stopped after {epoch} epochs.")

    def my_trainable_params(self):
        trainable_params = []
        for layer in self.hidden_layers:
            # print(type(layer))
            if isinstance(layer, nn.Linear):
                trainable_params.extend(layer.parameters())

        if isinstance(self.fc_out, nn.Linear):
            trainable_params.extend(self.fc_out.parameters())
        return trainable_params

    def predict(self, x, use_gpu=True):
        # Move inputs to GPU if available
        device = torch.device('cuda' if use_gpu else 'cpu')
        inputs = Variable(torch.FloatTensor(x)).to(device)
        prediction = self.forward(inputs)
        return torch.argmax(prediction.data.cpu(), 1).numpy() if use_gpu else torch.argmax(prediction.data, 1).numpy()

# Example usage:
# classifier = MLPClassifier(input_size=..., hidden_size=..., output_size=...)
# classifier.fit(X_train, y_train, epochs=..., batch_size=..., use_gpu=True)
# predictions = classifier.predict(X_test, use_gpu=True)


In [None]:
# Logistic regression
# from sklearn.linear_model import LogisticRegression

clf = MLPClassifier(input_size=len(X_train[0]),hidden_size=(100,10,),output_size=2)
clf.fit(scaler.transform(X_train), y_train, scaler.transform(X_val), y_val, epochs=100)
test_metrics(clf, scaler.transform(X_test), y_test)

tensor([0.2783, 0.7217])
Training stopped after 70 epochs.


(0.728125544140693, 0.5074699315031552, 0.736920426065163, 0.601041234149925)

In [None]:
np.unique(y_train, return_counts=True)

(array([0, 1]), array([265249, 102299]))

In [None]:
# Define the hyperparameter grid
param_grid = {
    'hidden_size': [(5,), (64,), (128,), (64, 32), (64, 32, 16)],
    'l2_reg': [0.001, 0.01, 0.1, 1, 10, 100],
    # 'tol': [1e-3, 1e-4, 1e-5]
}

In [None]:
from sklearn.model_selection import ParameterGrid



# Generate all possible combinations of hyperparameters
grid = ParameterGrid(param_grid)

scores_notext = []

# Iterate over the hyperparameter combinations
bar = tqdm(grid)
for params in bar:
    print("Testing hyperparameters:", params)

    # Create the model with the current hyperparameters
    model = MLPClassifier(input_size=len(X_train[0]), output_size=2, **params)

    # Train the model
    model.fit(scaler.transform(X_train), y_train, scaler.transform(X_val), y_val, epochs=100, use_gpu=True)
    metrics = test_metrics(model, scaler.transform(X_test), y_test)
    print(metrics)
    scores_notext.append({'params':params, 'scores':metrics})
    bar.set_postfix()


In [None]:
# with open('mlp_notext.json', 'w') as f:
#     json.dump(scores_notext, f)

## Text Features

In [None]:
train_documents = []
for d in data_train: 
    strs = []
    if 'summary' in d:
        strs.append(d['summary'])
    if 'reviewText' in d:
        strs.append(d['reviewText'])
    train_documents.append(" ".join(strs))

In [None]:
from sklearn.feature_extraction.text import CountVectorizer
bow_vectorizer = CountVectorizer(stop_words='english', max_features=300)
bow_vectorizer.fit(train_documents)

In [None]:
from sklearn.feature_extraction.text import TfidfVectorizer

tfidf_vectorizer = TfidfVectorizer(stop_words='english', max_features=300)
tfidf_vectorizer.fit(train_documents)

In [None]:
w2v_features = np.load('text-feature_word2vec.npz')['word2vec']
w2v_dict = {}

count = 0 
for d in data:
    if 'reviewText' in d:
        w2v_dict[d['reviewText']] = w2v_features[count, :]
    else:
        w2v_dict[''] =  w2v_features[count, :]
    count+=1

In [None]:
all_docs = []
for d in data: 
    strs = []
    if 'summary' in d:
        strs.append(d['summary'])
    if 'reviewText' in d:
        strs.append(d['reviewText'])
    all_docs.append(" ".join(strs))
    

In [None]:
tfidf_vecs = tfidf_vectorizer.transform(all_docs).toarray()

tfidf_dict = {}
count = 0
for d in data:
    if 'reviewText' in d:
        tfidf_dict[d['reviewText']] = tfidf_vecs[count, :]
    else:
        tfidf_dict[''] =  tfidf_vecs[count, :]
    count+=1

In [None]:
bow_vecs = bow_vectorizer.transform(all_docs).toarray()

bow_dict = {}
count = 0
for d in data:
    if 'reviewText' in d:
        bow_dict[d['reviewText']] = bow_vecs[count, :]
    else:
        bow_dict[''] =  bow_vecs[count, :]
    count+=1

In [None]:
def get_text_features(d, BoW=False, tfidf=False, w2v=False):
    
    feature_vec = []
    
    if 'reviewText' in d:
        reviewText = d['reviewText']
    else:
        reviewText = ''
    
    if BoW:
        feature_vec.extend(bow_dict[reviewText].tolist())
    if tfidf:
        feature_vec.extend(tfidf_dict[reviewText].ravel())
    if w2v:
        feature_vec.extend(w2v_dict[reviewText].tolist())
        
    return feature_vec
       

In [None]:
X_train = [get_text_features(d, BoW=True, tfidf=False, w2v=False) for d in data_train]
X_val = [get_text_features(d, BoW=True, tfidf=False, w2v=False) for d in data_val]
X_test = [get_text_features(d, BoW=True, tfidf=False, w2v=False) for d in data_test]

In [None]:
from sklearn.model_selection import ParameterGrid

# Generate all possible combinations of hyperparameters
grid = ParameterGrid(param_grid)

scores_bow = []

# Iterate over the hyperparameter combinations
bar = tqdm(grid)
X_train = torch.tensor(X_train).to('cuda')
y_train = torch.tensor(y_train).to('cuda')
for params in bar:
    print("Testing hyperparameters:", params)

    # Create the model with the current hyperparameters
    model = MLPClassifier(input_size=len(X_train[0]), output_size=2, **params)

    # Train the model
    model.fit(X_train, y_train, X_val, y_val, epochs=100, use_gpu=True)
    metrics = test_metrics(model, X_test, y_test)
    print(metrics)
    scores_bow.append({'params':params, 'scores':metrics})
    bar.set_postfix()

with open('mlp_bow.json', 'w') as f:
    json.dump(scores_bow, f)

  X_train = torch.tensor(X_train).to('cuda')
  y_train = torch.tensor(y_train).to('cuda')


Testing hyperparameters: {'hidden_size': (5,), 'l2_reg': 0.001}
tensor([0, 0])
Training stopped after 73 epochs.


  3%|▎         | 1/30 [00:02<01:25,  2.95s/it]

(0.72122584015323, 0.49882117175527524, 0.662828947368421, 0.5692473262931323)
Testing hyperparameters: {'hidden_size': (5,), 'l2_reg': 0.01}
tensor([0, 0])
Training stopped after 61 epochs.


  7%|▋         | 2/30 [00:05<01:18,  2.81s/it]

(0.7252742469092809, 0.5044593769089798, 0.6467731829573935, 0.5668199601894434)
Testing hyperparameters: {'hidden_size': (5,), 'l2_reg': 0.1}
tensor([0, 0])
Training stopped after 63 epochs.


  return accuracy, precision, recall, 2*precision*recall/(precision+recall)
 10%|█         | 3/30 [00:08<01:14,  2.77s/it]

(0.7220746996343375, 0.0, 0.0, nan)
Testing hyperparameters: {'hidden_size': (5,), 'l2_reg': 1}
tensor([0, 0])
Training stopped after 100 epochs.


 13%|█▎        | 4/30 [00:11<01:15,  2.90s/it]

(0.2779035347379418, 0.2779035347379418, 1.0, 0.434936639869192)
Testing hyperparameters: {'hidden_size': (5,), 'l2_reg': 10}
tensor([0, 0])
Training stopped after 100 epochs.


  _warn_prf(average, modifier, msg_start, len(result))
  return accuracy, precision, recall, 2*precision*recall/(precision+recall)
 17%|█▋        | 5/30 [00:14<01:15,  3.04s/it]

(0.7220964652620582, 0.0, 0.0, nan)
Testing hyperparameters: {'hidden_size': (5,), 'l2_reg': 100}
tensor([0, 0])
Training stopped after 100 epochs.


  _warn_prf(average, modifier, msg_start, len(result))
  return accuracy, precision, recall, 2*precision*recall/(precision+recall)
 20%|██        | 6/30 [00:17<01:13,  3.05s/it]

(0.7220964652620582, 0.0, 0.0, nan)
Testing hyperparameters: {'hidden_size': (64,), 'l2_reg': 0.001}
tensor([0, 0])
Training stopped after 100 epochs.


 23%|██▎       | 7/30 [00:21<01:15,  3.26s/it]

(0.7181133553891694, 0.4954077791718946, 0.7731046365914787, 0.6038601535496895)
Testing hyperparameters: {'hidden_size': (64,), 'l2_reg': 0.01}
tensor([0, 0])
Training stopped after 100 epochs.


 27%|██▋       | 8/30 [00:25<01:15,  3.43s/it]

(0.7325439665679958, 0.5137614678899083, 0.7017543859649122, 0.5932203389830508)
Testing hyperparameters: {'hidden_size': (64,), 'l2_reg': 0.1}
tensor([0, 0])
Training stopped after 100 epochs.


  _warn_prf(average, modifier, msg_start, len(result))
  return accuracy, precision, recall, 2*precision*recall/(precision+recall)
 30%|███       | 9/30 [00:29<01:13,  3.51s/it]

(0.7220964652620582, 0.0, 0.0, nan)
Testing hyperparameters: {'hidden_size': (64,), 'l2_reg': 1}
tensor([0, 0])
Training stopped after 100 epochs.


  _warn_prf(average, modifier, msg_start, len(result))
  return accuracy, precision, recall, 2*precision*recall/(precision+recall)
 33%|███▎      | 10/30 [00:32<01:12,  3.63s/it]

(0.7220964652620582, 0.0, 0.0, nan)
Testing hyperparameters: {'hidden_size': (64,), 'l2_reg': 10}
tensor([0, 0])
Training stopped after 100 epochs.


  _warn_prf(average, modifier, msg_start, len(result))
  return accuracy, precision, recall, 2*precision*recall/(precision+recall)
 37%|███▋      | 11/30 [00:36<01:11,  3.75s/it]

(0.7220964652620582, 0.0, 0.0, nan)
Testing hyperparameters: {'hidden_size': (64,), 'l2_reg': 100}
tensor([0, 0])
Training stopped after 100 epochs.


  _warn_prf(average, modifier, msg_start, len(result))
  return accuracy, precision, recall, 2*precision*recall/(precision+recall)
 40%|████      | 12/30 [00:40<01:07,  3.77s/it]

(0.7220964652620582, 0.0, 0.0, nan)
Testing hyperparameters: {'hidden_size': (128,), 'l2_reg': 0.001}
tensor([0, 0])
Training stopped after 100 epochs.


 43%|████▎     | 13/30 [00:45<01:06,  3.93s/it]

(0.7356999825874978, 0.5172147854349144, 0.7353540100250626, 0.6072895443226286)
Testing hyperparameters: {'hidden_size': (128,), 'l2_reg': 0.01}
tensor([0, 0])
Training stopped after 100 epochs.


 47%|████▋     | 14/30 [00:49<01:07,  4.21s/it]

(0.7323916071739509, 0.5135320707215197, 0.7029291979949874, 0.5934865266986279)
Testing hyperparameters: {'hidden_size': (128,), 'l2_reg': 0.1}
tensor([0, 0])


 50%|█████     | 15/30 [00:54<01:04,  4.30s/it]

Training stopped after 100 epochs.
(0.2779035347379418, 0.2779035347379418, 1.0, 0.434936639869192)
Testing hyperparameters: {'hidden_size': (128,), 'l2_reg': 1}
tensor([0, 0])
Training stopped after 100 epochs.


 53%|█████▎    | 16/30 [00:58<01:01,  4.37s/it]

(0.2779035347379418, 0.2779035347379418, 1.0, 0.434936639869192)
Testing hyperparameters: {'hidden_size': (128,), 'l2_reg': 10}
tensor([0, 0])
Training stopped after 100 epochs.


 57%|█████▋    | 17/30 [01:03<00:57,  4.40s/it]

(0.2779035347379418, 0.2779035347379418, 1.0, 0.434936639869192)
Testing hyperparameters: {'hidden_size': (128,), 'l2_reg': 100}
tensor([0, 0])
Training stopped after 100 epochs.


 60%|██████    | 18/30 [01:07<00:53,  4.45s/it]

(0.2779035347379418, 0.2779035347379418, 1.0, 0.434936639869192)
Testing hyperparameters: {'hidden_size': (64, 32), 'l2_reg': 0.001}
tensor([0, 0])
Training stopped after 100 epochs.


 63%|██████▎   | 19/30 [01:12<00:47,  4.35s/it]

(0.7170686052585756, 0.49419393756597796, 0.7699718045112782, 0.6020023881693762)
Testing hyperparameters: {'hidden_size': (64, 32), 'l2_reg': 0.01}
tensor([0, 0])
Training stopped after 100 epochs.


 67%|██████▋   | 20/30 [01:16<00:43,  4.31s/it]

(0.727167856520982, 0.5064956788402565, 0.7114661654135338, 0.5917337068038954)
Testing hyperparameters: {'hidden_size': (64, 32), 'l2_reg': 0.1}
tensor([0, 0])
Training stopped after 100 epochs.


  _warn_prf(average, modifier, msg_start, len(result))
  return accuracy, precision, recall, 2*precision*recall/(precision+recall)
 70%|███████   | 21/30 [01:20<00:38,  4.30s/it]

(0.7220964652620582, 0.0, 0.0, nan)
Testing hyperparameters: {'hidden_size': (64, 32), 'l2_reg': 1}
tensor([0, 0])
Training stopped after 100 epochs.


 73%|███████▎  | 22/30 [01:24<00:34,  4.26s/it]

(0.2779035347379418, 0.2779035347379418, 1.0, 0.434936639869192)
Testing hyperparameters: {'hidden_size': (64, 32), 'l2_reg': 10}
tensor([0, 0])
Training stopped after 100 epochs.


 77%|███████▋  | 23/30 [01:28<00:29,  4.22s/it]

(0.2779035347379418, 0.2779035347379418, 1.0, 0.434936639869192)
Testing hyperparameters: {'hidden_size': (64, 32), 'l2_reg': 100}
tensor([0, 0])
Training stopped after 100 epochs.


 80%|████████  | 24/30 [01:33<00:25,  4.20s/it]

(0.2779035347379418, 0.2779035347379418, 1.0, 0.434936639869192)
Testing hyperparameters: {'hidden_size': (64, 32, 16), 'l2_reg': 0.001}
tensor([0, 0])
Training stopped after 100 epochs.


 83%|████████▎ | 25/30 [01:37<00:21,  4.23s/it]

(0.7130637297579662, 0.4897495430519192, 0.7764724310776943, 0.6006482687588985)
Testing hyperparameters: {'hidden_size': (64, 32, 16), 'l2_reg': 0.01}
tensor([0, 0])
Training stopped after 100 epochs.


 87%|████████▋ | 26/30 [01:41<00:17,  4.25s/it]

(0.7248171687271461, 0.5034528479089553, 0.7137374686716792, 0.5904305290096861)
Testing hyperparameters: {'hidden_size': (64, 32, 16), 'l2_reg': 0.1}
tensor([0, 0])
Training stopped after 100 epochs.


 90%|█████████ | 27/30 [01:46<00:12,  4.30s/it]

(0.2779035347379418, 0.2779035347379418, 1.0, 0.434936639869192)
Testing hyperparameters: {'hidden_size': (64, 32, 16), 'l2_reg': 1}
tensor([0, 0])
Training stopped after 100 epochs.


  _warn_prf(average, modifier, msg_start, len(result))
  return accuracy, precision, recall, 2*precision*recall/(precision+recall)
 93%|█████████▎| 28/30 [01:50<00:08,  4.25s/it]

(0.7220964652620582, 0.0, 0.0, nan)
Testing hyperparameters: {'hidden_size': (64, 32, 16), 'l2_reg': 10}
tensor([0, 0])
Training stopped after 100 epochs.


 97%|█████████▋| 29/30 [01:54<00:04,  4.28s/it]

(0.2779035347379418, 0.2779035347379418, 1.0, 0.434936639869192)
Testing hyperparameters: {'hidden_size': (64, 32, 16), 'l2_reg': 100}
tensor([0, 0])
Training stopped after 100 epochs.


100%|██████████| 30/30 [01:58<00:00,  3.96s/it]

(0.2779035347379418, 0.2779035347379418, 1.0, 0.434936639869192)





In [None]:
X_train = [get_text_features(d, BoW=False, tfidf=True, w2v=False) for d in data_train]
X_val = [get_text_features(d, BoW=False, tfidf=True, w2v=False) for d in data_val]
X_test = [get_text_features(d, BoW=False, tfidf=True, w2v=False) for d in data_test]

In [None]:
from sklearn.model_selection import ParameterGrid

# Generate all possible combinations of hyperparameters
grid = ParameterGrid(param_grid)

scores_tfidf = []

# Iterate over the hyperparameter combinations
bar = tqdm(grid)
X_train = torch.tensor(X_train).to('cuda')
y_train = torch.tensor(y_train).to('cuda')
for params in bar:
    print("Testing hyperparameters:", params)

    # Create the model with the current hyperparameters
    model = MLPClassifier(input_size=len(X_train[0]), output_size=2, **params)

    # Train the model
    model.fit(X_train, y_train, X_val, y_val, epochs=100, use_gpu=True)
    metrics = test_metrics(model, X_test, y_test)
    print(metrics)
    scores_tfidf.append({'params':params, 'scores':metrics})
    bar.set_postfix()

with open('mlp_tfidf.json', 'w') as f:
    json.dump(scores_tfidf, f)

  y_train = torch.tensor(y_train).to('cuda')


Testing hyperparameters: {'hidden_size': (5,), 'l2_reg': 0.001}
tensor([0, 0])
Training stopped after 63 epochs.


  3%|▎         | 1/30 [00:28<13:43, 28.39s/it]

(0.7146743862092982, 0.49030203060121724, 0.675125313283208, 0.5680582556262151)
Testing hyperparameters: {'hidden_size': (5,), 'l2_reg': 0.01}
tensor([0, 0])
Training stopped after 53 epochs.


  7%|▋         | 2/30 [00:32<06:40, 14.32s/it]

(0.6413459864182482, 0.4155205392112214, 0.7145989974937343, 0.5254852272072799)
Testing hyperparameters: {'hidden_size': (5,), 'l2_reg': 0.1}
tensor([0, 0])
Training stopped after 100 epochs.


 10%|█         | 3/30 [00:38<04:34, 10.15s/it]

(0.2779035347379418, 0.2779035347379418, 1.0, 0.434936639869192)
Testing hyperparameters: {'hidden_size': (5,), 'l2_reg': 1}
tensor([0, 0])
Training stopped after 100 epochs.


  _warn_prf(average, modifier, msg_start, len(result))
  return accuracy, precision, recall, 2*precision*recall/(precision+recall)
 13%|█▎        | 4/30 [00:42<03:29,  8.06s/it]

(0.7220964652620582, 0.0, 0.0, nan)
Testing hyperparameters: {'hidden_size': (5,), 'l2_reg': 10}
tensor([0, 0])
Training stopped after 100 epochs.


  _warn_prf(average, modifier, msg_start, len(result))
  return accuracy, precision, recall, 2*precision*recall/(precision+recall)
 17%|█▋        | 5/30 [00:47<02:52,  6.89s/it]

(0.7220964652620582, 0.0, 0.0, nan)
Testing hyperparameters: {'hidden_size': (5,), 'l2_reg': 100}
tensor([0, 0])
Training stopped after 100 epochs.


  _warn_prf(average, modifier, msg_start, len(result))
  return accuracy, precision, recall, 2*precision*recall/(precision+recall)
 20%|██        | 6/30 [00:52<02:29,  6.21s/it]

(0.7220964652620582, 0.0, 0.0, nan)
Testing hyperparameters: {'hidden_size': (64,), 'l2_reg': 0.001}
tensor([0, 0])
Training stopped after 95 epochs.


 23%|██▎       | 7/30 [00:57<02:15,  5.89s/it]

(0.7156103082012886, 0.49246180309622584, 0.762374686716792, 0.5983893772668594)
Testing hyperparameters: {'hidden_size': (64,), 'l2_reg': 0.01}
tensor([0, 0])
Training stopped after 74 epochs.


 27%|██▋       | 8/30 [01:02<02:03,  5.61s/it]

(0.6779993034999129, 0.45434469082386875, 0.7895520050125313, 0.5767822405309532)
Testing hyperparameters: {'hidden_size': (64,), 'l2_reg': 0.1}
tensor([0, 0])
Training stopped after 100 epochs.


  _warn_prf(average, modifier, msg_start, len(result))
  return accuracy, precision, recall, 2*precision*recall/(precision+recall)
 30%|███       | 9/30 [01:08<01:55,  5.51s/it]

(0.7220964652620582, 0.0, 0.0, nan)
Testing hyperparameters: {'hidden_size': (64,), 'l2_reg': 1}
tensor([0, 0])
Training stopped after 100 epochs.


 33%|███▎      | 10/30 [01:13<01:49,  5.47s/it]

(0.2779035347379418, 0.2779035347379418, 1.0, 0.434936639869192)
Testing hyperparameters: {'hidden_size': (64,), 'l2_reg': 10}
tensor([0, 0])
Training stopped after 100 epochs.


  _warn_prf(average, modifier, msg_start, len(result))
  return accuracy, precision, recall, 2*precision*recall/(precision+recall)
 37%|███▋      | 11/30 [01:18<01:43,  5.47s/it]

(0.7220964652620582, 0.0, 0.0, nan)
Testing hyperparameters: {'hidden_size': (64,), 'l2_reg': 100}
tensor([0, 0])
Training stopped after 42 epochs.


 40%|████      | 12/30 [01:23<01:31,  5.08s/it]

(0.2779035347379418, 0.2779035347379418, 1.0, 0.434936639869192)
Testing hyperparameters: {'hidden_size': (128,), 'l2_reg': 0.001}
tensor([0, 0])
Training stopped after 100 epochs.


 43%|████▎     | 13/30 [01:29<01:30,  5.34s/it]

(0.7170250740031342, 0.4940889948754376, 0.762687969924812, 0.5996859315823506)
Testing hyperparameters: {'hidden_size': (128,), 'l2_reg': 0.01}
tensor([0, 0])
Training stopped after 95 epochs.


 47%|████▋     | 14/30 [01:34<01:27,  5.49s/it]

(0.6796534912066864, 0.4558863451271378, 0.7891604010025063, 0.5779179810725552)
Testing hyperparameters: {'hidden_size': (128,), 'l2_reg': 0.1}
tensor([0, 0])
Training stopped after 100 epochs.


 50%|█████     | 15/30 [01:41<01:24,  5.66s/it]

(0.2779035347379418, 0.2779035347379418, 1.0, 0.434936639869192)
Testing hyperparameters: {'hidden_size': (128,), 'l2_reg': 1}
tensor([0, 0])
Training stopped after 100 epochs.


  _warn_prf(average, modifier, msg_start, len(result))
  return accuracy, precision, recall, 2*precision*recall/(precision+recall)
 53%|█████▎    | 16/30 [01:46<01:19,  5.68s/it]

(0.7220964652620582, 0.0, 0.0, nan)
Testing hyperparameters: {'hidden_size': (128,), 'l2_reg': 10}
tensor([0, 0])
Training stopped after 100 epochs.


  _warn_prf(average, modifier, msg_start, len(result))
  return accuracy, precision, recall, 2*precision*recall/(precision+recall)
 57%|█████▋    | 17/30 [01:52<01:14,  5.76s/it]

(0.7220964652620582, 0.0, 0.0, nan)
Testing hyperparameters: {'hidden_size': (128,), 'l2_reg': 100}
tensor([0, 0])
Training stopped after 100 epochs.


 60%|██████    | 18/30 [01:58<01:10,  5.85s/it]

(0.2779035347379418, 0.2779035347379418, 1.0, 0.434936639869192)
Testing hyperparameters: {'hidden_size': (64, 32), 'l2_reg': 0.001}
tensor([0, 0])
Training stopped after 13 epochs.


 63%|██████▎   | 19/30 [02:02<00:57,  5.22s/it]

(0.6861614138951767, 0.4627868187350674, 0.8040413533834586, 0.587451002832537)
Testing hyperparameters: {'hidden_size': (64, 32), 'l2_reg': 0.01}
tensor([0, 0])
Training stopped after 77 epochs.


 67%|██████▋   | 20/30 [02:07<00:51,  5.16s/it]

(0.691864008358001, 0.46691754394321916, 0.7677005012531328, 0.5806700038505969)
Testing hyperparameters: {'hidden_size': (64, 32), 'l2_reg': 0.1}
tensor([0, 0])
Training stopped after 100 epochs.


 70%|███████   | 21/30 [02:13<00:47,  5.26s/it]

(0.2779035347379418, 0.2779035347379418, 1.0, 0.434936639869192)
Testing hyperparameters: {'hidden_size': (64, 32), 'l2_reg': 1}
tensor([0, 0])
Training stopped after 100 epochs.


  _warn_prf(average, modifier, msg_start, len(result))
  return accuracy, precision, recall, 2*precision*recall/(precision+recall)
 73%|███████▎  | 22/30 [02:18<00:42,  5.33s/it]

(0.7220964652620582, 0.0, 0.0, nan)
Testing hyperparameters: {'hidden_size': (64, 32), 'l2_reg': 10}
tensor([0, 0])
Training stopped after 100 epochs.


  _warn_prf(average, modifier, msg_start, len(result))
  return accuracy, precision, recall, 2*precision*recall/(precision+recall)
 77%|███████▋  | 23/30 [02:24<00:37,  5.40s/it]

(0.7220964652620582, 0.0, 0.0, nan)
Testing hyperparameters: {'hidden_size': (64, 32), 'l2_reg': 100}
tensor([0, 0])
Training stopped after 100 epochs.


  _warn_prf(average, modifier, msg_start, len(result))
  return accuracy, precision, recall, 2*precision*recall/(precision+recall)
 80%|████████  | 24/30 [02:29<00:32,  5.41s/it]

(0.7220964652620582, 0.0, 0.0, nan)
Testing hyperparameters: {'hidden_size': (64, 32, 16), 'l2_reg': 0.001}
tensor([0, 0])
Training stopped after 100 epochs.


 83%|████████▎ | 25/30 [02:35<00:27,  5.49s/it]

(0.710582448197806, 0.486902698687794, 0.7701284461152882, 0.5966083184176197)
Testing hyperparameters: {'hidden_size': (64, 32, 16), 'l2_reg': 0.01}
tensor([0, 0])
Training stopped after 100 epochs.


 87%|████████▋ | 26/30 [02:40<00:22,  5.56s/it]

(0.2779035347379418, 0.2779035347379418, 1.0, 0.434936639869192)
Testing hyperparameters: {'hidden_size': (64, 32, 16), 'l2_reg': 0.1}
tensor([0, 0])
Training stopped after 100 epochs.


  _warn_prf(average, modifier, msg_start, len(result))
  return accuracy, precision, recall, 2*precision*recall/(precision+recall)
 90%|█████████ | 27/30 [02:46<00:16,  5.59s/it]

(0.7220964652620582, 0.0, 0.0, nan)
Testing hyperparameters: {'hidden_size': (64, 32, 16), 'l2_reg': 1}
tensor([0, 0])
Training stopped after 100 epochs.


 93%|█████████▎| 28/30 [02:52<00:11,  5.60s/it]

(0.2779035347379418, 0.2779035347379418, 1.0, 0.434936639869192)
Testing hyperparameters: {'hidden_size': (64, 32, 16), 'l2_reg': 10}
tensor([0, 0])
Training stopped after 100 epochs.


 97%|█████████▋| 29/30 [02:57<00:05,  5.63s/it]

(0.2779035347379418, 0.2779035347379418, 1.0, 0.434936639869192)
Testing hyperparameters: {'hidden_size': (64, 32, 16), 'l2_reg': 100}
tensor([0, 0])
Training stopped after 100 epochs.


100%|██████████| 30/30 [03:03<00:00,  6.12s/it]

(0.2779035347379418, 0.2779035347379418, 1.0, 0.434936639869192)





In [None]:
X_train = [get_text_features(d, BoW=False, tfidf=False, w2v=True) for d in data_train]
X_val = [get_text_features(d, BoW=False, tfidf=False, w2v=True) for d in data_val]
X_test = [get_text_features(d, BoW=False, tfidf=False, w2v=True) for d in data_test]

# Generate all possible combinations of hyperparameters
grid = ParameterGrid(param_grid)

scores_w2v = []

# Iterate over the hyperparameter combinations
bar = tqdm(grid)
X_train = torch.tensor(X_train).to('cuda')
y_train = torch.tensor(y_train).to('cuda')
for params in bar:
    print("Testing hyperparameters:", params)

    # Create the model with the current hyperparameters
    model = MLPClassifier(input_size=len(X_train[0]), output_size=2, **params)

    # Train the model
    model.fit(X_train, y_train, X_val, y_val, epochs=100, use_gpu=True)
    metrics = test_metrics(model, X_test, y_test)
    print(metrics)
    scores_w2v.append({'params':params, 'scores':metrics})
    bar.set_postfix()

with open('mlp_w2v.json', 'w') as f:
    json.dump(scores_w2v, f)

  y_train = torch.tensor(y_train).to('cuda')


Testing hyperparameters: {'hidden_size': (5,), 'l2_reg': 0.001}
tensor([0, 0])
Training stopped after 97 epochs.


  3%|▎         | 1/30 [00:13<06:19, 13.09s/it]

(0.6301149225143653, 0.4169679346117573, 0.831062030075188, 0.5553171446514549)
Testing hyperparameters: {'hidden_size': (5,), 'l2_reg': 0.01}
tensor([0, 0])
Training stopped after 100 epochs.


  _warn_prf(average, modifier, msg_start, len(result))
  return accuracy, precision, recall, 2*precision*recall/(precision+recall)
  7%|▋         | 2/30 [00:15<03:15,  6.96s/it]

(0.7220964652620582, 0.0, 0.0, nan)
Testing hyperparameters: {'hidden_size': (5,), 'l2_reg': 0.1}
tensor([0, 0])
Training stopped after 100 epochs.


  _warn_prf(average, modifier, msg_start, len(result))
  return accuracy, precision, recall, 2*precision*recall/(precision+recall)
 10%|█         | 3/30 [00:18<02:15,  5.02s/it]

(0.7220964652620582, 0.0, 0.0, nan)
Testing hyperparameters: {'hidden_size': (5,), 'l2_reg': 1}
tensor([0, 0])
Training stopped after 100 epochs.


  _warn_prf(average, modifier, msg_start, len(result))
  return accuracy, precision, recall, 2*precision*recall/(precision+recall)
 13%|█▎        | 4/30 [00:21<01:45,  4.04s/it]

(0.7220964652620582, 0.0, 0.0, nan)
Testing hyperparameters: {'hidden_size': (5,), 'l2_reg': 10}
tensor([0, 0])
Training stopped after 100 epochs.


 17%|█▋        | 5/30 [00:23<01:26,  3.47s/it]

(0.2779035347379418, 0.2779035347379418, 1.0, 0.434936639869192)
Testing hyperparameters: {'hidden_size': (5,), 'l2_reg': 100}
tensor([0, 0])
Training stopped after 100 epochs.


  _warn_prf(average, modifier, msg_start, len(result))
  return accuracy, precision, recall, 2*precision*recall/(precision+recall)
 20%|██        | 6/30 [00:25<01:14,  3.12s/it]

(0.7220964652620582, 0.0, 0.0, nan)
Testing hyperparameters: {'hidden_size': (64,), 'l2_reg': 0.001}
tensor([0, 0])
Training stopped after 87 epochs.


 23%|██▎       | 7/30 [00:28<01:09,  3.04s/it]

(0.6527947065993384, 0.4337935623388505, 0.8169642857142857, 0.5666865866246537)
Testing hyperparameters: {'hidden_size': (64,), 'l2_reg': 0.01}
tensor([0, 0])
Training stopped after 100 epochs.


 27%|██▋       | 8/30 [00:31<01:06,  3.04s/it]

(0.2779035347379418, 0.2779035347379418, 1.0, 0.434936639869192)
Testing hyperparameters: {'hidden_size': (64,), 'l2_reg': 0.1}
tensor([0, 0])
Training stopped after 100 epochs.


 30%|███       | 9/30 [00:34<01:03,  3.04s/it]

(0.2779035347379418, 0.2779035347379418, 1.0, 0.434936639869192)
Testing hyperparameters: {'hidden_size': (64,), 'l2_reg': 1}
tensor([0, 0])
Training stopped after 100 epochs.


 33%|███▎      | 10/30 [00:37<01:00,  3.03s/it]

(0.2779035347379418, 0.2779035347379418, 1.0, 0.434936639869192)
Testing hyperparameters: {'hidden_size': (64,), 'l2_reg': 10}
tensor([0, 0])
Training stopped after 42 epochs.


  _warn_prf(average, modifier, msg_start, len(result))
  return accuracy, precision, recall, 2*precision*recall/(precision+recall)
 37%|███▋      | 11/30 [00:40<00:53,  2.81s/it]

(0.7220964652620582, 0.0, 0.0, nan)
Testing hyperparameters: {'hidden_size': (64,), 'l2_reg': 100}
tensor([0, 0])
Training stopped after 100 epochs.


  _warn_prf(average, modifier, msg_start, len(result))
  return accuracy, precision, recall, 2*precision*recall/(precision+recall)
 40%|████      | 12/30 [00:43<00:51,  2.88s/it]

(0.7220964652620582, 0.0, 0.0, nan)
Testing hyperparameters: {'hidden_size': (128,), 'l2_reg': 0.001}
tensor([0, 0])
Training stopped after 100 epochs.


 43%|████▎     | 13/30 [00:46<00:53,  3.12s/it]

(0.6580402228800278, 0.4383871372943098, 0.8200187969924813, 0.5713350249652125)
Testing hyperparameters: {'hidden_size': (128,), 'l2_reg': 0.01}
tensor([0, 0])
Training stopped after 100 epochs.


  _warn_prf(average, modifier, msg_start, len(result))
  return accuracy, precision, recall, 2*precision*recall/(precision+recall)
 47%|████▋     | 14/30 [00:50<00:52,  3.29s/it]

(0.7220964652620582, 0.0, 0.0, nan)
Testing hyperparameters: {'hidden_size': (128,), 'l2_reg': 0.1}
tensor([0, 0])
Training stopped after 100 epochs.


 50%|█████     | 15/30 [00:54<00:51,  3.40s/it]

(0.2779035347379418, 0.2779035347379418, 1.0, 0.434936639869192)
Testing hyperparameters: {'hidden_size': (128,), 'l2_reg': 1}
tensor([0, 0])
Training stopped after 100 epochs.


  _warn_prf(average, modifier, msg_start, len(result))
  return accuracy, precision, recall, 2*precision*recall/(precision+recall)
 53%|█████▎    | 16/30 [00:57<00:48,  3.49s/it]

(0.7220964652620582, 0.0, 0.0, nan)
Testing hyperparameters: {'hidden_size': (128,), 'l2_reg': 10}
tensor([0, 0])
Training stopped after 100 epochs.


  _warn_prf(average, modifier, msg_start, len(result))
  return accuracy, precision, recall, 2*precision*recall/(precision+recall)
 57%|█████▋    | 17/30 [01:01<00:46,  3.58s/it]

(0.7220964652620582, 0.0, 0.0, nan)
Testing hyperparameters: {'hidden_size': (128,), 'l2_reg': 100}
tensor([0, 0])
Training stopped after 100 epochs.


  _warn_prf(average, modifier, msg_start, len(result))
  return accuracy, precision, recall, 2*precision*recall/(precision+recall)
 60%|██████    | 18/30 [01:05<00:43,  3.64s/it]

(0.7220964652620582, 0.0, 0.0, nan)
Testing hyperparameters: {'hidden_size': (64, 32), 'l2_reg': 0.001}
tensor([0, 0])
Training stopped after 100 epochs.


 63%|██████▎   | 19/30 [01:08<00:39,  3.57s/it]

(0.6599338324917291, 0.4398432892408796, 0.8177474937343359, 0.5720155590861776)
Testing hyperparameters: {'hidden_size': (64, 32), 'l2_reg': 0.01}
tensor([0, 0])
Training stopped after 100 epochs.


 67%|██████▋   | 20/30 [01:12<00:35,  3.54s/it]

(0.2779035347379418, 0.2779035347379418, 1.0, 0.434936639869192)
Testing hyperparameters: {'hidden_size': (64, 32), 'l2_reg': 0.1}
tensor([0, 0])
Training stopped after 100 epochs.


  _warn_prf(average, modifier, msg_start, len(result))
  return accuracy, precision, recall, 2*precision*recall/(precision+recall)
 70%|███████   | 21/30 [01:15<00:31,  3.49s/it]

(0.7220964652620582, 0.0, 0.0, nan)
Testing hyperparameters: {'hidden_size': (64, 32), 'l2_reg': 1}
tensor([0, 0])
Training stopped after 100 epochs.


 73%|███████▎  | 22/30 [01:19<00:27,  3.46s/it]

(0.2779035347379418, 0.2779035347379418, 1.0, 0.434936639869192)
Testing hyperparameters: {'hidden_size': (64, 32), 'l2_reg': 10}
tensor([0, 0])
Training stopped after 100 epochs.


 77%|███████▋  | 23/30 [01:22<00:24,  3.45s/it]

(0.2779035347379418, 0.2779035347379418, 1.0, 0.434936639869192)
Testing hyperparameters: {'hidden_size': (64, 32), 'l2_reg': 100}
tensor([0, 0])
Training stopped after 100 epochs.


  _warn_prf(average, modifier, msg_start, len(result))
  return accuracy, precision, recall, 2*precision*recall/(precision+recall)
 80%|████████  | 24/30 [01:26<00:20,  3.47s/it]

(0.7220964652620582, 0.0, 0.0, nan)
Testing hyperparameters: {'hidden_size': (64, 32, 16), 'l2_reg': 0.001}
tensor([0, 0])
Training stopped after 95 epochs.


 83%|████████▎ | 25/30 [01:29<00:17,  3.48s/it]

(0.6852907887863486, 0.4567054124635158, 0.6985432330827067, 0.5523113601882528)
Testing hyperparameters: {'hidden_size': (64, 32, 16), 'l2_reg': 0.01}
tensor([0, 0])
Training stopped after 100 epochs.


  _warn_prf(average, modifier, msg_start, len(result))
  return accuracy, precision, recall, 2*precision*recall/(precision+recall)
 87%|████████▋ | 26/30 [01:33<00:13,  3.45s/it]

(0.7220964652620582, 0.0, 0.0, nan)
Testing hyperparameters: {'hidden_size': (64, 32, 16), 'l2_reg': 0.1}
tensor([0, 0])
Training stopped after 100 epochs.


  _warn_prf(average, modifier, msg_start, len(result))
  return accuracy, precision, recall, 2*precision*recall/(precision+recall)
 90%|█████████ | 27/30 [01:36<00:10,  3.42s/it]

(0.7220964652620582, 0.0, 0.0, nan)
Testing hyperparameters: {'hidden_size': (64, 32, 16), 'l2_reg': 1}
tensor([0, 0])
Training stopped after 100 epochs.


  _warn_prf(average, modifier, msg_start, len(result))
  return accuracy, precision, recall, 2*precision*recall/(precision+recall)
 93%|█████████▎| 28/30 [01:39<00:06,  3.39s/it]

(0.7220964652620582, 0.0, 0.0, nan)
Testing hyperparameters: {'hidden_size': (64, 32, 16), 'l2_reg': 10}
tensor([0, 0])
Training stopped after 100 epochs.


 97%|█████████▋| 29/30 [01:43<00:03,  3.37s/it]

(0.2779035347379418, 0.2779035347379418, 1.0, 0.434936639869192)
Testing hyperparameters: {'hidden_size': (64, 32, 16), 'l2_reg': 100}
tensor([0, 0])
Training stopped after 100 epochs.


100%|██████████| 30/30 [01:46<00:00,  3.55s/it]

(0.2779035347379418, 0.2779035347379418, 1.0, 0.434936639869192)





## Text + HandCrafted Features

In [None]:
def get_features(d, BoW=False, tfidf=False, w2v=False):
    a = get_text_features(d, BoW, tfidf, w2v)
    a.extend(get_meta_features(d, True, True, True, True))
    return a

In [None]:
X_train = [get_features(d, BoW=True, tfidf=False, w2v=False) for d in data_train]
X_val = [get_features(d, BoW=True, tfidf=False, w2v=False) for d in data_val]
X_test = [get_features(d, BoW=True, tfidf=False, w2v=False) for d in data_test]

# Generate all possible combinations of hyperparameters
grid = ParameterGrid(param_grid)

scores_bow_hand = []

# Iterate over the hyperparameter combinations
bar = tqdm(grid)
X_train = torch.tensor(X_train).to('cuda')
y_train = torch.tensor(y_train).to('cuda')
for params in bar:
    print("Testing hyperparameters:", params)

    # Create the model with the current hyperparameters
    model = MLPClassifier(input_size=len(X_train[0]), output_size=2, **params)

    # Train the model
    model.fit(X_train, y_train, X_val, y_val, epochs=100, use_gpu=True)
    metrics = test_metrics(model, X_test, y_test)
    print(metrics)
    scores_bow_hand.append({'params':params, 'scores':metrics})
    bar.set_postfix()

with open('mlp_bow_hand.json', 'w') as f:
    json.dump(scores_bow_hand, f)

  y_train = torch.tensor(y_train).to('cuda')


Testing hyperparameters: {'hidden_size': (5,), 'l2_reg': 0.001}
<class 'torch.nn.modules.linear.Linear'>
<class 'torch.nn.modules.activation.ReLU'>
<class 'torch.nn.modules.dropout.Dropout'>
tensor([0, 0])
Training stopped after 55 epochs.


  3%|▎         | 1/30 [00:13<06:43, 13.90s/it]

(0.5060508445063555, 0.31536458333333334, 0.6639254385964912, 0.4276129943502825)
Testing hyperparameters: {'hidden_size': (5,), 'l2_reg': 0.01}
<class 'torch.nn.modules.linear.Linear'>
<class 'torch.nn.modules.activation.ReLU'>
<class 'torch.nn.modules.dropout.Dropout'>
tensor([0, 0])
Training stopped after 100 epochs.


  7%|▋         | 2/30 [00:16<03:28,  7.46s/it]

(0.2794924255615532, 0.2783264656506857, 0.9998433583959899, 0.43543958386629145)
Testing hyperparameters: {'hidden_size': (5,), 'l2_reg': 0.1}
<class 'torch.nn.modules.linear.Linear'>
<class 'torch.nn.modules.activation.ReLU'>
<class 'torch.nn.modules.dropout.Dropout'>
tensor([0, 0])
Training stopped after 26 epochs.


 10%|█         | 3/30 [00:19<02:18,  5.12s/it]

(0.5761361657670208, 0.3197893152746426, 0.46600877192982454, 0.3792949576082106)
Testing hyperparameters: {'hidden_size': (5,), 'l2_reg': 1}
<class 'torch.nn.modules.linear.Linear'>
<class 'torch.nn.modules.activation.ReLU'>
<class 'torch.nn.modules.dropout.Dropout'>
tensor([0, 0])
Training stopped after 100 epochs.


 13%|█▎        | 4/30 [00:22<01:50,  4.25s/it]

(0.2779035347379418, 0.2779035347379418, 1.0, 0.434936639869192)
Testing hyperparameters: {'hidden_size': (5,), 'l2_reg': 10}
<class 'torch.nn.modules.linear.Linear'>
<class 'torch.nn.modules.activation.ReLU'>
<class 'torch.nn.modules.dropout.Dropout'>
tensor([0, 0])
Training stopped after 100 epochs.


 17%|█▋        | 5/30 [00:25<01:34,  3.76s/it]

(0.26236287654535956, 0.25851740064938034, 0.8854949874686717, 0.40019822307174974)
Testing hyperparameters: {'hidden_size': (5,), 'l2_reg': 100}
<class 'torch.nn.modules.linear.Linear'>
<class 'torch.nn.modules.activation.ReLU'>
<class 'torch.nn.modules.dropout.Dropout'>
tensor([0, 0])
Training stopped after 100 epochs.


 20%|██        | 6/30 [00:28<01:24,  3.51s/it]

(0.2779035347379418, 0.2779035347379418, 1.0, 0.434936639869192)
Testing hyperparameters: {'hidden_size': (64,), 'l2_reg': 0.001}
<class 'torch.nn.modules.linear.Linear'>
<class 'torch.nn.modules.activation.ReLU'>
<class 'torch.nn.modules.dropout.Dropout'>
tensor([0, 0])
Training stopped after 60 epochs.


 23%|██▎       | 7/30 [00:31<01:17,  3.37s/it]

(0.6983501654187707, 0.4737424789410349, 0.7708333333333334, 0.586828846555168)
Testing hyperparameters: {'hidden_size': (64,), 'l2_reg': 0.01}
<class 'torch.nn.modules.linear.Linear'>
<class 'torch.nn.modules.activation.ReLU'>
<class 'torch.nn.modules.dropout.Dropout'>
tensor([0, 0])
Training stopped after 100 epochs.


 27%|██▋       | 8/30 [00:34<01:15,  3.45s/it]

(0.6907104300888037, 0.4660033949453037, 0.7740444862155389, 0.5817635978337651)
Testing hyperparameters: {'hidden_size': (64,), 'l2_reg': 0.1}
<class 'torch.nn.modules.linear.Linear'>
<class 'torch.nn.modules.activation.ReLU'>
<class 'torch.nn.modules.dropout.Dropout'>
tensor([0, 0])
Training stopped after 100 epochs.


 30%|███       | 9/30 [00:38<01:14,  3.53s/it]

(0.6932352429044054, 0.46822886716503737, 0.7652725563909775, 0.580984659293614)
Testing hyperparameters: {'hidden_size': (64,), 'l2_reg': 1}
<class 'torch.nn.modules.linear.Linear'>
<class 'torch.nn.modules.activation.ReLU'>
<class 'torch.nn.modules.dropout.Dropout'>
tensor([0, 0])
Training stopped after 100 epochs.


 33%|███▎      | 10/30 [00:41<01:09,  3.48s/it]

(0.2786653317081665, 0.2781154022087172, 1.0, 0.43519607341888644)
Testing hyperparameters: {'hidden_size': (64,), 'l2_reg': 10}
<class 'torch.nn.modules.linear.Linear'>
<class 'torch.nn.modules.activation.ReLU'>
<class 'torch.nn.modules.dropout.Dropout'>
tensor([0, 0])
Training stopped after 100 epochs.


 37%|███▋      | 11/30 [00:45<01:05,  3.47s/it]

(0.7255789656973707, 0.5048467224039743, 0.6526472431077694, 0.5693106510897041)
Testing hyperparameters: {'hidden_size': (64,), 'l2_reg': 100}
<class 'torch.nn.modules.linear.Linear'>
<class 'torch.nn.modules.activation.ReLU'>
<class 'torch.nn.modules.dropout.Dropout'>
tensor([0, 0])
Training stopped after 100 epochs.


 40%|████      | 12/30 [00:48<01:01,  3.43s/it]

(0.3649660456207557, 0.22129365402908005, 0.5101817042606517, 0.30869111932518245)
Testing hyperparameters: {'hidden_size': (128,), 'l2_reg': 0.001}
<class 'torch.nn.modules.linear.Linear'>
<class 'torch.nn.modules.activation.ReLU'>
<class 'torch.nn.modules.dropout.Dropout'>
tensor([0, 0])
Training stopped after 31 epochs.


 43%|████▎     | 13/30 [00:51<00:54,  3.22s/it]

(0.7085582448197806, 0.48422760929100317, 0.7478070175438597, 0.5878224465923784)
Testing hyperparameters: {'hidden_size': (128,), 'l2_reg': 0.01}
<class 'torch.nn.modules.linear.Linear'>
<class 'torch.nn.modules.activation.ReLU'>
<class 'torch.nn.modules.dropout.Dropout'>
tensor([0, 0])
Training stopped after 100 epochs.


 47%|████▋     | 14/30 [00:55<00:56,  3.54s/it]

(0.7128896047362006, 0.48854713813830075, 0.7066102756892231, 0.5776852889386905)
Testing hyperparameters: {'hidden_size': (128,), 'l2_reg': 0.1}
<class 'torch.nn.modules.linear.Linear'>
<class 'torch.nn.modules.activation.ReLU'>
<class 'torch.nn.modules.dropout.Dropout'>
tensor([0, 0])
Training stopped after 46 epochs.


 50%|█████     | 15/30 [00:58<00:51,  3.44s/it]

(0.6670511927563991, 0.44279057141564493, 0.7665256892230576, 0.5613260302256889)
Testing hyperparameters: {'hidden_size': (128,), 'l2_reg': 1}
<class 'torch.nn.modules.linear.Linear'>
<class 'torch.nn.modules.activation.ReLU'>
<class 'torch.nn.modules.dropout.Dropout'>
tensor([0, 0])
Training stopped after 100 epochs.


 53%|█████▎    | 16/30 [01:02<00:51,  3.66s/it]

(0.27792530036566254, 0.2779095836144788, 1.0, 0.4349440479637547)
Testing hyperparameters: {'hidden_size': (128,), 'l2_reg': 10}
<class 'torch.nn.modules.linear.Linear'>
<class 'torch.nn.modules.activation.ReLU'>
<class 'torch.nn.modules.dropout.Dropout'>
tensor([0, 0])
Training stopped after 100 epochs.


 57%|█████▋    | 17/30 [01:07<00:49,  3.83s/it]

(0.7364835451854431, 0.5262739486445663, 0.518483709273183, 0.5223497849844163)
Testing hyperparameters: {'hidden_size': (128,), 'l2_reg': 100}
<class 'torch.nn.modules.linear.Linear'>
<class 'torch.nn.modules.activation.ReLU'>
<class 'torch.nn.modules.dropout.Dropout'>
tensor([0, 0])
Training stopped after 100 epochs.


 60%|██████    | 18/30 [01:11<00:47,  3.94s/it]

(0.2599904231238029, 0.20700505092324253, 0.5874060150375939, 0.3061286965040103)
Testing hyperparameters: {'hidden_size': (64, 32), 'l2_reg': 0.001}
<class 'torch.nn.modules.linear.Linear'>
<class 'torch.nn.modules.activation.ReLU'>
<class 'torch.nn.modules.dropout.Dropout'>
<class 'torch.nn.modules.linear.Linear'>
<class 'torch.nn.modules.activation.ReLU'>
<class 'torch.nn.modules.dropout.Dropout'>
tensor([0, 0])
Training stopped after 18 epochs.


 63%|██████▎   | 19/30 [01:14<00:39,  3.55s/it]

(0.7036392129549016, 0.47890757138593176, 0.7539943609022557, 0.5857620930940067)
Testing hyperparameters: {'hidden_size': (64, 32), 'l2_reg': 0.01}
<class 'torch.nn.modules.linear.Linear'>
<class 'torch.nn.modules.activation.ReLU'>
<class 'torch.nn.modules.dropout.Dropout'>
<class 'torch.nn.modules.linear.Linear'>
<class 'torch.nn.modules.activation.ReLU'>
<class 'torch.nn.modules.dropout.Dropout'>
tensor([0, 0])
Training stopped after 69 epochs.


 67%|██████▋   | 20/30 [01:17<00:35,  3.58s/it]

(0.6862267107783389, 0.4621844882973841, 0.7887687969924813, 0.5828462295271717)
Testing hyperparameters: {'hidden_size': (64, 32), 'l2_reg': 0.1}
<class 'torch.nn.modules.linear.Linear'>
<class 'torch.nn.modules.activation.ReLU'>
<class 'torch.nn.modules.dropout.Dropout'>
<class 'torch.nn.modules.linear.Linear'>
<class 'torch.nn.modules.activation.ReLU'>
<class 'torch.nn.modules.dropout.Dropout'>
tensor([0, 0])
Training stopped after 100 epochs.


 70%|███████   | 21/30 [01:21<00:33,  3.70s/it]

(0.656712519589065, 0.4386036625245258, 0.8403822055137845, 0.576385904598195)
Testing hyperparameters: {'hidden_size': (64, 32), 'l2_reg': 1}
<class 'torch.nn.modules.linear.Linear'>
<class 'torch.nn.modules.activation.ReLU'>
<class 'torch.nn.modules.dropout.Dropout'>
<class 'torch.nn.modules.linear.Linear'>
<class 'torch.nn.modules.activation.ReLU'>
<class 'torch.nn.modules.dropout.Dropout'>
tensor([0, 0])
Training stopped after 100 epochs.


 73%|███████▎  | 22/30 [01:25<00:30,  3.79s/it]

(0.2779035347379418, 0.2779035347379418, 1.0, 0.434936639869192)
Testing hyperparameters: {'hidden_size': (64, 32), 'l2_reg': 10}
<class 'torch.nn.modules.linear.Linear'>
<class 'torch.nn.modules.activation.ReLU'>
<class 'torch.nn.modules.dropout.Dropout'>
<class 'torch.nn.modules.linear.Linear'>
<class 'torch.nn.modules.activation.ReLU'>
<class 'torch.nn.modules.dropout.Dropout'>
tensor([0, 0])
Training stopped after 100 epochs.


 77%|███████▋  | 23/30 [01:29<00:27,  3.87s/it]

(0.2779035347379418, 0.2779035347379418, 1.0, 0.434936639869192)
Testing hyperparameters: {'hidden_size': (64, 32), 'l2_reg': 100}
<class 'torch.nn.modules.linear.Linear'>
<class 'torch.nn.modules.activation.ReLU'>
<class 'torch.nn.modules.dropout.Dropout'>
<class 'torch.nn.modules.linear.Linear'>
<class 'torch.nn.modules.activation.ReLU'>
<class 'torch.nn.modules.dropout.Dropout'>
tensor([0, 0])
Training stopped after 100 epochs.


  _warn_prf(average, modifier, msg_start, len(result))
  return accuracy, precision, recall, 2*precision*recall/(precision+recall)
 80%|████████  | 24/30 [01:33<00:23,  3.90s/it]

(0.7220964652620582, 0.0, 0.0, nan)
Testing hyperparameters: {'hidden_size': (64, 32, 16), 'l2_reg': 0.001}
<class 'torch.nn.modules.linear.Linear'>
<class 'torch.nn.modules.activation.ReLU'>
<class 'torch.nn.modules.dropout.Dropout'>
<class 'torch.nn.modules.linear.Linear'>
<class 'torch.nn.modules.activation.ReLU'>
<class 'torch.nn.modules.dropout.Dropout'>
<class 'torch.nn.modules.linear.Linear'>
<class 'torch.nn.modules.activation.ReLU'>
<class 'torch.nn.modules.dropout.Dropout'>
tensor([0, 0])
Training stopped after 14 epochs.


 83%|████████▎ | 25/30 [01:36<00:17,  3.50s/it]

(0.6999172906146613, 0.47486805110245156, 0.7539943609022557, 0.5827305468962805)
Testing hyperparameters: {'hidden_size': (64, 32, 16), 'l2_reg': 0.01}
<class 'torch.nn.modules.linear.Linear'>
<class 'torch.nn.modules.activation.ReLU'>
<class 'torch.nn.modules.dropout.Dropout'>
<class 'torch.nn.modules.linear.Linear'>
<class 'torch.nn.modules.activation.ReLU'>
<class 'torch.nn.modules.dropout.Dropout'>
<class 'torch.nn.modules.linear.Linear'>
<class 'torch.nn.modules.activation.ReLU'>
<class 'torch.nn.modules.dropout.Dropout'>
tensor([0, 0])
Training stopped after 94 epochs.


 87%|████████▋ | 26/30 [01:40<00:14,  3.66s/it]

(0.7069258227407278, 0.48233385715009885, 0.7452224310776943, 0.5856285582397293)
Testing hyperparameters: {'hidden_size': (64, 32, 16), 'l2_reg': 0.1}
<class 'torch.nn.modules.linear.Linear'>
<class 'torch.nn.modules.activation.ReLU'>
<class 'torch.nn.modules.dropout.Dropout'>
<class 'torch.nn.modules.linear.Linear'>
<class 'torch.nn.modules.activation.ReLU'>
<class 'torch.nn.modules.dropout.Dropout'>
<class 'torch.nn.modules.linear.Linear'>
<class 'torch.nn.modules.activation.ReLU'>
<class 'torch.nn.modules.dropout.Dropout'>
tensor([0, 0])
Training stopped after 100 epochs.


 90%|█████████ | 27/30 [01:44<00:11,  3.80s/it]

(0.5298189099773637, 0.3661028252697951, 0.9458803258145363, 0.527887053064079)
Testing hyperparameters: {'hidden_size': (64, 32, 16), 'l2_reg': 1}
<class 'torch.nn.modules.linear.Linear'>
<class 'torch.nn.modules.activation.ReLU'>
<class 'torch.nn.modules.dropout.Dropout'>
<class 'torch.nn.modules.linear.Linear'>
<class 'torch.nn.modules.activation.ReLU'>
<class 'torch.nn.modules.dropout.Dropout'>
<class 'torch.nn.modules.linear.Linear'>
<class 'torch.nn.modules.activation.ReLU'>
<class 'torch.nn.modules.dropout.Dropout'>
tensor([0, 0])
Training stopped after 100 epochs.


 93%|█████████▎| 28/30 [01:48<00:07,  3.94s/it]

(0.2779035347379418, 0.2779035347379418, 1.0, 0.434936639869192)
Testing hyperparameters: {'hidden_size': (64, 32, 16), 'l2_reg': 10}
<class 'torch.nn.modules.linear.Linear'>
<class 'torch.nn.modules.activation.ReLU'>
<class 'torch.nn.modules.dropout.Dropout'>
<class 'torch.nn.modules.linear.Linear'>
<class 'torch.nn.modules.activation.ReLU'>
<class 'torch.nn.modules.dropout.Dropout'>
<class 'torch.nn.modules.linear.Linear'>
<class 'torch.nn.modules.activation.ReLU'>
<class 'torch.nn.modules.dropout.Dropout'>
tensor([0, 0])
Training stopped after 100 epochs.


  _warn_prf(average, modifier, msg_start, len(result))
  return accuracy, precision, recall, 2*precision*recall/(precision+recall)
 97%|█████████▋| 29/30 [01:52<00:04,  4.02s/it]

(0.7220964652620582, 0.0, 0.0, nan)
Testing hyperparameters: {'hidden_size': (64, 32, 16), 'l2_reg': 100}
<class 'torch.nn.modules.linear.Linear'>
<class 'torch.nn.modules.activation.ReLU'>
<class 'torch.nn.modules.dropout.Dropout'>
<class 'torch.nn.modules.linear.Linear'>
<class 'torch.nn.modules.activation.ReLU'>
<class 'torch.nn.modules.dropout.Dropout'>
<class 'torch.nn.modules.linear.Linear'>
<class 'torch.nn.modules.activation.ReLU'>
<class 'torch.nn.modules.dropout.Dropout'>
tensor([0, 0])
Training stopped after 100 epochs.


100%|██████████| 30/30 [01:57<00:00,  3.90s/it]

(0.2779035347379418, 0.2779035347379418, 1.0, 0.434936639869192)





In [None]:
X_train = [get_features(d, BoW=False, tfidf=True, w2v=False) for d in data_train]
X_val = [get_features(d, BoW=False, tfidf=True, w2v=False) for d in data_val]
X_test = [get_features(d, BoW=False, tfidf=True, w2v=False) for d in data_test]

# Generate all possible combinations of hyperparameters
grid = ParameterGrid(param_grid)

scores_tfidf_hand = []

# Iterate over the hyperparameter combinations
bar = tqdm(grid)
X_train = torch.tensor(X_train).to('cuda')
y_train = torch.tensor(y_train).to('cuda')
for params in bar:
    print("Testing hyperparameters:", params)

    # Create the model with the current hyperparameters
    model = MLPClassifier(input_size=len(X_train[0]), output_size=2, **params)

    # Train the model
    model.fit(X_train, y_train, X_val, y_val, epochs=100, use_gpu=True)
    metrics = test_metrics(model, X_test, y_test)
    print(metrics)
    scores_tfidf_hand.append({'params':params, 'scores':metrics})
    bar.set_postfix()

with open('mlp_tfidf_hand.json', 'w') as f:
    json.dump(scores_tfidf_hand, f)

  y_train = torch.tensor(y_train).to('cuda')


Testing hyperparameters: {'hidden_size': (5,), 'l2_reg': 0.001}
<class 'torch.nn.modules.linear.Linear'>
<class 'torch.nn.modules.activation.ReLU'>
<class 'torch.nn.modules.dropout.Dropout'>
tensor([0, 0])
Training stopped after 17 epochs.


  _warn_prf(average, modifier, msg_start, len(result))
  return accuracy, precision, recall, 2*precision*recall/(precision+recall)
  3%|▎         | 1/30 [00:29<14:06, 29.19s/it]

(0.7220964652620582, 0.0, 0.0, nan)
Testing hyperparameters: {'hidden_size': (5,), 'l2_reg': 0.01}
<class 'torch.nn.modules.linear.Linear'>
<class 'torch.nn.modules.activation.ReLU'>
<class 'torch.nn.modules.dropout.Dropout'>
tensor([0, 0])
Training stopped after 61 epochs.


  7%|▋         | 2/30 [00:33<06:55, 14.84s/it]

(0.5630332578791573, 0.31461187214611874, 0.4856672932330827, 0.38185848882320345)
Testing hyperparameters: {'hidden_size': (5,), 'l2_reg': 0.1}
<class 'torch.nn.modules.linear.Linear'>
<class 'torch.nn.modules.activation.ReLU'>
<class 'torch.nn.modules.dropout.Dropout'>
tensor([0, 0])
Training stopped after 29 epochs.


  _warn_prf(average, modifier, msg_start, len(result))
  return accuracy, precision, recall, 2*precision*recall/(precision+recall)
 10%|█         | 3/30 [00:38<04:28,  9.95s/it]

(0.7220964652620582, 0.0, 0.0, nan)
Testing hyperparameters: {'hidden_size': (5,), 'l2_reg': 1}
<class 'torch.nn.modules.linear.Linear'>
<class 'torch.nn.modules.activation.ReLU'>
<class 'torch.nn.modules.dropout.Dropout'>
tensor([0, 0])
Training stopped after 100 epochs.


 13%|█▎        | 4/30 [00:43<03:27,  7.96s/it]

(0.2779035347379418, 0.2779035347379418, 1.0, 0.434936639869192)
Testing hyperparameters: {'hidden_size': (5,), 'l2_reg': 10}
<class 'torch.nn.modules.linear.Linear'>
<class 'torch.nn.modules.activation.ReLU'>
<class 'torch.nn.modules.dropout.Dropout'>
tensor([0, 0])
Training stopped after 100 epochs.


 17%|█▋        | 5/30 [00:48<02:54,  6.96s/it]

(0.72401184050148, 0.782051282051282, 0.009555137844611528, 0.01887960383782111)
Testing hyperparameters: {'hidden_size': (5,), 'l2_reg': 100}
<class 'torch.nn.modules.linear.Linear'>
<class 'torch.nn.modules.activation.ReLU'>
<class 'torch.nn.modules.dropout.Dropout'>
tensor([0, 0])
Training stopped after 100 epochs.


 20%|██        | 6/30 [00:53<02:31,  6.30s/it]

(0.7221182308897789, 1.0, 7.832080200501253e-05, 0.00015662933667475918)
Testing hyperparameters: {'hidden_size': (64,), 'l2_reg': 0.001}
<class 'torch.nn.modules.linear.Linear'>
<class 'torch.nn.modules.activation.ReLU'>
<class 'torch.nn.modules.dropout.Dropout'>
tensor([0, 0])
Training stopped after 31 epochs.


 23%|██▎       | 7/30 [00:57<02:10,  5.69s/it]

(0.687097335887167, 0.46312603192074847, 0.7908834586466166, 0.5841721624435959)
Testing hyperparameters: {'hidden_size': (64,), 'l2_reg': 0.01}
<class 'torch.nn.modules.linear.Linear'>
<class 'torch.nn.modules.activation.ReLU'>
<class 'torch.nn.modules.dropout.Dropout'>
tensor([0, 0])
Training stopped after 100 epochs.


 27%|██▋       | 8/30 [01:03<02:04,  5.64s/it]

(0.6870102733762842, 0.46252557188023063, 0.7791353383458647, 0.5804644649317307)
Testing hyperparameters: {'hidden_size': (64,), 'l2_reg': 0.1}
<class 'torch.nn.modules.linear.Linear'>
<class 'torch.nn.modules.activation.ReLU'>
<class 'torch.nn.modules.dropout.Dropout'>
tensor([0, 0])
Training stopped after 100 epochs.


 30%|███       | 9/30 [01:08<01:57,  5.61s/it]

(0.6993296186662024, 0.4743426216640502, 0.7572838345864662, 0.5833132239382239)
Testing hyperparameters: {'hidden_size': (64,), 'l2_reg': 1}
<class 'torch.nn.modules.linear.Linear'>
<class 'torch.nn.modules.activation.ReLU'>
<class 'torch.nn.modules.dropout.Dropout'>
tensor([0, 0])
Training stopped after 100 epochs.


 33%|███▎      | 10/30 [01:14<01:51,  5.58s/it]

(0.277968831621104, 0.2779216821575499, 1.0, 0.43495886490997976)
Testing hyperparameters: {'hidden_size': (64,), 'l2_reg': 10}
<class 'torch.nn.modules.linear.Linear'>
<class 'torch.nn.modules.activation.ReLU'>
<class 'torch.nn.modules.dropout.Dropout'>
tensor([0, 0])
Training stopped after 100 epochs.


 37%|███▋      | 11/30 [01:19<01:45,  5.55s/it]

(0.5527381159672645, 0.36910809810584394, 0.8592575187969925, 0.516391706479019)
Testing hyperparameters: {'hidden_size': (64,), 'l2_reg': 100}
<class 'torch.nn.modules.linear.Linear'>
<class 'torch.nn.modules.activation.ReLU'>
<class 'torch.nn.modules.dropout.Dropout'>
tensor([0, 0])
Training stopped after 45 epochs.


 40%|████      | 12/30 [01:24<01:34,  5.27s/it]

(0.38925648615706077, 0.28982957669048925, 0.8258145363408521, 0.4290713762513226)
Testing hyperparameters: {'hidden_size': (128,), 'l2_reg': 0.001}
<class 'torch.nn.modules.linear.Linear'>
<class 'torch.nn.modules.activation.ReLU'>
<class 'torch.nn.modules.dropout.Dropout'>
tensor([0, 0])
Training stopped after 40 epochs.


 43%|████▎     | 13/30 [01:29<01:27,  5.14s/it]

(0.6916463520807941, 0.46713648108996947, 0.7787437343358395, 0.5839720435791266)
Testing hyperparameters: {'hidden_size': (128,), 'l2_reg': 0.01}
<class 'torch.nn.modules.linear.Linear'>
<class 'torch.nn.modules.activation.ReLU'>
<class 'torch.nn.modules.dropout.Dropout'>
tensor([0, 0])
Training stopped after 71 epochs.


 47%|████▋     | 14/30 [01:34<01:24,  5.29s/it]

(0.6808941319867665, 0.45749315130013024, 0.7978540100250626, 0.5815327529613245)
Testing hyperparameters: {'hidden_size': (128,), 'l2_reg': 0.1}
<class 'torch.nn.modules.linear.Linear'>
<class 'torch.nn.modules.activation.ReLU'>
<class 'torch.nn.modules.dropout.Dropout'>
tensor([0, 0])
Training stopped after 82 epochs.


 50%|█████     | 15/30 [01:40<01:21,  5.45s/it]

(0.6376675953334494, 0.4244537062283333, 0.8534617794486216, 0.5669467495642674)
Testing hyperparameters: {'hidden_size': (128,), 'l2_reg': 1}
<class 'torch.nn.modules.linear.Linear'>
<class 'torch.nn.modules.activation.ReLU'>
<class 'torch.nn.modules.dropout.Dropout'>
tensor([0, 0])
Training stopped after 100 epochs.


 53%|█████▎    | 16/30 [01:46<01:19,  5.71s/it]

(0.2781429566428696, 0.27797008686565217, 1.0, 0.4350181427914345)
Testing hyperparameters: {'hidden_size': (128,), 'l2_reg': 10}
<class 'torch.nn.modules.linear.Linear'>
<class 'torch.nn.modules.activation.ReLU'>
<class 'torch.nn.modules.dropout.Dropout'>
tensor([0, 0])
Training stopped after 100 epochs.


 57%|█████▋    | 17/30 [01:53<01:15,  5.82s/it]

(0.6197544837193104, 0.24589277993947253, 0.17817982456140352, 0.206630336058129)
Testing hyperparameters: {'hidden_size': (128,), 'l2_reg': 100}
<class 'torch.nn.modules.linear.Linear'>
<class 'torch.nn.modules.activation.ReLU'>
<class 'torch.nn.modules.dropout.Dropout'>
tensor([0, 0])
Training stopped after 100 epochs.


 60%|██████    | 18/30 [01:59<01:11,  5.93s/it]

(0.39173776771722096, 0.30803865027571203, 0.9537907268170426, 0.4656800887155366)
Testing hyperparameters: {'hidden_size': (64, 32), 'l2_reg': 0.001}
<class 'torch.nn.modules.linear.Linear'>
<class 'torch.nn.modules.activation.ReLU'>
<class 'torch.nn.modules.dropout.Dropout'>
<class 'torch.nn.modules.linear.Linear'>
<class 'torch.nn.modules.activation.ReLU'>
<class 'torch.nn.modules.dropout.Dropout'>
tensor([0, 0])
Training stopped after 16 epochs.


 63%|██████▎   | 19/30 [02:03<00:58,  5.36s/it]

(0.6914722270590283, 0.46683950035352345, 0.7756892230576441, 0.5828796751316835)
Testing hyperparameters: {'hidden_size': (64, 32), 'l2_reg': 0.01}
<class 'torch.nn.modules.linear.Linear'>
<class 'torch.nn.modules.activation.ReLU'>
<class 'torch.nn.modules.dropout.Dropout'>
<class 'torch.nn.modules.linear.Linear'>
<class 'torch.nn.modules.activation.ReLU'>
<class 'torch.nn.modules.dropout.Dropout'>
tensor([0, 0])
Training stopped after 100 epochs.


 67%|██████▋   | 20/30 [02:08<00:54,  5.45s/it]

(0.7122584015323002, 0.4883457095709571, 0.7416979949874687, 0.5889303482587065)
Testing hyperparameters: {'hidden_size': (64, 32), 'l2_reg': 0.1}
<class 'torch.nn.modules.linear.Linear'>
<class 'torch.nn.modules.activation.ReLU'>
<class 'torch.nn.modules.dropout.Dropout'>
<class 'torch.nn.modules.linear.Linear'>
<class 'torch.nn.modules.activation.ReLU'>
<class 'torch.nn.modules.dropout.Dropout'>
tensor([0, 0])
Training stopped after 56 epochs.


 70%|███████   | 21/30 [02:13<00:47,  5.27s/it]

(0.6826353822044228, 0.4581428637392067, 0.7770989974937343, 0.5764415395787945)
Testing hyperparameters: {'hidden_size': (64, 32), 'l2_reg': 1}
<class 'torch.nn.modules.linear.Linear'>
<class 'torch.nn.modules.activation.ReLU'>
<class 'torch.nn.modules.dropout.Dropout'>
<class 'torch.nn.modules.linear.Linear'>
<class 'torch.nn.modules.activation.ReLU'>
<class 'torch.nn.modules.dropout.Dropout'>
tensor([0, 0])
Training stopped after 100 epochs.


  _warn_prf(average, modifier, msg_start, len(result))
  return accuracy, precision, recall, 2*precision*recall/(precision+recall)
 73%|███████▎  | 22/30 [02:19<00:43,  5.40s/it]

(0.7220964652620582, 0.0, 0.0, nan)
Testing hyperparameters: {'hidden_size': (64, 32), 'l2_reg': 10}
<class 'torch.nn.modules.linear.Linear'>
<class 'torch.nn.modules.activation.ReLU'>
<class 'torch.nn.modules.dropout.Dropout'>
<class 'torch.nn.modules.linear.Linear'>
<class 'torch.nn.modules.activation.ReLU'>
<class 'torch.nn.modules.dropout.Dropout'>
tensor([0, 0])
Training stopped after 100 epochs.


  _warn_prf(average, modifier, msg_start, len(result))
  return accuracy, precision, recall, 2*precision*recall/(precision+recall)
 77%|███████▋  | 23/30 [02:25<00:38,  5.48s/it]

(0.7220964652620582, 0.0, 0.0, nan)
Testing hyperparameters: {'hidden_size': (64, 32), 'l2_reg': 100}
<class 'torch.nn.modules.linear.Linear'>
<class 'torch.nn.modules.activation.ReLU'>
<class 'torch.nn.modules.dropout.Dropout'>
<class 'torch.nn.modules.linear.Linear'>
<class 'torch.nn.modules.activation.ReLU'>
<class 'torch.nn.modules.dropout.Dropout'>
tensor([0, 0])
Training stopped after 100 epochs.


 80%|████████  | 24/30 [02:30<00:33,  5.55s/it]

(0.2721791746473968, 0.2471313581092653, 0.7911184210526315, 0.37661490277959025)
Testing hyperparameters: {'hidden_size': (64, 32, 16), 'l2_reg': 0.001}
<class 'torch.nn.modules.linear.Linear'>
<class 'torch.nn.modules.activation.ReLU'>
<class 'torch.nn.modules.dropout.Dropout'>
<class 'torch.nn.modules.linear.Linear'>
<class 'torch.nn.modules.activation.ReLU'>
<class 'torch.nn.modules.dropout.Dropout'>
<class 'torch.nn.modules.linear.Linear'>
<class 'torch.nn.modules.activation.ReLU'>
<class 'torch.nn.modules.dropout.Dropout'>
tensor([0, 0])
Training stopped after 100 epochs.


 83%|████████▎ | 25/30 [02:36<00:28,  5.67s/it]

(0.6612180045272505, 0.44265387296510433, 0.8454730576441103, 0.5810792625487821)
Testing hyperparameters: {'hidden_size': (64, 32, 16), 'l2_reg': 0.01}
<class 'torch.nn.modules.linear.Linear'>
<class 'torch.nn.modules.activation.ReLU'>
<class 'torch.nn.modules.dropout.Dropout'>
<class 'torch.nn.modules.linear.Linear'>
<class 'torch.nn.modules.activation.ReLU'>
<class 'torch.nn.modules.dropout.Dropout'>
<class 'torch.nn.modules.linear.Linear'>
<class 'torch.nn.modules.activation.ReLU'>
<class 'torch.nn.modules.dropout.Dropout'>
tensor([0, 0])
Training stopped after 100 epochs.


 87%|████████▋ | 26/30 [02:43<00:23,  5.84s/it]

(0.6902968831621104, 0.46619151200999676, 0.7889254385964912, 0.5860654545454547)
Testing hyperparameters: {'hidden_size': (64, 32, 16), 'l2_reg': 0.1}
<class 'torch.nn.modules.linear.Linear'>
<class 'torch.nn.modules.activation.ReLU'>
<class 'torch.nn.modules.dropout.Dropout'>
<class 'torch.nn.modules.linear.Linear'>
<class 'torch.nn.modules.activation.ReLU'>
<class 'torch.nn.modules.dropout.Dropout'>
<class 'torch.nn.modules.linear.Linear'>
<class 'torch.nn.modules.activation.ReLU'>
<class 'torch.nn.modules.dropout.Dropout'>
tensor([0, 0])
Training stopped after 100 epochs.


 90%|█████████ | 27/30 [02:49<00:17,  5.96s/it]

(0.533976144872018, 0.36821078954591197, 0.9456453634085213, 0.5300379727386466)
Testing hyperparameters: {'hidden_size': (64, 32, 16), 'l2_reg': 1}
<class 'torch.nn.modules.linear.Linear'>
<class 'torch.nn.modules.activation.ReLU'>
<class 'torch.nn.modules.dropout.Dropout'>
<class 'torch.nn.modules.linear.Linear'>
<class 'torch.nn.modules.activation.ReLU'>
<class 'torch.nn.modules.dropout.Dropout'>
<class 'torch.nn.modules.linear.Linear'>
<class 'torch.nn.modules.activation.ReLU'>
<class 'torch.nn.modules.dropout.Dropout'>
tensor([0, 0])
Training stopped after 100 epochs.


 93%|█████████▎| 28/30 [02:55<00:12,  6.07s/it]

(0.2779035347379418, 0.2779035347379418, 1.0, 0.434936639869192)
Testing hyperparameters: {'hidden_size': (64, 32, 16), 'l2_reg': 10}
<class 'torch.nn.modules.linear.Linear'>
<class 'torch.nn.modules.activation.ReLU'>
<class 'torch.nn.modules.dropout.Dropout'>
<class 'torch.nn.modules.linear.Linear'>
<class 'torch.nn.modules.activation.ReLU'>
<class 'torch.nn.modules.dropout.Dropout'>
<class 'torch.nn.modules.linear.Linear'>
<class 'torch.nn.modules.activation.ReLU'>
<class 'torch.nn.modules.dropout.Dropout'>
tensor([0, 0])
Training stopped after 100 epochs.


 97%|█████████▋| 29/30 [03:01<00:06,  6.11s/it]

(0.2779035347379418, 0.2779035347379418, 1.0, 0.434936639869192)
Testing hyperparameters: {'hidden_size': (64, 32, 16), 'l2_reg': 100}
<class 'torch.nn.modules.linear.Linear'>
<class 'torch.nn.modules.activation.ReLU'>
<class 'torch.nn.modules.dropout.Dropout'>
<class 'torch.nn.modules.linear.Linear'>
<class 'torch.nn.modules.activation.ReLU'>
<class 'torch.nn.modules.dropout.Dropout'>
<class 'torch.nn.modules.linear.Linear'>
<class 'torch.nn.modules.activation.ReLU'>
<class 'torch.nn.modules.dropout.Dropout'>
tensor([0, 0])
Training stopped after 100 epochs.


  _warn_prf(average, modifier, msg_start, len(result))
  return accuracy, precision, recall, 2*precision*recall/(precision+recall)
100%|██████████| 30/30 [03:07<00:00,  6.27s/it]

(0.7220964652620582, 0.0, 0.0, nan)





In [None]:
X_train = [get_features(d, BoW=False, tfidf=False, w2v=True) for d in data_train]
X_val = [get_features(d, BoW=False, tfidf=False, w2v=True) for d in data_val]
X_test = [get_features(d, BoW=False, tfidf=False, w2v=True) for d in data_test]

# Generate all possible combinations of hyperparameters
grid = ParameterGrid(param_grid)

scores_w2v_hand = []

# Iterate over the hyperparameter combinations
bar = tqdm(grid)
X_train = torch.tensor(X_train).to('cuda')
y_train = torch.tensor(y_train).to('cuda')
for params in bar:
    print("Testing hyperparameters:", params)

    # Create the model with the current hyperparameters
    model = MLPClassifier(input_size=len(X_train[0]), output_size=2, **params)

    # Train the model
    model.fit(X_train, y_train, X_val, y_val, epochs=100, use_gpu=True)
    metrics = test_metrics(model, X_test, y_test)
    print(metrics)
    scores_w2v_hand.append({'params':params, 'scores':metrics})
    bar.set_postfix()

with open('mlp_w2v_hand.json', 'w') as f:
    json.dump(scores_w2v_hand, f)

: 