# Preparation

## Library

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from sklearn.model_selection import train_test_split

from torch.utils.data import TensorDataset, DataLoader
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import TensorDataset, DataLoader, RandomSampler, SequentialSampler
from torch.utils.data.sampler import WeightedRandomSampler
from torch.optim import lr_scheduler

from sklearn.preprocessing import StandardScaler

from sklearn.metrics import accuracy_score
from sklearn.metrics import classification_report
from sklearn.metrics import f1_score, precision_score, recall_score
from transformers import AdamW, get_linear_schedule_with_warmup
from sklearn.inspection import permutation_importance

from fetchData import fetchdata, cv_events
import __MLP
# from __MLP import getSamplers, convert_df_to_unsqueezed_tensor, train_sequential, clf_report
import random

import __Preprocessing
from sklearn.ensemble import ExtraTreesClassifier, RandomForestClassifier

pd.set_option('display.max_columns', None)

In [2]:
if torch.cuda.is_available():       
    device = torch.device("cuda")
    print(f'There are {torch.cuda.device_count()} GPU(s) available.')
    print('Device name:', torch.cuda.get_device_name(0))

else:
    print('No GPU available, using the CPU instead.')
    device = torch.device("cpu")

No GPU available, using the CPU instead.


In [3]:
seed = 42
if torch.cuda.is_available():
    torch.cuda.manual_seed_all(seed_value)


torch.manual_seed(42)
torch.cuda.manual_seed(42)
np.random.seed(42)
random.seed(42)
torch.backends.cudnn.deterministic=True
torch.backends.cudnn.benchmark = False

def _init_fn(worker_id):
    np.random.seed(int(seed))

def test_data_process(X_test, y_test):
    tensor_x1 = torch.Tensor(X_test.values).unsqueeze(1)
    tensor_y1 = torch.Tensor(y_test.values).unsqueeze(1)
    test_dataset = TensorDataset(tensor_x1,tensor_y1)

    batch_size = 8

    # train_sampler, test_sampler = __MLP.getSamplers(pheme_y, tensor_x2)
    test_dataloader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False, num_workers=2)

    data = next(iter(test_dataloader))
    print("mean: %s, std: %s" %(data[0].mean(), data[0].std()))

    test_size = int(tensor_y1.size(0))

    print("Test Size",test_size)

    # predict_batch
    return test_dataloader, test_size


def test_data_process(X_test, y_test):

    tensor_x1 = torch.Tensor(X_test.values).unsqueeze(1)
    tensor_y1 = torch.Tensor(y_test.values).unsqueeze(1)
    test_dataset = TensorDataset(tensor_x1,tensor_y1)

    batch_size = 8

    # train_sampler, test_sampler = __MLP.getSamplers(pheme_y, tensor_x2)
    test_dataloader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False, num_workers=2)

    data = next(iter(test_dataloader))
    print("mean: %s, std: %s" %(data[0].mean(), data[0].std()))

    test_size = int(tensor_y1.size(0))

    print("Test Size",test_size)

    # predict_batch
    return test_dataloader, test_size

def predict(model, criterion, val_dataloader, val_size):
    model.eval()
    val_label_list = []
    # val_preds_list = []
    running_val_preds = []

    with torch.no_grad():
        val_loss = 0.0
        val_corrects = 0
        f1_running = 0
        for j, val in enumerate(val_dataloader, 0):
            val_x, val_label = val
            val_x, val_label = val_x.float(), val_label.float()
            val_outputs = model(val_x)
            val_preds = val_outputs.squeeze(1) > 0.0
            f1_running += (f1_score(val_label, val_preds,zero_division=True) * val_x.size(0))
            v_loss = criterion(val_outputs, val_label.unsqueeze(1))
            val_loss += (v_loss.item() * val_x.size(0))
            val_corrects += torch.sum(val_preds == val_label)
            val_label_list.append(val_label)
            running_val_preds.append(val_preds)

    running_val_preds = torch.cat(running_val_preds, 0)
    val_label_list = torch.cat(val_label_list, 0)
    val_corrects = val_corrects
    val_loss = val_loss/val_size
    val_acc = val_corrects.double().numpy() / val_size
    f1_running /= val_size
    print("accuracy_score:\t\t%.4f" % val_acc)
    print('Precision Score:\t%.4f' % precision_score(val_label_list,running_val_preds))
    print('Recall Score:\t\t%.4f' % recall_score(val_label_list,running_val_preds))
    print("f1_score:\t\t%.4f" % f1_running)
    print("Test_loss:\t\t%.4f" % val_loss)


## Data

In [4]:
# Final
pheme_sparse_final = pd.read_csv('./data/_PHEME_sparse.csv')
pheme_y = pd.read_csv('./data/_PHEME_target.csv').target
pheme_pos_final = pd.read_csv('./data/_PHEME_postags.csv')
pheme_thread_final_avg = pd.read_csv('./data/_PHEME_thread_avg.csv')
pheme_thread_final_std = pd.read_csv('./data/_PHEME_thread_std.csv')

ext_pos_final = pd.read_csv('./data/_PHEMEext_postags.csv')
ext_sparse_final = pd.read_csv('./data/_PHEMEext_sparse.csv')
ext_y = pd.read_csv('./data/_PHEMEext_text.csv').target
ext_thread_final_avg = pd.read_csv('./data/_PHEMEext_thread_avg.csv')
ext_thread_final_std = pd.read_csv('./data/_PHEMEext_thread_std.csv')

pheme_bert_simple_normal = pd.read_csv('./data/_PHEME_Bert_final_simple_nrmzd.csv')
ext_bert_simple_normal = pd.read_csv('./data/_PHEMEext_Bert_final_simple_nrmzd.csv')

pheme_bert_brackets_normal = pd.read_csv('./data/_PHEME_Bert_final_brackets_nrmzd.csv')
ext_bert_brackets_normal = pd.read_csv('./data/_PHEMEext_Bert_final_brackets_nrmzd.csv')

pheme_event = pd.read_csv('./data/_PHEME_text.csv')['Event']
ext_event = pd.read_csv('./data/_PHEMEext_text.csv').Event
pheme_AVGw2v = pd.read_csv('./data/_PHEME_text_AVGw2v.csv').drop(['token'],axis=1)
ext_AVGw2v = pd.read_csv('./data/_PHEMEext_text_AVGw2v.csv').drop(['token'],axis=1)

pheme_root = pd.concat([pheme_sparse_final, pheme_pos_final],axis=1)
ext_root = pd.concat([ext_sparse_final, ext_pos_final],axis=1)

pheme_root_thread = pd.concat([pheme_root, pheme_thread_final_avg],axis=1)
ext_root_thread = pd.concat([ext_root, ext_thread_final_avg],axis=1)

pheme_total= pd.concat([pheme_root_thread, pheme_bert_simple_normal],axis=1)
ext_total = pd.concat([ext_root_thread, ext_bert_simple_normal],axis=1)

In [5]:
all_event = pd.concat([pheme_event,ext_event],axis=0, ignore_index=True)
all_y = pd.concat([pheme_y,ext_y],axis=0, ignore_index=True)

all_root = pd.concat([pheme_root, ext_root],axis=0, ignore_index=True)
all_thread = pd.concat([pheme_thread_final_avg, ext_thread_final_avg],axis=0, ignore_index=True)
all_bert_simple = pd.concat([pheme_bert_simple_normal,ext_bert_simple_normal],axis=0,ignore_index=True)
all_root_thread = pd.concat([pheme_root_thread,ext_root_thread],axis=0,ignore_index=True)
all_total = pd.concat([pheme_total,ext_total],axis=0,ignore_index=True)

In [6]:
print("pheme_sparse_final", pheme_sparse_final.shape)
print(pheme_sparse_final.columns, "\n")
print("pheme_pos_final", pheme_pos_final.shape)
x = zip(pheme_pos_final.columns.values, ['Interjection', 'Hashtag', 'coordinating conjunction', 'punctuation', 'at-mention', 'proper noun', 'adjective' 'determiner' 'other', 'nominal + verbal',
    'common noun', 'pronoun', 'pre/postposition/subordinating conjunction', 'adverb', 'nominal + possessive', 'verb particle', 'URL or email', 'verb incl. copula, auxiliaries',
 'existential there, predeterminers', 'proper noun + possessive', 'discourse marker'])
print(set(x), "\n")
print("pheme_thread_final", pheme_thread_final_avg.shape)
print(pheme_thread_final_avg.columns, "\n")
print("pheme_bert_simple_normal", pheme_bert_simple_normal.shape)
print(pheme_bert_simple_normal.columns, "\n")


pheme_sparse_final (5802, 28)
Index(['emoji_count', 'URLcount', 'has_media', 'Skepticism', 'MentionCount',
       'FirstPersonPronoun', 'SecondPersonPronoun', 'ThirdPersonPronoun',
       'test_auxiliary', 'test_tentat', 'test_certain', 'Numeral',
       'char_count', 'word_count', 'HashTag', 'has_question', 'has_exclaim',
       'has_period', 'capital_ratio', 'retweet_count', 'tweet_count',
       'listed_count', 'friends_count', 'follower_count', 'followers/friend',
       'favourites_count', 'account_age_days', 'verified'],
      dtype='object') 

pheme_pos_final (5802, 21)
{('n', 'pre/postposition/subordinating conjunction'), ('o', 'adverb'), ('#', 'Hashtag'), ('&', 'coordinating conjunction'), ('p', 'nominal + possessive'), ('d', 'nominal + verbal'), ('v', 'proper noun + possessive'), ('s', 'URL or email'), ('x', 'discourse marker'), (',', 'punctuation'), ('l', 'pronoun'), ('a', 'adjectivedeterminerother'), ('g', 'common noun'), ('t', 'verb incl. copula, auxiliaries'), ('^', 'prop

In [7]:
print("pheme_sparse_final", ext_sparse_final.shape)
print(ext_sparse_final.columns, "\n")
print("pheme_pos_final", ext_pos_final.shape)
x = zip(pheme_pos_final.columns.values, ['Interjection', 'Hashtag', 'coordinating conjunction', 'punctuation', 'at-mention', 'proper noun', 'adjective' 'determiner' 'other', 'nominal + verbal',
    'common noun', 'pronoun', 'pre/postposition/subordinating conjunction', 'adverb', 'nominal + possessive', 'verb particle', 'URL or email', 'verb incl. copula, auxiliaries',
 'existential there, predeterminers', 'proper noun + possessive', 'discourse marker'])
print(set(x), "\n")
print("pheme_thread_final", ext_thread_final_avg.shape)
print(ext_thread_final_avg.columns, "\n")
print("pheme_bert_simple_normal", ext_bert_simple_normal.shape)
print(ext_bert_simple_normal.columns, "\n")


pheme_sparse_final (623, 28)
Index(['emoji_count', 'URLcount', 'has_media', 'Skepticism', 'MentionCount',
       'FirstPersonPronoun', 'SecondPersonPronoun', 'ThirdPersonPronoun',
       'test_auxiliary', 'test_tentat', 'test_certain', 'Numeral',
       'char_count', 'word_count', 'HashTag', 'has_question', 'has_exclaim',
       'has_period', 'capital_ratio', 'retweet_count', 'tweet_count',
       'listed_count', 'friends_count', 'follower_count', 'followers/friend',
       'favourites_count', 'account_age_days', 'verified'],
      dtype='object') 

pheme_pos_final (623, 21)
{('n', 'pre/postposition/subordinating conjunction'), ('o', 'adverb'), ('#', 'Hashtag'), ('&', 'coordinating conjunction'), ('p', 'nominal + possessive'), ('d', 'nominal + verbal'), ('v', 'proper noun + possessive'), ('s', 'URL or email'), ('x', 'discourse marker'), (',', 'punctuation'), ('l', 'pronoun'), ('a', 'adjectivedeterminerother'), ('g', 'common noun'), ('t', 'verb incl. copula, auxiliaries'), ('^', 'proper

## Functions

In [8]:
def set_seed(seed_value=42):
    """Set seed for reproducibility.
    """
    random.seed(seed_value)
    np.random.seed(seed_value)
    torch.manual_seed(seed_value)
    torch.cuda.manual_seed_all(seed_value)

torch.manual_seed(42)
torch.cuda.manual_seed(42)
np.random.seed(42)
random.seed(42)
torch.backends.cudnn.deterministic=True
torch.backends.cudnn.benchmark = False

seed = 42

def _init_fn(worker_id):
    np.random.seed(int(seed))

In [9]:
def getDataSize(tensor_x1, tensor_y1, tensor_x2, tensor_y2):
    train_size = int(tensor_y1.size(0))
    test_size = int(tensor_y2.size(0))

    # print("Variables)\n\tTrain:%s\n\tTest: %s"%(tensor_x1.size(),tensor_x2.size()))
    # print("\tTargets:%s \ %s"%(tensor_y1.size()[0],tensor_y2.size()[0]))
    # print("Train Size",train_size,"Test Size",test_size)
    # print()
    return train_size, test_size

def reset_weights(m):
  '''
    Try resetting model weights to avoid
    weight leakage.
  '''
  for layer in m.children():
   if hasattr(layer, 'reset_parameters'):
    # print(f'Reset trainable parameters of layer = {layer}')
    print("Parameter Resetted")
    layer.reset_parameters()

class writeLog():
  def write(self, fileName, text):
    print(text)
    f=open(fileName,'a')
    f.write(text)
    f.write("\n")
    f.close()
  def writeWithoutCR(self, fileName, text):
    f=open(fileName,'a')
    f.write(text)
    f.close()


In [118]:
def cv_process(dataset, criterion, modelClass, target, epochs, events, verbose=True, scaling=False):

    # cv_pd_list[?][0]은 Training cv_pd_list[?][1] Testing
    cv_pd_list = []
    data = pd.concat([dataset, events, target], axis=1)
    NUM_EVENT = data.Event.unique().shape[0]
    EVENTS = data.Event.unique()
    results = {}
    # modelClass.__class__

    for i, d in enumerate(EVENTS):
        df1, df2 = [x for _, x in data.groupby(data['Event'] != d)]
        df1.reset_index(inplace=True, drop=True)
        df2.reset_index(inplace=True, drop=True)
        cv_pd_list.append([df2, df1])
    
    # for train, test in cv_pd_list:
    #     print("Train: %s \ Test: %s" % (train.shape, test.shape))

    log = writeLog()
    modelname = modelClass.__name__
    PREFIX = "./Model/"+modelname+"_"
    log.write(PREFIX+"log.txt",f"\nSTARTING TEST of {epochs} EPOCH\n")

    for index, fold in enumerate(cv_pd_list):

        # DATA PREPARATION
        train, test = fold
        log.writeWithoutCR(PREFIX+"log.txt",f"\n----------------------------------------------------------------------------\n> FOLD {int(index)+1}\n----------------------------------------------------------------------------")
        print(f'> FOLD {int(index)+1}')
        train_target = train.pop('target')
        train.pop('Event').values
        test_target = test.pop('target')
        test.pop('Event')

        if scaling==True:
            scaler = StandardScaler()
            train = pd.DataFrame(scaler.fit_transform(train))
            test = pd.DataFrame(scaler.transform(test))

        tensor_x1, tensor_y1, tensor_x2, tensor_y2 = __MLP.convert_df_to_unsqueezed_tensor(
            train.values, train_target, test.values, test_target.values)
        train_dataset = TensorDataset(tensor_x1, tensor_y1)
        test_dataset = TensorDataset(tensor_x2, tensor_y2)

        batch_size = 16

        # train_sampler, test_sampler = __MLP.getSamplers(train_target, tensor_x2)
        counts = np.bincount(train_target.values)
        labels_weights = 1. / counts
        weights = labels_weights[train_target.values]
        train_sampler = WeightedRandomSampler(weights, len(weights))
        test_sampler = SequentialSampler(tensor_x2)


        train_dataloader = DataLoader(train_dataset, batch_size=batch_size,
                                    sampler=train_sampler, pin_memory=True, num_workers=0, worker_init_fn=_init_fn)
        test_dataloader = DataLoader(test_dataset, batch_size=batch_size,
                                    shuffle=False, pin_memory=True, num_workers=0, worker_init_fn=_init_fn)

        data = next(iter(train_dataloader))

        train_size, test_size = getDataSize(tensor_x1, tensor_y1, tensor_x2, tensor_y2)
        results

        if verbose==True:
            print("mean: %s, std: %s" % (data[0].mean(), data[0].std()))
            print("Train Size",train_size,"Test Size",test_size)
        log.writeWithoutCR(PREFIX+"log.txt", f"\nmean: {data[0].mean()}, std: {data[0].std()}")
        log.writeWithoutCR(PREFIX+"log.txt", f"\nTrain Size: {train_size}, Test Size: {test_size}\n")
        

        model = modelClass()
        model.apply(reset_weights)
        # for layer in model.children():
        #     if hasattr(layer, 'reset_parameters'):
        #         print(f'Reset trainable parameters of layer = {layer}')
        #         layer.reset_parameters()


        # model_sparse = sparse_model()
        # criterion = nn.BCEWithLogitsLoss()
        # optimizer = optim.SGD(model_sparse.parameters(), lr=0.01, momentum=0.9)
        # optimizer = optim.Adam(model_sparse.parameters(), lr=5e-5, eps=1e-8, weight_decay=1e-7)
        optimizer = AdamW(model.parameters(),
                        # lr=5e-5,    # Default learning rate
                        lr=8e-5,    # Default learning rate
                        eps=1e-8,    # Default epsilon value
                        weight_decay=1e-6
                        )


        total_steps = len(train_dataloader) * epochs
        # print(f'length of tloader: {len(train_dataloader)}')
        # print(f'total step: {total_steps}')
        log.writeWithoutCR(PREFIX+"log.txt", f'total step: {total_steps}\n')
        
        scheduler = get_linear_schedule_with_warmup(optimizer,
                                                    num_warmup_steps=0,  # Default value
                                                    num_training_steps=total_steps)

        # print(f'Model: {modelname}')
        PATH = "./Model/"+modelname+"_"+str(index+1)+".pt"
        # print(f'PATH: {PATH}')
        log.writeWithoutCR(PREFIX+"log.txt", f'PATH: {PATH}')

        training_acc = []
        training_loss = []
        # train_acc, train_loss, val_acc, val_loss_list = __MLP.train_sequential(model=model, num_epochs=epochs, patience=patience, criterion=criterion, optimizer=optimizer, scheduler=scheduler, train_loader=train_dataloader, train_size=train_size, test_loader=test_dataloader, test_size=test_size, PATH=PATH)

        # Run the training loop for defined number of epochs
        for epoch in range(0, epochs):

            # Print epoch
            if (verbose!=False):
                # pass
                print(f'Starting epoch {epoch+1}')
            elif (verbose!=True):
                if epoch%50 == 49:
                    print(f'Starting epoch {epoch+1}')
            # Set current loss value
            current_loss = 0.0
            running_corrects = 0.0
            running_loss = 0.0

            # Iterate over the DataLoader for training data
            for i, data in enumerate(train_dataloader, 0):

                # Get inputs
                inputs, targets = data

                # Zero the gradients
                optimizer.zero_grad()

                # Perform forward pass
                outputs = model(inputs)

                outputs = outputs.view(outputs.size(0), -1)

                # Compute Prediction Outputs
                # preds = outputs.squeeze(1) > 0.0
                preds = outputs > 0.0

                # Compute loss
                loss = criterion(outputs, targets)

                running_loss += loss.item() * inputs.size(0)
                running_corrects += torch.sum(preds == targets.data).data

                # Perform backward pass
                loss.backward()

                # Perform optimization and Scheduler
                optimizer.step()
                scheduler.step()

                # Print statistics
                # current_loss += loss.item() # 원본
                # if i % len(train_dataloader) == len(train_dataloader)-1:
                #     print('Loss after mini-batch %5d: %.3f' %
                #           (i + 1, current_loss / i+1))

                current_loss += loss.item() * inputs.size(0)
                if verbose == True:
                    if i % len(train_dataloader) == len(train_dataloader)-1:
                        print("Loss/ACC after mini-batch %5d: %.3f / %.4f" %
                            (i + 1, current_loss / train_size, running_corrects/train_size))

            # epoch_acc = running_corrects.double() / train_size
            epoch_acc = running_corrects / train_size
            epoch_loss = running_loss / train_size
            training_acc.append(epoch_acc)
            training_loss.append(epoch_loss)
            # print('Epoch {}/{}\tTrain) Acc: {:.4f}, Loss: {:.4f}'.format(epoch+1,
                                                                        #  epochs, epoch_acc, epoch_loss))
            
        # Process is complete.
        if verbose==True:
            print('>> Training process has finished. Saving trained model and starting Testing')

        # Print about testing

        # Saving the model
        torch.save(model.state_dict(), PATH)

        # Evaluation for this fold
        correct, total = 0, 0
        val_corrects=0
        f1_batch_epoch = 0
        val_label_list = []
        val_loss = 0
        with torch.no_grad():

            # Iterate over the test data and generate predictions
            for i, data in enumerate(test_dataloader, 0):

                # Get inputs
                inputs, targets = data

                # Generate outputs
                outputs = model(inputs)

                # Set total and correct
                outputs = outputs.view(outputs.size(0), -1).float()
                predicted = (outputs > 0.0).float()
                correct += (predicted == targets).float().sum().item()

                loss = criterion(outputs, targets)
                val_loss += loss.item() * inputs.size(0)
                #!
                preds = (outputs > 0.0).float()
                # running_loss += loss.item() * inputs.size(0)
                val_corrects += torch.sum(preds == targets.data).data
                # f1_batch = f1_score(targets.cpu(), outputs.sigmoid().cpu() > 0.5, average='macro')
                f1_batch = f1_score(targets.cpu(), preds, average='macro')
                f1_batch_epoch += f1_batch * inputs.size(0)
                # f1_running += (f1_score(targets, preds, average='macro') * inputs.size(0))
                total += targets.size(0)
            
            if verbose==True:
                # Print accuracy
                print('Accuracy for fold %d: %f %%' % (index, 100.0 * correct / total))
                # print('Accuracy-2 for fold %d: %f %%' % (index, 100.0 * val_corrects / total))
                # print('F1 Score-2 for fold %d: %f ->  %%' %(index, f1_score(targets, preds, zero_division=False)))
                # print('F1 Score-3 for fold %d: %f %%' %(index, f1_score(targets, predicted, zero_division=False)))
                print('F1 Score for fold %d: %f %%' %(index, f1_batch_epoch / total))
                print('Loss for fold %d: %f %%' %(index, val_loss / total))
                # print('F1 Score-5 for fold %d: %f %%' %(index, f1_batch_epoch / test_size))
                # print('F1 Score-6 for fold %d: %f %%' %(index, f1_running / test_size))
            
            results[index] = [100.0 * (correct / total), 100.0 * f1_batch_epoch / total, val_loss / total]
            log.writeWithoutCR(PREFIX+"log.txt", f'\nAccuracy for fold {index+1}: {100.0 * correct / total:.3f}')
            log.writeWithoutCR(PREFIX+"log.txt", f'\nF1 Score for fold {index+1}: {f1_batch_epoch / total:.3f}')
            log.writeWithoutCR(PREFIX+"log.txt", f'\nLoss for fold {index+1}: {val_loss / total:.3f}')

    # ---------------------------- Print fold results ---------------------------- #
    # log.write(PREFIX+"log.txt",f"FOLD {int(index)+1}\n----------------------------------------------------------------------------")

    # print(f'K-FOLD CROSS VALIDATION RESULTS FOR {NUM_EVENT} FOLDS')
    log.write(PREFIX+"log.txt", f'\n\n----------------------------------------------------------------------------\n>>>K-FOLD CROSS VALIDATION RESULTS FOR {NUM_EVENT} FOLDS\n----------------------------------------------------------------------------')

    # print('----------------------------------------------------------------------------')
    acc_sum = 0.0
    f1_sum = 0.0
    loss_sum = 0.0
    for key, value in results.items():
        # print(f'Fold {key}: Acc {value[0]}, F1 {value[1]} %')
        log.write(PREFIX+"log.txt", f'Fold {key}: Acc {value[0]}, F1 {value[1]}')
        acc_sum += value[0]
        f1_sum += value[1]
        loss_sum += value[2]
    # print(f'Average: {acc_sum/len(results.items())} %')
    # print(f'F1: {f1_sum/len(results.items())} %')
    log.write(PREFIX+"log.txt", f'Average: {acc_sum/len(results.items()):.3f}%')
    log.write(PREFIX+"log.txt", f'F1: {f1_sum/len(results.items()):.3f}%')
    log.write(PREFIX+"log.txt", f'Loss: {loss_sum/len(results.items()):.3f}\n')

    return results

In [81]:
def epochs_diff(testing_results):
    string = ""
    print('----------------------------------------------------------------------------')
    for loop in testing_results:
        acc_sum = 0.0
        f1_sum = 0.0
        loss=0.0
        for key, value in loop.items():
            print(f'Fold {key}: Acc {value[0]:.2f}%, F1 {value[1]:.2f}%, Loss {value[2]:.2f}')
            acc_sum += value[0]
            f1_sum += value[1]
            loss += value[2]
        string+=f'Average: {acc_sum/len(loop.items()):.2f}%\n'
        string+=f'F1: {f1_sum/len(loop.items()):.2f}%\n'
        string+=f'Loss: {loss/len(loop.items()):.2f}\n'
        # print(f'Average: {acc_sum/len(loop.items()):.2f} %')
        # print(f'F1: {f1_sum/len(loop.items()):.2f} %')
        print("-----------------------------")
        return string

In [None]:
class root_model(nn.Module):
    def __init__(self):
        super(root_model, self).__init__()  # 1*20
        self.layers = nn.Sequential(
            # nn.Dropout(0.5),
            nn.Linear(49, 16),
            nn.ELU(),
            nn.Dropout(0.5),
            nn.Linear(16, 5),
            nn.ELU(),
            nn.Dropout(0.5),
            nn.Linear(5, 1)
        )

    def forward(self, x):
        x = self.layers(x)
        return x

# model = sparse_model()
dataset = all_root
criterion = nn.BCEWithLogitsLoss()

# testing_epochs = [3]
testing_results = []

writeLog().write("./Model/"+root_model.__name__+"_"+"log.txt",f"\n\n************************************************************\nSTARTING A CROSS-VALIDATION ... {testing_epochs} epochs\n************************************************************")
writeLog().write("./Model/"+root_model.__name__+"_"+"log.txt",f"TIME: {datetime.now().strftime('%d/%m/%Y %H:%M:%S')}")
writeLog().write("./Model/"+root_model.__name__+"_"+"log.txt",f"LAYER: {[layer for layer in root_model().children()]}")

for epoch in testing_epochs:
    result = cv_process(dataset, criterion, modelClass=root_model, events=all_event, target=all_y, epochs=epoch, verbose=False, scaling=False)
    testing_results.append(result)

# writeLog().writeWithoutCR("./Model/"+root_model.__name__+"_"+"log.txt",f"RESULT:\n{epochs_diff(testing_results)}")

# 5 FOLD

## Sparse

In [19]:
class sparse_model(nn.Module):
    def __init__(self):
        super(sparse_model, self).__init__()  # 1*20
        self.layers = nn.Sequential(
            nn.Dropout(0.5),
            nn.Linear(28, 10),
            nn.ELU(),
            nn.Dropout(0.5),
            nn.Linear(10, 4),
            nn.ELU(),
            # nn.Linear(12, 8),
            # nn.ELU(),
            nn.Linear(4, 1)
        )

    def forward(self, x):
        x = self.layers(x)
        return x

model = sparse_model()
dataset = pheme_sparse_final
criterion = nn.BCEWithLogitsLoss()

testing_epochs = [5, 10, 25]
testing_results = []

for epoch in testing_epochs:
    result = cv_process(dataset, criterion, modelClass=sparse_model, events=pheme_event, target=pheme_y, epochs=epoch, verbose=False, scaling=True)
    testing_results.append(result)

Num event: 5
Events: ['charliehebdo' 'ferguson' 'germanwings-crash' 'ottawashooting'
 'sydneysiege']
cv_pd_list: 5
cv_pd_list[0]: 2

Epochs: 5
FOLD 1
----------------------------------------------------------------------------
length of tloader: 233
total step: 1165
PATH: ./Model/sparse_model_1.pt
Num event: 5
Events: ['charliehebdo' 'ferguson' 'germanwings-crash' 'ottawashooting'
 'sydneysiege']
cv_pd_list: 5
cv_pd_list[0]: 2

Epochs: 10
FOLD 1
----------------------------------------------------------------------------
length of tloader: 233
total step: 2330
PATH: ./Model/sparse_model_1.pt
Num event: 5
Events: ['charliehebdo' 'ferguson' 'germanwings-crash' 'ottawashooting'
 'sydneysiege']
cv_pd_list: 5
cv_pd_list[0]: 2

Epochs: 25
FOLD 1
----------------------------------------------------------------------------
length of tloader: 233
total step: 5825
PATH: ./Model/sparse_model_1.pt


In [None]:
model = sparse_model()
dataset = pheme_sparse_final
criterion = nn.BCEWithLogitsLoss()

testing_epochs = [5, 10, 25]
testing_results = []

for epoch in testing_epochs:
    result = cv_process(dataset, criterion, modelClass=sparse_model, events=pheme_event, target=pheme_y, epochs=epoch, verbose=False, scaling=True)
    testing_results.append(result)

In [None]:
epochs_diff(testing_results)

## POS


In [51]:
# 
class pos_model(nn.Module):
    def __init__(self):
        super(pos_model, self).__init__() # 1*20
        self.layers = nn.Sequential(
            # nn.Dropout(0.5),
            nn.Linear(21, 4, bias=True),
            nn.ELU(),
            nn.Dropout(0.5),
            nn.Linear(4, 1)
        )

    def forward(self, x):
        x = self.layers(x)
        return x
        
model = pos_model()
dataset = pheme_pos_final
criterion = nn.BCEWithLogitsLoss()
# epochs = 10

testing_epochs = [5,10,50,100]
testing_results = []

for epoch in testing_epochs:
    result = cv_process(dataset, criterion,scaling=True, modelClass=pos_model, events=pheme_event, target=pheme_y, epochs=epoch, verbose=False)
    testing_results.append(result)

epochs_diff(testing_results)


Epochs: 5
FOLD 1
----------------------------------------------------------------------------
mean: tensor(0.0065), std: tensor(1.1613)
Train Size 3723 Test Size 2079
PATH: ./Model/pos_model_1.pt

Training process has finished. Saving trained model.
<Starting TESTING>
----------------------------------------------------------------------------
FOLD 2
----------------------------------------------------------------------------
mean: tensor(0.0023), std: tensor(0.9391)
Train Size 4659 Test Size 1143
PATH: ./Model/pos_model_2.pt

Training process has finished. Saving trained model.
<Starting TESTING>
----------------------------------------------------------------------------
FOLD 3
----------------------------------------------------------------------------
mean: tensor(-0.1089), std: tensor(0.8013)
Train Size 5333 Test Size 469
PATH: ./Model/pos_model_3.pt

Training process has finished. Saving trained model.
<Starting TESTING>
----------------------------------------------------------

In [52]:
class pos_model(nn.Module):
    def __init__(self):
        super(pos_model, self).__init__() # 1*20
        self.layers = nn.Sequential(
            nn.Linear(21, 8, bias=True),
            nn.ELU(),
            nn.Dropout(0.5),
            nn.Linear(8, 3, bias=True),
            nn.ELU(),
            nn.Linear(3, 1)
        )

    def forward(self, x):
        x = self.layers(x)
        return x


model = pos_model()
dataset = pheme_pos_final
criterion = nn.BCEWithLogitsLoss()
# epochs = 10

testing_epochs = [5,10,50,100]
testing_results = []

for epoch in testing_epochs:
    result = cv_process(dataset, criterion,scaling=True, modelClass=pos_model, events=pheme_event, target=pheme_y, epochs=epoch, verbose=False)
    testing_results.append(result)

epochs_diff(testing_results)


Epochs: 5
FOLD 1
----------------------------------------------------------------------------
mean: tensor(-0.0044), std: tensor(1.0529)
Train Size 3723 Test Size 2079
PATH: ./Model/pos_model_1.pt

Training process has finished. Saving trained model.
<Starting TESTING>
----------------------------------------------------------------------------
FOLD 2
----------------------------------------------------------------------------
mean: tensor(-0.0009), std: tensor(1.1191)
Train Size 4659 Test Size 1143
PATH: ./Model/pos_model_2.pt

Training process has finished. Saving trained model.
<Starting TESTING>
----------------------------------------------------------------------------
FOLD 3
----------------------------------------------------------------------------
mean: tensor(-0.0236), std: tensor(0.9941)
Train Size 5333 Test Size 469
PATH: ./Model/pos_model_3.pt

Training process has finished. Saving trained model.
<Starting TESTING>
--------------------------------------------------------

## Thread

In [56]:
class thread_model(nn.Module):
    def __init__(self):
        super(thread_model, self).__init__() # 1*20
        self.layers = nn.Sequential(
            # nn.Dropout(0.5),
            nn.Linear(52, 12, bias=True),
            nn.ELU(),
            nn.Dropout(0.5),
            nn.Linear(12, 8, bias=True),
            nn.ELU(),
            nn.Dropout(0.5),
            nn.Linear(8, 1)
        )

    def forward(self, x):
        x = self.layers(x)
        return x


model = thread_model()
dataset = pheme_thread_final_avg
criterion = nn.BCEWithLogitsLoss()
# epochs = 10

testing_epochs = [5,10,25,50,100]
testing_results = []

for epoch in testing_epochs:
    result = cv_process(dataset, criterion, modelClass=thread_model, events=pheme_event, target=pheme_y, epochs=epoch, verbose=False, scaling=True)
    testing_results.append(result)

epochs_diff(testing_results)



Epochs: 5
FOLD 1
----------------------------------------------------------------------------
mean: tensor(-0.0002), std: tensor(0.7655)
Train Size 3723 Test Size 2079
PATH: ./Model/thread_model_1.pt

Training process has finished. Saving trained model.
<Starting TESTING>
----------------------------------------------------------------------------
FOLD 2
----------------------------------------------------------------------------
mean: tensor(-0.0363), std: tensor(0.9062)
Train Size 4659 Test Size 1143
PATH: ./Model/thread_model_2.pt

Training process has finished. Saving trained model.
<Starting TESTING>
----------------------------------------------------------------------------
FOLD 3
----------------------------------------------------------------------------
mean: tensor(-0.0468), std: tensor(0.7949)
Train Size 5333 Test Size 469
PATH: ./Model/thread_model_3.pt

Training process has finished. Saving trained model.
<Starting TESTING>
-----------------------------------------------

In [None]:

class thread_model2(nn.Module):
    def __init__(self):
        super(thread_model, self).__init__() # 1*20
        self.layers = nn.Sequential(
            # nn.Dropout(0.5),
            nn.Linear(52, 8, bias=True),
            nn.ELU(),
            nn.Dropout(0.5),
            nn.Linear(8, 1)
        )

    def forward(self, x):
        x = self.layers(x)
        return x
    
model = thread_model()
dataset = pheme_thread_final_avg
criterion = nn.BCEWithLogitsLoss()
# epochs = 10

testing_epochs = [5,10,25,50,100]
testing_results = []

for epoch in testing_epochs:
    result = cv_process(dataset, criterion, modelClass=thread_model, events=pheme_event, target=pheme_y, epochs=epoch, verbose=False, scaling=True)
    testing_results.append(result)

epochs_diff(testing_results)

## SPARSE + POS + Thread

In [122]:
# class all_sparse_model(nn.Module):
#     def __init__(self):
#         super(all_sparse_model, self).__init__() # 1*20
#         self.layers = nn.Sequential(
#             nn.Dropout(0.3),
#             nn.Linear(52, 8),
#             nn.ELU(),
#             nn.Dropout(0.3),
#             nn.Linear(8, 1)
#         )

#     def forward(self, x):
#         x = self.layers(x)
#         return x

class all_sparse_model(nn.Module):
    def __init__(self):
        super(all_sparse_model, self).__init__() # 1*20
        self.layers = nn.Sequential(
            # nn.Dropout(0.5),
            nn.Linear(101, 32),
            nn.ELU(),
            nn.Dropout(0.5),
            nn.Linear(32, 10),
            nn.ELU(),
            nn.Linear(10, 1)
        )

    def forward(self, x):
        x = self.layers(x)
        return x

In [123]:
model = all_sparse_model()
dataset = pd.concat([pheme_sparse_final, pheme_pos_final, pheme_thread_final_avg],axis=1)
criterion = nn.BCEWithLogitsLoss()
# epochs = 10

testing_epochs = [50,100]
testing_results = []

for epoch in testing_epochs:
    result = cv_process(dataset, criterion, events=pheme_event, target=pheme_y, epochs=epoch, verbose=False)
    testing_results.append(result)


FOLD 1
----------------------------------------------------------------------------
mean: tensor(17072.7910), std: tensor(317982.8125)
Variables)
	Train:torch.Size([3723, 1, 101])
	Test: torch.Size([2079, 1, 101])
Train Size 3723 Test Size 2079

Model: all_sparse_model
Epochs: 50
PATH: ./Model/all_sparse_model_1.pt

Starting epoch 25
Starting epoch 50
Training process has finished. Saving trained model.
<Starting TESTING>
----------------------------------------------------------------------------
FOLD 2
----------------------------------------------------------------------------
mean: tensor(24228.5430), std: tensor(385957.3750)
Variables)
	Train:torch.Size([4659, 1, 101])
	Test: torch.Size([1143, 1, 101])
Train Size 4659 Test Size 1143

Model: all_sparse_model
Epochs: 50
PATH: ./Model/all_sparse_model_2.pt

Starting epoch 25
Starting epoch 50
Training process has finished. Saving trained model.
<Starting TESTING>
----------------------------------------------------------------------

In [124]:
epochs_diff(testing_results)

Fold 0: Acc 39.06, F1 26.41 %
Fold 1: Acc 68.77, F1 41.46 %
Fold 2: Acc 46.91, F1 28.64 %
Fold 3: Acc 49.33, F1 35.07 %
Fold 4: Acc 56.76, F1 43.71 %
Average: 52.16 %
F1: 35.06 %
-----------------------------
Fold 0: Acc 76.77, F1 61.71 %
Fold 1: Acc 35.43, F1 24.33 %
Fold 2: Acc 45.63, F1 29.11 %
Fold 3: Acc 58.43, F1 34.96 %
Fold 4: Acc 56.02, F1 34.57 %
Average: 54.46 %
F1: 36.94 %
-----------------------------


In [125]:
# class all_sparse_model(nn.Module):
#     def __init__(self):
#         super(all_sparse_model, self).__init__() # 1*20
#         self.layers = nn.Sequential(
#             nn.Dropout(0.3),
#             nn.Linear(52, 8),
#             nn.ELU(),
#             nn.Dropout(0.3),
#             nn.Linear(8, 1)
#         )

#     def forward(self, x):
#         x = self.layers(x)
#         return x

class all_sparse_model(nn.Module):
    def __init__(self):
        super(all_sparse_model, self).__init__() # 1*20
        self.layers = nn.Sequential(
            # nn.Dropout(0.5),
            nn.Linear(101, 50),
            nn.ELU(),
            nn.Dropout(0.5),
            nn.Linear(50, 25),
            nn.ELU(),
            nn.Linear(25, 1)
        )

    def forward(self, x):
        x = self.layers(x)
        return x

In [126]:
model = all_sparse_model()
dataset = pd.concat([pheme_sparse_final, pheme_pos_final, pheme_thread_final_avg],axis=1)
criterion = nn.BCEWithLogitsLoss()
# epochs = 10

testing_epochs = [100,200]
testing_results = []

for epoch in testing_epochs:
    result = cv_process(dataset, criterion, events=pheme_event, target=pheme_y, epochs=epoch, verbose=False)
    testing_results.append(result)


FOLD 1
----------------------------------------------------------------------------
mean: tensor(2469.1448), std: tensor(24742.1504)
Variables)
	Train:torch.Size([3723, 1, 101])
	Test: torch.Size([2079, 1, 101])
Train Size 3723 Test Size 2079

Model: all_sparse_model
Epochs: 100
PATH: ./Model/all_sparse_model_1.pt

Starting epoch 25
Starting epoch 50
Starting epoch 75
Starting epoch 100
Training process has finished. Saving trained model.
<Starting TESTING>
----------------------------------------------------------------------------
FOLD 2
----------------------------------------------------------------------------
mean: tensor(16121.2100), std: tensor(338595.2188)
Variables)
	Train:torch.Size([4659, 1, 101])
	Test: torch.Size([1143, 1, 101])
Train Size 4659 Test Size 1143

Model: all_sparse_model
Epochs: 100
PATH: ./Model/all_sparse_model_2.pt

Starting epoch 25
Starting epoch 50
Starting epoch 75
Starting epoch 100
Training process has finished. Saving trained model.
<Starting TESTI

In [127]:
epochs_diff(testing_results)

Fold 0: Acc 49.21, F1 32.42 %
Fold 1: Acc 66.49, F1 39.12 %
Fold 2: Acc 46.06, F1 31.36 %
Fold 3: Acc 56.07, F1 34.48 %
Fold 4: Acc 57.99, F1 36.40 %
Average: 55.16 %
F1: 34.76 %
-----------------------------
Fold 0: Acc 57.53, F1 36.29 %
Fold 1: Acc 58.71, F1 36.36 %
Fold 2: Acc 50.32, F1 32.48 %
Fold 3: Acc 59.66, F1 36.24 %
Fold 4: Acc 60.85, F1 37.88 %
Average: 57.41 %
F1: 35.85 %
-----------------------------


## SPARSE + POS + Thread Standardization

In [150]:
class all_sparse_model(nn.Module):
    def __init__(self):
        super(all_sparse_model, self).__init__() # 1*20
        self.layers = nn.Sequential(
            # nn.Dropout(0.5),
            nn.Linear(101, 20),
            nn.ELU(),
            nn.Dropout(0.5),
            nn.Linear(20, 6),
            nn.ELU(),
            nn.Linear(6, 1)
        )

    def forward(self, x):
        x = self.layers(x)
        return x

In [151]:
model = all_sparse_model()
dataset = pd.concat([pheme_sparse_final, pheme_pos_final, pheme_thread_final_avg],axis=1)
criterion = nn.BCEWithLogitsLoss()
# epochs = 10

testing_epochs = [5,10,25,50,100]
testing_results = []

for epoch in testing_epochs:
    result = cv_process(dataset, criterion, events=pheme_event, target=pheme_y, epochs=epoch, verbose=False, scaling=True)
    testing_results.append(result)


FOLD 1
----------------------------------------------------------------------------
mean: tensor(-0.0077), std: tensor(0.8630)
Variables)
	Train:torch.Size([3723, 1, 101])
	Test: torch.Size([2079, 1, 101])
Train Size 3723 Test Size 2079

Model: all_sparse_model
Epochs: 5
PATH: ./Model/all_sparse_model_1.pt

Training process has finished. Saving trained model.
<Starting TESTING>
----------------------------------------------------------------------------
FOLD 2
----------------------------------------------------------------------------
mean: tensor(-0.0158), std: tensor(0.8134)
Variables)
	Train:torch.Size([4659, 1, 101])
	Test: torch.Size([1143, 1, 101])
Train Size 4659 Test Size 1143

Model: all_sparse_model
Epochs: 5
PATH: ./Model/all_sparse_model_2.pt

Training process has finished. Saving trained model.
<Starting TESTING>
----------------------------------------------------------------------------
FOLD 3
----------------------------------------------------------------------------

In [None]:
epochs_diff(testing_results)

In [None]:
# class all_sparse_model(nn.Module):
#     def __init__(self):
#         super(all_sparse_model, self).__init__() # 1*20
#         self.layers = nn.Sequential(
#             nn.Dropout(0.3),
#             nn.Linear(52, 8),
#             nn.ELU(),
#             nn.Dropout(0.3),
#             nn.Linear(8, 1)
#         )

#     def forward(self, x):
#         x = self.layers(x)
#         return x

class all_sparse_model(nn.Module):
    def __init__(self):
        super(all_sparse_model, self).__init__() # 1*20
        self.layers = nn.Sequential(
            # nn.Dropout(0.5),
            nn.Linear(101, 50),
            nn.ELU(),
            nn.Dropout(0.5),
            nn.Linear(50, 25),
            nn.ELU(),
            nn.Linear(25, 1)
        )

    def forward(self, x):
        x = self.layers(x)
        return x

In [None]:
model = all_sparse_model()
dataset = pd.concat([pheme_sparse_final, pheme_pos_final, pheme_thread_final_avg],axis=1)
criterion = nn.BCEWithLogitsLoss()
# epochs = 10

testing_epochs = [100,200]
testing_results = []

for epoch in testing_epochs:
    result = cv_process(dataset, criterion, events=pheme_event, target=pheme_y, epochs=epoch, verbose=False)
    testing_results.append(result)

In [None]:
epochs_diff(testing_results)

## BERT

In [27]:
class bert_model(nn.Module):
    def __init__(self):
        super(bert_model, self).__init__() # 1*20
        self.layers = nn.Sequential(
            # nn.Dropout(0.5),
            nn.Linear(768, 50, bias=True),
            nn.ELU(),
            # nn.Dropout(0.5),
            nn.Linear(50, 8, bias=True),
            nn.ELU(),
            nn.Linear(8, 1)
        )
    def forward(self, x):
        x = self.layers(x)
        return x


model = bert_model()
dataset = pheme_bert_brackets_normal
criterion = nn.BCEWithLogitsLoss()
# epochs = 10

testing_epochs = [2,3,4,5,10,25,50,100]
testing_results = []

for epoch in testing_epochs:
    result = cv_process(dataset, criterion, modelClass=bert_model, events=pheme_event, target=pheme_y, epochs=epoch, verbose=False)
    testing_results.append(result)

epochs_diff(testing_results)



Epochs: 2
FOLD 1
----------------------------------------------------------------------------
PATH: ./Model/bert_model_1.pt
----------------------------------------------------------------------------
FOLD 2
----------------------------------------------------------------------------
PATH: ./Model/bert_model_2.pt
----------------------------------------------------------------------------
FOLD 3
----------------------------------------------------------------------------
PATH: ./Model/bert_model_3.pt
----------------------------------------------------------------------------
FOLD 4
----------------------------------------------------------------------------
PATH: ./Model/bert_model_4.pt
----------------------------------------------------------------------------
FOLD 5
----------------------------------------------------------------------------
PATH: ./Model/bert_model_5.pt
----------------------------------------------------------------------------
K-FOLD CROSS VALIDATION RESULTS FO

In [None]:
class bert_model(nn.Module):
    def __init__(self):
        super(bert_model, self).__init__() # 1*20
        self.layers = nn.Sequential(
            nn.Dropout(0.5),
            nn.Linear(768, 50, bias=True),
            nn.ELU(),
            nn.Dropout(0.5),
            nn.Linear(50, 8, bias=True),
            nn.ELU(),
            nn.Linear(8, 1)
        )
    def forward(self, x):
        x = self.layers(x)
        return x


dataset = pheme_bert_brackets_normal
criterion = nn.BCEWithLogitsLoss()
# epochs = 10

testing_epochs = [2,3,4,5,10,25,50,100]
testing_results = []

for epoch in testing_epochs:
    result = cv_process(dataset, criterion, modelClass=bert_model, events=pheme_event, target=pheme_y, epochs=epoch, verbose=False, )
    testing_results.append(result)

epochs_diff(testing_results)


## BERT + ALL SPARSE

In [43]:
class bert_sparse_model(nn.Module):
    def __init__(self):
        super(bert_sparse_model, self).__init__() # 1*20
        self.layers = nn.Sequential(
            nn.Linear(869, 50, bias=True),
            nn.ELU(),
            nn.Dropout(0.5),
            nn.Linear(50, 8, bias=True),
            nn.ELU(),
            nn.Dropout(0.5),
            nn.Linear(8, 3, bias=True),
            nn.ELU(),
            nn.Dropout(0.5),
            nn.Linear(3, 1)
        )
    def forward(self, x):
        x = self.layers(x)
        return x


# dataset = pd.concat([pheme_sparse_final, pheme_pos_final, pheme_thread_final_avg, pheme_bert_brackets_normal],axis=1)
dataset = pd.concat([pheme_sparse_final, pheme_pos_final, pheme_thread_final_avg, pheme_bert_simple_normal],axis=1)

criterion = nn.BCEWithLogitsLoss()
# epochs = 10

testing_epochs = [25]
testing_results = []

for epoch in testing_epochs:
    result = cv_process(dataset, criterion, modelClass=bert_sparse_model, events=pheme_event, target=pheme_y, epochs=epoch, verbose=False, scaling=True)
    testing_results.append(result)

epochs_diff(testing_results)



Epochs: 10
FOLD 1
----------------------------------------------------------------------------
PATH: ./Model/bert_sparse_model_1.pt
----------------------------------------------------------------------------
FOLD 2
----------------------------------------------------------------------------
PATH: ./Model/bert_sparse_model_2.pt
----------------------------------------------------------------------------
FOLD 3
----------------------------------------------------------------------------
PATH: ./Model/bert_sparse_model_3.pt
----------------------------------------------------------------------------
FOLD 4
----------------------------------------------------------------------------
PATH: ./Model/bert_sparse_model_4.pt
----------------------------------------------------------------------------
FOLD 5
----------------------------------------------------------------------------
PATH: ./Model/bert_sparse_model_5.pt
---------------------------------------------------------------------------

## BERT + ALL SPARSE (STD)

In [45]:
class bert_sparse_model(nn.Module):
    def __init__(self):
        super(bert_sparse_model, self).__init__() # 1*20
        self.layers = nn.Sequential(
            nn.Linear(869, 50, bias=True),
            nn.ELU(),
            nn.Dropout(0.4),
            nn.Linear(50, 8, bias=True),
            nn.ELU(),
            nn.Dropout(0.4),
            nn.Linear(8, 3, bias=True),
            nn.ELU(),
            nn.Linear(3, 1)
        )
    def forward(self, x):
        x = self.layers(x)
        return x


# dataset = pd.concat([pheme_sparse_final, pheme_pos_final, pheme_thread_final_avg, pheme_bert_simple_normal],axis=1)
dataset = pd.concat([pheme_sparse_final, pheme_pos_final, pheme_thread_final_avg, pheme_bert_brackets_normal],axis=1)
criterion = nn.BCEWithLogitsLoss()
# epochs = 10

testing_epochs = [10]
testing_results = []

for epoch in testing_epochs:
    result = cv_process(dataset, criterion, modelClass=bert_sparse_model, events=pheme_event, target=pheme_y, epochs=epoch, verbose=False, scaling=False)
    testing_results.append(result)

epochs_diff(testing_results)



Epochs: 10
FOLD 1
----------------------------------------------------------------------------
PATH: ./Model/bert_sparse_model_1.pt
----------------------------------------------------------------------------
FOLD 2
----------------------------------------------------------------------------
PATH: ./Model/bert_sparse_model_2.pt
----------------------------------------------------------------------------
FOLD 3
----------------------------------------------------------------------------
PATH: ./Model/bert_sparse_model_3.pt
----------------------------------------------------------------------------
FOLD 4
----------------------------------------------------------------------------
PATH: ./Model/bert_sparse_model_4.pt
----------------------------------------------------------------------------
FOLD 5
----------------------------------------------------------------------------
PATH: ./Model/bert_sparse_model_5.pt
---------------------------------------------------------------------------

# D

In [190]:
print(pheme_event.unique())
print(ext_event.unique())

['charliehebdo' 'ferguson' 'germanwings-crash' 'ottawashooting'
 'sydneysiege']
['ebola-essien' 'prince-toronto' 'putinmissing']


In [191]:
ext_pos_final

Unnamed: 0,!,#,&,",",@,^,a,d,g,l,n,o,p,r,s,t,u,v,x,z,~
0,0,0,0,0,0,3,0,1,0,0,1,0,1,0,0,0,1,1,0,0,0
1,0,1,2,2,0,1,2,1,0,1,5,2,3,3,0,0,0,6,0,0,0
2,0,0,1,1,0,3,1,4,0,0,4,0,3,0,0,0,0,4,0,0,0
3,0,0,0,2,0,3,2,2,0,0,3,0,1,0,0,0,2,3,0,0,0
4,0,0,0,1,1,3,1,1,0,0,1,0,2,1,0,0,1,4,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
480,0,3,0,0,0,4,0,0,0,0,3,0,3,0,0,0,2,1,0,0,0
481,0,1,0,1,0,2,0,0,0,0,2,0,2,0,0,1,0,2,0,0,0
482,0,1,0,1,0,0,0,1,0,0,2,0,0,0,0,0,1,0,0,0,0
483,0,0,0,2,0,1,2,1,0,0,5,1,2,1,0,0,0,3,0,0,0


In [205]:
print(pheme_sparse_final.shape)
print(pheme_pos_final.shape)

(5802, 28)
(5802, 21)


In [209]:
print(ext_sparse_final.shape)
print(ext_pos_final.shape)
print(ext_bert_simple_normal.shape)
print(ext_bert_brackets_normal.shape)

(623, 28)
(623, 21)
(623, 768)
(623, 768)


In [208]:

# Final
pheme_sparse_final = pd.read_csv('./data/_PHEME_sparse.csv')
pheme_y = pd.read_csv('./data/_PHEME_target.csv').target
pheme_pos_final = pd.read_csv('./data/_PHEME_postags.csv')
pheme_thread_final_avg = pd.read_csv('./data/_PHEME_thread_avg.csv')
pheme_thread_final_std = pd.read_csv('./data/_PHEME_thread_std.csv')

ext_pos_final = pd.read_csv('./data/_PHEMEext_postags.csv')
ext_sparse_final = pd.read_csv('./data/_PHEMEext_sparse.csv')
ext_y = pd.read_csv('./data/_PHEMEext_text.csv').target
ext_thread_final_avg = pd.read_csv('./data/_PHEMEext_thread_avg.csv')
ext_thread_final_std = pd.read_csv('./data/_PHEMEext_thread_std.csv')

pheme_bert_simple_normal = pd.read_csv('./data/_PHEME_Bert_final_simple_nrmzd.csv')
ext_bert_simple_normal = pd.read_csv('./data/_PHEMEext_Bert_final_simple_nrmzd.csv')

pheme_bert_brackets_normal = pd.read_csv('./data/_PHEME_Bert_final_brackets_nrmzd.csv')
ext_bert_brackets_normal = pd.read_csv('./data/_PHEMEext_Bert_final_brackets_nrmzd.csv')

pheme_event = pd.read_csv('./data/_PHEME_text.csv')['Event']
ext_event = pd.read_csv('./data/_PHEMEext_text.csv').Event
pheme_AVGw2v = pd.read_csv('./data/_PHEME_text_AVGw2v.csv').drop(['token'],axis=1)
ext_AVGw2v = pd.read_csv('./data/_PHEMEext_text_AVGw2v.csv').drop(['token'],axis=1)

# 8 Folds

In [12]:
all_event = pd.concat([pheme_event,ext_event],axis=0, ignore_index=True)
all_y = pd.concat([pheme_y,ext_y],axis=0, ignore_index=True)
all_sparse = pd.concat([pheme_sparse_final, ext_sparse_final],axis=0, ignore_index=True)

## Sparse

In [30]:
class sparse_model(nn.Module):
    def __init__(self):
        super(sparse_model, self).__init__()  # 1*20
        self.layers = nn.Sequential(
            # nn.Dropout(0.5),
            nn.Linear(28, 10),
            nn.ELU(),
            nn.Dropout(0.5),
            nn.Linear(10, 4),
            nn.ELU(),
            nn.Dropout(0.5),
            # nn.Linear(12, 8),
            # nn.ELU(),
            nn.Linear(4, 1)
        )

    def forward(self, x):
        x = self.layers(x)
        return x

# model = sparse_model()
dataset = all_sparse
criterion = nn.BCEWithLogitsLoss()

testing_epochs = [10, 100]
testing_results = []

writeLog().write("./Model/"+sparse_model.__name__+"_"+"log.txt",f"\n\n************************************************************\nSTARTING A CROSS-VALIDATION ... {testing_epochs} epochs\n************************************************************")

for epoch in testing_epochs:
    result = cv_process(dataset, criterion, modelClass=sparse_model, events=all_event, target=all_y, epochs=epoch, verbose=False, scaling=True)
    testing_results.append(result)




************************************************************
STARTING A CROSS-VALIDATION ... [10, 100] epochs
************************************************************

STARTING TEST of 10 EPOCH

> FOLD 1
> FOLD 2
> FOLD 3
> FOLD 4
> FOLD 5
> FOLD 6
> FOLD 7
> FOLD 8
> FOLD 9


----------------------------------------------------------------------------
>>>K-FOLD CROSS VALIDATION RESULTS FOR 9 FOLDS
----------------------------------------------------------------------------
Fold 0: Acc 63.44396344396345, F1 38.68452196113811
Fold 1: Acc 61.76727909011374, F1 37.803648325060465
Fold 2: Acc 65.88486140724946, F1 39.87612714301961
Fold 3: Acc 59.88764044943821, F1 37.247254896172656
Fold 4: Acc 61.83456183456183, F1 38.40176099773573
Fold 5: Acc 42.857142857142854, F1 30.0
Fold 6: Acc 66.09442060085837, F1 40.40443858006322
Fold 7: Acc 60.924369747899156, F1 39.31815520708469
Fold 8: Acc 49.275362318840585, F1 32.28237248527103
Average: 59.108%
F1: 37.113%
Loss: 0.678%


STARTING TE

## POS


In [None]:
class pos_model(nn.Module):
    def __init__(self):
        super(pos_model, self).__init__() # 1*20
        self.layers = nn.Sequential(
            # nn.Dropout(0.5),
            nn.Linear(21, 4),
            nn.ELU(),
            nn.Dropout(0.5),
            nn.Linear(4, 1)
        )

    def forward(self, x):
        x = self.layers(x)
        return x


In [None]:
model = pos_model()
dataset = pheme_pos_final
criterion = nn.BCEWithLogitsLoss()
# epochs = 10

testing_epochs = [200]
testing_results = []

for epoch in testing_epochs:
    result = cv_process(dataset, criterion, events=pheme_event, target=pheme_y, epochs=epoch, verbose=False)
    testing_results.append(result)

In [None]:
epochs_diff(testing_results)

In [None]:
class pos_model(nn.Module):
    def __init__(self):
        super(pos_model, self).__init__() # 1*20
        self.layers = nn.Sequential(
            nn.Dropout(0.5),
            nn.Linear(21, 8),
            nn.ELU(),
            nn.Dropout(0.5),
            nn.Linear(8, 3),
            nn.ELU(),
            nn.Linear(8, 1)
        )

    def forward(self, x):
        x = self.layers(x)
        return x


In [None]:
model = pos_model()
dataset = pheme_pos_final
criterion = nn.BCEWithLogitsLoss()
# epochs = 10

testing_epochs = [100,200]
testing_results = []

for epoch in testing_epochs:
    result = cv_process(dataset, criterion, events=pheme_event, target=pheme_y, epochs=epoch, verbose=False)
    testing_results.append(result)

In [None]:
epochs_diff(testing_results)

## Thread

In [None]:
class thread_model(nn.Module):
    def __init__(self):
        super(thread_model, self).__init__() # 1*20
        self.layers = nn.Sequential(
            # nn.Dropout(0.5),
            nn.Linear(52, 8),
            nn.ELU(),
            nn.Dropout(0.5),
            nn.Linear(8, 1)
        )

    def forward(self, x):
        x = self.layers(x)
        return x


In [None]:
model = thread_model()
dataset = pheme_thread_final_avg
criterion = nn.BCEWithLogitsLoss()
# epochs = 10

testing_epochs = [200]
testing_results = []

for epoch in testing_epochs:
    result = cv_process(dataset, criterion, events=pheme_event, target=pheme_y, epochs=epoch, verbose=False)
    testing_results.append(result)

In [None]:
epochs_diff(testing_results)

In [None]:
class thread_model(nn.Module):
    def __init__(self):
        super(thread_model, self).__init__() # 1*20
        self.layers = nn.Sequential(
            nn.Dropout(0.5),
            nn.Linear(52, 12),
            nn.ELU(),
            nn.Dropout(0.5),
            nn.Linear(12, 8),
            nn.ELU(),
            nn.Linear(8, 1)
        )

    def forward(self, x):
        x = self.layers(x)
        return x

In [None]:
model = thread_model()
dataset = pheme_thread_final_avg
criterion = nn.BCEWithLogitsLoss()
# epochs = 10

testing_epochs = [200]
testing_results = []

for epoch in testing_epochs:
    result = cv_process(dataset, criterion, events=pheme_event, target=pheme_y, epochs=epoch, verbose=False)
    testing_results.append(result)

In [None]:
epochs_diff(testing_results)

## SPARSE + POS + Thread

In [None]:
# class all_sparse_model(nn.Module):
#     def __init__(self):
#         super(all_sparse_model, self).__init__() # 1*20
#         self.layers = nn.Sequential(
#             nn.Dropout(0.3),
#             nn.Linear(52, 8),
#             nn.ELU(),
#             nn.Dropout(0.3),
#             nn.Linear(8, 1)
#         )

#     def forward(self, x):
#         x = self.layers(x)
#         return x

class all_sparse_model(nn.Module):
    def __init__(self):
        super(all_sparse_model, self).__init__() # 1*20
        self.layers = nn.Sequential(
            # nn.Dropout(0.5),
            nn.Linear(101, 32),
            nn.ELU(),
            nn.Dropout(0.5),
            nn.Linear(32, 10),
            nn.ELU(),
            nn.Linear(10, 1)
        )

    def forward(self, x):
        x = self.layers(x)
        return x

In [None]:
model = all_sparse_model()
dataset = pd.concat([pheme_sparse_final, pheme_pos_final, pheme_thread_final_avg],axis=1)
criterion = nn.BCEWithLogitsLoss()
# epochs = 10

testing_epochs = [50,100]
testing_results = []

for epoch in testing_epochs:
    result = cv_process(dataset, criterion, events=pheme_event, target=pheme_y, epochs=epoch, verbose=False)
    testing_results.append(result)

In [None]:
epochs_diff(testing_results)

In [None]:
# class all_sparse_model(nn.Module):
#     def __init__(self):
#         super(all_sparse_model, self).__init__() # 1*20
#         self.layers = nn.Sequential(
#             nn.Dropout(0.3),
#             nn.Linear(52, 8),
#             nn.ELU(),
#             nn.Dropout(0.3),
#             nn.Linear(8, 1)
#         )

#     def forward(self, x):
#         x = self.layers(x)
#         return x

class all_sparse_model(nn.Module):
    def __init__(self):
        super(all_sparse_model, self).__init__() # 1*20
        self.layers = nn.Sequential(
            # nn.Dropout(0.5),
            nn.Linear(101, 50),
            nn.ELU(),
            nn.Dropout(0.5),
            nn.Linear(50, 25),
            nn.ELU(),
            nn.Linear(25, 1)
        )

    def forward(self, x):
        x = self.layers(x)
        return x

In [None]:
model = all_sparse_model()
dataset = pd.concat([pheme_sparse_final, pheme_pos_final, pheme_thread_final_avg],axis=1)
criterion = nn.BCEWithLogitsLoss()
# epochs = 10

testing_epochs = [100,200]
testing_results = []

for epoch in testing_epochs:
    result = cv_process(dataset, criterion, events=pheme_event, target=pheme_y, epochs=epoch, verbose=False)
    testing_results.append(result)

In [None]:
epochs_diff(testing_results)

## SPARSE + POS + Thread Standardization

In [None]:
class all_sparse_model(nn.Module):
    def __init__(self):
        super(all_sparse_model, self).__init__() # 1*20
        self.layers = nn.Sequential(
            # nn.Dropout(0.5),
            nn.Linear(101, 20),
            nn.ELU(),
            nn.Dropout(0.5),
            nn.Linear(20, 6),
            nn.ELU(),
            nn.Linear(6, 1)
        )

    def forward(self, x):
        x = self.layers(x)
        return x

In [None]:
model = all_sparse_model()
dataset = pd.concat([pheme_sparse_final, pheme_pos_final, pheme_thread_final_avg],axis=1)
criterion = nn.BCEWithLogitsLoss()
# epochs = 10

testing_epochs = [5,10,25,50,100]
testing_results = []

for epoch in testing_epochs:
    result = cv_process(dataset, criterion, events=pheme_event, target=pheme_y, epochs=epoch, verbose=False, scaling=True)
    testing_results.append(result)

In [None]:
epochs_diff(testing_results)

In [None]:
# class all_sparse_model(nn.Module):
#     def __init__(self):
#         super(all_sparse_model, self).__init__() # 1*20
#         self.layers = nn.Sequential(
#             nn.Dropout(0.3),
#             nn.Linear(52, 8),
#             nn.ELU(),
#             nn.Dropout(0.3),
#             nn.Linear(8, 1)
#         )

#     def forward(self, x):
#         x = self.layers(x)
#         return x

class all_sparse_model(nn.Module):
    def __init__(self):
        super(all_sparse_model, self).__init__() # 1*20
        self.layers = nn.Sequential(
            # nn.Dropout(0.5),
            nn.Linear(101, 50),
            nn.ELU(),
            nn.Dropout(0.5),
            nn.Linear(50, 25),
            nn.ELU(),
            nn.Linear(25, 1)
        )

    def forward(self, x):
        x = self.layers(x)
        return x

In [None]:
model = all_sparse_model()
dataset = pd.concat([pheme_sparse_final, pheme_pos_final, pheme_thread_final_avg],axis=1)
criterion = nn.BCEWithLogitsLoss()
# epochs = 10

testing_epochs = [100,200]
testing_results = []

for epoch in testing_epochs:
    result = cv_process(dataset, criterion, events=pheme_event, target=pheme_y, epochs=epoch, verbose=False)
    testing_results.append(result)

In [None]:
epochs_diff(testing_results)

## BERT

In [34]:
class bert_model(nn.Module):
    def __init__(self):
        super(bert_model, self).__init__() # 1*20
        self.layers = nn.Sequential(
            # nn.Dropout(0.5),
            nn.Linear(768, 50, bias=True),
            nn.ELU(),
            nn.Dropout(0.5),
            nn.Linear(50, 8, bias=True),
            nn.ELU(),
            nn.Dropout(0.5),
            nn.Linear(8, 1)
        )
    def forward(self, x):
        x = self.layers(x)
        return x


model = bert_model()
dataset = pd.concat([pheme_bert_simple_normal,ext_bert_simple_normal],axis=0,ignore_index=True)
criterion = nn.BCEWithLogitsLoss()
# epochs = 10

testing_epochs = [2,3,4,5,10,25,50,100]
testing_results = []

writeLog().write("./Model/"+bert_model.__name__+"_"+"log.txt",f"\n\n************************************************************\nSTARTING A CROSS-VALIDATION ... {testing_epochs} epochs\n************************************************************")


for epoch in testing_epochs:
    result = cv_process(dataset, criterion, modelClass=bert_model, events=all_event, target=all_y, epochs=epoch, verbose=False)
    testing_results.append(result)

epochs_diff(testing_results)




************************************************************
STARTING A CROSS-VALIDATION ... [2, 3, 4, 5, 10, 25, 50, 100] epochs
************************************************************

STARTING TEST of 2 EPOCH

> FOLD 1
> FOLD 2
> FOLD 3
> FOLD 4
> FOLD 5
> FOLD 6
> FOLD 7
> FOLD 8
> FOLD 9


----------------------------------------------------------------------------
>>>K-FOLD CROSS VALIDATION RESULTS FOR 9 FOLDS
----------------------------------------------------------------------------
Fold 0: Acc 65.31986531986533, F1 38.917415269719854
Fold 1: Acc 75.67804024496938, F1 52.860823533214806
Fold 2: Acc 66.52452025586354, F1 40.75424995038963
Fold 3: Acc 77.30337078651685, F1 43.649535368374
Fold 4: Acc 73.54627354627354, F1 43.34452112732069
Fold 5: Acc 50.0, F1 33.33333333333333
Fold 6: Acc 60.94420600858369, F1 38.906737067044254
Fold 7: Acc 62.18487394957983, F1 38.41633103862764
Fold 8: Acc 56.52173913043478, F1 36.61520676630513
Average: 65.336%
F1: 40.755%
Loss: 0.657

In [35]:
class bert_model(nn.Module):
    def __init__(self):
        super(bert_model, self).__init__() # 1*20
        self.layers = nn.Sequential(
            # nn.Dropout(0.5),
            nn.Linear(768, 50, bias=True),
            nn.ELU(),
            # nn.Dropout(0.5),
            nn.Linear(50, 8, bias=True),
            nn.ELU(),
            # nn.Dropout(0.5),
            nn.Linear(8, 1)
        )
    def forward(self, x):
        x = self.layers(x)
        return x


model = bert_model()
dataset = pd.concat([pheme_bert_simple_normal,ext_bert_simple_normal],axis=0,ignore_index=True)
criterion = nn.BCEWithLogitsLoss()
# epochs = 10

testing_epochs = [2,3,4,5,10,25,50,100]
testing_results = []

writeLog().write("./Model/"+bert_model.__name__+"_"+"log.txt",f"\n\n************************************************************\nSTARTING A CROSS-VALIDATION ... {testing_epochs} epochs\n************************************************************")


for epoch in testing_epochs:
    result = cv_process(dataset, criterion, modelClass=bert_model, events=all_event, target=all_y, epochs=epoch, verbose=False)
    testing_results.append(result)

epochs_diff(testing_results)




************************************************************
STARTING A CROSS-VALIDATION ... [2, 3, 4, 5, 10, 25, 50, 100] epochs
************************************************************

STARTING TEST of 2 EPOCH

> FOLD 1
> FOLD 2
> FOLD 3
> FOLD 4
> FOLD 5
> FOLD 6
> FOLD 7
> FOLD 8
> FOLD 9


----------------------------------------------------------------------------
>>>K-FOLD CROSS VALIDATION RESULTS FOR 9 FOLDS
----------------------------------------------------------------------------
Fold 0: Acc 80.13468013468014, F1 45.372089880119
Fold 1: Acc 72.17847769028872, F1 42.34933598612385
Fold 2: Acc 75.47974413646055, F1 45.66179898243038
Fold 3: Acc 77.64044943820225, F1 48.89709070482427
Fold 4: Acc 74.36527436527437, F1 44.78585585646129
Fold 5: Acc 85.71428571428571, F1 46.153846153846146
Fold 6: Acc 79.39914163090128, F1 49.4352435911727
Fold 7: Acc 65.54621848739495, F1 39.36211791191444
Fold 8: Acc 58.69565217391305, F1 36.56052336946673
Average: 74.350%
F1: 44.286%
L

## BERT + ALL SPARSE

In [36]:
bert_simple_all = pd.concat([pheme_bert_simple_normal,ext_bert_simple_normal],axis=0,ignore_index=True)
sparse_all = pd.concat([pheme_sparse_final, ext_sparse_final],axis=0,ignore_index=True)
thread_avg_all = pd.concat([pheme_thread_final_avg, ext_thread_final_avg],axis=0,ignore_index=True)
pos_all = pd.concat([pheme_pos_final, ext_pos_final],axis=0,ignore_index=True)


In [40]:
bert_simple_all.shape, sparse_all.shape, thread_avg_all.shape, pos_all.shape, dataset.shape

((6425, 768), (6425, 28), (6425, 52), (6425, 21), (6425, 869))

In [42]:
class bert_sparse_model(nn.Module):
    def __init__(self):
        super(bert_sparse_model, self).__init__() # 1*20
        self.layers = nn.Sequential(
            nn.Linear(869, 50, bias=True),
            nn.ELU(),
            nn.Dropout(0.5),
            nn.Linear(50, 8, bias=True),
            nn.ELU(),
            nn.Dropout(0.5),
            nn.Linear(8, 3, bias=True),
            nn.ELU(),
            nn.Dropout(0.5),
            nn.Linear(3, 1)
        )
    def forward(self, x):
        x = self.layers(x)
        return x


# model = bert_sparse_model()
# dataset = pd.concat([pheme_bert_simple_normal,ext_bert_simple_normal],axis=0,ignore_index=True)
dataset = pd.concat([bert_simple_all, sparse_all, thread_avg_all, pos_all],axis=1)

criterion = nn.BCEWithLogitsLoss()
# epochs = 10

testing_epochs = [2,3,4,5,10,25,50,100]
testing_results = []

writeLog().write("./Model/"+bert_sparse_model.__name__+"_"+"log.txt",f"\n\n************************************************************\nSTARTING A CROSS-VALIDATION ... {testing_epochs} epochs\n************************************************************")


for epoch in testing_epochs:
    result = cv_process(dataset, criterion, modelClass=bert_sparse_model, events=all_event, target=all_y, epochs=epoch, verbose=False)
    testing_results.append(result)

epochs_diff(testing_results)




************************************************************
STARTING A CROSS-VALIDATION ... [2, 3, 4, 5, 10, 25, 50, 100] epochs
************************************************************

STARTING TEST of 2 EPOCH

> FOLD 1
Reset trainable parameters of layer = Linear(in_features=869, out_features=50, bias=True)
Reset trainable parameters of layer = Linear(in_features=50, out_features=8, bias=True)
Reset trainable parameters of layer = Linear(in_features=8, out_features=3, bias=True)
Reset trainable parameters of layer = Linear(in_features=3, out_features=1, bias=True)
> FOLD 2
Reset trainable parameters of layer = Linear(in_features=869, out_features=50, bias=True)
Reset trainable parameters of layer = Linear(in_features=50, out_features=8, bias=True)
Reset trainable parameters of layer = Linear(in_features=8, out_features=3, bias=True)
Reset trainable parameters of layer = Linear(in_features=3, out_features=1, bias=True)
> FOLD 3
Reset trainable parameters of layer = Linear(in_f

In [None]:
class bert_sparse_model(nn.Module):
    def __init__(self):
        super(bert_sparse_model, self).__init__() # 1*20
        self.layers = nn.Sequential(
            # nn.Dropout(0.5),
            nn.Linear(869, 72, bias=True),
            nn.ELU(),
            # nn.Dropout(0.5),
            nn.Linear(72, 10, bias = True),
            nn.ELU(),
            nn.Linear(10, 1)
        )
    def forward(self, x):
        x = self.layers(x)
        return x

In [None]:
model = bert_sparse_model()
dataset = pd.concat([pheme_sparse_final, pheme_pos_final, pheme_thread_final_avg, pheme_bert_brackets_normal],axis=1)
criterion = nn.BCEWithLogitsLoss()
# epochs = 10

testing_epochs = [5,10,50,100]
testing_results = []

for epoch in testing_epochs:
    result = cv_process(dataset, criterion, events=pheme_event, target=pheme_y, epochs=epoch, verbose=False,scaling=True)
    testing_results.append(result)

In [None]:
epochs_diff(testing_results)

## BERT + ALL SPARSE (STD)

# 9 Folds Batch


In [66]:
from datetime import datetime

In [112]:
class root_model(nn.Module):
    def __init__(self):
        super(root_model, self).__init__()  # 1*20
        self.layers = nn.Sequential(
            # nn.Dropout(0.5),
            nn.Linear(49, 16),
            nn.ELU(),
            nn.Dropout(0.5),
            nn.Linear(16, 5),
            nn.ELU(),
            nn.Dropout(0.5),
            nn.Linear(5, 1)
        )

    def forward(self, x):
        x = self.layers(x)
        return x

# model = sparse_model()
dataset = all_root
criterion = nn.BCEWithLogitsLoss()

testing_epochs = [100]
testing_results = []

writeLog().write("./Model/"+root_model.__name__+"_"+"log.txt",f"\n\n************************************************************\nSTARTING A CROSS-VALIDATION ... {testing_epochs} epochs\n************************************************************")
writeLog().write("./Model/"+root_model.__name__+"_"+"log.txt",f"TIME: {datetime.now().strftime('%d/%m/%Y %H:%M:%S')}")
writeLog().write("./Model/"+root_model.__name__+"_"+"log.txt",f"LAYER: {[layer for layer in root_model().children()]}")

for epoch in testing_epochs:
    result = cv_process(dataset, criterion, modelClass=root_model, events=all_event, target=all_y, epochs=epoch, verbose=False, scaling=False)
    testing_results.append(result)

writeLog().writeWithoutCR("./Model/"+root_model.__name__+"_"+"log.txt",f"RESULT:\n{epochs_diff(testing_results)}")



************************************************************
STARTING A CROSS-VALIDATION ... [100] epochs
************************************************************
TIME: 31/03/2021 21:34:10
LAYER: [Sequential(
  (0): Linear(in_features=49, out_features=16, bias=True)
  (1): ELU(alpha=1.0)
  (2): Dropout(p=0.5, inplace=False)
  (3): Linear(in_features=16, out_features=5, bias=True)
  (4): ELU(alpha=1.0)
  (5): Dropout(p=0.5, inplace=False)
  (6): Linear(in_features=5, out_features=1, bias=True)
)]

STARTING TEST of 100 EPOCH

> FOLD 1
Parameter Resetted
Parameter Resetted
Parameter Resetted
Starting epoch 50
Starting epoch 100
> FOLD 2
Parameter Resetted
Parameter Resetted
Parameter Resetted
Starting epoch 50


KeyboardInterrupt: 

In [86]:
class root_model(nn.Module):
    def __init__(self):
        super(root_model, self).__init__()  # 1*20
        self.layers = nn.Sequential(
            # nn.Dropout(0.5),
            nn.Linear(49, 12),
            nn.ELU(),
            nn.Dropout(0.5),
            nn.Linear(12, 4),
            nn.ELU(),
            nn.Dropout(0.5),
            nn.Linear(4, 1)
        )

    def forward(self, x):
        x = self.layers(x)
        return x

# model = sparse_model()
dataset = all_root
criterion = nn.BCEWithLogitsLoss()

testing_epochs = [50, 100]
testing_results = []

writeLog().write("./Model/"+root_model.__name__+"_"+"log.txt",f"\n\n************************************************************\nSTARTING A CROSS-VALIDATION ... {testing_epochs} epochs\n************************************************************")
writeLog().write("./Model/"+root_model.__name__+"_"+"log.txt",f"TIME: {datetime.now().strftime('%d/%m/%Y %H:%M:%S')}")
writeLog().write("./Model/"+root_model.__name__+"_"+"log.txt",f"LAYER: {[layer for layer in root_model().children()]}")

for epoch in testing_epochs:
    result = cv_process(dataset, criterion, modelClass=root_model, events=all_event, target=all_y, epochs=epoch, verbose=False, scaling=False)
    testing_results.append(result)

writeLog().writeWithoutCR("./Model/"+root_model.__name__+"_"+"log.txt",f"RESULT:\n{epochs_diff(testing_results)}")





************************************************************
STARTING A CROSS-VALIDATION ... [50, 100] epochs
************************************************************
TIME: 31/03/2021 17:25:36
LAYER: [Sequential(
  (0): Linear(in_features=49, out_features=12, bias=True)
  (1): ELU(alpha=1.0)
  (2): Dropout(p=0.5, inplace=False)
  (3): Linear(in_features=12, out_features=4, bias=True)
  (4): ELU(alpha=1.0)
  (5): Dropout(p=0.5, inplace=False)
  (6): Linear(in_features=4, out_features=1, bias=True)
)]

STARTING TEST of 50 EPOCH

> FOLD 1
Parameter Resetted
Parameter Resetted
Parameter Resetted
Starting epoch 50
> FOLD 2
Parameter Resetted
Parameter Resetted
Parameter Resetted
Starting epoch 50
> FOLD 3
Parameter Resetted
Parameter Resetted
Parameter Resetted
Starting epoch 50
> FOLD 4
Parameter Resetted
Parameter Resetted
Parameter Resetted
Starting epoch 50
> FOLD 5
Parameter Resetted
Parameter Resetted
Parameter Resetted
Starting epoch 50
> FOLD 6
Parameter Resetted
Parameter Res

In [87]:
class root_model(nn.Module):
    def __init__(self):
        super(root_model, self).__init__()  # 1*20
        self.layers = nn.Sequential(
            # nn.Dropout(0.5),
            nn.Linear(49, 8),
            nn.ELU(),
            nn.Dropout(0.5),
            nn.Linear(8, 3),
            nn.ELU(),
            nn.Dropout(0.5),
            nn.Linear(3, 1)
        )

    def forward(self, x):
        x = self.layers(x)
        return x

# model = sparse_model()
dataset = all_root
criterion = nn.BCEWithLogitsLoss()

testing_epochs = [50, 100]
testing_results = []

writeLog().write("./Model/"+root_model.__name__+"_"+"log.txt",f"\n\n************************************************************\nSTARTING A CROSS-VALIDATION ... {testing_epochs} epochs\n************************************************************")
writeLog().write("./Model/"+root_model.__name__+"_"+"log.txt",f"TIME: {datetime.now().strftime('%d/%m/%Y %H:%M:%S')}")
writeLog().write("./Model/"+root_model.__name__+"_"+"log.txt",f"LAYER: {[layer for layer in root_model().children()]}")

for epoch in testing_epochs:
    result = cv_process(dataset, criterion, modelClass=root_model, events=all_event, target=all_y, epochs=epoch, verbose=False, scaling=False)
    testing_results.append(result)

writeLog().writeWithoutCR("./Model/"+root_model.__name__+"_"+"log.txt",f"RESULT:\n{epochs_diff(testing_results)}")



************************************************************
STARTING A CROSS-VALIDATION ... [50, 100] epochs
************************************************************
TIME: 31/03/2021 17:30:37
LAYER: [Sequential(
  (0): Linear(in_features=49, out_features=8, bias=True)
  (1): ELU(alpha=1.0)
  (2): Dropout(p=0.5, inplace=False)
  (3): Linear(in_features=8, out_features=3, bias=True)
  (4): ELU(alpha=1.0)
  (5): Dropout(p=0.5, inplace=False)
  (6): Linear(in_features=3, out_features=1, bias=True)
)]

STARTING TEST of 50 EPOCH

> FOLD 1
Parameter Resetted
Parameter Resetted
Parameter Resetted
Starting epoch 50
> FOLD 2
Parameter Resetted
Parameter Resetted
Parameter Resetted
Starting epoch 50
> FOLD 3
Parameter Resetted
Parameter Resetted
Parameter Resetted
Starting epoch 50
> FOLD 4
Parameter Resetted
Parameter Resetted
Parameter Resetted
Starting epoch 50
> FOLD 5
Parameter Resetted
Parameter Resetted
Parameter Resetted
Starting epoch 50
> FOLD 6
Parameter Resetted
Parameter Reset

In [120]:
class thread_model(nn.Module):
    def __init__(self):
        super(thread_model, self).__init__() # 1*20
        self.layers = nn.Sequential(
            # nn.Dropout(0.5),
            nn.Linear(52, 12),
            nn.ELU(),
            nn.Dropout(0.5),
            nn.Linear(12, 1)
        )

    def forward(self, x):
        x = self.layers(x)
        return x

# model = thread_model()
dataset = all_thread
criterion = nn.BCEWithLogitsLoss()
# epochs = 10

testing_epochs = [100]
testing_results = []

writeLog().write("./Model/"+thread_model.__name__+"_"+"log.txt",f"\n\n************************************************************\nSTARTING A CROSS-VALIDATION ... {testing_epochs} epochs\n************************************************************")
writeLog().write("./Model/"+thread_model.__name__+"_"+"log.txt",f"TIME: {datetime.now().strftime('%d/%m/%Y %H:%M:%S')}")
writeLog().write("./Model/"+thread_model.__name__+"_"+"log.txt",f"LAYER: {[layer for layer in thread_model().children()]}")

for epoch in testing_epochs:
    result = cv_process(dataset, criterion, modelClass=thread_model, events=all_event, target=all_y, epochs=epoch, verbose=False, scaling=False)
    testing_results.append(result)

writeLog().writeWithoutCR("./Model/"+thread_model.__name__+"_"+"log.txt",f"RESULT:\n{epochs_diff(testing_results)}")



************************************************************
STARTING A CROSS-VALIDATION ... [100] epochs
************************************************************
TIME: 31/03/2021 22:21:44
LAYER: [Sequential(
  (0): Linear(in_features=52, out_features=12, bias=True)
  (1): ELU(alpha=1.0)
  (2): Dropout(p=0.5, inplace=False)
  (3): Linear(in_features=12, out_features=1, bias=True)
)]

STARTING TEST of 100 EPOCH

> FOLD 1
Parameter Resetted
Parameter Resetted
Starting epoch 50
Starting epoch 100
> FOLD 2
Parameter Resetted
Parameter Resetted
Starting epoch 50
Starting epoch 100
> FOLD 3
Parameter Resetted
Parameter Resetted
Starting epoch 50
Starting epoch 100
> FOLD 4
Parameter Resetted
Parameter Resetted
Starting epoch 50
Starting epoch 100
> FOLD 5
Parameter Resetted
Parameter Resetted
Starting epoch 50
Starting epoch 100
> FOLD 6
Parameter Resetted
Parameter Resetted
Starting epoch 50
Starting epoch 100
> FOLD 7
Parameter Resetted
Parameter Resetted
Starting epoch 50
Starting e

In [116]:
s

NameError: name 's' is not defined

In [119]:
class all_sparse_model(nn.Module):
    def __init__(self):
        super(all_sparse_model, self).__init__() # 1*20
        self.layers = nn.Sequential(
            # nn.Dropout(0.5),
            nn.Linear(101, 24),
            nn.ELU(),
            nn.Dropout(0.5),
            nn.Linear(24, 1),
            # nn.ELU(),
            # nn.Dropout(0.5),
            # nn.Linear(6, 1)
        )

    def forward(self, x):
        x = self.layers(x)
        return x

# model = thread_model()
dataset = all_root_thread
criterion = nn.BCEWithLogitsLoss()
# epochs = 10

testing_epochs = [100]
testing_results = []

writeLog().write("./Model/"+all_sparse_model.__name__+"_"+"log.txt",f"\n\n************************************************************\nSTARTING A CROSS-VALIDATION ... {testing_epochs} epochs\n************************************************************")
writeLog().write("./Model/"+all_sparse_model.__name__+"_"+"log.txt",f"TIME: {datetime.now().strftime('%d/%m/%Y %H:%M:%S')}")
writeLog().write("./Model/"+all_sparse_model.__name__+"_"+"log.txt",f"LAYER: {[layer for layer in all_sparse_model().children()]}")

for epoch in testing_epochs:
    result = cv_process(dataset, criterion, modelClass=all_sparse_model, events=all_event, target=all_y, epochs=epoch, verbose=False, scaling=False)
    testing_results.append(result)

# writeLog().writeWithoutCR("./Model/"+all_sparse_model.__name__+"_"+"log.txt",f"RESULT:\n{epochs_diff(testing_results)}")2



************************************************************
STARTING A CROSS-VALIDATION ... [100] epochs
************************************************************
TIME: 31/03/2021 22:21:29
LAYER: [Sequential(
  (0): Linear(in_features=101, out_features=24, bias=True)
  (1): ELU(alpha=1.0)
  (2): Dropout(p=0.5, inplace=False)
  (3): Linear(in_features=24, out_features=1, bias=True)
)]

STARTING TEST of 100 EPOCH

> FOLD 1
Parameter Resetted
Parameter Resetted


KeyboardInterrupt: 

In [91]:
class bert_model(nn.Module):
    def __init__(self):
        super(bert_model, self).__init__() # 1*20
        self.layers = nn.Sequential(
            # nn.Dropout(0.5),
            nn.Linear(768, 50, bias=True),
            nn.ELU(),
            nn.Dropout(0.5),
            nn.Linear(50, 8, bias=True),
            nn.ELU(),
            nn.Dropout(0.5),
            nn.Linear(8, 1)
        )
    def forward(self, x):
        x = self.layers(x)
        return x


# model = bert_model()
dataset = all_bert_simple
criterion = nn.BCEWithLogitsLoss()
# epochs = 10

testing_epochs = [100]
testing_results = []

writeLog().write("./Model/"+bert_model.__name__+"_"+"log.txt",f"\n\n************************************************************\nSTARTING A CROSS-VALIDATION ... {testing_epochs} epochs\n************************************************************")


for epoch in testing_epochs:
    result = cv_process(dataset, criterion, modelClass=bert_model, events=all_event, target=all_y, epochs=epoch, verbose=False)
    testing_results.append(result)

epochs_diff(testing_results)




************************************************************
STARTING A CROSS-VALIDATION ... [100] epochs
************************************************************

STARTING TEST of 100 EPOCH

> FOLD 1
Parameter Resetted
Parameter Resetted
Parameter Resetted
Starting epoch 50
Starting epoch 100
> FOLD 2
Parameter Resetted
Parameter Resetted
Parameter Resetted
Starting epoch 50
Starting epoch 100
> FOLD 3
Parameter Resetted
Parameter Resetted
Parameter Resetted
Starting epoch 50
Starting epoch 100
> FOLD 4
Parameter Resetted
Parameter Resetted
Parameter Resetted
Starting epoch 50
Starting epoch 100
> FOLD 5
Parameter Resetted
Parameter Resetted
Parameter Resetted
Starting epoch 50
Starting epoch 100
> FOLD 6
Parameter Resetted
Parameter Resetted
Parameter Resetted
Starting epoch 50
Starting epoch 100
> FOLD 7
Parameter Resetted
Parameter Resetted
Parameter Resetted
Starting epoch 50
Starting epoch 100
> FOLD 8
Parameter Resetted
Parameter Resetted
Parameter Resetted
Starting epoch 

'Average: 69.51%\nF1: 41.44%\nLoss: 0.63\n'

In [113]:
class bert_sparse_model(nn.Module):
    def __init__(self):
        super(bert_sparse_model, self).__init__() # 1*20
        self.layers = nn.Sequential(
            nn.Linear(869, 72, bias=True),
            nn.ELU(),
            nn.Dropout(0.5),
            nn.Linear(72, 10, bias = True),
            nn.ELU(),
            nn.Dropout(0.5),
            nn.Linear(10, 1)
        )
    def forward(self, x):
        x = self.layers(x)
        return x

# model = bert_model()
dataset = all_total
criterion = nn.BCEWithLogitsLoss()
# epochs = 10

testing_epochs = [100,200]
testing_results = []

writeLog().write("./Model/"+bert_sparse_model.__name__+"_"+"log.txt",f"\n\n************************************************************\nSTARTING A CROSS-VALIDATION ... {testing_epochs} epochs\n************************************************************")
writeLog().write("./Model/"+bert_sparse_model.__name__+"_"+"log.txt",f"TIME: {datetime.now().strftime('%d/%m/%Y %H:%M:%S')}")
writeLog().write("./Model/"+bert_sparse_model.__name__+"_"+"log.txt",f"LAYER: {[layer for layer in bert_sparse_model().children()]}")


for epoch in testing_epochs:
    result = cv_process(dataset, criterion, modelClass=bert_sparse_model, events=all_event, target=all_y, epochs=epoch, verbose=False)
    testing_results.append(result)

epochs_diff(testing_results)




************************************************************
STARTING A CROSS-VALIDATION ... [100, 200] epochs
************************************************************
TIME: 31/03/2021 21:43:35
LAYER: [Sequential(
  (0): Linear(in_features=869, out_features=72, bias=True)
  (1): ELU(alpha=1.0)
  (2): Dropout(p=0.5, inplace=False)
  (3): Linear(in_features=72, out_features=10, bias=True)
  (4): ELU(alpha=1.0)
  (5): Dropout(p=0.5, inplace=False)
  (6): Linear(in_features=10, out_features=1, bias=True)
)]

STARTING TEST of 100 EPOCH

> FOLD 1
Parameter Resetted
Parameter Resetted
Parameter Resetted
Starting epoch 50
Starting epoch 100
> FOLD 2
Parameter Resetted
Parameter Resetted
Parameter Resetted
Starting epoch 50
Starting epoch 100
> FOLD 3
Parameter Resetted
Parameter Resetted
Parameter Resetted
Starting epoch 50
Starting epoch 100
> FOLD 4
Parameter Resetted
Parameter Resetted
Parameter Resetted
Starting epoch 50
Starting epoch 100
> FOLD 5
Parameter Resetted
Parameter Resett

In [98]:
testing_results

[{0: [30.014430014430015, 20.329881502230815, 0.9727699329441358],
  1: [52.31846019247593, 34.094144605369294, 0.7706452375530467],
  2: [52.23880597014925, 33.81135426518758, 1.5158748644501416],
  3: [51.348314606741575, 33.655633914142015, 0.7604606889606862],
  4: [47.338247338247335, 31.74001647471132, 5.580640545299461],
  5: [50.0, 33.33333333333333, 0.6937494874000549],
  6: [42.48927038626609, 29.919186399219985, 0.737345610821196],
  7: [50.84033613445378, 33.669406786111594, 0.7304117864921313],
  8: [52.17391304347826, 35.17515949242477, 0.7004137376080388]}]

In [105]:
pd.DataFrame(testing_results[0])


Unnamed: 0,0,1,2,3,4,5,6,7,8
0,30.01443,52.31846,52.238806,51.348315,47.338247,50.0,42.48927,50.840336,52.173913
1,20.329882,34.094145,33.811354,33.655634,31.740016,33.333333,29.919186,33.669407,35.175159
2,0.97277,0.770645,1.515875,0.760461,5.580641,0.693749,0.737346,0.730412,0.700414


In [107]:
class bert_sparse_model(nn.Module):
    def __init__(self):
        super(bert_sparse_model, self).__init__() # 1*20
        self.layers = nn.Sequential(
            nn.Linear(869, 80, bias = True),
            nn.ELU(),
            nn.Dropout(0.5),
            nn.Linear(80, 16, bias = True),
            nn.ELU(),
            nn.Dropout(0.5),
            nn.Linear(16, 1)
        )
    def forward(self, x):
        x = self.layers(x)
        return x

# model = bert_model()
dataset = all_total
criterion = nn.BCEWithLogitsLoss()
# epochs = 10

testing_epochs = [100]
testing_results = []

writeLog().write("./Model/"+bert_sparse_model.__name__+"_"+"log.txt",f"\n\n************************************************************\nSTARTING A CROSS-VALIDATION ... {testing_epochs} epochs\n************************************************************")
writeLog().write("./Model/"+bert_sparse_model.__name__+"_"+"log.txt",f"TIME: {datetime.now().strftime('%d/%m/%Y %H:%M:%S')}")
writeLog().write("./Model/"+bert_sparse_model.__name__+"_"+"log.txt",f"LAYER: {[layer for layer in bert_sparse_model().children()]}")

for epoch in testing_epochs:
    result = cv_process(dataset, criterion, modelClass=bert_sparse_model, events=all_event, target=all_y, epochs=epoch, verbose=False)
    testing_results.append(result)

epochs_diff(testing_results)

df = pd.DataFrame(testing_results[0])



************************************************************
STARTING A CROSS-VALIDATION ... [100] epochs
************************************************************
TIME: 31/03/2021 18:58:07
LAYER: [Sequential(
  (0): Linear(in_features=869, out_features=80, bias=True)
  (1): ELU(alpha=1.0)
  (2): Dropout(p=0.5, inplace=False)
  (3): Linear(in_features=80, out_features=16, bias=True)
  (4): ELU(alpha=1.0)
  (5): Dropout(p=0.5, inplace=False)
  (6): Linear(in_features=16, out_features=1, bias=True)
)]

STARTING TEST of 100 EPOCH

> FOLD 1
Parameter Resetted
Parameter Resetted
Parameter Resetted
Starting epoch 50
Starting epoch 100
> FOLD 2
Parameter Resetted
Parameter Resetted
Parameter Resetted
Starting epoch 50
Starting epoch 100
> FOLD 3
Parameter Resetted
Parameter Resetted
Parameter Resetted
Starting epoch 50
Starting epoch 100
> FOLD 4
Parameter Resetted
Parameter Resetted
Parameter Resetted
Starting epoch 50
Starting epoch 100
> FOLD 5
Parameter Resetted
Parameter Resetted
Pa

# Batch 5

## Class

In [None]:
from datetime import datetime

In [121]:
class root_model(nn.Module):
    def __init__(self):
        super(root_model, self).__init__()  # 1*20
        self.layers = nn.Sequential(
            # nn.Dropout(0.5),
            nn.Linear(49, 16),
            nn.ELU(),
            nn.Dropout(0.5),
            nn.Linear(16, 5),
            nn.ELU(),
            nn.Dropout(0.5),
            nn.Linear(5, 1)
        )

    def forward(self, x):
        x = self.layers(x)
        return x

class root_model(nn.Module):
    def __init__(self):
        super(root_model, self).__init__()  # 1*20
        self.layers = nn.Sequential(
            # nn.Dropout(0.5),
            nn.Linear(49, 12),
            nn.ELU(),
            nn.Dropout(0.5),
            nn.Linear(12, 4),
            nn.ELU(),
            nn.Dropout(0.5),
            nn.Linear(4, 1)
        )

    def forward(self, x):
        x = self.layers(x)
        return x

class root_model(nn.Module):
    def __init__(self):
        super(root_model, self).__init__()  # 1*20
        self.layers = nn.Sequential(
            # nn.Dropout(0.5),
            nn.Linear(49, 8),
            nn.ELU(),
            nn.Dropout(0.5),
            nn.Linear(8, 3),
            nn.ELU(),
            nn.Dropout(0.5),
            nn.Linear(3, 1)
        )

    def forward(self, x):
        x = self.layers(x)
        return x

class thread_model(nn.Module):
    def __init__(self):
        super(thread_model, self).__init__() # 1*20
        self.layers = nn.Sequential(
            # nn.Dropout(0.5),
            nn.Linear(52, 12),
            nn.ELU(),
            nn.Dropout(0.5),
            nn.Linear(12, 1)
        )

    def forward(self, x):
        x = self.layers(x)
        return x

class all_sparse_model(nn.Module):
    def __init__(self):
        super(all_sparse_model, self).__init__() # 1*20
        self.layers = nn.Sequential(
            # nn.Dropout(0.5),
            nn.Linear(101, 24),
            nn.ELU(),
            nn.Dropout(0.5),
            nn.Linear(24, 6),
            nn.ELU(),
            nn.Dropout(0.5),
            nn.Linear(6, 1)
        )

    def forward(self, x):
        x = self.layers(x)
        return x

class all_sparse_model(nn.Module):
    def __init__(self):
        super(all_sparse_model, self).__init__() # 1*20
        self.layers = nn.Sequential(
            # nn.Dropout(0.5),
            nn.Linear(101, 24),
            nn.ELU(),
            nn.Dropout(0.5),
            nn.Linear(24, 1),
        )

    def forward(self, x):
        x = self.layers(x)
        return x

class bert_model(nn.Module):
    def __init__(self):
        super(bert_model, self).__init__() # 1*20
        self.layers = nn.Sequential(
            # nn.Dropout(0.5),
            nn.Linear(768, 50, bias=True),
            nn.ELU(),
            nn.Dropout(0.5),
            nn.Linear(50, 8, bias=True),
            nn.ELU(),
            nn.Dropout(0.5),
            nn.Linear(8, 1)
        )
    def forward(self, x):
        x = self.layers(x)
        return x



## Code


In [None]:
# model = sparse_model()
dataset = all_root
criterion = nn.BCEWithLogitsLoss()

testing_epochs = [100]
testing_results = []

writeLog().write("./Model/"+root_model.__name__+"_"+"log.txt",f"\n\n************************************************************\nSTARTING A CROSS-VALIDATION ... {testing_epochs} epochs\n************************************************************")
writeLog().write("./Model/"+root_model.__name__+"_"+"log.txt",f"TIME: {datetime.now().strftime('%d/%m/%Y %H:%M:%S')}")
writeLog().write("./Model/"+root_model.__name__+"_"+"log.txt",f"LAYER: {[layer for layer in root_model().children()]}")

for epoch in testing_epochs:
    result = cv_process(dataset, criterion, modelClass=root_model, events=all_event, target=all_y, epochs=epoch, verbose=False, scaling=False)
    testing_results.append(result)

writeLog().writeWithoutCR("./Model/"+root_model.__name__+"_"+"log.txt",f"RESULT:\n{epochs_diff(testing_results)}")

In [None]:
dataset = all_thread
criterion = nn.BCEWithLogitsLoss()

testing_epochs = [100]
testing_results = []

writeLog().write("./Model/"+thread_model.__name__+"_"+"log.txt",f"\n\n************************************************************\nSTARTING A CROSS-VALIDATION ... {testing_epochs} epochs\n************************************************************")
writeLog().write("./Model/"+thread_model.__name__+"_"+"log.txt",f"TIME: {datetime.now().strftime('%d/%m/%Y %H:%M:%S')}")
writeLog().write("./Model/"+thread_model.__name__+"_"+"log.txt",f"LAYER: {[layer for layer in thread_model().children()]}")

for epoch in testing_epochs:
    result = cv_process(dataset, criterion, modelClass=thread_model, events=all_event, target=all_y, epochs=epoch, verbose=False, scaling=False)
    testing_results.append(result)

writeLog().writeWithoutCR("./Model/"+thread_model.__name__+"_"+"log.txt",f"RESULT:\n{epochs_diff(testing_results)}")

In [None]:
dataset = all_root_thread
criterion = nn.BCEWithLogitsLoss()

testing_epochs = [100]
testing_results = []

writeLog().write("./Model/"+all_sparse_model.__name__+"_"+"log.txt",f"\n\n************************************************************\nSTARTING A CROSS-VALIDATION ... {testing_epochs} epochs\n************************************************************")
writeLog().write("./Model/"+all_sparse_model.__name__+"_"+"log.txt",f"TIME: {datetime.now().strftime('%d/%m/%Y %H:%M:%S')}")
writeLog().write("./Model/"+all_sparse_model.__name__+"_"+"log.txt",f"LAYER: {[layer for layer in all_sparse_model().children()]}")

for epoch in testing_epochs:
    result = cv_process(dataset, criterion, modelClass=all_sparse_model, events=all_event, target=all_y, epochs=epoch, verbose=False, scaling=False)
    testing_results.append(result)

writeLog().writeWithoutCR("./Model/"+all_sparse_model.__name__+"_"+"log.txt",f"RESULT:\n{epochs_diff(testing_results)}")

In [None]:
dataset = all_root_thread
criterion = nn.BCEWithLogitsLoss()

testing_epochs = [100]
testing_results = []

writeLog().write("./Model/"+all_sparse_model.__name__+"_"+"log.txt",f"\n\n************************************************************\nSTARTING A CROSS-VALIDATION ... {testing_epochs} epochs\n************************************************************")
writeLog().write("./Model/"+all_sparse_model.__name__+"_"+"log.txt",f"TIME: {datetime.now().strftime('%d/%m/%Y %H:%M:%S')}")
writeLog().write("./Model/"+all_sparse_model.__name__+"_"+"log.txt",f"LAYER: {[layer for layer in all_sparse_model().children()]}")


for epoch in testing_epochs:
    result = cv_process(dataset, criterion, modelClass=all_sparse_model, events=all_event, target=all_y, epochs=epoch, verbose=False, scaling=False)
    testing_results.append(result)

writeLog().writeWithoutCR("./Model/"+all_sparse_model.__name__+"_"+"log.txt",f"RESULT:\n{epochs_diff(testing_results)}")

In [None]:
dataset = all_bert_simple
criterion = nn.BCEWithLogitsLoss()

testing_epochs = [100]
testing_results = []

writeLog().write("./Model/"+bert_model.__name__+"_"+"log.txt",f"\n\n************************************************************\nSTARTING A CROSS-VALIDATION ... {testing_epochs} epochs\n************************************************************")
writeLog().write("./Model/"+bert_model.__name__+"_"+"log.txt",f"TIME: {datetime.now().strftime('%d/%m/%Y %H:%M:%S')}")
writeLog().write("./Model/"+bert_model.__name__+"_"+"log.txt",f"LAYER: {[layer for layer in bert_model().children()]}")


for epoch in testing_epochs:
    result = cv_process(dataset, criterion, modelClass=bert_model, events=all_event, target=all_y, epochs=epoch, verbose=False)
    testing_results.append(result)

writeLog().writeWithoutCR("./Model/"+all_sparse_model.__name__+"_"+"log.txt",f"RESULT:\n{epochs_diff(testing_results)}")2

'Average: 69.51%\nF1: 41.44%\nLoss: 0.63\n'

In [None]:
class bert_sparse_model(nn.Module):
    def __init__(self):
        super(bert_sparse_model, self).__init__() # 1*20
        self.layers = nn.Sequential(
            nn.Linear(869, 72, bias=True),
            nn.ELU(),
            nn.Dropout(0.5),
            nn.Linear(72, 10, bias = True),
            nn.ELU(),
            nn.Dropout(0.5),
            nn.Linear(10, 1)
        )
    def forward(self, x):
        x = self.layers(x)
        return x

# model = bert_model()
dataset = all_total
criterion = nn.BCEWithLogitsLoss()
# epochs = 10

testing_epochs = [100,200]
testing_results = []

writeLog().write("./Model/"+bert_sparse_model.__name__+"_"+"log.txt",f"\n\n************************************************************\nSTARTING A CROSS-VALIDATION ... {testing_epochs} epochs\n************************************************************")
writeLog().write("./Model/"+bert_sparse_model.__name__+"_"+"log.txt",f"TIME: {datetime.now().strftime('%d/%m/%Y %H:%M:%S')}")
writeLog().write("./Model/"+bert_sparse_model.__name__+"_"+"log.txt",f"LAYER: {[layer for layer in bert_sparse_model().children()]}")


for epoch in testing_epochs:
    result = cv_process(dataset, criterion, modelClass=bert_sparse_model, events=all_event, target=all_y, epochs=epoch, verbose=False)
    testing_results.append(result)

epochs_diff(testing_results)


In [None]:
testing_results

[{0: [30.014430014430015, 20.329881502230815, 0.9727699329441358],
  1: [52.31846019247593, 34.094144605369294, 0.7706452375530467],
  2: [52.23880597014925, 33.81135426518758, 1.5158748644501416],
  3: [51.348314606741575, 33.655633914142015, 0.7604606889606862],
  4: [47.338247338247335, 31.74001647471132, 5.580640545299461],
  5: [50.0, 33.33333333333333, 0.6937494874000549],
  6: [42.48927038626609, 29.919186399219985, 0.737345610821196],
  7: [50.84033613445378, 33.669406786111594, 0.7304117864921313],
  8: [52.17391304347826, 35.17515949242477, 0.7004137376080388]}]

In [None]:
pd.DataFrame(testing_results[0])


Unnamed: 0,0,1,2,3,4,5,6,7,8
0,30.01443,52.31846,52.238806,51.348315,47.338247,50.0,42.48927,50.840336,52.173913
1,20.329882,34.094145,33.811354,33.655634,31.740016,33.333333,29.919186,33.669407,35.175159
2,0.97277,0.770645,1.515875,0.760461,5.580641,0.693749,0.737346,0.730412,0.700414


In [None]:
class bert_sparse_model(nn.Module):
    def __init__(self):
        super(bert_sparse_model, self).__init__() # 1*20
        self.layers = nn.Sequential(
            nn.Linear(869, 80, bias = True),
            nn.ELU(),
            nn.Dropout(0.5),
            nn.Linear(80, 16, bias = True),
            nn.ELU(),
            nn.Dropout(0.5),
            nn.Linear(16, 1)
        )
    def forward(self, x):
        x = self.layers(x)
        return x

# model = bert_model()
dataset = all_total
criterion = nn.BCEWithLogitsLoss()
# epochs = 10

testing_epochs = [100]
testing_results = []

writeLog().write("./Model/"+bert_sparse_model.__name__+"_"+"log.txt",f"\n\n************************************************************\nSTARTING A CROSS-VALIDATION ... {testing_epochs} epochs\n************************************************************")
writeLog().write("./Model/"+bert_sparse_model.__name__+"_"+"log.txt",f"TIME: {datetime.now().strftime('%d/%m/%Y %H:%M:%S')}")
writeLog().write("./Model/"+bert_sparse_model.__name__+"_"+"log.txt",f"LAYER: {[layer for layer in bert_sparse_model().children()]}")

for epoch in testing_epochs:
    result = cv_process(dataset, criterion, modelClass=bert_sparse_model, events=all_event, target=all_y, epochs=epoch, verbose=False)
    testing_results.append(result)

epochs_diff(testing_results)

df = pd.DataFrame(testing_results[0])