# README

**Author:** \
Yiming Zhang\
Ziyang Yang\
Lichen Xue\
\
**Description:**\
Humour, as a representative of human subjective judgment, is usually difficult to predict with natural language processing methods. This notebook tries to predict the humour level of a sentence using two main approaches: one based on pre-trained models and one without pre-training.\
**File structure:**


*   import and download file
*   Approach 1 base line
  *   prepare before training
  *   helper functions
*   Approach 1
 *  data preprocessing & get all possible inputs
 *  load Tokenizer : Bert, XLNet, Roberta
 *  define three Dataset classes for the three pretrained models
 *  Train function
 *  Load the three pretrained model: Bert, XLNet, Roberta
 *  hyperparameters tuning (manually grid search :)
 *  Begin the train and save the model
 *  get the test RMSE and output predicted file
*   Approach 2 base line
*   Approach 2
 *  data preprocessing
 *  mini batch generating
 *  Training models: CNN, BiLSTM, GRU
 *  Training
 *  Testing
 *  Output predicted file



**How to run the file:**\
Run each cell from top to bottom. No extra files need to be added. \

**How to save the prediction file after running the model:**\
The code that saves the prediction file has been commented out. If you need the file of predicted test scores, uncomment the code in the "output predicted file" section of Approach 1 or Approach 2, as needed.\
To save the predicted file successfully, you need to give the notebook access to your Google Drive; the access link is shown as an output at the beginning of the file-output code.


# import and download what we need

In [None]:
# load transformers
!pip install transformers
!pip install SentencePiece #XLNET need SentencePiece



In [None]:
import random

import pandas as pd
import numpy as np

import os
import re
import time
import math

import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F

from torchtext import data
from torchtext import datasets
import torch.utils.data as tud

from transformers import AdamW
from transformers import get_linear_schedule_with_warmup

from transformers import BertTokenizer
from transformers import XLNetTokenizer
from transformers import RobertaTokenizer

from transformers import BertForSequenceClassification
from transformers import XLNetForSequenceClassification
from transformers import RobertaForSequenceClassification

import nltk
nltk.download('punkt')
from nltk import word_tokenize


from sklearn.feature_extraction.text import CountVectorizer
from torch.utils.data import Dataset, random_split
from sklearn.feature_extraction.text import TfidfTransformer
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
import codecs

import tensorflow as tf
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
import torch.utils.data as tud



[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


In [None]:
# # You will need to download any word embeddings required for your code, e.g.:

# !wget http://nlp.stanford.edu/data/glove.6B.zip
# !unzip glove.6B.zip

# # For any packages that Colab does not provide auotmatically you will also need to install these below, e.g.:

# ! pip install torch

In [None]:
# Load data
!wget -O train.csv https://drive.google.com/u/0/uc?id=1_jnO8KxostH_K98RrGg_7arJ7NA5RqtW&export=download
!wget -O dev.csv https://drive.google.com/u/0/uc?id=13kfmFS9XXbrvWTlrN1C2W7hRMjhDQa5C&export=download
!wget -O test.csv https://drive.google.com/u/0/uc?id=1v0XdNKw7gbrDUP3Xw9KaLPSGyX8w0fhk&export=download
!wget -O test_trueLabel.csv https://drive.google.com/u/0/uc?id=16J7AfRi8awIXUHdpTHkgBga_Y4FVUKV5&export=download


train_df = pd.read_csv('./train.csv')
dev_df = pd.read_csv('./dev.csv')
test_df = pd.read_csv('./test.csv')
test_true = pd.read_csv('./test_trueLabel.csv')

--2021-03-02 10:12:01--  https://drive.google.com/u/0/uc?id=1_jnO8KxostH_K98RrGg_7arJ7NA5RqtW
Resolving drive.google.com (drive.google.com)... 108.177.126.100, 108.177.126.101, 108.177.126.113, ...
Connecting to drive.google.com (drive.google.com)|108.177.126.100|:443... connected.
HTTP request sent, awaiting response... 302 Moved Temporarily
Location: https://doc-0c-5g-docs.googleusercontent.com/docs/securesc/ha0ro937gcuc7l7deffksulhg5h7mbp1/lm98c91rmjb4bj3f5h06b5dkrtka780m/1614679875000/02947314171395917638/*/1_jnO8KxostH_K98RrGg_7arJ7NA5RqtW [following]
--2021-03-02 10:12:02--  https://doc-0c-5g-docs.googleusercontent.com/docs/securesc/ha0ro937gcuc7l7deffksulhg5h7mbp1/lm98c91rmjb4bj3f5h06b5dkrtka780m/1614679875000/02947314171395917638/*/1_jnO8KxostH_K98RrGg_7arJ7NA5RqtW
Resolving doc-0c-5g-docs.googleusercontent.com (doc-0c-5g-docs.googleusercontent.com)... 172.217.218.132, 2a00:1450:4013:c08::84
Connecting to doc-0c-5g-docs.googleusercontent.com (doc-0c-5g-docs.googleusercontent.co

In [None]:
#train_df.head()
print(train_df.original)

0       France is ‘ hunting down its citizens who join...
1       Pentagon claims 2,000 % increase in Russian tr...
2       Iceland PM Calls Snap Vote as Pedophile Furor ...
3       In an apparent first , Iran and Israel <engage...
4       Trump was told weeks ago that Flynn misled <Vi...
                              ...                        
9647    State officials blast ' unprecedented ' DHS <m...
9648    Protesters Rally for <Refugees/> Detained at J...
9649    Cruise line Carnival Corp. joins the fight aga...
9650    Columbia police hunt woman seen with <gun/> ne...
9651    Here 's What 's In The House-Approved Health <...
Name: original, Length: 9652, dtype: object


# Baseline 1: prepare before training

In [None]:
# Setting random seed and device
SEED = 1

torch.manual_seed(SEED)
torch.cuda.manual_seed(SEED)
torch.backends.cudnn.deterministic = True

# change it to a function
def fix_seed(seed=2021):
    """Seed every random number generator this notebook relies on.

    The same value is handed to Python's ``random`` module, NumPy's global
    generator, ``torch.manual_seed`` and ``torch.cuda.manual_seed``.

    Args:
        seed: Integer seed shared by all generators (default 2021).
    """
    random.seed(seed)
    np.random.seed(seed)
    for seed_torch in (torch.manual_seed, torch.cuda.manual_seed):
        seed_torch(seed)
fix_seed()

In [None]:
use_cuda = torch.cuda.is_available()
device = torch.device("cuda:0" if use_cuda else "cpu")
DEVICE = device

print('Device is', DEVICE)

Device is cuda:0


# helper function

## train and eval function for 1 input models

In [None]:
def train1in(train_iter, dev_iter, model, number_epoch):
    """
    Fit a single-input model and print train/dev metrics once per epoch.

    Uses the notebook-level names `optimizer`, `loss_fn`, `device`,
    `model_performance` and `eval1in`; they must exist before this is called.

    Args:
        train_iter: iterable yielding (feature, target) training batches.
        dev_iter: iterable of batches passed to `eval1in` after each epoch.
        model: model exposing `batch_size`, `hidden` and `init_hidden()`.
        number_epoch: number of passes over `train_iter`.
    """

    print("Training model.")

    for epoch in range(1, number_epoch+1):

        model.train()
        # Per-epoch running totals; no_observations counts the samples seen.
        epoch_loss, epoch_sse, no_observations = 0, 0, 0

        for feature1, target in train_iter:
            feature1 = feature1.to(device)
            target = target.to(device)
            n_batch = target.shape[0]

            # for RNN: the model is told the batch size and its hidden state
            # is re-initialised before every batch
            model.batch_size = n_batch
            no_observations += n_batch
            model.hidden = model.init_hidden()

            predictions = model(feature1).squeeze(1)

            optimizer.zero_grad()
            loss = loss_fn(predictions, target)
            sse, __ = model_performance(predictions.detach().cpu().numpy(), target.detach().cpu().numpy())
            loss.backward()
            optimizer.step()

            # Weight the batch loss by batch size so the epoch value is a
            # per-sample average even when the last batch is smaller.
            epoch_loss += loss.item()*n_batch
            epoch_sse += sse

        valid_loss, valid_mse, __, __ = eval1in(dev_iter, model)

        epoch_loss = epoch_loss / no_observations
        epoch_mse = epoch_sse / no_observations
        print(f'| Epoch: {epoch:02} | Train Loss: {epoch_loss:.2f} | Train MSE: {epoch_mse:.2f} | Train RMSE: {epoch_mse**0.5:.5f} | \
        Val. Loss: {valid_loss:.2f} | Val. MSE: {valid_mse:.2f} |  Val. RMSE: {valid_mse**0.5:.5f} |')

In [None]:
# We evaluate performance on our dev set
def eval1in(data_iter, model):
    """
    Score a single-input model on every batch of `data_iter` without gradients.

    Uses the notebook-level names `loss_fn`, `device` and `model_performance`.

    Returns:
        (per-sample mean loss, per-sample mean squared error,
         array of all predictions, array of all targets)
    """
    model.eval()
    total_loss, total_sse, no_observations = 0, 0, 0
    pred_all, trg_all = [], []

    with torch.no_grad():
        for feature1, target in data_iter:
            feature1 = feature1.to(device)
            target = target.to(device)
            n_batch = target.shape[0]

            # for RNN: set the batch size and start from a fresh hidden state
            model.batch_size = n_batch
            no_observations += n_batch
            model.hidden = model.init_hidden()

            predictions = model(feature1).squeeze(1)
            loss = loss_fn(predictions, target)

            # Move to NumPy to accumulate the squared error
            pred = predictions.detach().cpu().numpy()
            trg = target.detach().cpu().numpy()
            sse, __ = model_performance(pred, trg)

            total_loss += loss.item()*n_batch
            total_sse += sse
            pred_all.extend(pred)
            trg_all.extend(trg)

    mean_loss = total_loss/no_observations
    mean_sq_error = total_sse/no_observations
    return mean_loss, mean_sq_error, np.array(pred_all), np.array(trg_all)

## train & eval function for 2 inputs models

In [None]:
def train2in(train_iter, dev_iter, model, number_epoch):
    """
    Training loop for two-input models, which calls eval2in after each epoch.

    An Adam optimizer (lr=0.09) and a CosineAnnealingLR scheduler (T_max=8)
    are created here for the parameters of `model`. Uses the notebook-level
    names `loss_fn`, `device`, `model_performance` and `eval2in`.

    Args:
        train_iter: iterable yielding (feature1, feature2, target) batches.
        dev_iter: iterable of batches passed to `eval2in` after each epoch.
        model: model exposing `batch_size`, `hidden` and `init_hidden()`.
        number_epoch: number of passes over `train_iter`.
    """
    # Bind the optimizer to the model that was passed in, not to a
    # notebook-level model, so this function trains whatever it is given.
    optimizer = torch.optim.Adam(model.parameters(),lr=0.09)

    steps = 8
    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, steps)

    print("Training model.")

    for epoch in range(1, number_epoch+1):

        model.train()
        epoch_loss = 0
        epoch_sse = 0
        no_observations = 0  # Observations used for training so far

        for batch in train_iter:

            feature1, feature2, target = batch

            feature1, feature2, target = feature1.to(device), feature2.to(device), target.to(device)

            # for RNN: set the batch size and re-initialise the hidden state
            model.batch_size = target.shape[0]
            no_observations = no_observations + target.shape[0]
            model.hidden = model.init_hidden()

            predictions = model(feature1,feature2).squeeze(1)

            optimizer.zero_grad()

            loss = loss_fn(predictions, target)

            sse, __ = model_performance(predictions.detach().cpu().numpy(), target.detach().cpu().numpy())

            loss.backward()
            optimizer.step()
            # The learning-rate schedule advances once per batch, not per epoch.
            scheduler.step()

            # Weight by batch size so the epoch loss is a per-sample average.
            epoch_loss += loss.item()*target.shape[0]
            epoch_sse += sse

        valid_loss, valid_mse, __, __ = eval2in(dev_iter, model)

        epoch_loss, epoch_mse = epoch_loss / no_observations, epoch_sse / no_observations
        print(f'| Epoch: {epoch:02} | Train Loss: {epoch_loss:.2f} | Train MSE: {epoch_mse:.2f} | Train RMSE: {epoch_mse**0.5:.5f} | \
        Val. Loss: {valid_loss:.2f} | Val. MSE: {valid_mse:.2f} |  Val. RMSE: {valid_mse**0.5:.5f} |')

In [None]:
# We evaluate performance on our dev set
def eval2in(data_iter, model):
    """
    Score a two-input model on every batch of `data_iter` without gradients.

    Uses the notebook-level names `loss_fn`, `device` and `model_performance`.

    Returns:
        (per-sample mean loss, per-sample mean squared error,
         array of all predictions, array of all targets)
    """
    model.eval()
    total_loss, total_sse, no_observations = 0, 0, 0
    pred_all, trg_all = [], []

    with torch.no_grad():
        for feature1, feature2, target in data_iter:
            feature1 = feature1.to(device)
            feature2 = feature2.to(device)
            target = target.to(device)
            n_batch = target.shape[0]

            # for RNN: set the batch size and start from a fresh hidden state
            model.batch_size = n_batch
            no_observations += n_batch
            model.hidden = model.init_hidden()

            predictions = model(feature1, feature2).squeeze(1)
            loss = loss_fn(predictions, target)

            # Move to NumPy to accumulate the squared error
            pred = predictions.detach().cpu().numpy()
            trg = target.detach().cpu().numpy()
            sse, __ = model_performance(pred, trg)

            total_loss += loss.item()*n_batch
            total_sse += sse
            pred_all.extend(pred)
            trg_all.extend(trg)

    mean_loss = total_loss/no_observations
    mean_sq_error = total_sse/no_observations
    return mean_loss, mean_sq_error, np.array(pred_all), np.array(trg_all)

## other help functions


*   model_performance
*   create vocab
*   collate_fn_padd




In [None]:
# How we print the model performance
def model_performance(output, target, print_output=False):
    """
    Compute the sum and the mean of squared errors between `output` and `target`.

    Args:
        output: predicted values (array-like supporting `-` and `**`).
        target: reference values, same shape as `output`.
        print_output: when True, also print the MSE and the RMSE.

    Returns:
        Tuple (sse, mse).
    """
    residual = output - target
    sq_error = residual**2

    sse, mse = np.sum(sq_error), np.mean(sq_error)

    if print_output:
        # RMSE is only needed for the printed report
        rmse = np.sqrt(mse)
        print(f'| MSE: {mse:.2f} | RMSE: {rmse:.5f} |')

    return sse, mse

In [None]:
def create_vocab(data):
    """
    Tokenize every sentence and collect the distinct tokens.

    Sentences are split on single spaces only (`str.split(' ')`), so
    consecutive spaces produce empty-string tokens.

    Args:
        data: iterable of sentence strings.

    Returns:
        (vocabulary, tokenized_corpus): `vocabulary` lists each distinct token
        once, in order of first appearance; `tokenized_corpus` holds one token
        list per input sentence.
    """
    tokenized_corpus = [sentence.split(' ') for sentence in data]

    vocabulary = []  # distinct tokens, in order of first appearance
    seen = set()     # same tokens as `vocabulary`, for constant-time membership tests

    for tokenized_sentence in tokenized_corpus:
        for token in tokenized_sentence:
            if token not in seen:
                seen.add(token)
                vocabulary.append(token)

    return vocabulary, tokenized_corpus

In [None]:
def collate_fn_padd(batch):
    '''
    Turn a list of (feature_sequence, label) pairs into padded tensors.

    Every sequence is right-padded with zeros to the length of the longest
    sequence in the batch.

    Returns:
        (LongTensor of shape [len(batch), max_len], FloatTensor of labels)
    '''
    batch_features, batch_labels = [], []
    for feature, label in batch:
        batch_features.append(feature)
        batch_labels.append(label)

    longest = max(len(feature) for feature in batch_features)
    seq_tensor = torch.zeros((len(batch), longest)).long()

    # Copy each sequence into the left part of its row; the rest stays zero.
    for row, seq in enumerate(batch_features):
        seq_tensor[row, :len(seq)] = torch.LongTensor(seq)

    return seq_tensor, torch.FloatTensor(batch_labels)

class Task1Dataset(Dataset):
    """Index-aligned pairing of feature sequences and labels."""

    def __init__(self, train_data, labels):
        # Both containers are stored as given; no copy or conversion is made.
        self.x_train, self.y_train = train_data, labels

    def __len__(self):
        """Number of examples, taken from the label container."""
        return len(self.y_train)

    def __getitem__(self, item):
        """Return the (features, label) pair stored at position `item`."""
        features = self.x_train[item]
        label = self.y_train[item]
        return features, label

# Approach 1 Baseline

> valid loss value : 0.58



# Approach 1

## data preprocessing & get all possible inputs

In [None]:
#################################
# Function : remove the punctuation in one sentence.
# We do not use this function in approach 1.
#################################
import string
def removePunctuation(text):
    """Return `text` without any character listed in `string.punctuation`."""
    return ''.join(ch for ch in text if ch not in string.punctuation)


#################################
# Function : remove the sentences without grade.
# We do not use this function in approach 1.
#################################
def removeNonGradedRow(train_df):
  """Return the rows of `train_df` whose 'grades' value is not 0 (input is left unchanged)."""
  graded_mask = ~train_df['grades'].isin([0])
  return train_df[graded_mask]



#################################
#Input: The result from pd.read_csv(train_file / dev_file / test_file)

#Function: Extract and split five data from CSV: original sentences, new sentences, original words, new words, grade.
# We do not remove the sentences which don't have grade, because the result with removing the no grading sentences is not better than not removing.

#Output: Five list: original sentences, new sentences, original words, new words, grade.
#################################

def extract_data_in_csv_to_list(csv):
    """
    Split a loaded CSV into sentence, word and grade lists.

    Each value of the `original` column is a headline in which the word to be
    replaced is marked as `<word/>`; the `edit` column holds the replacement.

    Args:
        csv: DataFrame with the columns `original`, `edit` and `meanGrade`.

    Returns:
        Five index-aligned lists:
        original sentences (marker replaced by the marked word),
        new sentences (marker replaced by the edit word),
        original words, edit words, grades.
    """
    # Compiled once; matches the `<word/>` marker and captures the word.
    tag_pattern = re.compile(r'\<(.*?)\/\>')

    # Get the grade from csv to list
    grade = csv.meanGrade.to_list()

    original_sentences = []
    new_sentences = []
    original_words = []
    edit_words = []

    for original, edit in zip(csv.original.to_list(), csv.edit.to_list()):
        # The marked word(s) of the headline, concatenated if there are several.
        original_word = ''.join(tag_pattern.findall(original))
        original_words.append(original_word)
        edit_words.append(edit)

        # A function replacement inserts the word verbatim. A plain string
        # would be parsed as a replacement template, so a backslash in a word
        # would be altered or raise re.error.
        original_sentences.append(tag_pattern.sub(lambda _match: original_word, original))
        new_sentences.append(tag_pattern.sub(lambda _match: edit, original))

    return original_sentences, new_sentences, original_words, edit_words, grade

In [None]:
#################################
## Extract the data from each DataFrame into lists.
## extract_data_in_csv_to_list returns 5 lists per DataFrame:
## original sentences, new sentences, original words, edit words, grades.
## train_df  -> train_* lists
## dev_df    -> valid_* lists
## test_true -> test_* lists (note: built from test_true, not test_df)
#################################

train_original_sentences, train_new_sentences, train_original_words, train_edit_words, train_grade_list = extract_data_in_csv_to_list(train_df)
valid_original_sentences, valid_new_sentences, valid_original_words, valid_edit_words, valid_grade_list = extract_data_in_csv_to_list(dev_df)
test_original_sentences, test_new_sentences, test_original_words, test_edit_words, test_grade_list = extract_data_in_csv_to_list(test_true)

len(train_original_sentences)

9652

In [None]:
##check the max length

max_length = 0
for sent in train_original_sentences:
    sent = sent.split()
    max_length = max(len(sent), max_length)
print('the max is', max_length)

the max is 26


## load Tokenizer
BERT

In [None]:
########################################################################################################################################

## There are several tokenizer choices; we use BERT as our first choice. ##

########################################################################################################################################

from transformers import BertTokenizer
# Load the BERT tokenizer.
tokenizer_bert = BertTokenizer.from_pretrained('bert-base-uncased', do_lower_case=True)

train_inputs_two_sentences_bert = tokenizer_bert(train_original_sentences, train_new_sentences, padding='max_length', max_length=60, truncation=True, return_tensors="pt")
valid_inputs_two_sentences_bert = tokenizer_bert(valid_original_sentences, valid_new_sentences, padding='max_length', max_length=60, truncation=True, return_tensors="pt")
test_inputs_two_sentences_bert = tokenizer_bert(test_original_sentences, test_new_sentences, padding='max_length', max_length=60, truncation=True, return_tensors="pt")

train_inputs_new_sen_new_word_bert = tokenizer_bert(train_new_sentences, train_edit_words, padding='max_length', max_length=35, truncation=True, return_tensors="pt")
valid_inputs_new_sen_new_word_bert = tokenizer_bert(valid_new_sentences, valid_edit_words, padding='max_length', max_length=35, truncation=True, return_tensors="pt")
test_inputs_new_sen_new_word_bert = tokenizer_bert(test_new_sentences, test_edit_words, padding='max_length', max_length=35, truncation=True, return_tensors="pt")

train_inputs_ori_sen_new_word_bert = tokenizer_bert(train_original_sentences, train_edit_words, padding='max_length', max_length=35, truncation=True, return_tensors="pt")
valid_inputs_ori_sen_new_word_bert = tokenizer_bert(valid_original_sentences, valid_edit_words, padding='max_length', max_length=35, truncation=True, return_tensors="pt")
test_inputs_ori_sen_new_word_bert = tokenizer_bert(test_original_sentences, test_edit_words, padding='max_length', max_length=35, truncation=True, return_tensors="pt")

train_inputs_new_sen_ori_word_bert = tokenizer_bert(train_new_sentences, train_original_words, padding='max_length', max_length=35, truncation=True, return_tensors="pt")
valid_inputs_new_sen_ori_word_bert = tokenizer_bert(valid_new_sentences, valid_original_words, padding='max_length', max_length=35, truncation=True, return_tensors="pt")
test_inputs_new_sen_ori_word_bert = tokenizer_bert(test_new_sentences, test_original_words, padding='max_length', max_length=35, truncation=True, return_tensors="pt")


XLNET

In [None]:
## XLNet Tokenizer ##

########################################################################################################################################

## Sometimes XLNet reports a "NoneType" error. Do not worry: it is caused by a Colab issue, and restarting the Colab runtime resolves it. ##

########################################################################################################################################


from transformers import XLNetTokenizer

# Load the XLNet tokenizer.
# NOTE(review): `do_lower_case=True` is passed together with the *cased*
# checkpoint 'xlnet-base-cased' — confirm that lower-casing the input is intended.
tokenizer_xlnet = XLNetTokenizer.from_pretrained('xlnet-base-cased', do_lower_case=True)

train_inputs_two_sentences_xlnet = tokenizer_xlnet(train_original_sentences, train_new_sentences, padding='max_length', max_length=60, truncation=True, return_tensors="pt")
valid_inputs_two_sentences_xlnet = tokenizer_xlnet(valid_original_sentences, valid_new_sentences, padding='max_length', max_length=60, truncation=True, return_tensors="pt")
test_inputs_two_sentences_xlnet = tokenizer_xlnet(test_original_sentences, test_new_sentences, padding='max_length', max_length=60, truncation=True, return_tensors="pt")

train_inputs_new_sen_new_word_xlnet = tokenizer_xlnet(train_new_sentences, train_edit_words, padding='max_length', max_length=35, truncation=True, return_tensors="pt")
valid_inputs_new_sen_new_word_xlnet = tokenizer_xlnet(valid_new_sentences, valid_edit_words, padding='max_length', max_length=35, truncation=True, return_tensors="pt")
test_inputs_new_sen_new_word_xlnet = tokenizer_xlnet(test_new_sentences, test_edit_words, padding='max_length', max_length=35, truncation=True, return_tensors="pt")

train_inputs_ori_sen_new_word_xlnet = tokenizer_xlnet(train_original_sentences, train_edit_words, padding='max_length', max_length=35, truncation=True, return_tensors="pt")
valid_inputs_ori_sen_new_word_xlnet = tokenizer_xlnet(valid_original_sentences, valid_edit_words, padding='max_length', max_length=35, truncation=True, return_tensors="pt")
test_inputs_ori_sen_new_word_xlnet = tokenizer_xlnet(test_original_sentences, test_edit_words, padding='max_length', max_length=35, truncation=True, return_tensors="pt")

train_inputs_new_sen_ori_word_xlnet = tokenizer_xlnet(train_new_sentences, train_original_words, padding='max_length', max_length=35, truncation=True, return_tensors="pt")
valid_inputs_new_sen_ori_word_xlnet = tokenizer_xlnet(valid_new_sentences, valid_original_words, padding='max_length', max_length=35, truncation=True, return_tensors="pt")
test_inputs_new_sen_ori_word_xlnet = tokenizer_xlnet(test_new_sentences, test_original_words, padding='max_length', max_length=35, truncation=True, return_tensors="pt")

ROBERTA

In [None]:
## Roberta Tokenizer ##

from transformers import RobertaTokenizer 

# Load the RoBERTa tokenizer.
# NOTE(review): `do_lower_case` is not among the documented RobertaTokenizer
# arguments as far as I can tell — confirm whether it has any effect here.
tokenizer_roberta = RobertaTokenizer.from_pretrained('roberta-base', do_lower_case=True)

train_inputs_two_sentences_roberta = tokenizer_roberta(train_original_sentences, train_new_sentences, padding='max_length', max_length=60, truncation=True, return_tensors="pt")
valid_inputs_two_sentences_roberta = tokenizer_roberta(valid_original_sentences, valid_new_sentences, padding='max_length', max_length=60, truncation=True, return_tensors="pt")
test_inputs_two_sentences_roberta = tokenizer_roberta(test_original_sentences, test_new_sentences, padding='max_length', max_length=60, truncation=True, return_tensors="pt")

train_inputs_new_sen_new_word_roberta = tokenizer_roberta(train_new_sentences, train_edit_words, padding='max_length', max_length=35, truncation=True, return_tensors="pt")
valid_inputs_new_sen_new_word_roberta = tokenizer_roberta(valid_new_sentences, valid_edit_words, padding='max_length', max_length=35, truncation=True, return_tensors="pt")
test_inputs_new_sen_new_word_roberta = tokenizer_roberta(test_new_sentences, test_edit_words, padding='max_length', max_length=35, truncation=True, return_tensors="pt")

train_inputs_ori_sen_new_word_roberta = tokenizer_roberta(train_original_sentences, train_edit_words, padding='max_length', max_length=35, truncation=True, return_tensors="pt")
valid_inputs_ori_sen_new_word_roberta = tokenizer_roberta(valid_original_sentences, valid_edit_words, padding='max_length', max_length=35, truncation=True, return_tensors="pt")
test_inputs_ori_sen_new_word_roberta = tokenizer_roberta(test_original_sentences, test_edit_words, padding='max_length', max_length=35, truncation=True, return_tensors="pt")

train_inputs_new_sen_ori_word_roberta = tokenizer_roberta(train_new_sentences, train_original_words, padding='max_length', max_length=35, truncation=True, return_tensors="pt")
valid_inputs_new_sen_ori_word_roberta = tokenizer_roberta(valid_new_sentences, valid_original_words, padding='max_length', max_length=35, truncation=True, return_tensors="pt")
test_inputs_new_sen_ori_word_roberta = tokenizer_roberta(test_new_sentences, test_original_words, padding='max_length', max_length=35, truncation=True, return_tensors="pt")

Get the tensor data

In [None]:
###############################################################################################################################################################

## For different inputs, you could change the <train_inputs_two_sentences_bert> to any other input. Such as <train_inputs_new_sen_new_word_bert>.

###############################################################################################################################################################


#####################################################################################################################################################################################################

## Another friendly reminder: RoBERTa does not have "token_type_ids"; it only has "input_ids" and "attention_mask". Please update the code accordingly if you want to try RoBERTa.

#####################################################################################################################################################################################################


train_ids = train_inputs_two_sentences_bert['input_ids']
train_attention = train_inputs_two_sentences_bert['attention_mask']
train_token = train_inputs_two_sentences_bert['token_type_ids'] # need to delete it for reborta
train_grade = torch.tensor(train_grade_list)

valid_ids = valid_inputs_two_sentences_bert['input_ids']
valid_attention = valid_inputs_two_sentences_bert['attention_mask']
valid_token = valid_inputs_two_sentences_bert['token_type_ids'] # need to delete it for reborta
valid_grade = torch.tensor(valid_grade_list)

test_ids = test_inputs_two_sentences_bert['input_ids']
test_attention = test_inputs_two_sentences_bert['attention_mask']
test_token = test_inputs_two_sentences_bert['token_type_ids'] # need to delete it for reborta
test_grade = torch.tensor(test_grade_list)

# Three classes for BERT, XLNet and RoBERTa data. Generate the data loaders.

bert

In [None]:
#######################################

## prepare for bert training. ##

#######################################

class bert_data(tud.Dataset):
    """Dataset of BERT inputs (ids, attention mask, token type ids) and grades.

    All four tensors are moved to DEVICE when the dataset is built.
    """
    def __init__(self, id, mask, token, grade):
        self.length = id.shape[0]
        self.id, self.mask, self.token, self.grade = (
            tensor.to(DEVICE) for tensor in (id, mask, token, grade)
        )
    def __getitem__(self, index):
        # One example: (input ids, attention mask, token type ids, grade)
        fields = (self.id, self.mask, self.token, self.grade)
        return tuple(field[index] for field in fields)
    def __len__(self):
        return self.length

xlnet

In [None]:
#######################################

## prepare for xlnet training. ##

#######################################

class xlnet_data(tud.Dataset):
    """Dataset of XLNet inputs (ids, attention mask, token type ids) and grades.

    Every tensor is placed on DEVICE at construction time.
    """
    def __init__(self, id, mask, token, grade):
        self.length = id.shape[0]
        on_device = [tensor.to(DEVICE) for tensor in (id, mask, token, grade)]
        self.id, self.mask, self.token, self.grade = on_device
    def __getitem__(self, index):
        # One example: (input ids, attention mask, token type ids, grade)
        ids = self.id[index]
        attention = self.mask[index]
        token_types = self.token[index]
        return ids, attention, token_types, self.grade[index]
    def __len__(self):
        return self.length

roberta

In [None]:
#######################################

## prepare for roberta training. ##

#######################################
class roberta_Dataset(tud.Dataset):
    """Dataset of RoBERTa inputs (ids, attention mask) and grades.

    Unlike the BERT/XLNet datasets there are no token type ids. All tensors
    are moved to DEVICE when the dataset is built.
    """
    def __init__(self, id, mask, grade):
        self.length = id.shape[0]
        self.id, self.mask, self.grade = (
            tensor.to(DEVICE) for tensor in (id, mask, grade)
        )

    def __getitem__(self, index):
        # One example: (input ids, attention mask, grade)
        return tuple(field[index] for field in (self.id, self.mask, self.grade))

    def __len__(self):
        return self.length

get the loader for train, validation, test.

In [None]:
####################################################################################

## Friendly reminder: remove the token (token_type_ids) argument if you want to test RoBERTa. ##

####################################################################################

##set batch size and get the three loader.

fix_seed()

Batch_size = 16
train_dataset = bert_data(train_ids, train_attention, train_token, train_grade)
valid_dataset = bert_data(valid_ids, valid_attention, valid_token, valid_grade)
test_dataset = bert_data(test_ids, test_attention, test_token, test_grade)

# Only the training data is shuffled. The evaluation loaders keep the dataset
# order so that batch composition is reproducible and the i-th prediction
# corresponds to the i-th row of the source file.
train_loader = tud.DataLoader(train_dataset, batch_size=Batch_size, shuffle=True)
valid_loader = tud.DataLoader(valid_dataset, batch_size=Batch_size, shuffle=False)
test_loader = tud.DataLoader(test_dataset, batch_size=Batch_size, shuffle=False)

# Train function

In [None]:
## The train function ##
# Friendly reminder: remove the token_type_ids argument if you want to test RoBERTa :)

def train(epochs, optimizer, loss_f, model, train_loader, valid_loader, scheduler):
    """Fine-tune `model` and print the mean batch loss on both loaders each epoch.

    Uses the notebook-level `fix_seed` and `DEVICE`. Batches must unpack to
    (input ids, attention mask, token type ids, grade).

    Args:
        epochs: number of passes over `train_loader`.
        optimizer: optimizer stepped after every training batch.
        loss_f: callable(prediction, grade) returning the batch loss.
        model: model called as model(ids, attention_mask=..., token_type_ids=...);
            the first element of its output is used as the prediction.
        train_loader: iterable of training batches.
        valid_loader: iterable of validation batches.
        scheduler: learning-rate scheduler stepped after every training batch.
    """
    fix_seed()
    model = model.to(DEVICE)

    for n in range(epochs):
        # ---- training pass ----
        model.train()
        running_train = 0
        for ids_batch, mask_batch, token_batch, grade in train_loader:
            optimizer.zero_grad()

            outputs = model(ids_batch, attention_mask=mask_batch, token_type_ids=token_batch)
            pre_result = outputs[0].squeeze(1)  # first output holds the prediction

            loss = loss_f(pre_result, grade)
            loss.backward()

            optimizer.step()   # update the parameters
            scheduler.step()   # update the learning rate after every batch

            running_train += loss.item()
        train_epoch_loss = running_train / len(train_loader)

        # ---- validation pass (no gradients) ----
        model.eval()
        running_val = 0
        with torch.no_grad():
            for ids_batch, mask_batch, token_batch, grade in valid_loader:
                outputs = model(ids_batch, attention_mask=mask_batch, token_type_ids=token_batch)
                pre_result = outputs[0].squeeze(1)
                running_val += loss_f(pre_result, grade).item()
        val_epoch_loss = running_val / len(valid_loader)

        print("\nBegin the next epoch...")
        print(f'Epoch: {(n+1)}')
        print(f'Train Loss: {train_epoch_loss:.5f}')
        print(f'Val Loss: {val_epoch_loss:.5f}')

    print("\nOver!")

# Load the pre-trained model
Bert : Bert For Sequence Classification

In [None]:
###########

# BERT is the model we use to get the best result.

###########

## Imported again here to make the dependency of this cell explicit.
from transformers import BertForSequenceClassification

# Load the pretrained "bert-base-uncased" checkpoint with a single output
# (num_labels = 1); attention maps and hidden states are not returned.
model_bert = BertForSequenceClassification.from_pretrained("bert-base-uncased",num_labels = 1, output_attentions = False, output_hidden_states = False)

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

XLNet: XLNet For Sequence Classification

In [None]:
###########

# Optional: XLNet. In our runs its result was not good.
# You might also need a <colab pro> account to run this code. :)
# Select the cell with "Ctrl + A" and comment it out with "Ctrl + /" if you do not want it.

###########

## Imported again here to make the dependency of this cell explicit.
from transformers import XLNetForSequenceClassification

# Load the pretrained "xlnet-base-cased" checkpoint with a single output
# (num_labels = 1); attention maps and hidden states are not returned.
model_xlnet = XLNetForSequenceClassification.from_pretrained("xlnet-base-cased",num_labels = 1, output_attentions = False, output_hidden_states = False)

Some weights of the model checkpoint at xlnet-base-cased were not used when initializing XLNetForSequenceClassification: ['lm_loss.weight', 'lm_loss.bias']
- This IS expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet-base-cased and are newly initialized: ['sequence_summary.summary.weight', 'sequence_summary.summary.bias', 'logits_proj.weight', 'logits_proj.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions a

Roberta : Roberta For Sequence Classification

In [None]:
###########

# Optional: RoBERTa. In our runs its result was not good.
# Select the cell with "Ctrl + A" and comment it out with "Ctrl + /" if you do not want it.

###########

## Imported again here to make the dependency of this cell explicit.
from transformers import RobertaForSequenceClassification

# Load the pretrained "roberta-base" checkpoint with a single output
# (num_labels = 1); attention maps and hidden states are not returned.
model_roberta = RobertaForSequenceClassification.from_pretrained("roberta-base",num_labels = 1, output_attentions = False, output_hidden_states = False)

Some weights of the model checkpoint at roberta-base were not used when initializing RobertaForSequenceClassification: ['lm_head.bias', 'lm_head.dense.weight', 'lm_head.dense.bias', 'lm_head.layer_norm.weight', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight', 'roberta.pooler.dense.weight', 'roberta.pooler.dense.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.weight', 'classifie

# Set the hyperparameters

epoch, learning rate, warmup, weight decay.

In [None]:
# Hyperparameters, chosen according to the BERT manual.

epoch = 2        # number of training epochs passed to train()
bert_lr = 1e-2   # learning rate of the parameters outside the "bert" encoder

## warm up ##
warmup = 0.3     # fraction of train_step spent on linear warm-up
# NOTE(review): train() steps the scheduler once per training batch, i.e.
# len(train_loader) * epoch times in total. train_step is twice that, so the
# schedule is only half consumed and the warm-up covers 60% of the real
# training steps. Confirm that the factor 2 is intended.
train_step = len(train_loader) * epoch * 2
warmup_step = int(train_step * warmup)

## weightdecay ##
weightdecay = 0
# Two parameter groups, split on whether "bert" occurs in the parameter name:
#   1. names without "bert": use the explicit learning rate bert_lr;
#   2. names with "bert": no 'lr' entry, so they use the optimizer's default lr.
optimizer_grouped_parameters = [
   {'params': [p for n, p in model_bert.named_parameters() if "bert" not in n], 'lr': bert_lr, 'weight_decay': weightdecay},
   {'params': [p for n, p in model_bert.named_parameters() if "bert" in n], 'weight_decay': weightdecay}
]

optimizer

In [None]:
## optimizer : AdamW (presumably imported from transformers in an earlier cell) ##

bert_eps = 1e-8   # epsilon handed to AdamW
bert_fr = 2e-5    # default learning rate; applies to groups without their own 'lr'
# eps keeps the denominator of the update away from zero.
optimizer = AdamW(optimizer_grouped_parameters, lr=bert_fr, eps = bert_eps)

scheduler

In [None]:
## scheduler ##

# Create the learning rate scheduler: linear schedule with warmup_step warm-up
# steps out of train_step total steps (both defined in the hyperparameter cell).
scheduler = get_linear_schedule_with_warmup(optimizer, num_warmup_steps = warmup_step, num_training_steps = train_step)

loss function

In [None]:
## loss function ##
def RMSE(predictions, labels):
    """Return the root-mean-square error between two tensors as a scalar tensor.

    Args:
        predictions: tensor of predicted values.
        labels: tensor of target values, broadcastable against ``predictions``.

    Returns:
        ``sqrt(mean((predictions - labels) ** 2))``.
    """
    squared_error = (predictions - labels).pow(2)
    return squared_error.mean().sqrt()

# Loss used by the training loop.
loss_function = RMSE

# begin training!

In [None]:
## Begin our training ! ##

# Fine-tune BERT for `epoch` epochs with the optimizer, RMSE loss and scheduler defined above.
train(epoch, optimizer,  loss_function,  model_bert, train_loader, valid_loader,  scheduler)


Begin the next epoch...
Epoch: 1
Train Loss: 0.59640
Val Loss: 0.52932

Begin the next epoch...
Epoch: 2
Train Loss: 0.53617
Val Loss: 0.51369

Over!


Save model

In [None]:
## save model! ##
# Raw string: '\m' is not a valid escape sequence, so the plain literal only
# worked by accident and triggers an invalid-escape warning on recent Python
# versions. The path is unchanged: a backslash followed by 'model_bert.pkl'.
# Note that torch.save(model) pickles the whole module object, not just its weights.
torch.save(model_bert, r'\model_bert.pkl')

# Check the test RMSE

In [None]:
### load model! (uncomment to evaluate a previously saved model) ##
##model_bert = torch.load(r'\model_bert.pkl')

## testing! ##
fix_seed()
model_bert.eval()  # evaluation mode (disables dropout)

# Rebind the test tensors to their copies on DEVICE.
test_ids = test_ids.to(DEVICE)
test_attention = test_attention.to(DEVICE)
test_token = test_token.to(DEVICE)
test_grade = test_grade.to(DEVICE)

# The whole test set goes through the model in one forward pass (no batching),
# so pre_result keeps the row order of the test tensors.
with torch.no_grad():
  pre_result = model_bert(test_ids,attention_mask=test_attention,token_type_ids=test_token)[0].squeeze(1)
  test_loss = torch.sqrt(((pre_result - test_grade)**2).mean()).item()
print(f'Test RMSE: {test_loss:.5f}')

Test RMSE: 0.51760


write the result.

In [None]:
## write down! ##
def write_down(predictions, test_data_frame, out_loc):
    """Attach predictions to the test frame and save the id/pred columns as CSV.

    Args:
        predictions: one predicted value per row of ``test_data_frame``.
        test_data_frame: DataFrame with an ``'id'`` column. It is modified in
            place: a ``'pred'`` column is added or overwritten.
        out_loc: path of the CSV file to write (no index column).

    Returns:
        None. The absolute output path is printed.
    """
    test_data_frame['pred'] = predictions
    test_data_frame[['id', 'pred']].to_csv(out_loc, index=False)
    print('output finished, address: ' + os.path.abspath(out_loc))

# write test data result to 'task-1-output.csv'
address = 'task-1-output.csv'

## test_true is presumably the test DataFrame loaded with pd.read_csv in an
## earlier cell (the original comment called it <test>) -- confirm.
## write_down adds a 'pred' column to it in place.
write_down(pre_result.cpu(), test_true, address)

output finished, address: /content/task-1-output.csv


## bert dataset

# Approach 2 Baseline

In [None]:
# Number of epochs (not used anywhere in this cell)
epochs = 10

# Proportion of training data for train compared to dev
train_proportion = 0.8

# All edit words, train and dev together.
train_and_dev = train_df['edit']

# Hold out (1 - train_proportion) of the rows as a dev set; fixed seed for a repeatable split.
training_data, dev_data, training_y, dev_y = train_test_split(train_df['edit'], train_df['meanGrade'],
                                                                        test_size=(1-train_proportion),
                                                                        random_state=42)



# We train a Tf-idf model
count_vect = CountVectorizer(stop_words='english')
train_counts = count_vect.fit_transform(training_data)#just use the edit to predict the output grade.
transformer = TfidfTransformer().fit(train_counts)
train_counts = transformer.transform(train_counts)
regression_model = LinearRegression().fit(train_counts, training_y)

# Train predictions
predicted_train = regression_model.predict(train_counts)

# Calculate Tf-idf using train and dev, and validate model on dev:
# NOTE(review): `transformer` is rebound here to a TfidfTransformer fitted on
# train+dev counts, while regression_model was trained on features from the
# first transformer (fitted on train only). The dev and test features below are
# therefore weighted differently from the training features -- confirm this is
# intended.
test_and_test_counts = count_vect.transform(train_and_dev)
transformer = TfidfTransformer().fit(test_and_test_counts)

test_counts = count_vect.transform(dev_data)

test_counts = transformer.transform(test_counts)

# Dev predictions
predicted = regression_model.predict(test_counts)

# We run the evaluation (model_performance is defined in an earlier cell):
print("\nTrain performance:")
sse, mse = model_performance(predicted_train, training_y, True)

print("\nDev performance:")
sse, mse = model_performance(predicted, dev_y, True)


# Predict on the test set with the same vectorizer, the refitted transformer and the regressor.
test_countsT = count_vect.transform(test_df['edit'])
test_countsT = transformer.transform(test_countsT)
predictedT = regression_model.predict(test_countsT)
print(predictedT)
print(type(predictedT))


Train performance:
| MSE: 0.13 | RMSE: 0.36666 |

Dev performance:
| MSE: 0.36 | RMSE: 0.60400 |
[0.46666667 0.93586957 0.93586957 ... 0.93586957 1.15555554 0.93586957]
<class 'numpy.ndarray'>


# Approach 2 Non-Pretrained models

## data preprocessing

### helper functions

In [None]:
##############################################################
# remove all sentences which have 0 score in training set
##############################################################

def removeNonGradedRow (train_df):
  """Return the rows of ``train_df`` whose ``'grades'`` value is not 0.

  The input frame is not modified; the original index labels are kept.
  """
  keep_mask = ~train_df['grades'].isin([0])
  return train_df[keep_mask]

In [None]:
##############################################################
# split the dataset based on " "
# lower case all letters in the original sentences
##############################################################
def create_vocab_approach2 (data):
    """
    Tokenize every sentence of ``data`` into lower-cased tokens.

    Sentences are split on a single space character, so consecutive spaces
    produce empty-string tokens. Returns a list with one token list per
    sentence, in input order.
    """
    return [
        [token.lower() for token in sentence.split(' ')]
        for sentence in data
    ]

In [None]:
##############################################################
# get word to idx list for further training
##############################################################
def word2idx_approach2(joint_tokenized_corpus, trainEdit):
    """Build a word -> index mapping over the corpus tokens and the edit words.

    Indices are assigned in order of first appearance, starting at 1: first
    every token of ``joint_tokenized_corpus`` (sentence by sentence), then
    every entry of ``trainEdit`` not seen before. Index 0 is reserved for
    ``'<pad>'``.

    Args:
        joint_tokenized_corpus: iterable of token lists.
        trainEdit: iterable of edit words.

    Returns:
        dict mapping each distinct word to its integer index.
    """
    # A dict doubles as the "seen" set, so membership checks are O(1)
    # instead of scanning a growing list for every token.
    word2idx = {}
    for sentence in joint_tokenized_corpus:
        for word in sentence:
            if word not in word2idx:
                word2idx[word] = len(word2idx) + 1

    for word in trainEdit:
        if word not in word2idx:
            word2idx[word] = len(word2idx) + 1

    # Padding always maps to 0, even if '<pad>' occurred as a token.
    word2idx['<pad>'] = 0

    return word2idx

In [None]:
##############################################################
# replace original sentences with edit words
# remove punctuation from the new edited sentences
##############################################################

def replaceEdited(joint_tokenized_corpus, trainEdit):
    """Substitute the edit word into each sentence and strip non-word characters.

    For sentence ``i``, any token part matching ``<.*>`` is replaced by
    ``trainEdit[i]``; afterwards every run of non-word characters (``\\W+``)
    is removed from the token.

    Args:
        joint_tokenized_corpus: list of token lists. It is modified in place.
        trainEdit: sequence holding one edit word per sentence.

    Returns:
        The same list object that was passed in, after modification.
    """
    editedSen = joint_tokenized_corpus
    for i, sentence in enumerate(editedSen):
        edit = trainEdit[i]
        for j, token in enumerate(sentence):
            # A callable replacement inserts the edit word literally; passing the
            # string itself would let re.sub interpret backslashes in it as
            # escapes or group references.
            token = re.sub('<.*>', lambda _match: edit, token)
            sentence[j] = re.sub(r'\W+', '', token)
    return editedSen

In [None]:
##############################################################
# remove punctuation from any input list
##############################################################
def removeSymbols(d):
    """Strip every run of non-word characters from each token of ``d``.

    ``d`` is a list of token lists; it is modified in place and also returned.
    """
    for tokens in d:
        for position, token in enumerate(tokens):
            tokens[position] = re.sub(r'\W+', '', token)
    return d

In [None]:
##############################################################
# Use Tokenizer to pad all sentence to same length
# convert input numpy array to tensor for output
##############################################################

def data2tensor(trainOriginal, editedSen, tokenizer=None, maxLength=None):
    """Encode two parallel tokenized corpora as zero-padded integer tensors.

    Args:
        trainOriginal: sequence of token lists (original sentences).
        editedSen: sequence of token lists (edited sentences).
        tokenizer: optional ``Tokenizer`` to use. When omitted, a new one is
            created and fitted on both corpora, which is the previous
            behaviour. A tokenizer that already has a vocabulary is used as-is
            (not refitted), so one fitted on the training split can encode
            other splits with the same word ids; words unknown to it are
            presumably dropped by ``texts_to_sequences`` -- confirm against
            the Tokenizer in use. A tokenizer with an empty vocabulary is
            fitted here, so the caller can keep a handle on it.
        maxLength: optional length every row is padded (or truncated) to.
            Defaults to the longest row of ``trainOriginal``.

    Returns:
        ``(tensorOrg, tensorEdit)``: two tensors built from the padded index
        arrays, each with one row per sentence and ``maxLength`` columns.

    Raises:
        ValueError: if ``maxLength`` is not given and ``trainOriginal`` is empty.
    """
    # Encode strings into integers
    if tokenizer is None:
        tokenizer = Tokenizer()

    # create vocabulary from all words, unless the tokenizer already has one
    if not getattr(tokenizer, 'word_index', None):
        tokenizer.fit_on_texts(trainOriginal)
        tokenizer.fit_on_texts(editedSen)

    # get length (word count) of the longest row
    if maxLength is None:
        maxLength = max([len(x) for x in trainOriginal])

    # convert words into integers
    w2iOrg = tokenizer.texts_to_sequences(trainOriginal)
    w2iEdit = tokenizer.texts_to_sequences(editedSen)

    # pad shorter rows at the end with zeros
    padOrg = pad_sequences(w2iOrg, maxlen=maxLength, padding='post')
    padEdit = pad_sequences(w2iEdit, maxlen=maxLength, padding='post')

    tensorOrg = torch.from_numpy(padOrg)
    tensorEdit = torch.from_numpy(padEdit)

    return tensorOrg,tensorEdit

### preprocessing Train,Valid,Test data

In [None]:
# Drop rows whose 'grades' value is 0 before splitting.
train_df = removeNonGradedRow(train_df)

# Features are the original sentence and the edit word; the target is meanGrade.
x = train_df.loc[:, ["original", "edit"]]
y = train_df.loc[:, ["meanGrade"]]
# NOTE(review): no random_state is passed, so the train/validation split
# depends on the current state of the global RNG.
Xtrain, Xvalid, Ytrain, Yvalid = train_test_split(x, y, test_size=0.2)

trainOriginal = Xtrain['original']
validOriginal = Xvalid['original']
testOriginal = test_true['original']

trainEdit = Xtrain['edit'].values.tolist()
validEdit = Xvalid['edit'].values.tolist()
testEdit= test_true['edit'].values.tolist()

trainGrade_lst = Ytrain['meanGrade'].to_list()
validGrade_lst = Yvalid['meanGrade'].to_list()
testGrade_lst = test_true['meanGrade'].to_list()

#preprocessing for training data.
# Two independent tokenized copies are needed because replaceEdited and
# removeSymbols both modify their input lists in place.
replaceSen = create_vocab_approach2(trainOriginal) 
keepOrgianlSen = create_vocab_approach2(trainOriginal)

# word2idx is built from the raw lower-cased training tokens plus the edit
# words; later cells use len(word2idx) as the embedding vocabulary size.
word2idx = word2idx_approach2(create_vocab_approach2(trainOriginal), trainEdit)
editedSen = replaceEdited(replaceSen, trainEdit)
orgianlSen = removeSymbols(keepOrgianlSen)

# NOTE(review): data2tensor is called once per split below and each call
# constructs and fits its own Tokenizer, so the word ids in the validation and
# test tensors presumably do not match the ids used in the training tensors.
# Confirm, and consider encoding all splits with one tokenizer fitted on train.
orgTensor,editTensor = data2tensor(orgianlSen, editedSen)
labelTensor = torch.FloatTensor(trainGrade_lst)
orgTensor,editTensor=orgTensor.type(torch.LongTensor),editTensor.type(torch.LongTensor)


#pre processing for valid data. V means validation.
replaceSenV = create_vocab_approach2(validOriginal)
keepOrgianlSenV = create_vocab_approach2(validOriginal)

editedSenV = replaceEdited(replaceSenV, validEdit)
orgianlSenV = removeSymbols(keepOrgianlSenV)

orgTensorV,editTensorV = data2tensor(orgianlSenV, editedSenV)
labelTensorV = torch.FloatTensor(validGrade_lst)
orgTensorV,editTensorV=orgTensorV.type(torch.LongTensor),editTensorV.type(torch.LongTensor)


#pre processing for test data. T means Test.
replaceSenT = create_vocab_approach2(testOriginal)
keepOrgianlSenT = create_vocab_approach2(testOriginal)

editedSenT = replaceEdited(replaceSenT, testEdit)
orgianlSenT = removeSymbols(keepOrgianlSenT)

orgTensorT,editTensorT = data2tensor(orgianlSenT, editedSenT)
labelTensorT = torch.FloatTensor(testGrade_lst)
orgTensorT,editTensorT=orgTensorT.type(torch.LongTensor),editTensorT.type(torch.LongTensor)

## mini batch generating

In [None]:
class twoInDataset(tud.Dataset):
    """Dataset of ``(x1[i], x2[i], y1[i])`` triples held on the global ``device``.

    The length is taken from the first dimension of ``x1``.
    """

    def __init__(self, x1, x2, y1):
        self.len = x1.shape[0]
        # Move all three tensors to the device once, at construction time.
        self.x1_data, self.x2_data, self.y1_data = (
            tensor.to(device) for tensor in (x1, x2, y1)
        )

    def __len__(self):
        return self.len

    def __getitem__(self, index):
        return (self.x1_data[index], self.x2_data[index], self.y1_data[index])

# Batching
BATCH_SIZE = 32

# Two-input datasets: (original sentence, edited sentence, grade) for each split.
train_dataset = twoInDataset(orgTensor, editTensor, labelTensor)
valid_dataset = twoInDataset(orgTensorV, editTensorV, labelTensorV)
test_dataset = twoInDataset(orgTensorT, editTensorT, labelTensorT)

# All three loaders are shuffled and use the BATCH_SIZE defined above.
train_dataloader = tud.DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
valid_dataloader = tud.DataLoader(valid_dataset, batch_size=BATCH_SIZE, shuffle=True)
test_dataloader = tud.DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=True)

In [None]:
# 1 input model dataset
class oneInpDataset(tud.Dataset):
    """Dataset of ``(x1[i], y1[i])`` pairs held on the global ``device``.

    The length is taken from the first dimension of ``x1``.
    """

    def __init__(self, x1, y1):
        self.len = x1.shape[0]
        # Move both tensors to the device once, at construction time.
        self.x1_data, self.y1_data = (tensor.to(device) for tensor in (x1, y1))

    def __len__(self):
        return self.len

    def __getitem__(self, index):
        return (self.x1_data[index], self.y1_data[index])


# Single-input datasets built from the original sentences only
train_dataset_1Org = oneInpDataset(orgTensor, labelTensor)
valid_dataset_1Org = oneInpDataset(orgTensorV, labelTensorV)

train_dataloader_1Org = tud.DataLoader(train_dataset_1Org, batch_size=BATCH_SIZE, shuffle=True)
valid_dataloader_1Org = tud.DataLoader(valid_dataset_1Org, batch_size=BATCH_SIZE, shuffle=True)

# Single-input datasets built from the edited sentences only
train_dataset_1Edt = oneInpDataset(editTensor, labelTensor)
valid_dataset_1Edt = oneInpDataset(editTensorV, labelTensorV)

train_dataloader_1Edt = tud.DataLoader(train_dataset_1Edt, batch_size=BATCH_SIZE, shuffle=True)
valid_dataloader_1Edt = tud.DataLoader(valid_dataset_1Edt, batch_size=BATCH_SIZE, shuffle=True)

In [None]:
# test dataset for 1 input
class testDataset(tud.Dataset):
    """Label-free dataset over a single input tensor held on the global ``device``.

    Items are the rows ``x1[i]``; the length is the first dimension of ``x1``.
    """

    def __init__(self, x1):
        self.len, self.x1_data = x1.shape[0], x1.to(device)

    def __len__(self):
        return self.len

    def __getitem__(self, index):
        return self.x1_data[index]


# Unshuffled test loaders (no shuffle argument), so batches follow the row order of the test tensors.
test_dataset_org = testDataset(orgTensorT)
test_dataloader_org = tud.DataLoader(test_dataset_org, batch_size=BATCH_SIZE)

test_dataset_edt = testDataset(editTensorT)
test_dataloader_edt = tud.DataLoader(test_dataset_edt, batch_size=BATCH_SIZE)

# test dataset for 2 inputs
class testDataset2in(tud.Dataset):
    """Label-free dataset of ``(x1[i], x2[i])`` pairs held on the global ``device``.

    The length is taken from the first dimension of ``x1``.
    """

    def __init__(self, x1,x2):
        self.len = x1.shape[0]
        # Move both tensors to the device once, at construction time.
        self.x1_data, self.x2_data = (tensor.to(device) for tensor in (x1, x2))

    def __len__(self):
        return self.len

    def __getitem__(self, index):
        return (self.x1_data[index], self.x2_data[index])

# Unshuffled two-input test loader: (original sentence, edited sentence) pairs in row order.
test_dataset_2in = testDataset2in(orgTensorT,editTensorT)
test_dataloader_2in = tud.DataLoader(test_dataset_2in, batch_size=BATCH_SIZE)

## Training Models

### 2 Inputs CNN

In [None]:
class TwoInputsCNN(nn.Module):
    """CNN scoring a pair of token-id sequences with shared weights.

    Both inputs go through the same embedding, convolution, ReLU, global
    max-pooling and dropout. The two pooled feature vectors are multiplied
    element-wise and summed over the channel dimension, giving one value per
    example.

    Args:
        vocab_size: number of rows of the embedding table (index 0 is padding).
        embedding_dim: size of each embedding vector.
        out_channels: number of convolution filters.
        window_size: number of consecutive tokens covered by each filter.
        fc_out_dim: output size of the ``fc`` layer (see note in ``__init__``).
        dropout: dropout probability applied to the pooled features.
    """

    def __init__(self, vocab_size, embedding_dim, out_channels, window_size, fc_out_dim, dropout):
        super(TwoInputsCNN, self).__init__()
        self.embedding = nn.Embedding(vocab_size, embedding_dim, padding_idx=0)
        self.conv = nn.Conv2d(in_channels=1, out_channels=out_channels,
                                      kernel_size=(window_size, embedding_dim))
        self.dropout = nn.Dropout(dropout)
        # Not used by forward(); kept so the parameter set, the state_dict and
        # the order of random initialisation stay unchanged.
        self.fc = nn.Linear(out_channels, fc_out_dim)

    def _encode(self, tokens):
        """Embed, convolve, ReLU, max-pool over positions and apply dropout to one input."""
        embedded = self.embedding(tokens).unsqueeze(1)       # add the channel dimension
        feature_maps = F.relu(self.conv(embedded).squeeze(3))
        # Pool over the whole remaining length: one value per filter.
        pooled = F.max_pool1d(feature_maps, feature_maps.shape[2]).squeeze(2)
        return self.dropout(pooled)

    def forward(self, x, y):
        """Return a ``(batch, 1)`` tensor of scores for the pairs ``(x, y)``.

        ``x`` and ``y`` are integer tensors of token ids, one row per example.
        """
        # Each input is encoded from its OWN feature maps. Previously the y
        # branch applied ReLU to the x feature maps, so y never influenced
        # the output.
        xDrop = self._encode(x)
        yDrop = self._encode(y)

        out = xDrop * yDrop
        preds = torch.sum(out, 1, keepdim = True)

        return preds

### 1 input CNN


In [None]:
class oneInputCNN(nn.Module):
    """CNN scoring a single token-id sequence.

    Pipeline: embedding -> convolution over ``window_size`` consecutive
    tokens -> ReLU -> max-pooling over all positions -> dropout -> sum over
    the filters, giving one value per example.
    """

    def __init__(self, vocab_size, embedding_dim, out_channels, window_size, fc_out_dim, dropout):
        super().__init__()
        self.embedding = nn.Embedding(vocab_size, embedding_dim, padding_idx=0)
        # Each filter spans window_size tokens and the full embedding width.
        self.conv = nn.Conv2d(1, out_channels, (window_size, embedding_dim))
        self.dropout = nn.Dropout(dropout)
        # Created but not used by forward().
        self.fc = nn.Linear(out_channels, fc_out_dim)

    def forward(self, x):
        """Return a ``(batch, 1)`` tensor of scores for the token-id rows of ``x``."""
        embedded = self.embedding(x).unsqueeze(1)
        activated = F.relu(self.conv(embedded).squeeze(3))
        pooled = F.max_pool1d(activated, activated.shape[2]).squeeze(2)
        return self.dropout(pooled).sum(dim=1, keepdim=True)

### 1 input BiLSTM (from approach 1 baseline)

In [None]:
class BiLSTM(nn.Module):
    """Bidirectional LSTM regressor over one token-id sequence.

    The model keeps its recurrent state in ``self.hidden``: every forward
    call starts from the stored state and overwrites it with the state the
    LSTM returns. Callers that change the batch size must update
    ``batch_size`` and call ``init_hidden`` first.
    """

    def __init__(self, embedding_dim, hidden_dim, vocab_size, batch_size, device):
        super().__init__()
        self.hidden_dim, self.embedding_dim = hidden_dim, embedding_dim
        self.device, self.batch_size = device, batch_size

        self.embedding = nn.Embedding(vocab_size, embedding_dim, padding_idx=0)
        # Word embeddings in, hidden states of size hidden_dim per direction out.
        self.lstm = nn.LSTM(embedding_dim, hidden_dim, bidirectional=True)
        # Maps the concatenated forward/backward state to a single value.
        self.hidden2label = nn.Linear(hidden_dim * 2, 1)
        self.hidden = self.init_hidden()

    def init_hidden(self):
        """Return a fresh all-zero ``(h0, c0)`` pair on ``self.device``.

        Each tensor has shape (2, batch_size, hidden_dim); the leading 2 is
        num_layers * num_directions.
        """
        state_shape = (2, self.batch_size, self.hidden_dim)
        return tuple(torch.zeros(*state_shape).to(self.device) for _ in range(2))

    def forward(self, sentence):
        """Return one prediction per row of ``sentence`` as a ``(batch, 1)`` tensor."""
        # (batch, seq, emb) -> (seq, batch, emb)
        time_major = self.embedding(sentence).permute(1, 0, 2)
        lstm_in = time_major.view(len(time_major), self.batch_size, self.embedding_dim)
        lstm_out, self.hidden = self.lstm(lstm_in, self.hidden)
        # The prediction is read from the output at the last time step.
        return self.hidden2label(lstm_out[-1])

### 2 inputs GRU

In [None]:
class twoInputGRU(nn.Module):
    """Bidirectional GRU regressor over a pair of token-id sequences.

    Both inputs share the embedding and the GRU. The recurrent state lives in
    ``self.hidden``: the first input starts from the stored state, the second
    input starts from the state left by the first, and the final state is
    stored again. The prediction comes from the element-wise product of the
    two last-step outputs.
    """

    def __init__(self, embedding_dim, hidden_dim, vocab_size, batch_size, device):
        super().__init__()
        self.hidden_dim, self.embedding_dim = hidden_dim, embedding_dim
        self.device, self.batch_size = device, batch_size

        self.embedding = nn.Embedding(vocab_size, embedding_dim, padding_idx=0)
        self.gru = nn.GRU(embedding_dim, hidden_dim, bidirectional=True)
        self.hidden2label = nn.Linear(hidden_dim * 2, 1)
        self.hidden = self.init_hidden()

    def init_hidden(self):
        """Return a fresh all-zero state of shape (2, batch_size, hidden_dim) on ``self.device``."""
        state_shape = (2, self.batch_size, self.hidden_dim)
        return torch.zeros(*state_shape).to(self.device)

    def _last_output(self, time_major):
        """Run the GRU from the stored state, store the new state, return the last-step output."""
        gru_in = time_major.view(len(time_major), self.batch_size, self.embedding_dim)
        gru_out, self.hidden = self.gru(gru_in, self.hidden)
        return gru_out[-1]

    def forward(self, x, y):
        """Return one prediction per ``(x, y)`` row pair as a ``(batch, 1)`` tensor."""
        # (batch, seq, emb) -> (seq, batch, emb) for both inputs
        x_time_major = self.embedding(x).permute(1, 0, 2)
        y_time_major = self.embedding(y).permute(1, 0, 2)
        last_x = self._last_output(x_time_major)
        last_y = self._last_output(y_time_major)
        return self.hidden2label(last_x * last_y)

### 1 input GRU


In [None]:
class GRU(nn.Module):
    """Bidirectional GRU regressor over one token-id sequence.

    The recurrent state is kept in ``self.hidden``: every forward call starts
    from the stored state and overwrites it with the state the GRU returns.
    """

    def __init__(self, embedding_dim, hidden_dim, vocab_size, batch_size, device):
        super().__init__()
        self.hidden_dim, self.embedding_dim = hidden_dim, embedding_dim
        self.device, self.batch_size = device, batch_size

        self.embedding = nn.Embedding(vocab_size, embedding_dim, padding_idx=0)
        self.gru = nn.GRU(embedding_dim, hidden_dim, bidirectional=True)
        self.hidden2label = nn.Linear(hidden_dim * 2, 1)
        self.hidden = self.init_hidden()

    def init_hidden(self):
        """Return a fresh all-zero state of shape (2, batch_size, hidden_dim) on ``self.device``."""
        state_shape = (2, self.batch_size, self.hidden_dim)
        return torch.zeros(*state_shape).to(self.device)

    def forward(self, sentence):
        """Return one prediction per row of ``sentence`` as a ``(batch, 1)`` tensor."""
        # (batch, seq, emb) -> (seq, batch, emb)
        time_major = self.embedding(sentence).permute(1, 0, 2)
        gru_in = time_major.view(len(time_major), self.batch_size, self.embedding_dim)
        gru_out, self.hidden = self.gru(gru_in, self.hidden)
        # The prediction is read from the output at the last time step.
        return self.hidden2label(gru_out[-1])

## Training

### 2 inputs CNN

In [None]:
# Set up hyperparameters
# Seed every RNG this cell relies on. The model's weight initialisation and its
# dropout masks are drawn from torch's generator, so seeding numpy and `random`
# alone does not make the run repeatable.
np.random.seed(SEED)
random.seed(SEED)
torch.manual_seed(SEED)

EPOCHS = 700
LRATE = 0.001
EMBEDDING_DIM = 50

FC_OUT_DIM = 25
N_OUT_CHAN = 100
WINDOW_SIZE = 3
DROPOUT = 0.7

# Construct the model
modelCNN = TwoInputsCNN(len(word2idx), EMBEDDING_DIM, N_OUT_CHAN, WINDOW_SIZE, FC_OUT_DIM, DROPOUT)

modelCNN = modelCNN.to(device)

optimizer = optim.Adam(modelCNN.parameters(), lr=LRATE)

steps = 30 # set up the scheduler steps
# use scheduler to dynamically change learning rate
scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, steps)

# prepare inputs and target output for both train and validation
# (the full tensors are used: every epoch is a single full-batch update)
x_feature = orgTensor.to(device)
y_feature = editTensor.to(device)
target = labelTensor.to(device)

valid_x_feature = orgTensorV.to(device)
valid_y_feature = editTensorV.to(device)
valid_target = labelTensorV.to(device)

#train model through epoch
# NOTE: the loop variable rebinds the global name `epoch`, and this cell also
# rebinds `optimizer` and `scheduler`, which the BERT cells defined earlier.
for epoch in range(1, EPOCHS + 1):
    modelCNN.train()
    optimizer.zero_grad()
    
    predictions = modelCNN(x_feature, y_feature).squeeze(1)

    # RMSE over the whole training set
    loss = torch.sqrt(((predictions - target)**2).mean())
    train_loss = loss.item()

    loss.backward()
    optimizer.step()
    scheduler.step()
      
    #validation
    modelCNN.eval()
    with torch.no_grad():
        valid_predictions = modelCNN(valid_x_feature, valid_y_feature).squeeze(1)
        valid_loss = torch.sqrt(((valid_predictions - valid_target)**2).mean()).item()

    print(f'| Epoch: {epoch:02} | Train rmse: {train_loss:.6f} | Val. rmse: {valid_loss:.6f} |')

| Epoch: 01 | Train rmse: 101.400627 | Val. rmse: 90.121140 |
| Epoch: 02 | Train rmse: 95.609360 | Val. rmse: 85.774170 |
| Epoch: 03 | Train rmse: 91.666656 | Val. rmse: 81.683434 |
| Epoch: 04 | Train rmse: 86.842010 | Val. rmse: 77.849220 |
| Epoch: 05 | Train rmse: 83.879333 | Val. rmse: 74.272964 |
| Epoch: 06 | Train rmse: 78.906982 | Val. rmse: 70.956192 |
| Epoch: 07 | Train rmse: 75.558289 | Val. rmse: 67.890869 |
| Epoch: 08 | Train rmse: 72.440826 | Val. rmse: 65.069473 |
| Epoch: 09 | Train rmse: 69.150032 | Val. rmse: 62.486401 |
| Epoch: 10 | Train rmse: 67.034012 | Val. rmse: 60.132481 |
| Epoch: 11 | Train rmse: 63.825657 | Val. rmse: 57.998844 |
| Epoch: 12 | Train rmse: 61.790863 | Val. rmse: 56.071560 |
| Epoch: 13 | Train rmse: 59.683941 | Val. rmse: 54.339962 |
| Epoch: 14 | Train rmse: 57.780762 | Val. rmse: 52.793133 |
| Epoch: 15 | Train rmse: 56.362370 | Val. rmse: 51.420918 |
| Epoch: 16 | Train rmse: 54.823067 | Val. rmse: 50.211590 |
| Epoch: 17 | Train rms

In [None]:
#####Check the test RMSE####
modelCNN.eval()  # evaluation mode (disables dropout)

test_x_feature = orgTensorT.to(device)
test_y_feature = editTensorT.to(device)
test_target = labelTensorT.to(device)

# Single forward pass over the whole test set, without gradients.
with torch.no_grad():
    test_predictions = modelCNN(test_x_feature, test_y_feature).squeeze(1)
    test_loss = torch.sqrt(((test_predictions - test_target)**2).mean()).item()

# train_loss is whatever the training cell left behind, i.e. the loss of its last epoch.
print(f'train rmse: {train_loss:.6f} ; test. rmse: {test_loss:.6f} ')

train rmse: 0.635149 ; test. rmse: 0.611087 


### 1 input CNN

In [None]:
# Set up hyperparameters
# Seed every RNG this cell relies on. The model's weight initialisation and its
# dropout masks are drawn from torch's generator, so seeding numpy and `random`
# alone does not make the run repeatable.
np.random.seed(SEED)
random.seed(SEED)
torch.manual_seed(SEED)

EPOCHS = 300
LRATE = 0.001

EMBEDDING_DIM = 50
FC_OUT_DIM = 25

N_OUT_CHANNELS = 100

WINDOW_SIZE = 3
DROPOUT = 0.7

# Construct the model
modelCNN1 = oneInputCNN(len(word2idx), EMBEDDING_DIM, N_OUT_CHANNELS, WINDOW_SIZE, FC_OUT_DIM, DROPOUT)

modelCNN1 = modelCNN1.to(device)

optimizer = optim.Adam(modelCNN1.parameters(), lr=LRATE)

#scheduler and its steps
steps = 50
scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, steps)

#prepare train and validation dataset
# (the full tensors are used: every epoch is a single full-batch update)
orgSen = orgTensor.to(device)
edtSen = editTensor.to(device)   # only needed by the commented-out alternative below
target = labelTensor.to(device)

valid_orgSen = orgTensorV.to(device)
valid_target = labelTensorV.to(device)

# NOTE: the loop variable rebinds the global name `epoch`, and this cell also
# rebinds `optimizer` and `scheduler` used by earlier cells.
for epoch in range(1, EPOCHS + 1):
    modelCNN1.train()
    optimizer.zero_grad()

    #original sentence as input
    predictions = modelCNN1(orgSen).squeeze(1)

    #edited sentence as input
    #predictions = modelCNN1(edtSen).squeeze(1)

    # RMSE over the whole training set
    loss = torch.sqrt(((predictions - target)**2).mean())
    train_loss = loss.item()

    loss.backward()
    optimizer.step()
    scheduler.step()

    #validation
    modelCNN1.eval()
    with torch.no_grad():
        valid_predictions = modelCNN1(valid_orgSen).squeeze(1)
        valid_loss = torch.sqrt(((valid_predictions - valid_target)**2).mean()).item()

    print(f'| Epoch: {epoch:02} | Train rmse: {train_loss:.6f} | Val. rmse: {valid_loss:.6f} |')

| Epoch: 01 | Train rmse: 89.754951 | Val. rmse: 86.275620 |
| Epoch: 02 | Train rmse: 87.111687 | Val. rmse: 83.990944 |
| Epoch: 03 | Train rmse: 84.561600 | Val. rmse: 81.775063 |
| Epoch: 04 | Train rmse: 82.587685 | Val. rmse: 79.628426 |
| Epoch: 05 | Train rmse: 80.237961 | Val. rmse: 77.552330 |
| Epoch: 06 | Train rmse: 78.214066 | Val. rmse: 75.546646 |
| Epoch: 07 | Train rmse: 76.163582 | Val. rmse: 73.611198 |
| Epoch: 08 | Train rmse: 74.180397 | Val. rmse: 71.748070 |
| Epoch: 09 | Train rmse: 72.204643 | Val. rmse: 69.957703 |
| Epoch: 10 | Train rmse: 70.243958 | Val. rmse: 68.237930 |
| Epoch: 11 | Train rmse: 68.853790 | Val. rmse: 66.586609 |
| Epoch: 12 | Train rmse: 67.087128 | Val. rmse: 65.002502 |
| Epoch: 13 | Train rmse: 65.525093 | Val. rmse: 63.485504 |
| Epoch: 14 | Train rmse: 63.757793 | Val. rmse: 62.034382 |
| Epoch: 15 | Train rmse: 62.500763 | Val. rmse: 60.647991 |
| Epoch: 16 | Train rmse: 60.910954 | Val. rmse: 59.325176 |
| Epoch: 17 | Train rmse

In [None]:
#####Check the test RMSE####
modelCNN1.eval()  # evaluation mode (disables dropout)

# The model is evaluated on the original (unedited) test sentences.
test_x_feature = orgTensorT.to(device)
test_target = labelTensorT.to(device)

# Single forward pass over the whole test set, without gradients.
with torch.no_grad():
    test_predictions = modelCNN1(test_x_feature).squeeze(1)
    test_loss = torch.sqrt(((test_predictions - test_target)**2).mean()).item()

# train_loss is whatever the training cell left behind, i.e. the loss of its last epoch.
print(f'train rmse: {train_loss:.6f} ; test. rmse: {test_loss:.6f}')

train rmse: 0.535309 ; test. rmse: 0.632448


### BiLSTM

In [None]:
# Set up hyperparameters
INPUT_DIM = len(word2idx)   # vocabulary size handed to the embedding layer
EMBEDDING_DIM = 30
# NOTE(review): rebinding BATCH_SIZE here does not change the dataloaders used
# below; they were created earlier with the BATCH_SIZE in force at that time.
# The model therefore starts with a hidden state sized for 8 examples --
# presumably train1in (defined elsewhere) resets batch_size/hidden per batch; confirm.
BATCH_SIZE = 8

# Arguments: embedding_dim, hidden_dim (10), vocab_size, batch_size, device.
modelBiLSTM = BiLSTM(EMBEDDING_DIM, 10, INPUT_DIM, BATCH_SIZE, device)
print("BiLSTM Model initialised.")

modelBiLSTM.to(device)

# No dataloader is created in this cell; the message refers to the loaders built earlier.
print("Dataloaders created.")

# loss_fn and optimizer are not passed to train1in below, so it presumably
# reads them as globals -- verify against its definition.
loss_fn = nn.MSELoss()
loss_fn = loss_fn.to(device)

optimizer = torch.optim.Adam(modelBiLSTM.parameters(),0.0001)

# model training
# original sentence as input
train1in(train_dataloader_1Org, valid_dataloader_1Org, modelBiLSTM, 12)
# Edited sentence as input
#train1in(train_dataloader_1Edt, valid_dataloader_1Edt, modelBiLSTM, 8)

BiLSTM Model initialised.
Dataloaders created.
Training model.
| Epoch: 01 | Train Loss: 1.26 | Train MSE: 1.26 | Train RMSE: 1.12370 |         Val. Loss: 1.14 | Val. MSE: 1.14 |  Val. RMSE: 1.06960 |
| Epoch: 02 | Train Loss: 0.96 | Train MSE: 0.96 | Train RMSE: 0.97847 |         Val. Loss: 0.79 | Val. MSE: 0.79 |  Val. RMSE: 0.89110 |
| Epoch: 03 | Train Loss: 0.57 | Train MSE: 0.57 | Train RMSE: 0.75269 |         Val. Loss: 0.38 | Val. MSE: 0.38 |  Val. RMSE: 0.61858 |
| Epoch: 04 | Train Loss: 0.32 | Train MSE: 0.32 | Train RMSE: 0.56509 |         Val. Loss: 0.32 | Val. MSE: 0.32 |  Val. RMSE: 0.56151 |
| Epoch: 05 | Train Loss: 0.31 | Train MSE: 0.31 | Train RMSE: 0.55326 |         Val. Loss: 0.31 | Val. MSE: 0.31 |  Val. RMSE: 0.56107 |
| Epoch: 06 | Train Loss: 0.31 | Train MSE: 0.31 | Train RMSE: 0.55294 |         Val. Loss: 0.31 | Val. MSE: 0.31 |  Val. RMSE: 0.56087 |
| Epoch: 07 | Train Loss: 0.31 | Train MSE: 0.31 | Train RMSE: 0.55266 |         Val. Loss: 0.31 | Val. MSE: 

In [None]:
#####Check the test RMSE####

# Start from an empty tensor on the device; batch predictions are appended to it.
BiLSTM_Org_predictions = torch.empty(0)
BiLSTM_Org_predictions = BiLSTM_Org_predictions.to(device)
test_target = labelTensorT.to(device)

modelBiLSTM.eval()

with torch.no_grad():
  for batch in test_dataloader_org:
    feature = batch
    feature = feature.to(device)
    # The last batch can be smaller, so resize and reset the recurrent state
    # to match each batch before the forward pass.
    modelBiLSTM.batch_size = feature.shape[0]
    modelBiLSTM.hidden = modelBiLSTM.init_hidden()
    pdc = modelBiLSTM(feature).squeeze(1)
    BiLSTM_Org_predictions = torch.cat((BiLSTM_Org_predictions,pdc), 0)

# test_dataloader_org is not shuffled, so the concatenated predictions line up with test_target.
test_loss = torch.sqrt(((BiLSTM_Org_predictions - test_target)**2).mean()).item()
print(f'test. rmse: {test_loss:.6f}')


test. rmse: 0.578346


### 2 inputs GRU

In [None]:
# Set up hyperparameters
INPUT_DIM = len(word2idx)   # vocabulary size handed to the embedding layer
# NOTE(review): EMBEDDING_DIM and BATCH_SIZE are set here but the constructor
# below is called with the literals 25 (embedding_dim) and 8 (batch_size)
# instead. BATCH_SIZE = 12 stays in the global namespace for later cells.
EMBEDDING_DIM = 300
BATCH_SIZE = 12

# Arguments: embedding_dim, hidden_dim, vocab_size, batch_size, device.
modelgru = twoInputGRU(25, 10, INPUT_DIM, 8, device)
print("GRU Model initialised.")

modelgru.to(device)

# loss_fn and optimizer are not passed to train2in below, so it presumably
# reads them as globals -- verify against its definition.
loss_fn = nn.MSELoss()
loss_fn = loss_fn.to(device)

optimizer = torch.optim.Adam(modelgru.parameters(),lr=0.001)

# Optional cosine learning-rate schedule (disabled):
# steps = 8
# scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, steps)

# Train on (original, edited) sentence pairs; the last argument is presumably the number of epochs.
train2in(train_dataloader, valid_dataloader, modelgru, 10)

GRU Model initialised.
Training model.
| Epoch: 01 | Train Loss: 0.32 | Train MSE: 0.32 | Train RMSE: 0.56182 |         Val. Loss: 0.32 | Val. MSE: 0.32 |  Val. RMSE: 0.56549 |
| Epoch: 02 | Train Loss: 0.30 | Train MSE: 0.30 | Train RMSE: 0.54849 |         Val. Loss: 0.33 | Val. MSE: 0.33 |  Val. RMSE: 0.57029 |
| Epoch: 03 | Train Loss: 0.29 | Train MSE: 0.29 | Train RMSE: 0.54060 |         Val. Loss: 0.33 | Val. MSE: 0.33 |  Val. RMSE: 0.57859 |
| Epoch: 04 | Train Loss: 0.28 | Train MSE: 0.28 | Train RMSE: 0.52970 |         Val. Loss: 0.39 | Val. MSE: 0.39 |  Val. RMSE: 0.62508 |
| Epoch: 05 | Train Loss: 0.28 | Train MSE: 0.28 | Train RMSE: 0.52818 |         Val. Loss: 0.34 | Val. MSE: 0.34 |  Val. RMSE: 0.58455 |
| Epoch: 06 | Train Loss: 0.26 | Train MSE: 0.26 | Train RMSE: 0.51133 |         Val. Loss: 0.35 | Val. MSE: 0.35 |  Val. RMSE: 0.59098 |
| Epoch: 07 | Train Loss: 0.28 | Train MSE: 0.28 | Train RMSE: 0.52492 |         Val. Loss: 0.35 | Val. MSE: 0.35 |  Val. RMSE: 0.594

In [None]:
# GRU model with 2 inputs: run the trained model over the test set batch by
# batch, collect the predictions and report the test RMSE.
GRU_2in_predictions = torch.empty(0)
GRU_2in_predictions = GRU_2in_predictions.to(device)
test_target = labelTensorT.to(device)

modelgru.eval()
with torch.no_grad():
  for batch in test_dataloader_2in:
    feature1, feature2 = batch
    # Bug fix: the second input used to be overwritten with feature1.to(device),
    # so the model was evaluated with the same tensor on both inputs.
    # NOTE: any RMSE recorded before this fix was computed that way.
    feature1, feature2 = feature1.to(device), feature2.to(device)

    # The model stores its batch size and hidden state as attributes, so both
    # are reset to match the size of the current batch before the forward pass.
    modelgru.batch_size = feature1.shape[0]
    modelgru.hidden = modelgru.init_hidden()

    pdc = modelgru(feature1, feature2).squeeze(1)
    GRU_2in_predictions = torch.cat((GRU_2in_predictions, pdc), 0)

# Root-mean-square error over the concatenated test predictions.
test_loss = torch.sqrt(((GRU_2in_predictions - test_target)**2).mean()).item()
print(f'test. rmse: {test_loss:.6f}')

test. rmse: 0.617449


### 1 input GRU

In [None]:
# Build the single-input GRU model from INPUT_DIM and BATCH_SIZE.
# NOTE(review): 25 and 10 are presumably the embedding and hidden sizes —
# confirm against the GRU constructor.
modelgru2 = GRU(25, 10, INPUT_DIM, BATCH_SIZE, device)

modelgru2.to(device)


# loss_fn and optimizer are not passed to train1in below; presumably the
# training helper reads them as notebook globals — verify against train1in.
loss_fn = nn.MSELoss()
loss_fn = loss_fn.to(device)

# The second positional argument of Adam is the learning rate (lr=0.0001).
optimizer = torch.optim.Adam(modelgru2.parameters(),0.0001)

#original sentence as input
# NOTE(review): the trailing 12 is presumably the number of epochs — confirm.
train1in(train_dataloader_1Org, valid_dataloader_1Org, modelgru2, 12)

#edited sentence as input
#train1in(train_dataloader_1Edt, valid_dataloader_1Edt, modelgru2, 12)

Training model.
| Epoch: 01 | Train Loss: 0.96 | Train MSE: 0.96 | Train RMSE: 0.98009 |         Val. Loss: 0.82 | Val. MSE: 0.82 |  Val. RMSE: 0.90603 |
| Epoch: 02 | Train Loss: 0.66 | Train MSE: 0.66 | Train RMSE: 0.81131 |         Val. Loss: 0.54 | Val. MSE: 0.54 |  Val. RMSE: 0.73646 |
| Epoch: 03 | Train Loss: 0.43 | Train MSE: 0.43 | Train RMSE: 0.65531 |         Val. Loss: 0.37 | Val. MSE: 0.37 |  Val. RMSE: 0.60543 |
| Epoch: 04 | Train Loss: 0.32 | Train MSE: 0.32 | Train RMSE: 0.56934 |         Val. Loss: 0.32 | Val. MSE: 0.32 |  Val. RMSE: 0.56406 |
| Epoch: 05 | Train Loss: 0.31 | Train MSE: 0.31 | Train RMSE: 0.55356 |         Val. Loss: 0.31 | Val. MSE: 0.31 |  Val. RMSE: 0.56071 |
| Epoch: 06 | Train Loss: 0.31 | Train MSE: 0.31 | Train RMSE: 0.55272 |         Val. Loss: 0.31 | Val. MSE: 0.31 |  Val. RMSE: 0.56057 |
| Epoch: 07 | Train Loss: 0.31 | Train MSE: 0.31 | Train RMSE: 0.55254 |         Val. Loss: 0.31 | Val. MSE: 0.31 |  Val. RMSE: 0.56054 |
| Epoch: 08 | Trai

In [None]:
# Test-set evaluation of the single-input GRU.
# The original sentences are used as input here; to evaluate on the edited
# sentences instead, iterate over test_dataloader_edt in the loop below.
test_target = labelTensorT.to(device)
GRU_1in_predictions = torch.empty(0, device=device)

modelgru2.eval()
with torch.no_grad():
  for batch in test_dataloader_org:
    tokens = batch.to(device)

    # Reset the model's stored batch size and hidden state for this batch.
    modelgru2.batch_size = tokens.shape[0]
    modelgru2.hidden = modelgru2.init_hidden()

    batch_pred = modelgru2(tokens).squeeze(1)
    GRU_1in_predictions = torch.cat((GRU_1in_predictions, batch_pred), 0)

# RMSE between the concatenated predictions and the test labels.
test_loss = (GRU_1in_predictions - test_target).pow(2).mean().sqrt().item()
print(f'test. rmse: {test_loss:.6f}')

test. rmse: 0.577395


# To make the results easier to compare, all test evaluations are summarized here.

### CNN


In [None]:
# Move the full test tensors (original sentences, edited sentences, labels)
# to the evaluation device once; both CNN models below reuse them.
test_target = labelTensorT.to(device)
orgSenTest = orgTensorT.to(device)
edtSenTest = editTensorT.to(device)

# CNN taking both the original and the edited sentence.
modelCNN.eval()
with torch.no_grad():
  cnn_test_predictions = modelCNN(orgSenTest, edtSenTest).squeeze(1)
test_loss = (cnn_test_predictions - test_target).pow(2).mean().sqrt().item()
print(f'2 input CNN: test. rmse: {test_loss:.6f} ')

# CNN taking a single sentence; evaluated here on the original sentences.
# To evaluate on the edited sentences instead, use:
#   cnn_edit_test_predictions = modelCNN1(edtSenTest).squeeze(1)
modelCNN1.eval()
with torch.no_grad():
  cnn_ori_test_predictions = modelCNN1(orgSenTest).squeeze(1)
test_loss = (cnn_ori_test_predictions - test_target).pow(2).mean().sqrt().item()
print(f'1 input CNN: test. rmse: {test_loss:.6f} ')

2 input CNN: test. rmse: 0.611087 
1 input CNN: test. rmse: 0.632448 


### BiLSTM

In [None]:
def _predict_one_input(model, dataloader):
  """Run a single-input recurrent model over a dataloader and return all predictions.

  Args:
    model: model exposing ``batch_size``, ``hidden`` and ``init_hidden()``;
      calling it on a batch must return a tensor that ``squeeze(1)`` turns
      into one prediction per example.
    dataloader: iterable yielding one input tensor per batch.

  Returns:
    1-D tensor on ``device`` with the predictions of every batch, in order.
  """
  predictions = torch.empty(0).to(device)
  model.eval()
  with torch.no_grad():
    for batch in dataloader:
      feature = batch.to(device)
      # The model stores its batch size and hidden state as attributes, so
      # both are reset to match the size of the current batch.
      model.batch_size = feature.shape[0]
      model.hidden = model.init_hidden()
      predictions = torch.cat((predictions, model(feature).squeeze(1)), 0)
  return predictions


test_target = labelTensorT.to(device)

#biLSTM model with original sentence as only one input
BiLSTM_Org_predictions = _predict_one_input(modelBiLSTM, test_dataloader_org)
test_loss = torch.sqrt(((BiLSTM_Org_predictions - test_target)**2).mean()).item()
print(f'1 input BiLstm with original sentence: Test rmse: {test_loss:.6f} ')

#biLSTM model with edited sentence as only one input
# NOTE(review): the same modelBiLSTM instance is evaluated on both loaders —
# confirm this is intended rather than a separately trained model.
BiLSTM_Edt_predictions = _predict_one_input(modelBiLSTM, test_dataloader_edt)
test_loss = torch.sqrt(((BiLSTM_Edt_predictions - test_target)**2).mean()).item()
print(f'1 input BiLstm with edited sentence: Test rmse: {test_loss:.6f} ')

1 input BiLstm with original sentence: Test rmse: 0.578346 
1 input BiLstm with edited sentence: Test rmse: 0.578442 


### GRU

In [None]:
# GRU model with 2 inputs: collect test predictions batch by batch and report
# the test RMSE.
GRU_2in_predictions = torch.empty(0)
GRU_2in_predictions = GRU_2in_predictions.to(device)
test_target = labelTensorT.to(device)

modelgru.eval()
with torch.no_grad():
  for batch in test_dataloader_2in:
    feature1, feature2 = batch
    # Bug fix: the second input used to be overwritten with feature1.to(device),
    # so the model was evaluated with the same tensor on both inputs.
    # NOTE: any RMSE recorded before this fix was computed that way.
    feature1, feature2 = feature1.to(device), feature2.to(device)

    # Reset the model's stored batch size and hidden state for this batch.
    modelgru.batch_size = feature1.shape[0]
    modelgru.hidden = modelgru.init_hidden()

    pdc = modelgru(feature1, feature2).squeeze(1)
    GRU_2in_predictions = torch.cat((GRU_2in_predictions, pdc), 0)

test_loss = torch.sqrt(((GRU_2in_predictions - test_target)**2).mean()).item()
print(f'2 input GRU: Test rmse: {test_loss:.6f} ')

# GRU model with 1 input: same procedure with the single-input model.
GRU_1in_predictions = torch.empty(0)
GRU_1in_predictions = GRU_1in_predictions.to(device)
modelgru2.eval()
with torch.no_grad():
  # Original sentence as input.
  # (To use the edited sentence instead, iterate over test_dataloader_edt:)
  # (for batch in test_dataloader_edt:)
  for batch in test_dataloader_org:
    feature = batch
    feature = feature.to(device)
    # Reset the model's stored batch size and hidden state for this batch.
    modelgru2.batch_size = feature.shape[0]
    modelgru2.hidden = modelgru2.init_hidden()
    pdc = modelgru2(feature).squeeze(1)
    GRU_1in_predictions = torch.cat((GRU_1in_predictions, pdc), 0)

test_loss = torch.sqrt(((GRU_1in_predictions - test_target)**2).mean()).item()
print(f'1 input GRU: Test rmse: {test_loss:.6f} ')

2 input GRU: Test rmse: 0.617449 
1 input GRU: Test rmse: 0.577395 


## Output prediction files

In [None]:
## write down! ##
def write_down(predictions, test_data_frame, out_loc):
    """Write the predicted scores next to their ids as a CSV file.

    Args:
        predictions: one predicted score per row of ``test_data_frame``.
            Objects exposing ``detach`` (torch tensors) are detached, moved to
            the CPU and flattened to a 1-D NumPy array first, so the CSV holds
            plain numbers; any other value is handed to pandas unchanged.
        test_data_frame: DataFrame with an ``id`` column. It is not modified.
        out_loc: path of the CSV file to write (columns ``id`` and ``pred``).
    """
    if hasattr(predictions, 'detach'):  # duck-typed torch.Tensor check
        predictions = predictions.detach().cpu().numpy().reshape(-1)

    # Build the output on a copy so repeated calls do not keep overwriting a
    # 'pred' column on the caller's DataFrame.
    output = test_data_frame[['id']].copy()
    output['pred'] = predictions
    output.to_csv(out_loc, index=False)
    print('output finished, address: '+os.path.abspath(out_loc))

In [None]:
# Baseline predictions are held as a NumPy array; wrap them in a tensor.
baselinePre = torch.from_numpy(predictedT)

# (output file, predictions) pairs, written in this order. Model predictions
# are moved to the CPU before being handed to write_down.
prediction_files = [
    ('baselineScore.csv', baselinePre),
    ('gru2in.csv', GRU_2in_predictions.cpu()),
    ('gru1in.csv', GRU_1in_predictions.cpu()),
    ('bilstm1in_org.csv', BiLSTM_Org_predictions.cpu()),
    ('CNN_two_input_predictionScore.csv', cnn_test_predictions.cpu()),
    ('CNN_ori_predictionScore.csv', cnn_ori_test_predictions.cpu()),
]

for out_loc, predictions in prediction_files:
    write_down(predictions, test_df, out_loc)

output finished, address: /content/baselineScore.csv
output finished, address: /content/gru2in.csv
output finished, address: /content/gru1in.csv
output finished, address: /content/bilstm1in_org.csv
output finished, address: /content/CNN_two_input_predictionScore.csv
output finished, address: /content/CNN_ori_predictionScore.csv
