### Reading datasets

In [None]:
# !pip install sentence_transformers

In [None]:
from torch.utils.data import DataLoader
import math
from sentence_transformers import models, losses
from sentence_transformers import SentencesDataset, LoggingHandler, SentenceTransformer, util
from sentence_transformers.evaluation import EmbeddingSimilarityEvaluator, LabelAccuracyEvaluator
from sentence_transformers.readers import *
import logging
from datetime import datetime
import gzip
import csv
import os

from tqdm.auto import tqdm
import pandas as pd
import numpy as np
import csv
import sys

# Root logger configuration: INFO level, timestamped messages, with records
# handed to the LoggingHandler imported from sentence_transformers.
logging.basicConfig(
    level=logging.INFO,
    handlers=[LoggingHandler()],
    format='%(asctime)s - %(message)s',
    datefmt='%Y-%m-%d %H:%M:%S',
)

def _read_tsv(input_file, quotechar=None):
    """Read a tab-separated file into a list of rows.

    Args:
        input_file: Path of the UTF-8 encoded TSV file to read.
        quotechar: Quote character handed to ``csv.reader`` unchanged
            (default ``None``).

    Returns:
        A list with one entry per record, in file order; each entry is the
        list of string cells of that record. No row is skipped, so a header
        row (if the file has one) is the first entry.
    """
    # newline="" leaves line-ending handling to the csv module, as the csv
    # documentation requires for file objects passed to csv.reader.
    with open(input_file, "r", encoding="utf-8", newline="") as f:
        return list(csv.reader(f, delimiter="\t", quotechar=quotechar))
    
def _create_examples_snli(lines, set_type):
    """Build ``[guid, text_a, text_b, label]`` records from SNLI rows.

    The first row of ``lines`` is treated as a header and skipped. For every
    other row the guid is ``"<set_type>-<column 0>"``, the two texts come
    from columns 7 and 8, and the label is the last column.
    """
    rows = iter(lines)
    next(rows, None)  # discard the header row; harmless when input is empty
    return [
        ["%s-%s" % (set_type, row[0]), row[7], row[8], row[-1]]
        for row in rows
    ]

def _create_examples_mnli(lines, set_type):
    """Build ``[guid, text_a, text_b, label]`` records from MNLI rows.

    The first row of ``lines`` is treated as a header and skipped. For every
    other row the guid is ``"<set_type>-<column 0>"``, the two texts come
    from columns 8 and 9, and the label is the last column.
    """
    rows = iter(lines)
    next(rows, None)  # discard the header row; harmless when input is empty
    return [
        ["%s-%s" % (set_type, row[0]), row[8], row[9], row[-1]]
        for row in rows
    ]

# Load the three SNLI splits. The second argument is the guid prefix, so each
# split gets its own prefix and dev guids cannot collide with test guids.
train_snli = _create_examples_snli(_read_tsv('../SemBERT/glue_data/SNLI/train.tsv'), 'train_s')
dev_snli = _create_examples_snli(_read_tsv('../SemBERT/glue_data/SNLI/dev.tsv'), 'dev_s')
test_snli = _create_examples_snli(_read_tsv('../SemBERT/glue_data/SNLI/test.tsv'), 'test_s')

### Building Train, Dev, Test

In [None]:
# Turn the [guid, text_a, text_b, label] rows of every split into
# InputExample objects whose label is the integer class id from label2int.
logging.info("Read AllNLI train dataset")
label2int = {"contradiction": 0, "entailment": 1, "neutral": 2}
train_nli_samples = []
dev_nli_samples = []
test_nli_samples = []

# Same conversion for all three splits: (source rows, destination list).
for split_rows, split_samples in (
    (train_snli, train_nli_samples),
    (dev_snli, dev_nli_samples),
    (test_snli, test_nli_samples),
):
    for row in tqdm(split_rows):
        label_id = label2int[row[3]]
        split_samples.append(
            InputExample(guid=row[0], texts=[row[1], row[2]], label=label_id)
        )

In [None]:
# Encoder checkpoint and batch size shared by every loader below
model_name = 'bert-base-uncased'
batch_size = 64

# Token-level encoder: BERT maps tokens to embeddings
word_embedding_model = models.BERT(model_name)

# Sentence-level vector: mean over token embeddings (CLS and max pooling off)
pooling_model = models.Pooling(
    word_embedding_model.get_word_embedding_dimension(),
    pooling_mode_cls_token=False,
    pooling_mode_max_tokens=False,
    pooling_mode_mean_tokens=True,
)

model = SentenceTransformer(modules=[word_embedding_model, pooling_model])

# Training split: dataset, shuffled loader, and a softmax loss with one
# output per entry of label2int
train_data_nli = SentencesDataset(train_nli_samples, model=model)
train_dataloader_nli = DataLoader(train_data_nli, batch_size=batch_size, shuffle=True)
train_loss_nli = losses.SoftmaxLoss(
    model=model,
    num_labels=len(label2int),
    sentence_embedding_dimension=model.get_sentence_embedding_dimension(),
)

# Dev split
dev_data_nli = SentencesDataset(dev_nli_samples, model=model)
dev_dataloader_nli = DataLoader(dev_data_nli, batch_size=batch_size, shuffle=True)

# Test split
test_data_nli = SentencesDataset(test_nli_samples, model=model)
test_dataloader_nli = DataLoader(test_data_nli, batch_size=batch_size, shuffle=True)

# Label-accuracy evaluator over the test loader, scoring with the classifier
# held by train_loss_nli
test_evaluator = LabelAccuracyEvaluator(
    test_dataloader_nli,
    softmax_model=train_loss_nli,
    name='nli_test',
)

In [None]:
# Output location and training schedule for the run on the full training set
model_name = 'bert-base-uncased'
model_save_path = 'output/snli_'+model_name+'-full_iteration_'

num_epochs = 1
warmup_steps = math.ceil(len(train_data_nli) * num_epochs / batch_size * 0.1) #10% of train data for warm-up
logging.info("Warmup-steps: {}".format(warmup_steps))
train_objectives = [(train_dataloader_nli, train_loss_nli)]

# Score histories: entry 0 is the untrained model, entry k follows the k-th fit call
validation_performance = []
test_performance = []

# Distinct evaluator names so dev and test output can be told apart
test_evaluator = LabelAccuracyEvaluator(test_dataloader_nli, name = 'nli_test', softmax_model = train_loss_nli)
dev_evaluator = LabelAccuracyEvaluator(dev_dataloader_nli, name = 'nli_dev', softmax_model = train_loss_nli)

# Baseline scores before any fine-tuning
validation_performance.append(model.evaluate(dev_evaluator))
test_performance.append(model.evaluate(test_evaluator))
print(f'Iteration - {0} ...')
print(f'Validation performance - {validation_performance[-1]} ...')
print(f'Test performance - {test_performance[-1]} ...')

for i in range(3):
    # Pass the schedule computed above explicitly; without these arguments
    # fit() falls back to its own defaults and the logged warm-up value is
    # never applied.
    model.fit(train_objectives=train_objectives,
              epochs=num_epochs,
              warmup_steps=warmup_steps,
              output_path=model_save_path+str(i+1))
    validation_performance.append(model.evaluate(dev_evaluator))
    test_performance.append(model.evaluate(test_evaluator))
    print(f'Iteration - {i+1} ...')
    print(f'Validation performance - {validation_performance[-1]} ...')
    print(f'Test performance - {test_performance[-1]} ...')
    # Keep a checkpoint per iteration
    model.save(model_save_path+str(i+1))

### The same experiment with the truncated model

In [None]:
# Replace the training rows with the filtered training file; dev_snli and
# test_snli are reused as they are.
train_snli = _create_examples_snli(
    _read_tsv('../SemBERT/glue_data/SNLI/train_filtered.tsv'),
    'train_s',
)

# Turn the [guid, text_a, text_b, label] rows of every split into
# InputExample objects whose label is the integer class id from label2int.
logging.info("Read AllNLI train dataset")
label2int = {"contradiction": 0, "entailment": 1, "neutral": 2}
train_nli_samples = []
dev_nli_samples = []
test_nli_samples = []

# Same conversion for all three splits: (source rows, destination list).
for split_rows, split_samples in (
    (train_snli, train_nli_samples),
    (dev_snli, dev_nli_samples),
    (test_snli, test_nli_samples),
):
    for row in tqdm(split_rows):
        label_id = label2int[row[3]]
        split_samples.append(
            InputExample(guid=row[0], texts=[row[1], row[2]], label=label_id)
        )
    
    
# Encoder checkpoint and batch size shared by every loader below
model_name = 'bert-base-uncased'
batch_size = 64

# Fresh token-level encoder: BERT maps tokens to embeddings
word_embedding_model = models.BERT(model_name)

# Sentence-level vector: mean over token embeddings (CLS and max pooling off)
pooling_model = models.Pooling(
    word_embedding_model.get_word_embedding_dimension(),
    pooling_mode_cls_token=False,
    pooling_mode_max_tokens=False,
    pooling_mode_mean_tokens=True,
)

model = SentenceTransformer(modules=[word_embedding_model, pooling_model])

# Training split: dataset, shuffled loader, and a softmax loss with one
# output per entry of label2int
train_data_nli = SentencesDataset(train_nli_samples, model=model)
train_dataloader_nli = DataLoader(train_data_nli, batch_size=batch_size, shuffle=True)
train_loss_nli = losses.SoftmaxLoss(
    model=model,
    num_labels=len(label2int),
    sentence_embedding_dimension=model.get_sentence_embedding_dimension(),
)

# Dev split
dev_data_nli = SentencesDataset(dev_nli_samples, model=model)
dev_dataloader_nli = DataLoader(dev_data_nli, batch_size=batch_size, shuffle=True)

# Test split
test_data_nli = SentencesDataset(test_nli_samples, model=model)
test_dataloader_nli = DataLoader(test_data_nli, batch_size=batch_size, shuffle=True)

# Label-accuracy evaluator over the test loader, scoring with the classifier
# held by train_loss_nli
test_evaluator = LabelAccuracyEvaluator(
    test_dataloader_nli,
    softmax_model=train_loss_nli,
    name='nli_test',
)


# Output location and training schedule for the run on the filtered training set
model_name = 'bert-base-uncased'
model_save_path = 'output/snli_'+model_name+'-truncated_iteration_'

num_epochs = 1
warmup_steps = math.ceil(len(train_data_nli) * num_epochs / batch_size * 0.1) #10% of train data for warm-up
logging.info("Warmup-steps: {}".format(warmup_steps))
train_objectives = [(train_dataloader_nli, train_loss_nli)]

# Distinct evaluator names so dev and test output can be told apart
test_evaluator = LabelAccuracyEvaluator(test_dataloader_nli, name = 'nli_test', softmax_model = train_loss_nli)
dev_evaluator = LabelAccuracyEvaluator(dev_dataloader_nli, name = 'nli_dev', softmax_model = train_loss_nli)

# NOTE: validation_performance / test_performance are not re-created here, so
# the scores of this run are appended after whatever the lists already hold.
validation_performance.append(model.evaluate(dev_evaluator))
test_performance.append(model.evaluate(test_evaluator))
print(f'Iteration - {0} ...')
print(f'Validation performance - {validation_performance[-1]} ...')
print(f'Test performance - {test_performance[-1]} ...')

for i in range(3):
    # Pass the schedule computed above explicitly; without these arguments
    # fit() falls back to its own defaults and the logged warm-up value is
    # never applied.
    model.fit(train_objectives=train_objectives,
              epochs=num_epochs,
              warmup_steps=warmup_steps,
              output_path=model_save_path+str(i+1))
    validation_performance.append(model.evaluate(dev_evaluator))
    test_performance.append(model.evaluate(test_evaluator))
    print(f'Iteration - {i+1} ...')
    print(f'Validation performance - {validation_performance[-1]} ...')
    print(f'Test performance - {test_performance[-1]} ...')
    # Keep a checkpoint per iteration
    model.save(model_save_path+str(i+1))

## Dumping results

In [None]:
import pickle

# Bundle both score histories into one mapping
a = dict(
    validation_performance=validation_performance,
    test_performance=test_performance,
)

# Persist the mapping with the highest pickle protocol available
with open('results.pickle', 'wb') as handle:
    pickle.dump(a, handle, pickle.HIGHEST_PROTOCOL)

# Read the file back and print whether the round trip preserved the content
with open('results.pickle', 'rb') as handle:
    b = pickle.load(handle)

print(a == b)

# Experiments with efficiency:

In [None]:
from tqdm.auto import tqdm

# Encode the texts of each test example with a separate encode() call and
# keep the results in the same order as test_nli_samples.
encoder = train_loss_nli.model
embeddings = []
for sam in tqdm(test_nli_samples):
    pair_embeddings = encoder.encode(sam.texts, show_progress_bar = False)
    embeddings.append(pair_embeddings)

In [None]:
# Only `from torch.utils.data import DataLoader` is imported earlier in this
# file, so the bare name `torch` has to be imported before it is used here.
import torch

classifier = train_loss_nli.classifier
# Assumes the classifier is a torch module with at least one parameter
# (TODO confirm); the input must live on the same device as its weights.
classifier_device = next(classifier.parameters()).device

preds = []
# Inference only: no autograd graph is needed.
with torch.no_grad():
    for embs in tqdm(embeddings):
        # Feature vector: first embedding, second embedding, and their
        # element-wise absolute difference, concatenated.
        d = np.hstack([embs[0], embs[1], np.abs(embs[0] - embs[1])])
        features = torch.as_tensor(d, dtype=torch.float32, device=classifier_device)
        # Move the scores back to the CPU before converting to NumPy.
        scores = classifier(features).detach().cpu().numpy()
        preds.append(np.argmax(scores))