# Prepare dataset and pre-trained model

In [1]:
# if run from here
# mount it
from google.colab import drive
import os
drive.mount('/content/drive')

Mounted at /content/drive


In [20]:
os.environ['CUDA_LAUNCH_BLOCKING'] = "1"

### Prepare pretrained model

In [2]:
!pip install transformers

from transformers import BertTokenizer, BertForSequenceClassification, AdamW, BertConfig
model = BertForSequenceClassification.from_pretrained('bert-base-uncased')
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

Collecting transformers
[?25l  Downloading https://files.pythonhosted.org/packages/2c/d8/5144b0712f7f82229a8da5983a8fbb8d30cec5fbd5f8d12ffe1854dcea67/transformers-4.4.1-py3-none-any.whl (2.1MB)
[K     |████████████████████████████████| 2.1MB 8.0MB/s 
[?25hCollecting tokenizers<0.11,>=0.10.1
[?25l  Downloading https://files.pythonhosted.org/packages/71/23/2ddc317b2121117bf34dd00f5b0de194158f2a44ee2bf5e47c7166878a97/tokenizers-0.10.1-cp37-cp37m-manylinux2010_x86_64.whl (3.2MB)
[K     |████████████████████████████████| 3.2MB 57.5MB/s 
Collecting sacremoses
[?25l  Downloading https://files.pythonhosted.org/packages/7d/34/09d19aff26edcc8eb2a01bed8e98f13a1537005d31e95233fd48216eed10/sacremoses-0.0.43.tar.gz (883kB)
[K     |████████████████████████████████| 890kB 60.1MB/s 
Building wheels for collected packages: sacremoses
  Building wheel for sacremoses (setup.py) ... [?25l[?25hdone
  Created wheel for sacremoses: filename=sacremoses-0.0.43-cp37-none-any.whl size=893262 sha256=f1765

HBox(children=(FloatProgress(value=0.0, description='Downloading', max=433.0, style=ProgressStyle(description_…




HBox(children=(FloatProgress(value=0.0, description='Downloading', max=440473133.0, style=ProgressStyle(descri…




Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

HBox(children=(FloatProgress(value=0.0, description='Downloading', max=231508.0, style=ProgressStyle(descripti…




HBox(children=(FloatProgress(value=0.0, description='Downloading', max=28.0, style=ProgressStyle(description_w…




HBox(children=(FloatProgress(value=0.0, description='Downloading', max=466062.0, style=ProgressStyle(descripti…




### Prepare data

In [3]:
import torch
from torch.utils.data import DataLoader, RandomSampler, SequentialSampler
import pickle

# import all indexes
FILE_NAME = '/content/drive/MyDrive/STAT946/DC2/data2/trainset_indexes.pkl'
infile = open(FILE_NAME,'rb')
all_indexes = pickle.load(infile)

# import train and val dataset 
train_dataset=torch.load("/content/drive/MyDrive/STAT946/DC2/data2/train_dataset20000.pt")
val_dataset=torch.load("/content/drive/MyDrive/STAT946/DC2/data2/val_dataset20000.pt")


print(len(train_dataset)/15)
print(len(val_dataset)/15)
val_set_index = all_indexes[int(len(train_dataset)/15):
                        int(len(train_dataset)/15+len(val_dataset)/15)]
print(len(val_set_index))   

18000.0
2000.0
2000


# Model training

In [4]:
# The DataLoader needs to know our batch size for training, so we specify it 
# here. For fine-tuning BERT on a specific task, the authors recommend a batch 
# size of 16 or 32.
batch_size = 32

# Create the DataLoaders for our training and validation sets.
# We'll take training samples in random order. 
train_dataloader = DataLoader(
            train_dataset,  # The training samples.
            sampler = RandomSampler(train_dataset), # Select batches randomly
            batch_size = batch_size # Trains with this batch size.
        )

# For validation the order doesn't matter, so we'll just read them sequentially.
validation_dataloader = DataLoader(
            val_dataset, # The validation samples.
            sampler = SequentialSampler(val_dataset), # Pull out batches sequentially.
            batch_size = batch_size # Evaluate with this batch size.
        )


In [5]:
# Setup CUDA, GPU & distributed training
# If there's a GPU available...
if torch.cuda.is_available():    

    # Tell PyTorch to use the GPU.    
    device = torch.device("cuda")

    print('There are %d GPU(s) available.' % torch.cuda.device_count())

    print('We will use the GPU:', torch.cuda.get_device_name(0))

# If not...
else:
    print('No GPU available, using the CPU instead.')
    device = torch.device("cpu")

There are 1 GPU(s) available.
We will use the GPU: Tesla P100-PCIE-16GB


In [None]:
# put model on device
model.to(device)

In [10]:
from transformers import get_linear_schedule_with_warmup
import numpy as np
import time
import datetime
from scipy.stats import spearmanr
# Note: AdamW is a class from the huggingface library (as opposed to pytorch) 
# I believe the 'W' stands for 'Weight Decay fix"
optimizer = AdamW(model.parameters(),
                  lr = 2e-5, # args.learning_rate - default is 5e-5, our notebook had 2e-5
                  eps = 1e-8 # args.adam_epsilon  - default is 1e-8.
                )

# Number of training epochs. The BERT authors recommend between 2 and 4. 
# We chose to run for 4, but we'll see later that this may be over-fitting the
# training data.
epochs = 4

# Total number of training steps is [number of batches] x [number of epochs]. 
# (Note that this is not the same as the number of training samples).
total_steps = len(train_dataloader) * epochs

# Create the learning rate scheduler.
scheduler = get_linear_schedule_with_warmup(optimizer, 
                                            num_warmup_steps = 0, # Default value in run_glue.py
                                            num_training_steps = total_steps)


# Function to calculate the accuracy of our predictions vs labels
def flat_accuracy(preds, labels):
    pred_flat = np.argmax(preds, axis=1).flatten()
    labels_flat = labels.flatten()
    return np.sum(pred_flat == labels_flat) / len(labels_flat)


def format_time(elapsed):
    '''
    Takes a time in seconds and returns a string hh:mm:ss
    '''
    # Round to the nearest second.
    elapsed_rounded = int(round((elapsed)))
    
    # Format as hh:mm:ss
    return str(datetime.timedelta(seconds=elapsed_rounded))


def spearsman(t,p):
  rhos = []
  t = np.array(t)
  p = np.array(p)
  for col_trues, col_pred in zip(t.T, p.T):
    rhos.append(spearmanr(col_trues, col_pred, axis=0))
  return np.mean(rhos)

In [30]:
 # check if device can process a batch of data
 step, batch = next(enumerate(train_dataloader))
 inputs = {'input_ids':      batch[0],
                'attention_mask': batch[1],
                'token_type_ids': batch[2],
                'labels':         batch[3]
              }
inputs['input_ids'][0]

tensor([  101,  2122,  9597, 26597,  9125,  3384,  1011,  9078,  3723, 24198,
         7783,  2015,  1012,   102,  2632,  4801,  4890, 13490,  2003,  2019,
         2804,  4668,  1999,  7554, 10752,  2073,  1037,  5536,  2632,  4801,
         2638,  9909,  2000,  1037,  6351,  8516,  2177,  2000,  2433,  2019,
         1155,  1011,  2632,  4801,  4890,  2140,  6544,  1012,   102,     0,
            0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
            0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
            0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
            0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
            0,     0,     0,     0,     0,     0,     0,     0,     0,     0],
       dtype=torch.int32)

### Training loop

In [31]:
import random


# This training code is based on the `run_glue.py` script here:
# https://github.com/huggingface/transformers/blob/5bfcd0485ece086ebcbed2d008813037968a9e58/examples/run_glue.py#L128

# Set the seed value all over the place to make this reproducible.
seed_val = 42
epochs = 5

random.seed(seed_val)
np.random.seed(seed_val)
torch.manual_seed(seed_val)
torch.cuda.manual_seed_all(seed_val)

# We'll store a number of quantities such as training and validation loss, 
# validation accuracy, and timings.
training_stats = []

# Measure the total training time for the whole run.
total_t0 = time.time()

# For each epoch...
for epoch_i in range(0, epochs):
    
    # ========================================
    #               Training
    # ========================================
    
    # Perform one full pass over the training set.

    print("")
    print('======== Epoch {:} / {:} ========'.format(epoch_i + 1, epochs))
    print('Training...')

    # Measure how long the training epoch takes.
    t0 = time.time()

    # Reset the total loss for this epoch.
    total_train_loss = 0

    # Put the model into training mode. Don't be mislead--the call to 
    # `train` just changes the *mode*, it doesn't *perform* the training.
    # `dropout` and `batchnorm` layers behave differently during training
    # vs. test (source: https://stackoverflow.com/questions/51433378/what-does-model-train-do-in-pytorch)
    model.train()

    # For each batch of training data...
    for step, batch in enumerate(train_dataloader):

        # Progress update every 100 batches.
        if step % 100 == 0 and not step == 0:
            # Calculate elapsed time in minutes.
            elapsed = format_time(time.time() - t0)
            
            # Report progress.
            print('  Batch {:>5,}  of  {:>5,}.    Elapsed: {:}.'.format(step, len(train_dataloader), elapsed))

        # Unpack this training batch from our dataloader. 
        #
        # As we unpack the batch, we'll also copy each tensor to the GPU using the 
        # `to` method.
        #
        # `batch` contains three pytorch tensors:
        #   [0]: input ids 
        #   [1]: attention masks
        #   [2]: labels 
        batch = tuple(t.to(device) for t in batch)
        inputs = {'input_ids':      batch[0],
                'attention_mask': batch[1],
                'token_type_ids': batch[2],
                'labels':         batch[3]
              }
      

        result = model(**inputs)
        
      

        # Always clear any previously calculated gradients before performing a
        # backward pass. PyTorch doesn't do this automatically because 
        # accumulating the gradients is "convenient while training RNNs". 
        # (source: https://stackoverflow.com/questions/48001598/why-do-we-need-to-call-zero-grad-in-pytorch)
        model.zero_grad()        

        # Perform a forward pass (evaluate the model on this training batch).
        # In PyTorch, calling `model` will in turn call the model's `forward` 
        # function and pass down the arguments. The `forward` function is 
        # documented here: 
        # https://huggingface.co/transformers/model_doc/bert.html#bertforsequenceclassification
        # The results are returned in a results object, documented here:
        # https://huggingface.co/transformers/main_classes/output.html#transformers.modeling_outputs.SequenceClassifierOutput
        # Specifically, we'll get the loss (because we provided labels) and the
        # "logits"--the model outputs prior to activation.
        
     
        loss = result.loss
        logits = result.logits

        # Accumulate the training loss over all of the batches so that we can
        # calculate the average loss at the end. `loss` is a Tensor containing a
        # single value; the `.item()` function just returns the Python value 
        # from the tensor.
        total_train_loss += loss.item()
    
        # Perform a backward pass to calculate the gradients.
        loss.backward()

        # Clip the norm of the gradients to 1.0.
        # This is to help prevent the "exploding gradients" problem.
        torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)

        # Update parameters and take a step using the computed gradient.
        # The optimizer dictates the "update rule"--how the parameters are
        # modified based on their gradients, the learning rate, etc.
        optimizer.step()

        # Update the learning rate.
        scheduler.step()

    # Calculate the average loss over all of the batches.
    avg_train_loss = total_train_loss / len(train_dataloader)            
    
    # Measure how long this epoch took.
    training_time = format_time(time.time() - t0)

    print("")
    print("  Average training loss: {0:.2f}".format(avg_train_loss))
    print("  Training epcoh took: {:}".format(training_time))

    # ========================================
    #               Validation
    # ========================================
    # After the completion of each training epoch, measure our performance on
    # our validation set.

    print("")
    print("Running Validation...")

    t0 = time.time()

    # Put the model in evaluation mode--the dropout layers behave differently
    # during evaluation.
    model.eval()

    # Tracking variables 
    total_eval_accuracy = 0
    total_eval_loss = 0
    nb_eval_steps = 0

    # Evaluate data for one epoch
    for batch in validation_dataloader:
        
        # Unpack this training batch from our dataloader. 
        #
        # As we unpack the batch, we'll also copy each tensor to the GPU using 
        # the `to` method.
        #
        # `batch` contains three pytorch tensors:
        #   [0]: input ids 
        #   [1]: attention masks
        #   [2]: labels 
        batch = tuple(t.to(device) for t in batch)
        inputs = {'input_ids':      batch[0],
                'attention_mask': batch[1],
                'token_type_ids': batch[2],
                'labels':         batch[3]
              }
      

        
        # Tell pytorch not to bother with constructing the compute graph during
        # the forward pass, since this is only needed for backprop (training).
        with torch.no_grad():        

            # Forward pass, calculate logit predictions.
            # token_type_ids is the same as the "segment ids", which 
            # differentiates sentence 1 and 2 in 2-sentence tasks.
            result = model(**inputs)

        # Get the loss and "logits" output by the model. The "logits" are the 
        # output values prior to applying an activation function like the 
        # softmax.
        loss = result.loss
        logits = result.logits
            
        # Accumulate the validation loss.
        total_eval_loss += loss.item()

        # Move logits and labels to CPU
        logits = logits.detach().cpu().numpy()
        label_ids =inputs['labels'].to('cpu').numpy()

        # Calculate the accuracy for this batch of test sentences, and
        # accumulate it over all batches.
        total_eval_accuracy += flat_accuracy(logits, label_ids)
        

    # Report the final accuracy for this validation run.
    avg_val_accuracy = total_eval_accuracy / len(validation_dataloader)
    print("  Validate Accuracy: {0:.2f}".format(avg_val_accuracy))
    
    # Calculate the average loss over all of the batches.
    avg_val_loss = total_eval_loss / len(validation_dataloader)
    
    # Measure how long the validation run took.
    validation_time = format_time(time.time() - t0)
    
    print("  Validation Loss: {0:.2f}".format(avg_val_loss))
    print("  Validation took: {:}".format(validation_time))

    # Record all statistics from this epoch.
    training_stats.append(
        {
            'epoch': epoch_i + 1,
            'Training Loss': avg_train_loss,
            'Valid. Loss': avg_val_loss,
            'Valid. Accur.': avg_val_accuracy,
            'Training Time': training_time,
            'Validation Time': validation_time
        }
    )

print("")
print("Training complete!")

print("Total training took {:} (h:mm:ss)".format(format_time(time.time()-total_t0)))


Training...
  Batch   100  of  8,438.    Elapsed: 0:01:07.
  Batch   200  of  8,438.    Elapsed: 0:02:14.
  Batch   300  of  8,438.    Elapsed: 0:03:22.
  Batch   400  of  8,438.    Elapsed: 0:04:29.
  Batch   500  of  8,438.    Elapsed: 0:05:36.
  Batch   600  of  8,438.    Elapsed: 0:06:43.
  Batch   700  of  8,438.    Elapsed: 0:07:51.
  Batch   800  of  8,438.    Elapsed: 0:08:58.
  Batch   900  of  8,438.    Elapsed: 0:10:05.
  Batch 1,000  of  8,438.    Elapsed: 0:11:13.
  Batch 1,100  of  8,438.    Elapsed: 0:12:20.
  Batch 1,200  of  8,438.    Elapsed: 0:13:28.
  Batch 1,300  of  8,438.    Elapsed: 0:14:35.
  Batch 1,400  of  8,438.    Elapsed: 0:15:42.
  Batch 1,500  of  8,438.    Elapsed: 0:16:50.
  Batch 1,600  of  8,438.    Elapsed: 0:17:58.
  Batch 1,700  of  8,438.    Elapsed: 0:19:06.
  Batch 1,800  of  8,438.    Elapsed: 0:20:13.
  Batch 1,900  of  8,438.    Elapsed: 0:21:20.
  Batch 2,000  of  8,438.    Elapsed: 0:22:28.
  Batch 2,100  of  8,438.    Elapsed: 0:23:36.


In [32]:
batch_size

32

# Summary of training

In [33]:
import pandas as pd

# Display floats with two decimal places.
pd.set_option('precision', 2)

# Create a DataFrame from our training statistics.
df_stats = pd.DataFrame(data=training_stats)

# Use the 'epoch' as the row index.
df_stats = df_stats.set_index('epoch')

# A hack to force the column headers to wrap.
#df = df.style.set_table_styles([dict(selector="th",props=[('max-width', '70px')])])

# Display the table.
df_stats

Unnamed: 0_level_0,Training Loss,Valid. Loss,Valid. Accur.,Training Time,Validation Time
epoch,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
1,0.32,0.35,0.85,1:34:51,0:03:04
2,0.18,0.44,0.85,1:35:05,0:03:04
3,0.09,0.63,0.85,1:35:00,0:03:03
4,0.04,0.92,0.85,1:34:02,0:03:04
5,0.02,0.92,0.85,1:34:17,0:03:03


In [34]:
output_dir = '/content/drive/MyDrive/STAT946/DC2/model_save/'
import os 
# Create output directory if needed
if not os.path.exists(output_dir):
    os.makedirs(output_dir)

print("Saving model to %s" % output_dir)

# Save a trained model, configuration and tokenizer using `save_pretrained()`.
# They can then be reloaded using `from_pretrained()`
model_to_save = model.module if hasattr(model, 'module') else model  # Take care of distributed/parallel training
model_to_save.save_pretrained(output_dir)
tokenizer.save_pretrained(output_dir)

# Good practice: save your training arguments together with the trained model
# torch.save(args, os.path.join(output_dir, 'training_args.bin'))


Saving model to /content/drive/MyDrive/STAT946/DC2/model_save/


('/content/drive/MyDrive/STAT946/DC2/model_save/tokenizer_config.json',
 '/content/drive/MyDrive/STAT946/DC2/model_save/special_tokens_map.json',
 '/content/drive/MyDrive/STAT946/DC2/model_save/vocab.txt',
 '/content/drive/MyDrive/STAT946/DC2/model_save/added_tokens.json')

In [None]:
output_dir = '/content/drive/MyDrive/STAT946/DC2/model_save/'
# Load a trained model and vocabulary that you have fine-tuned
model = model.from_pretrained(output_dir)
tokenizer = tokenizer.from_pretrained(output_dir)

# Copy the model to the GPU.
model.to(device)

## let's check the performance of model on the val dataset


In [14]:
# ========================================
    #               Validation
    # ========================================
    # After the completion of each training epoch, measure our performance on
    # our validation set.

print("")
print("Running Validation on toy set...")

t0 = time.time()

# Put the model in evaluation mode--the dropout layers behave differently
# during evaluation.
model.eval()

# Tracking variables 
total_eval_accuracy = 0
total_eval_loss = 0
nb_eval_steps = 0

# Evaluate data for one epoch
label = np.array([])
for batch in validation_dataloader:
    
    # Unpack this training batch from our dataloader. 
    #
    # As we unpack the batch, we'll also copy each tensor to the GPU using 
    # the `to` method.
    #
    # `batch` contains three pytorch tensors:
    #   [0]: input ids 
    #   [1]: attention masks
    #   [2]: labels 
    batch = tuple(t.to(device) for t in batch)
    inputs = {'input_ids':      batch[0],
              'attention_mask': batch[1],
              'token_type_ids': batch[2]
          }
  

    
    # Tell pytorch not to bother with constructing the compute graph during
    # the forward pass, since this is only needed for backprop (training).
    with torch.no_grad():        

        # Forward pass, calculate logit predictions.
        # token_type_ids is the same as the "segment ids", which 
        # differentiates sentence 1 and 2 in 2-sentence tasks.
        result = model(**inputs)

    # Get the loss and "logits" output by the model. The "logits" are the 
    # output values prior to applying an activation function like the 
    # softmax.
    loss = result.loss
    logits = result.logits
        
    # Accumulate the validation loss.
    # total_eval_loss += loss.item()

    # Move logits and labels to CPU
    logits = logits.detach().cpu().numpy()

    pred_flat = np.argmax(logits, axis=1).flatten()
    label = np.append(label, pred_flat)

    # label_ids =inputs['labels'].to('cpu').numpy()
    

    # Calculate the accuracy for this batch of test sentences, and
    # accumulate it over all batches.
    # total_eval_accuracy += flat_accuracy(logits, label_ids)
    

# Report the final accuracy for this validation run.
avg_val_accuracy = total_eval_accuracy / len(validation_dataloader)
# print("  Accuracy: {0:.2f}".format(avg_val_accuracy))
# print("  Validation took: {:}".format(validation_time))
label = label.reshape(2000,15)


Running Validation on toy set...


In [19]:
val_label = topSort(label)

In [20]:
spearsman(val_label, val_set_index)

0.3729897576399608

# Top Sort


In [16]:
from collections import defaultdict 
import csv
import ast
class Graph: 
    '''
    The code for this class is based on geeksforgeeks.com
    '''
    def __init__(self,vertices): 
        self.graph = defaultdict(list) 
        self.V = vertices 
  
    def addEdge(self, u, v): 
        self.graph[u].append([v]) 
    
    def topologicalSortUtil(self, v, visited, stack): 
  
        visited[v] = True
  
        for i in self.graph[v]: 
            if visited[i[0]] == False: 
                self.topologicalSortUtil(i[0], visited, stack) 
  
        stack.insert(0,v) 
  
    def topologicalSort(self): 
        visited = [False]*self.V 
        stack =[] 

        for i in range(self.V): 
            if visited[i] == False: 
                self.topologicalSortUtil(i, visited, stack) 
  
        return stack
        
    def isCyclicUtil(self, v, visited, recStack): 
  
        visited[v] = True
        recStack[v] = True
  
        for neighbour in self.graph[v]:
            if visited[neighbour[0]] == False: 
                if self.isCyclicUtil(
                    neighbour[0], visited, recStack) == True: 
                    return True
            elif recStack[neighbour[0]] == True: 
                self.graph[v].remove(neighbour)
                return True
  
        recStack[v] = False
        return False
  
    def isCyclic(self): 
        visited = [False] * self.V 
        recStack = [False] * self.V 
        for node in range(self.V): 
            if visited[node] == False: 
                if self.isCyclicUtil(node, visited, recStack) == True: 
                    return True
        return False

class Stats(object):
    
    def __init__(self):
        self.n_samp = 0
        self.n_sent = 0
        self.n_pair = 0
        self.corr_samp = 0
        self.corr_sent = 0
        self.corr_pair = 0
        self.lcs_seq = 0
        self.tau = 0
        self.dist_window = [1, 2, 3]
        self.min_dist = [0]*len(self.dist_window)
        
    def pairwise_metric(self, g):
        '''
        This  calculates the percentage of skip-bigrams for which the 
        relative order is predicted correctly. Rouge-S metric.
        '''
        common = 0
        for vert in range(g.V):
            to_nodes = g.graph[vert]
            to_nodes = [node[0] for node in to_nodes]
            gold_nodes = list(range(vert+1, g.V))
            common += len(set(gold_nodes).intersection(set(to_nodes)))

        return common
    
    def kendall_tau(self, porder, gorder):
        '''
        It calculates the number of inversions required by the predicted 
        order to reach the correct order.
        '''
        pred_pairs, gold_pairs = [], []
        for i in range(len(porder)):
            for j in range(i+1, len(porder)):
                pred_pairs.append((porder[i], porder[j]))
                gold_pairs.append((gorder[i], gorder[j]))
        common = len(set(pred_pairs).intersection(set(gold_pairs)))
        uncommon = len(gold_pairs) - common
        tau = 1 - (2*(uncommon/len(gold_pairs)))

        return tau
    
    def min_dist_metric(self, porder, gorder):
        '''
        It calculates the displacement of sentences within a given window.
        '''
        count = [0]*len(self.dist_window)
        for i in range(len(porder)):
            pidx = i
            pval = porder[i]
            gidx = gorder.index(pval)
            for w, window in enumerate(self.dist_window):
                if abs(pidx-gidx) <= window:
                    count[w] += 1
        return count
    
    def lcs(self, X , Y): 
        m = len(X) 
        n = len(Y) 

        L = [[None]*(n+1) for i in range(m+1)] 

        for i in range(m+1): 
            for j in range(n+1): 
                if i == 0 or j == 0 : 
                    L[i][j] = 0
                elif X[i-1] == Y[j-1]: 
                    L[i][j] = L[i-1][j-1]+1
                else: 
                    L[i][j] = max(L[i-1][j] , L[i][j-1]) 

        return L[m][n] 
    
    def sample_match(self, order, gold_order):
        '''
        It calculates the percentage of samples for which the entire 
        sequence was correctly predicted. (PMR)
        '''
        return order == gold_order
    
    def sentence_match(self, order, gold_order):
        '''
        It measures the percentage of sentences for which their absolute 
        position was correctly predicted. (Acc)
        '''
        return sum([1 for x in range(len(order)) if order[x] == gold_order[x]])
    
    def update_stats(self, nvert, npairs, order, gold_order, g):
        self.n_samp += 1
        self.n_sent += nvert
        self.n_pair += npairs
        
        if self.sample_match(order, gold_order):
            self.corr_samp += 1
        self.corr_sent += self.sentence_match(order, gold_order)
        self.corr_pair += self.pairwise_metric(g)
        self.lcs_seq += self.lcs(order, gold_order)
        self.tau += self.kendall_tau(order, gold_order)
        window_counts = self.min_dist_metric(order, gold_order)
        for w, wc in enumerate(window_counts):
            self.min_dist[w] += wc
        
    def print_stats(self):
        print("Perfect Match: " + str(self.corr_samp*100/self.n_samp))
        print("Sentence Accuracy: " + str(self.corr_sent*100/self.n_sent))
        print("Rouge-S: " + str(self.corr_pair*100/self.n_pair))
        print("LCS: " + str(self.lcs_seq*100/self.n_sent))
        print("Kendall Tau Ratio: " + str(self.tau/self.n_samp))
        for w, window in enumerate(self.dist_window):
            print("Min Dist Metric for window " + str(window) + ": " + \
                                    str(self.min_dist[w]*100/self.n_sent))



def get_pos(j):
  if (j>=0) & (j<=4): return 0, j+1
  if (j>=5) & (j<=8): return 1, j-3
  if (j>=9) & (j<=11): return 2, j-6
  if (j>=12) & (j<=13): return 3, j-8
  else: return 4, 5

# inputs: data should be n*15 array
def topSort(data):
  orders = []
  for instance in data:
    g = Graph(6)
    for j in range(0,15):
      pred = instance[j]
      pos_s1, pos_s2 = get_pos(j)

      if pred == 1: 
        g.addEdge(pos_s1, pos_s2)
      if pred == 0: 
        g.addEdge(pos_s2, pos_s1)
    while g.isCyclic():
      g.isCyclic()

    sorted = g.topologicalSort()
    arr = []
    for i in range(0,6):
      arr.append(sorted.index(i))
  
    orders.append(arr)
  return orders

In [17]:
# import pickle
# FILE_NAME = '/content/drive/MyDrive/STAT946/DC2/top sort/pairs.pkl'
# infile = open(FILE_NAME,'rb')
# data = pickle.load(infile)

# FILE_NAME = '/content/drive/MyDrive/STAT946/DC2/top sort/indexes.pkl'
# infile = open(FILE_NAME,'rb')
# indexes = pickle.load(infile)


FileNotFoundError: ignored

In [None]:
prediction = topSort(label)

In [None]:
import pickle
FILE_NAME = '/content/drive/MyDrive/STAT946/DC2/top sort/toy_indexes.pkl'
infile = open(FILE_NAME,'rb')
toy_indexes = pickle.load(infile)
toy_indexes = np.array(toy_indexes)

In [None]:
def calculate_accuracy(orders_gold, orders_predicted):
    num_correct = np.sum(orders_predicted == orders_gold)
    num_total =  orders_gold.shape[0] * 6
    return num_correct / num_total

In [None]:
toy_indexes[0:10]

In [None]:
calculate_accuracy(toy_indexes, prediction)

# Perform on test set

In [21]:
test_dataset = torch.load('/content/drive/MyDrive/STAT946/DC2/data2/test_dataset.pt')
print(len(test_dataset)/15)
# For validation the order doesn't matter, so we'll just read them sequentially.
test_dataloader = DataLoader(
            test_dataset, # The validation samples.
            sampler = SequentialSampler(test_dataset), # Pull out batches sequentially.
            batch_size = batch_size # Evaluate with this batch size.
        )

2000.0


In [22]:
# ========================================
    #               Test
    # ========================================
    # After the completion of each training epoch, measure our performance on
    # our validation set.

print("")
print("Running Validation on toy set...")

t0 = time.time()

# Put the model in evaluation mode--the dropout layers behave differently
# during evaluation.
model.eval()

# Tracking variables 
total_eval_accuracy = 0
total_eval_loss = 0
nb_eval_steps = 0

# Evaluate data for one epoch
label = np.array([])
for batch in test_dataloader:
    
    # Unpack this training batch from our dataloader. 
    #
    # As we unpack the batch, we'll also copy each tensor to the GPU using 
    # the `to` method.
    #
    # `batch` contains three pytorch tensors:
    #   [0]: input ids 
    #   [1]: attention masks
    #   [2]: labels 
    batch = tuple(t.to(device) for t in batch)
    inputs = {'input_ids':      batch[0],
            'attention_mask': batch[1],
            'token_type_ids': batch[2]
          }
  

    
    # Tell pytorch not to bother with constructing the compute graph during
    # the forward pass, since this is only needed for backprop (training).
    with torch.no_grad():        

        # Forward pass, calculate logit predictions.
        # token_type_ids is the same as the "segment ids", which 
        # differentiates sentence 1 and 2 in 2-sentence tasks.
        result = model(**inputs)

    # Get the loss and "logits" output by the model. The "logits" are the 
    # output values prior to applying an activation function like the 
    # softmax.
    loss = result.loss
    logits = result.logits
        
    # Accumulate the validation loss.
    # total_eval_loss += loss.item()

    # Move logits and labels to CPU
    logits = logits.detach().cpu().numpy()

    pred_flat = np.argmax(logits, axis=1).flatten()
    label = np.append(label, pred_flat)

    # label_ids =inputs['labels'].to('cpu').numpy()
    

    # Calculate the accuracy for this batch of test sentences, and
    # accumulate it over all batches.
    # total_eval_accuracy += flat_accuracy(logits, label_ids)
    

# Report the final accuracy for this validation run.
# avg_val_accuracy = total_eval_accuracy / len(validation_dataloader)
# print("  Accuracy: {0:.2f}".format(avg_val_accuracy))
# print("  Validation took: {:}".format(validation_time))
test_label = label.reshape(2000,15)


Running Validation on toy set...


In [23]:
pdct = topSort(test_label)
pdct

[[1, 5, 3, 2, 4, 0],
 [2, 3, 4, 1, 0, 5],
 [2, 3, 1, 0, 4, 5],
 [5, 0, 2, 1, 4, 3],
 [4, 2, 3, 0, 1, 5],
 [3, 0, 5, 4, 2, 1],
 [1, 5, 4, 0, 3, 2],
 [2, 5, 0, 1, 4, 3],
 [1, 4, 2, 3, 0, 5],
 [4, 1, 2, 0, 3, 5],
 [4, 2, 1, 0, 5, 3],
 [3, 5, 4, 2, 1, 0],
 [2, 3, 1, 5, 0, 4],
 [1, 2, 4, 3, 0, 5],
 [0, 4, 2, 5, 3, 1],
 [1, 5, 2, 3, 0, 4],
 [2, 0, 5, 3, 1, 4],
 [1, 3, 0, 2, 4, 5],
 [3, 4, 5, 2, 1, 0],
 [1, 3, 0, 5, 4, 2],
 [5, 4, 1, 3, 0, 2],
 [4, 1, 5, 2, 3, 0],
 [2, 3, 4, 1, 0, 5],
 [3, 5, 0, 4, 2, 1],
 [5, 4, 0, 1, 2, 3],
 [4, 0, 1, 2, 3, 5],
 [3, 2, 0, 4, 5, 1],
 [2, 0, 3, 5, 4, 1],
 [1, 3, 5, 2, 0, 4],
 [5, 4, 2, 1, 0, 3],
 [5, 1, 0, 3, 4, 2],
 [5, 1, 4, 0, 2, 3],
 [0, 2, 5, 3, 1, 4],
 [1, 3, 5, 2, 0, 4],
 [2, 3, 4, 5, 0, 1],
 [5, 3, 1, 2, 4, 0],
 [1, 0, 2, 3, 4, 5],
 [3, 2, 4, 0, 5, 1],
 [1, 2, 5, 0, 4, 3],
 [2, 5, 0, 3, 4, 1],
 [4, 1, 2, 5, 3, 0],
 [4, 3, 2, 5, 1, 0],
 [1, 3, 5, 2, 0, 4],
 [1, 5, 2, 4, 3, 0],
 [3, 0, 4, 1, 2, 5],
 [4, 5, 2, 3, 0, 1],
 [1, 0, 4, 3, 2, 5],
 [3, 0, 5, 4,

In [24]:
# create data frame for submission file
import pandas as pd
# construct pdct to submit.
d = {'ID': range(1,2001), 
     'index1': range(2000),
     'index2': range(2000),
     'index3': range(2000),
     'index4': range(2000),
     'index5': range(2000),
     'index6': range(2000)}

df = pd.DataFrame(data=d)

In [None]:
pdct[0:10]

[[1, 2, 5, 4, 3, 0],
 [1, 0, 4, 5, 3, 2],
 [1, 3, 4, 0, 2, 5],
 [3, 0, 1, 4, 2, 5],
 [0, 1, 3, 4, 2, 5],
 [0, 1, 4, 5, 2, 3],
 [3, 0, 4, 1, 5, 2],
 [1, 5, 4, 0, 2, 3],
 [0, 2, 4, 1, 3, 5],
 [1, 3, 4, 2, 0, 5]]

In [25]:
for i in range(0,2000):
  instance = pdct[i]
  for j in range(1,7):
    df.iloc[i,j]=instance[j-1]
df.to_csv("/content/drive/MyDrive/STAT946/DC2/pdct2",index=False)  