<a href="https://colab.research.google.com/github/untergunter/Chess-documentation/blob/main/Hyper_parameter_tuning.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!git clone https://github.com/untergunter/enriched_stancy.git

Cloning into 'enriched_stancy'...
remote: Enumerating objects: 136, done.[K
remote: Counting objects: 100% (136/136), done.[K
remote: Compressing objects: 100% (101/101), done.[K
remote: Total 136 (delta 70), reused 94 (delta 34), pack-reused 0[K
Receiving objects: 100% (136/136), 3.71 MiB | 13.18 MiB/s, done.
Resolving deltas: 100% (70/70), done.


In [2]:
!pip install transformers

Collecting transformers
  Downloading transformers-4.10.0-py3-none-any.whl (2.8 MB)
[K     |████████████████████████████████| 2.8 MB 12.1 MB/s 
[?25hCollecting pyyaml>=5.1
  Downloading PyYAML-5.4.1-cp37-cp37m-manylinux1_x86_64.whl (636 kB)
[K     |████████████████████████████████| 636 kB 39.5 MB/s 
Collecting sacremoses
  Downloading sacremoses-0.0.45-py3-none-any.whl (895 kB)
[K     |████████████████████████████████| 895 kB 49.0 MB/s 
Collecting huggingface-hub>=0.0.12
  Downloading huggingface_hub-0.0.16-py3-none-any.whl (50 kB)
[K     |████████████████████████████████| 50 kB 6.8 MB/s 
[?25hCollecting tokenizers<0.11,>=0.10.1
  Downloading tokenizers-0.10.3-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl (3.3 MB)
[K     |████████████████████████████████| 3.3 MB 41.0 MB/s 
Installing collected packages: tokenizers, sacremoses, pyyaml, huggingface-hub, transformers
  Attempting uninstall: pyyaml
    Found existing installation: P

In [3]:
import torch
import torch.nn as nn
import pandas as pd
import os

from glob import glob
from datetime import datetime 
from transformers import BertTokenizer,AdamW,BertConfig,BertModel
from torch.utils.data import TensorDataset,DataLoader,RandomSampler
from torch.nn import CrossEntropyLoss, CosineEmbeddingLoss
from sklearn.metrics import f1_score,recall_score,precision_score
from tqdm.notebook import tqdm

In [13]:
def test_consistency_model(model, dataloader, device):
    y_true = []
    y_pred = []
    model.eval()
    with torch.no_grad():
        for batch in dataloader:
            together_ids, together_masks, claim_ids, claim_masks, labels = batch
            together_ids = together_ids.to(device)
            together_masks = together_masks.to(device)
            claim_ids = claim_ids.to(device)
            claim_masks = claim_masks.to(device)
            labels = labels.to(device)

            model_prediction = model.predict(together_ids,
                              together_masks,
                              claim_ids,
                              claim_masks,
                              )
            y_true += [int(label) for label in labels]
            y_pred += [int(label) for label in model_prediction]
    return y_true,y_pred

def find_file(files_list, file_name):
    """Return the first path whose final ``os.sep``-separated component is
    exactly ``file_name``; return ``None`` when no path matches."""
    matching_paths = (
        path for path in files_list
        if path.split(os.sep)[-1] == file_name
    )
    return next(matching_paths, None)

def get_paper_train_dev_test():
    """Locate and load ``train.tsv``, ``dev.tsv`` and ``test.tsv``.

    The files are searched for recursively below the current working
    directory. Each one is read as a tab-separated table with the fixed
    column names ``index``, ``text``, ``perspective`` and ``stance_label_3``
    (so the first line of the file is treated as data, not as a header).

    Returns:
        ``(train, dev, test)``; an entry is ``None`` when its file was not found.
    """
    column_names = ['index', 'text', 'perspective', 'stance_label_3']
    tsv_paths = glob('./**/*.tsv', recursive=True)

    def load_split(file_name):
        # A missing split yields None instead of raising.
        path = find_file(tsv_paths, file_name)
        if not path:
            return None
        return pd.read_csv(path, sep='\t', names=column_names)

    dev = load_split('dev.tsv')
    train = load_split('train.tsv')
    test = load_split('test.tsv')
    return train, dev, test

def make_tokenizer():
    """Build a batch-tokenizing closure around the ``bert-base-uncased`` tokenizer.

    Returns:
        A function mapping an iterable of strings to ``(ids, attentions)``,
        the per-string ``input_ids`` and ``attention_mask`` tensors
        concatenated along dimension 0.
    """
    bert_tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

    def tokenize_list_of_strings(list_of_strings):
        # Special tokens are NOT added here; callers are expected to have
        # written them into the strings themselves. Every string is
        # truncated / padded to exactly 100 tokens.
        encodings = [
            bert_tokenizer.encode_plus(
                text,
                add_special_tokens=False,
                truncation=True,
                padding='max_length',
                max_length=100,
                return_attention_mask=True,
                return_tensors='pt',
            )
            for text in list_of_strings
        ]
        ids = torch.cat([encoding['input_ids'] for encoding in encodings], dim=0)
        attentions = torch.cat(
            [encoding['attention_mask'] for encoding in encodings], dim=0
        )
        return ids, attentions

    return tokenize_list_of_strings

def make_2_kinds_data_set(raw_data, batch_size: int = 24):
    """Tokenize a stance table and wrap it in two shuffled data loaders.

    Args:
        raw_data: DataFrame with ``text``, ``perspective`` and
            ``stance_label_3`` columns.
        batch_size: batch size used by both loaders.

    Returns:
        ``(together_only_loader, together_and_claim_loader)`` where

        * ``together_only_loader`` yields
          ``(together_ids, together_masks, labels)``;
        * ``together_and_claim_loader`` yields
          ``(together_ids, together_masks, claim_ids, claim_masks, labels)``.

        Labels are 1 for the stance ``'supports'`` and 0 for anything else.
    """
    # The special tokens are written into the text by hand because the
    # tokenizer closure is built with add_special_tokens=False.
    claim_text = '[CLS] ' + raw_data['text'].str.strip() + ' [SEP]'
    perspective_text = raw_data['perspective'].str.strip() + ' [SEP]'
    pair_text = claim_text + perspective_text

    binary_labels = [int(stance == 'supports')
                     for stance in raw_data['stance_label_3']]

    tokenize = make_tokenizer()
    claim_ids, claim_masks = tokenize(claim_text)
    together_ids, together_masks = tokenize(pair_text)
    labels = torch.tensor(binary_labels)

    def shuffled_loader(*tensors):
        # Each loader gets its own RandomSampler over its own dataset.
        dataset = TensorDataset(*tensors)
        return DataLoader(dataset,
                          sampler=RandomSampler(dataset),
                          batch_size=batch_size)

    together_only_loader = shuffled_loader(together_ids, together_masks, labels)
    together_and_claim_loader = shuffled_loader(together_ids, together_masks,
                                                claim_ids, claim_masks, labels)

    return together_only_loader, together_and_claim_loader

def create_empty_df(path='enriched_stancy/bert_consistency_hp_opt.csv'):
    """Write a results CSV that contains only the header row.

    WARNING: an existing file at ``path`` is overwritten, which discards any
    previously recorded results.

    Args:
        path: destination CSV. Defaults to the results file used throughout
            this notebook.
    """
    columns = ['epoch', 'loss', 'f1', 'precision',
               'recall', 'seconds', 'hyper_parameters']
    pd.DataFrame(columns=columns).to_csv(path, index=False)

def add_to_result_csv(loss, f1, precision,
                      recall, seconds, epoch, hp,
                      path='enriched_stancy/bert_consistency_hp_opt.csv'):
    """Append one result row to the results CSV.

    Args:
        loss, f1, precision, recall, seconds, epoch: values stored as-is in
            the columns of the same name.
        hp: hyper-parameter description; stored as ``str(hp)`` so it can be
            matched by string comparison later.
        path: CSV to append to. Defaults to the results file used throughout
            this notebook.

    The header row is written only when ``path`` does not exist yet or is
    empty. Appending bare rows to a fresh file would otherwise make
    ``pd.read_csv`` treat the first result as the header.
    """
    row = pd.DataFrame({'epoch': [epoch],
                        'loss': [loss],
                        'f1': [f1],
                        'precision': [precision],
                        'recall': [recall],
                        'seconds': [seconds],
                        'hyper_parameters': [str(hp)]})

    needs_header = (not os.path.exists(path)) or os.path.getsize(path) == 0
    row.to_csv(path, header=needs_header, mode='a', index=False)

In [14]:
class DoubleLoss(nn.Module):
    """BERT stance classifier trained with two summed losses.

    The same ``bert-base-uncased`` encoder embeds the claim+perspective pair
    and the claim alone (pooled outputs). The 2-way stance head receives the
    pair embedding concatenated with the cosine similarity between the two
    embeddings. When labels are supplied, the training loss is

    * cross-entropy of the stance logits against the labels, plus
    * a cosine-embedding loss between the two embeddings whose target is
      +1 where the label is non-zero and -1 where the label is 0.
    """

    def __init__(self, device):
        """Load the pretrained encoder and build the stance head.

        Args:
            device: stored on ``self.device`` for callers; tensors created
                inside ``forward`` follow the device of the inputs instead.
        """
        super(DoubleLoss, self).__init__()
        self.device = device

        bert_config = BertConfig.from_pretrained('bert-base-uncased',
                                                 output_hidden_states=True)
        self.bert = BertModel.from_pretrained('bert-base-uncased',
                                              config=bert_config)
        # Head input = pooled pair embedding + one cosine-similarity feature
        # (768 + 1 = 769 for bert-base-uncased).
        self.stance = nn.Linear(bert_config.hidden_size + 1, 2)
        self.cosine = nn.CosineSimilarity()
        self.dropout = nn.Dropout(0.1)
        self.similarity_cosine_loss = CosineEmbeddingLoss()
        self.stance_loss_func = CrossEntropyLoss()

    def forward(self, both_ids, both_mask, claim_ids, claim_mask, labels=None):
        """Encode both inputs and either score them or compute the loss.

        Args:
            both_ids, both_mask: token ids / attention mask of claim+perspective.
            claim_ids, claim_mask: token ids / attention mask of the claim alone.
            labels: optional integer stance labels; when given, the summed
                loss is returned instead of the scores.

        Returns:
            The scalar loss when ``labels`` is given, otherwise
            ``(combined, logits)``: the head input and the raw 2-way scores.
        """
        both_hs = self.bert(both_ids, attention_mask=both_mask).pooler_output
        # Dropout is applied to the pair embedding only, not to the claim.
        both_hs = self.dropout(both_hs)

        claim_hs = self.bert(claim_ids, attention_mask=claim_mask).pooler_output

        cos_sim = self.cosine(both_hs, claim_hs).unsqueeze(1)
        combined = torch.cat([both_hs, cos_sim], dim=1)
        # Raw linear outputs: CrossEntropyLoss expects unnormalised scores.
        logits = self.stance(combined)

        if labels is None:
            return combined, logits

        # First loss: stance classification.
        stance_loss = self.stance_loss_func(logits.view(-1, 2),
                                            labels.view(-1))

        # Second loss: pull the embeddings together for label != 0 and push
        # them apart for label == 0. The target is created on the labels'
        # own device, so it stays valid even if the module has been moved
        # since construction.
        similarity_labels = torch.ones(labels.shape, device=labels.device)
        similarity_labels[labels == 0] = -1

        loss_claim = self.similarity_cosine_loss(both_hs,
                                                 claim_hs,
                                                 similarity_labels.float())

        return stance_loss + loss_claim

    def predict(self, both_ids, both_mask, claim_ids, claim_mask):
        """Return the index of the highest-scoring class for every example.

        Eval mode and ``torch.no_grad()`` are left to the caller.
        """
        _, logits = self.forward(both_ids, both_mask, claim_ids, claim_mask)
        _, predicted = torch.max(logits, 1)
        return predicted

In [15]:
def _classification_metrics(y_true, y_pred):
    """Return (weighted F1, precision, recall) for one set of predictions."""
    weighted_f1 = f1_score(y_true, y_pred, average='weighted')
    precision = precision_score(y_true, y_pred)
    recall = recall_score(y_true, y_pred)
    return weighted_f1, precision, recall


def train_consistency(batch_size, lr, eps, n_epochs=10):
    """Train a fresh ``DoubleLoss`` model and log its score after every epoch.

    One row is appended to the results CSV before training (epoch 0, with
    loss and seconds set to -1) and one after each epoch. Every row carries
    ``str({'batch_size': ..., 'lr': ..., 'eps': ...})`` so finished
    configurations can be recognised later.

    Args:
        batch_size: batch size of the train and evaluation loaders.
        lr: AdamW learning rate.
        eps: AdamW epsilon.
        n_epochs: number of training epochs. The resume check in this
            notebook looks for a row with ``epoch == 10``, so keep the
            default unless that check is adjusted too.

    NOTE(review): scores are computed on the *test* split; the dev split is
    loaded but not used, so hyper-parameters are being selected on test data.
    Confirm this is intended.
    """
    hyper_parameters = {'batch_size': batch_size, 'lr': lr, 'eps': eps}

    train, _, test = get_paper_train_dev_test()
    _, train_together_and_claim_loader = make_2_kinds_data_set(train, batch_size)
    _, test_together_and_claim_loader = make_2_kinds_data_set(test, batch_size)

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = DoubleLoss(device).to(device)
    optimizer = AdamW(model.parameters(),
                      lr=lr,
                      eps=eps
                      )

    # Baseline row: performance of the untrained classification head.
    y_true, y_pred = test_consistency_model(model, test_together_and_claim_loader, device)
    weighted_f1, precision, recall = _classification_metrics(y_true, y_pred)
    add_to_result_csv(-1, weighted_f1, precision, recall, -1, 0, hyper_parameters)

    for epoch in tqdm(range(1, n_epochs + 1)):
        # Evaluation leaves the model in eval mode, so re-enable training
        # mode at the start of every epoch.
        model.train()
        start_time = datetime.now()
        total_loss = 0
        for batch in tqdm(train_together_and_claim_loader):
            model.zero_grad()
            together_ids, together_masks, claim_ids, claim_masks, labels = (
                tensor.to(device) for tensor in batch
            )

            loss = model(
                        together_ids,
                        together_masks,
                        claim_ids,
                        claim_masks,
                        labels
                        )
            loss.backward()
            total_loss += loss.item()
            optimizer.step()

        # Evaluate, then stop the clock: the logged time covers training
        # plus evaluation. total_seconds() is used because the `.seconds`
        # attribute silently drops whole days.
        y_true, y_pred = test_consistency_model(model, test_together_and_claim_loader, device)
        total_seconds = int((datetime.now() - start_time).total_seconds())

        weighted_f1, precision, recall = _classification_metrics(y_true, y_pred)
        add_to_result_csv(total_loss, weighted_f1, precision, recall,
                          total_seconds, epoch, hyper_parameters)

In [16]:
def tested(df,bs,lr,eps)->bool:

    test_done = \
        len(
            df[(df['hyper_parameters']==
            str({'batch_size':bs,'lr':lr,'eps':eps}))&
               (df['epoch']==10)
            ]) ==1
    return test_done

# Grid search over batch size, learning rate and AdamW epsilon.
# Combinations that already have a final-epoch row in the results CSV are
# skipped, so an interrupted search can be resumed by re-running this cell.

# On a fresh run the results file may not exist yet; start it with just the
# header instead of failing in read_csv.
if not os.path.exists('enriched_stancy/bert_consistency_hp_opt.csv'):
    create_empty_df()

searched_alredy = pd.read_csv('enriched_stancy/bert_consistency_hp_opt.csv')
hyper_parameters = {'batch_size': [32, 24, 16],
                    'lr': [0.001 * i for i in (0.1, 0.5, 1, 2, 5, 10)],
                    'eps': [1e-8 * i for i in (0.1, 0.5, 1, 2, 5, 10)]
                    }

for bs in hyper_parameters['batch_size']:
    for lr in hyper_parameters['lr']:
        for eps in hyper_parameters['eps']:
            if tested(searched_alredy, bs, lr, eps):
                continue
            train_consistency(bs, lr, eps)

Downloading:   0%|          | 0.00/232k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/28.0 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/466k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/570 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/440M [00:00<?, ?B/s]

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.predictions.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


  0%|          | 0/10 [00:00<?, ?it/s]

  0%|          | 0/219 [00:00<?, ?it/s]

KeyboardInterrupt: ignored