# Installations and Imports


In [None]:
# Mount Google Drive so the dataset and checkpoint paths under /content/drive resolve.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
%%capture
# Install the third-party packages used below; %%capture hides pip's output.
# NOTE(review): no versions are pinned, so a later run may pick up different releases.
!pip install transformers
!pip install accelerate
!pip install sentencepiece
!pip install datasets

In [None]:
from transformers import AutoModel
from transformers import AutoTokenizer
from sklearn.model_selection import train_test_split
import torch
import torch.nn as nn
import numpy as np
import pandas as pd
from tqdm.notebook import tqdm
import matplotlib.pyplot as plt
import gc
import sentencepiece
from torch.utils.data import Dataset, DataLoader
import re
from tqdm import tqdm
from accelerate import Accelerator
import pickle

import os

In [None]:
# Seed PyTorch's RNG and ask cuDNN for deterministic kernels.
# NOTE(review): only torch is seeded here; TrainingArguments further down is given its own seed value.
SEED = 1111
torch.manual_seed(SEED)
torch.backends.cudnn.deterministic = True

# Dataset preparation

In [None]:
import json

# Load the model-agnostic validation split and show how many records it has.
# The encoding is pinned to UTF-8 because the records contain non-ASCII text
# and the platform default encoding is not guaranteed to be UTF-8.
with open("/content/drive/MyDrive/shroom/val.model-agnostic.json", "r", encoding="utf-8") as infile:
  val_model_agnostic=json.load(infile)
print(len(val_model_agnostic))

# Keep a fixed column order; keys absent from a record become missing values.
val_model_agnostic_df=pd.DataFrame(data=val_model_agnostic, columns=["src", "tgt", "hyp", "ref", "task", "model", "labels", "label", "p(Hallucination)"])
val_model_agnostic_df.head(4)

499


Unnamed: 0,src,tgt,hyp,ref,task,model,labels,label,p(Hallucination)
0,The writer had just entered into his eighteent...,Resembling a weasel (in appearance).,Resembling or characteristic of a weasel.,tgt,DM,,"[Hallucination, Not Hallucination, Not Halluci...",Not Hallucination,0.2
1,Sailors ' and fishermen 's <define> sheath - k...,.,Alternative form of sheath knife,tgt,DM,,"[Hallucination, Hallucination, Hallucination, ...",Hallucination,0.8
2,"As to age , Bead could not form any clear impr...","(poetic) An instant, a short moment.",(obsolete) A short period of time.,tgt,DM,,"[Not Hallucination, Not Hallucination, Not Hal...",Not Hallucination,0.0
3,Because redpillers are usually normies or <def...,"(incel, _, slang) A man of a slightly lower ra...",(slang) An incel.,tgt,DM,,"[Not Hallucination, Not Hallucination, Halluci...",Not Hallucination,0.2


In [None]:
# Load the model-aware validation split and show how many records it has.
# The encoding is pinned to UTF-8 because the records contain text in many
# scripts and the platform default encoding is not guaranteed to be UTF-8.
with open("/content/drive/MyDrive/shroom/val.model-aware.v2.json", "r", encoding="utf-8") as infile:
  val_model_aware=json.load(infile)
print(len(val_model_aware))

# Same column layout as the model-agnostic frame so the two can be concatenated.
val_model_aware_df=pd.DataFrame(data=val_model_aware, columns=["src", "tgt", "hyp", "ref", "task", "model", "labels", "label", "p(Hallucination)"])
val_model_aware_df.head(4)

501


Unnamed: 0,src,tgt,hyp,ref,task,model,labels,label,p(Hallucination)
0,The sides of the casket were covered with heav...,A decorative feature that sits on top of somet...,A sloping top .,tgt,DM,ltg/flan-t5-definition-en-base,"[Not Hallucination, Hallucination, Not Halluci...",Hallucination,0.6
1,Please try not to overreact if she drives badl...,To react too much or too intensely .,To react too much .,tgt,DM,ltg/flan-t5-definition-en-base,"[Not Hallucination, Not Hallucination, Not Hal...",Not Hallucination,0.0
2,"To prevent spoilage , store in a cool , dry pl...",The process of spoiling .,The process of spoiling ; the state of being s...,tgt,DM,ltg/flan-t5-definition-en-base,"[Hallucination, Not Hallucination, Hallucinati...",Hallucination,0.6
3,The way the opposition has framed the argument...,To construct in words so as to establish a con...,To arrange in a particular way .,tgt,DM,ltg/flan-t5-definition-en-base,"[Hallucination, Not Hallucination, Not Halluci...",Hallucination,0.6


In [None]:
# Stack both validation splits into one frame; ignore_index renumbers the rows from 0.
all_val_df=pd.concat((val_model_agnostic_df, val_model_aware_df), ignore_index=True)
all_val_df

Unnamed: 0,src,tgt,hyp,ref,task,model,labels,label,p(Hallucination)
0,The writer had just entered into his eighteent...,Resembling a weasel (in appearance).,Resembling or characteristic of a weasel.,tgt,DM,,"[Hallucination, Not Hallucination, Not Halluci...",Not Hallucination,0.2
1,Sailors ' and fishermen 's <define> sheath - k...,.,Alternative form of sheath knife,tgt,DM,,"[Hallucination, Hallucination, Hallucination, ...",Hallucination,0.8
2,"As to age , Bead could not form any clear impr...","(poetic) An instant, a short moment.",(obsolete) A short period of time.,tgt,DM,,"[Not Hallucination, Not Hallucination, Not Hal...",Not Hallucination,0.0
3,Because redpillers are usually normies or <def...,"(incel, _, slang) A man of a slightly lower ra...",(slang) An incel.,tgt,DM,,"[Not Hallucination, Not Hallucination, Halluci...",Not Hallucination,0.2
4,On the second day of massive live - fire drill...,"An island in Dongyin, Lienchiang, Taiwan, in t...","An island in Lienchiang County, Taiwan.",tgt,DM,,"[Not Hallucination, Not Hallucination, Not Hal...",Not Hallucination,0.0
...,...,...,...,...,...,...,...,...,...
995,Doonii fayyadamuun meeshaa geejibuun namootaba...,Using ships to transport goods is by far the m...,Using a gas-fired device is a way to stop peop...,either,MT,facebook/nllb-200-distilled-600M,"[Hallucination, Hallucination, Hallucination, ...",Hallucination,1.0
996,وبما أن مونتيفيديو موجودة في جنوب خط الاستواء،...,"Since Montevideo is south of the Equator, it i...",Since Montevideo is located south of the equat...,either,MT,facebook/nllb-200-distilled-600M,"[Not Hallucination, Not Hallucination, Not Hal...",Not Hallucination,0.0
997,Gin-abiabi hiya han Deputy Prime Minister han ...,He was greeted by Singapore's Deputy Prime Min...,He was also a supporter of Singapore's Deputy ...,either,MT,facebook/nllb-200-distilled-600M,"[Hallucination, Hallucination, Not Hallucinati...",Hallucination,0.8
998,འབུ་ཞེས་པའི་ཐ་སྙད་དེ་ཉིད་འབུ་སྲིན་དཔྱད་རིག་པ་བ...,The term bug is used by entomologists in a for...,The concept of a worm is that the parasitic or...,either,MT,facebook/nllb-200-distilled-600M,"[Hallucination, Hallucination, Hallucination, ...",Hallucination,1.0


In [None]:
# Shuffle every row using a fixed random_state so the order is repeatable.
# NOTE(review): the frame is overwritten, so re-running this cell shuffles the already-shuffled frame.
all_val_df=all_val_df.sample(frac=1, random_state=42)

In [None]:
# Partition the shuffled rows by the value of their 'task' column (DM, MT, PG).
dm_val_df=all_val_df[all_val_df['task']=="DM"]
mt_val_df=all_val_df[all_val_df['task']=="MT"]
pg_val_df=all_val_df[all_val_df['task']=="PG"]

In [None]:
# Inspect the DM subset.
dm_val_df

Unnamed: 0,src,tgt,hyp,ref,task,model,labels,label,p(Hallucination)
521,Oxygen levels on earth skyrocketed 2.4 billion...,"That which precurses : a forerunner , predeces...",Something that is the first stage of a process .,tgt,DM,ltg/flan-t5-definition-en-base,"[Hallucination, Not Hallucination, Hallucinati...",Hallucination,0.6
660,"There they were before us , mother and babe , ...",In an eloquent manner ; stated well .,In an eloquent manner .,tgt,DM,ltg/flan-t5-definition-en-base,"[Hallucination, Not Hallucination, Not Halluci...",Not Hallucination,0.2
678,"Daniel is amazing , wonderful , fantastic , an...",An adjective used to praise something exceptio...,A word or phrase that is more than the usual o...,tgt,DM,ltg/flan-t5-definition-en-base,"[Not Hallucination, Hallucination, Not Halluci...",Not Hallucination,0.4
626,`` i 've always been a newshound and i was glu...,An investigative reporter .,a person who is interested in or devoted to th...,tgt,DM,ltg/flan-t5-definition-en-base,"[Hallucination, Hallucination, Hallucination, ...",Hallucination,0.6
513,[ raoul vaneigem ] i have nothing in common wi...,Process by which radical or subversive ideas a...,the action of recovering something,tgt,DM,ltg/flan-t5-definition-en-base,"[Hallucination, Hallucination, Hallucination, ...",Hallucination,1.0
...,...,...,...,...,...,...,...,...,...
614,Ye say i played checkers with him -- and -- ''...,"While or as if choking , or in such a way as t...",In a choking manner .,tgt,DM,ltg/flan-t5-definition-en-base,"[Not Hallucination, Not Hallucination, Not Hal...",Not Hallucination,0.0
20,Cheers <define> marrow </define> !,"(Geordie, informal) A friend, pal, buddy, mate.",(slang) The human body.,tgt,DM,,"[Hallucination, Hallucination, Hallucination, ...",Hallucination,1.0
71,so as no fuller on earth can <define> white </...,(transitive) To make white; to whiten; to bleach.,(transitive) To make white.,tgt,DM,,"[Not Hallucination, Not Hallucination, Not Hal...",Not Hallucination,0.0
106,"The body , however , failed to alter the <defi...",(by extension) Characterized by ruthlessly att...,"(idiomatic) Unconstrained, unadulterated.",tgt,DM,,"[Hallucination, Hallucination, Hallucination, ...",Hallucination,1.0


In [None]:
# Inspect the MT subset.
mt_val_df

Unnamed: 0,src,tgt,hyp,ref,task,model,labels,label,p(Hallucination)
411,Где мне его искать?,Where should I look for it?,Where do I want him?,either,MT,,"[Hallucination, Hallucination, Hallucination, ...",Hallucination,1.0
859,பயணிகள் தங்கள் வீட்டுப் பகுதிகளில் தெரியாத வில...,Travellers may encounter animal pests that the...,Travelers may encounter insects that destroy u...,either,MT,facebook/nllb-200-distilled-600M,"[Hallucination, Hallucination, Hallucination, ...",Hallucination,1.0
973,Ol 'case' we ol i saspekim H5N1 long Croatia n...,Suspected cases of H5N1 in Croatia and Denmark...,"In the case of H5N1 in Croatia and Denmark, th...",either,MT,facebook/nllb-200-distilled-600M,"[Not Hallucination, Not Hallucination, Not Hal...",Not Hallucination,0.2
938,ٲخرکار، چُھ لیڈرٕیی آسن ٹیمچہ کامیٲبی تہ ناکٲم...,"After all, the leader is ultimately responsibl...","Finally, leadership is responsible for the suc...",either,MT,facebook/nllb-200-distilled-600M,"[Not Hallucination, Not Hallucination, Not Hal...",Not Hallucination,0.0
899,Kebanyakan teleskop kajian moden merupakan fas...,Most modern research telescopes are enormous f...,Most modern survey telescopes are large facili...,either,MT,facebook/nllb-200-distilled-600M,"[Not Hallucination, Hallucination, Not Halluci...",Not Hallucination,0.2
...,...,...,...,...,...,...,...,...,...
458,В моей стране люди обычно приветствуют друг др...,People generally greet each other by shaking h...,"In my country, people usually welcome each oth...",either,MT,,"[Hallucination, Not Hallucination, Not Halluci...",Not Hallucination,0.4
330,"Будем надеяться, в этом году не случится то же...",Let's hope the same thing doesn't happen this ...,Hopefully this year won’t be the same thing.,either,MT,,"[Not Hallucination, Not Hallucination, Not Hal...",Not Hallucination,0.0
466,"Ты действительно думаешь, что я могу летать?",You really think I can fly?,Do you really think I can fly?,either,MT,,"[Not Hallucination, Not Hallucination, Not Hal...",Not Hallucination,0.0
860,Moi semellante ao mando a distancia dun televi...,"Resembling a television remote, the controller...","Much like the remote control of a television, ...",either,MT,facebook/nllb-200-distilled-600M,"[Not Hallucination, Not Hallucination, Not Hal...",Not Hallucination,0.0


In [None]:
# Inspect the PG subset.
pg_val_df

Unnamed: 0,src,tgt,hyp,ref,task,model,labels,label,p(Hallucination)
737,But we all know that sadly it does not matter ...,,It doesn't matter if they're in a state or not.,src,PG,tuner007/pegasus_paraphrase,"[Not Hallucination, Hallucination, Hallucinati...",Hallucination,0.6
740,We are obliged to him for this.,,We have to do it for him.,src,PG,tuner007/pegasus_paraphrase,"[Not Hallucination, Not Hallucination, Not Hal...",Not Hallucination,0.2
811,"There will be other texts on Europol, and Parl...",,Parliament will be the colegislator of Europol.,src,PG,tuner007/pegasus_paraphrase,"[Hallucination, Not Hallucination, Hallucinati...",Not Hallucination,0.4
280,This is not your house anymore.,You don't live here anymore.,This isn't your house anymore.,either,PG,,"[Not Hallucination, Not Hallucination, Not Hal...",Not Hallucination,0.0
761,At the moment the shipbuilding sector is in ta...,,The shipbuilding sector isn't doing well right...,src,PG,tuner007/pegasus_paraphrase,"[Not Hallucination, Hallucination, Not Halluci...",Not Hallucination,0.4
...,...,...,...,...,...,...,...,...,...
769,"Indeed, it is on the agenda for the topical an...",,The debate will take place the day after tomor...,src,PG,tuner007/pegasus_paraphrase,"[Not Hallucination, Not Hallucination, Halluci...",Not Hallucination,0.4
308,"Eh, just a few days.",Just a couple of days.,"Uh, just for a couple of days.",either,PG,,"[Hallucination, Not Hallucination, Not Halluci...",Not Hallucination,0.2
214,I don't question that.,I have no doubt.,I do not question that.,either,PG,,"[Not Hallucination, Hallucination, Not Halluci...",Not Hallucination,0.2
700,I would draw attention to the fact that the qu...,,Questions surrounding Eurostat have been raise...,src,PG,tuner007/pegasus_paraphrase,"[Not Hallucination, Not Hallucination, Not Hal...",Not Hallucination,0.0


In [None]:
# Hold out the MT rows for evaluation and use the DM and PG rows for training.
# NOTE(review): `train_df` is rebound later in the notebook to the output of prepare_df,
# so cells that read it at this stage depend on execution order.
val_df=mt_val_df
train_df=pd.concat((dm_val_df, pg_val_df), ignore_index=True)

In [None]:
gold_labels=[]
first_sentences=[]
second_sentences=[]

# Turn every row of train_df into a (sentence1, sentence2, label) triple.
# Iterating over the needed columns avoids building one Series per row via .iloc.
rows=zip(train_df['src'], train_df['tgt'], train_df['hyp'], train_df['task'], train_df['label'])
for src, tgt, hyp, task, label in tqdm(rows, total=len(train_df)):
  # "Hallucination" maps to contradiction; every other label maps to entailment.
  if label.strip().lower()=="hallucination":
    label="contradiction"
  else:
    label="entailment"

  # MT and DM rows are compared against 'tgt'; all other rows against 'src'.
  reference=tgt if task in ("MT", "DM") else src
  sentence1=reference.strip().lower()
  sentence2=hyp.strip().lower()

  assert len(sentence1)>0, "sentence1 is empty after stripping"
  assert len(sentence2)>0, "sentence2 is empty after stripping"

  gold_labels.append(label)
  first_sentences.append(sentence1)
  second_sentences.append(sentence2)


100%|██████████| 625/625 [00:00<00:00, 5927.47it/s]


In [None]:
# Collect the three parallel lists built from train_df into one frame.
temp_dict={
    'gold_label':gold_labels,
    'sentence1':first_sentences,
    'sentence2':second_sentences
}

train_df_part1=pd.DataFrame(data=temp_dict)
train_df_part1.head(3)

Unnamed: 0,gold_label,sentence1,sentence2
0,contradiction,"that which precurses : a forerunner , predeces...",something that is the first stage of a process .
1,entailment,in an eloquent manner ; stated well .,in an eloquent manner .
2,entailment,an adjective used to praise something exceptio...,a word or phrase that is more than the usual o...


In [None]:
gold_labels=[]
first_sentences=[]
second_sentences=[]

# Turn every row of val_df into a (sentence1, sentence2, label) triple.
# Iterating over the needed columns avoids building one Series per row via .iloc.
rows=zip(val_df['src'], val_df['tgt'], val_df['hyp'], val_df['task'], val_df['label'])
for src, tgt, hyp, task, label in tqdm(rows, total=len(val_df)):
  # "Hallucination" maps to contradiction; every other label maps to entailment.
  if label.strip().lower()=="hallucination":
    label="contradiction"
  else:
    label="entailment"

  # MT and DM rows are compared against 'tgt'; all other rows against 'src'.
  reference=tgt if task in ("MT", "DM") else src
  sentence1=reference.strip().lower()
  sentence2=hyp.strip().lower()

  assert len(sentence1)>0, "sentence1 is empty after stripping"
  assert len(sentence2)>0, "sentence2 is empty after stripping"

  gold_labels.append(label)
  first_sentences.append(sentence1)
  second_sentences.append(sentence2)


100%|██████████| 375/375 [00:00<00:00, 7064.13it/s]


In [None]:
# Collect the three parallel lists built from val_df into the dev frame.
temp_dict={
    'gold_label':gold_labels,
    'sentence1':first_sentences,
    'sentence2':second_sentences
}

df_dev=pd.DataFrame(data=temp_dict)
df_dev.head(3)

Unnamed: 0,gold_label,sentence1,sentence2
0,contradiction,where should i look for it?,where do i want him?
1,contradiction,travellers may encounter animal pests that the...,travelers may encounter insects that destroy u...
2,entailment,suspected cases of h5n1 in croatia and denmark...,"in the case of h5n1 in croatia and denmark, th..."


In [None]:
max_sentence_len=100
def trim_sentence(sent):
    """Keep at most ``max_sentence_len`` whitespace-separated words of ``sent``.

    Runs of whitespace are collapsed to single spaces as a side effect of the
    split/join round trip.

    Args:
        sent: The sentence to shorten. Values that are not ``str`` (for
            example ``None`` or a float NaN from pandas) are returned
            unchanged.

    Returns:
        The trimmed sentence, or ``sent`` itself when it is not a string.
    """
    # Check the type explicitly rather than catching every exception: a bare
    # ``except`` also hides unrelated errors and could hand back the
    # already-split list instead of the caller's original value.
    if not isinstance(sent, str):
        return sent
    return " ".join(sent.split()[:max_sentence_len])

In [None]:
# Cap every sentence in both frames at max_sentence_len words.
train_df_part1['sentence1'] = train_df_part1['sentence1'].apply(trim_sentence)
train_df_part1['sentence2'] = train_df_part1['sentence2'].apply(trim_sentence)
df_dev['sentence1'] = df_dev['sentence1'].apply(trim_sentence)
df_dev['sentence2'] = df_dev['sentence2'].apply(trim_sentence)

# Keep only rows whose label is one of the two expected classes.
train_df_part1 = train_df_part1.loc[train_df_part1['gold_label'].isin(['entailment','contradiction'])]
df_dev = df_dev.loc[df_dev['gold_label'].isin(['entailment','contradiction'])]

# Row counts after filtering (train part, dev).
print(len(train_df_part1), len(df_dev))

625 375


Add the newly labeled data (PG and MT samples) to the training set.

In [None]:
# Read the additional labeled samples from CSV on Drive.
path="/content/drive/MyDrive/shroom/our_final_shroom_models/all_3000_labeled_samples.csv"

train_model_agnostic_labeled=pd.read_csv(path)
train_model_agnostic_labeled.head()

Unnamed: 0,src,tgt,hyp,ref,task,label
0,Не волнуйся. Это только временно.,Don't worry. It's only temporary.,"Don't worry, it's only temporary.",either,MT,0
1,"Тома никогда нет там, где он должен быть.",Tom is never where he's supposed to be.,Tom is never where he should be.,either,MT,0
2,Мне сложно работать с Томом.,I have trouble working with Tom.,It's hard for me to work with Tom.,either,MT,0
3,"Воду, пожалуйста.",I'd like some water.,"Water, please.",either,MT,0
4,"Я не ожидал, что Том предаст меня.",I didn't think that Tom would betray me.,I didn't expect Tom to betray me.,either,MT,0


In [None]:
# Select the PG rows of the newly labeled data.
train_pg=train_model_agnostic_labeled[train_model_agnostic_labeled['task']=="PG"]
train_pg

Unnamed: 0,src,tgt,hyp,ref,task,label
334,"You're not alone, Claire.",,"You're not alone, claire- -",src,PG,0
335,"Who told you to throw acid at Vargas, hmm?",,"Who told you to throw acid at Vargas, hmmm?",src,PG,0
336,Where the pure angel merges with the antic Sphinx,,♪ Where the pure angel merges with the antic s...,src,PG,0
337,Where is it written what is it I'm meant to be,,Where is it written what is it I'm meant to be?,src,PG,0
338,We'll find the skipper and then we'll go home.,,We'll find the skipper and then we'll go home.,src,PG,0
...,...,...,...,...,...,...
662,Violent crimes involving ordinary people.,,⁇ crimes involving ordinary human beings.,src,PG,1
663,Take Ziva.,,"Take Ziva, take Ziva, take Ziva.",src,PG,1
664,Jewellery?,,⁇ ?,src,PG,1
665,"Hello, Jan.",,"Hello, Jan.",src,PG,0


In [None]:
gold_labels=[]
first_sentences=[]
second_sentences=[]

# Build (src, hyp) sentence pairs for the PG rows of the newly labeled data.
# Only the columns that are actually used are iterated.
rows=zip(train_pg['src'], train_pg['hyp'], train_pg['label'])
for src, hyp, label in tqdm(rows, total=len(train_pg)):
  # Label 1 maps to contradiction; any other value maps to entailment.
  label="contradiction" if label==1 else "entailment"

  # NOTE(review): a non-string value (e.g. a missing CSV cell read as NaN) is
  # coerced to text such as "nan" and passes the length check below —
  # confirm the CSV has no empty src/hyp cells.
  sentence1=src if isinstance(src, str) else str(src)
  sentence1=sentence1.strip().lower()
  assert len(sentence1)>0, "sentence1 is empty after stripping"

  sentence2=hyp if isinstance(hyp, str) else str(hyp)
  sentence2=sentence2.strip().lower()
  assert len(sentence2)>0, "sentence2 is empty after stripping"

  gold_labels.append(label)
  first_sentences.append(sentence1)
  second_sentences.append(sentence2)


100%|██████████| 333/333 [00:00<00:00, 9013.20it/s]


In [None]:
# Select the MT rows of the newly labeled data.
train_mt=train_model_agnostic_labeled[train_model_agnostic_labeled['task']=="MT"]
train_mt

Unnamed: 0,src,tgt,hyp,ref,task,label
0,Не волнуйся. Это только временно.,Don't worry. It's only temporary.,"Don't worry, it's only temporary.",either,MT,0
1,"Тома никогда нет там, где он должен быть.",Tom is never where he's supposed to be.,Tom is never where he should be.,either,MT,0
2,Мне сложно работать с Томом.,I have trouble working with Tom.,It's hard for me to work with Tom.,either,MT,0
3,"Воду, пожалуйста.",I'd like some water.,"Water, please.",either,MT,0
4,"Я не ожидал, что Том предаст меня.",I didn't think that Tom would betray me.,I didn't expect Tom to betray me.,either,MT,0
...,...,...,...,...,...,...
2995,Одолжи мне бумаги для заметок или что-нибудь в...,Lend me a memo pad or something.,Let me borrow some notebooks or something.,either,MT,1
2996,Общая тайна доверие формирует.,A secret shared is a trust formed.,A shared secret builds trust.,either,MT,0
2997,Ты в последнее время сама не своя. Что-то не так?,You haven't been yourself recently. Is somethi...,You've been out of your mind lately.,either,MT,0
2998,Я бы хотел вернуться в Токио через Гонолулу.,I'd like to return to Tokyo via Honolulu.,I'd like to go back to Tokyo through Honolulu.,either,MT,0


In [None]:
# Append (tgt, hyp) sentence pairs for the MT rows to the lists that the PG
# cell filled. The lists are deliberately NOT re-initialised here.
# Guard against hidden state: running this cell twice, or without a fresh run
# of the PG cell, would otherwise silently duplicate or misalign the data.
assert len(gold_labels)==len(first_sentences)==len(second_sentences)==len(train_pg), \
  "re-run the PG pair-building cell before this one"

rows=zip(train_mt['tgt'], train_mt['hyp'], train_mt['label'])
for tgt, hyp, label in tqdm(rows, total=len(train_mt)):
  # Label 1 maps to contradiction; any other value maps to entailment.
  label="contradiction" if label==1 else "entailment"

  # NOTE(review): a non-string value (e.g. a missing CSV cell read as NaN) is
  # coerced to text such as "nan" and passes the length check below —
  # confirm the CSV has no empty tgt/hyp cells.
  sentence1=tgt if isinstance(tgt, str) else str(tgt)
  sentence1=sentence1.strip().lower()
  assert len(sentence1)>0, "sentence1 is empty after stripping"

  sentence2=hyp if isinstance(hyp, str) else str(hyp)
  sentence2=sentence2.strip().lower()
  assert len(sentence2)>0, "sentence2 is empty after stripping"

  gold_labels.append(label)
  first_sentences.append(sentence1)
  second_sentences.append(sentence2)


100%|██████████| 1334/1334 [00:00<00:00, 9456.07it/s]


In [None]:
# At this point the lists hold the PG pairs followed by the MT pairs appended
# by the preceding loops; collect them into one frame.
temp_dict={
    'gold_label':gold_labels,
    'sentence1':first_sentences,
    'sentence2':second_sentences
}

df=pd.DataFrame(data=temp_dict)
df

Unnamed: 0,gold_label,sentence1,sentence2
0,entailment,"you're not alone, claire.","you're not alone, claire- -"
1,entailment,"who told you to throw acid at vargas, hmm?","who told you to throw acid at vargas, hmmm?"
2,entailment,where the pure angel merges with the antic sphinx,♪ where the pure angel merges with the antic s...
3,entailment,where is it written what is it i'm meant to be,where is it written what is it i'm meant to be?
4,entailment,we'll find the skipper and then we'll go home.,we'll find the skipper and then we'll go home.
...,...,...,...
1662,contradiction,lend me a memo pad or something.,let me borrow some notebooks or something.
1663,entailment,a secret shared is a trust formed.,a shared secret builds trust.
1664,entailment,you haven't been yourself recently. is somethi...,you've been out of your mind lately.
1665,entailment,i'd like to return to tokyo via honolulu.,i'd like to go back to tokyo through honolulu.


In [None]:
# Cap every sentence at max_sentence_len words.
df['sentence1'] = df['sentence1'].apply(trim_sentence)
df['sentence2'] = df['sentence2'].apply(trim_sentence)

# Keep only rows whose label is one of the two expected classes.
df = df.loc[df['gold_label'].isin(['entailment','contradiction'])]

print(len(df))

1667


In [None]:
# Combine the newly labeled pairs with the pairs derived from the validation data.
# The index is not reset here, so index values repeat across the two parts.
new_df_train=pd.concat([df, train_df_part1])
new_df_train

Unnamed: 0,gold_label,sentence1,sentence2
0,entailment,"you're not alone, claire.","you're not alone, claire- -"
1,entailment,"who told you to throw acid at vargas, hmm?","who told you to throw acid at vargas, hmmm?"
2,entailment,where the pure angel merges with the antic sphinx,♪ where the pure angel merges with the antic s...
3,entailment,where is it written what is it i'm meant to be,where is it written what is it i'm meant to be?
4,entailment,we'll find the skipper and then we'll go home.,we'll find the skipper and then we'll go home.
...,...,...,...
620,entailment,"indeed, it is on the agenda for the topical an...",the debate will take place the day after tomor...
621,entailment,"eh, just a few days.","uh, just for a couple of days."
622,entailment,i don't question that.,i do not question that.
623,entailment,i would draw attention to the fact that the qu...,questions surrounding eurostat have been raise...


In [None]:
# Shuffle the combined training rows using a fixed random_state.
# NOTE(review): the frame is overwritten, so re-running this cell reshuffles the shuffled frame.
new_df_train=new_df_train.sample(frac=1, random_state=42)
len(new_df_train)

2292

In [None]:
# Peek at the first two shuffled rows.
new_df_train.head(2)

Unnamed: 0,gold_label,sentence1,sentence2
207,entailment,at a distance ; disconnected .,remote ; not confined to a particular location .
1414,entailment,"fortunately, today tom is much better than he ...","fortunately, tom is much better today than he ..."


In [None]:
# Mapping from textual class label to the integer id used for training.
labels_dict={
    "entailment":0,
    'contradiction':1,
}

In [None]:
def prepare_df(input_df):
  """Return a copy of ``input_df`` ready for tokenization.

  Each ``sentence1`` / ``sentence2`` value that is not a ``str`` is converted
  with ``str()``, and each textual ``gold_label`` is replaced by its integer
  id from ``labels_dict`` (a label missing from that dict raises ``KeyError``).

  The result has the columns ``gold_label``, ``sentence1``, ``sentence2`` and
  a fresh default index.
  """
  def as_text(value):
    # Pass real strings through untouched; stringify anything else.
    return value if type(value)==str else str(value)

  label_ids=[]
  left_texts=[]
  right_texts=[]

  triples=zip(input_df['sentence1'], input_df['sentence2'], input_df['gold_label'])
  for left, right, gold in tqdm(triples, total=len(input_df)):
    left_texts.append(as_text(left))
    right_texts.append(as_text(right))
    label_ids.append(labels_dict[gold])

  return pd.DataFrame(data={
    'gold_label':label_ids,
    'sentence1': left_texts,
    'sentence2': right_texts
  })

In [None]:
# Produce the integer-labeled train and dev frames.
# NOTE(review): this rebinds `train_df`, which earlier held the DM+PG validation rows.
train_df=prepare_df(new_df_train)
dev_df=prepare_df(df_dev)

100%|██████████| 2292/2292 [00:00<00:00, 16994.62it/s]
100%|██████████| 375/375 [00:00<00:00, 18385.53it/s]


In [None]:
# Inspect the prepared training frame.
train_df

Unnamed: 0,gold_label,sentence1,sentence2
0,0,at a distance ; disconnected .,remote ; not confined to a particular location .
1,0,"fortunately, today tom is much better than he ...","fortunately, tom is much better today than he ..."
2,0,this peach is a beauty.,this peach is beautiful.
3,0,having self-respect ; having pride or knowledg...,having respect for oneself ; disposed to respe...
4,0,zugzwang is a situation in which the obligatio...,zugzwang is a situation in which the obligatio...
...,...,...,...
2287,0,that really shocked me.,i was really shocked.
2288,0,tom doesn't fear death.,tom is not afraid of death.
2289,0,does anybody know?,does anyone know?
2290,0,he's afraid of you.,he's afraid of you.


In [None]:
# NOTE(review): DebertaForSequenceClassification is imported but not used in the visible cells.
from transformers import AutoTokenizer, DebertaForSequenceClassification

# Location on Drive of the checkpoint whose tokenizer (and, later, weights) are loaded.
model_checkpoint=f"/content/drive/MyDrive/shroom/our_final_shroom_models/deberta-v3-large-nli.pt"

# Load the fast tokenizer stored with that checkpoint.
tokenizer = AutoTokenizer.from_pretrained(model_checkpoint, use_fast=True)

In [None]:
# NOTE(review): this rebinds `Dataset`, which the import cell took from torch.utils.data.
from datasets import DatasetDict, Dataset

# Wrap the prepared frames as the 'train' and 'dev' splits of one DatasetDict.
dataset = DatasetDict({'train': Dataset.from_pandas(train_df), 'dev':Dataset.from_pandas(dev_df)})
dataset

DatasetDict({
    train: Dataset({
        features: ['gold_label', 'sentence1', 'sentence2'],
        num_rows: 2292
    })
    dev: Dataset({
        features: ['gold_label', 'sentence1', 'sentence2'],
        num_rows: 375
    })
})

In [None]:
max_length = 256

def tokenize_preprocess_function(examples):
    """Tokenize a batch of sentence pairs for sequence classification.

    Args:
        examples: A batch from ``Dataset.map(batched=True)``; reads the
            ``sentence1``, ``sentence2`` and ``gold_label`` columns.

    Returns:
        A dict with ``input_ids``, ``attention_mask`` and ``label`` for the
        batch. Sequences are truncated to ``max_length`` tokens but are not
        padded here.
    """
    # No padding at map time: each training batch is padded later by the
    # data collator, which also extends the attention mask. This assumes the
    # Trainer is given the tokenizer so that it pads per batch — TODO confirm
    # if the Trainer setup changes.
    encoded = tokenizer(
        examples['sentence1'],
        examples['sentence2'],
        max_length=max_length,
        truncation=True,
    )

    # Return the attention mask together with the ids. Returning only
    # input_ids discards the tokenizer's padding mask, leaving nothing
    # downstream to tell padding positions from real tokens.
    return {
        'input_ids': encoded['input_ids'],
        'attention_mask': encoded['attention_mask'],
        'label': examples['gold_label'],
    }

tokenized_datasets = dataset.map(tokenize_preprocess_function, batched=True)
tokenized_datasets

Map:   0%|          | 0/2292 [00:00<?, ? examples/s]

Map:   0%|          | 0/375 [00:00<?, ? examples/s]

DatasetDict({
    train: Dataset({
        features: ['gold_label', 'sentence1', 'sentence2', 'input_ids', 'label'],
        num_rows: 2292
    })
    dev: Dataset({
        features: ['gold_label', 'sentence1', 'sentence2', 'input_ids', 'label'],
        num_rows: 375
    })
})

# Hugging Face Model and Trainer

In [None]:
from transformers import AutoModelForSequenceClassification, TrainingArguments, Trainer

# Two output classes, matching the two ids in labels_dict (entailment, contradiction).
num_labels=2

# Load the sequence-classification model from the same checkpoint as the tokenizer.
model = AutoModelForSequenceClassification.from_pretrained(model_checkpoint, num_labels=num_labels)

In [None]:
# NOTE(review): the captured output below warns that load_metric will require
# trust_remote_code=True from the next major `datasets` release.
from datasets import load_metric

# Load the GLUE metric for the "mnli" configuration; compute_metrics calls it.
# This also rebinds the global `task` used as a per-row variable in earlier loops.
task="mnli"
metric = load_metric('glue', task)

  metric = load_metric('glue', task)
You can avoid this message in future by passing the argument `trust_remote_code=True`.
Passing `trust_remote_code=True` will be mandatory to load this metric from the next major release of `datasets`.


Downloading builder script:   0%|          | 0.00/1.84k [00:00<?, ?B/s]

In [None]:
batch_size=8
metric_name = "accuracy"
# Hub id of the base architecture, used only to derive a short name for output
# paths. It is kept in its own variable so that `model_checkpoint` keeps
# pointing at the fine-tuned checkpoint on Drive; overwriting it here would
# make a re-run of the tokenizer/model cells load different weights.
base_model_name = "microsoft/deberta-v3-large"
model_name = base_model_name.split("/")[-1]
num_epochs=10

# Evaluate and checkpoint once per epoch, keep at most two checkpoints, and
# reload the checkpoint with the best `metric_name` when training ends.
args = TrainingArguments(
    output_dir=f"/content/{model_name}.checkpoint",
    seed=0,
    evaluation_strategy = "epoch",
    save_strategy = "epoch",
    learning_rate=2e-5,
    per_device_train_batch_size=batch_size,
    per_device_eval_batch_size=batch_size,
    num_train_epochs=num_epochs,
    logging_steps=200,
    weight_decay=0.01,
    load_best_model_at_end=True,
    save_total_limit=2,
    metric_for_best_model=metric_name,
    push_to_hub=False,
    fp16=True
)

In [None]:
def compute_metrics(eval_pred):
    """Compute classification accuracy for the Trainer.

    Accuracy is computed directly with NumPy so evaluation does not depend on
    the deprecated, remotely loaded ``load_metric`` script.

    Args:
        eval_pred: A pair ``(predictions, labels)`` where ``predictions`` is a
            2-D array of per-class scores and ``labels`` holds the integer
            class ids.

    Returns:
        ``{"accuracy": float}`` — the fraction of rows whose highest-scoring
        class equals the label.
    """
    predictions, labels = eval_pred
    predicted_ids = np.argmax(predictions, axis=1)
    # Plain float keeps the value JSON-serialisable in the Trainer's logs.
    return {"accuracy": float((predicted_ids == np.asarray(labels)).mean())}

In [None]:
# Wire the model, training arguments, tokenized splits, tokenizer and metric
# function into a Trainer.
trainer = Trainer(
    model,
    args,
    train_dataset=tokenized_datasets["train"],
    eval_dataset=tokenized_datasets['dev'],
    tokenizer=tokenizer,
    compute_metrics=compute_metrics
)

Train on the DM, PG, and MT samples; evaluate on the MT validation split.

In [None]:
# Evaluate the loaded checkpoint on the dev split before any training in this notebook.
trainer.evaluate()

You're using a DebertaV2TokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.


{'eval_loss': 0.07365628331899643,
 'eval_accuracy': 0.9866666666666667,
 'eval_runtime': 6.5502,
 'eval_samples_per_second': 57.25,
 'eval_steps_per_second': 7.175}

In [None]:
# Fine-tune on the training split; the dev split is evaluated after every epoch.
trainer.train()

You're using a DebertaV2TokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.


Epoch,Training Loss,Validation Loss,Accuracy
1,0.3086,0.210641,0.933333
2,0.2478,0.081998,0.981333
3,0.1236,0.10654,0.981333
4,0.0759,0.146022,0.976
5,0.0193,0.191085,0.976
6,0.0102,0.198252,0.976
7,0.0157,0.195478,0.976
8,0.0152,0.189139,0.976
9,0.0162,0.183017,0.976
10,0.0036,0.185172,0.976


TrainOutput(global_step=2870, training_loss=0.07780512139356925, metrics={'train_runtime': 1670.158, 'train_samples_per_second': 13.723, 'train_steps_per_second': 1.718, 'total_flos': 3710968460586960.0, 'train_loss': 0.07780512139356925, 'epoch': 10.0})

In [None]:
# Save the trainer's current model to Drive.
# NOTE(review): with load_best_model_at_end=True this is presumably the best-accuracy checkpoint — confirm.
trainer.save_model(f"/content/drive/MyDrive/shroom/{model_name}_MT_best.pt")