In [1]:
!pip install transformers[sentencepiece]

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting transformers[sentencepiece]
  Downloading transformers-4.21.0-py3-none-any.whl (4.7 MB)
[K     |████████████████████████████████| 4.7 MB 3.7 MB/s 
Collecting tokenizers!=0.11.3,<0.13,>=0.11.1
  Downloading tokenizers-0.12.1-cp37-cp37m-manylinux_2_12_x86_64.manylinux2010_x86_64.whl (6.6 MB)
[K     |████████████████████████████████| 6.6 MB 39.1 MB/s 
[?25hCollecting pyyaml>=5.1
  Downloading PyYAML-6.0-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl (596 kB)
[K     |████████████████████████████████| 596 kB 47.3 MB/s 
Collecting huggingface-hub<1.0,>=0.1.0
  Downloading huggingface_hub-0.8.1-py3-none-any.whl (101 kB)
[K     |████████████████████████████████| 101 kB 12.8 MB/s 
Collecting sentencepiece!=0.1.92,>=0.1.91
  Downloading sentencepiece-0.1.96-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.2 MB)
[K     |█

In [2]:
!pip install accelerate

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting accelerate
  Downloading accelerate-0.11.0-py3-none-any.whl (123 kB)
[?25l[K     |██▋                             | 10 kB 12.7 MB/s eta 0:00:01[K     |█████▎                          | 20 kB 18.0 MB/s eta 0:00:01[K     |████████                        | 30 kB 17.7 MB/s eta 0:00:01[K     |██████████▋                     | 40 kB 8.6 MB/s eta 0:00:01[K     |█████████████▎                  | 51 kB 8.3 MB/s eta 0:00:01[K     |████████████████                | 61 kB 9.7 MB/s eta 0:00:01[K     |██████████████████▋             | 71 kB 9.1 MB/s eta 0:00:01[K     |█████████████████████▎          | 81 kB 7.4 MB/s eta 0:00:01[K     |████████████████████████        | 92 kB 8.2 MB/s eta 0:00:01[K     |██████████████████████████▋     | 102 kB 9.0 MB/s eta 0:00:01[K     |█████████████████████████████▎  | 112 kB 9.0 MB/s eta 0:00:01[K     |██████████████████████████████

In [3]:
import json
from tqdm import tqdm
from transformers import T5Tokenizer, T5ForConditionalGeneration

In [4]:
model_name = "allenai/unifiedqa-v2-t5-large-1251000"
tokenizer = T5Tokenizer.from_pretrained(model_name)
model = T5ForConditionalGeneration.from_pretrained(model_name)

Downloading spiece.model:   0%|          | 0.00/773k [00:00<?, ?B/s]

Downloading special_tokens_map.json:   0%|          | 0.00/1.74k [00:00<?, ?B/s]

Downloading tokenizer_config.json:   0%|          | 0.00/2.09k [00:00<?, ?B/s]

Downloading config.json:   0%|          | 0.00/1.33k [00:00<?, ?B/s]

Downloading pytorch_model.bin:   0%|          | 0.00/2.75G [00:00<?, ?B/s]

In [5]:
from accelerate import Accelerator

accelerator = Accelerator()
device = accelerator.device
model.to(device)

T5ForConditionalGeneration(
  (shared): Embedding(32128, 1024)
  (encoder): T5Stack(
    (embed_tokens): Embedding(32128, 1024)
    (block): ModuleList(
      (0): T5Block(
        (layer): ModuleList(
          (0): T5LayerSelfAttention(
            (SelfAttention): T5Attention(
              (q): Linear(in_features=1024, out_features=1024, bias=False)
              (k): Linear(in_features=1024, out_features=1024, bias=False)
              (v): Linear(in_features=1024, out_features=1024, bias=False)
              (o): Linear(in_features=1024, out_features=1024, bias=False)
              (relative_attention_bias): Embedding(32, 16)
            )
            (layer_norm): T5LayerNorm()
            (dropout): Dropout(p=0.1, inplace=False)
          )
          (1): T5LayerFF(
            (DenseReluDense): T5DenseActDense(
              (wi): Linear(in_features=1024, out_features=4096, bias=False)
              (wo): Linear(in_features=4096, out_features=1024, bias=False)
              (d

In [6]:
def validate(evidence, claim, **generator_args):
    input_ids = tokenizer.encode(claim + '\n' + evidence, return_tensors="pt")
    input_ids = input_ids.to(device)
    res = model.generate(input_ids, **generator_args)
    answer = tokenizer.batch_decode(res, skip_special_tokens=True)
    for word in answer:
      if word != "yes" and word != "no":
        print(word)
    
    assert len(answer) == 1

    return True if answer[0] == 'yes' else False

In [7]:
def get_prediction(filename):
  print(f'Processing file: {filename}')
  with open(filename) as f:
    data = json.load(f)


  results = []
  results_both_same = 0

  tp, tn, fp, fn = 0, 0, 0, 0
  for spouse_data in tqdm(data):
    person_one, person_two = spouse_data[0]
    claim_1 = f' is {person_one} married to {person_two}?' 
    claim_2 = f'is {person_two} married to {person_one}?' 

    correct = spouse_data[2]
    evidence = spouse_data[3]
    result_1 = validate(evidence, claim_1)
    results.append((claim_1, evidence, correct, result_1))
    tp += 1 if (result_1 == correct == True) else 0
    tn += 1 if (result_1 == correct == False) else 0
    fp += 1 if (result_1 == True and correct == False) else 0
    fn += 1 if (result_1 == False and correct == True) else 0

    result_2 = validate(evidence, claim_2)
    results.append((claim_2, evidence, correct, result_2))
    tp += 1 if (result_2 == correct == True) else 0
    tn += 1 if (result_2 == correct == False) else 0
    fp += 1 if (result_2 == True and correct == False) else 0
    fn += 1 if (result_2 == False and correct == True) else 0
    
    results_both_same += 1 if (result_1 == result_2) else 0

  
  precision = tp / (tp + fp)
  recall = tp / (tp + fn)

  print('Done')
  print(f'tp={tp},fp={fp},tn={tn},fn={fn}')
  print(f'Precision: {precision}')
  print(f'Recall: {recall}')
  print(f'F1 score: {(2*precision*recall)/(precision + recall)}')
  print(f'Count of both same result: {results_both_same}')

  return results

In [8]:
results = get_prediction('./spouse_fact_validation_gt.json')

Processing file: ./spouse_fact_validation_gt.json


100%|██████████| 100/100 [00:33<00:00,  2.99it/s]

Done
tp=93,fp=4,tn=84,fn=19
Precision: 0.9587628865979382
Recall: 0.8303571428571429
F1 score: 0.8899521531100478
Count of both same result: 95





In [9]:
import pandas as pd
results_df = pd.DataFrame(results, columns= ['Question', 'Support', 'Correct', 'Predicted'])
results_df.to_excel('results.xlsx', index = False)