# Set-up

In [None]:
# Make Google Drive available inside the Colab filesystem.
import os
from google.colab import drive

drive_mount_point = '/content/drive'
drive.mount(drive_mount_point)

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
# Switch into the project folder on the mounted Drive.
# An absolute path keeps this cell re-runnable: the relative form fails with
# FileNotFoundError once the working directory has already moved into the project.
# (Assumes the notebook starts in Colab's default /content directory, which is
# what the relative path resolved against — confirm if run elsewhere.)
os.chdir('/content/drive/MyDrive/Project')

In [None]:
!pip install simplet5
!pip install datasets
!pip install transformers
!pip install evaluate
!pip install rouge_score
!pip install sacrebleu
!pip install accelerate -U

In [None]:
import sacrebleu
import rouge_score

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split

import torch

import nltk
import evaluate

from datasets import load_dataset, DatasetDict, Dataset
from transformers import T5Tokenizer, DataCollatorForSeq2Seq
from transformers import T5ForConditionalGeneration, Seq2SeqTrainingArguments, Seq2SeqTrainer
from transformers import Trainer, TrainingArguments
import tqdm
from tqdm.auto import tqdm
from simplet5 import SimpleT5

INFO:pytorch_lightning.utilities.seed:Global seed set to 42


In [None]:
# Ask PyTorch to release cached GPU memory that is not currently in use
# before any data or model is loaded.
torch.cuda.empty_cache()

In [None]:
# Run on the first CUDA GPU when one is visible; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda:0"
else:
    device = "cpu"

In [None]:
# Read the iCliniq train and test splits; paths are relative to the working directory.
train_csv_path = './train_datasets/iCliniq_train.csv'
test_csv_path = './test_datasets/iCliniq_test.csv'

df_train = pd.read_csv(train_csv_path)
df_test = pd.read_csv(test_csv_path)

# Model

In [None]:
def _to_simplet5_frame(df):
    """Return a renamed copy of ``df`` in the source_text/target_text layout used for training.

    'Question' becomes 'source_text' and is prefixed with "[Question] ";
    'Answer_cut' becomes 'target_text'. The input frame is left untouched
    because ``rename`` returns a new DataFrame.
    """
    renamed = df.rename(columns={'Question': 'source_text', 'Answer_cut': 'target_text'})
    renamed['source_text'] = "[Question] " + renamed['source_text']
    return renamed


# pandas and train_test_split are already imported in the notebook's import cell.
# Hold out 20% of the training data for validation; the fixed random_state keeps
# the split reproducible across runs.
train_df, val_df = train_test_split(df_train, test_size=0.2, random_state=123)

train_df = _to_simplet5_frame(train_df)
val_df = _to_simplet5_frame(val_df)

In [None]:
# Create the SimpleT5 wrapper and load the pretrained "t5-small" checkpoint into it.
model = SimpleT5()
model.from_pretrained(model_type="t5", model_name="t5-small")

Downloading:   0%|          | 0.00/2.27k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/773k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/1.32M [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/1.18k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/231M [00:00<?, ?B/s]

In [None]:
# Release cached, unused GPU memory again right before training starts.
torch.cuda.empty_cache()


In [None]:
# Fine-tune the loaded model on train_df, using val_df as the evaluation set.
# Source and target sequences both use a maximum token length of 200.
# NOTE(review): no output path is passed here, yet the evaluation cell loads
# "./outputs/iCliniq-t5-checkpoints" — confirm how the checkpoint ends up under that name.
model.train(train_df = train_df,
            eval_df = val_df,
            source_max_token_len=200,
            target_max_token_len=200,
            batch_size=32,
            max_epochs=3,
            use_gpu=True)

INFO:pytorch_lightning.utilities.distributed:GPU available: True, used: True
INFO:pytorch_lightning.utilities.distributed:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.distributed:IPU available: False, using: 0 IPUs
INFO:pytorch_lightning.accelerators.gpu:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
INFO:pytorch_lightning.callbacks.model_summary:
  | Name  | Type                       | Params
-----------------------------------------------------
0 | model | T5ForConditionalGeneration | 60.5 M
-----------------------------------------------------
60.5 M    Trainable params
0         Non-trainable params
60.5 M    Total params
242.026   Total estimated model params size (MB)


Validation sanity check: 0it [00:00, ?it/s]

INFO:pytorch_lightning.utilities.seed:Global seed set to 42


Training: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

# Evaluation

In [None]:
# Directory holding the fine-tuned checkpoint (tokenizer files and model weights).
checkpoint_dir = "./outputs/iCliniq-t5-checkpoints"

# Load the checkpoint directly with transformers: batched generation calls
# tokenizer(...) and model.generate(...) on these objects. The earlier SimpleT5
# load of the same checkpoint was discarded immediately, so it is not repeated here.
tokenizer = T5Tokenizer.from_pretrained(checkpoint_dir)
model = T5ForConditionalGeneration.from_pretrained(checkpoint_dir)
model.to(device)

# Work on a copy so that adding the generated-answer column later does not
# silently mutate df_test.
df_test_iCliniq = df_test.copy()

def generate_answers_batch(questions, batch_size=32):
    """Generate one answer per question, running the model on fixed-size batches.

    Relies on the notebook-level ``tokenizer``, ``model`` and ``device``. Every
    question is prefixed with "[Question] " before tokenization.

    Args:
        questions: sliceable sequence of question strings.
        batch_size: number of questions passed to ``model.generate`` per call.

    Returns:
        List of decoded answer strings, in the same order as ``questions``.
    """
    answers = []
    batch_starts = range(0, len(questions), batch_size)
    for start in tqdm(batch_starts, desc="Generating answers"):
        chunk = questions[start:start + batch_size]
        prompts = ["[Question] " + question for question in chunk]
        # Tokenize the whole batch at once, padded and truncated to at most 512 tokens.
        encoded = tokenizer(
            prompts,
            padding=True,
            truncation=True,
            max_length=512,
            return_tensors="pt",
        )
        encoded = encoded.to(device)
        # Inference only, so gradient tracking is switched off.
        with torch.no_grad():
            generated = model.generate(
                **encoded,
                max_length=500,
                min_length=50,
                length_penalty=2.0,
            )
        for token_ids in generated:
            answers.append(tokenizer.decode(token_ids, skip_special_tokens=True))
    return answers



# Generate an answer for every question in the iCliniq test set.
# (The previous reference to df_test_MedQuAD named a frame that this notebook never
# defines, which raises NameError on a fresh kernel.)
generated_answers = generate_answers_batch(df_test_iCliniq['Question'].tolist())

# Store the generations alongside their questions and reference answers.
df_test_iCliniq['Generated_Answer'] = generated_answers

# Persist the results so the metrics can be recomputed without re-running generation.
df_test_iCliniq.to_csv("./outputs/iCliniqGenerated_Answer.csv", index = False)



Generating answers:   0%|          | 0/186 [00:00<?, ?it/s]

In [None]:
# Reload the fine-tuned checkpoint through the SimpleT5 wrapper for a single-example check.
# NOTE: this rebinds `model`, replacing the transformers model used for batched generation.
model = SimpleT5()
model.load_model("t5", "./outputs/iCliniq-t5-checkpoints", use_gpu=torch.cuda.is_available())

# Row of the test set used as the spot-check example.
sample_idx = 110
q_test = df_test['Question'][sample_idx]
q_ans = df_test['Answer'][sample_idx]

print("Question: ", q_test)
print('-'*50)
print("Reference Answer: ",q_ans)
# Training inputs and the batched generation both prefix questions with "[Question] ",
# so the prompt must carry the same prefix for the model to see the format it was tuned on.
predicted_ans = model.predict("[Question] " + q_test)[0]
print("Predicted Answer: " ,predicted_ans)

Question:  what do bedbug bites look like?
--------------------------------------------------
Reference Answer:  the first sign of bedbugs may be red itchy bites on the skin usually on the arms or shoulders. bedbugs tend to leave straight rows of bites unlike some other insects that leave bites here and there.
Predicted Answer:  bedbug bites are a common type of bite that can be caused by the presence of bacteria in the body. it's not uncommon for bedbug bites to appear like they have been treated with antibiotics or other medications.


In [None]:
# Each prediction is scored against exactly one reference answer, wrapped in its own
# list because the metrics take a list of references per prediction.
references = [[ref_ans] for ref_ans in df_test_iCliniq['Answer']]
predictions = df_test_iCliniq['Generated_Answer'].tolist()

# The metric handle gets its own name so it does not rebind the `sacrebleu`
# module imported at the top of the notebook.
sacrebleu_metric = evaluate.load("sacrebleu")
sacrebleu_results = sacrebleu_metric.compute(predictions=predictions, references=references)
print(f"SacreBLEU Results: {sacrebleu_results}")

rouge = evaluate.load("rouge")
rouge_results = rouge.compute(predictions=predictions, references=references)
print(f"ROUGE Results: {rouge_results}")


Downloading builder script:   0%|          | 0.00/8.15k [00:00<?, ?B/s]

SacreBLEU Results: {'score': 0.6699844979683427, 'counts': [73856, 9736, 2514, 703], 'totals': [900114, 894163, 888212, 882261], 'precisions': [8.205182899055009, 1.088839506890802, 0.2830405353676825, 0.07968163615982118], 'bp': 1.0, 'sys_len': 900114, 'ref_len': 585268}


Downloading builder script:   0%|          | 0.00/6.27k [00:00<?, ?B/s]

ROUGE Results: {'rouge1': 0.0932493269093778, 'rouge2': 0.013628001507380326, 'rougeL': 0.07960058124319679, 'rougeLsum': 0.079468033533136}
