<a href="https://colab.research.google.com/github/zavavan/LLM_PromptEngineering_Relation_Extraction/blob/main/LLMPromptEngineeringExamples.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Initialize Hugging Face Models:

In [None]:
!pip -q install git+https://github.com/huggingface/transformers # need to install from github
!pip install -q -U git+https://github.com/huggingface/peft.git
!pip install -q -U datasets scipy # ipywidgets
!pip install -U bitsandbytes accelerate xformers einops
!pip install --upgrade torch torchvision
!pip install datasets
!pip install evaluate \
    rouge_score \
    loralib \
    peft --quiet
!pip install tqdm

In [None]:
import torch
# Imported unconditionally: a later cell calls `cuda.current_device()` /
# `cuda.is_available()`, which raised NameError on CPU-only machines when this
# import was guarded by the availability check.
from torch import cuda

# Select the compute device and report which one is in use.
if cuda.is_available():
    device = torch.device("cuda")
    print("CUDA is available. Using GPU.")
else:
    device = torch.device("cpu")
    print("CUDA is not available. Using CPU.")

import evaluate
import transformers
from transformers import AutoTokenizer, AutoModelForCausalLM,  AutoModelForSeq2SeqLM, GenerationConfig, BitsAndBytesConfig
from datasets import load_dataset
from transformers import TrainingArguments

CUDA is available. Using GPU.


In [None]:
#!pip install openpyxl
#!pip install XlsxWriter
import regex as re  # third-party `regex` package, used everywhere below under the name `re`
import pickle
import json
import warnings
warnings.filterwarnings('ignore')  # silence all Python warnings for the rest of the notebook
import sys
import time
import csv
import pandas as pd
import datetime
import os
from string import punctuation
import tqdm
import random
import torch

# Device string of the form 'cuda:<index>' or 'cpu'. `torch.cuda` is used
# directly because the bare name `cuda` is only imported when a GPU is present,
# which made this line raise NameError on CPU-only machines.
device = f'cuda:{torch.cuda.current_device()}' if torch.cuda.is_available() else 'cpu'
print(device)

In [None]:
# Log in to the Hugging Face Hub from inside the notebook.
# NOTE(review): presumably only required for gated checkpoints (such as the
# commented-out Llama model id in the next cell) — confirm before removing.
from huggingface_hub import notebook_login
notebook_login()

In [None]:
# Hub id of the checkpoint loaded by the model/tokenizer cell below.
# Uncomment the first line (and comment out the second) to switch to the Llama 3.1 checkpoint.
#model_id = "meta-llama/Meta-Llama-3.1-8B-Instruct"
model_id = "Open-Orca/Mistral-7B-OpenOrca"

In [None]:
# 4-bit NF4 quantization with double quantization; matmuls are computed in bfloat16.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16,
)

# Load the quantized causal LM; device_map='auto' lets the library place the weights.
# NOTE(review): trust_remote_code=True allows code shipped with the model repo to be
# executed — confirm it is actually needed for the selected checkpoint.
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    trust_remote_code=True,
    quantization_config=bnb_config,
    device_map='auto',
)

# Left padding is requested so that generated tokens directly follow the prompt.
tokenizer = AutoTokenizer.from_pretrained(model_id, padding_side="left")

# Fall back to the EOS token when the tokenizer defines no padding token.
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token
# Optional, currently disabled:
#   model.generation_config.pad_token_id = tokenizer.pad_token_id

## LOAD HUGGING FACE DATASET


In [None]:
# Hub id of the dataset to evaluate on. This is a placeholder and MUST be filled in.
# The cells below read the 'train' and 'test' splits and, for every example, the
# columns 'Text', 'E1', 'E2' and 'Label'; adapt those accesses to the chosen dataset.
huggingface_dataset_name = ""

# Fail fast with an actionable message instead of an obscure loader error.
if not huggingface_dataset_name:
  raise ValueError("Set `huggingface_dataset_name` to a Hugging Face dataset id before running this cell.")

dataset = load_dataset(huggingface_dataset_name)

NameError: name 'load_dataset' is not defined

## LEARNING METHODS:

### ZERO-SHOT

In [None]:
def generate_relation_classification_prompt(input_text, e1, e2):
  """Build the zero-shot relation-classification prompt for one example.

  The prompt is a template: the label descriptions ("0 means that E1 ... E2",
  "1 ...") are placeholders to be replaced for the concrete task.

  Args:
    input_text: sentence containing both entities.
    e1: first entity mention.
    e2: second entity mention.

  Returns:
    The prompt string (no trailing newline), with the text and both entities
    wrapped in triple single quotes.

  NOTE(review): the template text contains the typo "quotese" and announces
  labels "0 to N-1" while also using label N; it is left untouched here because
  it is the text actually sent to the model.
  """
  prompt_lines = [
      "Given a text enclosed in triple quotese and a pair of entities E1 and E2, classify the relation holding between E1 and E2.",
      "The relations are identified with N labels from 0 to N-1. The meaning of the labels is the following:",
      "0 means that E1 ... E2",
      "1 ...",
      "N means that E1 and E2 are in none of the relations above.",
      "For the output use the format LABEL: X",
      f"Text:'''{input_text}'''",
      f"Entities: E1:'''{e1}''', E2:'''{e2}'''",
  ]
  return "\n".join(prompt_lines)


## Worked example: render the zero-shot prompt for one sentence and its entity pair.
example_sentence = 'Hyperglycemia : Patient is not known to be a diabetic and was felt [**12-26**] steroids , his sugars were controlled on sliding scale insulin in the hospital'
example_e1 = 'Hyperglycemia'
example_e2 = 'steroids'
print(generate_relation_classification_prompt(example_sentence, example_e1, example_e2))

Given a text enclosed in triple quotese and a pair of entities E1 and E2, classify the relation holding between E1 and E2.
The relations are identified with N labels from 0 to N-1. The meaning of the labels is the following:
0 means that E1 ... E2
1 ...
N means that E1 and E2 are in none of the relations above.
For the output use the format LABEL: X
Text:'''Hyperglycemia : Patient is not known to be a diabetic and was felt [**12-26**] steroids , his sugars were controlled on sliding scale insulin in the hospital'''
Entities: E1:'''Hyperglycemia''', E2:'''steroids'''


In [None]:
## RUNNING ZERO-SHOT ON DATASET TEST PARTITION

# Predicted labels, one per test example: the matched digit string, or 'N' when
# the response matched none of the patterns below. They are strings and must be
# cast before being passed to integer-based metrics.
model_generated_labels_zero_shot = []


## regex patterns to match response strings. Adapt according to the output format specified in the prompt
## (expect non-compliance from the LLM response). They are tried in this order; the first match wins.
answer_label_regex_pattern =  re.compile(r'LABEL:\s?(\d+)')
answer_label_regex_pattern_1 =  re.compile(r'the label is\s?(\d+)')
answer_label_regex_pattern_2 =  re.compile(r'The relation between E1 and E2 is\s?(\d+)')

# Horizontal rule for the debugging output (99 dashes, same as the original join over 100 empty strings).
separator = '-' * 99

for i, example in enumerate(tqdm.tqdm(dataset['test'])):
  input_text = example['Text']  ## reading the input sentence from dataset. To be modified according to the target dataset used
  e1 = example['E1'] ## reading input entities from the dataset. To be adapted according to the target dataset
  e2 = example['E2'] ## reading input entities from the dataset. To be adapted according to the target dataset
  label = example['Label'] ## reading gold standard relation label from the dataset. To be adapted according to the target dataset
  prompt = generate_relation_classification_prompt(input_text, e1, e2)

  # Tokenize and move the tensors to the device the model lives on.
  inputs = tokenizer(prompt, return_tensors='pt', padding=True).to(model.device)
  prompt_length = inputs["input_ids"].shape[1]

  generated_ids = model.generate(
      inputs["input_ids"],
      attention_mask=inputs["attention_mask"],  # passed explicitly rather than left to be inferred
      pad_token_id=tokenizer.pad_token_id,
      max_new_tokens=200, # set according to task use case
  )[0]

  # The returned sequence starts with the prompt tokens, so keep only what was
  # generated after them. Slicing token ids is used instead of removing the
  # prompt text from the decoded string, because decoding is not guaranteed to
  # reproduce the prompt character-for-character.
  output = tokenizer.decode(generated_ids[prompt_length:], skip_special_tokens=True)

  ## removing new lines from response
  processed_output = re.sub('\n|\r','',output)

  # matching answer regex patterns; default label if response does not match the expected format
  predicted_label = 'N'
  for pattern in (answer_label_regex_pattern, answer_label_regex_pattern_1, answer_label_regex_pattern_2):
    answer_match = pattern.search(processed_output)
    if answer_match:
      predicted_label = answer_match.group(1)
      break
  model_generated_labels_zero_shot.append(predicted_label)

  ## DEBUGGING OUTPUT (adapt accordingly):
  print(separator)
  print('Example ', i + 1)
  print(separator)
  print(f'INPUT PROMPT:\n{prompt}')
  print(separator)
  print(f'GOLD STANDARD LABEL\n{label}')
  print(separator)
  print(f'PROCESSED OUTPUT:\n{processed_output}\n')
  print(separator)

### Chain Of Thought Prompt

In [None]:
def generate_relation_classification_CoT_prompt_0(input_text, e1, e2):
  """Build the first chain-of-thought prompt: an open question about the relation.

  Modify according to your specific task.

  Args:
    input_text: sentence containing both entities.
    e1: first entity mention.
    e2: second entity mention.

  Returns:
    The prompt string; it starts and ends with a newline.
  """
  question = (
      "Given a text enclosed in triple quotes and a pair of entities E1 and E2 "
      "enclosed in triple quotes, what is the relation holding between E1 and E2?"
  )
  text_line = f"Text: '''{input_text}'''."
  entities_line = f"E1: '''{e1}''', E2: '''{e2}'''"
  # Empty first/last items reproduce the leading and trailing newline of the template.
  return "\n".join(["", question, text_line, entities_line, ""])


def generate_relation_classification_CoT_prompt_1(processed_output_0, e1, e2):
  """Build the second chain-of-thought prompt: map a free-text relation to a label.

  Sample function; modify according to your specific task. The label list in
  the template is a placeholder ("..." stands for the labels between 0 and 8).

  Args:
    processed_output_0: the model's response to the first prompt interaction.
    e1: first entity mention.
    e2: second entity mention.

  Returns:
    The prompt string; it starts and ends with a newline.
  """
  quoted_e1 = f"'''{e1}'''"
  quoted_e2 = f"'''{e2}'''"
  prompt_lines = [
      "",  # leading newline of the template
      f"Now, given the following relation between the entities E1:{quoted_e1} and E2: {quoted_e2}",
      f"Relation: {processed_output_0}",
      "which of the following labels from 0 to 8 best describes it? The meaning of the labels is the following:",
      f"0 means that {quoted_e1} causes {quoted_e2}",
      "...",
      f"8 means that {quoted_e1} and {quoted_e2} are in none of the relations above.",
      "For the output use the format LABEL: X",
      "",  # trailing newline of the template
  ]
  return "\n".join(prompt_lines)

In [None]:
# Predicted labels, one per test example: the matched digit string, or 'N' when
# the second response matched none of the patterns below.
model_generated_labels_zero_shot_CoT = []


## regex patterns to match response strings. Adapt according to the output format specified in the prompt
## (expect non-compliance from the LLM response). They are tried in this order; the first match wins.
answer_label_regex_pattern =  re.compile(r'LABEL:\s?(\d+)')
answer_label_regex_pattern_1 =  re.compile(r'the label is\s?(\d+)')
answer_label_regex_pattern_2 =  re.compile(r'The relation between E1 and E2 is\s?(\d+)')

# Horizontal rule for the debugging output (99 dashes, same as the original join over 100 empty strings).
separator = '-' * 99

for i, example in enumerate(tqdm.tqdm(dataset['test'])):

  input_text = example['Text']  ## reading the input sentence from dataset. To be modified according to the target dataset used
  e1 = example['E1']  ## reading input entities from the dataset. To be adapted according to the target dataset
  e2 = example['E2']  ## reading input entities from the dataset. To be adapted according to the target dataset
  gold_standard_label = example['Label']  ## reading gold standard relation label from the dataset. To be adapted according to the target dataset

  # first interaction: ask for a free-text description of the relation
  prompt_CoT_0 = generate_relation_classification_CoT_prompt_0(input_text, e1, e2)

  inputs = tokenizer(prompt_CoT_0, return_tensors='pt', padding=True).to(model.device)
  generated_ids = model.generate(
      inputs["input_ids"],
      attention_mask=inputs["attention_mask"],
      pad_token_id=tokenizer.pad_token_id,
      max_new_tokens=200, ## adapt according to use case task
  )[0]
  # Keep only the tokens generated after the prompt. Slicing token ids is used
  # instead of removing the prompt text from the decoded string, because
  # decoding is not guaranteed to reproduce the prompt character-for-character.
  output_0 = tokenizer.decode(generated_ids[inputs["input_ids"].shape[1]:], skip_special_tokens=True)

  ## removing new lines from response
  processed_output_0 = re.sub('\n|\r','',output_0)


  # second interaction: ask the model to map its own description to a label
  # (the original line ended with a stray ':' which was a SyntaxError)
  prompt_CoT_1 = generate_relation_classification_CoT_prompt_1(processed_output_0, e1, e2)

  inputs = tokenizer(prompt_CoT_1, return_tensors='pt', padding=True).to(model.device)
  generated_ids = model.generate(
      inputs["input_ids"],
      attention_mask=inputs["attention_mask"],
      pad_token_id=tokenizer.pad_token_id,
      max_new_tokens=50, ## adapt according to use case task
  )[0]
  output = tokenizer.decode(generated_ids[inputs["input_ids"].shape[1]:], skip_special_tokens=True)

  ## removing new lines from response
  processed_output = re.sub('\n|\r','',output)

  # matching answer regex patterns; default label if response does not match the expected format
  predicted_label = 'N'
  for pattern in (answer_label_regex_pattern, answer_label_regex_pattern_1, answer_label_regex_pattern_2):
    answer_match = pattern.search(processed_output)
    if answer_match:
      predicted_label = answer_match.group(1)
      break
  model_generated_labels_zero_shot_CoT.append(predicted_label)

  ## DEBUGGING OUTPUT (adapt accordingly):
  print(separator)
  print('Example ', i + 1)
  print(separator)
  print('GOLD STANDARD LABEL: ' + str(gold_standard_label))
  print(separator)
  print(f'CoT PROMPT 0:\n{prompt_CoT_0}')
  print(separator)
  print(f'CoT PROMPT 0 MODEL RESPONSE:\n{processed_output_0}\n')
  print(separator)
  print(f'CoT PROMPT 1:\n{prompt_CoT_1}')
  print(separator)
  print(f'CoT PROMPT 1 MODEL RESPONSE:\n{processed_output}')

### FEW-SHOT

In [None]:
def generate_n_shot_examples_relation_labels(dataset,n):
  """Render `n` randomly chosen training examples as few-shot demonstrations.

  Args:
    dataset: mapping with a 'train' split that supports `len()` and integer
      indexing; each example must provide 'Text', 'E1', 'E2' and 'Label'.
    n: number of demonstrations to draw (without replacement).

  Returns:
    A string with three lines per demonstration (text, entities, label), each
    terminated by a newline; the empty string when `n` is 0.

  Raises:
    ValueError: propagated from `random.sample` when `n` is negative or larger
      than the training split.
  """
  train_split = dataset['train']
  # Sample row indices instead of copying the whole split into a list on every
  # call; only the `n` selected rows are read.
  sampled_indices = random.sample(range(len(train_split)), n)

  rendered_examples = []
  for index in sampled_indices:
    example = train_split[index]
    rendered_examples.append(
        f"Text:'''{example['Text']}'''\n"
        f"Entities: E1:'''{example['E1']}''', E2:'''{example['E2']}'''\n"
        f"LABEL: {example['Label']}\n"
    )
  return ''.join(rendered_examples)

In [None]:
def generate_relation_classification_prompt_instructions():
  """Return the instruction header of the few-shot relation-classification prompt.

  The text is a template: the label descriptions ("0 means that E1 ... E2",
  "1 ...") are placeholders to be replaced for the concrete task.

  Returns:
    The instruction string, six lines, without a trailing newline.
  """
  instruction_lines = (
      "Given a text enclosed in triple quotese and a pair of entities E1 and E2, classify the relation holding between E1 and E2.",
      "The relations are identified with N labels from 0 to N-1. The meaning of the labels is the following:",
      "0 means that E1 ... E2",
      "1 ...",
      "N means that E1 and E2 are in none of the relations above.",
      "For the output use the format LABEL: X",
  )
  return "\n".join(instruction_lines)



def generate_relation_classification_prompt_input_part(input_text,e1,e2):
  """Render the query part of the few-shot prompt (the example to classify).

  Args:
    input_text: sentence containing both entities.
    e1: first entity mention.
    e2: second entity mention.

  Returns:
    Two newline-terminated lines: the quoted text, then the quoted entities.

  NOTE(review): the layout ("Text: ", "E1: ") differs slightly from the one
  produced for the few-shot demonstrations ("Text:", "Entities: E1:"); confirm
  whether that is intentional.
  """
  text_line = f"Text: '''{input_text}'''"
  entities_line = f"E1: '''{e1}''', E2: '''{e2}'''"
  return text_line + "\n" + entities_line + "\n"

In [None]:
def run_few_shot_n_examples_re_classification(num_examples, max_new_tokens=20):
  """Run few-shot relation classification over the test split.

  Reads the notebook-level `dataset`, `tokenizer`, `model` and the three
  `answer_label_regex_pattern*` objects defined in earlier cells.

  Args:
    num_examples: number of demonstrations placed in every prompt.
    max_new_tokens: generation budget per example (set according to the task).

  Returns:
    A list with one entry per test example: the matched digit string, or 'N'
    when the response matched none of the answer patterns.
  """
  model_generated_labels_few_shot = []
  # Patterns are tried in this order; the first match wins.
  label_patterns = (answer_label_regex_pattern, answer_label_regex_pattern_1, answer_label_regex_pattern_2)
  # Horizontal rule for the debugging output (99 dashes, same as the original join over 100 empty strings).
  separator = '-' * 99

  prompt_instructions = generate_relation_classification_prompt_instructions()
  for i, example in enumerate(tqdm.tqdm(dataset['test'])):

    # the few-shot examples are drawn randomly for each text instance, to mitigate
    # the impact of choosing a set of poor examples in the prompt
    n_shot_examples = generate_n_shot_examples_relation_labels(dataset,num_examples)

    input_text = example['Text']
    e1 = example['E1']
    e2 = example['E2']
    gold_standard_label = example['Label']
    prompt = f"""{prompt_instructions}
{n_shot_examples}
{generate_relation_classification_prompt_input_part(input_text,e1,e2)}"""

    # Tokenize and move the tensors to the device the model lives on.
    inputs = tokenizer(prompt, return_tensors='pt', padding=True).to(model.device)
    prompt_length = inputs["input_ids"].shape[1]

    generated_ids = model.generate(
        inputs["input_ids"],
        attention_mask=inputs["attention_mask"],
        pad_token_id=tokenizer.pad_token_id,
        max_new_tokens=max_new_tokens,
    )[0]

    # Full decoded sequence (prompt + completion), kept for the debugging output.
    output = tokenizer.decode(generated_ids, skip_special_tokens=True)

    # Only the newly generated tokens are parsed. This matters here: the prompt
    # itself contains "LABEL: <digit>" lines from the demonstrations, so any
    # leftover prompt text would be matched by the answer patterns. Slicing
    # token ids avoids relying on the decoded text reproducing the prompt exactly.
    completion = tokenizer.decode(generated_ids[prompt_length:], skip_special_tokens=True)
    processed_output = re.sub('\n|\r','',completion)

    predicted_label = 'N'  # default when the response does not match the expected format
    for pattern in label_patterns:
      answer_match = pattern.search(processed_output)
      if answer_match:
        predicted_label = answer_match.group(1)
        break
    model_generated_labels_few_shot.append(predicted_label)

    print(separator)
    print('Example ', i + 1)
    print(separator)
    print(f'INPUT PROMPT:\n{prompt}')
    print(separator)
    print(f'GOLD STANDARD LABEL:\n{gold_standard_label}')
    print(separator)
    print(f'MODEL GENERATION - FEW SHOT:\n{output}\n')
    print(separator)
    print(f'PROCESSED OUTPUT:\n{processed_output}\n')
    print(separator)
  return model_generated_labels_few_shot

In [None]:
# The demonstrations are drawn with Python's `random` module; seed it so the
# same demonstrations (and therefore comparable results) are used on every run.
random.seed(42)

# 5-shot classification over the test split; the parsed labels are kept for evaluation.
model_generated_labels_few_shot = run_few_shot_n_examples_re_classification(5)

## EVALUATION:

In [None]:
# Load the accuracy and F1 metrics through the `evaluate` library; both are
# used by the scoring cell below.
metricAccuracy = evaluate.load("accuracy")
metricF1 = evaluate.load("f1")

In [None]:
# Gold-standard labels of the test split, in dataset order.
# NOTE(review): assumes 'Label' holds integer class ids — confirm for the chosen dataset.
gs_labels = [example['Label'] for example in dataset['test']]

# Select which run to score. `model_generated_labels` was never assigned before
# (NameError); point it at one of:
#   model_generated_labels_zero_shot, model_generated_labels_zero_shot_CoT,
#   model_generated_labels_few_shot
model_generated_labels = model_generated_labels_few_shot

# The prediction lists hold digit strings, plus 'N' for unparsable responses,
# whereas the metrics need integers. Unparsable responses are mapped to a
# sentinel that equals no gold label, so they are scored as errors.
# NOTE(review): the sentinel presumably appears as an extra class in the macro
# average — confirm this is the desired treatment.
UNPARSED_LABEL = -1
predicted_labels = [
    int(label) if str(label).isdecimal() else UNPARSED_LABEL
    for label in model_generated_labels
]

assert len(predicted_labels) == len(gs_labels), "predictions and gold labels differ in length"

accuracy_results = metricAccuracy.compute(predictions=predicted_labels,
    references=gs_labels)
print(accuracy_results)

fscore_results = metricF1.compute(predictions=predicted_labels,
    references=gs_labels, average='macro')
print(fscore_results)