In [1]:
MODEL_DIR = r'D:\claimpkg\claimpkg-clone\src\resources\model'

In [2]:
from transformers import AutoTokenizer, AutoModelForCausalLM
from peft import PeftModel, PeftConfig

# Load the PEFT config
peft_config = PeftConfig.from_pretrained(MODEL_DIR)

# Load the base model (same as used during training)
base_model = AutoModelForCausalLM.from_pretrained(peft_config.base_model_name_or_path)

# Load the adapter weights on top of the base model
model = PeftModel.from_pretrained(base_model, MODEL_DIR)

# Load tokenizer
MODEL_ID = "meta-llama/Llama-3.2-1B"

tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, use_fast=False)

tokenizer.pad_token = tokenizer.eos_token
model.config.pad_token_id = tokenizer.eos_token_id



In [3]:
import os
import pickle

DATA_DIR = 'resources'
# Data dir = (1) working directory, (2) move out of test, (3) move out of src, and append to resources
DATA_DIR = os.path.join(os.getcwd(), '..', 'resources')
print("Data Directory:", DATA_DIR)

TRAIN_FILE = 'finetune_train_data.pickle'
TEST_FILE = 'finetune_test_data.pickle'
VALID_FILE = 'finetune_validation_data.pickle'

TRAIN_FILE_PATH = os.path.join(DATA_DIR, TRAIN_FILE)
TEST_FILE_PATH = os.path.join(DATA_DIR, TEST_FILE)
VALID_FILE_PATH = os.path.join(DATA_DIR, VALID_FILE)

train_data = None
test_data = None
valid_data = None

import pickle
# Load
with open(TRAIN_FILE_PATH, 'rb') as f:
    train_data = pickle.load(f)
with open(TEST_FILE_PATH, 'rb') as f:
    test_data = pickle.load(f)
with open(VALID_FILE_PATH, 'rb') as f:
    valid_data = pickle.load(f)

Data Directory: d:\claimpkg\claimpkg-clone\src\test\..\resources


In [5]:
# Run validation with item 5
item = 'Khalid Mahmood is the leader of a city which was the birthplace of architect, Vedat Tek, who designed 103 Colmore Row and I.C.Tower.'
input_text = f"Claim: {item}\nGenerate pseudo-subgraph:\n"
input_ids = tokenizer(input_text, return_tensors='pt').input_ids
output_ids = model.generate(input_ids, max_new_tokens=250)
output_text = tokenizer.decode(output_ids[0], skip_special_tokens=True)
print("Input Text:\n", input_text)
# print("Ground Truth Output Text:\n", item['output'])
print("\nGenerated Output Text:\n", output_text)

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Input Text:
 Claim: Khalid Mahmood is the leader of a city which was the birthplace of architect, Vedat Tek, who designed 103 Colmore Row and I.C.Tower.
Generate pseudo-subgraph:


Generated Output Text:
 Claim: Khalid Mahmood is the leader of a city which was the birthplace of architect, Vedat Tek, who designed 103 Colmore Row and I.C.Tower.
Generate pseudo-subgraph:
<e>103 Colmore Row</e> || architect || <e>Vedat Tek</e>
<e>103 Colmore Row</e> || architect || <e>Khalid Mahmood</e>
<e>103 Colmore Row</e> || architect || <e>I.C.Tower</e>
<e>Vedat Tek</e> || birthPlace || <e>103 Colmore Row</e>
<e>Vedat Tek</e> || birthPlace || <e>Khalid Mahmood</e>
<e>Vedat Tek</e> || birthPlace || <e>I.C.Tower</e>
<e>103 Colmore Row</e> || architect || <e>Vedat Tek</e>
<e>Khalid Mahmood</e> || architect || <e>Vedat Tek</e>
<e>I.C.Tower</e> || architect || <e>Vedat Tek</e>
<e>Vedat Tek</e> || leader || <e>103 Colmore Row</e>
<e>Vedat Tek</e> || leader || <e>Khalid Mahmood</e>
<e>Vedat Tek


In [21]:
from transformers import AutoTokenizer, AutoModelForCausalLM, Trainer, TrainingArguments
import torch

MODEL_ID = "meta-llama/Llama-3.2-1B"

model_not_ft = AutoModelForCausalLM.from_pretrained(
    MODEL_ID,
    device_map="auto",
    torch_dtype="auto",
)
model_not_ft.config.pad_token_id = tokenizer.eos_token_id

# Check if model is runable now
item = valid_data[50]
input_text = f"Claim: {item['input']}\nGenerate pseudo-subgraph:\n"
input_ids = tokenizer(input_text, return_tensors='pt').input_ids
output_ids = model_not_ft.generate(input_ids, max_new_tokens=200)
output_text = tokenizer.decode(output_ids[0], skip_special_tokens=True)
print("Input Text:\n", input_text)
print("Ground Truth Output Text:\n", item['output'])
print("\nGenerated Output Text:\n", output_text)

Some parameters are on the meta device device because they were offloaded to the disk and cpu.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Input Text:
 Claim: Tay Garnett is the director of English without Tears.
Generate pseudo-subgraph:

Ground Truth Output Text:
 <e>English Without Tears</e> || director || <e>Tay Garnett</e>
<e>English Without Tears</e> || director || <e>Tay Garnett</e>

Generated Output Text:
 Claim: Tay Garnett is the director of English without Tears.
Generate pseudo-subgraph:
  1. Get all the actors in the database
  2. Get all the movies they starred in
  3. Get all the movies they acted in
  4. Get all the movies they acted in
  5. Get all the movies they acted in
  6. Get all the movies they acted in
  7. Get all the movies they acted in
  8. Get all the movies they acted in
  9. Get all the movies they acted in
  10. Get all the movies they acted in
  11. Get all the movies they acted in
  12. Get all the movies they acted in
  13. Get all the movies they acted in
  14. Get all the movies they acted in
  15. Get all the movies they acted in
  16. Get all the movies they acted in
  17. Get all t