<a href="https://colab.research.google.com/github/zeniaharoon/KineXAI/blob/main/Recommendation_LLM.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Step 1: Set up the environment
!pip install transformers datasets

In [None]:
# Attach Google Drive to this runtime; the data files are read from it below.
from google.colab import drive

DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT)

Mounted at /content/drive


In [None]:
# Step 2: Load the data
import json
import pandas as pd

datapath = '/content/drive/MyDrive/SureStart Mentorship/LLM/'


def _read_json(filename):
    """Parse and return the JSON document stored at `datapath + filename`."""
    with open(datapath + filename, 'r') as handle:
        return json.load(handle)


# Training split first, then the held-out test split
train_data = _read_json('training_data.json')
test_data = _read_json('test_data.json')

In [None]:
def json_to_dataframe(data):
    """Flatten the nested exercise JSON into one prompt/response row per exercise.

    Args:
        data: Mapping of pain category -> details dict. Each details dict must
            contain an 'exercises' list whose items carry an 'explanation' entry.

    Returns:
        A pandas DataFrame with exactly the columns 'prompt' and 'response',
        one row per exercise. The two columns are present even when `data`
        is empty, so downstream column lookups do not fail on an empty split.

    Raises:
        KeyError: If a category has no 'exercises' key or an exercise has no
            'explanation' key.
    """
    records = [
        {
            # Every exercise of a category shares the same question prompt.
            'prompt': f"What exercises can I do for {category} pain?",
            'response': exercise['explanation'],
        }
        for category, details in data.items()
        for exercise in details['exercises']
    ]
    # Pin the column set explicitly: an empty record list would otherwise
    # yield a frame with no columns at all.
    return pd.DataFrame(records, columns=['prompt', 'response'])

# Convert both splits with the same flattening routine (train first, then test)
train_df, test_df = (json_to_dataframe(split) for split in (train_data, test_data))

In [None]:
# Show the first rows of the training frame, then of the test frame
for split_df in (train_df, test_df):
    print(split_df.head())

                                         prompt  \
0  What exercises can I do for lower back pain?   
1  What exercises can I do for lower back pain?   
2  What exercises can I do for lower back pain?   
3  What exercises can I do for lower back pain?   
4  What exercises can I do for lower back pain?   

                                            response  
0  Start on your hands and knees. Inhale as you a...  
1  Kneel on the floor, sit back on your heels, an...  
2  Lie on your back with your knees bent. Flatten...  
3  Lie on your back with your knees bent and feet...  
4  Lie on your back with your knees bent and feet...  
                                         prompt  \
0  What exercises can I do for lower back pain?   
1  What exercises can I do for lower back pain?   
2        What exercises can I do for neck pain?   
3        What exercises can I do for neck pain?   

                                            response  
0  Start on your hands and knees. Inhale as you a...

In [None]:
# Step 3: Load the conversational model
from transformers import AutoModelForCausalLM, AutoTokenizer, Trainer, TrainingArguments

# Checkpoint identifier on the Hugging Face Model Hub
model_name = "microsoft/DialoGPT-small"

# Load the causal language model and its matching tokenizer from the same checkpoint
model = AutoModelForCausalLM.from_pretrained(model_name)
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Reuse the end-of-sequence token as the padding token
tokenizer.pad_token = tokenizer.eos_token

In [None]:
import datasets
# Tokenize the data
def tokenize_function(examples):
    """Tokenize a batch of prompt/response pairs with the notebook's tokenizer.

    `examples` is indexed by the 'prompt' and 'response' keys; the two values
    are handed to the tokenizer as its first and second positional arguments,
    with padding to the maximum length and truncation enabled.
    """
    prompts = examples['prompt']
    responses = examples['response']
    encoding_options = {"padding": "max_length", "truncation": True}
    return tokenizer(prompts, responses, **encoding_options)

def _frame_to_torch_dataset(frame):
    """Wrap a DataFrame as a Dataset, tokenize it in batches, and expose torch tensors."""
    dataset = datasets.Dataset.from_pandas(frame)
    dataset = dataset.map(tokenize_function, batched=True)
    # Only the token ids and the attention mask are returned, as torch tensors
    dataset.set_format(type='torch', columns=['input_ids', 'attention_mask'])
    return dataset


train_dataset = _frame_to_torch_dataset(train_df)
test_dataset = _frame_to_torch_dataset(test_df)

Map:   0%|          | 0/80 [00:00<?, ? examples/s]

Map:   0%|          | 0/4 [00:00<?, ? examples/s]

In [None]:
import torch
from transformers import Seq2SeqTrainer, Seq2SeqTrainingArguments

def compute_loss(model, inputs, return_outputs=False, num_items_in_batch=None):
    """Compute the language-modeling loss for one batch.

    Args:
        model: Model to run. Its forward call must accept the batch entries
            plus a `labels` keyword and return an object with a `.loss`.
        inputs: Mapping of batch entries. Must contain 'input_ids' unless it
            already contains 'labels'. It is not modified.
        return_outputs: When true, return `(loss, outputs)` instead of `loss`.
        num_items_in_batch: Accepted and ignored. Newer Trainer versions are
            understood to pass this keyword; without it the call would fail.

    Returns:
        The loss, or a `(loss, outputs)` tuple when `return_outputs` is true.

    Notes:
        When the batch has no 'labels', they are derived from 'input_ids'.
        Positions with attention_mask == 0 are set to -100 (the ignore index
        of the Hugging Face causal-LM loss) so padding does not dominate the
        loss. The first padding position of each row is kept as a target:
        the pad token is the EOS token in this notebook, so it serves as the
        "stop here" signal after the response.
    """
    # Move inputs to the same device as the model
    device = next(model.parameters()).device
    batch = {k: (v.to(device) if hasattr(v, "to") else v) for k, v in inputs.items()}

    # Use caller-supplied labels if present; passing them twice would raise.
    labels = batch.pop("labels", None)
    if labels is None:
        labels = batch["input_ids"].clone()
        attention_mask = batch.get("attention_mask")
        if attention_mask is not None:
            is_pad = attention_mask == 0
            # cumsum == 1 marks the first padded position in each row
            first_pad = is_pad & (is_pad.long().cumsum(dim=-1) == 1)
            labels[is_pad & ~first_pad] = -100

    # Forward pass; the model computes the loss from the labels
    outputs = model(**batch, labels=labels)
    loss = outputs.loss

    return (loss, outputs) if return_outputs else loss

In [None]:
# Step 4: Fine-tune the model
# Hyperparameters collected in one place, then expanded into TrainingArguments
training_config = {
    "output_dir": "./results",
    "evaluation_strategy": "epoch",
    "learning_rate": 5e-4,
    "per_device_train_batch_size": 4,
    "per_device_eval_batch_size": 4,
    "num_train_epochs": 15,
    "weight_decay": 0.01,
}
training_args = TrainingArguments(**training_config)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=test_dataset,
)

# Replace the trainer's loss computation with the notebook's compute_loss function
trainer.compute_loss = compute_loss

# Left as the last expression so the cell displays the training summary
trainer.train()



Epoch,Training Loss,Validation Loss
1,No log,No log
2,No log,No log
3,No log,No log
4,No log,No log
5,No log,No log
6,No log,No log
7,No log,No log
8,No log,No log
9,No log,No log
10,No log,No log


TrainOutput(global_step=300, training_loss=0.045461260477701826, metrics={'train_runtime': 434.8416, 'train_samples_per_second': 2.76, 'train_steps_per_second': 0.69, 'total_flos': 627100876800000.0, 'train_loss': 0.045461260477701826, 'epoch': 15.0})

In [None]:
# Step 5: Save the model
# Model weights and tokenizer files go to the same directory
fine_tuned_dir = "./fine_tuned_model"
model.save_pretrained(fine_tuned_dir)
tokenizer.save_pretrained(fine_tuned_dir)

('./fine_tuned_model/tokenizer_config.json',
 './fine_tuned_model/special_tokens_map.json',
 './fine_tuned_model/vocab.json',
 './fine_tuned_model/merges.txt',
 './fine_tuned_model/added_tokens.json',
 './fine_tuned_model/tokenizer.json')

In [None]:
def generate_response(prompt, max_length=50, temperature=0.7, do_sample=False):
    """Generate text for `prompt` with the notebook's model and tokenizer.

    Args:
        prompt: Text to tokenize and feed to the model.
        max_length: Forwarded to `model.generate` as `max_length`.
        temperature: Sampling temperature. Only forwarded when `do_sample`
            is true; it has no effect on non-sampling decoding.
        do_sample: Whether to sample instead of decoding deterministically.
            Defaults to False, matching the previous fixed behavior.

    Returns:
        The first generated sequence decoded to a string with special tokens
        skipped. In the recorded outputs of this notebook the returned text
        starts with the prompt itself.
    """
    # Follow the model's device instead of assuming a GPU is available
    device = next(model.parameters()).device
    inputs = tokenizer(prompt, return_tensors="pt", padding=True, truncation=True).to(device)

    generation_kwargs = {
        "max_length": max_length,
        "do_sample": do_sample,
        "pad_token_id": tokenizer.eos_token_id,
    }
    if do_sample:
        generation_kwargs["temperature"] = temperature

    outputs = model.generate(**inputs, **generation_kwargs)
    return tokenizer.decode(outputs[0], skip_special_tokens=True)

# Try the fine-tuned model on a prompt of the same form as the training prompts
prompt = "What exercises can I do for lower back pain?"
generation_settings = {"max_length": 50, "temperature": 0.8}
response = generate_response(prompt, **generation_settings)
print(response)



What exercises can I do for lower back pain?Lie on your back with your knees bent. Flatten your back against the floor by tightening your abdominal muscles and tilting your pelvis up slightly.


In [None]:
# Second example: a different prompt, same generation settings as the previous call
prompt = "What exercises can I do for lower neck pain?"
generation_settings = {"max_length": 50, "temperature": 0.8}
response = generate_response(prompt, **generation_settings)
print(response)

What exercises can I do for lower neck pain?Sit or stand. Rotate your head 45 degrees to one side and tilt your chin down towards your armpit. Use your hand to gently pull your head further.
