In [3]:
import os 
import torch

from datasets import load_dataset
from transformers import AutoTokenizer, AutoModelForCausalLM, TrainingArguments, Trainer

In [4]:
# Dataset split directories. None of the three is referenced in the cells
# visible here; presumably they feed a data-loading cell elsewhere — TODO confirm.
train_dir = 'dataset/train'  # training split
validation_dir = 'dataset/validation'  # validation split
test_dir = 'dataset/test'  # held-out ("unseen") split
OUTPUT_DIR = "Results"  # passed to TrainingArguments(output_dir=...)
LOG_DIR = "Logs"  # passed to TrainingArguments(logging_dir=...)

# Loading Pre-Trained Model

In [None]:
# Load the tokenizer and the model from the SAME checkpoint.
# The tokenizer previously came from "codellama/CodeLlama-7b-hf" while the
# weights came from "klyang/MentaLLaMA-chat-7B". A tokenizer must match the
# vocabulary the model was trained with; otherwise token ids can map to the
# wrong (or to non-existent) embedding rows.
MODEL_NAME = "klyang/MentaLLaMA-chat-7B"
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModelForCausalLM.from_pretrained(MODEL_NAME)

pytorch_model.bin:  46%|####6     | 12.4G/27.0G [00:00<?, ?B/s]

# Training the model


## Setting up training arguments

In [None]:
# Hyperparameters collected in one mapping so they can be inspected or logged
# before the TrainingArguments object is built.
training_config = {
    # Where checkpoints and logs go
    "output_dir": OUTPUT_DIR,
    "logging_dir": LOG_DIR,
    "logging_steps": 10,               # emit a log record every 10 steps
    # Evaluation / checkpoint cadence: once per epoch for both
    "evaluation_strategy": "epoch",
    "save_strategy": "epoch",
    # Optimisation settings
    "learning_rate": 2e-5,
    "weight_decay": 0.01,
    "num_train_epochs": 3,
    # Per-device batch sizes
    "per_device_train_batch_size": 2,
    "per_device_eval_batch_size": 2,
}
training_args = TrainingArguments(**training_config)

## Initialising the trainer

In [None]:
def compute_metrics(eval_pred):
    """Compute next-token cross-entropy loss and accuracy for a causal LM.

    Args:
        eval_pred: object exposing ``predictions`` (logits, or a tuple/list
            whose first element is the logits) and ``label_ids``.
            Assumes logits are (batch, seq_len, vocab) and labels are
            (batch, seq_len) — TODO confirm against the tokenized dataset.

    Returns:
        dict with float entries ``"loss"`` and ``"accuracy"``. Both are NaN
        when no label position remains after shifting and masking.
    """
    logits = eval_pred.predictions  # Predictions from the model
    labels = eval_pred.label_ids    # True labels

    # Some models return extra outputs alongside the logits; keep the logits.
    if isinstance(logits, (tuple, list)):
        logits = logits[0]

    logits_t = torch.as_tensor(logits).float()
    labels_t = torch.as_tensor(labels).long()

    # A causal LM's logits at position t score the token at position t + 1,
    # so drop the last logit and the first label to align them.
    shifted_logits = logits_t[..., :-1, :]
    shifted_labels = labels_t[..., 1:]

    flat_logits = shifted_logits.reshape(-1, shifted_logits.shape[-1])
    flat_labels = shifted_labels.reshape(-1)

    # -100 marks positions excluded from the loss (padding / masked prompt);
    # they must not count as wrong predictions in the accuracy either.
    keep = flat_labels != -100
    if not bool(keep.any()):
        return {"loss": float("nan"), "accuracy": float("nan")}

    kept_logits = flat_logits[keep]
    kept_labels = flat_labels[keep]

    loss = torch.nn.functional.cross_entropy(kept_logits, kept_labels).item()
    accuracy = (kept_logits.argmax(dim=-1) == kept_labels).float().mean().item()

    return {"loss": loss, "accuracy": accuracy}

In [None]:
# import tensorflow as tf 

# def compute_metrics(eval_pred):
#     logits = eval_pred.predictions  # Predictions from the model
#     labels = eval_pred.label_ids    # True labels

#     # Calculate loss (if not already returned by the model)
#     loss_fn = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True) # Use SparseCategoricalCrossentropy
#     loss = loss_fn(labels, logits).numpy() # Calculate Loss

#     # Calculate accuracy
#     predictions = tf.argmax(logits, axis=-1).numpy()
#     accuracy = tf.reduce_mean(tf.cast(tf.equal(predictions, labels), tf.float32)).numpy()

#     return {"loss": loss, "accuracy": accuracy}

In [None]:
# Evaluate on held-out data rather than on the training split: metrics
# computed on the data the model was fitted to cannot reveal over-fitting.
# NOTE(review): `tokenized_datasets` is not defined in the cells visible here;
# the "validation" key is assumed from `validation_dir` — confirm the split name.
eval_split = "validation" if "validation" in tokenized_datasets else "train"
if eval_split == "train":
    print("WARNING: no 'validation' split found; evaluating on the training split.")

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_datasets["train"],
    eval_dataset=tokenized_datasets[eval_split],
    tokenizer=tokenizer,
    compute_metrics=compute_metrics,  # Pass the compute_metrics function
)

In [None]:
# Train the model.
# With the TrainingArguments set earlier in this notebook, evaluation and
# checkpointing are both configured to run once per epoch.
trainer.train()

## Saving the model

In [None]:
# Write the model (via the trainer) and the tokenizer to the same directory,
# so that one path can later be passed to `from_pretrained` for both.
fine_tuned_dir = "./fine-tuned-model"
trainer.save_model(fine_tuned_dir)
tokenizer.save_pretrained(fine_tuned_dir)

# Evaluation (interactive chat)

In [None]:
# Interactive loop: read a prompt, generate a continuation with the current
# `model` / `tokenizer`, and print it until the user types 'exit'.
model.eval()  # inference mode (disables dropout if the model has any)

print("Chatbot is ready! Type 'exit' to stop.")
while True:
    # Get user input
    user_input = input("You: ")
    if user_input.strip().lower() == 'exit':
        break
    if not user_input.strip():
        # Nothing to respond to; ask again instead of generating from an empty prompt.
        continue

    # To chat with the saved checkpoint instead, load it once BEFORE this loop:
    # model = AutoModelForCausalLM.from_pretrained("./fine-tuned-model")
    # tokenizer = AutoTokenizer.from_pretrained("./fine-tuned-model")

    # The input tensors must live on the same device as the model; after
    # training the model may be on GPU while the tokenizer returns CPU tensors.
    inputs = tokenizer(user_input, return_tensors="pt").to(model.device)
    # max_new_tokens bounds only the generated part. max_length would also
    # count the prompt, so a long prompt could leave no room for a reply.
    outputs = model.generate(**inputs, max_new_tokens=100, num_return_sequences=1)
    generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)

    print(generated_text)

# Model for translation

In [None]:
# Load model directly
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM

tokenizer = AutoTokenizer.from_pretrained("google-t5/t5-small")
model = AutoModelForSeq2SeqLM.from_pretrained("google-t5/t5-small")