# Nutri Chatbot

In [None]:
!pip install unsloth
!pip install transformers
!pip install.datasets
!pip install trl
!pip install accelerate
!pip install matplotlib

In [None]:
import torch
import matplotlib.pyplot as plt
from unsloth import FastLanguageModel
from datasets import load_dataset
from trl import SFTTrainer
from transformers import TrainingArguments

In [None]:
# Load the "train" split of the Tom158/Nutritional-LLama dataset.
dataset = load_dataset("Tom158/Nutritional-LLama", split="train")

# Sanity check: report the row count, then preview at most the first 500
# characters of the first example's "text" field.
print(f"Dataset size: {len(dataset)} rows")
print("Sample entry:")
sample_text = dataset[0]['text']
print(sample_text[:500])

# Keep only the "text" column, which is the field the trainer reads.
dataset = dataset.select_columns(["text"])

In [None]:
# Sequence length handed to the model loader here and to the trainer later.
max_seq_length = 2048

# Load the Llama 3.2 3B Instruct checkpoint in 4-bit, together with its
# tokenizer.
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name="unsloth/Llama-3.2-3B-Instruct",
    dtype=None,  # no explicit dtype; presumably Unsloth auto-selects -- confirm
    load_in_4bit=True,
    max_seq_length=max_seq_length,
)

In [None]:
# Parameter-efficient fine-tuning: wrap the base model with LoRA adapters on
# the projection modules listed below.
lora_target_modules = [
    "q_proj",
    "k_proj",
    "v_proj",
    "o_proj",
    "gate_proj",
    "up_proj",
    "down_proj",
]

model = FastLanguageModel.get_peft_model(
    model,
    r=16,
    lora_alpha=16,
    lora_dropout=0,
    bias="none",
    target_modules=lora_target_modules,
    use_gradient_checkpointing="unsloth",
    random_state=3407,  # fixed seed, same value as the trainer's seed
)

In [None]:
from unsloth.chat_templates import get_chat_template

# Key/role names handed to the template helper; every entry maps a name to
# itself, i.e. messages use "role"/"content" keys with "user"/"assistant" roles.
chat_field_mapping = {
    "role": "role",
    "content": "content",
    "user": "user",
    "assistant": "assistant",
}

# Replace the tokenizer with one configured for the "llama-3.1" chat template.
tokenizer = get_chat_template(
    tokenizer,
    chat_template="llama-3.1",
    mapping=chat_field_mapping,
)

In [None]:
# Supervised fine-tuning setup.

# Mixed precision: use bf16 when the GPU supports it, otherwise fp16.
bf16_supported = torch.cuda.is_bf16_supported()

training_args = TrainingArguments(
    output_dir="nutrition_finetuned_model_v2",
    # 2 sequences per device x 4 accumulation steps = 8 per optimizer step.
    per_device_train_batch_size=2,
    gradient_accumulation_steps=4,
    max_steps=60,
    warmup_steps=5,
    learning_rate=2e-4,
    lr_scheduler_type="linear",
    optim="adamw_8bit",
    weight_decay=0.01,  # regularization
    bf16=bf16_supported,
    fp16=not bf16_supported,
    logging_steps=1,  # log every step so the loss curve has one point per step
    seed=3407,
)

trainer = SFTTrainer(
    model=model,
    tokenizer=tokenizer,
    train_dataset=dataset,
    dataset_text_field="text",
    max_seq_length=max_seq_length,
    args=training_args,
)

In [None]:
# Run the fine-tuning loop and keep the object returned by the trainer.
train_output = trainer.train()

# Collect the loss from every log record that contains one; records without
# a "loss" key are skipped.
training_loss = [
    entry["loss"]
    for entry in trainer.state.log_history
    if "loss" in entry
]

In [None]:
# Plot the logged training loss; the x-axis is the index of each logged value.
fig, ax = plt.subplots()
ax.plot(training_loss, label="Training Loss")
ax.set_xlabel("Step")
ax.set_ylabel("Loss")
ax.set_title("Training Loss Over Time")
ax.legend()
plt.show()

In [None]:
# Write the fine-tuned model and its tokenizer into the same directory so
# they can be reloaded together.
save_directory = "nutrition_finetuned_model_v2"
model.save_pretrained(save_directory)
tokenizer.save_pretrained(save_directory)

In [None]:
# Reload the model and tokenizer from the directory written by the save step,
# again in 4-bit and with the same 2048-token sequence length.
inference_model, inference_tokenizer = FastLanguageModel.from_pretrained(
    load_in_4bit=True,
    max_seq_length=2048,
    model_name="nutrition_finetuned_model_v2",
)

# Put the reloaded model into Unsloth's inference mode before generating.
FastLanguageModel.for_inference(inference_model)

In [None]:
# Archive the saved model directory into a single zip file.
# NOTE(review): the archive name is relative to the current directory, while
# the download cell reads /content/nutrition_finetuned_model_v2.zip --
# presumably this runs with /content as the working directory; confirm.
!zip -r nutrition_finetuned_model_v2.zip /content/nutrition_finetuned_model_v2

In [None]:
from google.colab import files

# Pass the zipped model archive to Colab's download helper.
archive_path = '/content/nutrition_finetuned_model_v2.zip'
files.download(archive_path)

In [None]:
# Example user questions fed to the fine-tuned model in the inference loop.
test_prompts = [
    "give me keto diet advice",
    "i want nutrition level in scrambled egg with chicken rice",
    "I want some high protein diet",
    "nutrition levels in fish and rice",
    "how much vitamin C in corn snack?",
]

In [None]:
# Generate one sampled reply per test prompt and print it under the question.
for prompt in test_prompts:
    messages = [{"role": "user", "content": prompt}]

    # add_generation_prompt=True appends the assistant header, so the model
    # answers the user instead of continuing the user's own turn.
    formatted_prompt = inference_tokenizer.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=True,
    )

    # The templated string already contains its special tokens; do not let
    # the tokenizer add them a second time (e.g. a duplicated BOS).
    model_inputs = inference_tokenizer(
        formatted_prompt,
        return_tensors="pt",
        add_special_tokens=False,
    ).to("cuda")

    generated_ids = inference_model.generate(
        **model_inputs,
        max_new_tokens=256,
        temperature=0.7,
        do_sample=True,
        pad_token_id=inference_tokenizer.pad_token_id,
    )

    # generate() returns prompt tokens followed by the completion; decode only
    # the newly generated part so the printed answer does not echo the prompt.
    prompt_length = model_inputs["input_ids"].shape[1]
    response = inference_tokenizer.decode(
        generated_ids[0][prompt_length:],
        skip_special_tokens=True,
    )

    print(f"\nUser: {prompt}\n")
    print(f"\nAssistant: {response}\n")