In [1]:
import torch
from datasets import load_dataset
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    TrainingArguments,
    logging
)
from peft import LoraConfig
from trl import SFTTrainer
from rouge_score import rouge_scorer

# 1. CONFIGURATION
# ====================================================
model_name = "TinyLlama/TinyLlama-1.1B-Chat-v1.0" 
new_model_name = "sap-tinyllama-finetuned"

# 2. LOAD DATASET
# ====================================================
dataset = load_dataset("json", data_files="sap_instruction_dataset.jsonl", split="train")

# 3. LOAD MODEL (4-BIT QUANTIZATION)
# ====================================================
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.float16,
)

model = AutoModelForCausalLM.from_pretrained(
    model_name,
    quantization_config=bnb_config,
    device_map="auto"
)
model.config.use_cache = False
model.config.pretraining_tp = 1

tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "right"

# 4. LORA CONFIGURATION
# ====================================================
peft_config = LoraConfig(
    lora_alpha=16,
    lora_dropout=0.1,
    r=8,  
    bias="none",
    task_type="CAUSAL_LM",
)

# 5. TRAINING ARGUMENTS
# ====================================================
training_arguments = TrainingArguments(
    output_dir="./sap_finetune_results",
    num_train_epochs=3,
    per_device_train_batch_size=1,
    gradient_accumulation_steps=4,
    optim="paged_adamw_8bit",
    save_steps=25,
    logging_steps=5,
    learning_rate=2e-4,
    fp16=True,
    max_grad_norm=0.3,
    warmup_ratio=0.03,
    group_by_length=True,
    lr_scheduler_type="constant",
)

# 6. INITIALIZE TRAINER
# ====================================================
trainer = SFTTrainer(
    model=model,
    train_dataset=dataset,
    peft_config=peft_config,
    dataset_text_field="instruction",
    max_seq_length=512,
    tokenizer=tokenizer,
    args=training_arguments,
    packing=False,
)

# 7. TRAIN
# ====================================================
print("Starting training on RTX 3050...")
trainer.train()
print("Training Complete!")

# Save
trainer.model.save_pretrained(new_model_name)

# 8. EVALUATION
# ====================================================
print("\n--- Running Evaluation ---")
test_sample = dataset[0]
prompt = f"<|user|>\n{test_sample['instruction']}\n<|assistant|>\n"
reference = test_sample['output']

input_ids = tokenizer(prompt, return_tensors="pt").to("cuda")
outputs = model.generate(**input_ids, max_new_tokens=150)
generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)

print(f"\n[Generated]: {generated_text}")

  from .autonotebook import tqdm as notebook_tqdm


bin c:\Users\HP\anaconda3\envs\sap_agent\lib\site-packages\bitsandbytes\libbitsandbytes_cuda121.dll


Generating train split: 3 examples [00:00, 30.59 examples/s]
You are calling `save_pretrained` to a 4-bit converted model, but your `bitsandbytes` version doesn't support it. If you want to save 4-bit models, make sure to have `bitsandbytes>=0.41.3` installed.
Map: 100%|██████████| 3/3 [00:00<00:00, 11.99 examples/s]
  self.scaler = torch.cuda.amp.GradScaler(**kwargs)


Starting training on RTX 3050...


100%|██████████| 3/3 [00:06<00:00,  2.18s/it]


{'train_runtime': 6.5397, 'train_samples_per_second': 1.376, 'train_steps_per_second': 0.459, 'train_loss': 1.5790642102559407, 'epoch': 2.0}
Training Complete!

--- Running Evaluation ---

[Generated]: <|user|>
I am facing an issue in SAP HANA. The symptoms are: SQL statements run for a long time or consume a high amount of resources in terms of memory and CPU.... How do I resolve this?
<|assistant|>
To resolve this issue, you can follow the steps below:

1. Check the SQL statements that are taking a long time or consuming high resources.
2. Identify the SQL statements that are causing the issue.
3. Analyze the performance of the database and identify bottlenecks.
4. Optimize the SQL statements by reducing the number of joins, using indexes, and optimizing the query plan.
5. Implement caching or other techniques to improve the performance of the database.
6. Monitor the database performance regularly to identify any trends or patterns that may indicate potential issues.
7. Regularly r