In [None]:
!pip install unsloth
!pip install -U bitsandbytes
!pip show bitsandbytes

In [None]:
!pip install -U bitsandbytes
!pip install accelerate
!pip show bitsandbytes
from unsloth import FastLanguageModel
import torch
# --- Model loading configuration -------------------------------------------
max_seq_length = 2048  # forwarded to the loader here and to the trainer later on
dtype = None           # handed to from_pretrained unchanged
load_in_4bit = True    # 4-bit quantization to reduce memory usage; may be False

# Load the base checkpoint together with its tokenizer.
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name="unsloth/Qwen2.5-7B",
    max_seq_length=max_seq_length,
    dtype=dtype,
    load_in_4bit=load_in_4bit,
)

In [3]:
# Attach LoRA adapters to the loaded model.
# NOTE(review): this rebinds `model` from itself, so re-running the cell would
# call get_peft_model on an already-wrapped model — re-run the loading cell first.
model = FastLanguageModel.get_peft_model(
    model,
    r=16,  # LoRA rank: any number > 0; suggested 8, 16, 32, 64, 128
    target_modules=[
        "q_proj",
        "k_proj",
        "v_proj",
        "o_proj",
        "gate_proj",
        "up_proj",
        "down_proj",
    ],
    lora_alpha=16,
    lora_dropout=0,  # any value is supported, but 0 is the optimized setting
    bias="none",  # any value is supported, but "none" is the optimized setting
    use_gradient_checkpointing=True,
    random_state=42,
    use_rslora=False,
    loftq_config=None,
)

Unsloth 2025.8.10 patched 28 layers with 28 QKV layers, 28 O layers and 28 MLP layers.


In [4]:
import pandas as pd

# Download the conversation pairs directly from the Hugging Face Hub.
df = pd.read_csv("hf://datasets/Abhishekcr448/Hinglish-Everyday-Conversations-1M/hinglish_conversations.csv")
# Keep a local copy. NOTE(review): despite the file name, no cleaning step is
# applied before writing — the frame is saved exactly as downloaded.
df.to_csv('cleaned_hinglish_dataset.csv', index=False)

# Render the first 1000 rows as user/assistant turns in one string.
# A single str.join over the zipped columns replaces repeated `+=` inside
# DataFrame.iterrows(), avoiding a Series per row and repeated string copies.
preview_rows = df.head(1000)
formatted_string = "".join(
    f"<|im_start|>user\n{user_text}<|im_end|>\n"
    f"<|im_start|>assistant\n{assistant_text}<|im_end|>\n"
    for user_text, assistant_text in zip(preview_rows['input'], preview_rows['output'])
)



In [None]:
import csv
from itertools import islice

file_path = 'cleaned_hinglish_dataset.csv'  # Replace with your CSV file path
lines_to_read = 10  # Number of rows to print (the header row counts as one)

# pandas' to_csv writes UTF-8 by default, so read the file back as UTF-8
# explicitly instead of relying on the platform's locale encoding.
with open(file_path, 'r', newline='', encoding='utf-8') as csvfile:
    # islice stops after `lines_to_read` rows; the rest of the file is not read.
    for row in islice(csv.reader(csvfile), lines_to_read):
        print(row)

In [5]:
# Single-turn chat template: the first slot takes the user message, the second
# takes the assistant reply.
alpaca_prompt = (
    "<|im_start|>user\n"
    "{}<|im_end|>\n"
    "<|im_start|>assistant\n"
    "{}<|im_end|>"
)


def formatting_prompts_func(examples):
    """
    Build the training text for every row of a batch.

    Args:
        examples: a batch mapping with parallel sequences under the keys
            "input" (user messages) and "output" (assistant replies).

    Returns:
        A dict with one key, "text", holding one formatted string per row:
        the filled-in template followed by the tokenizer's EOS token.
    """
    return {
        "text": [
            alpaca_prompt.format(user_msg, assistant_msg) + tokenizer.eos_token
            for user_msg, assistant_msg in zip(examples["input"], examples["output"])
        ]
    }

from datasets import load_dataset

# Fetch the train split of the conversation dataset from the Hub.
# NOTE(review): the result is bound to `datasets`, the same word as the library
# name; the next cell reads this variable, so the name is kept as-is.
datasets = load_dataset(
    "Abhishekcr448/Hinglish-Everyday-Conversations-1M",
    split="train",
)

README.md: 0.00B [00:00, ?B/s]

hinglish_conversations.csv:   0%|          | 0.00/185M [00:00<?, ?B/s]

Generating train split:   0%|          | 0/1001323 [00:00<?, ? examples/s]

In [6]:
from unsloth.chat_templates import standardize_sharegpt
# Pass the raw split through standardize_sharegpt, then add the "text" column
# by applying formatting_prompts_func batch-wise.
# NOTE(review): the rows here only carry "input"/"output" columns — confirm the
# standardize_sharegpt step actually changes anything for this dataset.
dataset = standardize_sharegpt(datasets).map(
    formatting_prompts_func,
    batched=True,
)

Map:   0%|          | 0/1001323 [00:00<?, ? examples/s]

In [7]:
# Display the record at index 5 to spot-check the generated "text" field.
dataset[5]

{'input': 'aapko kya problem hai?',
 'output': 'kuch nahi, bas tension hai.',
 'text': '<|im_start|>user\naapko kya problem hai?<|im_end|>\n<|im_start|>assistant\nkuch nahi, bas tension hai.<|im_end|><|endoftext|>'}

In [8]:
from transformers import TrainingArguments
from trl import SFTTrainer

# Probe bf16 support once; exactly one of fp16/bf16 is enabled from the result.
bf16_available = torch.cuda.is_bf16_supported()

training_args = TrainingArguments(
    # Batching: 2 samples per device x 4 accumulation steps per optimizer update.
    per_device_train_batch_size=2,
    gradient_accumulation_steps=4,
    # Schedule: a short 60-step run with 5 warmup steps and linear LR decay.
    max_steps=60,
    warmup_steps=5,
    learning_rate=2e-4,
    lr_scheduler_type="linear",
    # Optimizer.
    optim="adamw_8bit",
    weight_decay=0.01,
    # Mixed precision.
    bf16=bf16_available,
    fp16=not bf16_available,
    # Logging: emit a log entry on every step; no external reporting backend.
    logging_dir="results/runs",
    logging_strategy="steps",
    logging_steps=1,
    report_to="none",
    # Checkpointing: write a checkpoint to `output_dir` every 10 steps.
    output_dir="outputs",
    save_strategy="steps",
    save_steps=10,
    seed=42,
)

# 5. Initialize the Trainer
# Build the supervised fine-tuning trainer over the formatted "text" column.
trainer = SFTTrainer(
    model=model,
    tokenizer=tokenizer,
    args=training_args,
    train_dataset=dataset,
    dataset_text_field="text",
    dataset_num_proc=2,
    max_seq_length=max_seq_length,
    packing=False,  # may be switched to True for faster training on some datasets
)

Unsloth: Tokenizing ["text"] (num_proc=6):   0%|          | 0/1001323 [00:00<?, ? examples/s]

In [9]:

# Run the fine-tuning loop; the value returned by train() is kept in trainer_stats.
trainer_stats = trainer.train()

==((====))==  Unsloth - 2x faster free finetuning | Num GPUs used = 1
   \\   /|    Num examples = 1,001,323 | Num Epochs = 1 | Total steps = 60
O^O/ \_/ \    Batch size per device = 2 | Gradient accumulation steps = 4
\        /    Data Parallel GPUs = 1 | Total batch size (2 x 4 x 1) = 8
 "-____-"     Trainable parameters = 40,370,176 of 7,655,986,688 (0.53% trained)


Step,Training Loss,entropy
1,4.4674,0
2,4.2912,No Log
3,4.5327,No Log
4,5.0168,No Log
5,4.3745,No Log
6,4.0342,No Log
7,4.2887,No Log
8,4.0087,No Log
9,4.2854,No Log
10,4.006,No Log


Unsloth: Will smartly offload gradients to save VRAM!


In [12]:
from transformers import pipeline
# Wrap the in-memory model and tokenizer in a text-generation pipeline.
pipe = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
)
# The prompt ends with an opened assistant turn, so generation continues
# from the start of the reply.
prompt = (
    "<|im_start|>user\n"
    "last friday tumnea kaun see movie dekhe<|im_end|>\n"
    "<|im_start|>assistant\n"
)
outputs = pipe(prompt, max_new_tokens=200)
print(outputs[0]["generated_text"])

Device set to use cuda:0


<|im_start|>user
last friday tumnea kaun see movie dekhe<|im_end|>
<|im_start|>assistant
bilkul, main bahut acha film dekha, par next week bhi kuch aur plan karna padega!
