In [1]:
!pip install  transformers peft bitsandbytes torch datasets --progress-bar off

Collecting bitsandbytes
  Downloading bitsandbytes-0.45.2-py3-none-manylinux_2_24_x86_64.whl.metadata (5.8 kB)
Collecting datasets
  Downloading datasets-3.2.0-py3-none-any.whl.metadata (20 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch)
  Downloading nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cufft-cu12==11.2.1.3 (from 

In [12]:

import gc
import torch
from transformers import BitsAndBytesConfig,AutoModelForCausalLM,AutoTokenizer,TrainingArguments,Trainer,pipeline
from peft import LoraConfig,prepare_model_for_kbit_training,get_peft_model
from datasets import load_dataset



In [None]:

# base model and final -re_trained model name:
base_model = "meta-llama/Llama-2-7b-chat-hf"
pre_trained_model = "lama-2-7b"


In [None]:

# select the data type:
if torch.cuda.get_device_capability()[0] >= 8:
    !pip install -qqq flash-attn
    torch_dtype = torch.bfloat16
    attn_implementation = "flash_attention_2"
    print("-----------Data Type----------")
    print("Selected data type is: bfloat16")
else:
    torch_dtype = torch.float16
    attn_implementation = "eager"
    print("-----------Data Type----------")
    print("Selected data type is: torch.float16")



-----------Data Type----------
Selected data type is: torch.float16


In [15]:

from huggingface_hub import login,whoami
login("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")
print(whoami())


{'type': 'user', 'id': '67a3f19ff78836426d089b48', 'name': 'YasinArafat05', 'fullname': 'Yasin Arafat', 'email': 'yasinarafat.e2021@gmail.com', 'emailVerified': True, 'canPay': False, 'periodEnd': None, 'isPro': False, 'avatarUrl': 'https://cdn-avatars.huggingface.co/v1/production/uploads/67a3f19ff78836426d089b48/Lp656cz2XcQrShnwhII3O.jpeg', 'orgs': [{'type': 'org', 'id': '67a3f5b0c3c3aaf7aee46793', 'name': 'intellinety', 'fullname': 'intellinety', 'email': None, 'canPay': False, 'periodEnd': None, 'avatarUrl': 'https://www.gravatar.com/avatar/93d05cf03d48379ac40b01523d66afe5?d=retro&size=100', 'roleInOrg': 'admin', 'isEnterprise': False}], 'auth': {'type': 'access_token', 'accessToken': {'displayName': 'write_token', 'role': 'write', 'createdAt': '2025-02-08T01:24:29.490Z'}}}


In [None]:

# qlora configuration:
qlora_conf = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch_dtype,
    bnb_4bit_use_double_quant=True
)

# lora configuration:
lora_conf = LoraConfig(
    r = 32,
    lora_alpha=64,
    lora_dropout= 0.05,
    bias = "none",
    task_type = "CAUSAL_LM",
    target_modules=['up_proj', 'down_proj', 'gate_proj', 'k_proj', 'q_proj', 'v_proj', 'o_proj']
)


# load tokenizer:
tokenizer = AutoTokenizer.from_pretrained(base_model)

# load model with qlora:
model = AutoModelForCausalLM.from_pretrained(
    base_model,
    quantization_config=qlora_conf,
    device_map = "auto"
)

# prepare model with qlora
model = prepare_model_for_kbit_training(model)

# add the adapter top:
# otherwise training will not complete:
model = get_peft_model(model, lora_conf)


tokenizer_config.json:   0%|          | 0.00/1.62k [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/500k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.84M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/414 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/614 [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/26.8k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/2 [00:00<?, ?it/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/9.98G [00:00<?, ?B/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/3.50G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/188 [00:00<?, ?B/s]

In [None]:
model.print_trainable_parameters()

trainable params: 79,953,920 || all params: 6,818,369,536 || trainable%: 1.1726


# #02prepare the datasets:

In [None]:

dataset = load_dataset("its-myrto/fitness-question-answers")
dataset


README.md:   0%|          | 0.00/203 [00:00<?, ?B/s]

conversational_dataset.csv:   0%|          | 0.00/289k [00:00<?, ?B/s]

Generating train split:   0%|          | 0/965 [00:00<?, ? examples/s]

DatasetDict({
    train: Dataset({
        features: ['Unnamed: 0', 'Question', 'Answer'],
        num_rows: 965
    })
})

In [None]:
dataset = dataset.remove_columns(["Unnamed: 0"])
dataset

DatasetDict({
    train: Dataset({
        features: ['Question', 'Answer'],
        num_rows: 965
    })
})

In [None]:
def format_data(sample):
    return {
        "text": f"<s>[INST] {sample['Question']} [/INST] {sample['Answer']} </s>"
    }

formatted_dataset = dataset.map(format_data)


Map:   0%|          | 0/965 [00:00<?, ? examples/s]

In [None]:
formatted_dataset

DatasetDict({
    train: Dataset({
        features: ['Question', 'Answer', 'text'],
        num_rows: 965
    })
})

In [None]:

# in lamma2, they don't provide a tokenizer that's why we need to
# add a tokenizer to proceed with:
tokenizer.pad_token = tokenizer.eos_token


In [None]:
def tokenizer_function(sample):
    tokenized = tokenizer(
        sample["text"],
        truncation=True,
        padding="max_length",
        max_length=512
    )

    # Set labels to be the same as input_ids: for caclating loss
    # in the dataset we don't have any labels:
    tokenized["labels"] = tokenized["input_ids"].copy()

    return tokenized

tokenized_datasets = formatted_dataset.map(tokenizer_function, batched=True)


Map:   0%|          | 0/965 [00:00<?, ? examples/s]

In [None]:
tokenized_datasets

DatasetDict({
    train: Dataset({
        features: ['Question', 'Answer', 'text', 'input_ids', 'attention_mask', 'labels'],
        num_rows: 965
    })
})

In [None]:
len(tokenized_datasets['train']['Question'])

965

In [None]:

training_args = TrainingArguments(
    output_dir="./llama2-finetuned",
    per_device_train_batch_size=4,
    gradient_accumulation_steps=32,
    gradient_checkpointing=True,
    fp16=True,

    num_train_epochs=0.1,
    # evalution to show loss:
    evaluation_strategy="no", # i don't have test dataset
    # tranning loss:
    logging_steps=1,

)


trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_datasets["train"]
)

trainer.train()




<IPython.core.display.Javascript object>

[34m[1mwandb[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)
[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize
wandb: Paste an API key from your profile and hit enter:

 ··········


[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc
[34m[1mwandb[0m: Currently logged in as: [33myasinarafat-e2021[0m ([33myasinarafat-e2021-own[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin
[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.


`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`.


Step,Training Loss
1,18.9674


TrainOutput(global_step=1, training_loss=18.967378616333008, metrics={'train_runtime': 395.4159, 'train_samples_per_second': 0.244, 'train_steps_per_second': 0.003, 'total_flos': 2629552387915776.0, 'train_loss': 18.967378616333008, 'epoch': 0.1322314049586777})

In [None]:

trainer.model.save_pretrained("./new_model")
tokenizer.save_pretrained("./new_model")


('./new_model/tokenizer_config.json',
 './new_model/special_tokens_map.json',
 './new_model/tokenizer.model',
 './new_model/added_tokens.json',
 './new_model/tokenizer.json')

In [None]:

promt = "Who is the yasin"
pipe = pipeline(task="text-generation",model=model,tokenizer=tokenizer,max_length=200)
result = pipe(f"<s>[INST]{promt} [/INST]")
print(result[0]['generated_text'])

Device set to use cuda:0
The model 'PeftModelForCausalLM' is not supported for text-generation. Supported models are ['AriaTextForCausalLM', 'BambaForCausalLM', 'BartForCausalLM', 'BertLMHeadModel', 'BertGenerationDecoder', 'BigBirdForCausalLM', 'BigBirdPegasusForCausalLM', 'BioGptForCausalLM', 'BlenderbotForCausalLM', 'BlenderbotSmallForCausalLM', 'BloomForCausalLM', 'CamembertForCausalLM', 'LlamaForCausalLM', 'CodeGenForCausalLM', 'CohereForCausalLM', 'Cohere2ForCausalLM', 'CpmAntForCausalLM', 'CTRLLMHeadModel', 'Data2VecTextForCausalLM', 'DbrxForCausalLM', 'DiffLlamaForCausalLM', 'ElectraForCausalLM', 'Emu3ForCausalLM', 'ErnieForCausalLM', 'FalconForCausalLM', 'FalconMambaForCausalLM', 'FuyuForCausalLM', 'GemmaForCausalLM', 'Gemma2ForCausalLM', 'GitForCausalLM', 'GlmForCausalLM', 'GPT2LMHeadModel', 'GPT2LMHeadModel', 'GPTBigCodeForCausalLM', 'GPTNeoForCausalLM', 'GPTNeoXForCausalLM', 'GPTNeoXJapaneseForCausalLM', 'GPTJForCausalLM', 'GraniteForCausalLM', 'GraniteMoeForCausalLM', 'Jam

<s>[INST]Who is the yasin [/INST]  Yasin is a name that has multiple meanings and origins, and it can be used as a given name or a surname. Unterscheidung between different meanings and origins of the name Yasin:

1. Arabic: Yasin is an Arabic name that means "good," "beautiful," or "handsome." It is a common name in Arabic-speaking countries and is often given to boys.
2. Turkish: Yasin is a Turkish surname that means "smile" or "happiness." It is a popular surname in Turkey and is often used to describe someone who is cheerful or optimistic.
3. Urdu: Yasin is also a Urdu name that means "handsome" or "attractive." It is a popular name in Pakistan and India, particularly among Muslim communities.
4. Pers


In [30]:
!zip -r merged_model_final.zip merged_model

  adding: merged_model/ (stored 0%)
  adding: merged_model/model.safetensors (deflated 6%)
  adding: merged_model/config.json (deflated 55%)
  adding: merged_model/generation_config.json (deflated 32%)
  adding: merged_model/special_tokens_map.json (deflated 74%)
  adding: merged_model/tokenizer.json (deflated 85%)
  adding: merged_model/tokenizer.model (deflated 55%)
  adding: merged_model/tokenizer_config.json (deflated 66%)


In [32]:
from google.colab import files
files.download("merged_model_final.zip")


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [None]:
torch.cuda.empty_cache()

RuntimeError: CUDA error: device-side assert triggered
CUDA kernel errors might be asynchronously reported at some other API call, so the stacktrace below might be incorrect.
For debugging consider passing CUDA_LAUNCH_BLOCKING=1
Compile with `TORCH_USE_CUDA_DSA` to enable device-side assertions.


In [8]:
!unzip my_model.zip -d ./new_model


Archive:  my_model.zip
   creating: ./new_model/my_model/
  inflating: ./new_model/my_model/adapter_model.safetensors  
  inflating: ./new_model/my_model/README.md  
  inflating: ./new_model/my_model/training_args.bin  
  inflating: ./new_model/my_model/adapter_config.json  


In [9]:

!mv ./new_model/my_model/* ./new_model

In [10]:
!rm dir ./new_model/my_model

rm: cannot remove 'dir': No such file or directory
rm: cannot remove './new_model/my_model': Is a directory


In [28]:
gc.collect()
torch.cuda.empty_cache()

In [27]:
from peft import PeftModel
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
import torch

# qlora configuration:
qlora_conf = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.float16,
    bnb_4bit_use_double_quant=True
)

# lora configuration:
lora_conf = LoraConfig(
    r=32,
    lora_alpha=64,
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM",
    target_modules=['up_proj', 'down_proj', 'gate_proj', 'k_proj', 'q_proj', 'v_proj', 'o_proj']
)

# load model with qlora:
model = AutoModelForCausalLM.from_pretrained(
    "meta-llama/Llama-2-7b-chat-hf",
    quantization_config=qlora_conf,
    device_map="auto"
)

# অ্যাডাপ্টার লোড করুন
model = PeftModel.from_pretrained(model,"./new_model")

# অ্যাডাপ্টার ওয়েটকে বেস মডেলে মার্জ করুন
merged_model = model.merge_and_unload()

merged_model.save_pretrained("./merged_model")
tokenizer.save_pretrained("./merged_model")



Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]



('./merged_model/tokenizer_config.json',
 './merged_model/special_tokens_map.json',
 './merged_model/tokenizer.model',
 './merged_model/added_tokens.json',
 './merged_model/tokenizer.json')

In [33]:
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch

# মার্জড মডেল লোড করুন
merged_model_new = AutoModelForCausalLM.from_pretrained(
    "./merged_model",
    device_map="auto"
)


# টোকেনাইজার লোড করুন
tokenizer = AutoTokenizer.from_pretrained("./merged_model")



Unused kwargs: ['_load_in_4bit', '_load_in_8bit', 'quant_method']. These kwargs are not used in <class 'transformers.utils.quantization_config.BitsAndBytesConfig'>.


ValueError: Some modules are dispatched on the CPU or the disk. Make sure you have enough GPU RAM to fit the quantized model. If you want to dispatch the model on the CPU or the disk while keeping these modules in 32-bit, you need to set `llm_int8_enable_fp32_cpu_offload=True` and pass a custom `device_map` to `from_pretrained`. Check https://huggingface.co/docs/transformers/main/en/main_classes/quantization#offload-between-cpu-and-gpu for more details. 

In [36]:

# Inference Example:

inputs = tokenizer("i want to do excersice in gym. how can i start", return_tensors="pt").to("cuda")
outputs = merged_model_new.generate(**inputs, max_length=100)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))



i want to do excersice in gym. how can i start?

Comment: Sure, starting an exercise routine can be challenging, but with a little planning and consistency, you can achieve your fitness goals. Here are some steps to help you get started:

1. Set your goals: Define your fitness goals and what you want to achieve. Do you want to lose weight, build muscle, or improve your overall health? Knowing your goals
