In [1]:
!pip install -q unsloth accelerate bitsandbytes transformers datasets peft trl


[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m47.3/47.3 kB[0m [31m4.0 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m299.3/299.3 kB[0m [31m23.3 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m72.9/72.9 MB[0m [31m11.1 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m491.5/491.5 kB[0m [31m38.8 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m376.2/376.2 kB[0m [31m30.1 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m166.5/166.5 kB[0m [31m15.1 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m117.1/117.1 MB[0m [31m7.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m821.2/821.2 MB[0m [31m1.6 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━

In [8]:
from unsloth import FastLanguageModel
from transformers import AutoTokenizer

model, tokenizer = FastLanguageModel.from_pretrained(
    model_name = "deepseek-ai/deepseek-llm-7b-base",  # or use "meta-llama/Meta-Llama-3-8B"
    max_seq_length = 2048,
    load_in_4bit = True,
)


==((====))==  Unsloth 2025.7.8: Fast Llama patching. Transformers: 4.53.3.
   \\   /|    Tesla T4. Num GPUs = 1. Max memory: 14.741 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.7.1+cu126. CUDA: 7.5. CUDA Toolkit: 12.6. Triton: 3.3.1
\        /    Bfloat16 = FALSE. FA [Xformers = 0.0.31.post1. FA2 = False]
 "-____-"     Free license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

deepseek-ai/deepseek-llm-7b-base does not have a padding token! Will use pad_token = <|PAD_TOKEN|>.


In [9]:
from datasets import Dataset

data = [
    {"instruction": "What is the treatment for diabetes?", "input": "", "output": "Diabetes is treated using insulin, exercise, and diet."},
    {"instruction": "Symptoms of high blood pressure?", "input": "", "output": "Headaches, vision issues, chest pain, and dizziness."}
]

dataset = Dataset.from_list(data)


In [10]:
model = FastLanguageModel.get_peft_model(
    model,
    r=64,
    lora_alpha=16,
    lora_dropout=0.05,
    bias="none",
    use_gradient_checkpointing=True,
)


Unsloth: Dropout = 0 is supported for fast patching. You are using dropout = 0.05.
Unsloth will patch all other layers, except LoRA matrices, causing a performance hit.
Unsloth 2025.7.8 patched 30 layers with 0 QKV layers, 0 O layers and 0 MLP layers.


In [11]:
def formatting(example):
    return f"### Instruction:\n{example['instruction']}\n\n### Response:\n{example['output']}"

dataset = dataset.map(lambda x: {"text": formatting(x)})


Map:   0%|          | 0/2 [00:00<?, ? examples/s]

In [18]:
!pip install -U unsloth




In [4]:
!pip install --upgrade unsloth




In [3]:
import unsloth
print(dir(unsloth))




In [12]:
from unsloth import UnslothTrainer, UnslothTrainingArguments

training_args = UnslothTrainingArguments(
    per_device_train_batch_size=2,
    gradient_accumulation_steps=2,
    num_train_epochs=3,
    learning_rate=2e-5,
    logging_steps=10,
    save_strategy="no",
    report_to="none",
)

trainer = UnslothTrainer(
    model=model,
    tokenizer=tokenizer,
    train_dataset=dataset,
    dataset_text_field="text",   # if required, else remove
    max_seq_length=2048,
    dataset_num_proc=2,
    packing=False,
    args=training_args,
)

trainer.train()


Unsloth: Tokenizing ["text"] (num_proc=2):   0%|          | 0/2 [00:00<?, ? examples/s]

==((====))==  Unsloth - 2x faster free finetuning | Num GPUs used = 1
   \\   /|    Num examples = 2 | Num Epochs = 3 | Total steps = 3
O^O/ \_/ \    Batch size per device = 2 | Gradient accumulation steps = 2
\        /    Data Parallel GPUs = 1 | Total batch size (2 x 2 x 1) = 4
 "-____-"     Trainable parameters = 149,913,600 of 7,060,279,296 (2.12% trained)


Step,Training Loss


TrainOutput(global_step=3, training_loss=2.6750882466634116, metrics={'train_runtime': 6.9051, 'train_samples_per_second': 0.869, 'train_steps_per_second': 0.434, 'total_flos': 7411187367936.0, 'train_loss': 2.6750882466634116})

In [13]:
trainer.model.save_pretrained("medical-qlora-adapter")
tokenizer.save_pretrained("medical-qlora-adapter")


('medical-qlora-adapter/tokenizer_config.json',
 'medical-qlora-adapter/special_tokens_map.json',
 'medical-qlora-adapter/tokenizer.json')

In [15]:
prompt = "### Instruction:\nWhat are the symptoms of malaria?\n\n### Response:\n"
inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
outputs = model.generate(**inputs, max_new_tokens=100)
print(tokenizer.decode(outputs[0]))


<｜begin▁of▁sentence｜>### Instruction:
What are the symptoms of malaria?

### Response:

Malaria is a disease caused by a parasite that is transmitted to humans through the bite of an infected mosquito. The symptoms of malaria can vary depending on the severity of the infection and the person's immune system. Some common symptoms of malaria include:

- Fever
- Chills
- Headache
- Muscle aches
- Fatigue
- Nausea and vomiting
- Diarrhea
- Abdominal pain
- Rash
- Anemia

In severe cases,
