In [2]:
from transformers import AutoTokenizer, AutoModelForCausalLM


In [3]:
#!pip install -U datasets
#!pip install -U bitsandbytes


In [4]:
from datasets import load_dataset

dataset = load_dataset("harshith99/HealthCareMagic-100k-llama3", split="train")
print(dataset[0])

def parse_text(example):
    text = example['text']

    # فاصل user
    user_split = text.split('<|start_header_id|>user<|end_header_id|>')
    if len(user_split) < 2:
        return {"instruction": "", "output": ""}
    user_and_after = user_split[1]

    # نأخذ النص قبل assistant
    user_text = user_and_after.split('<|start_header_id|>assistant<|end_header_id|>')[0].strip()

    # نأخذ الرد بعد assistant
    assistant_split = user_and_after.split('<|start_header_id|>assistant<|end_header_id|>')
    if len(assistant_split) < 2:
        return {"instruction": user_text, "output": ""}
    assistant_text = assistant_split[1].replace('<|eot_id|>', '').strip()

    return {"instruction": user_text, "output": assistant_text}
dataset = dataset.map(parse_text)


README.md:   0%|          | 0.00/363 [00:00<?, ?B/s]

train-00000-of-00001.parquet:   0%|          | 0.00/78.7M [00:00<?, ?B/s]

Generating train split:   0%|          | 0/112165 [00:00<?, ? examples/s]

{'text': '<|begin_of_text|><|start_header_id|>system<|end_header_id|>\nYou are a helpful assistant helping individuals with their medical queries <|eot_id|><|start_header_id|>user<|end_header_id|>\n I have been having alot of catching ,pain and discomfort under my right rib.  If I twist to either side especially my right it feels like my rib actually catches on something and at times I have to stop try to catch my breath and wait for it to subside.  There are times if I am laughing too hard that it will do the same thing but normally its more so if I have twisted or moved  a certain way <|eot_id|><|start_header_id|>assistant<|end_header_id|>\n Hi thanks for asking question. Here you are complaining pain in particular position esp. While turning to a side. So strong possibility is about moderate degree muscular strain. It might have occurred by heavyweight lift or during some activities. Simple analgesic taken. Take rest. Sleep in supine position. Second here Costco Chat Doctor.  Ribs a

Map:   0%|          | 0/112165 [00:00<?, ? examples/s]

In [31]:
#dataset = load_dataset("harshith99/HealthCareMagic-100k-llama3", split="train")
dataset=dataset.shuffle(seed=42).select(range(10000))
def format_prompt(example):
    return {
        "text": f"### Instruction:\n{example['instruction']}\n\n### Response:\n{example['output']}"
    }


dataset = dataset.map(format_prompt)
print(dataset[0]['text'])


Map:   0%|          | 0/10000 [00:00<?, ? examples/s]

### Instruction:
My child has fever for the 5th day today . He is onTaxim 0 100 mg morning n night ,he started with vomiting and lose stools  which stopped on day 3 and on day 4 he started sneezing and throat congestion but as of today the fever is coming every 8hours and is about 100 so do I need to worry and investigate further or should j leave it and its only a virAl fever which will subside after the 5th day .my xSon is 5 years old <|eot_id|>

### Response:
Such symptoms are usually caused due to certain viral infections. It is usually a self limiting condition and not a cause of concern. Irrespective of the use of antibiotics, it should still take 5-7 days to recover. Giving steam inhalation and some antiallergic like cetirizine should provide relief. Gurgling with lukewarm water should help in relieving the throat congestion. Giving paracetamol will help to bring down the temperature. Give ORS or oral rely Chat Doctor. If the child is active and playful, you need not worry. Howe

In [33]:
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch
model_name = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"

tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype=torch.float16, device_map="auto")

prompt = "I have been having alot of catching ,pain and discomfort under my right rib"
inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
outputs = model.generate(**inputs, max_new_tokens=100)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))


I have been having alot of catching ,pain and discomfort under my right ribs. I have been to a chiropractor and a doctor and they both say I have a hernia. I have been having this pain for about 2 weeks now. I have been taking pain killers and anti inflammatory medication but it is not helping. I have been having this pain for about 2 weeks now. I have been taking pain killers and anti inflammatory medication but it is not helping. I have been having this pain for about


In [34]:
prompt = """### Instruction:
I have diabetes, and I was wondering if there are any foods that can improve my blood sugar control?

### Response:"""

inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
outputs = model.generate(**inputs, max_new_tokens=150)
response = tokenizer.decode(outputs[0], skip_special_tokens=True)

# To show only the answer part (strip prompt)
answer = response.split("### Response:")[-1].strip()
print(answer)


Yes, there are several foods that can improve blood sugar control in people with diabetes. Here are some examples:

1. Fruits: Fruits are a great source of natural sugars, such as fruits like apples, bananas, and oranges. They can help regulate blood sugar levels and improve overall health.

2. Vegetables: Vegetables are also a great source of natural sugars, such as carrots, sweet potatoes, and broccoli. They can help regulate blood sugar levels and provide nutrients that are important for overall health.

3. Whole grains: Whole grains, such as whole wheat bread, brown


In [7]:
from peft import prepare_model_for_kbit_training, LoraConfig, get_peft_model

model = prepare_model_for_kbit_training(model)

lora_config = LoraConfig(
    r=8,
    lora_alpha=16,
    target_modules = ["q_proj", "v_proj", "k_proj", "o_proj"],
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM"
)

model = get_peft_model(model, lora_config)


In [8]:
def tokenize(example):
    tokenized = tokenizer(example["text"], truncation=True, padding="max_length", max_length=512)
    tokenized["labels"] = tokenized["input_ids"].copy()
    return tokenized

tokenized = dataset.map(tokenize, batched=True, remove_columns=["instruction", "output", "text"])

Map:   0%|          | 0/10000 [00:00<?, ? examples/s]

In [9]:
from transformers import TrainingArguments, Trainer

args = TrainingArguments(
    per_device_train_batch_size=4,
    gradient_accumulation_steps=4,
    num_train_epochs=3,
    learning_rate=2e-4,
    logging_steps=10,
    output_dir="./medalpaca-finetuned",
    save_total_limit=2,
    save_steps=100,
    report_to="none",
    fp16=True
# تعطيل تتبع W&B

    #evaluation_strategy="steps",
    #eval_steps=100
)
trainer = Trainer(
    model=model,
    train_dataset=tokenized,
    args=args,
    tokenizer=tokenizer
)

trainer.train()


  trainer = Trainer(
No label_names provided for model class `PeftModelForCausalLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


Step,Training Loss
10,2.3508
20,1.6958
30,1.4636
40,1.5147
50,1.401
60,1.413
70,1.4299
80,1.3825
90,1.3613
100,1.3933


TrainOutput(global_step=1875, training_loss=1.2988486457824706, metrics={'train_runtime': 11591.4595, 'train_samples_per_second': 2.588, 'train_steps_per_second': 0.162, 'total_flos': 9.554827935744e+16, 'train_loss': 1.2988486457824706, 'epoch': 3.0})

In [10]:
trainer.save_model("./medalpaca-finetuned")
tokenizer.save_pretrained("./medalpaca-finetuned")


('./medalpaca-finetuned/tokenizer_config.json',
 './medalpaca-finetuned/special_tokens_map.json',
 './medalpaca-finetuned/tokenizer.model',
 './medalpaca-finetuned/added_tokens.json',
 './medalpaca-finetuned/tokenizer.json')

In [11]:
from transformers import AutoTokenizer, AutoModelForCausalLM
from peft import PeftModel
import torch

# 1. تحميل نموذج TinyLlama الأساسي
base_model = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
model = AutoModelForCausalLM.from_pretrained(
    base_model,
    device_map="auto",                  # يستخدم أفضل GPU متاح
    torch_dtype=torch.float16           # لتحسين الأداء
)

# 2. تحميل Tokenizer
tokenizer = AutoTokenizer.from_pretrained(base_model)

# 3. تحميل LoRA Adapter المدرب
adapter_path = "./medalpaca-finetuned"
model = PeftModel.from_pretrained(model, adapter_path)

# 4. دمج LoRA داخل نموذج TinyLlama الأساسي
model = model.merge_and_unload()  # هذا يحوّل النموذج إلى نسخة واحدة بدون اعتماد على adapter خارجي

# 5. حفظ النموذج والـ Tokenizer المدموجين
save_path = "./merged_model"
model.save_pretrained(save_path)
tokenizer.save_pretrained(save_path)

print("✅ تم دمج وحفظ النموذج بنجاح في", save_path)


✅ تم دمج وحفظ النموذج بنجاح في ./merged_model


In [16]:
pip install gradio

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


Collecting gradio
  Downloading gradio-5.30.0-py3-none-any.whl.metadata (16 kB)
Collecting fastapi<1.0,>=0.115.2 (from gradio)
  Downloading fastapi-0.115.12-py3-none-any.whl.metadata (27 kB)
Collecting ffmpy (from gradio)
  Downloading ffmpy-0.5.0-py3-none-any.whl.metadata (3.0 kB)
Collecting gradio-client==1.10.1 (from gradio)
  Downloading gradio_client-1.10.1-py3-none-any.whl.metadata (7.1 kB)
Collecting groovy~=0.1 (from gradio)
  Downloading groovy-0.1.2-py3-none-any.whl.metadata (6.1 kB)
Collecting python-multipart>=0.0.18 (from gradio)
  Downloading python_multipart-0.0.20-py3-none-any.whl.metadata (1.8 kB)
Collecting ruff>=0.9.3 (from gradio)
  Downloading ruff-0.11.10-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (25 kB)
Collecting safehttpx<0.2.0,>=0.1.6 (from gradio)
  Downloading safehttpx-0.1.6-py3-none-any.whl.metadata (4.2 kB)
Collecting semantic-version~=2.0 (from gradio)
  Downloading semantic_version-2.10.0-py2.py3-none-any.whl.metadata (9.7 kB)
Co

In [24]:
from transformers import AutoTokenizer, AutoModelForCausalLM

save_path = "/kaggle/working/merged_model"

# تحميل النموذج والـ tokenizer من المجلد
model = AutoModelForCausalLM.from_pretrained(save_path, torch_dtype=torch.float16, device_map="auto")
tokenizer = AutoTokenizer.from_pretrained(save_path)

# تجربة التوليد
input_text = "What are the symptoms of diabetes?"
input_ids = tokenizer(input_text, return_tensors="pt").input_ids.to(model.device)

output = model.generate(input_ids, max_new_tokens=100)
print(tokenizer.decode(output[0], skip_special_tokens=True))


What are the symptoms of diabetes?


In [22]:
import gradio as gr
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM

# تحميل النموذج المدموج
model_path = "/kaggle/working/merged_model"
tokenizer = AutoTokenizer.from_pretrained(model_path)
model = AutoModelForCausalLM.from_pretrained(model_path, torch_dtype=torch.float16, device_map="auto")
model.eval()

def generate_response(prompt):
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=200,
            do_sample=True,
            top_k=50,
            top_p=0.95,
            temperature=0.7,
            repetition_penalty=1.2
        )
    return tokenizer.decode(outputs[0], skip_special_tokens=True)

# واجهة Gradio
gr.Interface(
    fn=generate_response,
    inputs=gr.Textbox(lines=5, placeholder="اكتب سؤالك هنا...", label="النص"),
    outputs=gr.Textbox(label="رد النموذج"),
    title="💬 تجربة نموذج TinyLlama مدموج مع MedAlpaca",
    description="نموذج مدرب باستخدام LoRA للتفاعل باللغة العربية."
).launch(inline=True)


* Running on local URL:  http://127.0.0.1:7861
It looks like you are running Gradio on a hosted a Jupyter notebook. For the Gradio app to work, sharing must be enabled. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

* Running on public URL: https://dad20796636f7e9648.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


