In [1]:
import torch
import GPUtil
import os

GPUtil.showUtilization()

| ID | GPU | MEM |
------------------


In [2]:
if torch.backends.mps.is_available():
    device = torch.device("mps")
    dtype = torch.float16
    print("MPS is available. Using GPU for computations.")
    print(device)
else:
    device = torch.device("cpu")
    dtype = torch.float32
    print("CUDA is not available. Using CPU for computations.")
    print(device)

# os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"  # Ensure consistent GPU ordering
# os.environ["CUDA_VISIBLE_DEVICES"] = "0"  # Set to the GPU you want to use

# Optional: Control MPS behavior
os.environ["PYTORCH_ENABLE_MPS_FALLBACK"] = "1"  # Fallback to CPU for unsupported ops

# Optional: For debugging
os.environ["PYTORCH_MPS_HIGH_WATERMARK_RATIO"] = "0.0"  # Disable memory caching


MPS is available. Using GPU for computations.
mps


In [3]:
import torch
import transformers
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig, LlamaTokenizer
from huggingface_hub import notebook_login
from datasets import load_dataset
from peft import prepare_model_for_kbit_training, LoraConfig, get_peft_model


In [4]:
notebook_login()

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

In [5]:
tokenizer = AutoTokenizer.from_pretrained("TinyLlama/TinyLlama-1.1B-Chat-v1.0", use_fast=False, trust_remote_code=True, add_eos_token=True)
if tokenizer.pad_token is None:
    tokenizer.add_special_tokens({'pad_token': tokenizer.eos_token})


In [6]:
train_dataset = load_dataset("text", data_files={"train":
                                                    ["data/hawaii_wf_1.txt", "data/hawaii_wf_2.txt"]}, split="train")

In [7]:
tokenized_train_dataset=[]
for phrase in train_dataset:
    tokenized_train_dataset.append(tokenizer(phrase["text"]))

In [8]:
tokenized_train_dataset[1]

{'input_ids': [1, 21122, 29979, 8079, 14861, 3120, 2], 'attention_mask': [1, 1, 1, 1, 1, 1, 1]}

In [9]:
tokenizer.eos_token

'</s>'

In [10]:
model = AutoModelForCausalLM.from_pretrained(
    "TinyLlama/TinyLlama-1.1B-Chat-v1.0",
    torch_dtype=dtype,
    device_map=device,
    low_cpu_mem_usage=True
)


In [11]:
model.gradient_checkpointing_enable()
model = prepare_model_for_kbit_training(model)

config = LoraConfig(
    r = 8,
    lora_alpha = 64,
    target_modules = ["q_proj", "k_proj", "v_proj", "o_proj", "gate_proj", "up_proj", "down_proj"],
    bias = "none",
    lora_dropout = 0.05,
    task_type = "CAUSAL_LM"
)

model = get_peft_model(model, config)

  warn("The installed version of bitsandbytes was compiled without GPU support. "


'NoneType' object has no attribute 'cadam32bit_grad_fp32'


In [None]:
trainer = transformers.Trainer(
    model=model,
    train_dataset=tokenized_train_dataset,
    args=transformers.TrainingArguments(
        output_dir="./finetunedModel",
        per_device_train_batch_size=1,  # Reduce batch size
        gradient_accumulation_steps=4,   # Increase accumulation to compensate
        num_train_epochs=3,
        learning_rate=1e-4,
        max_steps=20,
        fp16=False,  # Keep False for stability on MPS
        optim="adamw_torch",  # CRITICAL: Change from 8bit optimizer
        logging_dir="./log",
        save_strategy="epoch",
        save_steps=50,
        logging_steps=1,  # Set to 1 to see progress
        report_to="none",  # Disable wandb/tensorboard
        push_to_hub=False,
        remove_unused_columns=False,
        dataloader_pin_memory=False,  # Important for MPS
    ),
    data_collator=transformers.DataCollatorForLanguageModeling(tokenizer, mlm=False),
)

model.config.use_cache = False
trainer.train()

No label_names provided for model class `PeftModelForCausalLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


In [14]:
print(f"Full dataset size: {len(tokenized_train_dataset)}")
print(f"With batch_size=2 and 20 steps, you'll process {2*20} samples")

Full dataset size: 152
With batch_size=2 and 20 steps, you'll process 40 samples


In [13]:
# Test with tiny dataset first
test_dataset = tokenized_train_dataset[:10]  # Only 10 samples

test_trainer = transformers.Trainer(
    model=model,
    train_dataset=test_dataset,
    args=transformers.TrainingArguments(
        output_dir="./test_run",
        per_device_train_batch_size=1,
        num_train_epochs=1,
        max_steps=5,
        optim="adamw_torch",
        logging_steps=1,
        report_to="none",
    ),
    data_collator=transformers.DataCollatorForLanguageModeling(tokenizer, mlm=False),
)

print("Running quick test...")
test_trainer.train()
print("Test completed!")

No label_names provided for model class `PeftModelForCausalLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


Running quick test...


`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`.


Step,Training Loss
1,3.927
2,3.2665
3,4.0521
4,4.5198
5,2.2324


Test completed!


In [13]:
trainer = transformers.Trainer(
    model=model,
    train_dataset=tokenized_train_dataset,
    args=transformers.TrainingArguments(
        output_dir="./finetunedModel",
        per_device_train_batch_size=2,
        gradient_accumulation_steps=2,
        num_train_epochs=3,
        learning_rate=1e-4,
        max_steps=20,
        bf16=False,
        optim="paged_adamw_8bit",
        logging_dir="./log",
        save_strategy="epoch",
        save_steps=50,
        logging_steps=10
    ),
    data_collator=transformers.DataCollatorForLanguageModeling(tokenizer, mlm=False),
)

model.config.use_cache = False
trainer.train()

No label_names provided for model class `PeftModelForCausalLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


KeyboardInterrupt: 

In [8]:
train_dataset["text"][10]

'During the hours of August 8, 2023, Maui became the stage for the most tragic natural disaster in state history and the deadliest fire in modem American history. '

In [24]:
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch
import platform

print(f"Platform: {platform.processor()}")
print(f"PyTorch version: {torch.__version__}")

# Check available device
if torch.backends.mps.is_available():
    device = "mps"
    dtype = torch.float16  # MPS works well with float16
else:
    device = "cpu"
    dtype = torch.float32

print(f"Using device: {device}")

# Load model without quantization for M1
tokenizer = AutoTokenizer.from_pretrained("TinyLlama/TinyLlama-1.1B-Chat-v1.0")
model = AutoModelForCausalLM.from_pretrained(
    "TinyLlama/TinyLlama-1.1B-Chat-v1.0",
    torch_dtype=dtype,
    device_map=device,
    low_cpu_mem_usage=True
)

# Test inference
input_text = "Hello, how are you?"
inputs = tokenizer(input_text, return_tensors="pt").to(device)
outputs = model.generate(**inputs, max_length=50)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))

Platform: arm
PyTorch version: 2.7.1
Using device: mps
Hello, how are you?
I am fine, thank you.
How are you? I am fine, thank you.
Can you repeat that again?
Can you repeat that again, please?
Can you repeat that again,
