# WWDC 2024: Train Your Machine Learning and AI Models on Apple GPUs

Joona Havukainen, GPU, Graphics and Display Software


In [2]:
import torch 
import random

# Seed PyTorch's and Python's random number generators with the same value.
torch.manual_seed(42)
random.seed(42)

#torch.set_default_device('mps')

# Choose the compute device in priority order: CUDA, then Apple MPS, then CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
elif torch.backends.mps.is_available():
    device = torch.device("mps")
else:
    device = torch.device("cpu")
print(f"device: {device}")

# print(torch.version.cuda)
# print(torch.backends.cudnn.version())

device: mps


In [None]:
from transformers import LlamaTokenizer, LlamaForCausalLM

# Checkpoint identifier handed to both `from_pretrained` calls below.
model_path = 'openlm-research/open_llama_3b_v2'

# Load the tokenizer and the causal-LM weights from the same checkpoint.
tokenizer = LlamaTokenizer.from_pretrained(model_path)
base_model = LlamaForCausalLM.from_pretrained(model_path)

In [None]:
from peft import LoraConfig, PeftModel

# LoRA adapter settings for a causal language-modelling task.
lora_config = LoraConfig(
    task_type="CAUSAL_LM",
    r=64,
    lora_alpha=32,
    lora_dropout=0.05,
    bias="none",
)

# Wrap the base model with an adapter named "Shakespeare", then move the
# wrapped model to the selected device.
model = PeftModel(base_model, lora_config, adapter_name="Shakespeare")
model.to(device)

In [None]:
import os 
import requests

# Local cache file for the training corpus and the URL it is fetched from.
file_name = "shakespeare.txt"
url = "https://raw.githubusercontent.com/karpathy/char-rnn/master/data/tinyshakespeare/input.txt"

# Download only when the file is not already on disk.
if not os.path.isfile(file_name):
    # A timeout keeps the cell from hanging forever on a stalled connection.
    data = requests.get(url, timeout=30)
    # Fail loudly on HTTP errors; otherwise an error page would be written to
    # disk and silently reused as training data on every later run.
    data.raise_for_status()
    # Write with an explicit encoding so the result does not depend on the
    # platform's default locale.
    with open(file_name, 'w', encoding='utf-8') as f:
        f.write(data.text)
        
        
from transformers import TextDataset

# Tokenize the corpus file with a block size of 128, then keep only the
# first 256 entries of the resulting dataset.
train_dataset = TextDataset(tokenizer=tokenizer, file_path=file_name, block_size=128)[:256]

In [None]:
from transformers import Trainer, TrainingArguments

# Training configuration: 10 epochs, batches of 32 per device, and outputs
# written to (and allowed to overwrite) the "output" directory.
# No evaluation strategy is passed: "no" is already the default, and the old
# `evaluation_strategy` keyword is deprecated in favour of `eval_strategy`,
# so omitting it keeps the cell working across transformers versions.
training_args = TrainingArguments(
    output_dir="output",
    overwrite_output_dir=True,
    num_train_epochs=10,
    per_device_train_batch_size=32,
)

In [None]:
from transformers import DataCollatorForLanguageModeling

# The model is a causal LM (LlamaForCausalLM with a CAUSAL_LM LoRA adapter),
# so masked-language-modelling collation must be disabled. With mlm=True the
# collator needs a tokenizer mask token and would build masked-LM labels
# rather than next-token labels.
data_collator = DataCollatorForLanguageModeling(tokenizer=tokenizer, mlm=False)

# Bundle the model, training arguments, collator and dataset into a Trainer.
trainer = Trainer(
    model=model,
    args=training_args,
    data_collator=data_collator,
    train_dataset=train_dataset
)

In [None]:
def generate_response(prompt_text, model, tokenizer, max_length=30, num_return_sequences=1):
    """Generate text continuations for a prompt and return them decoded.

    Args:
        prompt_text: Prompt string to encode and feed to the model.
        model: Object exposing ``generate(**kwargs)``.
        tokenizer: Object exposing ``encode`` and ``decode``.
        max_length: Forwarded to ``model.generate`` as ``max_length``.
        num_return_sequences: Forwarded to ``model.generate``.

    Returns:
        A list with one decoded string per sequence yielded by
        ``model.generate``, with special tokens skipped.

    Note:
        The encoded prompt is moved to the notebook-level ``device``.
    """
    encoded_prompt = tokenizer.encode(prompt_text, return_tensors='pt')

    generation_kwargs = dict(
        input_ids=encoded_prompt.to(device),
        max_length=max_length,
        num_return_sequences=num_return_sequences,
        no_repeat_ngram_size=2,
    )

    return [
        tokenizer.decode(sequence, skip_special_tokens=True)
        for sequence in model.generate(**generation_kwargs)
    ]

In [None]:
# Sample a completion for this prompt; this cell runs before trainer.train().
prompt_text = "Uneasy lies the head that wears a crown."

responses = generate_response(prompt_text=prompt_text, model=model, tokenizer=tokenizer)
for generated_text in responses:
    print(generated_text)

In [None]:
# Run the training loop on the Trainer built earlier in this notebook.
trainer.train()

In [None]:
# Sample the same prompt again; this cell runs after trainer.train().
prompt_text = "Uneasy lies the head that wears a crown."

responses = generate_response(prompt_text=prompt_text, model=model, tokenizer=tokenizer)
for generated_text in responses:
    print(generated_text)

In [None]:
# Directory that receives both the tokenizer files and the merged model.
save_path = "merged_fine_tuned_openllama2_3b_shakespears"

# Merge the adapter into the underlying model and unload the PEFT wrapper.
merged_model = model.merge_and_unload()

# Save the tokenizer first, then the merged model, into the same directory.
tokenizer.save_pretrained(save_path)
merged_model.save_pretrained(save_path)

# ExecuTorch：将 PyTorch 模型部署到边缘设备

Deploy PyTorch models to devices 

MPS backend for acceleration

```bash
# Clone ExecuTorch. The original command passed `--branch` with no branch
# name, so git treated the URL as the branch and failed for lack of a
# repository argument. To pin a release, add `--branch <tag-or-branch>`
# before the URL.
git clone git@github.com:pytorch/executorch.git
cd executorch

# Fetch the submodules the build depends on.
git submodule sync
git submodule update --init 
# Install the Python requirements with the MPS pybind extension enabled.
./install_requirements.sh --pybind mps
```

视频最后展示了在 iPad Pro 上部署上面微调后的模型；我不懂 Swift，所以没有实现这一部分，之后再尝试。

[基于 Apple GPU 训练机器学习和 AI 模型](https://www.youtube.com/watch?v=CbmTFTsbyPI&t=985s) 11:25
