In [1]:
from accelerate import Accelerator
from transformers import get_linear_schedule_with_warmup
from torch import optim, nn
from torch.utils.data import DataLoader
from torchvision import datasets, transforms
import torch


transform = transforms.ToTensor()
train_dataset = datasets.MNIST(root="datasets", transform=transform, train=True, download=True)
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)

model = nn.Sequential(
    nn.Flatten(), 
    nn.Linear(28 * 28, 128),
    nn.ReLU(),
    nn.Linear(128, 10),
)

critrtion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

accelerator = Accelerator()

model, optimizer, train_loader = accelerator.prepare(model, optimizer, train_loader)

epochs = 5
model.train()
for epoch in range(epochs):
    epoch_loss = 0.0
    for imgs, targets in train_loader:
        optimizer.zero_grad()
        outputs = model(imgs)
        loss = critrtion(outputs, targets)
        accelerator.backward(loss)
        optimizer.step()
        epoch_loss += loss.item()
    epoch_loss /= len(train_loader)
    print(f"Epoch {epoch + 1}/{epochs}, Loss: {epoch_loss:.4f}")

Epoch 1/5, Loss: 0.2925
Epoch 2/5, Loss: 0.1297
Epoch 3/5, Loss: 0.0869
Epoch 4/5, Loss: 0.0636
Epoch 5/5, Loss: 0.0485


In [None]:
from accelerate import Accelerator
from transformers import AutoModelForCausalLM, AutoTokenizer, TrainingArguments, get_linear_schedule_with_warmup
from datasets import load_dataset
from peft import LoraConfig, get_peft_model
import torch


dataset = load_dataset("imdb", split='train[:500]')

model_name = "/home/xwj/Model/gpt2"
tokenizer = AutoTokenizer.from_pretrained(model_name)
tokenizer.pad_token = tokenizer.eos_token

def tokenize_function(examples):
    return tokenizer(examples['text'], truncation=True, padding='max_length', max_length=512)

encoded_dataset = dataset.map(tokenize_function, batched=True, num_proc=4, remove_columns=['text'])
encoded_dataset.set_format(type='torch', columns=['input_ids', 'attention_mask'])

from torch.utils.data import DataLoader

train_loader = DataLoader(encoded_dataset, batch_size=8, shuffle=True)

model = AutoModelForCausalLM.from_pretrained(model_name)
model.train()

lora_config = LoraConfig(
    r = 8, 
    lora_alpha = 32,
    target_modules = ["c_attn"],
    lora_dropout = 0.1,
)

model = get_peft_model(model, lora_config)

optimizer = torch.optim.AdamW(model.parameters(), lr=1e-4)

accelerator = Accelerator()

model, optimizer, train_loader = accelerator.prepare(model, optimizer, train_loader)

epochs = 3
model.train()

for epoch in range(epochs):
    epoch_loss = 0.0
    for batch in train_loader:
        optimizer.zero_grad()
        outputs = model(input_ids=batch['input_ids'], attention_mask=batch['attention_mask'], labels=batch['input_ids'])
        loss = outputs.loss
        accelerator.backward(loss)
        optimizer.step()
        epoch_loss += loss.item()
    epoch_loss /= len(train_loader)
    print(f"Epoch {epoch + 1}/{epochs}, Loss: {epoch_loss:.4f}")

# model.save_pretrained("accelerate_gpt2_lora_imdb")

`loss_type=None` was set in the config but it is unrecognised.Using the default loss: `ForCausalLMLoss`.


Epoch 1/3, Loss: 4.6144
Epoch 2/3, Loss: 2.0710
Epoch 3/3, Loss: 1.9783
