In [1]:
import timm
import torch
import torch.nn as nn
from torch.utils.data import DataLoader
from torchvision import transforms, datasets

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
num_classes = 10 
batch_size = 32
learning_rate = 1e-4
epochs = 3

In [22]:
from torchvision.transforms import v2

In [23]:
transform = v2.Compose(
    [
        v2.ToImage(),
        v2.Resize((224, 224)),
        v2.ToDtype(torch.float32, scale=True),
        v2.Normalize(
            mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)
        ),
    ]
)

In [4]:
train_dataset = datasets.CIFAR10(
    root="./cifar10",
    train=True,
    transform=transform,
    download=True,
)

test_dataset = datasets.CIFAR10(
    root="./cifar10", train=False, transform=transform, download=True
)

Files already downloaded and verified
Files already downloaded and verified


In [5]:
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True,)
test_loader = DataLoader(
    test_dataset, batch_size=batch_size, shuffle=False, 
)

In [6]:
from transformers import AutoImageProcessor, AutoModelForImageClassification

model_name = "WinKawaks/vit-tiny-patch16-224"
processor = AutoImageProcessor.from_pretrained(model_name,use_fast=True)
model = AutoModelForImageClassification.from_pretrained(model_name)

In [7]:
torch._dynamo.list_backends()

['cudagraphs', 'inductor', 'onnxrt', 'openxla', 'tvm']

In [8]:
model = torch.compile(model)

In [9]:
import torch._dynamo

torch._dynamo.config.suppress_errors = True

In [10]:
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.AdamW(model.parameters(), lr=learning_rate)

In [11]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

OptimizedModule(
  (_orig_mod): ViTForImageClassification(
    (vit): ViTModel(
      (embeddings): ViTEmbeddings(
        (patch_embeddings): ViTPatchEmbeddings(
          (projection): Conv2d(3, 192, kernel_size=(16, 16), stride=(16, 16))
        )
        (dropout): Dropout(p=0.0, inplace=False)
      )
      (encoder): ViTEncoder(
        (layer): ModuleList(
          (0-11): 12 x ViTLayer(
            (attention): ViTSdpaAttention(
              (attention): ViTSdpaSelfAttention(
                (query): Linear(in_features=192, out_features=192, bias=True)
                (key): Linear(in_features=192, out_features=192, bias=True)
                (value): Linear(in_features=192, out_features=192, bias=True)
                (dropout): Dropout(p=0.0, inplace=False)
              )
              (output): ViTSelfOutput(
                (dense): Linear(in_features=192, out_features=192, bias=True)
                (dropout): Dropout(p=0.0, inplace=False)
              )
            )


In [12]:
from torchinfo import summary

summary(model,input_size=(32,3,224,224),device="cuda")

W1121 02:12:12.827000 20260 Lib\site-packages\torch\_dynamo\convert_frame.py:1125] WON'T CONVERT torch_dynamo_resume_in_forward_at_124 c:\Coding\python-projects\pytorch-db\.venv\lib\site-packages\transformers\models\vit\modeling_vit.py line 124 
W1121 02:12:12.827000 20260 Lib\site-packages\torch\_dynamo\convert_frame.py:1125] due to: 
W1121 02:12:12.827000 20260 Lib\site-packages\torch\_dynamo\convert_frame.py:1125] Traceback (most recent call last):
W1121 02:12:12.827000 20260 Lib\site-packages\torch\_dynamo\convert_frame.py:1125]   File "c:\Coding\python-projects\pytorch-db\.venv\lib\site-packages\torch\_dynamo\output_graph.py", line 1446, in _call_user_compiler
W1121 02:12:12.827000 20260 Lib\site-packages\torch\_dynamo\convert_frame.py:1125]     compiled_fn = compiler_fn(gm, self.example_inputs())
W1121 02:12:12.827000 20260 Lib\site-packages\torch\_dynamo\convert_frame.py:1125]   File "c:\Coding\python-projects\pytorch-db\.venv\lib\site-packages\torch\_dynamo\repro\after_dynamo.p

Layer (type:depth-idx)                                            Output Shape              Param #
OptimizedModule                                                   [32, 1000]                --
├─ViTForImageClassification: 1-1                                  [32, 1000]                --
│    └─ViTModel: 2-1                                              [32, 197, 192]            --
│    │    └─ViTEmbeddings: 3-1                                    [32, 197, 192]            185,664
│    │    └─ViTEncoder: 3-2                                       [32, 197, 192]            5,338,368
│    │    └─LayerNorm: 3-3                                        [32, 197, 192]            384
│    └─Linear: 2-2                                                [32, 1000]                193,000
Total params: 5,717,416
Trainable params: 5,717,416
Non-trainable params: 0
Total mult-adds (G): 1.10
Input size (MB): 19.27
Forward/backward pass size (MB): 1297.72
Params size (MB): 22.72
Estimated Total Size (MB): 

In [13]:
for epoch in range(epochs):
    model.train()
    running_loss = 0.0

    for images, labels in train_loader:
        images, labels = images.to(device), labels.to(device)

        # Forward pass
        outputs = model(images)
        logits = outputs.logits
        loss = criterion(logits, labels)

        # Backward pass
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

    print(f"Epoch [{epoch + 1}/{epochs}], Loss: {running_loss / len(train_loader):.4f}")
    # 6. Evaluation
    model.eval()
    correct = 0
    total = 0
    with torch.inference_mode():
        for images, labels in test_loader:
            images, labels = images.to(device), labels.to(device)
            outputs= model(images)
            logits = outputs.logits
            _, predicted = torch.max(logits, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    accuracy = 100 * correct / total
    print(f"Test Accuracy: {accuracy:.2f}%")

W1121 02:12:46.994000 20260 Lib\site-packages\torch\_dynamo\convert_frame.py:1125] WON'T CONVERT forward c:\Coding\python-projects\pytorch-db\.venv\lib\site-packages\transformers\models\vit\modeling_vit.py line 831 
W1121 02:12:46.994000 20260 Lib\site-packages\torch\_dynamo\convert_frame.py:1125] due to: 
W1121 02:12:46.994000 20260 Lib\site-packages\torch\_dynamo\convert_frame.py:1125] Traceback (most recent call last):
W1121 02:12:46.994000 20260 Lib\site-packages\torch\_dynamo\convert_frame.py:1125]   File "c:\Coding\python-projects\pytorch-db\.venv\lib\site-packages\torch\_dynamo\output_graph.py", line 1446, in _call_user_compiler
W1121 02:12:46.994000 20260 Lib\site-packages\torch\_dynamo\convert_frame.py:1125]     compiled_fn = compiler_fn(gm, self.example_inputs())
W1121 02:12:46.994000 20260 Lib\site-packages\torch\_dynamo\convert_frame.py:1125]   File "c:\Coding\python-projects\pytorch-db\.venv\lib\site-packages\torch\_dynamo\repro\after_dynamo.py", line 129, in __call__
W112

Epoch [1/3], Loss: 0.2935
Test Accuracy: 94.61%
Epoch [2/3], Loss: 0.1215
Test Accuracy: 94.26%
Epoch [3/3], Loss: 0.0861
Test Accuracy: 93.72%


In [14]:
class_names = train_dataset.classes

class_names

['airplane',
 'automobile',
 'bird',
 'cat',
 'deer',
 'dog',
 'frog',
 'horse',
 'ship',
 'truck']

In [38]:
import torchvision
from pathlib import Path

image_path = Path("./kocak.jpg")

with torch.inference_mode():
    custom_image = torchvision.io.decode_image(image_path)
    custom_image = transform(custom_image)
    outputs = model(custom_image.unsqueeze(dim=0).to(device))
    logits = outputs.logits
    y_pred = torch.softmax(logits, dim=1)
    y_label = y_pred.argmax(dim=1)
    print(y_pred.max(dim=1))
    print(class_names[y_label])

torch.return_types.max(
values=tensor([0.9999], device='cuda:0'),
indices=tensor([0], device='cuda:0'))
airplane
