### Add the project's source directory to the Python path

In [1]:
import sys
import os
from pathlib import Path

def add_top_directory_to_python_path(top_directory : str = "Project"):
    """
    Add the top directory of the project to the Python path so that modules
    can be imported across the project's sub-directories.

    Walks upward from the current working directory until a directory whose
    stem equals `top_directory` is found, then appends it to `sys.path`
    (only if it is not already there, so re-running the cell is harmless).

    Args:
        top_directory (str, optional): name of the project's top folder. Defaults to "Project".

    Raises:
        FileNotFoundError: if no directory named `top_directory` exists between
            the current working directory and the filesystem root.
    """
    start_dir_path = Path(os.getcwd())
    current_dir_path = start_dir_path

    # NOTE: the comparison uses `stem`, so an ancestor called e.g. "Project.v2"
    # also matches top_directory="Project".
    while current_dir_path.stem != top_directory:
        parent_dir_path = current_dir_path.parent
        # At the filesystem root `parent` returns the path itself; without this
        # guard the loop would spin forever when the directory is not found.
        if parent_dir_path == current_dir_path:
            raise FileNotFoundError(
                f"[ERROR] : no directory named [{top_directory}] found above [{start_dir_path}]"
            )
        current_dir_path = parent_dir_path

    project_path = str(current_dir_path)
    # Avoid stacking duplicate entries when the cell is executed several times
    if project_path not in sys.path:
        sys.path.append(project_path)
    print(f"[INFO] : add [{current_dir_path}] to Python path")

# Put the "Project" directory on sys.path; presumably this is what lets the
# `modeling.*` imports in later cells resolve — confirm the package lives there.
add_top_directory_to_python_path(top_directory="Project")

[INFO] : add [/home/widium/Programming/AI/Vision-Transformer/Project] to Python path


### Load the DataLoaders

In [2]:
import torch
from pathlib import Path

# Directory holding the serialized dataset files (relative to the current working directory)
SAVING_PATH = Path("data/dataset/")

train_dataset_path = SAVING_PATH / "train_dataset.pth"
test_dataset_path = SAVING_PATH / "test_dataset.pth"

# NOTE(review): `torch.load` unpickles the file content, so only load files you trust.
# The files presumably hold whole pickled DataLoader objects (not tensors / state dicts) — confirm.
# If so, PyTorch releases that default to `weights_only=True` will refuse to load them
# unless `weights_only=False` is passed explicitly.
train_dataloader = torch.load(f=train_dataset_path)
test_dataloader = torch.load(f=test_dataset_path)

# Cell output: `len()` of each loaded object (presumably the number of batches)
len(train_dataloader), len(test_dataloader)

(8, 3)

### Create the ViT Base Instance
![](https://i.imgur.com/GLaAgax.png)

- Define the hyperparameter constants needed to recreate ViT-Base

In [3]:
NBR_CLASS = 3            # number of output classes of the classifier head
HEIGHT = 224             # input image height, in pixels
WIDTH = 224              # input image width, in pixels
COLOR = 3                # number of color channels of the input image
PATCH_SIZE = 16          # side of a square patch, in pixels (224 / 16 = 14 patches per side)
EMBEDDING = 768          # size of each token embedding
NBR_ENCODER_BLOCK = 12   # number of encoder blocks
NBR_HEADS = 12           # number of attention heads
MLP_UNITS = 3072         # units of the MLP (passed as `mlp_units` to the model)

In [4]:
from modeling.builder.vit_model import VisionTransformerClassifier

# Instantiate the classifier from the hyperparameter constants of the previous cell;
# the three dropout rates are set inline here rather than as named constants.
vit_base = VisionTransformerClassifier(
    nbr_classes=NBR_CLASS,
    height=HEIGHT,
    width=WIDTH,
    color_channels=COLOR,
    patch_size=PATCH_SIZE,
    embedding_size=EMBEDDING,
    nbr_encoder_blocks=NBR_ENCODER_BLOCK,
    nbr_heads=NBR_HEADS ,
    mlp_units=MLP_UNITS,
    dropout_embedding=0.1,
    dropout_attention=0.0,
    dropout_mlp=0.1,
)

In [5]:
from torchinfo import summary

# Layer-by-layer summary for a single input of shape (1, COLOR, HEIGHT, WIDTH);
# `row_settings=["var_names"]` adds each layer's attribute name next to its type.
summary(model=vit_base, 
        input_size=(1, COLOR, HEIGHT, WIDTH),
        col_names=["input_size", "output_size", "num_params", "trainable"],
        col_width=20,
        row_settings=["var_names"]
) 

Layer (type (var_name))                                                Input Shape          Output Shape         Param #              Trainable
VisionTransformerClassifier (VisionTransformerClassifier)              [1, 3, 224, 224]     [1, 3]               --                   True
├─ImageTokenizer (image_tokenizer)                                     [1, 3, 224, 224]     [1, 196, 768]        --                   True
│    └─PatchExtractor (patch_extractor)                                [1, 3, 224, 224]     [1, 3, 14, 14]       --                   True
│    │    └─Conv2d (patch_extractor)                                   [1, 3, 224, 224]     [1, 3, 14, 14]       2,307                True
│    └─PatchTokenizer (patch_tokenizer)                                [1, 3, 14, 14]       [1, 196, 3]          --                   --
│    │    └─Flatten (flatten)                                          [1, 3, 14, 14]       [1, 3, 196]          --                   --
│    └─PatchTokenEmbedding

### Create Loss Function, Optimizer and Metrics for Training Loop

In [6]:
from torch.nn import CrossEntropyLoss
from torchmetrics import Accuracy
from torch.optim import Adam


# Adam over every parameter of the model, with a fixed learning rate of 1e-3
optimizer = Adam(vit_base.parameters(), lr=0.001)
# Cross-entropy loss, constructed with its default arguments
loss_function = CrossEntropyLoss()
# Multiclass accuracy over the NBR_CLASS classes
metric_function = Accuracy(task="multiclass", num_classes=NBR_CLASS)

### Use the `train` Function 
- Set up the device

In [7]:
# NOTE(review): the device is hard-coded to "cpu" rather than auto-detected,
# so this cell is not device-agnostic. Change the string below to target
# another device (e.g. "cuda").
device = "cpu"

# Move the model to the chosen device (the same `device` is later passed to `train`)
vit_base.to(device)
print(f"Using device: {device}")

Using device: cpu


In [9]:
import os
import psutil

def get_memory_usage():
    """Return the resident set size (RSS) of the current process, in MB (1 MB = 1024**2 bytes)."""
    bytes_per_mb = 1024 ** 2
    # Look up this very process by PID and read its RSS in bytes
    rss_bytes = psutil.Process(os.getpid()).memory_info().rss
    return rss_bytes / bytes_per_mb

In [10]:
# RSS of the notebook kernel process at this point, in MB
memory_usage = get_memory_usage()
print(f"Memory usage: {memory_usage:.2f} MB")

Memory usage: 4905.39 MB


In [11]:
def get_total_memory():
    """Return the `total` field of `psutil.virtual_memory()`, in GB (1 GB = 1024**3 bytes)."""
    bytes_per_gb = 1024 ** 3
    total_bytes = psutil.virtual_memory().total
    return total_bytes / bytes_per_gb

# Total memory reported by psutil for this machine, in GB
total_memory = get_total_memory()
print(f"Total memory capacity: {total_memory:.2f} GB")

Total memory capacity: 31.08 GB


In [12]:
from torch.nn import Module

def compute_size_of_model(model : Module)->dict:
    """Measure the memory footprint of the tensors held by a PyTorch model.

    Args:
        model (Module): model to inspect

    Returns:
        dict: sizes in MegaBytes (1 MB = 1024**2 bytes) under 3 keys
        - `params` : total size of every tensor yielded by `model.parameters()`
          (frozen parameters are counted too)
        - `buffer` : total size of every tensor yielded by `model.buffers()`
        - `entire` : params + buffer
    """
    bytes_per_megabyte = 1024**2

    def tensors_to_megabytes(tensors) -> float:
        # element count * bytes per element = bytes occupied by one tensor
        total_bytes = sum(t.nelement() * t.element_size() for t in tensors)
        return total_bytes / bytes_per_megabyte

    params_size = tensors_to_megabytes(model.parameters())
    buffer_size = tensors_to_megabytes(model.buffers())

    return {
        "params": params_size,
        "buffer": buffer_size,
        "entire": params_size + buffer_size,
    }

In [13]:
# Report the size of the model's parameters and buffers, in MB
size = compute_size_of_model(vit_base)

print(f'Model Parameters size: {size["params"]:.3f} (MB)')
print(f'Model Utils size: {size["buffer"]:.3f} (MB)')  # "Utils" = buffers
print(f'Model Entire Size: {size["entire"]:.3f} (MB)')

Model Parameters size: 325.072 (MB)
Model Utils size: 0.000 (MB)
Model Entire Size: 325.072 (MB)


In [12]:
from modeling.training.train import train

# Run the project's `train` function for 5 epochs on the objects built in the cells above.
# The return value is kept in `history` (presumably per-epoch losses/metrics — confirm in `train`).
history = train(
    model=vit_base,
    train_dataloader=train_dataloader,
    test_dataloader=test_dataloader,
    optimizer=optimizer,
    loss_function=loss_function,
    metric_function=metric_function,
    device=device,
    epochs=5
)

  0%|          | 0/5 [00:00<?, ?it/s]

Epochs            | Train Loss        | Train Accuracy    | Val Loss          | Val Accuracy      |
20.0% [1/5]       | 3.3105            | 0.3242            | 1.0139            | 0.5417            |
-----------------------------------------------------------------------------------------------
