In [None]:
!pip install transformers torch datasets



In [None]:
from google.colab import drive
# Mount Google Drive at /content/drive; the dataset folders referenced later
# in this notebook all live under that mount point.
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
import os

import torch
from datasets import load_dataset, DatasetDict, Dataset
from sklearn.model_selection import train_test_split
from torchvision import transforms
from transformers import (
    Trainer,
    TrainingArguments,
    ViTFeatureExtractor,
    ViTForImageClassification,
    ViTImageProcessor,
)

# ---------------------------------------------------------------------------
# Notebook-wide settings
# ---------------------------------------------------------------------------
image_size = (224, 224)  # target size passed to transforms.Resize
# NOTE(review): batch_size is not referenced by any other cell visible in this
# notebook; the TrainingArguments cell sets its own per-device batch sizes.
batch_size = 32
num_classes = 5

# Root folder on the mounted Drive; every class folder lives below it.
base_path = '/content/drive/MyDrive/Aaron/VIT_Model'


def _under_base(*parts):
    """Join `parts` onto `base_path` and return the resulting path."""
    return os.path.join(base_path, *parts)


# Class name -> folder holding that class's training images.
# Insertion order matters: it is the order in which folders are scanned.
train_paths = dict([
    ('Myocardial Infraction', _under_base('Myocardial Infraction', 'MTrain')),
    ('Abnormal Heartbeats', _under_base('Abnormal', 'AbnormalTrain')),
    ('Normal Heartbeats', _under_base('Normal', 'NormalTrain')),
    ('History of MI', _under_base('HistoryOfMI', 'HistoryOfMiTrain')),
    ('Covid_19', _under_base('Covid_19', 'Covid_19Train')),
])

# Class name -> folder holding that class's test images (same key order).
test_paths = dict([
    ('Myocardial Infraction', _under_base('Myocardial Infraction', 'MTest')),
    ('Abnormal Heartbeats', _under_base('Abnormal', 'AbnormalTest')),
    ('Normal Heartbeats', _under_base('Normal', 'NormalTest')),
    ('History of MI', _under_base('HistoryOfMI', 'HistoryOfMiTest')),
    ('Covid_19', _under_base('Covid_19', 'Covid_19Test')),
])


def load_images_from_folder(folder, label, extensions=None):
    """List the regular files directly inside `folder`, each paired with `label`.

    Args:
        folder: Directory to scan. The scan is not recursive; sub-directories
            are skipped.
        label: Value repeated once for every file that is kept.
        extensions: Optional file suffix (str) or iterable of suffixes such as
            ('.png', '.jpg'). When given, only files whose name ends with one
            of them (case-insensitive) are kept. The default `None` keeps
            every regular file.

    Returns:
        A tuple `(images, labels)` of two equal-length lists: the file paths
        (sorted by file name) and `label` repeated once per path.

    Raises:
        OSError: propagated from `os.listdir` if `folder` cannot be listed.
    """
    if extensions is None:
        suffixes = None
    elif isinstance(extensions, str):
        suffixes = (extensions.lower(),)
    else:
        suffixes = tuple(ext.lower() for ext in extensions)

    images = []
    # os.listdir returns entries in arbitrary order; sorting keeps the
    # resulting dataset order reproducible from run to run.
    for filename in sorted(os.listdir(folder)):
        img_path = os.path.join(folder, filename)
        if not os.path.isfile(img_path):
            continue
        if suffixes is not None and not filename.lower().endswith(suffixes):
            continue
        images.append(img_path)

    labels = [label] * len(images)
    return images, labels

# Collect file paths and class names for the training split.
train_images = []
train_labels = []
for label, path in train_paths.items():
    images, labels = load_images_from_folder(path, label)
    train_images += images
    train_labels += labels

# Same for the test split.
test_images = []
test_labels = []
for label, path in test_paths.items():
    images, labels = load_images_from_folder(path, label)
    test_images += images
    test_labels += labels

# Integer id assigned to each class name.
label_mapping = {
    'Myocardial Infraction': 0,
    'Abnormal Heartbeats': 1,
    'Normal Heartbeats': 2,
    'History of MI': 3,
    'Covid_19': 4,
}

# Swap the class names for their integer ids.
train_labels = list(map(label_mapping.__getitem__, train_labels))
test_labels = list(map(label_mapping.__getitem__, test_labels))

# Column-oriented dicts: one 'image' (file path) and one 'label' (id) per row.
train_data = dict(image=train_images, label=train_labels)
test_data = dict(image=test_images, label=test_labels)

train_dataset = Dataset.from_dict(train_data)
test_dataset = Dataset.from_dict(test_data)

dataset = DatasetDict({'train': train_dataset, 'test': test_dataset})

# Load the preprocessing configuration published with the pretrained
# checkpoint. ViTImageProcessor is the current class; ViTFeatureExtractor is
# its deprecated alias and exposes the same image_mean / image_std attributes.
feature_extractor = ViTImageProcessor.from_pretrained('google/vit-base-patch16-224')

# Resize to `image_size`, convert to a tensor, then normalise with the
# checkpoint's own per-channel mean and std.
transform = transforms.Compose([
    transforms.Resize(image_size),
    transforms.ToTensor(),
    transforms.Normalize(mean=feature_extractor.image_mean, std=feature_extractor.image_std),
])





In [None]:
from PIL import Image
import os
def load_images_from_folder(folder, label, extensions=None):
    """List the regular files directly inside `folder`, each paired with `label`.

    Args:
        folder: Directory to scan. The scan is not recursive; sub-directories
            are skipped.
        label: Value repeated once for every file that is kept.
        extensions: Optional file suffix (str) or iterable of suffixes such as
            ('.png', '.jpg'). When given, only files whose name ends with one
            of them (case-insensitive) are kept. The default `None` keeps
            every regular file.

    Returns:
        A tuple `(images, labels)` of two equal-length lists: the file paths
        (sorted by file name) and `label` repeated once per path.

    Raises:
        OSError: propagated from `os.listdir` if `folder` cannot be listed.
    """
    if extensions is None:
        suffixes = None
    elif isinstance(extensions, str):
        suffixes = (extensions.lower(),)
    else:
        suffixes = tuple(ext.lower() for ext in extensions)

    images = []
    # os.listdir returns entries in arbitrary order; sorting keeps the
    # resulting dataset order reproducible from run to run.
    for filename in sorted(os.listdir(folder)):
        img_path = os.path.join(folder, filename)
        if not os.path.isfile(img_path):
            continue
        if suffixes is not None and not filename.lower().endswith(suffixes):
            continue
        images.append(img_path)

    labels = [label] * len(images)
    return images, labels

# NOTE(review): this repeats the dataset construction from the previous cell.
# Rebuilding `dataset` here means the `map` call further down this cell always
# starts from the raw file-path dataset, even when the cell is re-run.

# File paths and class names for the training split.
train_images = []
train_labels = []
for label, path in train_paths.items():
    images, labels = load_images_from_folder(path, label)
    train_images += images
    train_labels += labels

# File paths and class names for the test split.
test_images = []
test_labels = []
for label, path in test_paths.items():
    images, labels = load_images_from_folder(path, label)
    test_images += images
    test_labels += labels

# Integer id assigned to each class name.
label_mapping = {
    'Myocardial Infraction': 0,
    'Abnormal Heartbeats': 1,
    'Normal Heartbeats': 2,
    'History of MI': 3,
    'Covid_19': 4,
}

# Swap the class names for their integer ids.
train_labels = list(map(label_mapping.__getitem__, train_labels))
test_labels = list(map(label_mapping.__getitem__, test_labels))

# Column-oriented dicts: one 'image' (file path) and one 'label' (id) per row.
train_data = dict(image=train_images, label=train_labels)
test_data = dict(image=test_images, label=test_labels)

train_dataset = Dataset.from_dict(train_data)
test_dataset = Dataset.from_dict(test_data)

dataset = DatasetDict({'train': train_dataset, 'test': test_dataset})

# Load the preprocessing configuration published with the pretrained
# checkpoint. ViTImageProcessor is the current class; ViTFeatureExtractor is
# its deprecated alias and exposes the same image_mean / image_std attributes.
feature_extractor = ViTImageProcessor.from_pretrained('google/vit-base-patch16-224')

# Resize to `image_size`, convert to a tensor, then normalise with the
# checkpoint's own per-channel mean and std.
transform = transforms.Compose([
    transforms.Resize(image_size),
    transforms.ToTensor(),
    transforms.Normalize(mean=feature_extractor.image_mean, std=feature_extractor.image_std),
])

def transform_example(example):
    """Add a 'pixel_values' entry to a batch of examples.

    `example['image']` is iterated as a collection of file paths. Every file
    is opened with PIL and converted to RGB; afterwards each image is passed
    through the module-level `transform`. The results are stored under
    `example['pixel_values']` and the same (mutated) `example` is returned.
    """
    rgb_images = []
    for img_path in example['image']:
        rgb_images.append(Image.open(img_path).convert("RGB"))
    example['pixel_values'] = list(map(transform, rgb_images))
    return example

# Run the preprocessing over every split in batches; this adds the
# 'pixel_values' column produced by transform_example.
dataset = dataset.map(function=transform_example, batched=True)

# Return torch tensors, restricted to the two columns listed here.
dataset.set_format('torch', columns=['pixel_values', 'label'])


Map:   0%|          | 0/500 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

In [None]:
from transformers import ViTForImageClassification
# Store the class-id <-> class-name mapping in the model config so the saved
# model reports the real class names instead of generic LABEL_<i> placeholders.
id2label = {class_id: class_name for class_name, class_id in label_mapping.items()}

model = ViTForImageClassification.from_pretrained(
    'google/vit-base-patch16-224',
    num_labels=num_classes,
    id2label=id2label,
    label2id=dict(label_mapping),
    # The checkpoint's classifier head has a different output size than
    # `num_classes`; let it be re-initialised instead of raising.
    ignore_mismatched_sizes=True,
)


model.safetensors:   0%|          | 0.00/346M [00:00<?, ?B/s]

Some weights of ViTForImageClassification were not initialized from the model checkpoint at google/vit-base-patch16-224 and are newly initialized because the shapes did not match:
- classifier.bias: found shape torch.Size([1000]) in the checkpoint and torch.Size([5]) in the model instantiated
- classifier.weight: found shape torch.Size([1000, 768]) in the checkpoint and torch.Size([5, 768]) in the model instantiated
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [None]:
from transformers import TrainingArguments

# Settings handed to the Trainer. Evaluation and checkpoint saving are both
# configured to run once per epoch; checkpoints go to ./results and logs to
# ./logs.
training_kwargs = dict(
    output_dir='./results',
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
    num_train_epochs=10,
    eval_strategy="epoch",
    save_strategy="epoch",
    logging_dir='./logs',
    logging_steps=10,
)
training_args = TrainingArguments(**training_kwargs)


In [None]:
import os

# Export CUDA_LAUNCH_BLOCKING=1 into this process's environment.
# NOTE(review): this looks like a debugging aid; confirm it is still wanted,
# since it stays set for the training run started later in the notebook.
os.environ.update({'CUDA_LAUNCH_BLOCKING': '1'})


In [None]:
# Quick sanity check of the class count and split sizes before training.
summary_lines = (
    f'Number of labels: {num_classes}',
    f'Train dataset size: {len(dataset["train"])}',
    f'Test dataset size: {len(dataset["test"])}',
)
print(*summary_lines, sep='\n')


Number of labels: 5
Train dataset size: 500
Test dataset size: 100


In [None]:
import torch

# Report whether a GPU is visible to PyTorch. The device queries only make
# sense (and only succeed) when CUDA is available, so they are skipped on a
# CPU-only runtime instead of failing the cell.
cuda_available = torch.cuda.is_available()
print(cuda_available)
if cuda_available:
    device_index = torch.cuda.current_device()
    print(device_index)
    print(torch.cuda.get_device_name(device_index))

True
0
Tesla T4


In [None]:
# !pip install --upgrade torch torchvision transformers



In [None]:
from transformers import Trainer

# Wire the model, the training configuration and both dataset splits into a
# Trainer. No metric function is passed here; a later cell assigns
# `trainer.compute_metrics`.
train_split = dataset['train']
eval_split = dataset['test']
trainer = Trainer(model=model, args=training_args, train_dataset=train_split, eval_dataset=eval_split)


In [None]:
# Run the training loop configured above; the returned TrainOutput is the
# cell's displayed value.
trainer.train()

Epoch,Training Loss,Validation Loss
1,0.5883,0.473482
2,0.3415,0.241798
3,0.2115,0.27643
4,0.0734,0.059818
5,0.0371,0.02382
6,0.0023,0.017902
7,0.0038,0.058887
8,0.001,0.010685
9,0.0009,0.012455
10,0.0008,0.013763


TrainOutput(global_step=320, training_loss=0.15637990353425266, metrics={'train_runtime': 665.3462, 'train_samples_per_second': 7.515, 'train_steps_per_second': 0.481, 'total_flos': 3.8747036611584e+17, 'train_loss': 0.15637990353425266, 'epoch': 10.0})

In [None]:
# Evaluate on the trainer's eval dataset (dataset['test']) and show the
# resulting metrics dict.
metrics = trainer.evaluate()
print(metrics)

{'eval_loss': 0.013762980699539185, 'eval_runtime': 2.1262, 'eval_samples_per_second': 47.033, 'eval_steps_per_second': 3.292, 'epoch': 10.0}


In [None]:
from sklearn.metrics import accuracy_score

def compute_metrics(p):
    """Compute classification accuracy from an evaluation result.

    Args:
        p: Object exposing `predictions` (the logits array, or a tuple whose
            first element is the logits array) and `label_ids` (the true
            class ids).

    Returns:
        dict with the single key 'accuracy'.
    """
    logits = p.predictions
    # `predictions` can arrive as a tuple when the model returns more than
    # the logits; the logits are taken from the first position.
    if isinstance(logits, tuple):
        logits = logits[0]
    predictions = logits.argmax(axis=-1)
    return {
        'accuracy': accuracy_score(p.label_ids, predictions)
    }

# Attach the metric function to the already-constructed trainer so that the
# next evaluate() call also reports accuracy.
trainer.compute_metrics = compute_metrics

# Re-run evaluation on the trainer's eval dataset and show the metrics dict.
results = trainer.evaluate()
print(results)


{'eval_loss': 0.013762980699539185, 'eval_accuracy': 0.99, 'eval_runtime': 2.1556, 'eval_samples_per_second': 46.39, 'eval_steps_per_second': 3.247, 'epoch': 10.0}


In [None]:
import numpy as np
from sklearn.metrics import accuracy_score

# Run inference over the test split; the result carries the raw logits and
# the true label ids.
test_predictions = trainer.predict(test_dataset=dataset['test'])

# Predicted class = index of the largest logit along the last axis.
logits = test_predictions.predictions
predictions = np.argmax(logits, axis=-1)


In [None]:
# Compare the predicted class ids from the previous cell with the true ids.
true_labels = np.array(test_predictions.label_ids)
accuracy = accuracy_score(y_true=true_labels, y_pred=predictions)
print(f'Accuracy: {accuracy}')

Accuracy: 0.99


In [None]:
# Save the fine-tuned weights and config together with the preprocessing
# config, so the directory can be reloaded on its own for inference.
# NOTE(review): this is a relative path, not a path under the mounted Drive —
# confirm the saved model is copied somewhere persistent if it must outlive
# the runtime.
save_dir = './fine-tuned-vit'
model.save_pretrained(save_dir)
feature_extractor.save_pretrained(save_dir)