In [None]:
# download the Horse, Fish and Monkey classes: up to 1000 train, 125 validation and 125 test images per class
# `yes |` answers "y" to any interactive prompt the downloader shows
# NOTE(review): --dataset is passed twice on every line; presumably the last value ("data/") is the one used — confirm
!yes | oidv6 downloader --dataset=OIDv6 --classes "Horse" "Fish" "Monkey" --type_data "train" --no_labels --limit 1000 --dataset "data/"
!yes | oidv6 downloader --dataset=OIDv6 --classes "Horse" "Fish" "Monkey" --type_data "validation" --no_labels --limit 125  --dataset "data/"
!yes | oidv6 downloader --dataset=OIDv6 --classes "Horse" "Fish" "Monkey" --type_data "test" --no_labels --limit 125  --dataset "data/"

In [None]:
# model properties (shared by the training, testing and serving cells)
# size of the network's output layer; equals len(CLASSES)
NUMBER_OF_CLASSES = 3
# display names looked up by predicted class index
# NOTE(review): presumably this order must match the class indices ImageFolder assigns to the data folders — confirm
CLASSES = ["Fish", "Horse", "Monkey"]

# DataLoader settings reused for every dataset in the notebook
BATCH_SIZE = 16
NUMBER_OF_WORKERS = 4

# every image is resized to IMAGE_SIZE x IMAGE_SIZE pixels before it reaches the model
IMAGE_SIZE = 128

In [None]:
# train imports
import torch
import torchvision
import numpy as np
import matplotlib.pyplot as plt

In [None]:
# train properties
# expected image counts per class
# NOTE(review): presumably these mirror the --limit values of the download cell — keep them in sync
NUMBER_OF_TRAIN_IMAGES_PER_CLASS = 1000
NUMBER_OF_VALIDATION_IMAGES_PER_CLASS = 125

# optimizer step size and number of passes over the training set
LEARNING_RATE = 0.001
NUMBER_OF_EPOCHS = 40


In [None]:
class ConvolutionalNetwork(torch.nn.Module):
  """Three-block convolutional image classifier.

  Each block applies two 3x3 convolutions (each followed by ReLU and then
  batch normalization), a 2x2 max pooling and dropout. The feature map is
  adaptively average-pooled to 8x8, flattened and passed through two fully
  connected layers.

  Args:
    number_of_classes: size of the output layer (one logit per class).
  """

  def __init__(self, number_of_classes: int) -> None:
    super().__init__()

    # block 1
    self.conv_1_1 = torch.nn.Conv2d(in_channels=3, out_channels=32, kernel_size=3, padding=1)
    self.batch_norm_1_1 = torch.nn.BatchNorm2d(num_features=32)
    self.conv_1_2 = torch.nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, padding=1)
    self.batch_norm_1_2 = torch.nn.BatchNorm2d(64)
    self.pool_1 = torch.nn.MaxPool2d(kernel_size=2, stride=2)
    self.dropout_1 = torch.nn.Dropout(p=0.25)

    # block 2
    self.conv_2_1 = torch.nn.Conv2d(in_channels=64, out_channels=64, kernel_size=3, padding=1)
    self.batch_norm_2_1 = torch.nn.BatchNorm2d(64)
    self.conv_2_2 = torch.nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3, padding=1)
    self.batch_norm_2_2 = torch.nn.BatchNorm2d(128)
    self.pool_2 = torch.nn.MaxPool2d(kernel_size=2, stride=2)
    self.dropout_2 = torch.nn.Dropout(p=0.25)

    # block 3
    self.conv_3_1 = torch.nn.Conv2d(in_channels=128, out_channels=256, kernel_size=3, padding=1)
    self.batch_norm_3_1 = torch.nn.BatchNorm2d(256)
    self.conv_3_2 = torch.nn.Conv2d(in_channels=256, out_channels=256, kernel_size=3, padding=1)
    self.batch_norm_3_2 = torch.nn.BatchNorm2d(256)
    self.pool_3 = torch.nn.MaxPool2d(kernel_size=2, stride=2)
    self.dropout_3 = torch.nn.Dropout(p=0.25)

    # fixes the spatial size to 8x8 so fc1 always receives 256 * 8 * 8 features
    self.adaptive_pool = torch.nn.AdaptiveAvgPool2d((8, 8))

    # fully connected layers
    self.fc1 = torch.nn.Linear(256 * 8 * 8, 1024)
    self.dropout_fc = torch.nn.Dropout(p=0.5)
    self.fc2 = torch.nn.Linear(1024, number_of_classes)

    # stateless helpers shared by every stage
    self.relu = torch.nn.ReLU()
    self.flatten = torch.nn.Flatten()

  def forward(self, x: torch.Tensor) -> torch.Tensor:
    """Compute class logits for a batch of images.

    The layers are called directly instead of wrapping them in a new
    torch.nn.Sequential on every call. Layer order (conv -> ReLU -> batch
    norm) and attribute names are unchanged, so previously saved state
    dicts load and behave exactly as before.

    Args:
      x: batch of images with 3 channels (conv_1_1 expects in_channels=3).

    Returns:
      Tensor with one row per image and `number_of_classes` logits per row.
    """
    # block 1
    x = self.batch_norm_1_1(self.relu(self.conv_1_1(x)))
    x = self.batch_norm_1_2(self.relu(self.conv_1_2(x)))
    x = self.dropout_1(self.pool_1(x))

    # block 2
    x = self.batch_norm_2_1(self.relu(self.conv_2_1(x)))
    x = self.batch_norm_2_2(self.relu(self.conv_2_2(x)))
    x = self.dropout_2(self.pool_2(x))

    # block 3
    x = self.batch_norm_3_1(self.relu(self.conv_3_1(x)))
    x = self.batch_norm_3_2(self.relu(self.conv_3_2(x)))
    x = self.dropout_3(self.pool_3(x))

    # classifier head
    x = self.flatten(self.adaptive_pool(x))
    x = self.dropout_fc(self.relu(self.fc1(x)))
    return self.fc2(x)

In [None]:
# calculate normalization for train dataset
# images are only resized and converted to tensors here, so the statistics
# describe the raw (un-normalized, un-augmented) training pixels
normalization_transformations = torchvision.transforms.Compose([
    torchvision.transforms.Resize((IMAGE_SIZE, IMAGE_SIZE)),
    torchvision.transforms.ToTensor(),
])

normalization_dataset = torchvision.datasets.ImageFolder(
    root="data/train/",
    transform=normalization_transformations
)

normalization_dataloader = torch.utils.data.DataLoader(dataset=normalization_dataset, batch_size=BATCH_SIZE, num_workers=NUMBER_OF_WORKERS)

# float64 accumulators: the E[x^2] - E[x]^2 formula below loses precision
# quickly when tens of millions of float32 values are summed
channels_sum = torch.zeros(3, dtype=torch.float64)
channels_sq_sum = torch.zeros(3, dtype=torch.float64)
# counted from the batches actually loaded; the download limit is only an
# upper bound, so the dataset may hold fewer images than configured
total_pixels = 0

for images, _labels in normalization_dataloader:
    images = images.to(torch.float64)
    channels_sum += images.sum(dim=[0, 2, 3])
    channels_sq_sum += (images ** 2).sum(dim=[0, 2, 3])
    total_pixels += images.shape[0] * images.shape[2] * images.shape[3]

mean_f64 = channels_sum / total_pixels
# clamp guards against a tiny negative variance caused by rounding
var_f64 = torch.clamp((channels_sq_sum / total_pixels) - mean_f64**2, min=0.0)

# results are exposed as float32 tensors, as before
normalization_mean = mean_f64.to(torch.float32)
normalization_var = var_f64.to(torch.float32)
normalization_std = torch.sqrt(var_f64).to(torch.float32)

print(normalization_mean)
print(normalization_var)
print(normalization_std)

In [None]:
# calculated values are assigned manually to avoid recalculation
# per-channel statistics passed to torchvision.transforms.Normalize in every pipeline
# NOTE(review): presumably copied from the output of the normalization cell — recompute if the training data changes
NORMALIZATION_MEAN = [0.4469, 0.4574, 0.4104]
NORMALIZATION_STD = [0.2720, 0.2576, 0.2663]

In [None]:
# train and validation transformations
def _resize_to_normalized_tensor():
  """Return fresh resize -> tensor -> normalize steps shared by both pipelines."""
  return [
      torchvision.transforms.Resize(size=(IMAGE_SIZE, IMAGE_SIZE)),
      torchvision.transforms.ToTensor(),
      torchvision.transforms.Normalize(NORMALIZATION_MEAN, NORMALIZATION_STD),
  ]


# random augmentations are applied to training images only
_train_augmentations = [
    torchvision.transforms.RandomHorizontalFlip(),
    torchvision.transforms.RandomRotation(degrees=15),
    torchvision.transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2),
]

train_transformations = torchvision.transforms.Compose(
    _train_augmentations + _resize_to_normalized_tensor()
)

# validation images get the deterministic steps only
validation_transformations = torchvision.transforms.Compose(
    _resize_to_normalized_tensor()
)


In [None]:
# initialize train device: use CUDA when it is available, otherwise fall back to the CPU
train_device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(train_device)

In [None]:
# define datasets and dataloaders
# each dataset reads the images below its root folder and applies the matching transform pipeline
train_dataset = torchvision.datasets.ImageFolder(root="data/train/", transform=train_transformations)
validation_dataset = torchvision.datasets.ImageFolder(root="data/validation/", transform=validation_transformations)

# only the training loader shuffles; validation batches are not shuffled
train_dataloader = torch.utils.data.DataLoader(dataset=train_dataset, batch_size=BATCH_SIZE, num_workers=NUMBER_OF_WORKERS, shuffle=True)
validation_dataloader = torch.utils.data.DataLoader(dataset=validation_dataset, batch_size=BATCH_SIZE, num_workers=NUMBER_OF_WORKERS, shuffle=False)

In [None]:
# sanity check: look at a single batch instead of iterating the whole
# training set and printing every image tensor
images, labels = next(iter(train_dataloader))
print(images.shape, images.dtype)
print(labels.shape, labels.dtype)

In [None]:
# initialize train model and move its parameters to the train device
train_model = ConvolutionalNetwork(number_of_classes=NUMBER_OF_CLASSES)
train_model.to(train_device)

In [None]:
# train model
# per-epoch history, filled in by the loop below
train_losses = np.zeros(NUMBER_OF_EPOCHS)
validation_losses = np.zeros(NUMBER_OF_EPOCHS)

train_accuracies = np.zeros(NUMBER_OF_EPOCHS)
validation_accuracies = np.zeros(NUMBER_OF_EPOCHS)

loss_function = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(params=train_model.parameters(), lr=LEARNING_RATE)

for epoch in range(NUMBER_OF_EPOCHS):
  # losses are accumulated as per-sample sums: the loss function returns a
  # batch mean, and averaging those means directly would give a smaller
  # final batch the same weight as a full one
  train_loss_sum = 0.0
  validation_loss_sum = 0.0

  correct_train = 0
  total_train = 0

  correct_validation = 0
  total_validation = 0

  # train (dropout active, batch norm uses batch statistics)
  train_model.train()
  for images, labels in train_dataloader:
    images = images.to(train_device)
    labels = labels.to(train_device)
    samples_in_batch = labels.size(0)

    optimizer.zero_grad()
    predictions = train_model(images)
    train_loss = loss_function(predictions, labels)
    train_loss.backward()
    optimizer.step()

    train_loss_sum += train_loss.item() * samples_in_batch

    preds = torch.argmax(predictions, dim=1)
    correct_train += (preds == labels).sum().item()
    total_train += samples_in_batch

  # validate (no gradient tracking, dropout disabled, batch norm uses running statistics)
  train_model.eval()
  with torch.no_grad():
    for images, labels in validation_dataloader:
      images = images.to(train_device)
      labels = labels.to(train_device)
      samples_in_batch = labels.size(0)

      predictions = train_model(images)
      validation_loss = loss_function(predictions, labels)
      validation_loss_sum += validation_loss.item() * samples_in_batch

      preds = torch.argmax(predictions, dim=1)
      correct_validation += (preds == labels).sum().item()
      total_validation += samples_in_batch

  # per-sample averages over the whole epoch
  train_loss_mean = train_loss_sum / total_train
  validation_loss_mean = validation_loss_sum / total_validation

  train_accuracy = correct_train / total_train
  validation_accuracy = correct_validation / total_validation

  train_losses[epoch] = train_loss_mean
  validation_losses[epoch] = validation_loss_mean

  train_accuracies[epoch] = train_accuracy
  validation_accuracies[epoch] = validation_accuracy

  print(f'Epoch: {epoch}, Train loss: {train_loss_mean} Validation loss: {validation_loss_mean}')
  print(f'Epoch: {epoch}, Train accuracy: {train_accuracy} Validation accuracy: {validation_accuracy}')

In [None]:
# x axis: epochs numbered from 1
epochs = np.arange(1, NUMBER_OF_EPOCHS + 1)

# one figure per metric:
# (train series, validation series, train label, validation label, y label, title)
curve_specs = [
    # loss graph
    (train_losses, validation_losses, 'Train Loss', 'Validation Loss', 'Loss', 'Loss over Epochs'),
    # accuracy graph
    (train_accuracies, validation_accuracies, 'Train Accuracy', 'Validation Accuracy', 'Accuracy', 'Accuracy over Epochs'),
]

for train_curve, validation_curve, train_label, validation_label, y_label, title in curve_specs:
    plt.figure(figsize=(10, 4))
    plt.plot(epochs, train_curve, label=train_label)
    plt.plot(epochs, validation_curve, label=validation_label)
    plt.xlabel('Epoch')
    plt.ylabel(y_label)
    plt.title(title)
    plt.legend()
    plt.grid(True)
    plt.tight_layout()
    plt.show()

In [None]:
# save train model: only the state dict is written, so loading it requires
# constructing a ConvolutionalNetwork first and calling load_state_dict
torch.save(train_model.state_dict(), "final_CN.pth")

In [None]:
# test imports
import torch
import torchvision
import matplotlib.pyplot as plt
import seaborn as sns
import sklearn.metrics

In [None]:
# initialize test device: use CUDA when it is available, otherwise fall back to the CPU
test_device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(test_device)

In [None]:
# initialize test model from the saved state dict
test_model = ConvolutionalNetwork(number_of_classes=NUMBER_OF_CLASSES)
# map_location remaps the stored tensors onto the test device, so a checkpoint
# saved from a CUDA model can also be loaded on a CPU-only machine
test_model.load_state_dict(
    torch.load("final_CN.pth", map_location=test_device, weights_only=True)
)
test_model.to(test_device)

In [None]:
# test transformations: resize, convert to tensor and normalize with the
# training statistics (same steps as validation, no random augmentation)
test_transformations = torchvision.transforms.Compose([
  torchvision.transforms.Resize(size=(IMAGE_SIZE, IMAGE_SIZE)),
  torchvision.transforms.ToTensor(),
  torchvision.transforms.Normalize(NORMALIZATION_MEAN, NORMALIZATION_STD),
])

In [None]:
# initialize test dataset and dataloader (shuffling disabled)
test_dataset = torchvision.datasets.ImageFolder(root="data/test/", transform=test_transformations)
test_dataloader = torch.utils.data.DataLoader(dataset=test_dataset, batch_size=BATCH_SIZE, num_workers=NUMBER_OF_WORKERS, shuffle=False)


In [None]:
# run the test set through the model and collect the predicted and true
# class indices for the statistics cell
test_model.eval()

all_preds = []
all_labels = []

with torch.no_grad():
    for batch_images, batch_labels in test_dataloader:
        logits = test_model(batch_images.to(test_device))
        batch_preds = logits.argmax(dim=1)

        all_preds.extend(batch_preds.cpu().numpy())
        # labels are only needed for the metrics, so they stay on the CPU
        all_labels.extend(batch_labels.numpy())

In [None]:
# test statistics: confusion matrix heatmap followed by the per-class report
cm = sklearn.metrics.confusion_matrix(y_true=all_labels, y_pred=all_preds)

confusion_figure, confusion_axes = plt.subplots(figsize=(5, 4))
sns.heatmap(
    cm,
    annot=True,
    fmt='d',
    cmap='Greens',
    xticklabels=CLASSES,
    yticklabels=CLASSES,
    ax=confusion_axes,
)
confusion_axes.set_xlabel("Predicted")
confusion_axes.set_ylabel("Actual")
confusion_axes.set_title("Confusion Matrix")
plt.show()

print(sklearn.metrics.classification_report(all_labels, all_preds, target_names=CLASSES))

In [None]:
# server application
import fastapi
import torch
import torch.nn.functional as F
import torchvision
import uvicorn
from PIL import Image
import io


class ModelController(object):
    """Holds the trained classifier and exposes it as an HTTP endpoint.

    The constructor loads the weights stored in "final_CN.pth" into a
    ConvolutionalNetwork, moves the model to CUDA when available (CPU
    otherwise) and prepares the preprocessing pipeline (resize, tensor
    conversion, normalization) applied to every uploaded image.
    """

    def __init__(self) -> None:
        # the device is resolved first so the checkpoint can be mapped onto it
        self.prod_device = torch.device(
            "cuda" if torch.cuda.is_available() else "cpu"
        )

        self.prod_model = ConvolutionalNetwork(
            number_of_classes=NUMBER_OF_CLASSES
        )
        # map_location lets a checkpoint saved from a CUDA model load on a
        # CPU-only host
        self.prod_model.load_state_dict(
            torch.load(
                "final_CN.pth",
                map_location=self.prod_device,
                weights_only=True,
            )
        )
        self.prod_model.to(self.prod_device)
        # inference mode: disables dropout, batch norm uses running statistics
        self.prod_model.eval()

        self.prod_transformations = torchvision.transforms.Compose(
            [
                torchvision.transforms.Resize(size=(IMAGE_SIZE, IMAGE_SIZE)),
                torchvision.transforms.ToTensor(),
                torchvision.transforms.Normalize(
                    NORMALIZATION_MEAN, NORMALIZATION_STD
                ),
            ]
        )

    def model_endpoint(
        self,
        image_file: fastapi.UploadFile = fastapi.File(...),
    ) -> fastapi.Response:
        """Classify an uploaded image.

        Args:
            image_file: uploaded file holding the image to classify.

        Returns:
            JSON response with the key "Predicted class" (name of the class
            with the highest score) and "Predictions" (softmax probability
            for every entry of CLASSES).

        Raises:
            fastapi.HTTPException: status 400 when the upload cannot be
                decoded as an image.
        """
        image_bytes = image_file.file.read()
        try:
            image = Image.open(io.BytesIO(image_bytes)).convert("RGB")
        except (OSError, Image.DecompressionBombError) as error:
            # the upload is untrusted input: report undecodable data as a
            # client error instead of an unhandled server error
            raise fastapi.HTTPException(
                status_code=400,
                detail="Uploaded file is not a readable image.",
            ) from error

        # add the batch dimension the model expects
        image_tensor = (
            self.prod_transformations(image).unsqueeze(0).to(self.prod_device)
        )

        with torch.no_grad():
            outputs = self.prod_model(image_tensor)

        predicted_idx = torch.argmax(outputs, dim=1).item()
        predicted_class = CLASSES[predicted_idx]
        # drop only the batch dimension; tolist() yields plain Python floats
        softmax_values = F.softmax(outputs, dim=1).squeeze(0).cpu().tolist()
        prediction_mapping = {
            class_name: float(softmax_values[i])
            for i, class_name in enumerate(CLASSES)
        }
        return fastapi.responses.JSONResponse(
            {
                "Predicted class": predicted_class,
                "Predictions": prediction_mapping,
            }
        )


async def main() -> None:
    """Build the FastAPI application and serve it with uvicorn.

    A ModelController is created and its `model_endpoint` method is
    registered as the POST handler for "/model". The coroutine then awaits
    the uvicorn server, which runs with its default configuration.
    """
    prediction_handler = ModelController().model_endpoint

    api = fastapi.FastAPI()
    api.add_api_route(
        path="/model",
        endpoint=prediction_handler,
        methods=["POST"],
    )

    await uvicorn.Server(uvicorn.Config(app=api)).serve()


# start the server when this cell runs as the main module
# NOTE: top-level `await` is accepted in IPython/Jupyter cells but is a SyntaxError in a plain .py script
if __name__ == "__main__":
    await main()