In [None]:
# Pin the FAISS build so a fresh runtime reproduces the same environment;
# %pip (unlike !pip) installs into the interpreter the kernel is running on.
%pip install -q faiss-gpu==1.7.2

Collecting faiss-gpu
  Downloading faiss_gpu-1.7.2-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (85.5 MB)
[K     |████████████████████████████████| 85.5 MB 96 kB/s 
[?25hInstalling collected packages: faiss-gpu
Successfully installed faiss-gpu-1.7.2


In [None]:
import glob
from itertools import chain
import os
import random
import zipfile
from tqdm.notebook import tqdm
import pickle
import time
import copy


import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from PIL import Image
from sklearn.model_selection import train_test_split
import cv2

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.optim.lr_scheduler import StepLR, CosineAnnealingLR, ReduceLROnPlateau
from torch.utils.data import DataLoader, Dataset
from torchvision import datasets, transforms, models
import faiss                            
from torch.optim.lr_scheduler import ReduceLROnPlateau

In [None]:
# Root folders of the training and validation splits (Colab-style path,
# presumably a mounted Google Drive). The indexing cell globs them with
# '*/*', i.e. it expects one level of sub-folders with the images inside.
PATH_TRAIN = "/content/drive/MyDrive/COURSES/CS232/CBMIR/ct_dataset_split/train"
PATH_VALID = "/content/drive/MyDrive/COURSES/CS232/CBMIR/ct_dataset_split/val"

In [None]:
# Pick the compute device once: the first CUDA GPU when one is available,
# otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")

In [None]:
def pil_loader(path):
    """Read the image stored at ``path`` and return it converted to RGB.

    The file is opened explicitly and handed to PIL so the handle is closed
    deterministically, which avoids a ResourceWarning
    (https://github.com/python-pillow/Pillow/issues/835).
    """
    with open(path, 'rb') as stream, Image.open(stream) as picture:
        rgb_image = picture.convert('RGB')
    return rgb_image

In [None]:
# ImageNet channel statistics, shared by both phases (the network fine-tuned
# below starts from ImageNet-pretrained weights).
imagenet_normalize = transforms.Normalize(
    [0.485, 0.456, 0.406],
    [0.229, 0.224, 0.225]
)

data_transforms = {
    # Training: random resized crop to 224x224 acts as augmentation.
    'train': transforms.Compose([
        transforms.RandomResizedCrop(224),
        transforms.ToTensor(),
        imagenet_normalize,
    ]),
    # Validation: deterministic resize followed by a 224x224 centre crop.
    'val': transforms.Compose([
        transforms.Resize(224),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        imagenet_normalize,
    ]),
}

# ImageFolder derives the class labels from the sub-folder names of each split.
data_dir = '/content/drive/MyDrive/COURSES/CS232/CBMIR/ct_dataset_split/'
image_datasets = {
    phase: datasets.ImageFolder(os.path.join(data_dir, phase), data_transforms[phase])
    for phase in ['train', 'val']
}

# Never request more workers than the runtime has cores; asking for 4 on a
# smaller machine makes DataLoader emit a warning on every iterator creation.
loader_workers = min(4, os.cpu_count() or 1)

dataloaders = {
    phase: DataLoader(
        image_datasets[phase],
        batch_size=32,
        # Only the training data needs reshuffling each epoch; validation
        # metrics do not depend on sample order.
        shuffle=(phase == 'train'),
        num_workers=loader_workers,
    )
    for phase in ['train', 'val']
}

# Number of samples per phase, used to average loss/accuracy over an epoch.
dataset_sizes = {phase: len(image_datasets[phase]) for phase in ['train', 'val']}

class_names = image_datasets['train'].classes


  cpuset_checked))


# Training

In [None]:
def train_model(model, criterion, optimizer, scheduler, num_epochs=25, save_log=False,
                save_path='/content/drive/MyDrive/COURSES/CS232/CBMIR//model/CBMIR_resnet18_method3.pt'):
    """Fine-tune ``model`` and keep the weights with the best validation accuracy.

    Relies on the module-level ``dataloaders`` and ``dataset_sizes`` (both
    keyed by ``'train'`` / ``'val'``) and on ``device``.

    Args:
        model: network to train; it is updated in place.
        criterion: loss, called as ``criterion(outputs, labels)``.
        optimizer: optimizer stepped once per training batch.
        scheduler: stepped once per epoch with the validation loss
            (``scheduler.step(val_loss)``, the ReduceLROnPlateau calling style).
        num_epochs: number of train + validation passes.
        save_log: when True, the per-epoch history is returned as well.
        save_path: file that receives ``model.state_dict()`` every time the
            validation accuracy improves.

    Returns:
        ``model`` loaded with the best weights when ``save_log`` is False,
        otherwise the tuple ``(model, history)`` where ``history`` is
        ``{'all_epoch_info': [{'epoch': int, 'info': {'train': {...}, 'val': {...}}}, ...]}``
        and each phase dict holds ``'loss'`` and ``'acc'``.
    """
    history = {
        'all_epoch_info': []
    }
    train = {}
    val = {}

    since = time.time()

    best_model_wts = copy.deepcopy(model.state_dict())
    best_acc = 0.0

    for epoch in range(num_epochs):
        print(f'Epoch {epoch + 1}/{num_epochs}')
        print('-' * 10)

        # Each epoch has a training and validation phase
        for phase in ['train', 'val']:
            if phase == 'train':
                model.train()  # Set model to training mode
            else:
                model.eval()   # Set model to evaluate mode

            running_loss = 0.0
            running_corrects = 0

            # Iterate over data.
            for inputs, labels in dataloaders[phase]:
                inputs = inputs.to(device)
                labels = labels.to(device)

                # zero the parameter gradients
                optimizer.zero_grad()

                # forward; gradients are tracked only in the training phase
                with torch.set_grad_enabled(phase == 'train'):
                    outputs = model(inputs)
                    _, preds = torch.max(outputs, 1)
                    loss = criterion(outputs, labels)

                    # backward + optimize only if in training phase
                    if phase == 'train':
                        loss.backward()
                        optimizer.step()

                # statistics: loss is a batch mean, so weight it by batch size
                running_loss += loss.item() * inputs.size(0)
                running_corrects += torch.sum(preds == labels.data)

            epoch_loss = running_loss / dataset_sizes[phase]
            epoch_acc = running_corrects.double() / dataset_sizes[phase]

            print(f'{phase} Loss: {epoch_loss:.4f} Acc: {epoch_acc:.4f}')
            phase_stats = {
                'loss': epoch_loss,
                'acc': epoch_acc.item()
            }
            if phase == 'train':
                train = phase_stats
            else:
                val = phase_stats

                # The scheduler monitors the validation loss.
                scheduler.step(epoch_loss)

                # Checkpoint (in memory and on disk) whenever validation accuracy improves.
                if epoch_acc > best_acc:
                    best_acc = epoch_acc
                    best_model_wts = copy.deepcopy(model.state_dict())
                    torch.save(model.state_dict(), save_path)

        history['all_epoch_info'].append({
            'epoch': epoch,
            'info': {
                'train': train,
                'val': val
            }
        })
        print()

    time_elapsed = time.time() - since
    print(f'Training complete in {time_elapsed // 60:.0f}m {time_elapsed % 60:.0f}s')
    # '.4f' (four decimals); the previous '4f' was a field width, not a precision.
    print(f'Best val Acc: {best_acc:.4f}')

    # load best model weights
    model.load_state_dict(best_model_wts)

    # Explicit branches: the one-line form
    # `return model if not save_log else model, history` parsed as
    # `return (model if ... else model), history` and always returned a tuple.
    if save_log:
        return model, history
    return model

RuntimeError: ignored

In [None]:
# Start from an ImageNet-pretrained ResNet-18.
model_ft = models.resnet18(pretrained=True)

# Freeze the leading parameter tensors: the first 61 * 3 // 4 (= 45) tensors
# yielded by model_ft.parameters() stop requiring gradients; the remaining
# ones stay trainable.
frozen_tensor_count = 61 * 3 // 4
for position, weight in enumerate(model_ft.parameters()):
    if position >= frozen_tensor_count:
        break
    weight.requires_grad = False

# Replace the classification head with a fresh linear layer with 7 outputs.
# (It could be generalised to nn.Linear(num_ftrs, len(class_names)).)
num_ftrs = model_ft.fc.in_features
model_ft.fc = nn.Linear(num_ftrs, 7)

model_ft = model_ft.to(device)

criterion = nn.CrossEntropyLoss()

# Every parameter is handed to Adam, including the frozen ones; parameters
# with requires_grad=False never receive a gradient and are therefore not updated.
optimizer_ft = optim.Adam(model_ft.parameters(), lr=0.001)

# Cut the learning rate by 10x when the monitored value (the validation loss
# passed to scheduler.step) has not improved for 3 epochs, down to 1e-6.
scheduler = ReduceLROnPlateau(
    optimizer_ft,
    mode='min',
    factor=0.1,
    patience=3,
    min_lr=1e-6,
    verbose=True,
)

In [None]:
# Print a layer-by-layer summary of the network for a single 3x224x224 input.
from torchsummary import summary

summary_input_shape = (3, 224, 224)
summary(model_ft, summary_input_shape)

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1         [-1, 64, 112, 112]           9,408
       BatchNorm2d-2         [-1, 64, 112, 112]             128
              ReLU-3         [-1, 64, 112, 112]               0
         MaxPool2d-4           [-1, 64, 56, 56]               0
            Conv2d-5           [-1, 64, 56, 56]          36,864
       BatchNorm2d-6           [-1, 64, 56, 56]             128
              ReLU-7           [-1, 64, 56, 56]               0
            Conv2d-8           [-1, 64, 56, 56]          36,864
       BatchNorm2d-9           [-1, 64, 56, 56]             128
             ReLU-10           [-1, 64, 56, 56]               0
       BasicBlock-11           [-1, 64, 56, 56]               0
           Conv2d-12           [-1, 64, 56, 56]          36,864
      BatchNorm2d-13           [-1, 64, 56, 56]             128
             ReLU-14           [-1, 64,

In [None]:
# Fine-tune for 50 epochs and unpack the (model, history) pair returned by train_model.
model_ft, history = train_model(
    model_ft,
    criterion,
    optimizer_ft,
    scheduler=scheduler,
    num_epochs=50,
    save_log=True,
)

Epoch 1/50
----------


  cpuset_checked))


train Loss: 0.1604 Acc: 0.9484
val Loss: 0.0930 Acc: 0.9761

Epoch 2/50
----------
train Loss: 0.0712 Acc: 0.9801
val Loss: 0.0138 Acc: 0.9935

Epoch 3/50
----------
train Loss: 0.0452 Acc: 0.9876
val Loss: 0.0534 Acc: 0.9826

Epoch 4/50
----------
train Loss: 0.0291 Acc: 0.9901
val Loss: 0.0198 Acc: 0.9957

Epoch 5/50
----------
train Loss: 0.0678 Acc: 0.9826
val Loss: 0.0152 Acc: 0.9957

Epoch 6/50
----------
train Loss: 0.0485 Acc: 0.9870
val Loss: 0.0450 Acc: 0.9870
Epoch 00006: reducing learning rate of group 0 to 1.0000e-04.

Epoch 7/50
----------
train Loss: 0.0370 Acc: 0.9870
val Loss: 0.0252 Acc: 0.9913

Epoch 8/50
----------
train Loss: 0.0165 Acc: 0.9938
val Loss: 0.0182 Acc: 0.9935

Epoch 9/50
----------
train Loss: 0.0169 Acc: 0.9919
val Loss: 0.0212 Acc: 0.9913

Epoch 10/50
----------
train Loss: 0.0208 Acc: 0.9919
val Loss: 0.0159 Acc: 0.9978
Epoch 00010: reducing learning rate of group 0 to 1.0000e-05.

Epoch 11/50
----------
train Loss: 0.0145 Acc: 0.9963
val Loss: 0.0

# Load model

In [None]:
# Rebuild the fine-tuned classifier and turn it into a feature extractor.
# No pretrained download is needed: the checkpoint overwrites every weight.
load_model = models.resnet18()
num_ftrs = load_model.fc.in_features
# The head must have the same size as when the checkpoint was saved (7 outputs).
load_model.fc = nn.Linear(num_ftrs, 7)
# map_location lets a checkpoint written on a GPU runtime load on a CPU-only one.
load_model.load_state_dict(
    torch.load(
        '/content/drive/MyDrive/COURSES/CS232/CBMIR/model/CBMIR_resnet18_method3.pt',
        map_location=device,
    )
)
# Drop the final fully connected layer so the network outputs the pooled
# features that the indexing cell flattens into embedding vectors.
load_model = torch.nn.Sequential(*list(load_model.children())[:-1])
# Use the shared `device` instead of an unconditional .cuda(), and switch to
# evaluation mode: without eval() BatchNorm normalises each image with its own
# batch statistics and keeps updating its running averages while embedding.
load_model = load_model.to(device).eval()

In [None]:
# Deterministic preprocessing used for retrieval (both indexed images and queries).
# Normalise with the ImageNet statistics the network was fine-tuned with;
# the earlier (0.5, 0.5, 0.5) mean/std fed the model a shifted input distribution.
# NOTE: an index built with different statistics must be rebuilt before querying.
val_transforms = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1         [-1, 64, 112, 112]           9,408
       BatchNorm2d-2         [-1, 64, 112, 112]             128
              ReLU-3         [-1, 64, 112, 112]               0
         MaxPool2d-4           [-1, 64, 56, 56]               0
            Conv2d-5           [-1, 64, 56, 56]          36,864
       BatchNorm2d-6           [-1, 64, 56, 56]             128
              ReLU-7           [-1, 64, 56, 56]               0
            Conv2d-8           [-1, 64, 56, 56]          36,864
       BatchNorm2d-9           [-1, 64, 56, 56]             128
             ReLU-10           [-1, 64, 56, 56]               0
       BasicBlock-11           [-1, 64, 56, 56]               0
           Conv2d-12           [-1, 64, 56, 56]          36,864
      BatchNorm2d-13           [-1, 64, 56, 56]             128
             ReLU-14           [-1, 64,

In [None]:
faiss_index = faiss.IndexFlatL2(512)   # exact L2 index over 512-d embeddings
collection = []  # not used by the cells visible here; kept in case other cells expect the name
# File path of every indexed image; position i matches the i-th vector added to faiss_index.
im_indices = []
with torch.no_grad():
    # Index the training split first, then the validation split (one loop
    # instead of two copy-pasted ones).
    for split_dir in (PATH_TRAIN, PATH_VALID):
        # sorted() makes the insertion order reproducible; plain glob order
        # depends on the filesystem.
        for f in sorted(glob.glob(os.path.join(split_dir, '*/*'))):
            im = pil_loader(f)
            im = im.resize((224,224))
            # Add the batch dimension on the tensor itself and move it to the
            # shared `device` (no numpy round trip, no hard-coded .cuda()).
            im = val_transforms(im).unsqueeze(0).to(device)

            preds = load_model(im)
            # Flatten the single embedding into a (1, d) array for FAISS.
            preds = np.array([preds[0].cpu().numpy().flatten()])
            faiss_index.add(preds)  # add the representation to the index
            im_indices.append(f)    # remember the image path to find it later on

In [None]:
# Persist the index; the context manager closes the file even if dump() raises.
with open("/content/drive/MyDrive/COURSES/CS232/CBMIR/collection/collection_resnet18_method3.pickle", "wb") as pickle_out:
    pickle.dump(faiss_index, pickle_out)

# Image paths are stored separately, in the same order as the vectors in the index.
np.save("/content/drive/MyDrive/COURSES/CS232/CBMIR/collection/im_indices_resnet18_method3.npy", im_indices)

# Testing


In [None]:
# Reload the persisted index; the context manager closes the handle (it was
# previously left open).
# NOTE(security): pickle.load executes arbitrary code from the file - only
# load collections written by this notebook.
with open("/content/drive/MyDrive/COURSES/CS232/CBMIR/collection/collection_resnet18_method3.pickle", "rb") as pickle_in:
    load_faiss_index = pickle.load(pickle_in)
# Paths of the indexed images, aligned with the vector ids of the index.
load_im_indices = np.load("/content/drive/MyDrive/COURSES/CS232/CBMIR/collection/im_indices_resnet18_method3.npy")

In [None]:
PATH_TEST = "/content/drive/MyDrive/COURSES/CS232/CBMIR/ct_dataset_split/test"
# Retrieval with a query image: every test image is embedded, its nearest
# neighbours are fetched from the index, and the average precision of that
# ranking is recorded (a hit = retrieved image sits in a folder with the
# same name as the query's folder).
list_ap = []
top_k = 15  # number of neighbours retrieved and scored per query
with torch.no_grad():
    for folder in os.listdir(PATH_TEST):
        for f in os.listdir(os.path.join(PATH_TEST, folder)):
            query_path = os.path.join(PATH_TEST, folder, f)
            im = pil_loader(query_path)
            print('query:', query_path)
            im = im.resize((224,224))
            # Batch of one on the shared `device` (no hard-coded .cuda()).
            im = val_transforms(im).unsqueeze(0).to(device)

            test_embed = load_model(im)

            test_embed = np.array([test_embed[0].cpu().numpy().flatten()])
            _, I = load_faiss_index.search(test_embed, top_k)

            correct = 0
            ap = 0
            for i in range(top_k):
                retrieved_path = load_im_indices[I[0][i]]
                # The parent folder name of the retrieved file is its class label.
                pred = retrieved_path.split('/')[-2]
                print("Retrieved Image:", retrieved_path)
                if pred == folder:
                    correct += 1
                    # precision at this rank, accumulated only at relevant ranks
                    ap += correct / (i + 1)
            # No relevant result among the neighbours -> AP is 0 (the bare
            # division raised ZeroDivisionError in that case).
            ap = ap / correct if correct else 0.0
            list_ap.append(ap)
            print('correct:', correct)
            print('ap:', ap)
            print('\n\n/--------------------------------------/')

In [None]:
import statistics

# Mean average precision over all test queries. Bound to `mean_ap` rather than
# `map`, which would shadow the builtin map() for the rest of the kernel session.
mean_ap = statistics.mean(list_ap)
mean_ap