<a href="https://colab.research.google.com/github/xyzkpsf/CS-W182-CV-Project/blob/main/Simple_Add_CV_Ensemble_.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import numpy as np
import torchvision
import pandas as pd
from torchvision import datasets, models, transforms
from torch.utils.data import Dataset, DataLoader
from torch.optim import lr_scheduler
from torchvision.io import read_image
from skimage import io, transform
from PIL import Image
import matplotlib.pyplot as plt
import time
import copy
import os
import sys

In [None]:
!unzip '/content/drive/MyDrive/Spring 2021/CS 182/tiny-imagenet-200.zip'

In [3]:
class CustomImageDataset(Dataset):
    def __init__(self, val_label_dir, img_dir, train_path, transform=None, target_transform=None):
        super(CustomImageDataset, self).__init__()
        self.val_label_file = pd.read_csv(val_label_dir, delimiter = "\t", names=["pics", "labels", "_1", "_2", "_3", "_4"])
        self.img_labels = self.val_label_file[["pics", 'labels']]
        self.img_dir = img_dir
        self.transform = transform
        self.classes, self.class_to_idx = self._find_classes(train_path)

    def __len__(self):
        return len(self.img_labels)

    def _find_classes(self, dir):
        """
        Finds the class folders in a dataset.
        Args:
            dir (string): Root directory path.
        Returns:
            tuple: (classes, class_to_idx) where classes are relative to (dir), and class_to_idx is a dictionary.
        Ensures:
            No class is a subdirectory of another.
        """
        if sys.version_info >= (3, 5):
            # Faster and available in Python 3.5 and above
            classes = [d.name for d in os.scandir(dir) if d.is_dir()]
        else:
            classes = [d for d in os.listdir(dir) if os.path.isdir(os.path.join(dir, d))]
        classes.sort()
        class_to_idx = {classes[i]: i for i in range(len(classes))}
        return classes, class_to_idx

    def pad(self, img):
        padding = np.ones((64, 64, 2))
        img = img.reshape((64, 64, 1))
        img = np.concatenate((img, padding), axis=2)
        return img.astype(np.float32)
      
    def __getitem__(self, idx):
        img_path = os.path.join(self.img_dir, self.img_labels.iloc[idx, 0])
        img = Image.open(img_path)
        img = copy.deepcopy(np.asarray(img))
        # if it has less than 3 channels
        if img.shape != (64, 64, 3):
            img = self.pad(img)
        #print(img.shape)
        label = self.img_labels.iloc[idx, 1]
        label = self.class_to_idx[label]
        if self.transform:
            img = self.transform(img)
        #sample = {"image": img, "label": label}
        return img, label

In [19]:
data_transforms = {
    'train': transforms.Compose([
        transforms.ToTensor(),
        #transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ]),
    'val': transforms.Compose([
        transforms.ToTensor(),
        #transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ]),
}

data_dir = '/content/tiny-imagenet-200'

val_label_dir = '/content/tiny-imagenet-200/val/val_annotations.txt'

image_datasets = {}

image_datasets['train'] = datasets.ImageFolder(os.path.join(data_dir, 'train'),
                                          data_transforms['train'])

image_datasets['val'] = CustomImageDataset(val_label_dir, data_dir+'/val/images', 
                                           os.path.join(data_dir, 'train'),
                                           transform=data_transforms['val'])

dataloaders = {x: torch.utils.data.DataLoader(image_datasets[x], batch_size=128,
                                             shuffle=True, num_workers=2)
                                            for x in ['train', 'val']}

dataset_sizes = {x: len(image_datasets[x]) for x in ['train', 'val']}

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

In [5]:
# No pretrained parameters


# First model
model1 = models.resnet18(pretrained=False)
model1.avgpool = nn.AdaptiveAvgPool2d(1)
num_ftrs = model1.fc.in_features
model1.fc = nn.Linear(num_ftrs, 200)
model1.conv1 = nn.Conv2d(3,64, kernel_size=(3,3), stride=(1,1), padding=(1,1))
model1.maxpool = nn.Sequential()
model1 = model1.to(device)


# Second model
model2 = models.resnet50(pretrained=False)
model2.avgpool = nn.AdaptiveAvgPool2d(1)
num_ftrs = model2.fc.in_features
model2.fc = nn.Linear(num_ftrs, 200)
model2.conv1 = nn.Conv2d(3,64, kernel_size=(3,3), stride=(1,1), padding=(1,1))
model2.maxpool = nn.Sequential()
model2 = model2.to(device)


# Third model
model3 = models.resnet101(pretrained=False)
model3.avgpool = nn.AdaptiveAvgPool2d(1)
num_ftrs = model3.fc.in_features
model3.fc = nn.Linear(num_ftrs, 200)
model3.conv1 = nn.Conv2d(3,64, kernel_size=(3,3), stride=(1,1), padding=(1,1))
model3.maxpool = nn.Sequential()
model3 = model3.to(device)


#Multi GPU
#model = torch.nn.DataParallel(model, device_ids=[0, 1])


In [None]:
# Don't REPLACE the old one.
# Remeber to change the temp.pt each time.
# model_save_path = "/content/drive/MyDrive/CV Project Model State Dict/temp.pt"
# torch.save(model.state_dict(), model_save_path)

In [6]:
class MyEnsemble(nn.Module):
    def __init__(self, model1, model2, model3):
          super(MyEnsemble,self).__init__()
          self.model1 = model1 
          self.model2 = model2 
          self.model3 = model3  


    def forward(self, x):
        x1 = self.model1(x.clone())
        
        x2 = self.model2(x.clone())
           
        x3 = self.model3(x)
        
        x = x1 + x2 + x3

        return x

In [14]:
def validation(model, dataloaders, criterion):
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    correct = 0
    total = 0
    with torch.no_grad():
        for i,(inputs, labels) in enumerate(dataloaders['val']):
            inputs = inputs.to(device)
            labels = labels.to(device)
            outputs = model(inputs)
            _, preds = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (preds == labels).sum().item()
            if i % 10 == 0:
                print('\rIteration: {}/{}, Avg Acc: {:.4f}'.format(i+1, len(dataloaders['val']), (100 * correct / total)))
    print("Total Sample numbers: {}, Overall Acc: {:.2f}".format(total, (100 * correct / total)))


In [8]:

PATH1 = "/content/drive/MyDrive/CV Project Model State Dict/ResNet18 10-Epoch.pt"
model1.load_state_dict(torch.load(PATH1))
for param in model1.parameters():
    param.requires_grad_(False)


PATH2 = "/content/drive/MyDrive/CV Project Model State Dict/ResNet50 5-Epoch.pt"
model2.load_state_dict(torch.load(PATH2))
for param in model2.parameters():
    param.requires_grad_(False)

PATH3 = "/content/drive/MyDrive/CV Project Model State Dict/ResNet101.pt"
model3.load_state_dict(torch.load(PATH3))
for param in model3.parameters():
    param.requires_grad_(False)



In [21]:

# Only finish training model1 so far
model = MyEnsemble(model1, model2, model3)
model = model.to(device)
#validation(model, dataloaders, nn.CrossEntropyLoss())

In [23]:
validation(model, dataloaders, nn.CrossEntropyLoss())

Iteration: 1/79, Avg Acc: 47.6562
Iteration: 11/79, Avg Acc: 46.4489
Iteration: 21/79, Avg Acc: 45.4613
Iteration: 31/79, Avg Acc: 46.2198
Iteration: 41/79, Avg Acc: 44.4360
Iteration: 51/79, Avg Acc: 45.8180
Iteration: 61/79, Avg Acc: 46.7085
Iteration: 71/79, Avg Acc: 47.1831
Total Sample numbers: 10000, Overall Acc: 47.40
