## Ensemble model
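The ensemble combines five models:
1. the model trained with the best L1 regularization strength
2. the model trained with the best L2 regularization strength (weight decay)
3. the model trained with data augmentation
4. the model trained with the best dropout value
5. the model with the best architecture (TBD)

The ensemble's performance is then compared to the performance of each individual model.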

Import packages

In [1]:
import torch
from preprocess import holdout_set
import glob
import random
import numpy as np
import torchvision
import os
import torch.nn.functional as F
from sklearn.metrics import accuracy_score, f1_score, confusion_matrix, precision_score
import pytorch_i3d_1lesslayer
# for running in notebooks only:
# number the GPUs by PCI bus id and expose only device 2 to this kernel
# (takes effect only if it runs before CUDA is first initialised)
os.environ.update({"CUDA_DEVICE_ORDER": "PCI_BUS_ID", "CUDA_VISIBLE_DEVICES": '2'})

Load best models

In [7]:
# Directory holding the saved ensemble checkpoints. The trailing slash is required:
# file names are appended to this string with `+` when loading models and saving logits.
models_path = "/home/jt9744/COS429/429_Final_new/ensemble_models/"
# Checkpoint file names, one per ensemble member (presumably encoding epochs,
# regulariser, learning rate and regularisation strength — confirm against the training script).
L1_name = "30epochs_l1_lr_0.1_ld_0.0001"
L2_name = "30epochs_l2_lr_0.1_wd_1e-11"
augment_name = "60epochs_l2_lr_0.1_wd_1e-07_augment"
dropout_name = "30epochs_l1_lr_0.1_ld_1e-07_dropout_06" 
architecture_name = "30epochs_l2_lr_0.1_wd_1e-07_1lesslayer" 
# Reference model the ensemble members are compared against; it is loaded from a different directory.
baseline_name = "30epochs_l2_lr_0.1_wd_1e-07"

In [3]:
# Each file is unpickled into a complete model object (the models are called directly
# later on), so the defining modules — e.g. pytorch_i3d_1lesslayer imported at the top —
# must be importable. torch.load relies on pickle: only load checkpoints you trust.
best_L1 = torch.load(os.path.join(models_path, L1_name))
best_L2 = torch.load(os.path.join(models_path, L2_name))
best_augment = torch.load(os.path.join(models_path, augment_name))
best_dropout = torch.load(os.path.join(models_path, dropout_name))
best_architecture = torch.load(os.path.join(models_path, architecture_name))
# the baseline checkpoint lives outside models_path
baseline = torch.load(os.path.join("/scratch/network/hishimwe/models/", baseline_name))

Build dataset class for loading data later

In [4]:
class dataset(torch.utils.data.Dataset):
    """Map-style dataset that serves every video as a fixed-length clip of frames.

    Each entry of `paths` is a directory of `.jpg` frames. Indexing returns a
    `(clip, label)` pair: `num_samples` frames taken at evenly spaced positions,
    each center-cropped to a square, resized to 224x224, stacked along dim 1 and
    rescaled with `(x / 255) * 2 - 1` (i.e. to [-1, 1] for 8-bit pixel values).
    """

    def __init__(self, paths, v_names, v_labels, num_samples=16, transforms=None): # num_samples cannot be lower than 16
        """Index the frame files of every video directory.

        paths       -- video frame directories; also defines item order and length
        v_names     -- video names, compared element-wise against a directory's last
                       path component (so presumably a NumPy array — confirm with caller)
        v_labels    -- labels, indexed with the positions found in `v_names`
        num_samples -- number of frames sampled per video
        transforms  -- optional callable applied to the clip as a NumPy array
        """
        # list the frame files once, sorted by file name
        self.frames = {video_dir: sorted(glob.glob(video_dir+"/*.jpg")) for video_dir in paths}
        self.data = paths
        self.video_names = v_names
        self.video_labels = v_labels
        self.num_samples = num_samples
        self.transforms = transforms

    @staticmethod
    def _square_frame(frame_file):
        """Read one frame, center-crop it to a square on its shorter side, resize to 224x224."""
        tvf = torchvision.transforms.functional
        frame = torchvision.io.read_image(frame_file)
        side = min(frame.shape[-2:])
        return tvf.resize(tvf.center_crop(frame, (side, side)), (224, 224), antialias=True)

    def __getitem__(self, idx):
        """Return `(clip, label)` for the video at position `idx`."""
        video_dir = self.data[idx]
        frame_files = self.frames[video_dir]

        # evenly spaced (fractional) positions from the first to the last frame;
        # each one is truncated to an integer frame index below
        positions = np.linspace(0, len(frame_files)-1, self.num_samples)

        # labels stored at the position(s) where the directory name occurs in video_names
        match = np.where(self.video_names == video_dir.split('/')[-1])
        label_video = self.video_labels[match]

        clip = torch.stack([self._square_frame(frame_files[int(pos)]) for pos in positions], dim=1)

        # no augmentation requested: rescale the tensor and return it directly
        if self.transforms is None:
            return (clip/255)*2 - 1, label_video

        # augmentation works on NumPy data; rescale afterwards and convert back to a float tensor
        augmented = np.array(self.transforms(clip.numpy()))
        augmented = (augmented/255)*2 - 1
        return torch.from_numpy(augmented).type(torch.FloatTensor), label_video

    def __len__(self):
        """Number of videos in the dataset."""
        return len(self.data)

Extract videos

In [19]:
# video_paths = ["IMG_8595-Copy1.MOV", "IMG_8595-Copy2.MOV", "IMG_8595-Copy3.MOV",
#                "IMG_8595-Copy4.MOV", "IMG_8595-Copy5.MOV", "IMG_8595-Copy6.MOV",
#                "IMG_8595-Copy7.MOV", "IMG_8595-Copy8.MOV", "IMG_8595-Copy9.MOV",
#                "IMG_8595.MOV"]

In [5]:
# extract the validation video names and labels
_, video_val, _, label_val, unique_labels = holdout_set(0.25)
batch_size = 10 # batch size when loading data
num_videos_val = len(video_val)
num_classes = len(set(label_val)) # count unique labels (11 classes)

# extract validation video paths
video_frames_path = "/scratch/network/hishimwe/image"
# glob returns entries in an arbitrary, filesystem-dependent order, so sort before the
# seeded shuffle; otherwise the video order (and hence the row order of every saved
# LOGITS_/TRUTH_ file) is not reproducible between runs or machines.
# NOTE: files saved before this change may use a different row order — regenerate them together.
paths = sorted(glob.glob(video_frames_path+"/*"))
random.seed(0)
random.shuffle(paths)
# keep only directories whose name is a validation video (set gives O(1) membership tests)
val_name_set = set(video_val)
good_paths_val = [path for path in paths if path.split('/')[-1] in val_name_set]

# construct dataset and dataloader (shuffle=False keeps the video order fixed across models)
d_val = dataset(paths=good_paths_val, v_names=video_val, v_labels= label_val)
loader_val = torch.utils.data.DataLoader(d_val, batch_size=batch_size, shuffle=False, drop_last=False, num_workers=1)

Create the ensemble model from all the best models by averaging their predictions. As a first step, compute each model's logits on the validation set and save them to disk.

In [6]:
def get_logits(model, loader, num_classes, save_name):
    """Run `model` over `loader` in inference mode and save its logits and the labels.

    Two text files are written with `np.savetxt` under the module-level `models_path`:
    `LOGITS_<save_name>` (one row of logits per sample) and `TRUTH_<save_name>`
    (one label per sample), both in loader order.

    Args:
        model: network to evaluate. It must already live on the GPU, because the
            inputs are moved there with `.cuda()`.
        loader: iterable yielding `(data, label)` batches.
        num_classes: unused; kept so existing calls keep working.
        save_name: suffix used for both output file names.

    Returns:
        None.
    """
    ground_truth = []
    logits = []

    # Evaluate in eval mode so dropout / batch-norm layers behave deterministically,
    # and remember the previous mode so the caller's model is left as it was found.
    is_module = isinstance(model, torch.nn.Module)
    was_training = is_module and model.training
    if is_module:
        model.eval()

    try:
        # no_grad: no autograd graph is built, which keeps GPU memory flat across batches
        with torch.no_grad():
            for data, label in loader:
                data = data.cuda()
                # reshape(-1) rather than squeeze(): a final batch holding a single video
                # must stay 1-D, otherwise it collapses to a scalar and cannot be extended from
                label = label.reshape(-1).type(torch.LongTensor)
                # averages the model output over dim 2
                # (assumes output is (batch, classes, time) — TODO confirm)
                per_frame_logits = model(data).mean(2)

                ground_truth.extend(label.tolist())
                logits.extend(per_frame_logits.tolist())
    finally:
        if is_module:
            model.train(was_training)
        # hand cached GPU blocks back to the driver before the next model is evaluated
        torch.cuda.empty_cache()

    np.savetxt(models_path+'LOGITS_'+save_name, np.array(logits))
    np.savetxt(models_path+'TRUTH_'+save_name, np.array(ground_truth))

In [8]:
# Time one pass of the baseline model over the validation loader; writes LOGITS_/TRUTH_ files named after baseline_name.
%time get_logits(model=baseline, loader=loader_val, num_classes=num_classes, save_name=baseline_name)

CPU times: user 20.8 s, sys: 8.58 s, total: 29.4 s
Wall time: 2min 7s


In [7]:
# Time one pass of the best-L1 model over the validation loader; writes LOGITS_/TRUTH_ files named after L1_name.
%time get_logits(model=best_L1, loader=loader_val, num_classes=num_classes, save_name=L1_name)

CPU times: user 20.5 s, sys: 9.48 s, total: 30 s
Wall time: 2min 23s


In [8]:
# Time one pass of the best-L2 model over the validation loader; writes LOGITS_/TRUTH_ files named after L2_name.
%time get_logits(model=best_L2, loader=loader_val, num_classes=num_classes, save_name=L2_name)

CPU times: user 19.8 s, sys: 8.59 s, total: 28.4 s
Wall time: 1min 40s


In [9]:
# Time one pass of the augmentation model over the validation loader; writes LOGITS_/TRUTH_ files named after augment_name.
%time get_logits(model=best_augment, loader=loader_val, num_classes=num_classes, save_name=augment_name)

CPU times: user 19.8 s, sys: 8.51 s, total: 28.3 s
Wall time: 1min 37s


In [10]:
# Time one pass of the dropout model over the validation loader; writes LOGITS_/TRUTH_ files named after dropout_name.
%time get_logits(model=best_dropout, loader=loader_val, num_classes=num_classes, save_name=dropout_name)

CPU times: user 19.9 s, sys: 8.45 s, total: 28.4 s
Wall time: 1min 47s


In [11]:
# Time one pass of the alternative-architecture model over the validation loader; writes LOGITS_/TRUTH_ files named after architecture_name.
%time get_logits(model=best_architecture, loader=loader_val, num_classes=num_classes, save_name=architecture_name)

CPU times: user 19.5 s, sys: 7.85 s, total: 27.3 s
Wall time: 1min 36s
