In [1]:
import argparse
import torch
import torch.nn as nn
import torchvision
import torchattacks
import torchvision.transforms as transforms
from torch.utils.data import DataLoader
from torch.utils.data import Dataset
import torch.optim as optim
import matplotlib.pyplot as plt

import numpy as np

from hess import data
import hess.nets as models
from model import make_cnn, accuracy
from hess_vec_prod import min_max_hessian_eigs

from utils import accuracy

from datasets import load_dataset

def eff_dim(x, s = 1.):
    """Return the sum of ``v / (v + s)`` over the retained entries of ``x``.

    Entries exactly equal to 1.0 are dropped first; per the original author's
    note these are eigenvalues that did not converge in the Lanczos
    computation, and removing them makes the estimate less noisy.

    Args:
        x: array supporting boolean-mask indexing (e.g. a NumPy array of
            eigenvalues). The caller's array is not modified.
        s: additive constant in the denominator (default 1.0).

    Returns:
        The result of ``np.sum`` over the per-entry ratios.
    """
    kept = x[x != 1.]
    ratios = kept / (kept + s)
    return np.sum(ratios)

# Use the GPU when PyTorch reports CUDA as available; otherwise run on the CPU.
CUDA = torch.cuda.is_available()
if CUDA:
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print(f"CUDA: {CUDA}")

CUDA: True


In [None]:
# https://gist.github.com/bonlime/4e0d236cf98cd5b15d977dfa03a63643

#### Run

In [2]:
# Batch size shared by both loaders below.
BATCH_SIZE = 64
# Not referenced anywhere else in the visible cells.
NUM_CLASSES = 1000

# Resize -> centre crop to 224 -> tensor -> per-channel normalisation.
# NOTE(review): the mean/std constants look like the usual ImageNet statistics
# for torchvision pretrained models; confirm against the weights in use.
preprocess = transforms.Compose(
    [
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize(
            mean=[0.485, 0.456, 0.406],
            std=[0.229, 0.224, 0.225],
        ),
    ]
)

# Validation split, iterated in a fixed (unshuffled) order.
testset = torchvision.datasets.ImageFolder(root='../imagenet/val', transform=preprocess)
testloader = DataLoader(
    testset,
    batch_size=BATCH_SIZE,
    shuffle=False,
    num_workers=8,
)

# Training split with the same deterministic preprocessing and no shuffling.
trainset = torchvision.datasets.ImageFolder(root='../imagenet/train', transform=preprocess)
trainloader = DataLoader(
    trainset,
    batch_size=BATCH_SIZE,
    shuffle=False,
    num_workers=8,
)

def evaluate_accuracy(model, dataloader, topk=(1,), log=False):
    """Run ``model`` over ``dataloader`` and score its logits with ``accuracy``.

    The model is switched to eval mode for the pass and afterwards restored to
    whichever mode (train/eval) it was in on entry. Inference runs under
    ``torch.no_grad()`` so no autograd graph is kept for the forward passes.

    Args:
        model: an ``nn.Module``; inputs are moved to the notebook-level
            ``device`` before the forward pass.
        dataloader: iterable yielding ``(inputs, labels)`` tensor pairs.
        topk: forwarded unchanged to ``accuracy`` as its ``topk`` argument.
        log: when True, print the running sample count whenever it is a
            multiple of 500, followed by ``"fin!"`` once the pass is done.

    Returns:
        Whatever ``accuracy(all_logits, all_labels, topk=topk)`` returns.

    Raises:
        ValueError: if ``dataloader`` yields no batches.
    """
    logit_chunks = []
    label_chunks = []
    seen = 0

    was_training = model.training
    model.eval()
    try:
        # No gradients are needed for evaluation; skipping graph construction
        # keeps activation memory from piling up on the GPU.
        with torch.no_grad():
            for x, y in dataloader:
                logits = model(x.to(device))

                # Collect per-batch results and concatenate once at the end,
                # rather than re-copying the growing tensor on every batch.
                logit_chunks.append(logits.detach().cpu())
                label_chunks.append(y.detach().cpu())
                seen += y.shape[0]

                if log and seen % 500 == 0:
                    print(f"{seen}, ", end='')
    finally:
        # Restore the caller's mode instead of forcing train mode.
        model.train(was_training)

    if log:
        print("fin!")

    if not logit_chunks:
        raise ValueError("evaluate_accuracy: dataloader yielded no batches")

    all_output = torch.cat(logit_chunks, dim=0)
    all_true_labels = torch.cat(label_chunks, dim=0)
    return accuracy(all_output, all_true_labels, topk=topk)

In [3]:
# Torchvision ResNet variants to sweep over, in this order.
model_names = ['resnet18', 'resnet34', 'resnet50', 'resnet101', 'resnet152']
results = {
    'params': {
        'models': model_names,
    },
    'results': {},
}

min_max_fn = min_max_hessian_eigs
kwargs = {"nsteps": 100} # default is 100
# Index into model_names at which the sweep starts.
# NOTE: `results` starts empty, so with START_INDEX > 0 the saved file only
# contains the models processed in this run.
START_INDEX = 0

# The loss module is the same for every model, so build it once.
criterion = nn.CrossEntropyLoss()

for MODEL_CHOICE in range(START_INDEX, len(model_names)):
    model_name = model_names[MODEL_CHOICE]

    # Drop the reference to the previous network before loading the next one
    # so both are not resident at the same time, then hand cached blocks back
    # to the driver.
    model = None
    if CUDA:
        torch.cuda.empty_cache()

    model = torch.hub.load('pytorch/vision', model_name, weights="IMAGENET1K_V1").to(device)
    model.eval()
    print(f"Loaded {model_name}!")

    #train_acc = evaluate_accuracy(model, trainloader, (1,))[0][1]
    #print(f" - Accuracy: (train) {train_acc:.2f}%")
    eval_acc = evaluate_accuracy(model, testloader, (1,))[0][1]
    print(f" - Accuracy: (eval) {eval_acc:.2f}%")

    # model.train() # TODO: see if makes a difference??
    # max_eval, min_eval, hvps, pos_evals, neg_evals, pos_bases = min_max_fn(
    #     model, trainloader, criterion, use_cuda=CUDA, verbose=False, **kwargs
    # )
    # if neg_evals is not None:
    #     neg_evals = neg_evals.cpu().numpy()
    # eigs = pos_evals.cpu().numpy()
    # train_effective_dimension = eff_dim(eigs)

    # evaluate_accuracy may have changed the module's mode; force eval mode for
    # the Hessian computation.
    model.eval()
    # Release blocks cached during the accuracy pass before the Hessian stage.
    # NOTE(review): a previous run hit a CUDA OOM at this stage; if it persists,
    # a smaller batch size for the loader passed here is the next thing to try.
    if CUDA:
        torch.cuda.empty_cache()
    # Follow the detected device instead of hard-coding use_cuda=True.
    max_eval, min_eval, hvps, pos_evals, neg_evals, pos_bases = min_max_fn(
        model, testloader, criterion, use_cuda=CUDA, verbose=False, **kwargs
    )
    if neg_evals is not None:
        neg_evals = neg_evals.cpu().numpy()
    eigs = pos_evals.cpu().numpy()
    test_effective_dimension = eff_dim(eigs)

    results['results'][model_name] = {
        #'train_acc': train_acc,
        'eval_acc': eval_acc,
        # 'eff_dim_train': train_effective_dimension,
        'eff_dim_test': test_effective_dimension,
    }
    print(results['results'][model_name])
    # Save after every model so finished results survive a later failure.
    torch.save(results, './resnet_eff_dims.pth')

Using cache found in /home/dgk27/.cache/torch/hub/pytorch_vision_main


Loaded resnet18!
 - Accuracy: (eval) 69.76%


OutOfMemoryError: CUDA out of memory. Tried to allocate 2.31 GiB (GPU 0; 10.57 GiB total capacity; 4.63 GiB already allocated; 2.31 GiB free; 8.05 GiB reserved in total by PyTorch) If reserved memory is >> allocated memory try setting max_split_size_mb to avoid fragmentation.  See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF