# Download features
PR layer1-layer5

In [4]:
! gdown --id 1GvwThxv4FZoSpLxXVuagTJiIWDEk1FQm
! unzip LogME-CTC.zip

Downloading...
From: https://drive.google.com/uc?id=1GvwThxv4FZoSpLxXVuagTJiIWDEk1FQm
To: /content/LogME-CTC.zip
100% 711k/711k [00:00<00:00, 147MB/s]
Archive:  LogME-CTC.zip
   creating: LogME-CTC/
  inflating: LogME-CTC/layer_1_groundtruth.txt  
  inflating: LogME-CTC/layer_1_output.txt  
  inflating: LogME-CTC/layer_2_output.txt  
  inflating: LogME-CTC/layer_3_output.txt  
  inflating: LogME-CTC/layer_4_output.txt  
 extracting: LogME-CTC/pr_feature.npy  
 extracting: LogME-CTC/ranking_feature_layer_1.npy  
 extracting: LogME-CTC/ranking_feature_layer_2.npy  
 extracting: LogME-CTC/ranking_feature_layer_3.npy  
 extracting: LogME-CTC/ranking_feature_layer_4.npy  


In [None]:
import warnings

import numpy as np
from numba import njit


@njit
def each_evidence(y_, f, fh, v, s, vh, N, D):
    """
    compute the maximum evidence for each class
    """
    epsilon = 1e-5
    alpha = 1.0
    beta = 1.0
    lam = alpha / beta
    tmp = (vh @ (f @ np.ascontiguousarray(y_)))
    for _ in range(11):
        # should converge after at most 10 steps
        # typically converge after two or three steps
        gamma = (s / (s + lam)).sum()
        # A = v @ np.diag(alpha + beta * s) @ v.transpose() # no need to compute A
        # A_inv = v @ np.diag(1.0 / (alpha + beta * s)) @ v.transpose() # no need to compute A_inv
        m = v @ (tmp * beta / (alpha + beta * s))
        alpha_de = (m * m).sum()
        alpha = gamma / (alpha_de + epsilon)
        beta_de = ((y_ - fh @ m) ** 2).sum()
        beta = (N - gamma) / (beta_de + epsilon)
        new_lam = alpha / beta
        if np.abs(new_lam - lam) / lam < 0.01:
            break
        lam = new_lam
    evidence = D / 2.0 * np.log(alpha) \
               + N / 2.0 * np.log(beta) \
               - 0.5 * np.sum(np.log(alpha + beta * s)) \
               - beta / 2.0 * (beta_de + epsilon) \
               - alpha / 2.0 * (alpha_de + epsilon) \
               - N / 2.0 * np.log(2 * np.pi)
    return evidence / N, alpha, beta, m


# use pseudo data to compile the function
# D = 20, N = 50
f_tmp = np.random.randn(20, 50).astype(np.float64)
each_evidence(np.random.randint(0, 2, 50).astype(np.float64), f_tmp, f_tmp.transpose(), np.eye(20, dtype=np.float64), np.ones(20, dtype=np.float64), np.eye(20, dtype=np.float64), 50, 20)


@njit
def truncated_svd(x):
    u, s, vh = np.linalg.svd(x.transpose() @ x)
    s = np.sqrt(s)
    u_times_sigma = x @ vh.transpose()
    k = np.sum((s > 1e-10) * 1)  # rank of f
    s = s.reshape(-1, 1)
    s = s[:k]
    vh = vh[:k]
    u = u_times_sigma[:, :k] / s.reshape(1, -1)
    return u, s, vh
truncated_svd(np.random.randn(20, 10).astype(np.float64))


class LogME(object):
    def __init__(self, regression=False):
        """
            :param regression: whether regression
        """
        self.regression = regression
        self.fitted = False
        self.reset()

    def reset(self):
        self.num_dim = 0
        self.alphas = []  # alpha for each class / dimension
        self.betas = []  # beta for each class / dimension
        # self.ms.shape --> [C, D]
        self.ms = []  # m for each class / dimension

    def _fit_icml(self, f: np.ndarray, y: np.ndarray):
        """
        LogME calculation proposed in the ICML 2021 paper
        "LogME: Practical Assessment of Pre-trained Models for Transfer Learning"
        at http://proceedings.mlr.press/v139/you21b.html
        """
        fh = f
        f = f.transpose()
        D, N = f.shape
        v, s, vh = np.linalg.svd(f @ fh, full_matrices=True)

        evidences = []
        self.num_dim = y.shape[1] if self.regression else int(y.max() + 1)
        for i in range(self.num_dim):
            y_ = y[:, i] if self.regression else (y == i).astype(np.float64)
            evidence, alpha, beta, m = each_evidence(y_, f, fh, v, s, vh, N, D)
            evidences.append(evidence)
            self.alphas.append(alpha)
            self.betas.append(beta)
            self.ms.append(m)
        self.ms = np.stack(self.ms)
        return np.mean(evidences)

    def _fit_fixed_point(self, f: np.ndarray, y: np.ndarray):
        """
        LogME calculation proposed in the arxiv 2021 paper
        "Ranking and Tuning Pre-trained Models: A New Paradigm of Exploiting Model Hubs"
        at https://arxiv.org/abs/2110.10545
        """
        N, D = f.shape  # k = min(N, D)
        if N > D: # direct SVD may be expensive
            u, s, vh = truncated_svd(f)
        else:
            u, s, vh = np.linalg.svd(f, full_matrices=False)
        # u.shape = N x k
        # s.shape = k
        # vh.shape = k x D
        s = s.reshape(-1, 1)
        sigma = (s ** 2)

        evidences = []
        self.num_dim = y.shape[1] if self.regression else int(y.max() + 1)
        for i in range(self.num_dim):
            y_ = y[:, i] if self.regression else (y == i).astype(np.float64)
            y_ = y_.reshape(-1, 1)
            x = u.T @ y_  # x has shape [k, 1], but actually x should have shape [N, 1]
            x2 = x ** 2
            res_x2 = (y_ ** 2).sum() - x2.sum()  # if k < N, we compute sum of xi for 0 singular values directly

            alpha, beta = 1.0, 1.0
            for _ in range(11):
                t = alpha / beta
                gamma = (sigma / (sigma + t)).sum()
                m2 = (sigma * x2 / ((t + sigma) ** 2)).sum()
                res2 = (x2 / ((1 + sigma / t) ** 2)).sum() + res_x2
                alpha = gamma / (m2 + 1e-5)
                beta = (N - gamma) / (res2 + 1e-5)
                t_ = alpha / beta
                evidence = D / 2.0 * np.log(alpha) \
                           + N / 2.0 * np.log(beta) \
                           - 0.5 * np.sum(np.log(alpha + beta * sigma)) \
                           - beta / 2.0 * res2 \
                           - alpha / 2.0 * m2 \
                           - N / 2.0 * np.log(2 * np.pi)
                evidence /= N
                if abs(t_ - t) / t <= 1e-3:  # abs(t_ - t) <= 1e-5 or abs(1 / t_ - 1 / t) <= 1e-5:
                    break
            evidence = D / 2.0 * np.log(alpha) \
                       + N / 2.0 * np.log(beta) \
                       - 0.5 * np.sum(np.log(alpha + beta * sigma)) \
                       - beta / 2.0 * res2 \
                       - alpha / 2.0 * m2 \
                       - N / 2.0 * np.log(2 * np.pi)
            evidence /= N
            m = 1.0 / (t + sigma) * s * x
            m = (vh.T @ m).reshape(-1)
            evidences.append(evidence)
            self.alphas.append(alpha)
            self.betas.append(beta)
            self.ms.append(m)
        self.ms = np.stack(self.ms)
        return np.mean(evidences)

    _fit = _fit_fixed_point

    def fit(self, f: np.ndarray, y: np.ndarray):
        """
        :param f: [N, F], feature matrix from pre-trained model
        :param y: target labels.
            For classification, y has shape [N] with element in [0, C_t).
            For regression, y has shape [N, C] with C regression-labels
        :return: LogME score (how well f can fit y directly)
        """
        if self.fitted:
            warnings.warn('re-fitting for new data. old parameters cleared.')
            self.reset()
        else:
            self.fitted = True
        f = f.astype(np.float64)
        if self.regression:
            y = y.astype(np.float64)
            if len(y.shape) == 1:
                y = y.reshape(-1, 1)
        return self._fit(f, y)

    def predict(self, f: np.ndarray):
        """
        :param f: [N, F], feature matrix
        :return: prediction, return shape [N, X]
        """
        if not self.fitted:
            raise RuntimeError("not fitted, please call fit first")
        f = f.astype(np.float64)
        logits = f @ self.ms.T
        if self.regression:
            return logits
        return np.argmax(logits, axis=-1)

# Calculate the logME class directly

In [None]:
print('Conducting transferability calculation...')
logme = LogME(regression=False)

Conducting transferability calculation...


In [None]:
import argparse, os
import torch
import torchvision.models as models
import torchvision.transforms as transforms

from torch.utils.data import DataLoader
from torchvision import datasets
from LogME import LogME
import pprint

models_hub = ['mobilenet_v2', 'mnasnet1_0', 'densenet121', 'densenet169', 'densenet201',
               'resnet34', 'resnet50', 'resnet101', 'resnet152', 'googlenet', 'inception_v3']


def get_configs():
    parser = argparse.ArgumentParser(
        description='Ranking pre-trained models')
    parser.add_argument('--gpu', default=0, type=int,
                        help='GPU num for training')
    parser.add_argument('--batch_size', default=48, type=int)

    # dataset
    parser.add_argument('--dataset', default="aircraft",
                        type=str, help='Name of dataset')
    parser.add_argument('--data_path', default="/data/FGVCAircraft/train",
                        type=str, help='Path of dataset')
    parser.add_argument('--num_workers', default=2, type=int,
                        help='Num of workers used in dataloading')
    # model
    configs = parser.parse_args()

    return configs


def forward_pass(score_loader, model, fc_layer):
    """
    a forward pass on target dataset
    :params score_loader: the dataloader for scoring transferability
    :params model: the model for scoring transferability
    :params fc_layer: the fc layer of the model, for registering hooks
    returns
        features: extracted features of model
        outputs: outputs of model
        targets: ground-truth labels of dataset
    """
    features = []
    outputs = []
    targets = []
    
    def hook_fn_forward(module, input, output):
        features.append(input[0].detach().cpu())
        outputs.append(output.detach().cpu())
    
    forward_hook = fc_layer.register_forward_hook(hook_fn_forward)
    print("before target data")
    model.eval()
    with torch.no_grad():
        for _, (data, target) in enumerate(score_loader):
            targets.append(target)
            data = data.cuda()
            _ = model(data)
    
    forward_hook.remove()
    features = torch.cat([x for x in features])
    outputs = torch.cat([x for x in outputs])
    targets = torch.cat([x for x in targets])
    print("==================shape of the features", features.shape)
    return features, outputs, targets



def score_model(score_loader, model_name, dataset_name):
    print(f'Calc Transferabilities of {model_name} on {dataset_name}')

    if model_name == 'inception_v3':
            model = models.__dict__[model_name](pretrained=True, aux_logits=False).cuda()
    else:
        model = models.__dict__[model_name](pretrained=True).cuda()

    # different models has different linear projection names
    if model_name in ['mobilenet_v2', 'mnasnet1_0']:
        fc_layer = model.classifier[-1]
    elif model_name in ['densenet121', 'densenet169', 'densenet201']:
        fc_layer = model.classifier
    elif model_name in ['resnet34', 'resnet50', 'resnet101', 'resnet152', 'googlenet', 'inception_v3']:
        fc_layer = model.fc
    else:
        # try your customized model
        raise NotImplementedError

    print('Conducting features extraction...')
    features, outputs, targets = forward_pass(score_loader, model, fc_layer)
    # predictions = F.softmax(outputs)

    print('Conducting transferability calculation...')
    logme = LogME(regression=False)
    score = logme.fit(features.numpy(), targets.numpy())

    

    print(f'LogME of {model_name}: {score}\n')
    return score


In [None]:
#config
dataset = "aircraft"
data_path ="/data/FGVCAircraft/train" 
num_workers = 2
batch_size = 48

In [None]:
# training 時做 data augmentation
train_transform = transforms.Compose([
    transforms.RandomHorizontalFlip(), # 隨機將圖片水平翻轉
    transforms.RandomRotation(15), # 隨機旋轉圖片
    transforms.ToTensor(), # 將圖片轉成 Tensor，並把數值 normalize 到 [0,1] (data normalization)
])
# testing 時不需做 data augmentation
test_transform = transforms.Compose([                          
    transforms.ToTensor(),
])
batch_size = 32
dataset = datasets.FGVCAircraft('../data',  download=True,
                       transform=train_transform)
test_set = datasets.FGVCAircraft('../data', 
                       transform=test_transform)

In [None]:
torch.cuda.set_device(0)

normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                 std=[0.229, 0.224, 0.225])
score_dict = {}
for model in models_hub:
        model = model
        if model == 'inception_v3': # inception_v3 is pretrained on 299x299 images
            transform=transforms.Compose([  
                transforms.Resize((299, 299)),
                transforms.ToTensor(),
                normalize
            ])
        else:
            transform=transforms.Compose([  # other models are pretrained on 224x224 images
                transforms.Resize((224, 224)),
                transforms.ToTensor(),
                normalize
            ])
        score_dataset = dataset
        # or try your customized dataset
        score_loader = DataLoader(score_dataset, batch_size=batch_size, shuffle=False,
            num_workers=num_workers, pin_memory=True)
        score_dict[model] = score_model(score_loader, model, score_dataset)
results = sorted(score_dict.items(), key=lambda i: i[1], reverse=True)
print(f'Models ranking on {dataset}: ')


Calc Transferabilities of mobilenet_v2 on Dataset FGVCAircraft
    Number of datapoints: 6667
    Root location: ../data
    StandardTransform
Transform: Compose(
               RandomHorizontalFlip(p=0.5)
               RandomRotation(degrees=[-15.0, 15.0], interpolation=nearest, expand=False, fill=0)
               ToTensor()
           )
Conducting features extraction...
before target data




RuntimeError: ignored

In [None]:
np.loadtxt('data_table.txt', skiprows=0 )