In [1]:
import os
import sys

import time
# import torch
import numpy as np
import pandas as pd

import sys
import argparse

from sklearn.decomposition import PCA

# -------- fix random seed 
def setup_seed(seed):
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    np.random.seed(seed)
    random.seed(seed)
    torch.backends.cudnn.deterministic = True
class Logger(object):
    def __init__(self, filename='default.log', stream=sys.stdout):
        self.terminal = stream
        self.log = open(filename, 'a')
    
    def write(self, message):
        self.terminal.write(message)
        self.log.write(message)

    def flush(self):
        pass

import metrics

np.random.seed(1)

train_In_size = 1000
train_OoD_size = 0

exp_var_ratio = 0.95
import pandas as pd

train_id = pd.read_csv("./train.csv")
test_id = pd.read_csv("./test.csv")
ood_id = pd.read_csv("./ood.csv")
ood_train_id = pd.read_csv("./ood_train.csv")

cache_name = os.path.join('./', "CIFAR10_ResNet18_ce_pretrain_features_test.npy")
CIFAR10_test = np.load(cache_name, allow_pickle=True)
cache_name = os.path.join('./', "CIFAR10_ResNet18_ce_pretrain_features_DTD.npy")
DTD = np.load(cache_name, allow_pickle=True)

train = CIFAR10_test[train_id.T.to_numpy()[0]-1]

shuffled_train = np.random.permutation(train)
train = shuffled_train[:train_In_size]

test = CIFAR10_test[test_id.T.to_numpy()[0]-1]
ood = DTD[ood_id.T.to_numpy()[0]-1] #ood_id.T.to_numpy()[0]-1

ood_train = DTD[ood_train_id.T.to_numpy()[0]-1]
shuffled_ood_train = np.random.permutation(ood_train)
ood_train = shuffled_ood_train[:train_OoD_size]

# train = np.vstack((train, ood_train))
train = np.vstack((test, ood))

# PCA step
pca = PCA(n_components=50)
train = pca.fit_transform(train)  # Fit & transform train
test = pca.transform(test) 
ood = pca.transform(ood) 


feat_log = train
feat_log = feat_log.astype(np.float32)

feat_log_val = test
feat_log_val = feat_log_val.astype(np.float32)

ood_feat_log_all = {}
ood_feat_log = ood
ood_feat_log = ood_feat_log.astype(np.float32)
ood_feat_log_all['DTD'] = ood_feat_log
print("Features loaded.")
print("Feature dimension = %d"%feat_log.shape[1])

# -------- such an l2 normalization indicates a feature mapping w.r.t. a cosine kernel
normalizer = lambda x: x / (np.linalg.norm(x, ord=2, axis=-1, keepdims=True) + 1e-10)
prepos_feat = lambda x: np.ascontiguousarray(normalizer(x))

ftrain = prepos_feat(feat_log)
ftest = prepos_feat(feat_log_val)
food_all = {}
food_all['DTD'] = prepos_feat(ood_feat_log_all['DTD'])

###### missing code for CoRP

# -------- centralize the mapped features
mu = ftrain.mean(axis=0)
ftrain = ftrain - mu
ftest = ftest - mu
for ood_dataset, food in food_all.items():
    food_all[ood_dataset] = food - mu

# -------- linear PCA
print()
print("Running linear PCA...")
K = ftrain.T.dot(ftrain)
u_full, s, _ = np.linalg.svd(K)
# ---- the reduction dimension q is
# ---- selected according to the explained variance ratio
q, s_accuml = -1, np.zeros(ftrain.shape[1])
for i in range(ftrain.shape[1]):
    s_accuml[i] = sum(s[:i]) / sum(s)
    if i > 0 and q < 0:
        if s_accuml[i-1] < exp_var_ratio and s_accuml[i] >= exp_var_ratio:
            q = i
print("Linear PCA finished.")
print("explained variance ratio = %f"%exp_var_ratio)
print("reduction dimension    q = %d"%q)
print("s_accuml at q-1 = %f"%s_accuml[q-1])
print("s_accuml at q   = %f"%s_accuml[q])
print("s_accuml at q+1 = %f"%s_accuml[q+1])

# -------- reconstruction error for OoD detection
u_q = u_full[:,:q]

reconstruct_in = u_q.dot(u_q.T).dot(ftrain.T).T
scores_in_train = - np.linalg.norm(ftrain-reconstruct_in, ord=2, axis=1)


reconstruct_in = u_q.dot(u_q.T).dot(ftest.T).T
scores_in = - np.linalg.norm(ftest-reconstruct_in, ord=2, axis=1)

all_results = []
for ood_dataset, food in food_all.items():
    reconstruct_ood = u_q.dot(u_q.T).dot(food.T).T 
    scores_ood = - np.linalg.norm(food-reconstruct_ood, ord=2, axis=1)
print()

np.save("./KPCA_CoP_In.npy", -scores_in)
np.save("./KPCA_CoP_OOD.npy", -scores_ood)

# print(metrics.cal_metric(scores_in, scores_ood))

# print()
# print("The program ends...")
# print("-------- -------- --------")
# print()

Features loaded.
Feature dimension = 50

Running linear PCA...
Linear PCA finished.
explained variance ratio = 0.950000
reduction dimension    q = 10
s_accuml at q-1 = 0.944795
s_accuml at q   = 0.953910
s_accuml at q+1 = 0.959074



In [2]:
print(metrics.cal_metric(scores_in, scores_ood))

print()
print("The program ends...")
print("-------- -------- --------")
print()

{'FPR': 0.913, 'AUROC': 0.5767335, 'DTERR': 0.33166666666666667, 'AUIN': 0.7318662660869695, 'AUOUT': 0.3906969677431318}

The program ends...
-------- -------- --------



In [130]:
# No training samples

import os
import sys

import time
import torch
import numpy as np
import pandas as pd

import sys
import argparse

# -------- fix random seed 
def setup_seed(seed):
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    np.random.seed(seed)
    random.seed(seed)
    torch.backends.cudnn.deterministic = True
class Logger(object):
    def __init__(self, filename='default.log', stream=sys.stdout):
        self.terminal = stream
        self.log = open(filename, 'a')
    
    def write(self, message):
        self.terminal.write(message)
        self.log.write(message)

    def flush(self):
        pass

import metrics

np.random.seed(1)

train_In_size = 1000
train_OoD_size = 0

exp_var_ratio = 0.95
import pandas as pd

train_id = pd.read_csv("./train.csv")
test_id = pd.read_csv("./test.csv")
ood_id = pd.read_csv("./ood.csv")
ood_train_id = pd.read_csv("./ood_train.csv")

cache_name = os.path.join('./', "CIFAR10_ResNet18_ce_pretrain_features_test.npy")
CIFAR10_test = np.load(cache_name, allow_pickle=True)
cache_name = os.path.join('./', "CIFAR10_ResNet18_ce_pretrain_features_DTD.npy")
DTD = np.load(cache_name, allow_pickle=True)

train = CIFAR10_test[train_id.T.to_numpy()[0]-1]

shuffled_train = np.random.permutation(train)
train = shuffled_train[:train_In_size]

test = CIFAR10_test[test_id.T.to_numpy()[0]-1]
ood = DTD[ood_id.T.to_numpy()[0]-1] #ood_id.T.to_numpy()[0]-1

ood_train = DTD[ood_train_id.T.to_numpy()[0]-1]
shuffled_ood_train = np.random.permutation(ood_train)
ood_train = shuffled_ood_train[:train_OoD_size]

# train = np.vstack((train, ood_train))
train = np.vstack((test, ood))

feat_log = train
feat_log = feat_log.astype(np.float32)

feat_log_val = test
feat_log_val = feat_log_val.astype(np.float32)

ood_feat_log_all = {}
ood_feat_log = ood
ood_feat_log = ood_feat_log.astype(np.float32)
ood_feat_log_all['DTD'] = ood_feat_log
print("Features loaded.")
print("Feature dimension = %d"%feat_log.shape[1])

# -------- such an l2 normalization indicates a feature mapping w.r.t. a cosine kernel
normalizer = lambda x: x / (np.linalg.norm(x, ord=2, axis=-1, keepdims=True) + 1e-10)
prepos_feat = lambda x: np.ascontiguousarray(normalizer(x))

ftrain = prepos_feat(feat_log)
ftest = prepos_feat(feat_log_val)
food_all = {}
food_all['DTD'] = prepos_feat(ood_feat_log_all['DTD'])

###### missing code for CoRP

# -------- centralize the mapped features
mu = ftrain.mean(axis=0)
ftrain = ftrain - mu
ftest = ftest - mu
for ood_dataset, food in food_all.items():
    food_all[ood_dataset] = food - mu

# -------- linear PCA
print()
print("Running linear PCA...")
K = ftrain.T.dot(ftrain)
u_full, s, _ = np.linalg.svd(K)
# ---- the reduction dimension q is
# ---- selected according to the explained variance ratio
q, s_accuml = -1, np.zeros(ftrain.shape[1])
for i in range(ftrain.shape[1]):
    s_accuml[i] = sum(s[:i]) / sum(s)
    if i > 0 and q < 0:
        if s_accuml[i-1] < exp_var_ratio and s_accuml[i] >= exp_var_ratio:
            q = i
print("Linear PCA finished.")
print("explained variance ratio = %f"%exp_var_ratio)
print("reduction dimension    q = %d"%q)
print("s_accuml at q-1 = %f"%s_accuml[q-1])
print("s_accuml at q   = %f"%s_accuml[q])
print("s_accuml at q+1 = %f"%s_accuml[q+1])

# -------- reconstruction error for OoD detection
u_q = u_full[:,:q]

reconstruct_in = u_q.dot(u_q.T).dot(ftrain.T).T
scores_in_train = - np.linalg.norm(ftrain-reconstruct_in, ord=2, axis=1)


reconstruct_in = u_q.dot(u_q.T).dot(ftest.T).T
scores_in = np.linalg.norm(ftest-reconstruct_in, ord=2, axis=1)

all_results = []
for ood_dataset, food in food_all.items():
    reconstruct_ood = u_q.dot(u_q.T).dot(food.T).T 
    scores_ood = np.linalg.norm(food-reconstruct_ood, ord=2, axis=1)
print()

np.save("./KPCA_CoP_In.npy", scores_in)
np.save("./KPCA_CoP_OOD.npy", scores_ood)

# print(metrics.cal_metric(scores_in, scores_ood))

# print()
# print("The program ends...")
# print("-------- -------- --------")
# print()

Features loaded.
Feature dimension = 512

Running linear PCA...
Linear PCA finished.
explained variance ratio = 0.950000
reduction dimension    q = 10
s_accuml at q-1 = 0.941782
s_accuml at q   = 0.953404
s_accuml at q+1 = 0.957051



In [131]:
print(metrics.cal_metric(- scores_in, - scores_ood))

print()
print("The program ends...")
print("-------- -------- --------")
print()

{'FPR': 0.974, 'AUROC': 0.38311775, 'DTERR': 0.3333333333333333, 'AUIN': 0.5809770880420106, 'AUOUT': 0.27326118403340655}

The program ends...
-------- -------- --------



In [127]:
scores_in.mean()
scores_ood.mean()

-0.13983703