In [2]:
import logging
import os
import json
import jsonpickle
import pickle
import numpy as np
from sklearn.ensemble import RandomForestRegressor, RandomForestClassifier
from sklearn.feature_selection import RFECV
import utils.models
from utils.abstract import AbstractDetector
from utils.models import load_model, load_ground_truth, wrap_network_prediction
import torch
import torchvision
import skimage.io
from sklearn.metrics import accuracy_score, roc_auc_score, roc_curve, auc
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
from sklearn.model_selection import cross_val_score, ShuffleSplit
from sklearn import svm
from sklearn.calibration import CalibratedClassifierCV
from tqdm import tqdm
from collections import defaultdict
from multiprocessing import Process, Queue
# Restrict CUDA to device index 0 for this kernel.
os.environ['CUDA_VISIBLE_DEVICES'] = '0'
# perform standard scaling
from sklearn.preprocessing import StandardScaler
# Module-level StandardScaler instance used by later cells.
scaler = StandardScaler()
# Root directory holding one sub-directory per model. The original absolute
# path stays the default; set R13_DATA_DIR to run the notebook elsewhere.
r13_data = os.environ.get('R13_DATA_DIR', '/data/xinqiao/round13/data/')

In [3]:
def cross_val_report(X, y, scoring='roc_auc'):
    """Print shuffle-split cross-validation scores for a linear SVM and a random forest.

    Both estimators are evaluated on the same 5 random 80/20 splits
    (``ShuffleSplit`` with ``random_state=0``) and one summary line is printed
    per estimator.

    Args:
        X: feature matrix passed straight to ``cross_val_score``.
        y: target labels passed straight to ``cross_val_score``.
        scoring: scorer forwarded to ``cross_val_score``. When it is a string
            it is also used as the metric name in the printed summary.

    Returns:
        None. Results are only printed.
    """
    # Report the metric that was actually computed instead of always saying "accuracy".
    metric_name = scoring if isinstance(scoring, str) else "score"

    # An integer random_state makes every split() call yield the same folds,
    # so one splitter can be shared by both estimators.
    cv = ShuffleSplit(n_splits=5, test_size=0.2, random_state=0)

    estimators = (
        ("SVM", svm.SVC(kernel='linear', C=1, random_state=42)),
        # Seeded so that repeated runs of the notebook print the same RF scores.
        ("RF", RandomForestClassifier(max_depth=5, n_estimators=200, random_state=42)),
    )
    for label, estimator in estimators:
        scores = cross_val_score(estimator, X, y, cv=cv, scoring=scoring)
        print("%s: %0.2f %s with a standard deviation of %0.2f"
              % (label, scores.mean(), metric_name, scores.std()))

In [4]:
def svd_in_dim(weight, dim):
    """Return the singular values of ``weight`` reshaped to ``(weight.shape[dim], -1)``.

    Args:
        weight: tensor with at least ``dim + 1`` dimensions.
        dim: index of the dimension whose size becomes the row count of the
            2-D matrix that is decomposed.

    Returns:
        1-D tensor of singular values in descending order.
    """
    # NOTE(review): for dim != 0 this is a plain row-major reshape, not a
    # mode-`dim` unfolding (the axis is not moved to the front first). Left
    # unchanged because previously extracted features and trained models
    # depend on this exact layout -- confirm whether that is intended.
    matrix = weight.reshape(weight.shape[dim], -1)
    # torch.svd(..., compute_uv=False) is deprecated; svdvals computes only
    # the singular values.
    return torch.linalg.svdvals(matrix)

In [5]:
def get_features(model, num_values=3):
    """Build a feature vector from the leading singular values of a model's weights.

    Every parameter with more than one dimension contributes ``num_values``
    singular values per decomposed dimension:

    * more than 3 dimensions: dimensions ``0 .. ndim - 3`` are each decomposed;
    * 2 or 3 dimensions: only dimension 0 is decomposed.

    Parameters with a single dimension are skipped.

    Args:
        model: object exposing ``named_parameters()`` whose parameters have a
            ``.data`` tensor.
        num_values: number of leading singular values kept per decomposition
            (default 3, matching the original hard-coded behaviour).

    Returns:
        1-D ``numpy`` array containing the concatenated singular values.
    """
    svd_list = []
    for _name, param in model.named_parameters():
        weight = param.data
        numdims = len(weight.shape)
        if numdims <= 1:
            continue

        dims = range(numdims - 2) if numdims > 3 else (0,)
        for mydim in dims:
            s = svd_in_dim(weight, mydim).detach().cpu().numpy()
            top = s[:num_values]
            if top.shape[0] < num_values:
                # A matrix with fewer than `num_values` rows/columns has fewer
                # singular values; zero-pad so the feature length stays fixed
                # instead of raising IndexError.
                top = np.pad(top, (0, num_values - top.shape[0]))
            svd_list.extend(top)

    return np.asarray(svd_list)

In [6]:
def load_models_dirpath(models_dirpath, training):
    """Extract per-architecture feature lists from a collection of model directories.

    Args:
        models_dirpath: iterable of directory paths; each must contain a
            ``model.pt`` file.
        training: when truthy, the ground truth of every model is loaded too.

    Returns:
        ``(X, Y)`` when ``training`` is truthy, otherwise just ``X``. Both are
        ``defaultdict(list)`` keyed by the model class reported by
        ``load_model``. ``X`` holds feature vectors and ``Y`` holds the matching
        ground-truth values, in the same order.
    """
    X = defaultdict(list)
    Y = defaultdict(list)

    for model_path in tqdm(models_dirpath):
        model, _model_repr, model_class = load_model(os.path.join(model_path, "model.pt"))

        X[model_class].append(get_features(model))
        if training:
            # Ground truth is only read when it is used, so inference-time
            # directories are not asked for it.
            Y[model_class].append(load_ground_truth(model_path))

    # Parenthesised on purpose: `return X, Y if training else X` parses as
    # `(X, (Y if training else X))` and returned `(X, X)` for training=False.
    return (X, Y) if training else X

In [6]:
# Every entry under the dataset root is treated as a model directory; sorting
# keeps the processing order stable between runs.
model_path_list = sorted(
    os.path.join(r13_data, entry) for entry in os.listdir(r13_data)
)
# Feature extraction for the whole dataset, with ground-truth labels.
X, Y = load_models_dirpath(models_dirpath=model_path_list, training=True)

  from .autonotebook import tqdm as notebook_tqdm
100%|██████████| 128/128 [12:15<00:00,  5.75s/it]


In [7]:
# Feature cache. If either pickle is missing, persist the freshly extracted
# X / Y first so that a clean "Restart & Run All" works. Existing cache files
# are never overwritten; delete them to force a refresh.
if not (os.path.exists('X.pkl') and os.path.exists('Y.pkl')):
    # save the features
    with open('X.pkl', 'wb') as f:
        pickle.dump(X, f)
    with open('Y.pkl', 'wb') as f:
        pickle.dump(Y, f)

# load the features
# NOTE: pickle.load can execute arbitrary code; only load files produced by
# this notebook.
with open('X.pkl', 'rb') as f:
    X = pickle.load(f)
with open('Y.pkl', 'rb') as f:
    Y = pickle.load(f)

In [8]:
# Train one detector per model class: scale -> RFECV feature selection ->
# linear SVM -> sigmoid calibration, then persist the three fitted objects.
# The output directory is created up front so the dumps below cannot fail on
# a fresh checkout.
os.makedirs('learned_parameters', exist_ok=True)

for model_class in X.keys():
    X[model_class] = np.asarray(X[model_class])
    Y[model_class] = np.asarray(Y[model_class])

    # standard scaling
    # A fresh scaler per class, and the scaled matrix is kept in a local
    # instead of overwriting X[model_class]. Otherwise re-running this cell
    # would fit (and save) a scaler on already-scaled data, which is wrong for
    # raw features at inference time.
    scaler = StandardScaler()
    X_scaled = scaler.fit_transform(X[model_class])

    clf = svm.SVC(kernel='linear', C=1, random_state=42)
    rfecv = RFECV(estimator=clf, step=1, cv=5, scoring='roc_auc', min_features_to_select=10)
    rfecv.fit(X_scaled, Y[model_class])

    newfeats = X_scaled[:, rfecv.support_]

    ## Get cv scores
    # NOTE: scaler and feature selection were fit on the full data set, so the
    # cross-validation scores printed here are optimistic.
    print("Optimal number of features : %d" % rfecv.n_features_)
    print(model_class)
    cross_val_report(newfeats, Y[model_class], scoring='accuracy')

    ## Fit model
    clf = svm.SVC(kernel='linear', C=1, random_state=42, probability=True)
    clf.fit(newfeats, Y[model_class])
    # NOTE: the calibrator is fit on the same samples the SVM was trained on,
    # and the score printed below is therefore a training-set score.
    calibrator = CalibratedClassifierCV(clf, method='sigmoid', cv='prefit')
    calibrator.fit(newfeats, Y[model_class])
    print(calibrator.score(newfeats, Y[model_class]))

    # save model, scaler, rfecv
    with open('learned_parameters/model_{}.pkl'.format(model_class), 'wb') as f:
        pickle.dump(calibrator, f)
    with open('learned_parameters/scaler_{}.pkl'.format(model_class), 'wb') as f:
        pickle.dump(scaler, f)
    with open('learned_parameters/rfecv_{}.pkl'.format(model_class), 'wb') as f:
        pickle.dump(rfecv, f)


Optimal number of features : 105
SSD
SVM: 0.66 accuracy with a standard deviation of 0.10
RF: 0.52 accuracy with a standard deviation of 0.12
0.8958333333333334
Optimal number of features : 23
DetrForObjectDetection
SVM: 0.97 accuracy with a standard deviation of 0.06
RF: 0.57 accuracy with a standard deviation of 0.13
1.0
Optimal number of features : 140
FasterRCNN
SVM: 0.70 accuracy with a standard deviation of 0.20
RF: 0.80 accuracy with a standard deviation of 0.13
0.7708333333333334


In [9]:
# Reload the persisted FasterRCNN artifacts from disk, in the order
# scaler -> RFECV selector -> calibrated classifier. Each one rebinds the
# corresponding notebook-level name (`scaler`, `rfecv`, `model`).
# NOTE: pickle.load can execute arbitrary code; only load trusted files.
with open('learned_parameters/scaler_{}.pkl'.format('FasterRCNN'), mode='rb') as scaler_file:
    scaler = pickle.load(scaler_file)

with open('learned_parameters/rfecv_{}.pkl'.format('FasterRCNN'), mode='rb') as rfecv_file:
    rfecv = pickle.load(rfecv_file)

with open('learned_parameters/model_{}.pkl'.format('FasterRCNN'), mode='rb') as model_file:
    model = pickle.load(model_file)

In [15]:
# Probability from column 1 of predict_proba for the first row of `newfeats`.
# `newfeats` is whatever the last iteration of the training loop left behind
# (already scaled and feature-selected), so this only matches the reloaded
# FasterRCNN model if that class was processed last.
model.predict_proba(newfeats[0].reshape(1, -1))[0, 1]

0.16767491590067574