In [1]:
import pandas as pd
from numpyencoder import NumpyEncoder
import json
from util.nn_model import NNModel
from util.evaluator import *
from sklearn import preprocessing
from sklearn.model_selection import train_test_split

In [2]:
dataset = pd.read_csv("../../data/synthetic/synthetic_data_simple.csv")
dataset = dataset.to_numpy().astype(np.float32)
lens = len(dataset)
train, test = dataset[0:int(lens * 0.7), ], dataset[int(lens*0.7):, ]
train_x, train_y = train[:, 0:4], train[:, 4]
test_x, test_y = test[:, 0:4], test[:, 4]
train_y = train_y.astype(np.int64)
test_y = test_y.astype(np.int64)

model = NNModel('../../train/synthetic/synthetic_model_simple.pt')
model1 = NNModel('../../train/synthetic/synthetic_model_simple_v1.pt')

# obtain true negative set of test set
idx = np.where(test_y == 0)[0]
pred_y = model.predict(test_x)
idx1 = np.where(pred_y == 0)[0]
tn_idx = set(idx).intersection(idx1)
abnormal_test = test_x[list(tn_idx)]

# obtain true positive set of train set
idx2 = np.where(train_y == 1)[0]
pred_ty = model.predict(train_x)
idx3 = np.where(pred_ty == 1)[0]
tp_idx = set(idx2).intersection(idx3)
normal_train = train_x[list(tp_idx)]

# set the value to replace
normal_range = np.array([[0.5, 0.4, 0., 0.5]]).astype(np.float32)
normal_range = np.array([[0.55, 0.45, 0.05, 0.55]]).astype(np.float32)

# initialize the evaluator
evaluator = Evaluator(train_x, normal_train)

In [3]:
def run_cemsp(json_path):
    with open(json_path) as f:
        cemsp_json = json.load(f)

    d = len(cemsp_json['data'][0][0])
    cfs_list = cemsp_json['cf']
    _cfs_list = cemsp_json['cf2']
    diversity_list = []
    diversity2_list = []

    count_diversity_list = []
    count_diversity2_list = []

    for cfs, _cfs in zip(cfs_list, _cfs_list):
        cfs = [item['cf'] for item in cfs]
        cfs = np.reshape(cfs, (-1, d))
        # print(cfs.shape)
        diversity = evaluator.diversity(cfs)
        count_diversity = evaluator.count_diversity(cfs)

        _cfs = [item for item in _cfs]
        _cfs = np.reshape(_cfs, (-1, d))
        print(_cfs.shape)
        diversity2 = evaluator.diversity(_cfs)
        count_diversity2 = evaluator.count_diversity(_cfs)

        diversity_list.append(diversity)
        diversity2_list.append(diversity2)
        count_diversity_list.append(count_diversity)
        count_diversity2_list.append(count_diversity2)

    print(cemsp_json['diversity'])
    print(diversity_list)

    print(cemsp_json['diversity2'])
    print(diversity2_list)

    cemsp_json['diversity'] = diversity_list
    cemsp_json['diversity2'] = diversity2_list
    cemsp_json['count_diversity'] = count_diversity_list
    cemsp_json['count_diversity2'] = count_diversity2_list
    with open(json_path, "w") as f:
        json.dump(cemsp_json, f, cls = NumpyEncoder)

def run(json_path):
    with open(json_path) as f:
        cemsp_json = json.load(f)

    d = len(cemsp_json['data'][0][0])
    cfs_lists = cemsp_json['cf']
    diversity_list = []
    diversity2_list = []

    count_diversity_list = []
    count_diversity2_list = []

    for cfs_list in cfs_lists:

        cfs = [_tmp_result['cf'] for _tmp_result in cfs_list]
        cfs = np.reshape(cfs, (-1, d))
        diversity = evaluator.diversity(cfs)
        count_diversity = evaluator.count_diversity(cfs)

        _cfs = [_tmp_result['cf2'] for _tmp_result in cfs_list]
        _cfs = np.reshape(_cfs, (-1, d))
        diversity2 = evaluator.diversity(_cfs)
        count_diversity2 = evaluator.count_diversity(_cfs)

        diversity_list.append(diversity)
        diversity2_list.append(diversity2)
        count_diversity_list.append(count_diversity)
        count_diversity2_list.append(count_diversity2)

    print(cemsp_json['diversity'])
    print(diversity_list)

    print(cemsp_json['diversity2'])
    print(diversity2_list)

    cemsp_json['diversity'] = diversity_list
    cemsp_json['diversity2'] = diversity2_list
    cemsp_json['count_diversity'] = count_diversity_list
    cemsp_json['count_diversity2'] = count_diversity2_list
    with open(json_path, "w") as f:
        json.dump(cemsp_json, f, cls = NumpyEncoder)

In [4]:
cemsp_path = 'synthetic_cemsp.json'
cfproto_path = 'synthetic_cfproto1.json'
dice_path = 'synthetic_dice.json'
gs_path = 'synthetic_growingsphere.json'
plaincf_path = 'synthetic_plaincf.json'

In [5]:
run_cemsp(cemsp_path)
run(cfproto_path)
run(dice_path)
run(gs_path)
run(plaincf_path)

(3, 4)
(3, 4)
(3, 4)
(2, 4)
(2, 4)
(3, 4)
(2, 4)
(2, 4)
(3, 4)
(3, 4)
(3, 4)
(2, 4)
(3, 4)
(3, 4)
(3, 4)
(2, 4)
(2, 4)
(2, 4)
(3, 4)
(3, 4)
(3, 4)
(2, 4)
(3, 4)
(3, 4)
(3, 4)
(2, 4)
(2, 4)
(3, 4)
(2, 4)
(2, 4)
(2, 4)
(2, 4)
(2, 4)
(2, 4)
(3, 4)
(3, 4)
(2, 4)
(2, 4)
(2, 4)
(2, 4)
(2, 4)
(2, 4)
(2, 4)
(2, 4)
(2, 4)
(2, 4)
(2, 4)
(3, 4)
(2, 4)
(2, 4)
(3, 4)
(3, 4)
(2, 4)
(2, 4)
(2, 4)
(3, 4)
(2, 4)
(2, 4)
(2, 4)
(2, 4)
(2, 4)
(2, 4)
(2, 4)
(2, 4)
(3, 4)
(2, 4)
(2, 4)
(2, 4)
(3, 4)
(2, 4)
(3, 4)
(2, 4)
(2, 4)
(2, 4)
(2, 4)
(2, 4)
(3, 4)
(2, 4)
(2, 4)
(2, 4)
(2, 4)
(2, 4)
(3, 4)
(3, 4)
(2, 4)
(2, 4)
(2, 4)
(3, 4)
(2, 4)
(3, 4)
(3, 4)
(2, 4)
(2, 4)
(3, 4)
(3, 4)
(2, 4)
(3, 4)
(2, 4)
(2, 4)
(2, 4)
(3, 4)
(2, 4)
(3, 4)
(2, 4)
(2, 4)
(2, 4)
(3, 4)
(3, 4)
(2, 4)
(2, 4)
(2, 4)
(2, 4)
(2, 4)
(2, 4)
(3, 4)
(2, 4)
(3, 4)
(3, 4)
(2, 4)
(3, 4)
(2, 4)
(2, 4)
(2, 4)
(2, 4)
(3, 4)
(2, 4)
(2, 4)
(3, 4)
(3, 4)
(2, 4)
(2, 4)
(2, 4)
(3, 4)
(2, 4)
(3, 4)
(2, 4)
(3, 4)
(2, 4)
(3, 4)
(2, 4)
(2, 4)
(3, 4)
(2, 4)