## Analyzing the results and plotting the graphs

### Performance of the models

In [None]:
import torch
from utils.model import DNN
import yaml
from utils.dataset import return_dataloader
import numpy as np
from algorithms.milp import MILP_Inte, MILP_Avai
from attrdict import AttrDict
import matplotlib.pyplot as plt
import cvxpy as cp
from tqdm import tqdm
import pandas as pd

# Settings handed to the project's dataloader factory: feature indices 0-5
# are listed as flexible, 6-11 as fixed.
args = AttrDict(
    dict(
        flexible_feature=np.arange(0, 6),
        fixed_feature=np.arange(6, 12),
        batch_size=100,
        impute_value=0.0,
    )
)

# Parse the YAML configuration with the safe loader.
with open("utils/config.yml", 'r') as config_file:
    config = yaml.safe_load(config_file)

# Build both loaders; the trailing tuple is shown as the cell output.
train_loader, test_loader = return_dataloader(config, args)
(len(train_loader.dataset), len(test_loader.dataset))

In [None]:
# Plot options: global matplotlib defaults shared by every figure below
plt.rcParams.update({
    'lines.linewidth': 3.5,
    'font.size': 16,
    'font.weight': 'bold',
    "axes.labelweight": "bold",
    'figure.figsize': (6,4),
    'figure.constrained_layout.use': True,
    'legend.loc': 'lower right',
    'legend.fontsize': 16,
})

In [None]:
print('train size: ', len(train_loader.sampler), 'test size: ', len(test_loader.sampler))
# Input width is read from the first training sample.
feature_size = train_loader.dataset[0][0].size(0)


def _load_dnn(weight_path):
    """Build a DNN with the configured architecture and load weights from *weight_path*."""
    model = DNN(feature_size, config['no_layer'], config['first_hidden_size'])
    # map_location='cpu' lets a checkpoint saved in a GPU session load on a
    # CPU-only machine; the weights are copied into the CPU model either way.
    model.load_state_dict(torch.load(weight_path, map_location='cpu'))
    return model


model_clean = _load_dnn('trained_models/dnn_model.pt')
model_adver_05 = _load_dnn('trained_models/dnn_model_adver_0.5.pt')
model_adver_00 = _load_dnn('trained_models/dnn_model_adver_0.0.pt')
# Displayed as the cell output.
model_clean.layer_list

In [None]:
def return_mape(model, loader):
    """Return the mean absolute percentage error of *model* over *loader*.

    Args:
        model: Callable mapping a batch of inputs to a tensor of predictions.
        loader: Iterable yielding ``(data, target)`` tensor pairs.

    Returns:
        The mean of ``|output - target| / |target| * 100`` over every
        element of every batch.
    """
    mape_summary = []
    # Pure evaluation: no gradients are needed, so skip building the
    # autograd graph for each forward pass.
    with torch.no_grad():
        for data, target in loader:
            output = model(data)
            mape = torch.abs((output - target) / target) * 100
            mape_summary += mape.detach().numpy().tolist()
    return np.mean(mape_summary)

In [None]:
# MAPE of each loaded model on the train and test loaders
for label, model in (
    ('Clean', model_clean),
    ('Adver 0.5', model_adver_05),
    ('Adver 0.0', model_adver_00),
):
    print(label + ':')
    print(label + ' Train', return_mape(model, train_loader))
    print(label + ' Test', return_mape(model, test_loader))

### Computational time

In [None]:
# Average the stored 'time' entry over every (mode, index 1-6, impute value)
# result file. Files ending in "_True" are reported as parallel and files
# ending in "_False" as sequential.
for flag, label in (('True', 'parallel'), ('False', 'sequential')):
    time = 0.
    idx = 0
    for mode in ['max', 'min']:
        for i in range(1, 7):
            for impute_value in [0.0, 0.5]:
                result_path = f"milp_result/plain_avai_{mode}_{i}_{impute_value}_{flag}.npy"
                summary = np.load(result_path, allow_pickle=True).item()
                time += summary['time']
                idx += 1

    print('average time ' + label + ': ', time / idx)

### Adversarial Performance of the clean model

#### Availability attacks

In [None]:
def evaluate_avai(train_type, mode, impute_value):
    """Collect the stored 'avai' MILP results for indices 1 to 6.

    For each ``i`` in 1..6 this loads
    ``milp_result/{train_type}_avai_{mode}_{i}_{impute_value}_True.npy``
    and reads a few entries of the pickled summary dict.

    Args:
        train_type: File-name prefix selecting the model (e.g. ``'plain'``).
        mode: Attack direction used in the file name (e.g. ``'max'``/``'min'``).
        impute_value: Impute value used in the file name.

    Returns:
        Tuple ``(dev, time, missing_no, zero_no)`` of six-element lists:
        ``dev`` holds each summary's ``'dev'`` entry unchanged, ``time`` the
        ``'time'`` entry rounded to two decimals, ``missing_no`` the
        ``'actual missing no'`` entry, and ``zero_no`` the ``'zero no'``
        entry as a percentage of the test-set size, rounded to two decimals.
    """
    dev, time, missing_no, zero_no = [], [], [], []
    # Relies on the notebook-level test_loader for the percentage base.
    test_size = len(test_loader.dataset)
    for i in range(1, 7):
        # allow_pickle is required because each file stores a pickled dict;
        # only load result files produced by this project.
        summary = np.load(
            f"milp_result/{train_type}_avai_{mode}_{i}_{impute_value}_True.npy",
            allow_pickle=True,
        ).item()
        time.append(round(summary['time'], 2))
        dev.append(summary['dev'])
        missing_no.append(summary['actual missing no'])
        zero_no.append(round(summary['zero no'] / test_size * 100, 2))

    return dev, time, missing_no, zero_no

In [None]:
# Settings for this figure set
mode, impute_value, train_type = 'max', 0.0, 'plain'
dev, time, missing_no, zero_no = evaluate_avai(train_type, mode, impute_value)
print('zero no:', zero_no)

# Box plot of the deviations, one box per index; whiskers span min to max
plt.boxplot(dev, widths = 0.5, showfliers=False, whis=(0,100))
plt.xlabel(r'$\beta$')
plt.ylabel('MPE')
plt.ylim((-2, 35) if mode == 'max' else (-80, 2))

plt.savefig(f'figure/dev_{mode}_{impute_value}.pdf', dpi=300)
plt.show()

# One histogram of the 'actual missing no' values per index, with bins
# centred on the integers 0..6
bins = np.arange(0, 6 + 1.5) - 0.5
tick_positions = bins + 0.5
data_count = len(test_loader.dataset)
for i in range(6):
    plt.figure()
    plt.hist(missing_no[i], bins = bins)
    plt.xticks(tick_positions)
    plt.xlim((-0.5, 6.5))
    plt.ylim((0, data_count))
    plt.xlabel('Actual Missing No.')
    plt.ylabel('Data No.')
    plt.savefig(f'figure/missing_{mode}_{impute_value}_{i}.pdf', dpi=300)
    plt.show()

#### Integrity Adversarial Attack

In [None]:
def evaluate_inte(mode, epsilon):
    """Return the 'dev' entry of the stored 'inte' result for *mode* and *epsilon*.

    Reads ``milp_result/plain_inte_{mode}_{epsilon}_True.npy``, which holds a
    pickled summary dict.
    """
    result_path = f'milp_result/plain_inte_{mode}_{epsilon}_True.npy'
    return np.load(result_path, allow_pickle=True).item()['dev']

In [None]:
mode = 'max'

# Deviations of the 'avai' results for impute values 0.0 and 0.5
dev_avai_0 = evaluate_avai('plain', mode, 0.0)[0]
dev_avai_5 = evaluate_avai('plain', mode, 0.5)[0]

# Deviations of the 'inte' results, one entry per epsilon
epsilons = [0.1, 0.2, 0.3, 0.4, 0.5]
dev_inte = [evaluate_inte(mode, eps) for eps in epsilons]

plt.boxplot(dev_inte, widths = 0.5, showfliers=False, whis=(0,100))
plt.xlabel(r'$\epsilon$')
plt.xticks(list(range(1, len(epsilons) + 1)), epsilons)
# Dashed reference lines: mean deviation of the last (index 6) 'avai' result
for avai_dev, colour, tag in ((dev_avai_0, 'r', '0'), (dev_avai_5, 'g', 'mean')):
    plt.hlines(np.mean(avai_dev[-1]), 0, 6, colors=colour, linestyles='dashed', label = f'AVAI({mode},{tag},6)')
plt.legend(loc = 'best')
plt.ylabel('MPE')
plt.savefig(f'figure/inte_{mode}.pdf', dpi=300)
plt.show()

### Adversarial Training

In [None]:
mode = 'min'
impute_value = 0.0

# Capture zero_no from the clean evaluation in this cell. The value was
# previously a leftover from an earlier cell (a different mode), so the
# print below was stale and failed when this cell was run on its own.
dev_clean, _, _, zero_no = evaluate_avai('plain', mode, impute_value)
dev_adver_0, _, _, _ = evaluate_avai('0.0', mode, impute_value)
dev_adver_5, _, _, _ = evaluate_avai('0.5', mode, impute_value)

print('zero no:', zero_no)
width = 0.3
x = np.arange(1,7)

# Grouped bars: median deviation per index for each of the three models.
# NOTE(review): offsets -1.5w/-0.5w/+0.5w place the group left of each tick;
# confirm whether -w/0/+w was intended.
plt.bar(x - 1.5*width, np.median(np.array(dev_clean), axis = -1), width, label = 'Clean')
plt.bar(x - 0.5*width, np.median(np.array(dev_adver_0), axis = -1), width, label = 'Adver c = 0.0')
plt.bar(x + 0.5*width, np.median(np.array(dev_adver_5), axis = -1), width, label = 'Adver c = mean')
plt.xlabel(r'$\beta$')
plt.ylabel('MPE')
plt.legend(loc = 'best')
plt.savefig(f'figure/adver_{mode}_{impute_value}.pdf', dpi=300)