In [1]:
import numpy as np
import torch
import torch.nn.functional as F
from prep import get_data
from torch import nn
from kfold_experiments import kfold_normal_training_softgedt, kfold_adversarial_training_softgedt,kfold_normal_training_neural_networks, kfold_adversarial_training_neural_networks
import create_file_folder

# Create dataset

In [2]:
# load dataset
filename = 'diabetes'
create_file_folder(filename)

# Preprocess the data without standardizing
data, ncat,dtypes = get_data(filename)

# create editability vector
non_editable_vector = torch.tensor([1 for i in range(data[:,:-1].shape[1])])

# record datatypes
dtypes[dtypes=='object'] = 'pos_int'
dtypes[dtypes=='float64'] = 'float'

datatypes = dtypes[:-1].to_numpy().astype('object')
datatypes[(datatypes=='int64') & np.min(data[:,:-1]>=0,axis=0)]='pos_int'
datatypes[(datatypes=='float64') & np.min(data[:,:-1]>=0,axis=0)]='pos_float'
datatypes = datatypes.tolist()

# Normal Training

In [3]:
kfold_normal_training_softgedt(filename, data, ncat, non_editable_vector, datatypes, include_sum_weight = True)

experiment: 0
Fold: 0
epoch: 0
train acc: tensor(0.8761) valid acc: tensor(0.6818) trigger times: 0
epoch: 1
train acc: tensor(0.9000) valid acc: tensor(0.7013) trigger times: 0
epoch: 2
train acc: tensor(0.9087) valid acc: tensor(0.6948) trigger times: 0
epoch: 3
train acc: tensor(0.9022) valid acc: tensor(0.6883) trigger times: 1
epoch: 4
train acc: tensor(0.9217) valid acc: tensor(0.7013) trigger times: 2
epoch: 5
train acc: tensor(0.9196) valid acc: tensor(0.7143) trigger times: 0
epoch: 6
train acc: tensor(0.9283) valid acc: tensor(0.6948) trigger times: 0
epoch: 7
train acc: tensor(0.9261) valid acc: tensor(0.6948) trigger times: 1
epoch: 8
train acc: tensor(0.9261) valid acc: tensor(0.6883) trigger times: 2
epoch: 9
train acc: tensor(0.9217) valid acc: tensor(0.6818) trigger times: 3
Early Stopping
SoftGeDT Accuracy on Training set: tensor(0.9261)
SoftGeDT Accuracy on Validation set: tensor(0.6883)
SoftGeDT Accuracy on Test set: tensor(0.7468)
Average Accuracy on Test set: tenso

# Adversarial Training

In [3]:
kfold_adversarial_training_softgedt(filename, data, ncat, non_editable_vector, datatypes, include_sum_weight=True)

Fold: 0
epoch: 0
train error: 0.2608695652173913 train loss: 1.8997170531231424 validation error 0.35064935064935066
epoch: 1
train error: 0.26956521739130435 train loss: 0.992144427092179 validation error 0.2922077922077922
epoch: 2
train error: 0.2608695652173913 train loss: 0.7330216926077138 validation error 0.2922077922077922
epoch: 3
train error: 0.24347826086956523 train loss: 0.625985626552416 validation error 0.2922077922077922
epoch: 4
train error: 0.24347826086956523 train loss: 0.5591042404589446 validation error 0.2987012987012987
epoch: 5
train error: 0.24565217391304348 train loss: 0.5395604745201442 validation error 0.2922077922077922
epoch: 6
train error: 0.25217391304347825 train loss: 0.5415748782779859 validation error 0.2987012987012987
epoch: 7
train error: 0.25869565217391305 train loss: 0.5499620105909264 validation error 0.2987012987012987
epoch: 8
train error: 0.24347826086956523 train loss: 0.5361204499783723 validation error 0.2792207792207792
epoch: 9
train

# Simple Neural Network

In [3]:
class Net(nn.Module):
    def __init__(self, input_dim):
        super(Net, self).__init__()
        self.layer1 = nn.Linear(input_dim, 80).double()
        self.layer2 = nn.Linear(80, 80).double()
        self.layer3 = nn.Linear(80, 80).double()
        self.layer4 = nn.Linear(80, 2).double()
        self.dropout = nn.Dropout(0.5)
    def forward(self, x):
        x = F.relu(self.dropout(self.layer1(x)))
        x = F.relu(self.dropout(self.layer2(x)))
        x = F.relu(self.dropout(self.layer3(x)))
        x = self.layer4(x)
        return x

In [6]:
kfold_normal_training_neural_networks(nn, filename, data, ncat, non_editable_vector, datatypes)

Fold: 0
epoch: 0
train acc: tensor(0.6870) valid acc: tensor(0.7338) trigger times: 0
epoch: 1
train acc: tensor(0.7717) valid acc: tensor(0.7338) trigger times: 0
epoch: 2
train acc: tensor(0.7652) valid acc: tensor(0.7208) trigger times: 1
epoch: 3
train acc: tensor(0.7891) valid acc: tensor(0.7403) trigger times: 2
epoch: 4
train acc: tensor(0.7935) valid acc: tensor(0.7338) trigger times: 0
epoch: 5
train acc: tensor(0.8000) valid acc: tensor(0.7208) trigger times: 1
epoch: 6
train acc: tensor(0.8196) valid acc: tensor(0.7143) trigger times: 2
epoch: 7
train acc: tensor(0.8304) valid acc: tensor(0.7013) trigger times: 3
Early Stopping
FNN Accuracy on Training set: tensor(0.8196)
FNN Accuracy on Validation set: tensor(0.7143)
FNN Accuracy on Test set: tensor(0.7532)
Average Accuracy on Test set: tensor(0.7532)
Original Accuracy on Test set: tensor(0.5195)
Average Original Accuracy on Test set: tensor(0.5195)
ADT error, epsilon:0.1: 0.2922077922077922
Average ADT error, epsilon:0.1 0

## 5 fold cross validation adversarial training

In [11]:
kfold_adversarial_training_neural_networks(Net, filename, data, ncat, non_editable_vector, datatypes)

Fold: 0
epoch: 0
train error: 0.3347826086956522 train loss: 0.6101939353509036 validation error 0.2792207792207792
epoch: 1
train error: 0.2217391304347826 train loss: 0.5162572430903808 validation error 0.2987012987012987
epoch: 2
train error: 0.24130434782608695 train loss: 0.4985728158649179 validation error 0.2922077922077922
epoch: 3
train error: 0.23043478260869565 train loss: 0.48017663683628636 validation error 0.2792207792207792
epoch: 4
train error: 0.24782608695652175 train loss: 0.512762648148782 validation error 0.2792207792207792
epoch: 5
train error: 0.23478260869565218 train loss: 0.49358863082317855 validation error 0.2857142857142857
epoch: 6
train error: 0.21956521739130436 train loss: 0.47232020722817813 validation error 0.2792207792207792
epoch: 7
train error: 0.21304347826086956 train loss: 0.45747649532282414 validation error 0.2987012987012987
epoch: 8
train error: 0.2217391304347826 train loss: 0.45737837537132336 validation error 0.3051948051948052
epoch: 9
t