In [1]:
import os
os.environ['CUDA_DEVICE_ORDER'] = 'PCI_BUS_ID'
os.environ['CUDA_VISIBLE_DEVICES'] = '2'

In [2]:
import time
import random

import numpy as np
import pandas as pd
import scipy as sp

import torch
import torch.nn as nn

from torch.utils.data import Dataset, DataLoader

from sklearn.preprocessing import OneHotEncoder

from tqdm import tqdm

import matplotlib.pyplot as plt

from sktime.datasets import load_UCR_UEA_dataset

  from .autonotebook import tqdm as notebook_tqdm


In [None]:
import exp_attributions as exp_att
import exp_perturbation_analysis as exp_pa
import exp_perturbation_card as exp_card

In [3]:
random_seed = 13

torch.manual_seed(random_seed)
random.seed(random_seed)
np.random.seed(random_seed)

In [4]:
dataset = 'ElectricDevices'
dataset_name = dataset.lower()

cur_time = time.strftime('%Y-%m-%d_%H-%M-%S')
base_dir = f'./results/{dataset_name}--{cur_time}'

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [5]:
X_train, y_train = load_UCR_UEA_dataset(name=dataset, split='train', return_type='numpyflat')
X_test, y_test = load_UCR_UEA_dataset(name=dataset, split='test', return_type='numpyflat')

print(f'Length training data: {len(X_train)} labels: {len(y_train)} test data: {len(X_test)} labels: {len(y_test)}')

Length training data: 8926 labels: 8926 test data: 7711 labels: 7711


In [6]:
encoder = OneHotEncoder(categories='auto', sparse=False)

y_train_ohe = encoder.fit_transform(np.expand_dims(y_train, axis=-1))
y_test_ohe = encoder.transform(np.expand_dims(y_test, axis=-1))

y_train_norm = y_train_ohe.argmax(axis=-1)
y_test_norm = y_test_ohe.argmax(axis=-1)



In [7]:
class FordADataset(Dataset):

    def __init__(self, X, y):
        self.X = X
        self.y = y
    
    def __len__(self):
        return len(self.X)
    
    def __getitem__(self, idx):
        inputs = self.X[idx]
        label = self.y[idx]
        
        return inputs, label

In [8]:
dataset_train = FordADataset(X_train, y_train_ohe)
dataset_test = FordADataset(X_test, y_test_ohe)

In [9]:
dataloader_train = DataLoader(dataset_train, batch_size=120, shuffle=True)
dataloader_train_not_shuffled = DataLoader(dataset_train, batch_size=120, shuffle=False)
dataloader_test = DataLoader(dataset_test, batch_size=120, shuffle=False)

In [80]:
class SimpleCNN(nn.Module):
    def __init__(self):
        super(SimpleCNN, self).__init__()
        
        self.conv1 = nn.Sequential(
            nn.Conv1d(1, 10, kernel_size=7, stride=1),
            nn.ReLU(inplace=True)
        )
        self.conv2 = nn.Sequential(
            nn.Conv1d(10, 50, kernel_size=5, stride=1),
            nn.ReLU(inplace=True)
        )
        self.conv3 = nn.Sequential(
            nn.Conv1d(50, 100, kernel_size=3, stride=1),
            nn.ReLU(inplace=True)
        )
        
        self.fc1 = nn.Sequential(
            nn.Linear(100 * 84, 100),
            nn.Dropout(0.5),
            nn.ReLU(inplace=True)
        )
        self.fc2 = nn.Sequential(
            nn.Linear(100, 7),
            nn.Softmax(-1)
        )
        
        self.downsampling = nn.Sequential(
            nn.Conv1d(1, 1, kernel_size=7, stride=1, dilation=2, bias=False),
        )
        
    def forward(self, x):
        y = self.downsampling(x)
        x = self.conv1(x)
        x = self.conv2(x)
        x = self.conv3(x)
        
        x = torch.add(x, y)

        batch_size = x.shape[0]
        x = x.view(batch_size, -1)
        x = self.fc1(x)
        x = self.fc2(x)
        
        return x

In [81]:
def trainer(model, dataloader_train, criterion):
    running_loss = 0

    model.train()

    for idx, (inputs, labels) in enumerate(dataloader_train):
        inputs = inputs.reshape(inputs.shape[0], 1, -1)
        inputs = inputs.float().to(device)
        labels = labels.float().to(device)

        optimizer.zero_grad()
        preds = model(inputs)
        loss = criterion(preds, labels.argmax(dim=-1))
        loss.backward()
        optimizer.step()
        
        running_loss += loss.item()

    train_loss = running_loss / len(dataloader_train)
    
    return train_loss


def validator(model, dataloader_test, criterion):
    running_loss = 0

    model.eval()

    for idx, (inputs, labels) in enumerate(dataloader_test):
        inputs = inputs.reshape(inputs.shape[0], 1, -1)
        inputs = inputs.float().to(device)
        labels = labels.float().to(device)

        preds = model(inputs)
        loss = criterion(preds, labels.argmax(dim=-1))
        
        running_loss += loss.item()

    train_loss = running_loss / len(dataloader_train)
    
    return train_loss

In [83]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = SimpleCNN().to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)
loss = nn.CrossEntropyLoss()

In [84]:
epochs = 500

for epoch in range(epochs):
    train_loss = trainer(model, dataloader_train, loss)
    if epoch % 10 == 0:
        print('Val', validator(model, dataloader_test, loss))

Val 1.4655966567993164
Val 1.3613561407725017
Val 1.3416965023676555
Val 1.3390001567204792
Val 1.336631809870402
Val 1.3378122393290202
Val 1.3401881965001423
Val 1.3374788777033488
Val 1.3283348242441813
Val 1.3269352213541667
Val 1.3226448933283488
Val 1.3227354685465496
Val 1.3245678647359211
Val 1.3233643547693887
Val 1.3210416316986084
Val 1.3253126700719198
Val 1.3187217489878336
Val 1.3234867604573568
Val 1.325048631032308
Val 1.3225840536753337
Val 1.3198017358779908
Val 1.3251192585627238
Val 1.3295419279734293
Val 1.3218554830551148
Val 1.3201945861180624
Val 1.3238636541366577
Val 1.3256730270385741
Val 1.3250374523798625
Val 1.3225642871856689
Val 1.3293760045369467
Val 1.3244184732437134
Val 1.3241381247838337
Val 1.3187435102462768
Val 1.3297885020573934
Val 1.319689154624939
Val 1.3285725212097168
Val 1.3206424522399902
Val 1.3226128721237183
Val 1.3249079847335816
Val 1.3244259627660115
Val 1.328822708129883
Val 1.321747096379598
Val 1.324715126355489
Val 1.32126656850

In [85]:
model.eval()

preds = []
labels = []
for x in dataloader_train_not_shuffled:
    input_, label_ = x
    input_ = input_.reshape(input_.shape[0], 1, -1)
    input_ = input_.float().to(device)
    label_ = label_.float().to(device)

    pred_ = model(input_)
    preds.extend(pred_)
    labels.extend(label_)

preds = torch.stack(preds)
labels = torch.stack(labels)
print('Prediction Accuracy Train', np.round((preds.argmax(dim=-1) == labels.argmax(dim=-1)).int().sum().float().item() / len(preds), 4))

y_train_pred = preds.cpu().detach().numpy()

Prediction Accuracy Train 0.9343


In [86]:
model.eval()

preds = []
labels = []
for x in dataloader_test:
    input_, label_ = x
    input_ = input_.reshape(input_.shape[0], 1, -1)
    input_ = input_.float().to(device)
    label_ = label_.float().to(device)

    pred_ = model(input_)
    preds.extend(pred_)
    labels.extend(label_)

preds = torch.stack(preds)
labels = torch.stack(labels)
print('Prediction Accuracy Test', np.round((preds.argmax(dim=-1) == labels.argmax(dim=-1)).int().sum().float().item() / len(preds), 4))

y_test_pred = preds.cpu().detach().numpy()

Prediction Accuracy Test 0.6416


# Generate attributions for the training data

In [None]:
model.eval()

In [None]:
sample, label = dataset_train[0]
shape = sample.reshape(1, -1).shape
baselines = torch.from_numpy(np.array([dataset_train[torch.randint(len(dataset_train), (1,))][0] for _ in range(10)])).reshape(-1, *shape).float().to(device)

In [None]:
attributions_train = {}
predictions_train = {}

attr_batch, preds_batch = exp_att.generate_attributions_batch(shape, model, device, dataloader_train_not_shuffled, baselines)
attributions_train.update(attr_batch)
predictions_train.update(preds_batch)

del attr_batch
del preds_batch

attr_single, preds_single = exp_att.generate_attributions_single(shape, model, device, dataloader_train_not_shuffled, baselines)
attributions_train.update(attr_single)
predictions_train.update(preds_single)

del attr_single
del preds_single

In [None]:
overall_results = exp_pa.perturbation_analysis(attributions_train, X_train, y_train, model, device, base_dir)

In [None]:
for cur_attr_name in attributions_train:
    cur_attr_name = cur_attr_name.lower().replace(' ', '_')
    cur_dir = f'{base_dir}/method-{cur_attr_name}'
    for deletion_value in exp_pa.values:
        deletion_value_fnc, deletion_length = deletion_value
        name = deletion_value_fnc.__name__ + ' ' + str(deletion_length)
        attribution = cur_attr_name
        
        results = overall_results[cur_attr_name][name]
        exp_card.create_perturbation_analysis_card(dataset, attribution, name, X_train, results, cur_dir)

In [None]:
import json
from json import JSONEncoder

class NumpyArrayEncoder(JSONEncoder):
    def default(self, obj):
        if isinstance(obj, np.ndarray):
            return obj.tolist()
        if isinstance(obj, np.int64):
            return str(obj)
        if isinstance(obj, np.float32):
            return str(obj)
        return JSONEncoder.default(self, obj)

overall_results_json = json.dumps(overall_results, cls=NumpyArrayEncoder)

with open(f'{base_dir}/results.json', 'w') as f:
    f.write(overall_results_json)

In [None]:
import shutil

shutil.make_archive(f'./results/time-{cur_time}', 'zip', base_dir)