In [1]:
import os
os.environ['CUDA_DEVICE_ORDER'] = 'PCI_BUS_ID'
os.environ['CUDA_VISIBLE_DEVICES'] = '3'

In [2]:
import time
import random

import numpy as np
import pandas as pd
import scipy as sp

import torch
import torch.nn as nn

from torch.utils.data import Dataset, DataLoader

from sklearn.preprocessing import OneHotEncoder

from tqdm import tqdm

import matplotlib.pyplot as plt

from sktime.datasets import load_UCR_UEA_dataset

  from .autonotebook import tqdm as notebook_tqdm


In [None]:
import exp_attributions as exp_att
import exp_perturbation_analysis as exp_pa
import exp_perturbation_card as exp_card

In [3]:
random_seed = 13

torch.manual_seed(random_seed)
random.seed(random_seed)
np.random.seed(random_seed)

In [4]:
dataset = 'FordB'
dataset_name = dataset.lower()

cur_time = time.strftime('%Y-%m-%d_%H-%M-%S')
base_dir = f'./results/{dataset_name}--{cur_time}'


device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [5]:
X_train, y_train = load_UCR_UEA_dataset(name=dataset, split='train', return_type='numpyflat')
X_test, y_test = load_UCR_UEA_dataset(name=dataset, split='test', return_type='numpyflat')

print(f'Length training data: {len(X_train)} labels: {len(y_train)} test data: {len(X_test)} labels: {len(y_test)}')

Length training data: 3601 labels: 3601 test data: 1320 labels: 1320


In [6]:
encoder = OneHotEncoder(categories='auto', sparse=False)

y_train_ohe = encoder.fit_transform(np.expand_dims(y_train, axis=-1))
y_test_ohe = encoder.transform(np.expand_dims(y_test, axis=-1))

y_train_norm = y_train_ohe.argmax(axis=-1)
y_test_norm = y_test_ohe.argmax(axis=-1)



In [7]:
class FordBDataset(Dataset):

    def __init__(self, X, y):
        self.X = X
        self.y = y
    
    def __len__(self):
        return len(self.X)
    
    def __getitem__(self, idx):
        inputs = self.X[idx]
        label = self.y[idx]
        
        return inputs, label

In [8]:
dataset_train = FordBDataset(X_train, y_train_ohe)
dataset_test = FordBDataset(X_test, y_test_ohe)

In [9]:
dataloader_train = DataLoader(dataset_train, batch_size=120, shuffle=True)
dataloader_train_not_shuffled = DataLoader(dataset_train, batch_size=120, shuffle=False)
dataloader_test = DataLoader(dataset_test, batch_size=120, shuffle=False)

In [10]:
class SimpleCNN(nn.Module):
    def __init__(self):
        super(SimpleCNN, self).__init__()
        
        self.conv1 = nn.Sequential(
            nn.Conv1d(1, 10, kernel_size=3, stride=1),
            nn.ReLU(inplace=True)
        )
        self.conv2 = nn.Sequential(
            nn.Conv1d(10, 50, kernel_size=3, stride=1),
            nn.MaxPool1d(3),
            nn.ReLU(inplace=True)
        )
        self.conv3 = nn.Sequential(
            nn.Conv1d(50, 100, kernel_size=3, stride=1),
            nn.MaxPool1d(3),
            nn.ReLU(inplace=True)
        )
        
        self.fc1 = nn.Sequential(
            nn.Linear(100 * 54, 100),
            nn.Dropout(0.5),
            nn.ReLU(inplace=True)
        )
        self.fc2 = nn.Sequential(
            nn.Linear(100, 2),
            nn.Softmax(-1)
        )
        
    def forward(self, x):
        x = self.conv1(x)
        x = self.conv2(x)
        x = self.conv3(x)

        batch_size = x.shape[0]
        x = x.view(batch_size, -1)
        x = self.fc1(x)
        x = self.fc2(x)
        
        return x

In [11]:
def trainer(model, dataloader_train, criterion):
    running_loss = 0

    model.train()

    for idx, (inputs, labels) in enumerate(dataloader_train):
        inputs = inputs.reshape(inputs.shape[0], 1, -1)
        inputs = inputs.float().to(device)
        labels = labels.float().to(device)

        optimizer.zero_grad()
        preds = model(inputs)
        loss = criterion(preds, labels.argmax(dim=-1))
        loss.backward()
        optimizer.step()
        
        running_loss += loss.item()

    train_loss = running_loss / len(dataloader_train)
    
    return train_loss


def validator(model, dataloader_test, criterion):
    running_loss = 0

    model.eval()

    for idx, (inputs, labels) in enumerate(dataloader_test):
        inputs = inputs.reshape(inputs.shape[0], 1, -1)
        inputs = inputs.float().to(device)
        labels = labels.float().to(device)

        preds = model(inputs)
        loss = criterion(preds, labels.argmax(dim=-1))
        
        running_loss += loss.item()

    train_loss = running_loss / len(dataloader_train)
    
    return train_loss

In [12]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = SimpleCNN().to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)
loss = nn.CrossEntropyLoss()

In [13]:
epochs = 500

for epoch in range(epochs):
    train_loss = trainer(model, dataloader_train, loss)
    if epoch % 10 == 0:
        print('Val', validator(model, dataloader_test, loss))

Val 0.24538052082061768
Val 0.2271573466639365
Val 0.19473911677637407
Val 0.1729797576704333
Val 0.16226956536692957
Val 0.15826877278666343
Val 0.1547743562729128
Val 0.15435011925235873
Val 0.15064942259942332
Val 0.15206417537504627
Val 0.15143455036224857
Val 0.14788151844855277
Val 0.1474695869030491
Val 0.15191022426851333
Val 0.1476726916528517
Val 0.1469672485705345
Val 0.14794497336110762
Val 0.146093939581225
Val 0.14691076163322694
Val 0.14579709018430403
Val 0.1537398878605135
Val 0.1466710154087313
Val 0.14736624686948716
Val 0.14725754914745207
Val 0.14794617699038598
Val 0.14752341951093367
Val 0.14705410984254652
Val 0.1469624888512396
Val 0.14640250994313148
Val 0.14645722700703528
Val 0.14668357180010888
Val 0.14631924513847597
Val 0.14703099093129557
Val 0.14645788842631924
Val 0.14649166310987166
Val 0.14676230184493527
Val 0.1466953533311044
Val 0.14676340260813314
Val 0.148573471653846
Val 0.14848103927027795
Val 0.1556036183910985
Val 0.14734717049906332
Val 0.1

In [14]:
model.eval()

preds = []
labels = []
for x in dataloader_train_not_shuffled:
    input_, label_ = x
    input_ = input_.reshape(input_.shape[0], 1, -1)
    input_ = input_.float().to(device)
    label_ = label_.float().to(device)

    pred_ = model(input_)
    preds.extend(pred_)
    labels.extend(label_)

preds = torch.stack(preds)
labels = torch.stack(labels)
print('Prediction Accuracy Train', np.round((preds.argmax(dim=-1) == labels.argmax(dim=-1)).int().sum().float().item() / len(preds), 4))

y_train_pred = preds.cpu().detach().numpy()

Prediction Accuracy Train 0.9944


In [15]:
model.eval()

preds = []
labels = []
for x in dataloader_test:
    input_, label_ = x
    input_ = input_.reshape(input_.shape[0], 1, -1)
    input_ = input_.float().to(device)
    label_ = label_.float().to(device)

    pred_ = model(input_)
    preds.extend(pred_)
    labels.extend(label_)

preds = torch.stack(preds)
labels = torch.stack(labels)
print('Prediction Accuracy Test', np.round((preds.argmax(dim=-1) == labels.argmax(dim=-1)).int().sum().float().item() / len(preds), 4))

y_test_pred = preds.cpu().detach().numpy()

Prediction Accuracy Test 0.8909


# Generate attributions for the training data

In [18]:
model.eval()

SimpleCNN(
  (conv1): Sequential(
    (0): Conv1d(1, 10, kernel_size=(3,), stride=(1,))
    (1): ReLU(inplace=True)
  )
  (conv2): Sequential(
    (0): Conv1d(10, 50, kernel_size=(3,), stride=(1,))
    (1): MaxPool1d(kernel_size=3, stride=3, padding=0, dilation=1, ceil_mode=False)
    (2): ReLU(inplace=True)
  )
  (conv3): Sequential(
    (0): Conv1d(50, 100, kernel_size=(3,), stride=(1,))
    (1): MaxPool1d(kernel_size=3, stride=3, padding=0, dilation=1, ceil_mode=False)
    (2): ReLU(inplace=True)
  )
  (fc1): Sequential(
    (0): Linear(in_features=5400, out_features=100, bias=True)
    (1): Dropout(p=0.5, inplace=False)
    (2): ReLU(inplace=True)
  )
  (fc2): Sequential(
    (0): Linear(in_features=100, out_features=2, bias=True)
    (1): Softmax(dim=-1)
  )
)

### General attribution method

In [19]:
sample, label = dataset_train[0]
shape = sample.reshape(1, -1).shape
baselines = torch.from_numpy(np.array([dataset_train[torch.randint(len(dataset_train), (1,))][0] for _ in range(10)])).reshape(-1, *shape).float().to(device)

In [None]:
attributions_train = {}
predictions_train = {}

attr_batch, preds_batch = exp_att.generate_attributions_batch(shape, model, device, dataloader_train_not_shuffled, baselines)
attributions_train.update(attr_batch)
predictions_train.update(preds_batch)

del attr_batch
del preds_batch

attr_single, preds_single = exp_att.generate_attributions_single(shape, model, device, dataloader_train_not_shuffled, baselines)
attributions_train.update(attr_single)
predictions_train.update(preds_single)

del attr_single
del preds_single

In [None]:
overall_results = exp_pa.perturbation_analysis(attributions_train, X_train, y_train, model, device, base_dir)

In [None]:
for cur_attr_name in attributions_train:
    cur_attr_name = cur_attr_name.lower().replace(' ', '_')
    cur_dir = f'{base_dir}/method-{cur_attr_name}'
    for deletion_value in exp_pa.values:
        deletion_value_fnc, deletion_length = deletion_value
        name = deletion_value_fnc.__name__ + ' ' + str(deletion_length)
        attribution = cur_attr_name
        
        results = overall_results[cur_attr_name][name]
        exp_card.create_perturbation_analysis_card(dataset, attribution, name, X_train, results, cur_dir)

In [None]:
import json
from json import JSONEncoder

class NumpyArrayEncoder(JSONEncoder):
    def default(self, obj):
        if isinstance(obj, np.ndarray):
            return obj.tolist()
        if isinstance(obj, np.int64):
            return str(obj)
        if isinstance(obj, np.float32):
            return str(obj)
        return JSONEncoder.default(self, obj)

overall_results_json = json.dumps(overall_results, cls=NumpyArrayEncoder)

with open(f'{base_dir}/results.json', 'w') as f:
    f.write(overall_results_json)

In [None]:
import shutil

shutil.make_archive(f'./results/time-{cur_time}', 'zip', base_dir)