## Import Libraries

In [1]:
from __future__ import print_function

import os
import random
import pickle

import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim

from linformer import Linformer
from sklearn.model_selection import train_test_split
from torch.optim.lr_scheduler import StepLR
from torch.utils.data import DataLoader, Dataset
from tqdm.notebook import tqdm

from vit_pytorch.vit import ViT

In [2]:
print(f"Torch: {torch.__version__}")

Torch: 1.11.0


In [3]:
# Training settings
batch_size = 16
epochs = 20
lr = 3e-5
gamma = 0.7
seed = 42

In [4]:
def seed_everything(seed):
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True

seed_everything(seed)

In [5]:
device = 'cuda'

## Load Data

In [6]:
with open('train_none_crash.pickle', 'rb') as f:
	none_crash_train_list = pickle.load(f)

with open('test_none_crash.pickle', 'rb') as f:
	none_crash_test_list = pickle.load(f)

with open('train_vulner.pickle', 'rb') as f:
	vulner_train_list = pickle.load(f)

with open('test_vulner.pickle', 'rb') as f:
	vulner_test_list = pickle.load(f)

In [7]:
# ex) train_list[i]: "../../dataset/data_preprocessed/None-crash\21-12-01-11-07-44_end_extract_drive26\00449.pickle"
train_list = none_crash_train_list + vulner_train_list
test_list = none_crash_test_list + vulner_test_list

random.shuffle(train_list)
random.shuffle(test_list)

### Split Train data into Train & Validation

In [8]:
train_labels = [path.split('/')[4].split('\\')[0] for path in train_list]

In [9]:
train_list, valid_list = train_test_split(train_list, 
                                          test_size=0.2,
                                          stratify=train_labels,
                                          random_state=seed)

In [10]:
print(f"Train Data: {len(train_list)}")
print(f"Validation Data: {len(valid_list)}")
print(f"Test Data: {len(test_list)}")

Train Data: 24685
Validation Data: 6172
Test Data: 3429


## Load Dataset

In [11]:
class lidar_dataset(Dataset):
    def __init__(self, file_list):
        self.file_list = file_list # file name

    def __len__(self):
        self.filelength = len(self.file_list)
        return self.filelength

    def __getitem__(self, idx):
        lidar_path = self.file_list[idx]
        
        with open(lidar_path,"rb") as fr:
            data = pickle.load(fr)

        tensor = data['tensor']
        # 0 is None-crash
        # 1 is Vulnerable
        if data['label'] == 'None-crash':  
            label = 0
        else:
            label = 1
            
        return tensor, label

In [12]:
train_data = lidar_dataset(train_list)
valid_data = lidar_dataset(valid_list)
test_data = lidar_dataset(test_list)

In [13]:
train_loader = DataLoader(dataset = train_data, batch_size=batch_size, shuffle=True)
valid_loader = DataLoader(dataset = valid_data, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(dataset = test_data, batch_size=batch_size, shuffle=True)

## Effecient Attention

### Linformer

In [14]:
efficient_transformer = Linformer(
    dim=128,
    seq_len=196+1, #patches + 1
    depth=12,
    heads=8,
    k=64
)

### ViT

In [15]:
model = ViT(
    dim=128,
    image_size=28,
    patch_size=2,
    num_classes=2,
    channels=14,
    depth=12,
    heads=8,
    mlp_dim = 2048
).to(device)

### Training

In [16]:
# loss function
criterion = nn.CrossEntropyLoss()
# optimizer
optimizer = optim.Adam(model.parameters(), lr=lr)
# scheduler
scheduler = StepLR(optimizer, step_size=1, gamma=gamma)

In [17]:
for epoch in range(epochs):
    epoch_loss = 0
    epoch_accuracy = 0

    for data, label in tqdm(train_loader):
        data = data.to(device)
        label = label.to(device)

        output = model(data)
        loss = criterion(output, label)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        acc = (output.argmax(dim=1) == label).float().mean()
        epoch_accuracy += acc / len(train_loader)
        epoch_loss += loss / len(train_loader)

    with torch.no_grad():
        epoch_val_accuracy = 0
        epoch_val_loss = 0
        for data, label in valid_loader:
            data = data.to(device)
            label = label.to(device)

            val_output = model(data)
            val_loss = criterion(val_output, label)

            acc = (val_output.argmax(dim=1) == label).float().mean()
            epoch_val_accuracy += acc / len(valid_loader)
            epoch_val_loss += val_loss / len(valid_loader)

    print(
        f"Epoch : {epoch+1} - loss : {epoch_loss:.4f} - acc: {epoch_accuracy:.4f} - val_loss : {epoch_val_loss:.4f} - val_acc: {epoch_val_accuracy:.4f}\n"
    )

  0%|          | 0/1543 [00:00<?, ?it/s]

Epoch : 1 - loss : 0.1605 - acc: 0.9467 - val_loss : 0.0745 - val_acc: 0.9783



  0%|          | 0/1543 [00:00<?, ?it/s]

Epoch : 2 - loss : 0.0765 - acc: 0.9758 - val_loss : 0.0595 - val_acc: 0.9835



  0%|          | 0/1543 [00:00<?, ?it/s]

Epoch : 3 - loss : 0.0539 - acc: 0.9828 - val_loss : 0.0519 - val_acc: 0.9830



  0%|          | 0/1543 [00:00<?, ?it/s]

Epoch : 4 - loss : 0.0409 - acc: 0.9870 - val_loss : 0.0407 - val_acc: 0.9862



  0%|          | 0/1543 [00:00<?, ?it/s]

Epoch : 5 - loss : 0.0303 - acc: 0.9898 - val_loss : 0.0323 - val_acc: 0.9887



  0%|          | 0/1543 [00:00<?, ?it/s]

Epoch : 6 - loss : 0.0245 - acc: 0.9920 - val_loss : 0.0249 - val_acc: 0.9917



  0%|          | 0/1543 [00:00<?, ?it/s]

Epoch : 7 - loss : 0.0189 - acc: 0.9936 - val_loss : 0.0336 - val_acc: 0.9891



  0%|          | 0/1543 [00:00<?, ?it/s]

Epoch : 8 - loss : 0.0151 - acc: 0.9955 - val_loss : 0.0221 - val_acc: 0.9919



  0%|          | 0/1543 [00:00<?, ?it/s]

Epoch : 9 - loss : 0.0108 - acc: 0.9966 - val_loss : 0.0189 - val_acc: 0.9932



  0%|          | 0/1543 [00:00<?, ?it/s]

Epoch : 10 - loss : 0.0086 - acc: 0.9973 - val_loss : 0.0142 - val_acc: 0.9966



  0%|          | 0/1543 [00:00<?, ?it/s]

Epoch : 11 - loss : 0.0067 - acc: 0.9979 - val_loss : 0.0231 - val_acc: 0.9924



  0%|          | 0/1543 [00:00<?, ?it/s]

Epoch : 12 - loss : 0.0068 - acc: 0.9982 - val_loss : 0.0215 - val_acc: 0.9948



  0%|          | 0/1543 [00:00<?, ?it/s]

Epoch : 13 - loss : 0.0064 - acc: 0.9979 - val_loss : 0.0249 - val_acc: 0.9943



  0%|          | 0/1543 [00:00<?, ?it/s]

Epoch : 14 - loss : 0.0031 - acc: 0.9991 - val_loss : 0.0731 - val_acc: 0.9849



  0%|          | 0/1543 [00:00<?, ?it/s]

Epoch : 15 - loss : 0.0050 - acc: 0.9985 - val_loss : 0.0396 - val_acc: 0.9926



  0%|          | 0/1543 [00:00<?, ?it/s]

Epoch : 16 - loss : 0.0042 - acc: 0.9987 - val_loss : 0.0317 - val_acc: 0.9934



  0%|          | 0/1543 [00:00<?, ?it/s]

Epoch : 17 - loss : 0.0038 - acc: 0.9986 - val_loss : 0.0158 - val_acc: 0.9963



  0%|          | 0/1543 [00:00<?, ?it/s]

Epoch : 18 - loss : 0.0007 - acc: 0.9999 - val_loss : 0.0326 - val_acc: 0.9935



  0%|          | 0/1543 [00:00<?, ?it/s]

Epoch : 19 - loss : 0.0049 - acc: 0.9985 - val_loss : 0.0218 - val_acc: 0.9948



  0%|          | 0/1543 [00:00<?, ?it/s]

Epoch : 20 - loss : 0.0019 - acc: 0.9995 - val_loss : 0.0126 - val_acc: 0.9966



In [18]:
# Last trained model
model.eval()
torch.save(model, "model.pt")

## Test(Confusion matrix)

In [19]:
TP, FP, FN, TN = 0, 0, 0, 0
TP_list = list()
FP_list = list()
FN_list = list()
TN_list = list()

print("#Test-data:", len(test_list))

for i in range(len(test_list)):
    with open(test_list[i], "rb") as fr:
        tmp_data = pickle.load(fr)

    test_label = tmp_data['label']
    if test_label == "None-crash":
        test_label = 0
    else:
        test_label = 1

    # Make tensor as input of model
    test_tensor = np.asarray(tmp_data['tensor'])
    test_tensor = np.asarray([test_tensor])
    test_tensor = torch.from_numpy(test_tensor)
    test_tensor = test_tensor.to(torch.float32).cuda()

    pred = model(test_tensor)
    if pred[0][0] > pred[0][1]:
        pred_idx = 0
    elif pred[0][0] < pred[0][1]:
        pred_idx = 1
    else:
        assert pred[0][0] == pred[0][1], 'Same class score'

    if pred_idx == 1 and test_label == 1:
        TP += 1
        TP_list.append(test_list[i])
    elif pred_idx == 1 and test_label == 0:
        FP += 1
        FP_list.append(test_list[i])
    elif pred_idx == 0 and test_label == 1:
        FN += 1
        FN_list.append(test_list[i])
    elif pred_idx == 0 and test_label == 0:
        TN += 1
        TN_list.append(test_list[i])

#Test-data: 3429


In [20]:
acc = (TP+TN)/(TP+TN+FP+FN)
spec = (TN/(FP+TN))
prec = (TP/(TP+FP))
recall = (TP/(TP+FN))

print("Accuracy:", acc)
print("Specificity:", spec)
print("Precision:", prec)
print("Recall:", recall)
print("F1 Score:", (2*prec*recall/(prec+recall)))

Accuracy: 0.9965004374453194
Specificity: 0.999344262295082
Precision: 0.9946091644204852
Recall: 0.9736147757255936
F1 Score: 0.984
