### GDN Based VM

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

import torch
from torch import nn
from torch.utils.data import DataLoader, random_split, Subset

from util.env import get_device, set_device
from util.preprocess import build_loc_net, construct_data
from util.net_struct import get_feature_map, get_fc_graph_struc
from util.iostream import printsep

from datasets.WaferDataset import WaferDataset

from models.GDN import GDN

from run.train import train
from run.test  import test
from run.evaluate import get_err_scores, get_best_performance_data, get_val_performance_data, get_full_err_scores

import sys
from datetime import datetime

import os
import argparse
from pathlib import Path

import json
import random

In [2]:
# check GPU situation
print(torch.cuda.is_available())
print(torch.cuda.device_count())
print(torch.cuda.current_device())
print(torch.cuda.device(0))
print(torch.cuda.get_device_name(0))
print(torch.version.cuda)

True
1
0
<torch.cuda.device object at 0x000001D3DEF81780>
NVIDIA GeForce RTX 3090 Ti
11.8


In [3]:
# function to set random seed
def setup_seed(seed):
     random.seed(seed)
     np.random.seed(seed)
     if torch.cuda.is_available():
          torch.cuda.manual_seed_all(seed)

     torch.manual_seed(seed)
     torch.cuda.manual_seed(seed)
     torch.cuda.manual_seed_all(seed)

     torch.backends.cudnn.deterministic = True
     torch.backends.cudnn.benchmark = False

In [4]:
class Main():
    def __init__(self, train_config, env_config, debug=False):

        self.train_config = train_config
        self.env_config = env_config
        self.datestr = None

        dataset = self.env_config['dataset']
        
        sampled_orig, unsampled_orig, test_orig = self.get_orig_data(dataset)
        sampled_train_orig = pd.concat([sampled_orig.iloc[i:i+316] for i in range(0, len(sampled_orig), 316*2)], ignore_index=True)
        unsampled_train_orig = pd.concat([unsampled_orig.iloc[i:i+316] for i in range(316, len(unsampled_orig), 316*2)], ignore_index=True)

        train, test = sampled_train_orig, unsampled_train_orig

        if 'attack' in train.columns:
            train = train.drop(columns=['attack'])

        feature_map = get_feature_map(dataset)
        fc_struc = get_fc_graph_struc(dataset)

        set_device(env_config['device'])
        self.device = get_device()

        fc_edge_index = build_loc_net(fc_struc, list(train.columns), feature_map=feature_map)
        fc_edge_index = torch.tensor(fc_edge_index, dtype = torch.long)

        self.feature_map = feature_map

        train_dataset_indata = construct_data(train, feature_map, labels=train.MRR.tolist(), mrrs=train.former_MRR.tolist())
        test_dataset_indata = construct_data(test, feature_map, labels=test.MRR.tolist(), mrrs=train.former_MRR.tolist())

        cfg = {'wafer_len': train_config['wafer_len']}

        train_dataset = WaferDataset(train_dataset_indata, fc_edge_index, config=cfg)
        test_dataset = WaferDataset(test_dataset_indata, fc_edge_index, config=cfg)


        train_dataloader, val_dataloader = self.get_loaders(train_dataset, train_config['seed'], train_config['batch'], val_ratio = train_config['val_ratio'])

        self.train_dataset = train_dataset
        self.test_dataset = test_dataset


        self.train_dataloader = train_dataloader
        self.val_dataloader = val_dataloader
        self.test_dataloader = DataLoader(test_dataset, batch_size=train_config['batch'],
                            shuffle=False)


        edge_index_sets = []
        edge_index_sets.append(fc_edge_index)

        self.model = GDN(edge_index_sets, len(feature_map), 
                dim=train_config['dim'], 
                input_dim=train_config['wafer_len'],
                topk=train_config['topk'],
            ).to(self.device)

    def run(self):

        if len(self.env_config['load_model_path']) > 0:
            model_save_path = self.env_config['load_model_path']
        else:
            model_save_path = self.get_save_path()[0]

            self.train_log = train(self.model, model_save_path, 
                config = self.train_config,
                train_dataloader=self.train_dataloader,
                val_dataloader=self.val_dataloader, 
                feature_map=self.feature_map,
                test_dataloader=self.test_dataloader,
                test_dataset=self.test_dataset,
                train_dataset=self.train_dataset,
                dataset_name=self.env_config['dataset']
            )
        
        # test            
        self.model.load_state_dict(torch.load(model_save_path))
        best_model = self.model.to(self.device)

        test_loss, test_result = test(best_model, self.test_dataloader)
        val_loss, val_result = test(best_model, self.val_dataloader)

        return test_loss, val_loss, test_result, val_result

    def get_loaders(self, train_dataset, seed, batch, val_ratio=0.1):
        dataset_len = int(len(train_dataset))
        train_use_len = int(dataset_len * (1 - val_ratio))
        val_use_len = int(dataset_len * val_ratio)
        val_start_index = random.randrange(train_use_len)
        indices = torch.arange(dataset_len)

        train_sub_indices = torch.cat([indices[:val_start_index], indices[val_start_index+val_use_len:]])
        train_subset = Subset(train_dataset, train_sub_indices)

        val_sub_indices = indices[val_start_index:val_start_index+val_use_len]
        val_subset = Subset(train_dataset, val_sub_indices)

        train_dataloader = DataLoader(train_subset, batch_size=batch,
                                shuffle=False)

        val_dataloader = DataLoader(val_subset, batch_size=batch,
                                shuffle=False)

        return train_dataloader, val_dataloader

    def get_save_path(self, feature_name=''):

        dir_path = self.env_config['save_path']
        
        if self.datestr is None:
            now = datetime.now()
            self.datestr = now.strftime('%m_%d-%H_%M_%S')
        datestr = self.datestr          

        paths = [
            f'./save_path/{dir_path}/best_{datestr}.pt',
            f'./results/{dir_path}/{datestr}.csv',
        ]

        for path in paths:
            dirname = os.path.dirname(path)
            Path(dirname).mkdir(parents=True, exist_ok=True)

        return paths
    
    def get_orig_data(self, dataset):

        train_orig = pd.read_csv(f'./data/{dataset}/train.csv', sep=',', index_col=0).reset_index(drop=True)
        test_orig = pd.read_csv(f'./data/{dataset}/test.csv', sep=',', index_col=0).reset_index(drop=True)

        short_sampled_orig = train_orig.iloc[316*2:, :].reset_index(drop=True)
        former_mrr_sampled = train_orig.iloc[:-316*2, -1].rename('former_MRR').reset_index(drop=True)
        addmrr_sampled_orig = pd.concat([short_sampled_orig, former_mrr_sampled], axis=1)

        short_unsampled_orig = train_orig.iloc[316*1:, :].reset_index(drop=True)
        former_mrr_unsampled = train_orig.iloc[:-316*1, -1].rename('former_MRR').reset_index(drop=True)
        addmrr_unsampled_orig = pd.concat([short_unsampled_orig, former_mrr_unsampled], axis=1)

        former_mrr_test = pd.concat([train_orig.iloc[-316*1:, -1], test_orig.iloc[:-316*1, -1]], ignore_index=True).rename("former_MRR").reset_index(drop=True)
        addmrr_test_orig = pd.concat([test_orig, former_mrr_test], axis=1)
        
        return addmrr_sampled_orig, addmrr_unsampled_orig, addmrr_test_orig


A456

In [None]:
# training setting
batch=256
epoch=2000
wafer_len=316
dim=16
save_path_pattern=''
dataset='A456'
device='cuda'
seed=2
comment=''
decay=0
val_ratio=0.2
topk=4
report='best'
load_model_path=''

# set random seed
setup_seed(seed)

train_config = {
    'batch': batch,
    'epoch': epoch,
    'wafer_len': wafer_len,
    'dim': dim,
    'comment': comment,
    'seed': seed,
    'decay': decay,
    'val_ratio': val_ratio,
    'topk': topk,
}

env_config={
    'save_path': save_path_pattern,
    'dataset': dataset,
    'report': report,
    'device': device,
    'load_model_path': load_model_path
}

# start traning
main = Main(train_config, env_config, debug=False)
test_loss, _, value_result, _ = main.run()

epoch (1 / 2000) (Train_loss:3389.28735352, ACU_loss:6778.57470703, Val_loss:3240.49389648)
epoch (2 / 2000) (Train_loss:3363.42321777, ACU_loss:6726.84643555, Val_loss:3223.56250000)
epoch (3 / 2000) (Train_loss:3340.14978027, ACU_loss:6680.29956055, Val_loss:3206.55322266)
epoch (4 / 2000) (Train_loss:3315.61572266, ACU_loss:6631.23144531, Val_loss:3189.35424805)
epoch (5 / 2000) (Train_loss:3289.64221191, ACU_loss:6579.28442383, Val_loss:3171.75830078)
epoch (6 / 2000) (Train_loss:3261.93249512, ACU_loss:6523.86499023, Val_loss:3153.76562500)
epoch (7 / 2000) (Train_loss:3232.44445801, ACU_loss:6464.88891602, Val_loss:3135.23510742)
epoch (8 / 2000) (Train_loss:3202.51159668, ACU_loss:6405.02319336, Val_loss:3115.86010742)
epoch (9 / 2000) (Train_loss:3169.18237305, ACU_loss:6338.36474609, Val_loss:3095.69042969)
epoch (10 / 2000) (Train_loss:3133.28100586, ACU_loss:6266.56201172, Val_loss:3073.11743164)
epoch (11 / 2000) (Train_loss:3094.18798828, ACU_loss:6188.37597656, Val_loss:3

In [20]:
print(f'{dataset} MSE : ', test_loss)

A456 MSE :  10.649298429489136


B456

In [None]:
# training setting
batch=256
epoch=2000
wafer_len=316
dim=16
save_path_pattern=''
dataset='B456'
device='cuda'
seed=2
comment=''
decay=0
val_ratio=0.2
topk=4
report='best'
load_model_path=''

# set random seed
setup_seed(seed)

train_config = {
    'batch': batch,
    'epoch': epoch,
    'wafer_len': wafer_len,
    'dim': dim,
    'comment': comment,
    'seed': seed,
    'decay': decay,
    'val_ratio': val_ratio,
    'topk': topk,
}

env_config={
    'save_path': save_path_pattern,
    'dataset': dataset,
    'report': report,
    'device': device,
    'load_model_path': load_model_path
}

# start traning
main = Main(train_config, env_config, debug=False)
test_loss, _, value_result, _ = main.run()

epoch (1 / 2000) (Train_loss:2887.63793945, ACU_loss:5775.27587891, Val_loss:2739.55078125)
epoch (2 / 2000) (Train_loss:2838.68359375, ACU_loss:5677.36718750, Val_loss:2721.56201172)
epoch (3 / 2000) (Train_loss:2789.79992676, ACU_loss:5579.59985352, Val_loss:2702.57348633)
epoch (4 / 2000) (Train_loss:2742.57922363, ACU_loss:5485.15844727, Val_loss:2682.66650391)
epoch (5 / 2000) (Train_loss:2696.58056641, ACU_loss:5393.16113281, Val_loss:2660.85156250)
epoch (6 / 2000) (Train_loss:2653.74658203, ACU_loss:5307.49316406, Val_loss:2637.70336914)
epoch (7 / 2000) (Train_loss:2611.91943359, ACU_loss:5223.83886719, Val_loss:2613.20751953)
epoch (8 / 2000) (Train_loss:2564.98156738, ACU_loss:5129.96313477, Val_loss:2586.44287109)
epoch (9 / 2000) (Train_loss:2517.25585938, ACU_loss:5034.51171875, Val_loss:2557.11157227)
epoch (10 / 2000) (Train_loss:2468.87744141, ACU_loss:4937.75488281, Val_loss:2526.13696289)
epoch (11 / 2000) (Train_loss:2420.74414062, ACU_loss:4841.48828125, Val_loss:2

In [32]:
print(f'{dataset} MSE : ', test_loss)

B456 MSE :  11.936854362487793
