## Tensorflow Memory Management

In [1]:
# 워닝 무시
import warnings
warnings.filterwarnings('ignore')

In [2]:
import tensorflow as tf


gpus = tf.config.experimental.list_physical_devices('GPU')
if gpus:
    try:
        tf.config.experimental.set_memory_growth(gpus[0], True)
    except RuntimeError as e:
        # 프로그램 시작시에 메모리 증가가 설정되어야만 합니다
        print(e)

2022-07-24 23:38:35.643001: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:939] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-07-24 23:38:35.732644: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:939] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-07-24 23:38:35.734343: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:939] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero


## Package Load

In [3]:
import pandas as pd
import numpy as np

from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import Dense, BatchNormalization
from tensorflow.keras.losses import MeanSquaredError
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping

from sklearn.model_selection import train_test_split
from sklearn.metrics.pairwise import cosine_similarity

## Data Load

In [4]:
train_df = pd.read_csv('./data/train.csv')
test_df = pd.read_csv('./data/test.csv')
val_df = pd.read_csv('./data/val.csv')

## Select feature : from EDA

In [5]:
select_feature = ['V1', 'V2', 'V3', 'V4', 'V5', 'V6', 'V7', 'V9', 'V10', 'V11', 'V12', 'V14', 'V16', 'V17', 'V18', 'V30']
select_feature_val = ['V1', 'V2', 'V3', 'V4', 'V5', 'V6', 'V7', 'V9', 'V10', 'V11', 'V12', 'V14', 'V16', 'V17', 'V18', 'V30', 'Class']

In [6]:
train_df = train_df[select_feature]
val_df = val_df[select_feature_val]

## Create AutoEncoder

In [7]:
input_dim = len(select_feature)

In [8]:
class Autoencoder(Model):
    def __init__(self, input_dim):
        super(Autoencoder, self).__init__()
        self.input_dim = input_dim # 입력 차원의 수
        self.auto_encoder = Sequential()
        self.auto_encoder.add(Dense(self.input_dim / 2, input_shape=(self.input_dim, ), activation='relu'))
        self.auto_encoder.add(BatchNormalization())
        self.auto_encoder.add(Dense(self.input_dim, activation='linear'))
        
    def call(self, x):
        return self.auto_encoder(x)

In [9]:
checkpointer = ModelCheckpoint(filepath='./model/keras_best.h5', verbose=1, save_best_only=True, save_weights_only=True)
earlystopping = EarlyStopping(monitor='val_accuracy', mode='max', verbose=1, patience=20)

In [10]:
autoEncoder = Autoencoder(input_dim)
autoEncoder.compile(optimizer='adam', loss=MeanSquaredError(), metrics=['accuracy'])

2022-07-24 23:38:38.284623: I tensorflow/core/platform/cpu_feature_guard.cc:151] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 AVX512F FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2022-07-24 23:38:38.285691: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:939] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-07-24 23:38:38.287452: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:939] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-07-24 23:38:38.288944: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:939] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA 

In [11]:
autoEncoder.fit(train_df, train_df, epochs=400, shuffle=True, validation_data = (train_df, train_df), batch_size=1024, callbacks = [checkpointer])

Epoch 1/400
Epoch 00001: val_loss improved from inf to 1.41259, saving model to ./model/keras_best.h5
Epoch 2/400
Epoch 00002: val_loss improved from 1.41259 to 1.05644, saving model to ./model/keras_best.h5
Epoch 3/400
Epoch 00003: val_loss improved from 1.05644 to 0.82756, saving model to ./model/keras_best.h5
Epoch 4/400
Epoch 00004: val_loss improved from 0.82756 to 0.67450, saving model to ./model/keras_best.h5
Epoch 5/400
Epoch 00005: val_loss improved from 0.67450 to 0.57934, saving model to ./model/keras_best.h5
Epoch 6/400
Epoch 00006: val_loss improved from 0.57934 to 0.52314, saving model to ./model/keras_best.h5
Epoch 7/400
Epoch 00007: val_loss improved from 0.52314 to 0.48906, saving model to ./model/keras_best.h5
Epoch 8/400
Epoch 00008: val_loss improved from 0.48906 to 0.46717, saving model to ./model/keras_best.h5
Epoch 9/400
Epoch 00009: val_loss improved from 0.46717 to 0.45034, saving model to ./model/keras_best.h5
Epoch 10/400
Epoch 00010: val_loss improved from 0

<keras.callbacks.History at 0x7f2451d80430>

In [12]:
val_X = val_df[select_feature]
val_Y = val_df[['Class']]

In [13]:
pred_X = autoEncoder.predict(val_X)

In [29]:
pred_X[0]

array([-0.45536858,  1.0646896 ,  1.1870314 , -0.18127757,  0.39804888,
       -0.16088323,  0.66640943, -0.6653831 , -0.04100302,  0.02083364,
       -0.04329195, -0.02936625, -0.02559275, -0.03648488, -0.02154214,
       -0.03877338], dtype=float32)

In [32]:
val_X[0]

KeyError: 0

In [24]:
cosine_similarity(val_X.values[0].reshape(-1, 1), pred_X[0].reshape(-1, 1))

array([[ 1., -1., -1.,  1., -1.,  1., -1.,  1.,  1., -1.,  1.,  1.,  1.,
         1.,  1.,  1.],
       [-1.,  1.,  1., -1.,  1., -1.,  1., -1., -1.,  1., -1., -1., -1.,
        -1., -1., -1.],
       [-1.,  1.,  1., -1.,  1., -1.,  1., -1., -1.,  1., -1., -1., -1.,
        -1., -1., -1.],
       [ 1., -1., -1.,  1., -1.,  1., -1.,  1.,  1., -1.,  1.,  1.,  1.,
         1.,  1.,  1.],
       [-1.,  1.,  1., -1.,  1., -1.,  1., -1., -1.,  1., -1., -1., -1.,
        -1., -1., -1.],
       [ 1., -1., -1.,  1., -1.,  1., -1.,  1.,  1., -1.,  1.,  1.,  1.,
         1.,  1.,  1.],
       [-1.,  1.,  1., -1.,  1., -1.,  1., -1., -1.,  1., -1., -1., -1.,
        -1., -1., -1.],
       [ 1., -1., -1.,  1., -1.,  1., -1.,  1.,  1., -1.,  1.,  1.,  1.,
         1.,  1.,  1.],
       [ 1., -1., -1.,  1., -1.,  1., -1.,  1.,  1., -1.,  1.,  1.,  1.,
         1.,  1.,  1.],
       [-1.,  1.,  1., -1.,  1., -1.,  1., -1., -1.,  1., -1., -1., -1.,
        -1., -1., -1.],
       [-1.,  1.,  1., -1.,  1

In [None]:
sim_list = []

for i in range(len(pred_X)):
    cosine_similarity(val_X[i], pred_X[i])

In [58]:
pred_X.shape

(28462, 16)