In [1]:
import tensorflow as tf
from tensorflow import keras

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import os

from keras.preprocessing import image

In [2]:
# Root directory holding the training images. The original E:/ location stays the
# default; set the IOT_ANOMALY_TRAIN_DIR environment variable to point elsewhere
# without editing the notebook.
train_path = os.environ.get(
    'IOT_ANOMALY_TRAIN_DIR',
    'E:/anomoly_detection_IOT_sensor/iotAnomaly_train/csv_files',
)

# image 불러오기

- numpy로 저장하기

우선 이미지 파일 하나로 시도해보자.

In [None]:
# Try the loading steps on one sample image before processing the whole folder.
img_path = os.path.join(train_path, 'label_00006320620311.jpg')
# img_path
img = image.load_img(img_path)
img_tensor = image.img_to_array(img)  # convert the loaded image to an array
img  # last expression of the cell: rendered as the cell output

In [None]:
# expand a dimension (3D -> 4D) by inserting a leading axis of length 1
# NOTE: img_tensor is overwritten, so re-running this cell adds another axis each time.
img_tensor = np.expand_dims(img_tensor, axis=0)
img_tensor.shape

In [None]:
# scaling into [0, 1] (assumes the pixel values are in 0-255 — TODO confirm)
# NOTE: the division is in place, so re-running this cell divides by 255 again.
img_tensor /= 255.

In [None]:
plt.rcParams['figure.figsize'] = (10, 10) # set figure size (rcParams is global, so later figures inherit it)
plt.imshow(img_tensor[0])  # index 0 drops the leading axis added by expand_dims
plt.show()

### image 파일의 형태를 알아보자

9개의 이미지 파일을 보자

In [3]:
target_size_v = 64
target_size_h = 128
def preprocess_img(img_path):
    """Load one image file and return it as a scaled batch of one.

    The image is loaded at (target_size_v, target_size_h), converted to an
    array, given a leading axis of length 1 and divided by 255.

    Args:
        img_path: path of the image file to load.

    Returns:
        Array with a leading axis of length 1 followed by the image axes.
    """
    size = (target_size_v, target_size_h)
    pixels = image.img_to_array(image.load_img(img_path, target_size=size))

    # add the leading batch axis, then scale in place (assumes 0-255 input — TODO confirm)
    batch = np.expand_dims(pixels, axis=0)
    batch /= 255.
    return batch

In [None]:
# draw plot ( 3 * 3 )
# layout
n_col = 3
n_row = 3
n_pic = n_row * n_col  # number of preview tiles (9)

margin = 3  # blank pixels left between neighbouring tiles

# blank matrix to store results: (rows, columns, RGB channels)
total = np.zeros((n_row * target_size_v + (n_row - 1) * margin,
                  n_col * target_size_h + (n_col - 1) * margin, 3))

# Collect the full paths of the first n_pic .jpg files (preview only).
jpg_list = []
for (path, _dirs, files) in os.walk(train_path):
    print(path)
    for filename in files:
        if os.path.splitext(filename)[-1] == '.jpg':
            # join with the directory being walked so files in subfolders resolve correctly
            jpg_list.append(os.path.join(path, filename))
        if len(jpg_list) == n_pic:
            break
    if len(jpg_list) == n_pic:
        # the inner break only leaves the file loop; stop the directory walk as well
        break

# Paste every collected image into its tile. If fewer than n_pic images were
# found, the remaining tiles stay blank instead of raising IndexError.
img_seq = 0
for img_seq, img_path in enumerate(jpg_list, start=1):
    i, j = divmod(img_seq - 1, n_col)  # i = grid row, j = grid column
    img_tensor = preprocess_img(img_path)

    row_start = i * (target_size_v + margin)
    row_end = row_start + target_size_v

    col_start = j * (target_size_h + margin)
    col_end = col_start + target_size_h

    total[row_start : row_end, col_start : col_end, :] = img_tensor[0]

img_seq

In [None]:
# display the pictures in grid on one large figure
fig, ax = plt.subplots(figsize=(32, 32))
ax.imshow(total)
plt.show()

In [None]:
# Shape of the last preprocessed image batch, then of the assembled grid canvas.
print(img_tensor.shape, total.shape, sep='\n')

### train 데이터셋에 적용

In [4]:
# Allow GPU memory to grow on demand; this has to run before any tensors or ops.
gpus = tf.config.experimental.list_physical_devices('GPU')
if gpus:
    try:
        # Apply the setting to every visible GPU, not only the first one.
        for gpu in gpus:
            tf.config.experimental.set_memory_growth(gpu, True)
    except RuntimeError as e:
        # Memory growth must be set at program start
        print(e)

# Load every .jpg found under train_path into a list of preprocessed images.
X = []
for (path, _dirs, files) in os.walk(train_path):
    for filename in files:
        if os.path.splitext(filename)[-1] != '.jpg':
            continue
        try:
            # Join with the directory currently being walked: os.walk also visits
            # subfolders, and joining with train_path would mis-resolve their files.
            X.extend(preprocess_img(os.path.join(path, filename)))
        except OSError:
            # Unreadable image file: report it and keep loading the rest.
            print("%s/%s" % (path, filename))


In [5]:
# Lengths of the nested list: number of images, then the three axes of one image.
len(X), len(X[0]), len(X[0][0]), len(X[0][0][0])

(23974, 64, 128, 3)

# NOTE(review): duplicates the image-loading loop of the previous code cell; the only
# difference is the print(path) for each walked directory.
# NOTE(review): files are joined with train_path rather than the walked `path`, so a .jpg
# inside a subdirectory would resolve to the wrong location — confirm the folder is flat.
X = []
for (path, dir, files) in os.walk(train_path):
    print(path)
    for filename in files:
        ext = os.path.splitext(filename)[-1]
        if ext == '.jpg':
            try :
                img_path = os.path.join(train_path, filename)
                X.extend(preprocess_img(img_path))                    
            except KeyError as e :
                # NOTE(review): nothing in the try body obviously raises KeyError — confirm intent
                print("%s/%s" % (path, filename))
                break

In [6]:
# Stack the list of images into one array and derive the per-sample input
# shape for the model: (axis 1, axis 2, 3 channels).
X = np.array(X)
input_shape = X.shape[1:3] + (3,)

input_shape

(64, 128, 3)

In [None]:
# Show an arbitrary image as a sanity check (index 1500 has no special meaning)
plt.imshow(X[1500])

In [None]:
# fit에서 validation 나누기
# from sklearn.model_selection import train_test_split

# X_train, X_val, y_train, y_val = train_test_split(X, np.array([0]*len(X)), test_size=0.2) # spt로 ins 추측

# X_train.shape, X_val.shape

In [14]:

# Delete unused variables (free memory)
# NOTE(review): the `del` statements below are commented out, so this cell only
# runs gc.collect().
import gc
gc.collect()
# del X
# del jpg_list
# del img
# del total
# del img_path
# del img_tensor


5094

## Model 작성

In [8]:
from tensorflow.keras import Sequential
# from keras.callbacks import EarlyStopping
# from keras.callbacks import ModelCheckpoint

from tensorflow.keras.layers import Dense, MaxPooling2D, Conv2D, Flatten, Dropout
from tensorflow.keras.models import Model
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Reshape

In [9]:
# Conv2D + pooling front end, reshaped into a sequence for the LSTM, followed by
# two dense layers ending in a single sigmoid output.
model = Sequential([
    Conv2D(128, kernel_size=(3, 3), padding='same', activation='relu',
           input_shape=input_shape, name='conv1_64'),
    MaxPooling2D(pool_size=(2, 2), strides=(4, 4), name='pool1', padding='valid'),
    # collapse the two spatial axes into one sequence axis of 128-channel vectors
    Reshape((-1, 128)),
    LSTM(64),
    Dense(32, name='dense1', activation='linear'),
    Dense(1, name='dense2', activation='sigmoid'),
])
model.summary()
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv1_64 (Conv2D)           (None, 64, 128, 128)      3584      
                                                                 
 pool1 (MaxPooling2D)        (None, 16, 32, 128)       0         
                                                                 
 reshape (Reshape)           (None, 512, 128)          0         
                                                                 
 lstm (LSTM)                 (None, 64)                49408     
                                                                 
 dense1 (Dense)              (None, 32)                2080      
                                                                 
 dense2 (Dense)              (None, 1)                 33        
                                                                 
Total params: 55,105
Trainable params: 55,105
Non-traina

In [10]:
# from keras.models import load_model

# class save(keras.callbacks.Callback):
#     def __init__(self, model):
#         self.count = 0
#         self.model = model

#     def on_epoch_end(self, bath, logs={}):
#         self.count = self.count + 1
#         if self.count % 10 == 0:
#             model.save("mymodel_epoch_{}.h5".format(self.count))
        
# s = save(model)
# es = EarlyStopping(monitor='val_loss', patience=5)
# mc = ModelCheckpoint('best_model.h5', monitor='val_loss', save_best_only=True)

# Allow GPU memory to grow on demand; this has to run before any tensors or ops.
gpus = tf.config.experimental.list_physical_devices('GPU')
if gpus:
    try:
        # Apply the setting to every visible GPU, not only the first one.
        for gpu in gpus:
            tf.config.experimental.set_memory_growth(gpu, True)
    except RuntimeError as e:
        # Memory growth must be set at program start
        print(e)
# # out of memory
# strategy = tf.distribute.MirroredStrategy()

# with strategy.scope():
# Train on at most the first 10000 images, every one labelled 0. The label
# vector is sized from the data so it still matches when X holds fewer samples.
n_train = min(10000, len(X))
history = model.fit(X[:n_train], np.zeros(n_train, dtype=int), epochs=3, validation_split=0.1)

Epoch 1/3
Epoch 2/3
Epoch 3/3


## test case를 통해 이상치 탐지

train data의 대부분이 정상 데이터라는 가정하에, 모든 샘플에 정상 label(0)을 부여하여 훈련하였다.

test data까지 함께 불러오면 메모리가 부족해 커널이 다운될 수 있어서, test data를 불러오는 코드는 모델 훈련 이후에 배치하였다.

In [11]:
# Root directory of the test images. The original E:/ location stays the default;
# set the IOT_ANOMALY_TEST_DIR environment variable to point elsewhere.
test_path = os.environ.get(
    'IOT_ANOMALY_TEST_DIR',
    'E:/anomoly_detection_IOT_sensor/iotAnomaly_test/csv_files',
)
# Allow GPU memory to grow on demand; this has to run before any tensors or ops.
gpus = tf.config.experimental.list_physical_devices('GPU')
if gpus:
    try:
        # Apply the setting to every visible GPU, not only the first one.
        for gpu in gpus:
            tf.config.experimental.set_memory_growth(gpu, True)
    except RuntimeError as e:
        # Memory growth must be set at program start
        print(e)

# Load every .jpg found under test_path. X is rebound here, so the previously
# loaded images are no longer reachable through this name.
X = []
for (path, _dirs, files) in os.walk(test_path):
    for filename in files:
        if os.path.splitext(filename)[-1] != '.jpg':
            continue
        try:
            # Join with the directory currently being walked: os.walk also visits
            # subfolders, and joining with test_path would mis-resolve their files.
            X.extend(preprocess_img(os.path.join(path, filename)))
        except OSError:
            # Unreadable image file: report it and keep loading the rest.
            print("%s/%s" % (path, filename))

GPU 메모리 관련 메모:

- 설정: `os.putenv('TF_GPU_ALLOCATOR', 'cuda_malloc_async')`
- 참고 문서: https://www.tensorflow.org/guide/gpu
- 발생한 오류: `InternalError: Failed copying input tensor from /job:localhost/replica:0/task:0/device:CPU:0 to /job:localhost/replica:0/task:0/device:GPU:0 in order to run _EagerConst: Dst tensor is not initialized.`

In [12]:
# Convert the list of loaded test images into a single array for prediction.
X = np.array(X)

In [15]:
# Run the trained model on the test images.
y_pred = model.predict(X)

In [16]:
# Number of predictions and the number of values in each prediction.
len(y_pred), len(y_pred[0])

(8744, 1)

In [18]:
# Count how many distinct prediction values the model produced.
len(np.unique(y_pred))

3199

In [20]:
import pickle

# Save the predictions to disk. The output folder is created first because
# open() does not create missing directories and would raise FileNotFoundError.
os.makedirs("pickle", exist_ok=True)
with open("pickle/CRNN_ypred.pickle", "wb") as handle:
    pickle.dump(y_pred, handle, protocol=pickle.HIGHEST_PROTOCOL)