<a href="https://colab.research.google.com/github/welcomeglory/Python/blob/master/cat_dog_CNN.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [7]:
!pip install tensorflow==2.15.0
import os
import tensorflow as tf
print(tf.__version__)
# path = "/content/drive/MyDrive/세종교육/CNN/model_version/"

2.15.0


In [8]:
class ImageCNN(tf.keras.Model):
    def __init__(self, num_classes=2):
        super(ImageCNN, self).__init__()

        self.conv1 = tf.keras.layers.Conv2D(filters=32, kernel_size=5, strides=1, padding='same', activation='relu')
        self.pool1 = tf.keras.layers.MaxPool2D(pool_size=(2, 2), strides=2)

        self.conv2 = tf.keras.layers.Conv2D(filters=64, kernel_size=5, strides=1, padding='same', activation='relu')
        self.pool2 = tf.keras.layers.MaxPool2D(pool_size=(2, 2), strides=2)

        self.conv3 = tf.keras.layers.Conv2D(filters=128, kernel_size=5, strides=1, padding='same', activation='relu')
        self.conv4 = tf.keras.layers.Conv2D(filters=128, kernel_size=5, strides=1, padding='same', activation='relu')

        self.flat = tf.keras.layers.Flatten()
        self.hid1 = tf.keras.layers.Dense(1024, activation='relu')
        self.drop1 = tf.keras.layers.Dropout(0.5)

        self.output_layer = tf.keras.layers.Dense(num_classes, activation=None)

    def call(self, x):
        conv1 = self.conv1(x)
        pool1 = self.pool1(conv1)
        conv2 = self.conv2(pool1)
        pool2 = self.pool2(conv2)
        conv3 = self.conv3(pool2)
        conv4 = self.conv4(conv3)
        flat = self.flat(conv4)
        hid = self.hid1(flat)
        drop = self.drop1(hid)
        logits = self.output_layer(drop)
        return logits

# 옵티마이저 생성
optimizer = tf.keras.optimizers.legacy.Adam()

@tf.function
def backward(model, x, y, train_summary):
    with tf.GradientTape() as grad:
        logits = model(x)
        loss = cross_entropy_loss(logits, y)

    grads = grad.gradient(loss, model.trainable_variables)
    optimizer.apply_gradients(zip(grads, model.trainable_variables))

    with train_summary.as_default():
        tf.summary.scalar('loss', loss, step=optimizer.iterations)
        tf.summary.image('training image', x, max_outputs=9, step=optimizer.iterations)

    return loss

@tf.function
def cross_entropy_loss(logits, y):
    return tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=y))

@tf.function
def accuracy(pred, y):
    correction = tf.equal(tf.argmax(pred, 1), tf.argmax(y, 1))
    return tf.reduce_mean(tf.cast(correction, tf.float32))

In [9]:
def evaluate_model(testDataIter, model):
    accuracies = []
    for batch in testDataIter:
        b_x, b_y = batch
        pred = model(b_x)
        acc = accuracy(pred, b_y)
        accuracies.append(acc)

    avg_acc = tf.reduce_mean(accuracies)
    print(f"Final Test Accuracy: {avg_acc}")
    return avg_acc

def train_learning(epochs, frq_print, trainDataIter, testDataIter, model, optimizer, path=''):
    # 체크포인트 및 체크포인트 관리자 설정
    ckpt = tf.train.Checkpoint(step=tf.Variable(0), model=model, optimizer=optimizer)
    ckpt_mgr = tf.train.CheckpointManager(ckpt, directory=f"{path}checkPoint/", max_to_keep=5)
    latest_ckpt = ckpt_mgr.latest_checkpoint
    train_summary = tf.summary.create_file_writer(f"{path}tensorboard/train")

    # 이전 체크포인트가 있으면 복원하고 옵티마이저 초기화
    if latest_ckpt:
        ckpt.restore(latest_ckpt)
        print(f"Restored from {latest_ckpt}")

        # 더미 입력으로 모델 호출 (옵티마이저가 변수를 인식하도록)
        try:
            b_x, b_y = next(trainDataIter)
        except StopIteration:
            trainDataIter = iter(train_dataset)  # train_dataset을 이터레이터로 재초기화
            b_x, b_y = next(trainDataIter)

        _ = model(b_x)  # 실제 데이터를 사용하여 모델 호출

        evaluate_model(testDataIter, model)

    # 초기화된 loss 리스트
    lossList = []

    # 훈련 루프
    for epoch in range(int(ckpt.step), epochs):
        try:
            b_x, b_y = next(trainDataIter)
        except StopIteration:
            # 데이터셋 이터레이터가 끝난 경우, 이터레이터를 다시 초기화
            trainDataIter = iter(train_dataset)  # train_dataset을 이터레이터로 재초기화
            b_x, b_y = next(trainDataIter)

        # 백워드 패스 및 손실 계산
        ls = backward(model, b_x, b_y, train_summary)
        lossList.append(ls)

        # 주기적으로 체크포인트 저장 및 모델 성능 출력
        if epoch % frq_print == 0:
            ckpt_mgr.save(checkpoint_number=ckpt.step)
            pred = model(b_x)
            acc = accuracy(pred, b_y)
            print(f"Epoch: {epoch + 1} ====> Loss: {ls}, Accuracy: {acc}")

        # 에포크 수 증가
        ckpt.step.assign_add(1)

    # 최종 평가
    evaluate_model(testDataIter, model)

    return lossList

In [10]:
def load_dataset(paths, labels, batch_size):
    dataset = tf.data.Dataset.from_tensor_slices((paths, labels))
    dataset = dataset.map(lambda x, y: (load_and_preprocess_image(x), tf.one_hot(y, depth=2)))
    dataset = dataset.shuffle(buffer_size=1000).batch(batch_size).prefetch(tf.data.experimental.AUTOTUNE)
    return dataset

# 이미지를 숫자화
def load_and_preprocess_image(path):
    image = tf.io.read_file(path)
    image = tf.image.decode_jpeg(image, channels=3)
    image = tf.image.resize(image, [128, 128])
    image = tf.cast(image, tf.float32) / 255.0 #normalization
    return image

In [11]:
path = "/content/drive/MyDrive/세종교육/CNN/cat_dog"
files = os.listdir(path)
files = [path + f"/{file_name}" for file_name in files]
cats = [(file_name, 0) for file_name in files if "cat." in file_name]
dogs = [(file_name, 1) for file_name in files if "dog." in file_name]

print(cats[0])

train_dogs = dogs[:int(len(dogs)*0.7)] # 다운사이징
test_dogs = dogs[int(len(dogs)*0.7):]
train_cats = cats[:int(len(cats)*0.7)] # 다운사이징
test_cats = cats[int(len(cats)*0.7):]

train = train_dogs + train_cats
test = test_dogs + test_cats

print(len(train))
print(len(test))


trains, train_labels = zip(*train)
trains = list(trains)
train_labels  = list(train_labels)

tests, test_labels = zip(*test)
tests = list(tests)
test_labels  = list(test_labels)


train_dataset = load_dataset(trains, train_labels, batch_size=64)
train_dataset_iter = iter(train_dataset)
test_dataset = load_dataset(tests, test_labels, batch_size=64)
test_dataset_iter = iter(test_dataset)

for images, labels in train_dataset_iter:
  print(images.shape)
  print(labels.shape)
  break






('/content/drive/MyDrive/세종교육/CNN/cat_dog/cat.9163.jpg', 0)
17500
7500
(64, 128, 128, 3)
(64, 2)


In [13]:
path = "/content/drive/MyDrive/세종교육/model_version/"

epochs = 10000 # 훈련횟수
frq_print = 100
model = ImageCNN()
lossList = train_learning(epochs, frq_print, train_dataset_iter, test_dataset_iter, model, optimizer, path = path)