<a href="https://colab.research.google.com/github/yoonwanggyu/Self_Study/blob/main/%EC%98%A4%ED%94%84%EB%9D%BC%EC%9D%B8/AlexNet_%EA%B5%AC%ED%98%84(tensorflow%2Cpytorch).ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# tensorflow 기반 alexnet 구현

In [1]:
import tensorflow as tf

from tensorflow.keras import datasets, layers, models

import matplotlib.pyplot as plt
import numpy as np

In [2]:
(train_images, train_labels), (test_images, test_labels) = datasets.fashion_mnist.load_data()
train_images.shape, train_labels.shape, test_images.shape, test_labels.shape

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/train-labels-idx1-ubyte.gz
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/train-images-idx3-ubyte.gz
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/t10k-labels-idx1-ubyte.gz
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/t10k-images-idx3-ubyte.gz


((60000, 28, 28), (60000,), (10000, 28, 28), (10000,))

In [3]:
# 라벨 설정
label_names = ['T-shirt/top', 'Trouser', 'Pullover', 'Dress', 'Coat',
               'Sandal', 'Shirt', 'Sneaker', 'Bag', 'Ankle boot']

In [4]:
# 채널 값을 부여 : 28, 28 -> 28, 28, 1
train_images = train_images.reshape(-1, 28, 28, 1)
test_images = test_images.reshape(-1, 28, 28, 1)
train_images.shape, test_images.shape

((60000, 28, 28, 1), (10000, 28, 28, 1))

In [5]:
# AlexNet 모델 입력층에 맞게 채널 3장으로 맞춤
train_images = tf.repeat(train_images, 3, axis=3)
test_images = tf.repeat(test_images, 3, axis=3)
train_images.shape, test_images.shape

(TensorShape([60000, 28, 28, 3]), TensorShape([10000, 28, 28, 3]))

In [7]:
# ## 사용할 layer module
# layers.Conv2D()
# layers.Lambda(tf.nn.local_response_normalization)
# layers.MaxPooling2D()

# layers.Flatten()
# layers.Dense()
# layers.Dropout(0.5)

In [13]:
model = models.Sequential()

# 총 8층
# 227 x 227 x 3으로 resize
model.add(layers.experimental.preprocessing.Resizing(227, 227,
                                                     input_shape=train_images.shape[1:]))
# First convolutional layer
model.add(layers.Conv2D(96,(11,11),strides = 4, activation='relu'))  # imagesize에 맞게 큰 filter에 큰 stride 적용
model.add(layers.Lambda(tf.nn.local_response_normalization))         # ReLU 결과값 안정화
model.add(layers.MaxPooling2D((3,3),strides=2))                      # Overlapping Pooling
# Second convolutional layer
model.add(layers.Conv2D(256,(5,5),activation='relu',padding='same'))
model.add(layers.Lambda(tf.nn.local_response_normalization))
model.add(layers.MaxPooling2D((3,3),strides=2))
# Thrid convolutional layer
model.add(layers.Conv2D(384,(3,3),strides=1,activation='relu',padding='same'))
# Fourth convolutional layer
model.add(layers.Conv2D(384,(3,3),strides=1,activation='relu',padding='same'))
# Fifth convolutional layer
model.add(layers.Conv2D(256,(3,3),strides=1,activation='relu',padding='same'))
model.add(layers.MaxPooling2D((3,3),strides=2))   # fm = 6, 6, 256 = 9216

# classification layer
model.add(layers.Flatten())
model.add(layers.Dense(256,activation='relu'))  # 원래 : 4096
model.add(layers.Dropout(0.5))
model.add(layers.Dense(256,activation='relu'))  # 4096
model.add(layers.Dropout(0.5))
model.add(layers.Dense(10,activation='softmax'))  # 1000

model.compile(optimizer='adam',
              loss='sparse_categorical_crossentropy',
              metrics = ['accuracy'])

# kernel 개수, 크기, stride,w
# conv, pool(overlap), LRN, FC, dropout
# output shape : 1000 -> 10

model.summary()

Model: "sequential_4"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 resizing_4 (Resizing)       (None, 227, 227, 3)       0         
                                                                 
 conv2d_9 (Conv2D)           (None, 55, 55, 96)        34944     
                                                                 
 lambda_4 (Lambda)           (None, 55, 55, 96)        0         
                                                                 
 max_pooling2d_5 (MaxPoolin  (None, 27, 27, 96)        0         
 g2D)                                                            
                                                                 
 conv2d_10 (Conv2D)          (None, 27, 27, 256)       614656    
                                                                 
 lambda_5 (Lambda)           (None, 27, 27, 256)       0         
                                                      

In [None]:
model.fit(train_images, train_labels, batch_size=128, epochs=10, validation_split=0.2)

Epoch 1/10


In [None]:
model.evaluate(test_images, test_labels)



[0.2764274775981903, 0.9049999713897705]

In [None]:
# AlexNet 모델을 간소화해봅시다
## resize없이 (28, 28, 3)으로 받아봅시다

## ** activation 은 모두 relu로 유지
## layer 1 : 32개, (3, 3), strides=1 / pooling은 (2, 2) overlapping x / normalizing 유지

## layer 2 : 64개, (3, 3), strides=1 / pooling은 (2, 2) overlapping x / normalizing 유지

## layer 3 : 128개, (3, 3), strides=1 / pooling은 (2, 2) overlapping x

## layer 4, 5 : 삭제

## fully connected layer 1, 2 : node 원하는대로 주기


# pytorch 기반 alexnet 구현

In [13]:
import torch
import torch.nn as nn

class AlexNet(nn.Module):
    def __init__(self, num_classes=1000):
        super(AlexNet, self).__init__()
        self.features = nn.Sequential(
            nn.Conv2d(3, 96, kernel_size=11, stride=4),
            nn.ReLU(),
            nn.LocalResponseNorm(size=5),
            nn.MaxPool2d(kernel_size=3, stride=2),

            nn.Conv2d(96, 192, kernel_size=5, padding=2),
            nn.ReLU(),
            nn.LocalResponseNorm(size=5),
            nn.MaxPool2d(kernel_size=3, stride=2),

            nn.Conv2d(192, 384, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.Conv2d(384, 384, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.Conv2d(384, 256, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=3, stride=2),
        )
        self.classifier = nn.Sequential(
            nn.Linear(256 * 6 * 6, 4096),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(4096, 4096),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(4096, num_classes),
        )

    def forward(self, x):
        x = self.features(x)
        x = x.view(x.size(0), -1)
        x = self.classifier(x)
        return x


In [14]:
model = AlexNet(num_classes=10)
print(model)

AlexNet(
  (features): Sequential(
    (0): Conv2d(3, 96, kernel_size=(11, 11), stride=(4, 4))
    (1): ReLU()
    (2): LocalResponseNorm(5, alpha=0.0001, beta=0.75, k=1.0)
    (3): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (4): Conv2d(96, 192, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
    (5): ReLU()
    (6): LocalResponseNorm(5, alpha=0.0001, beta=0.75, k=1.0)
    (7): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (8): Conv2d(192, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (9): ReLU()
    (10): Conv2d(384, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU()
    (12): Conv2d(384, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (13): ReLU()
    (14): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (classifier): Sequential(
    (0): Linear(in_features=9216, out_features=4096, bias=True)
    (1): ReLU()
    (2): Dropout(p=0.5, i