# Data flow from a DataFrame (`flow_from_dataframe`)

In [1]:
import os
from glob import glob
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline

In [2]:
# List the entries of the CIFAR dataset directory.
os.listdir('./dataset/cifar')

['labels.txt', 'test', 'train']

In [3]:
# glob returns matches in arbitrary, filesystem-dependent order; sort them so
# that indexing (e.g. train_paths[0]) and the row order of anything built from
# these lists are reproducible across machines and runs.
train_paths = sorted(glob('./dataset/cifar/train/*.png'))
test_paths = sorted(glob('./dataset/cifar/test/*.png'))

# Number of PNG files found in each split.
len(train_paths), len(test_paths)

(50000, 10000)

In [4]:
# Take the first training path as a sample for working out the file-name pattern.
path = train_paths[0]
path

'./dataset/cifar/train\\0_frog.png'

In [5]:
# Derive the class label step by step from the sample path:
# drop the directory, strip the '.png' suffix, keep the text after the last '_'.
sample_fname = os.path.basename(path)
sample_stem = sample_fname.replace('.png', '')
cls_name = sample_stem.split('_')[-1]
cls_name

'frog'

### class 이름을 얻는 코드를 함수로 묶기

In [6]:
def get_class_name(path):
    """Return the class label encoded in an image file name.

    File names are expected to look like ``<index>_<class>.<ext>``; the label
    is the text after the last underscore, with the file extension removed.

    Args:
        path: Path (or bare file name) of an image file.

    Returns:
        The class label as a string, e.g. ``'frog'`` for ``'0_frog.png'``.
    """
    fname = os.path.basename(path)
    # splitext strips whatever extension is present (any case, any type)
    # instead of only removing the literal substring '.png'.
    stem, _ext = os.path.splitext(fname)
    return stem.split('_')[-1]

In [7]:
# Sanity-check the helper on the sample path.
path, get_class_name(path)

('./dataset/cifar/train\\0_frog.png', 'frog')

# Class 수 확인

In [8]:
# One class label per training file, in the same order as train_paths.
classes_name = list(map(get_class_name, train_paths))

In [9]:
# Distinct class labels found among the training files.
np.unique(classes_name)

array(['airplane', 'automobile', 'bird', 'cat', 'deer', 'dog', 'frog',
       'horse', 'ship', 'truck'], dtype='<U10')

In [10]:
# With return_counts=True, np.unique returns a tuple:
# (array of unique labels, array with the number of occurrences of each).
unique_classes = np.unique(classes_name, return_counts=True)
unique_classes

(array(['airplane', 'automobile', 'bird', 'cat', 'deer', 'dog', 'frog',
        'horse', 'ship', 'truck'], dtype='<U10'),
 array([5000, 5000, 5000, 5000, 5000, 5000, 5000, 5000, 5000, 5000],
       dtype=int64))

In [11]:
# The first element of the tuple is the array of unique class labels.
classes = unique_classes[0]
len(classes), classes

(10,
 array(['airplane', 'automobile', 'bird', 'cat', 'deer', 'dog', 'frog',
        'horse', 'ship', 'truck'], dtype='<U10'))

# DataFrame 생성

In [12]:
# Two parallel columns: the image path and the label parsed from its file name.
data = dict(path=train_paths, class_name=classes_name)
df = pd.DataFrame(data)
df.head()

Unnamed: 0,path,class_name
0,./dataset/cifar/train\0_frog.png,frog
1,./dataset/cifar/train\10000_automobile.png,automobile
2,./dataset/cifar/train\10001_frog.png,frog
3,./dataset/cifar/train\10002_frog.png,frog
4,./dataset/cifar/train\10003_ship.png,ship


In [13]:
# Persist the training table; index=False keeps the row index out of the CSV.
df.to_csv('train_dataset.csv', index=False)

In [14]:
# Same path/label table as for training, built from the test files.
test_classes_names = list(map(get_class_name, test_paths))

test_data = dict(path=test_paths, class_name=test_classes_names)
test_df = pd.DataFrame(test_data)
test_df.head()

Unnamed: 0,path,class_name
0,./dataset/cifar/test\0_cat.png,cat
1,./dataset/cifar/test\1000_dog.png,dog
2,./dataset/cifar/test\1001_airplane.png,airplane
3,./dataset/cifar/test\1002_ship.png,ship
4,./dataset/cifar/test\1003_deer.png,deer


In [15]:
# Persist the test table; index=False keeps the row index out of the CSV.
test_df.to_csv('test_dataset.csv', index=False)

## Data 확인

In [16]:
from PIL import Image
from tqdm.notebook import tqdm

In [17]:
# Collect the pixel height and width of every training image so the next cell
# can check whether all images share one size.
heights = []
widths = []

for path in tqdm(train_paths):
    # Image.open is lazy: .size is available without decoding the pixel data,
    # so there is no need to convert to a NumPy array (whose 3-way shape unpack
    # would also fail on a 2-D grayscale image). The context manager closes
    # each file handle as soon as its size has been read.
    with Image.open(path) as img_pil:
        w, h = img_pil.size  # PIL reports (width, height)

    heights.append(h)
    widths.append(w)

  0%|          | 0/50000 [00:00<?, ?it/s]

In [18]:
np.unique(heights), np.unique(widths)

(array([32]), array([32]))

# Model fitting

In [19]:
import tensorflow as tf
from tensorflow.keras import layers, datasets
from tensorflow.keras.preprocessing.image import ImageDataGenerator

## Hyperparameter

In [20]:
# Training schedule: passes over the data and images per batch.
num_epochs = 10
batch_size = 32

# Optimizer step size and the rate handed to every Dropout layer in the model.
# NOTE(review): Keras Dropout takes the fraction of units to DROP, so 0.7
# discards 70% of the activations -- confirm this was not meant as a keep-probability.
learning_rate = 0.001
dropout_rate = 0.7

# Model input size; the first two entries are also used as the generators'
# target_size. num_classes is the width of the final softmax layer.
input_shape = (32, 32, 3)
num_classes = 10

## Preprocess

In [21]:
# Reload the path/label tables from the CSV files written earlier in this notebook.
# Note that this rebinds test_df, which was already defined when the test CSV was built.
train_df = pd.read_csv('train_dataset.csv')
test_df = pd.read_csv('test_dataset.csv')

In [22]:
# Random augmentations applied to training batches only.
augment_kwargs = dict(
    width_shift_range=0.3,
    zoom_range=0.2,
    horizontal_flip=True,
)

# Both generators multiply pixel values by 1/255; the test one does nothing else.
train_datagen = ImageDataGenerator(rescale=1./255, **augment_kwargs)

test_datagen = ImageDataGenerator(rescale=1./255)

In [23]:
# Fix the label -> index mapping once, from the training table, and hand it to
# both generators so the one-hot columns are guaranteed to mean the same class
# for training and validation (otherwise each generator infers its own mapping
# from whatever labels its table happens to contain).
class_names = sorted(train_df['class_name'].unique())

# Training batches: augmented, shuffled (the default), one-hot labels to match
# the categorical cross-entropy loss.
train_generator = train_datagen.flow_from_dataframe(
        train_df,
        x_col='path',
        y_col='class_name',
        target_size=input_shape[:2],
        classes=class_names,
        class_mode='categorical',
        batch_size=batch_size
)

# Validation batches: no shuffling, so the batch order matches the table order
# and predictions can be lined up with validation_generator.classes / filenames.
validation_generator = test_datagen.flow_from_dataframe(
        test_df,
        x_col='path',
        y_col='class_name',
        target_size=input_shape[:2],
        classes=class_names,
        class_mode='categorical',
        batch_size=batch_size,
        shuffle=False
)

Found 50000 validated image filenames belonging to 10 classes.
Found 10000 validated image filenames belonging to 10 classes.


## Build and train model

In [24]:
# Basic CNN: two convolutional stages followed by a dense classifier head.
inputs = layers.Input(input_shape)

# Each stage is two (3x3 conv -> ReLU) pairs, then 2x2 max-pooling and dropout.
# Layers are created in the same order as a hand-unrolled version would be.
net = inputs
for stage_filters in (32, 64):
    for conv_idx in range(2):
        net = layers.Conv2D(stage_filters, (3, 3), padding='SAME')(net)
        net = layers.Activation('relu')(net)
    net = layers.MaxPooling2D(pool_size=(2, 2))(net)
    net = layers.Dropout(dropout_rate)(net)

# Classifier head: flatten -> Dense(512) -> ReLU -> dropout -> Dense -> softmax.
net = layers.Flatten()(net)
net = layers.Dense(512)(net)
net = layers.Activation('relu')(net)
net = layers.Dropout(dropout_rate)(net)
net = layers.Dense(num_classes)(net)
net = layers.Activation('softmax')(net)

model = tf.keras.Model(inputs=inputs, outputs=net, name='Basic_CNN')

In [25]:
# Adam at the configured learning rate; categorical cross-entropy pairs with
# the softmax output, and accuracy is tracked as the reported metric.
adam = tf.keras.optimizers.Adam(learning_rate)
model.compile(
    optimizer=adam,
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

In [26]:
# Train for num_epochs, evaluating on the validation generator after each epoch.
# verbose=2 prints one summary line per epoch instead of a progress bar.
# NOTE(review): the returned History object is not bound to a name, so it is
# only shown as the cell's output repr and the per-epoch metrics survive only
# in the printed log.
model.fit(train_generator,
          validation_data=validation_generator,
          epochs=num_epochs, 
          verbose=2)

Epoch 1/10
1563/1563 - 40s - loss: 1.8867 - accuracy: 0.2956 - val_loss: 1.5886 - val_accuracy: 0.4225
Epoch 2/10
1563/1563 - 40s - loss: 1.6613 - accuracy: 0.3847 - val_loss: 1.5725 - val_accuracy: 0.4102
Epoch 3/10
1563/1563 - 40s - loss: 1.5985 - accuracy: 0.4141 - val_loss: 1.7679 - val_accuracy: 0.3575
Epoch 4/10
1563/1563 - 39s - loss: 1.5585 - accuracy: 0.4307 - val_loss: 1.6480 - val_accuracy: 0.3915
Epoch 5/10
1563/1563 - 39s - loss: 1.5286 - accuracy: 0.4451 - val_loss: 1.5175 - val_accuracy: 0.4425
Epoch 6/10
1563/1563 - 40s - loss: 1.5038 - accuracy: 0.4514 - val_loss: 1.4395 - val_accuracy: 0.4597
Epoch 7/10
1563/1563 - 40s - loss: 1.4867 - accuracy: 0.4596 - val_loss: 1.6250 - val_accuracy: 0.4062
Epoch 8/10
1563/1563 - 40s - loss: 1.4682 - accuracy: 0.4673 - val_loss: 1.4493 - val_accuracy: 0.4505
Epoch 9/10
1563/1563 - 41s - loss: 1.4571 - accuracy: 0.4716 - val_loss: 1.4974 - val_accuracy: 0.4502
Epoch 10/10
1563/1563 - 40s - loss: 1.4480 - accuracy: 0.4719 - val_loss:

<tensorflow.python.keras.callbacks.History at 0x2cd9e17efc8>