In [1]:
import csv
import os
import sys
import time

import PIL.Image
import sklearn

import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow import keras
from pprint import pprint

import matplotlib as mpl
import matplotlib.pyplot as plt
%matplotlib inline
# Notebook-wide matplotlib defaults: 12x12 figures with the axes grid off.
mpl.rcParams['figure.figsize'] = (12, 12)
mpl.rcParams['axes.grid'] = False

# Report the interpreter and library versions this notebook was run with.
print(sys.version_info)
# Each module is listed once (`tf` used to appear twice, printing the
# TensorFlow version two times).
for module in tf, mpl, np, pd, sklearn, keras:
    print(module.__name__, module.__version__)

sys.version_info(major=3, minor=6, micro=7, releaselevel='final', serial=0)
tensorflow 2.4.0
matplotlib 3.3.3
numpy 1.19.5
pandas 1.1.5
sklearn 0.24.0
tensorflow 2.4.0
tensorflow.keras 2.4.0


In [7]:
from tensorflow.compat.v1 import ConfigProto
from tensorflow.compat.v1 import InteractiveSession

# Build a TF1-compat session config and switch on `allow_growth` in its GPU
# options.
# NOTE(review): per the TensorFlow docs this makes the GPU allocator claim
# memory on demand instead of reserving it all up front -- confirm it still
# takes effect under TF 2.x eager execution.
config = ConfigProto()
config.gpu_options.allow_growth = True
# Open an interactive session with that config; the object is kept in
# `session`.
session = InteractiveSession(config=config)

### Read image datasets

In [9]:
# The ten class labels. This list is passed as `classes=` to
# `flow_from_dataframe` below, so keep the order stable.
class_names = [
    'airplane',
    'automobile',
    'bird',
    'cat',
    'deer',
    'dog',
    'frog',
    'horse',
    'ship',
    'truck',
]

# Image folders; file names are built as `<folder>/<image_id>.png`.
train_folder = './datasets/cifar-10/train/'
test_folder = './datasets/cifar-10/test/'

# CSV files listing the image ids (and labels) of each split.
train_label_file = './datasets/cifar-10/trainLabels.csv'
test_csv_file = './datasets/cifar-10/sampleSubmission.csv'

def parse_csv_file(filepath, folder):
    """Parse a two-column CSV file into ``(image_path, label)`` tuples.

    The first line of the file is treated as a header and skipped. Every
    other non-blank row must hold exactly two fields, ``image_id,label``.
    The image path is built as ``os.path.join(folder, image_id + '.png')``.

    Blank lines (for example a trailing empty line at the end of the file)
    are ignored instead of aborting the whole parse.

    Args:
        filepath: Path of the CSV file to read.
        folder: Directory prefix joined onto every image file name.

    Returns:
        A list of ``(image_full_path, label_str)`` tuples in file order;
        an empty list when the file has no data rows.

    Raises:
        ValueError: If a non-blank data row does not have exactly two fields.
    """
    results = []
    # newline='' is what the csv module expects so it can handle line endings
    # itself.
    with open(filepath, 'r', newline='') as f:
        reader = csv.reader(f)
        next(reader, None)  # skip the header; tolerate a completely empty file
        for row in reader:
            if not row:
                # csv.reader yields [] for a blank line.
                continue
            if len(row) != 2:
                raise ValueError(
                    "{}: line {}: expected 2 fields (id,label), got {}: {!r}"
                    .format(filepath, reader.line_num, len(row), row))
            image_id, label_str = row
            image_full_path = os.path.join(folder, image_id + '.png')
            results.append((image_full_path, label_str))
    return results

# Turn both CSV listings into lists of (image path, label) tuples.
train_labels_info = parse_csv_file(train_label_file, train_folder)
test_csv_info = parse_csv_file(test_csv_file, test_folder)

# Show the first five entries of each list, then the list sizes.
pprint(train_labels_info[:5])
pprint(test_csv_info[:5])
print(f"Number of training examples: {len(train_labels_info)}")
print(f"Number of test examples: {len(test_csv_info)}")

[('./datasets/cifar-10/train/1.png', 'frog'),
 ('./datasets/cifar-10/train/2.png', 'truck'),
 ('./datasets/cifar-10/train/3.png', 'truck'),
 ('./datasets/cifar-10/train/4.png', 'deer'),
 ('./datasets/cifar-10/train/5.png', 'automobile')]
[('./datasets/cifar-10/test/1.png', 'cat'),
 ('./datasets/cifar-10/test/2.png', 'cat'),
 ('./datasets/cifar-10/test/3.png', 'cat'),
 ('./datasets/cifar-10/test/4.png', 'cat'),
 ('./datasets/cifar-10/test/5.png', 'cat')]
Number of training examples: 50000
Number of test examples: 300000


In [13]:
# The first `train_split_index` labelled examples are used for training and
# the remainder are held out for validation.
train_split_index = 45000
df_columns = ['filepath', 'class']

# Column names are passed to the constructor rather than assigned to
# `.columns` afterwards: the assignment raises a length-mismatch ValueError
# when a list is empty, whereas the constructor yields an empty frame with
# the right columns.
train_df = pd.DataFrame(train_labels_info[:train_split_index], columns=df_columns)
valid_df = pd.DataFrame(train_labels_info[train_split_index:], columns=df_columns)
# NOTE(review): the 'class' values of the test frame come from the sample
# submission file and look like placeholders -- confirm before using them.
test_df = pd.DataFrame(test_csv_info, columns=df_columns)

print(train_df.head())
print(valid_df.head())
print(test_df.head())

                          filepath       class
0  ./datasets/cifar-10/train/1.png        frog
1  ./datasets/cifar-10/train/2.png       truck
2  ./datasets/cifar-10/train/3.png       truck
3  ./datasets/cifar-10/train/4.png        deer
4  ./datasets/cifar-10/train/5.png  automobile
                              filepath       class
0  ./datasets/cifar-10/train/45001.png       horse
1  ./datasets/cifar-10/train/45002.png  automobile
2  ./datasets/cifar-10/train/45003.png        deer
3  ./datasets/cifar-10/train/45004.png  automobile
4  ./datasets/cifar-10/train/45005.png    airplane
                         filepath class
0  ./datasets/cifar-10/test/1.png   cat
1  ./datasets/cifar-10/test/2.png   cat
2  ./datasets/cifar-10/test/3.png   cat
3  ./datasets/cifar-10/test/4.png   cat
4  ./datasets/cifar-10/test/5.png   cat


### Construct ImageDataGenerator from dataframe

In [None]:
# Image geometry, batch size and number of target classes.
height, width, channels = 32, 32, 3
batch_size = 32
num_classes = 10

# Keyword arguments common to both generators. Images are read through the
# 'filepath' column of a dataframe (flow from dataframe, not directory).
_flow_kwargs = dict(
    directory='./',
    x_col='filepath',
    y_col='class',
    classes=class_names,
    target_size=(height, width),
    batch_size=batch_size,
    seed=7,
    class_mode='sparse',
)

# Training images: rescaled by 1/255 and configured with rotation, shift,
# shear, zoom and horizontal-flip augmentation.
train_datagen = keras.preprocessing.image.ImageDataGenerator(
    rescale=1./255,
    rotation_range=40,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest',
)
train_generator = train_datagen.flow_from_dataframe(
    train_df, shuffle=True, **_flow_kwargs)

# Validation images: rescaled only, and not shuffled.
valid_datagen = keras.preprocessing.image.ImageDataGenerator(rescale=1./255)
valid_generator = valid_datagen.flow_from_dataframe(
    valid_df, shuffle=False, **_flow_kwargs)

# Sample counts reported by each generator.
train_num, valid_num = train_generator.samples, valid_generator.samples
print(train_num, valid_num)