<a href="https://colab.research.google.com/github/xiapidexiami/tensorflow/blob/master/LeNet.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [0]:
import os
import zipfile
import random
import tensorflow as tf
import numpy as np
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.optimizers import RMSprop
from shutil import copyfile, rmtree

from keras.backend.tensorflow_backend import set_session

Download the full cats-v-dogs dataset and store it as `/tmp/cats-and-dogs.zip`.

In [0]:
# When several GPUs are present, expose only GPU number 0 to this process.
os.environ["CUDA_VISIBLE_DEVICES"] = "0"

config = tf.ConfigProto()
# Allow dynamic placement of tensors and operators.
config.allow_soft_placement = True
# Use at most 40% of the GPU memory.
config.gpu_options.per_process_gpu_memory_fraction = 0.4
# Do not grab all GPU memory at initialisation; allocate it on demand.
config.gpu_options.allow_growth = True

sess = tf.Session(config=config)
# The model in this notebook is built with tf.keras, which keeps its own
# session separately from the standalone `keras` package. Register the
# session with tf.keras so the GPU options above apply to training.
tf.keras.backend.set_session(sess)

In [0]:
!wget --no-check-certificate \
    "https://download.microsoft.com/download/3/E/1/3E1C3F21-ECDB-4869-8368-6DEBA77B919F/kagglecatsanddogs_3367a.zip" \
    -O "/tmp/cats-and-dogs.zip"

Unzip the dataset to `/tmp`.

In [0]:
local_zip = "/tmp/cats-and-dogs.zip"
# The context manager closes the archive even when extraction fails part-way.
with zipfile.ZipFile(local_zip, 'r') as zip_ref:
  zip_ref.extractall('/tmp/')

Split the dataset into training (0.8), validation (0.1), and testing (0.1) sets.
Some image files are empty (zero bytes), so they are skipped when splitting the data.

In [0]:
# Count the directory entries of each raw class folder (before any filtering).
cat_entries = os.listdir("/tmp/PetImages/Cat")
dog_entries = os.listdir("/tmp/PetImages/Dog")
num_cats, num_dogs = len(cat_entries), len(dog_entries)

for message, count in (("the number of cats is", num_cats),
                       ("the number of dogs is", num_dogs)):
  print(message, count)

In [0]:
# Start from a clean output tree. The guard keeps this cell runnable on a
# fresh machine, where the directory does not exist yet and rmtree would raise.
output_root = "/tmp/cats-v-dogs"
if os.path.isdir(output_root):
  rmtree(output_root)

# One directory per (split, class) pair. exist_ok=True tolerates directories
# that already exist while still surfacing real failures (e.g. permissions),
# which a blanket `except OSError: pass` would hide.
for split_name in ("training", "validation", "testing"):
  for class_name in ("cats", "dogs"):
    os.makedirs("/tmp/cats-v-dogs/{}/{}/".format(split_name, class_name), exist_ok=True)

In [0]:
def split_data(source_dir, train_dir, validation_dir, test_dir, split_size, seed=None):
  """Copy the non-empty files of ``source_dir`` into train/validation/test folders.

  Files are shuffled, then the first ``split_size[0]`` fraction goes to
  ``train_dir``, the next ``split_size[1]`` fraction to ``validation_dir`` and
  everything that is left to ``test_dir``. Zero-byte files are reported and
  skipped; entries that are not regular files (e.g. sub-directories) are
  skipped as well, because ``copyfile`` cannot copy them.

  Args:
    source_dir: directory holding the files to split.
    train_dir: existing directory receiving the training files.
    validation_dir: existing directory receiving the validation files.
    test_dir: existing directory receiving the remaining files.
    split_size: sequence whose first two items are the training and
      validation fractions; any further items are ignored.
    seed: optional seed for a reproducible shuffle. With the default ``None``
      the module-level ``random`` generator is used, as before.

  Raises:
    ValueError: if a fraction is negative or the two fractions sum to more than 1.
  """
  train_fraction, validation_fraction = split_size[0], split_size[1]
  # Small tolerance so that float round-off in the sum does not trip the check.
  if (train_fraction < 0 or validation_fraction < 0
      or train_fraction + validation_fraction > 1 + 1e-9):
    raise ValueError(
        "split_size fractions must be non-negative and sum to at most 1, got {!r}".format(split_size))

  files = []
  # os.listdir order is arbitrary; sorting makes a seeded shuffle repeatable.
  for filename in sorted(os.listdir(source_dir)):
    file = os.path.join(source_dir, filename)
    if not os.path.isfile(file):
      continue
    if os.path.getsize(file) > 0:
      files.append(filename)
    else:
      print(filename, "is empty!")

  if seed is None:
    random.shuffle(files)
  else:
    # A private generator leaves the global random state untouched.
    random.Random(seed).shuffle(files)

  num_file_full = len(files)
  print("The number of useful file is ", num_file_full)

  num_train = int(num_file_full * train_fraction)
  num_validation = int(num_file_full * validation_fraction)
  validation_end = num_train + num_validation

  # The test split takes whatever remains after the first two slices.
  destinations = (
      (train_dir, files[:num_train]),
      (validation_dir, files[num_train:validation_end]),
      (test_dir, files[validation_end:]),
  )
  for target_dir, names in destinations:
    for filename in names:
      copyfile(os.path.join(source_dir, filename), os.path.join(target_dir, filename))

# Raw class folders produced by extracting the archive.
cat_source_dir = "/tmp/PetImages/Cat"
dog_source_dir = "/tmp/PetImages/Dog"

# Destination folders, one per (split, class) pair.
cat_train_dir = "/tmp/cats-v-dogs/training/cats/"
cat_validation_dir = "/tmp/cats-v-dogs/validation/cats/"
cat_test_dir = "/tmp/cats-v-dogs/testing/cats/"
dog_train_dir = "/tmp/cats-v-dogs/training/dogs/"
dog_validation_dir = "/tmp/cats-v-dogs/validation/dogs/"
dog_test_dir = "/tmp/cats-v-dogs/testing/dogs/"

# Training and validation fractions; split_data sends the remainder to testing.
split_size = [0.8, 0.1]

# Cats first, then dogs.
for src, train_dst, val_dst, test_dst in (
    (cat_source_dir, cat_train_dir, cat_validation_dir, cat_test_dir),
    (dog_source_dir, dog_train_dir, dog_validation_dir, dog_test_dir),
):
  split_data(src, train_dst, val_dst, test_dst, split_size)

# Report how many files ended up in every destination folder.
for message, folder in (
    ("number in cats training set is", cat_train_dir),
    ("number in cats validation set is", cat_validation_dir),
    ("number in cats testing set is", cat_test_dir),
    ("number in dogs training set is", dog_train_dir),
    ("number in dogs validation set is", dog_validation_dir),
    ("number in dogs testing set is", dog_test_dir),
):
  print(message, len(os.listdir(folder)))

ImageDataGenerator with augmentation

In [0]:
# Root folder of each split; flow_from_directory reads the class sub-folders.
train_dir = "/tmp/cats-v-dogs/training/"
validation_dir = "/tmp/cats-v-dogs/validation/"
test_dir = "/tmp/cats-v-dogs/testing/"

# Only the training images are augmented; validation and test images are
# merely rescaled to the [0, 1] range.
train_datagen = ImageDataGenerator(
    rescale=1.0/255.0,
    rotation_range=40,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest',
)
validation_datagen = ImageDataGenerator(rescale=1.0/255.0)
test_datagen = ImageDataGenerator(rescale=1.0/255.0)

# Options shared by all three iterators: 150x150 inputs with binary labels.
flow_options = dict(target_size=(150,150), class_mode='binary')

train_generator = train_datagen.flow_from_directory(
    train_dir, batch_size=100, **flow_options)
validation_generator = validation_datagen.flow_from_directory(
    validation_dir, batch_size=50, **flow_options)
test_generator = test_datagen.flow_from_directory(
    test_dir, batch_size=50, **flow_options)

Define a Keras model to classify cats vs. dogs

In [0]:
# Two convolution/pooling stages followed by three dense layers; the single
# sigmoid unit outputs the binary class probability.
layers = tf.keras.layers

model = tf.keras.models.Sequential()
model.add(layers.Conv2D(6, (5,5), activation='tanh', input_shape=(150,150,3)))
model.add(layers.MaxPooling2D(2,2))
model.add(layers.Conv2D(16, (5,5), activation='tanh'))
model.add(layers.MaxPooling2D(2,2))
model.add(layers.Flatten())
model.add(layers.Dense(120, activation='tanh'))
model.add(layers.Dense(84, activation='tanh'))
model.add(layers.Dense(1, activation='sigmoid'))

model.summary()

In [0]:
# Binary classification: sigmoid output paired with binary cross-entropy.
model.compile(optimizer=RMSprop(lr=0.0001), loss='binary_crossentropy', metrics=['acc'])
history = model.fit_generator(train_generator,
                             epochs=50,
                             steps_per_epoch=100,
                             validation_data=validation_generator,
                             # len(generator) is its number of batches, so every
                             # epoch is scored on the whole validation set. A
                             # fixed 25 steps of 50 images covers at most 1250
                             # images, a different shuffled subset each epoch
                             # when the set is larger.
                             validation_steps=len(validation_generator),
                             verbose=1)

Plot loss and accuracy

In [0]:
import matplotlib.pyplot as plt
import matplotlib.image as mpimg

# The keys follow the 'acc' metric name passed to model.compile.
acc = history.history['acc']
val_acc = history.history['val_acc']
loss = history.history['loss']
val_loss = history.history['val_loss']

epochs = range(len(acc))

# Accuracy curves. The legend text must go through `label=`: a fourth
# positional argument to plt.plot is interpreted as data, not as a label.
plt.figure()
plt.plot(epochs, acc, 'r', label='Training accuracy')
plt.plot(epochs, val_acc, 'b', label='Validation accuracy')
plt.title('Training and validation accuracy')
plt.xlabel('Epoch')
plt.ylabel('Accuracy')
plt.legend()

# Loss curves on a separate figure.
plt.figure()
plt.plot(epochs, loss, 'r', label="Training Loss")
plt.plot(epochs, val_loss, 'b', label="Validation Loss")
plt.title('Training and validation loss')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend()

plt.show()

Evaluate on the test set

In [0]:
# len(test_generator) is the number of batches in one pass, so every test image
# is evaluated exactly once instead of a fixed 25-batch subset.
model.evaluate_generator(test_generator, steps=len(test_generator))