In [0]:
import zipfile
import os
import pickle
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import tensorflow as tf
from google.colab import drive, files
from PIL import Image
from matplotlib.pyplot import imshow
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder
from tensorflow.keras.preprocessing.image import array_to_img
from tensorflow.keras.datasets import mnist
from tensorflow.keras.models import Sequential, load_model
from keras.layers.core import Dense, Dropout, Activation,Flatten
from tensorflow.keras.layers import Dense, Flatten, Activation, Dropout, Conv2D, MaxPooling2D
from tensorflow.keras.optimizers import SGD
from tensorflow.keras.callbacks import ModelCheckpoint, ReduceLROnPlateau, LambdaCallback
from keras.utils import np_utils
%matplotlib inline

drive.mount('/content/gdrive',force_remount=True)

Mounted at /content/gdrive


## Unzipping the file with the images

In [0]:
with zipfile.ZipFile('/content/gdrive/My Drive/Big Data/Projet/celeba-dataset.zip', 'r') as zip_ref:
    zip_ref.extractall('/content/img/')

# Création de la base de données

In [0]:
samp = pickle.load(open('/content/gdrive/My Drive/Big Data/Projet/Listes sample/Mustache.pkl', 'rb'))

In [0]:
path = '/content/img/img_align_celeba/img_align_celeba/'
target = []
data = np.zeros((len(samp), 64, 64, 3)) #dimension of the images
count = 0
for file in os.listdir(path) :
  if file in samp :
    image = Image.open(path + file)
    image = image.resize((64, 64))
    data[count] = np.array(image)
    target.append(file)
    count += 1

In [0]:
data.shape

In [0]:
all_attributes = pd.read_csv('/content/img/list_attr_celeba.csv')
mustache = all_attributes[['image_id','Mustache']]
mustache = mustache.loc[mustache[mustache['image_id'].isin(target) == True].index]
mustache = mustache.set_index('image_id')
mustache = mustache.loc[target]
mustache.head()

Unnamed: 0_level_0,Mustache
image_id,Unnamed: 1_level_1
145452.jpg,1
060196.jpg,-1
007271.jpg,-1
025695.jpg,-1
001372.jpg,1


# Creation of the train and test sets

In [0]:
y = np.array(mustache.Mustache)
y[y == -1] = 0

In [0]:
X_train, X_test, y_train, y_test = train_test_split(data, y, test_size = 0.2)

In [0]:
print(y_train.shape, X_train.shape)

(14733,) (14733, 64, 64, 3)


# Creation of the NN

In [0]:
Mustache_model.summary()

Model: "sequential_2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_5 (Conv2D)            (None, 62, 62, 64)        1792      
_________________________________________________________________
max_pooling2d_4 (MaxPooling2 (None, 20, 20, 64)        0         
_________________________________________________________________
conv2d_6 (Conv2D)            (None, 18, 18, 128)       73856     
_________________________________________________________________
max_pooling2d_5 (MaxPooling2 (None, 6, 6, 128)         0         
_________________________________________________________________
conv2d_7 (Conv2D)            (None, 4, 4, 256)         295168    
_________________________________________________________________
max_pooling2d_6 (MaxPooling2 (None, 1, 1, 256)         0         
_________________________________________________________________
flatten_1 (Flatten)          (None, 256)              

In [0]:
conv_net = Sequential()

# convolution layer 1
conv_net.add(Conv2D(64, (3, 3), activation = 'relu', input_shape = (64, 64, 3)))
conv_net.add(MaxPooling2D(pool_size=(3, 3)))

# convolution layer 2
conv_net.add(Conv2D(128, (3, 3), activation = 'relu'))
conv_net.add(MaxPooling2D(pool_size = (3, 3)))

# convolution layer 3
conv_net.add(Conv2D(256, (3, 3), activation = 'relu'))
conv_net.add(MaxPooling2D(pool_size = (3, 3)))


conv_net.add(Flatten())
conv_net.add(Dense(256, activation = 'relu'))
conv_net.add(Dropout(0.3))
conv_net.add(Dense(128, activation = 'relu'))
conv_net.add(Dense(64, activation = 'relu'))
conv_net.add(Dense(1, activation = 'sigmoid'))

conv_net.compile(loss = 'binary_crossentropy', optimizer = 'adam', 
                 metrics = ['accuracy'])

## callbacks to save the best model

In [0]:
reduce_lr = ReduceLROnPlateau(monitor = 'loss', factor = 0.2,
                              patience = 1, min_lr = 0.001)

# Path where we will save our model
filepath = "/content/gdrive/My Drive/Big Data/models mustache/Mustache.hdf5" 
checkpoint = ModelCheckpoint(filepath, monitor = 'val_accuracy',
                             verbose = 1, save_best_only = True,
                             mode = 'max')
callbacks = [checkpoint, reduce_lr]

In [0]:
history = conv_net.fit(X_train, y_train, batch_size = 128, epochs = 30, 
                       validation_data = (X_test, y_test), callbacks = callbacks) 
#total of 30 epochs

Epoch 1/30
Epoch 00001: val_accuracy improved from -inf to 0.64685, saving model to /content/gdrive/My Drive/Big Data/models mustache/Mustache.hdf5
Epoch 2/30
Epoch 00002: val_accuracy improved from 0.64685 to 0.77307, saving model to /content/gdrive/My Drive/Big Data/models mustache/Mustache.hdf5
Epoch 3/30
Epoch 00003: val_accuracy improved from 0.77307 to 0.79587, saving model to /content/gdrive/My Drive/Big Data/models mustache/Mustache.hdf5
Epoch 4/30
Epoch 00004: val_accuracy improved from 0.79587 to 0.84663, saving model to /content/gdrive/My Drive/Big Data/models mustache/Mustache.hdf5
Epoch 5/30
Epoch 00005: val_accuracy did not improve from 0.84663
Epoch 6/30
Epoch 00006: val_accuracy did not improve from 0.84663
Epoch 7/30
Epoch 00007: val_accuracy did not improve from 0.84663
Epoch 8/30
Epoch 00008: val_accuracy improved from 0.84663 to 0.85342, saving model to /content/gdrive/My Drive/Big Data/models mustache/Mustache.hdf5
Epoch 9/30
Epoch 00009: val_accuracy did not impro

## Load the saved model

In [0]:
Mustache_model = load_model("/content/gdrive/My Drive/Big Data/models mustache/Mustache.hdf5")



In [0]:
accuracy = Mustache_model.evaluate(X_test, y_test, verbose = 0)[1]
print(accuracy)

0.8773072957992554
