In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sb
sb.set()

In [3]:
import tensorflow as tf
tf.__version__

'2.10.0'

In [4]:
#Function to Load Images & Labels
import os
from tqdm import tqdm  #Display Progress
import cv2 #openCV

def img_load(dataset):
    IMAGE_SIZE = (150, 150)
    output = []   
    images = []
    images_paths = []
    labels = []
    label_indexs = []
    label_index=0
    # Iterate through each folder corresponding to a category
    for folder in os.listdir(dataset):
        
        # Iterate through each image in our folder
        for file in tqdm(os.listdir(os.path.join(dataset, folder))):

            # Get the path name of the image
            img_path = os.path.join(os.path.join(dataset, folder), file)

            # Open and resize the img
            image = cv2.imread(img_path)
            image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
            image = cv2.resize(image, IMAGE_SIZE) 

            # Append the image and its corresponding label to the output
            images.append(image)
            images_paths.append(os.path.join(folder, file))
            labels.append(folder)
            label_indexs.append(label_index)
        label_index +=1
    images = np.array(images, dtype = 'float32')
    images_paths = np.array(images_paths, dtype = 'str')   
    labels = np.array(labels, dtype = 'str')   
    label_index = np.array(label_index, dtype = 'int32')
    output.append((images, images_paths, labels, np.array(label_indexs)))

    return output

In [5]:
#Loading Data (Training & Test Dataset)
[(train_images, train_images_paths, train_labels, train_indexs)] = img_load('workingData\Pictures\_train\person_gender')

100%|████████████████████████████████████████████████████████████████████████████████| 214/214 [00:03<00:00, 67.56it/s]
100%|████████████████████████████████████████████████████████████████████████████████| 624/624 [00:08<00:00, 72.41it/s]


In [None]:
#from sklearn.utils import shuffle 
#train_images, train_labels = shuffle(train_images, train_labels, random_state=25)

## Data visualization

In [None]:
#Label Dataset Shape
train_labels.shape[0]

In [None]:
np.unique(train_labels, return_counts=True)

In [None]:
_, train_counts = np.unique(train_indexs, return_counts=True)
pd.DataFrame(np.unique(train_indexs, return_counts=True)).plot.bar(figsize=(3,2))
plt.title('Label Count Per Dataset')
plt.show()

In [6]:
#Scale the data
train_images = train_images / 255.0
#test_images = test_images / 255.0

## Model

In [7]:
#Build Model
model = tf.keras.Sequential([
    tf.keras.layers.Conv2D(32, (3, 3), activation = 'relu', input_shape = (150, 150, 3)), 
    tf.keras.layers.MaxPooling2D(2,2),
    tf.keras.layers.Conv2D(32, (3, 3), activation = 'relu'),
    tf.keras.layers.MaxPooling2D(2,2),
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(128, activation=tf.nn.relu),
    tf.keras.layers.Dense(8, activation=tf.nn.softmax)
])

In [None]:
model.summary()

In [8]:
#Compile Model
model.compile(optimizer = 'adam', loss = 'sparse_categorical_crossentropy', metrics=['accuracy'])

In [9]:
#Training the Model
history = model.fit(train_images, train_indexs, batch_size=100, epochs=10, validation_split = 0.2)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [10]:
predictions = model.predict(train_images)     # Vector of probabilities
pred_labels = np.argmax(predictions, axis = 1) # We take the highest probability
pred_labels



array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1,

In [11]:
pf = pd.DataFrame(train_images_paths, columns=['images_paths'])
pf['indexs']=train_indexs
pf['prediction']=pred_labels
pf[pf['indexs'] != pf['prediction']].head(20)

Unnamed: 0,images_paths,indexs,prediction
96,person_Female\person_0479.jpg,0,1
224,person_Male\person_0011.jpg,1,0
292,person_Male\person_0079.jpg,1,0
345,person_Male\person_0287.jpg,1,0
408,person_Male\person_0382.jpg,1,0
560,person_Male\person_0627.jpg,1,0
743,person_Male\person_0859.jpg,1,0
754,person_Male\person_0870.jpg,1,0
757,person_Male\person_0873.jpg,1,0
766,person_Male\person_0882.jpg,1,0


In [12]:
pf.groupby(['indexs','prediction'])['prediction'].count()

indexs  prediction
0       0             213
        1               1
1       0              20
        1             604
Name: prediction, dtype: int64

In [1]:
rootlocation='workingData\Pictures\_train\person_gender\\'
dipaly_index=[832,825,823,821,197]
plt_df=pf

fig, axes = plt.subplots(1, np.size(dipaly_index) , figsize=(12,4))
for idx, ax in zip(dipaly_index, axes.ravel()):
    image = cv2.imread(rootlocation+plt_df['images_paths'][idx])
    ax.grid(False)
    ax.xaxis.set_visible(False)
    ax.yaxis.set_visible(False)
    ax.set_title('-'+ str(plt_df['indexs'][idx]) + ' | Pred- '+ str(plt_df['prediction'][idx]), size=8)
    ax.imshow(image[:,:,::-1]) # plot image into the subplot

NameError: name 'pf' is not defined

## Model Testing

In [None]:
#Loading Data (Training & Test Dataset)
[(test_images, test_images_paths, test_labels, test_indexs)] = img_load('workingData\Pictures\_test\person_gender')

In [None]:
test_loss = model.evaluate(test_images, test_indexs)

In [None]:
#garbage collection to save memory
import gc
gc.collect()

In [None]:
predictions = model.predict(test_images)     # Vector of probabilities
pred_labels = np.argmax(predictions, axis = 1) # We take the highest probability
pred_labels

In [None]:
pf_test = pd.DataFrame(test_images_paths, columns=['images_paths'])
pf_test['labels']=test_labels
pf_test['indexs']=test_indexs
pf_test['prediction']=pred_labels
pf_test[pf_test['indexs'] != pf_test['prediction']]

In [None]:
#np.count(pf_test['indexs']
#np.unique(pf_test[['indexs','prediction']], return_counts=True)
pf_test.groupby(['indexs','prediction'])['prediction'].count()

In [None]:
rootlocation='workingData\Pictures\_test\person_gender\\'
plt_df=pf_test
dipaly_index=[20,23,100,11,16,13]

fig, axes = plt.subplots(1, np.size(dipaly_index) , figsize=(12,4))
for idx, ax in zip(dipaly_index, axes.ravel()):
    image = cv2.imread(rootlocation+plt_df['images_paths'][idx])
    ax.grid(False)
    ax.xaxis.set_visible(False)
    ax.yaxis.set_visible(False)
    ax.set_title(plt_df['labels'][idx] +'-'+ str(plt_df['indexs'][idx]) + ' | Pred- '+ str(plt_df['prediction'][idx]), size=8)
    ax.imshow(image[:,:,::-1]) # plot image into the subplot

In [None]:
def display_random_img(class_names, images, labels):
    index = np.random.randint(images.shape[0])
    plt.figure()
    plt.imshow(images[index])
    plt.xticks([])
    plt.yticks([])
    plt.grid(False)
    plt.title('Image #{} : '.format(index) + class_names[labels[index]])
    plt.show()
    

#display_random_img (class_names, train_images, train_labels)
display_random_img(class_names, test_images, pred_labels)

## Random Pic from Net

In [None]:
[(test_images, test_images_paths, test_labels, test_indexs)] = img_load('workingData\Pictures\_test\\pgender')

In [None]:
test_images = test_images / 255.0

In [None]:
predictions = model.predict(test_images)     # Vector of probabilities
pred_labels = np.argmax(predictions, axis = 1) # We take the highest probability
pred_labels

In [None]:
pf_test = pd.DataFrame(test_images_paths, columns=['images_paths'])
pf_test['labels']=test_labels
pf_test['indexs']=test_indexs
pf_test['prediction']=pred_labels
pf_test
#pf_test[pf_test['indexs'] != pf_test['prediction']]

In [None]:
rootlocation='workingData\Pictures\_test\pgender\\'
plt_df=pf_test
dipaly_index=[0,1,2,3,4,5,6]

fig, axes = plt.subplots(1, np.size(dipaly_index) , figsize=(12,4))
for idx, ax in zip(dipaly_index, axes.ravel()):
    image = cv2.imread(rootlocation+plt_df['images_paths'][idx])
    ax.grid(False)
    ax.xaxis.set_visible(False)
    ax.yaxis.set_visible(False)
    ax.set_title(plt_df['labels'][idx] +'-'+ str(plt_df['indexs'][idx]) + ' | Pred- '+ str(plt_df['prediction'][idx]), size=8)
    ax.imshow(image[:,:,::-1]) # plot image into the subplot