### Ensemble Classifiers

In [26]:
import os
import glob
import numpy as np
import random
import shutil
import tensorflow as tf
from sklearn import metrics
from sklearn.metrics import accuracy_score,precision_score,recall_score,f1_score
from random import randint
from tensorflow import keras
from tensorflow.keras.preprocessing.image import ImageDataGenerator

#### Filtering the dataset

In [18]:
# Standardising the naming convention across all folders (Not part of genetic algorithm)
# Every numerically named image loses its leading zeros (e.g. '007.jpg' becomes '7.jpg')
HANDS_DATA_PATH = '/Users/preshita/Desktop/ensemble_test_hand'
class_labels = ['c0', 'c1', 'c2', 'c3', 'c4', 'c5', 'c6', 'c7']

# The working directory does not depend on the class label, so it is only changed once
os.chdir(HANDS_DATA_PATH)

for class_label in class_labels:
    curr_dir_path = os.path.join(HANDS_DATA_PATH, class_label)

    for filename in os.listdir(curr_dir_path):
        img_num, file_ext = os.path.splitext(filename)

        try:
            new_img_num = str(int(img_num))
        except ValueError:
            # Not a numbered image (e.g. a hidden system file), so there is nothing to standardise
            continue

        if (new_img_num == img_num):
            continue

        old_filepath = os.path.join(curr_dir_path, filename)
        new_filepath = os.path.join(curr_dir_path, new_img_num + file_ext)

        if (os.path.exists(new_filepath)):
            # Renaming would overwrite a different image that already owns the standardised name
            print("Skipped " + old_filepath + " because " + new_filepath + " already exists")
            continue

        # Source and target share a folder, so a plain rename is enough
        os.rename(old_filepath, new_filepath)

print("Done renaming")

Done renaming


In [59]:
# Extracting common images (Not part of genetic algorithm)
# An image is kept only when a file with the same name exists for both the face and the hand data
ORIG_FACE_DATA_PATH = '/Users/preshita/Desktop/ensemble_test_face'
ORIG_HAND_DATA_PATH = '/Users/preshita/Desktop/ensemble_test_hand'
ENSEMBLE_FACE_DATA_PATH = '/Users/preshita/Desktop/ensemble_test/face'
ENSEMBLE_HAND_DATA_PATH = '/Users/preshita/Desktop/ensemble_test/hand'

class_labels = ['c0', 'c1', 'c2', 'c3', 'c4', 'c5', 'c6', 'c7']

for class_label in class_labels:
    orig_face_dir_path = os.path.join(ORIG_FACE_DATA_PATH, class_label)
    orig_hand_dir_path = os.path.join(ORIG_HAND_DATA_PATH, class_label)
    ensemble_face_dir_path = os.path.join(ENSEMBLE_FACE_DATA_PATH, class_label)
    ensemble_hand_dir_path = os.path.join(ENSEMBLE_HAND_DATA_PATH, class_label)

    # shutil.copy does not create missing folders, so make sure the destinations exist first
    os.makedirs(ensemble_face_dir_path, exist_ok=True)
    os.makedirs(ensemble_hand_dir_path, exist_ok=True)

    for filename in os.listdir(orig_face_dir_path):
        orig_face_filepath = os.path.join(orig_face_dir_path, filename)
        orig_hand_filepath = os.path.join(orig_hand_dir_path, filename)

        if (os.path.exists(orig_hand_filepath)):
            shutil.copy(orig_face_filepath, os.path.join(ensemble_face_dir_path, filename))
            shutil.copy(orig_hand_filepath, os.path.join(ensemble_hand_dir_path, filename))

print('Done filtering common images')

Done filtering common images


In [4]:
# Filling the gaps in the face and hand training folders: any original training image that has no
# file of the same name in one of those folders is copied into it
COMBINED_ORIG_TRAIN_PATH = '/Users/preshita/Desktop/combined_new/train'
COMBINED_FACE_TRAIN_PATH = '/Users/preshita/Desktop/combined_new_face/new_train'
COMBINED_HAND_TRAIN_PATH = '/Users/preshita/Desktop/combined_new_hand/new_train'

class_labels = ['c0', 'c1', 'c2', 'c3', 'c4', 'c5', 'c6', 'c7']

for class_label in class_labels:
    combined_orig_dir_path = os.path.join(COMBINED_ORIG_TRAIN_PATH, class_label)
    combined_face_dir_path = os.path.join(COMBINED_FACE_TRAIN_PATH, class_label)
    combined_hand_dir_path = os.path.join(COMBINED_HAND_TRAIN_PATH, class_label)

    # shutil.copy does not create missing folders, so make sure the destinations exist first
    os.makedirs(combined_face_dir_path, exist_ok=True)
    os.makedirs(combined_hand_dir_path, exist_ok=True)

    for filename in os.listdir(combined_orig_dir_path):
        orig_filepath = os.path.join(combined_orig_dir_path, filename)

        # The face folder is handled before the hand folder for every image
        for target_dir_path in (combined_face_dir_path, combined_hand_dir_path):
            target_filepath = os.path.join(target_dir_path, filename)

            if (not os.path.exists(target_filepath)):
                shutil.copy(orig_filepath, target_filepath)

print('Done copying the relevant missing images')

Done copying the relevant missing images


In [5]:
# Filling the gaps in the face and hand test folders: any original test image that has no
# file of the same name in one of those folders is copied into it
COMBINED_ORIG_TEST_PATH = '/Users/preshita/Desktop/combined_new/test'
COMBINED_FACE_TEST_PATH = '/Users/preshita/Desktop/combined_new_face/test'
COMBINED_HAND_TEST_PATH = '/Users/preshita/Desktop/combined_new_hand/test'

class_labels = ['c0', 'c1', 'c2', 'c3', 'c4', 'c5', 'c6', 'c7']

for class_label in class_labels:
    combined_orig_dir_path = os.path.join(COMBINED_ORIG_TEST_PATH, class_label)
    combined_face_dir_path = os.path.join(COMBINED_FACE_TEST_PATH, class_label)
    combined_hand_dir_path = os.path.join(COMBINED_HAND_TEST_PATH, class_label)

    # shutil.copy does not create missing folders, so make sure the destinations exist first
    os.makedirs(combined_face_dir_path, exist_ok=True)
    os.makedirs(combined_hand_dir_path, exist_ok=True)

    for filename in os.listdir(combined_orig_dir_path):
        orig_filepath = os.path.join(combined_orig_dir_path, filename)

        # The face folder is handled before the hand folder for every image
        for target_dir_path in (combined_face_dir_path, combined_hand_dir_path):
            target_filepath = os.path.join(target_dir_path, filename)

            if (not os.path.exists(target_filepath)):
                shutil.copy(orig_filepath, target_filepath)

print('Done copying the relevant missing images')

Done copying the relevant missing images


In [6]:
# Filling the gaps in the face and hand unseen folders: any original unseen image that has no
# file of the same name in one of those folders is copied into it
COMBINED_ORIG_UNSEEN_PATH = '/Users/preshita/Desktop/combined_new/unseen'
COMBINED_FACE_UNSEEN_PATH = '/Users/preshita/Desktop/combined_new_face/unseen'
COMBINED_HAND_UNSEEN_PATH = '/Users/preshita/Desktop/combined_new_hand/unseen'

class_labels = ['c0', 'c1', 'c2', 'c3', 'c4', 'c5', 'c6', 'c7']

for class_label in class_labels:
    combined_orig_dir_path = os.path.join(COMBINED_ORIG_UNSEEN_PATH, class_label)
    combined_face_dir_path = os.path.join(COMBINED_FACE_UNSEEN_PATH, class_label)
    combined_hand_dir_path = os.path.join(COMBINED_HAND_UNSEEN_PATH, class_label)

    # shutil.copy does not create missing folders, so make sure the destinations exist first
    os.makedirs(combined_face_dir_path, exist_ok=True)
    os.makedirs(combined_hand_dir_path, exist_ok=True)

    for filename in os.listdir(combined_orig_dir_path):
        orig_filepath = os.path.join(combined_orig_dir_path, filename)

        # The face folder is handled before the hand folder for every image
        for target_dir_path in (combined_face_dir_path, combined_hand_dir_path):
            target_filepath = os.path.join(target_dir_path, filename)

            if (not os.path.exists(target_filepath)):
                shutil.copy(orig_filepath, target_filepath)

print('Done copying the relevant missing images')

Done copying the relevant missing images


#### Loading the required models

In [27]:
# The saved models are referenced by file name only, so the project folder becomes the working directory
os.chdir('/Users/preshita/Desktop/CS3244-Group-8-Project-2/')

# Loading the best face model
face_model = keras.models.load_model(filepath='best_model_face_inception.h5')
print(face_model)

# Loading the best hand model
hand_model = keras.models.load_model(filepath='best_model_hand_molyswu.h5')
print(hand_model)

<keras.engine.functional.Functional object at 0x135ef82e0>
<keras.engine.functional.Functional object at 0x136f355a0>


#### Setting up the input and output datasets

In [28]:
# Importing data
# NOTE: this cell assigns the same variable names as the other data-loading cells of this notebook,
# so later cells work on whichever of them was run last
test_datagen = ImageDataGenerator( rescale = 1.0/255. )

FACE_DATA_PATH = '/Users/preshita/Desktop/combined_new_face/new_train'
HANDS_DATA_PATH = '/Users/preshita/Desktop/combined_new_hand/new_train'

# Computing predictions by the best face model
face_test_generator = test_datagen.flow_from_directory(FACE_DATA_PATH,
                                                    batch_size  = 40,
                                                    class_mode  = 'categorical', 
                                                    target_size = (100, 75), shuffle = False)
face_data_inputs = face_test_generator # Face data input
y_face_preds = face_model.predict(face_test_generator) # Face data predictions

# Computing predictions by the best hand model
hand_test_generator = test_datagen.flow_from_directory(HANDS_DATA_PATH,
                                                    batch_size  = 40,
                                                    class_mode  = 'categorical', 
                                                    target_size = (100, 75), shuffle = False)

# The expected labels below are read from the face generator only, so row i of both prediction arrays
# must belong to the same image: both folders have to list the same files in the same order
if (face_test_generator.filenames != hand_test_generator.filenames):
    raise ValueError('The face and hand folders do not contain the same images in the same order, so the expected labels would be misaligned')

hands_data_inputs = hand_test_generator # Hand data input
y_hands_preds = hand_model.predict(hand_test_generator) # Hand data predictions

# Expected data outputs
num_of_classes = 8
expected_output_labels = face_test_generator.classes # Expected output labels, shared by both models (checked above)
# One-hot encoding: row i holds a 1 in the column of the expected class of image i
data_outputs = np.zeros((expected_output_labels.size, num_of_classes))
data_outputs[np.arange(expected_output_labels.size), expected_output_labels] = 1

Found 10225 images belonging to 8 classes.
Found 10225 images belonging to 8 classes.


#### Helper Functions

In [10]:
def weighted_probability(num_of_classfiers, num_of_classes, networks_outputs, curr_weight_combi):
    '''
    Given an array containing the predictions from each classifier and the weights to be assigned to each classifier, this function computes the final weighted probability.

    num_of_classfiers: number of classifiers to combine (entries read from networks_outputs and curr_weight_combi).
    num_of_classes: number of class scores read from each classifier's prediction.
    networks_outputs: one prediction per classifier, each indexable by class.
    curr_weight_combi: one weight per classifier.

    Returns a list of num_of_classes values that sum to 1.
    Raises ZeroDivisionError when the weighted scores add up to 0 (e.g. all weights are 0).
    '''
    result = [0 for _ in range(num_of_classes)]

    for i in range(num_of_classfiers):
        curr_network_output = networks_outputs[i]
        curr_weight = curr_weight_combi[i]

        for j in range(num_of_classes):
            result[j] += curr_network_output[j] * curr_weight

    # The normaliser must be taken from the finished weighted scores; adding to it while the scores
    # are still being accumulated counts the earlier classifiers more than once
    total = sum(result)

    if (total == 0):
        raise ZeroDivisionError('The weighted scores add up to 0, so they cannot be normalised')

    # Normalising to ensure that the final output is still in terms of probability
    return [score / total for score in result]

def fitness(y_pred, y_true):
    '''
    Scores a prediction against its expected output with the log loss function.
    The value is returned as computed by metrics.log_loss (it is not negated).
    '''
    log_loss_score = metrics.log_loss(y_true=y_true, y_pred=y_pred)
    return log_loss_score

def mutate(weight_combi, max_change=0.01):
    '''
    Randomly changes every weight of a weight combination by up to max_change in either direction
    (1% by default, i.e. each weight ends up within a 2% wide band around its old value).
    Note: The method of mutation was not stated in the research paper.

    weight_combi: the weights to mutate; this list is left untouched.
    max_change: largest relative change applied to a weight.

    Returns a new list holding the mutated weights.
    '''
    # A new list is built so that other references to the same weight combination are not changed as well
    return [weight * random.uniform(1 - max_change, 1 + max_change) for weight in weight_combi]

def cross_over(num_of_classifiers, parent_1, parent_2):
    '''
    Builds a child weight combination from 2 parent combinations: a random cut position is drawn,
    the weights before the cut come from parent_1 and the weights from the cut onwards come from parent_2.
    The cut can be 0, in which case the child is a copy of parent_2.
    '''
    split_at = random.randint(0, num_of_classifiers - 1)
    head, tail = parent_1[:split_at], parent_2[split_at:]
    head.extend(tail) # head is a fresh slice, so neither parent is modified

    return head

def generate_possible_weight_combis(num_of_classifiers, num_of_combis, weight_limit):
    '''
    Produces num_of_combis random weight combinations.
    Each combination holds one weight per classifier, drawn uniformly between 0 and weight_limit.
    '''
    return [
        [random.uniform(0, weight_limit) for _ in range(num_of_classifiers)]
        for _ in range(num_of_combis)
    ]


#### Start of Genetic Algorithm to find the optimal weights for each classifier

In [29]:
# Defining essential variables
num_of_classifiers = 2
num_of_classes = 8
num_of_required_weight_combis = 10
weight_limit = 50
possible_weight_combis = generate_possible_weight_combis(num_of_classifiers, num_of_required_weight_combis, weight_limit)
max_num_of_iters = 10


def average_fitness(weights, y_true_rows, face_pred_rows, hand_pred_rows):
    '''
    Computes the average fitness score (log loss, lower is better) of a weight combination.

    weights: one weight per classifier, in the order (face, hand).
    y_true_rows: expected output of every sample.
    face_pred_rows, hand_pred_rows: predictions of the face and hand models for the same samples.

    Raises ZeroDivisionError when no samples are given.
    '''
    accumulated_fitness_score = 0

    for y_true, face_pred, hand_pred in zip(y_true_rows, face_pred_rows, hand_pred_rows):
        y_pred = weighted_probability(num_of_classifiers, num_of_classes, [face_pred, hand_pred], weights)
        accumulated_fitness_score += fitness(y_pred, y_true)

    return accumulated_fitness_score / len(y_true_rows)


for generation in range(max_num_of_iters):
    # Step 1: Randomly choosing 50% of the dataset to calculate the fitness scores for
    # random.sample never repeats an index, so no retry loop is needed
    random_indices = random.sample(range(len(data_outputs)), len(data_outputs) // 2) # Rounding down
    chosen_y_true = [data_outputs[index] for index in random_indices]
    chosen_y_face_pred = [y_face_preds[index] for index in random_indices]
    chosen_y_hand_pred = [y_hands_preds[index] for index in random_indices]

    # Step 2: Calculate the average fitness scores for each of the possible weight combinations
    fitness_and_weights = [
        (average_fitness(weights, chosen_y_true, chosen_y_face_pred, chosen_y_hand_pred), weights)
        for weights in possible_weight_combis
    ]

    # Step 3: Rank the weight combis from best to worst
    fitness_and_weights.sort() # The combis with the lowest log loss is at the start

    # Step 4: Selecting parents
    # Selecting top 20% of the weight combis
    num_of_top_parents = len(fitness_and_weights) // 5 # Rounding down
    parents = [weights for score, weights in fitness_and_weights[:num_of_top_parents]]

    # Randomly choosing another 10% of the weight combinations, without repetition, from the rest.
    # Sampling from the slice keeps the top entries untouched even when an equal entry exists further down
    num_of_random_parents = len(fitness_and_weights) // 10 # Rounding down
    remaining_fitness_and_weights = fitness_and_weights[num_of_top_parents:]
    for score, weights in random.sample(remaining_fitness_and_weights, num_of_random_parents):
        parents.append(weights)

    # Step 5: Randomly mutate 10% of the selected parents (at least one parent)
    num_of_parents_to_mutate = max(1, len(parents) // 10) # Rounding down
    index_of_parents_to_mutate = [random.randint(0, len(parents) - 1) for count in range(num_of_parents_to_mutate)]

    for index in index_of_parents_to_mutate:
        # A copy is mutated so that the ranked list above keeps the weights that were actually scored
        parents[index] = mutate(list(parents[index]))

    # Step 6: Randomly cross over parents to produce new set of weight combinations
    # Only pairs of parents with different weights are crossed. When every parent holds the same
    # weights no such pair exists, so any pair is allowed instead of searching forever
    all_parent_pairs = [(first, second) for first in range(len(parents)) for second in range(len(parents))]
    differing_parent_pairs = [(first, second) for first, second in all_parent_pairs if parents[first] != parents[second]]
    candidate_parent_pairs = differing_parent_pairs or all_parent_pairs

    new_weight_combis = []
    for count in range(num_of_required_weight_combis):
        first_index, second_index = random.choice(candidate_parent_pairs)
        new_weight_combis.append(cross_over(num_of_classifiers, parents[first_index], parents[second_index]))

    possible_weight_combis = new_weight_combis
    print(possible_weight_combis) # For testing

# Step 7: Select the best weights combination
# The final ranking uses every sample, so it does not depend on the random half drawn in the last
# iteration and still works when no iteration was run
final_fitness_and_weights = [
    (average_fitness(weights, data_outputs, y_face_preds, y_hands_preds), weights)
    for weights in possible_weight_combis
]

final_fitness_and_weights.sort() # The combis with the lowest log loss is at the start
best_weights = final_fitness_and_weights[0][1]
print("The best weight combination is: " + str(best_weights))
print("The fitness score of this combination is: " + str(final_fitness_and_weights[0][0]))

[[24.32019583319557, 20.969136298038045], [24.60153591036305, 20.969136298038045], [24.60153591036305, 20.969136298038045], [18.808674877980664, 8.94421364493817], [24.32019583319557, 20.969136298038045], [24.32019583319557, 20.969136298038045], [18.808674877980664, 20.969136298038045], [18.808674877980664, 8.94421364493817], [18.808674877980664, 8.94421364493817], [24.32019583319557, 39.837186497452414]]
[[24.60153591036305, 20.969136298038045], [24.32019583319557, 20.969136298038045], [24.60153591036305, 20.91015048637464], [24.32019583319557, 20.969136298038045], [24.60153591036305, 20.91015048637464], [24.60153591036305, 20.91015048637464], [24.802848815206186, 20.91015048637464], [24.32019583319557, 20.91015048637464], [24.802848815206186, 20.91015048637464], [24.60153591036305, 20.969136298038045]]
[[24.32019583319557, 20.91015048637464], [24.32019583319557, 20.91015048637464], [24.581825055345007, 20.722573760991054], [24.581825055345007, 20.91015048637464], [24.581825055345007,

#### Computing the performance of the ensemble model

In [30]:
# Measuring performance on unseen data
# NOTE: this cell assigns the same variable names as the other data-loading cells of this notebook,
# so the metrics are computed on whichever of them was run last
FACE_DATA_PATH = '/Users/preshita/Desktop/combined_new_face/unseen'
HANDS_DATA_PATH = '/Users/preshita/Desktop/combined_new_hand/unseen'

# Computing predictions by the best face model
face_test_generator = test_datagen.flow_from_directory(FACE_DATA_PATH,
                                                    batch_size  = 40,
                                                    class_mode  = 'categorical', 
                                                    target_size = (100, 75), shuffle = False)
face_data_inputs = face_test_generator # Face data input
y_face_preds = face_model.predict(face_test_generator) # Face data predictions

# Computing predictions by the best hand model
hand_test_generator = test_datagen.flow_from_directory(HANDS_DATA_PATH,
                                                    batch_size  = 40,
                                                    class_mode  = 'categorical', 
                                                    target_size = (100, 75), shuffle = False)

# The expected labels below are read from the face generator only, so row i of both prediction arrays
# must belong to the same image: both folders have to list the same files in the same order
if (face_test_generator.filenames != hand_test_generator.filenames):
    raise ValueError('The face and hand folders do not contain the same images in the same order, so the expected labels would be misaligned')

hands_data_inputs = hand_test_generator # Hand data input
y_hands_preds = hand_model.predict(hand_test_generator) # Hand data predictions

# Expected data outputs
num_of_classes = 8
expected_output_labels = face_test_generator.classes # Expected output labels, shared by both models (checked above)
# One-hot encoding: row i holds a 1 in the column of the expected class of image i
data_outputs = np.zeros((expected_output_labels.size, num_of_classes))
data_outputs[np.arange(expected_output_labels.size), expected_output_labels] = 1

Found 839 images belonging to 8 classes.
Found 839 images belonging to 8 classes.


In [32]:
# Measuring performance on test data first
# NOTE: this cell assigns the same variable names as the other data-loading cells of this notebook,
# so the metrics are computed on whichever of them was run last
FACE_DATA_PATH = '/Users/preshita/Desktop/combined_new_face/test'
HANDS_DATA_PATH = '/Users/preshita/Desktop/combined_new_hand/test'

# Computing predictions by the best face model
face_test_generator = test_datagen.flow_from_directory(FACE_DATA_PATH,
                                                    batch_size  = 40,
                                                    class_mode  = 'categorical', 
                                                    target_size = (100, 75), shuffle = False)
face_data_inputs = face_test_generator # Face data input
y_face_preds = face_model.predict(face_test_generator) # Face data predictions

# Computing predictions by the best hand model
hand_test_generator = test_datagen.flow_from_directory(HANDS_DATA_PATH,
                                                    batch_size  = 40,
                                                    class_mode  = 'categorical', 
                                                    target_size = (100, 75), shuffle = False)

# The expected labels below are read from the face generator only, so row i of both prediction arrays
# must belong to the same image: both folders have to list the same files in the same order
if (face_test_generator.filenames != hand_test_generator.filenames):
    raise ValueError('The face and hand folders do not contain the same images in the same order, so the expected labels would be misaligned')

hands_data_inputs = hand_test_generator # Hand data input
y_hands_preds = hand_model.predict(hand_test_generator) # Hand data predictions

# Expected data outputs
num_of_classes = 8
expected_output_labels = face_test_generator.classes # Expected output labels, shared by both models (checked above)
# One-hot encoding: row i holds a 1 in the column of the expected class of image i
data_outputs = np.zeros((expected_output_labels.size, num_of_classes))
data_outputs[np.arange(expected_output_labels.size), expected_output_labels] = 1

Found 3414 images belonging to 8 classes.
Found 3414 images belonging to 8 classes.


In [33]:
# Combining the face and hand predictions of every image with the best weights found
weighted_preds = [
    weighted_probability(num_of_classifiers, num_of_classes, [y_face_preds[row], y_hands_preds[row]], best_weights)
    for row in range(len(expected_output_labels))
]

# The predicted class of an image is the one with the highest weighted probability
ensemble_ypred = np.argmax(weighted_preds, axis=1)

# Printing out metrics
accuracy = accuracy_score(y_true=expected_output_labels, y_pred=ensemble_ypred)
print('Accuracy: %f' % accuracy)

# precision tp / (tp + fp), averaged over the classes weighted by their number of samples
precision = precision_score(y_true=expected_output_labels, y_pred=ensemble_ypred, average='weighted')
print('Precision: %f' % precision)

# recall: tp / (tp + fn), averaged the same way
recall = recall_score(y_true=expected_output_labels, y_pred=ensemble_ypred, average='weighted')
print('Recall: %f' % recall)

# f1: 2 tp / (2 tp + fp + fn), averaged the same way
f1 = f1_score(y_true=expected_output_labels, y_pred=ensemble_ypred, average='weighted')
print('F1 score: %f' % f1)

Accuracy: 0.552724
Precision: 0.585187
Recall: 0.552724
F1 score: 0.539442
