# Combined model

This notebook outlines the procedure for combining the output of the classifier with that of the regressor in order to reduce the error of the %RBR predictions.

The following libraries are used.

In [14]:
import numpy as np
import pandas as pd

from sklearn.metrics import accuracy_score, confusion_matrix, recall_score, precision_score, mean_squared_error

import keras.backend as K
from keras.models import load_model
from keras.preprocessing.image import ImageDataGenerator

## Global functions and definitions

In [7]:
# Number of images per batch; passed to both test generators and used to derive
# the number of prediction steps.
BATCH_SIZE = 10
# The first two entries are used as the generators' target_size; the third is
# presumably the channel count (generators use color_mode='rgb') — TODO confirm.
IMAGE_SHAPE = (128, 128, 3)

### Custom metrics

In [8]:
def rmse(y_true, y_pred):
    """Root-mean-squared error between targets and predictions.

    Built from Keras backend ops: the squared difference is averaged over
    the last axis and the square root of that mean is returned.
    """
    squared_error = K.square(y_pred - y_true)
    mean_squared = K.mean(squared_error, axis=-1)
    return K.sqrt(mean_squared)

### Create a test data generator

In [9]:
# Test-time generator: the only transformation configured is a rescale by 1/255.
test_datagen = ImageDataGenerator(rescale=1.0 / 255)

In [10]:
# Both models are evaluated on the same test CSV: the classifier frame drops the
# 'percentage' column, the regressor frame drops the 'rbr' column.
test_csv_path = 'dataframes/test.csv'
cls_test = pd.read_csv(test_csv_path).drop(columns=['percentage'])
reg_test = pd.read_csv(test_csv_path).drop(columns=['rbr'])

In [12]:
# Settings common to both generators. Shuffling is disabled; the later comparison
# of predictions against reg_test['percentage'] relies on the generators following
# the row order of the dataframes.
shared_flow_options = dict(
    directory='data/crop',
    x_col='filename',
    has_ext=True,
    target_size=IMAGE_SHAPE[:2],
    color_mode='rgb',
    classes=None,
    class_mode=None,
    batch_size=BATCH_SIZE,
    shuffle=False,
    seed=42,
)

# Generator feeding the classifier.
cls_test_gen = test_datagen.flow_from_dataframe(
    dataframe=cls_test,
    y_col='rbr',
    **shared_flow_options
)

# Generator feeding the regressor.
reg_test_gen = test_datagen.flow_from_dataframe(
    dataframe=reg_test,
    y_col='percentage',
    **shared_flow_options
)

Found 200 images.
Found 200 images.


## Load models

In [15]:
# The regression model is loaded with the custom `rmse` function registered
# under the name 'rmse'; the classifier is loaded without custom objects.
regression_custom_objects = {'rmse': rmse}

cls_model = load_model('models/classification_final')
reg_model = load_model('models/regression_final',
                       custom_objects=regression_custom_objects)

## Make predictions

In [17]:
# Rewind both iterators so that prediction starts from the first test sample.
cls_test_gen.reset()
reg_test_gen.reset()

# Round the step count UP. Floor division would skip the trailing partial batch
# whenever the sample count is not a multiple of BATCH_SIZE, producing fewer
# predictions than there are ground-truth rows.
cls_steps = (cls_test_gen.n + BATCH_SIZE - 1) // BATCH_SIZE
reg_steps = (reg_test_gen.n + BATCH_SIZE - 1) // BATCH_SIZE

cls_output = cls_model.predict_generator(generator=cls_test_gen,
                                         steps=cls_steps,
                                         verbose=1)

reg_output = reg_model.predict_generator(generator=reg_test_gen,
                                         steps=reg_steps,
                                         verbose=1)

# Threshold the classifier output at 0.5 to obtain a boolean prediction.
cls_pred = cls_output > 0.5
# Keep the first entry of every regressor output row as the predicted value.
reg_pred = [x[0] for x in reg_output]

# Ground-truth regression targets, in dataframe row order.
reg_true = list(reg_test['percentage'])



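Multiply the two prediction vectors elementwise, so that every sample for which the classifier prediction is negative gets a combined prediction of zero, and compare the RMSE of the combined prediction with that of the regressor alone.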

In [18]:
# Combine the models: the boolean classifier prediction acts as a mask, so the
# regressor's estimate is kept where it is True and becomes 0 where it is False.
# Both operands are flattened to 1-D first; if the classifier prediction carried
# a trailing singleton axis, (N, 1) * (N,) would broadcast to an (N, N) matrix
# instead of one value per sample.
comb_pred = np.ravel(cls_pred) * np.ravel(reg_pred)

# RMSE of the regressor alone versus the combined prediction.
reg_rmse = np.sqrt(mean_squared_error(reg_true, reg_pred))
comb_rmse = np.sqrt(mean_squared_error(reg_true, comb_pred))

# Values are multiplied by 100 for display.
print('Regression model RMSE of {}(%)'.format(reg_rmse*100))
print('Combined model RMSE of {}(%)'.format(comb_rmse*100))

Regression model RMSE of 6.879220448432488(%)
Combined model RMSE of 6.170758092747986(%)
