# Import Necessary Packages and Libraries

In [1]:
# Imports: preprocessing helpers, project pipeline utilities, and model libraries used to evaluate the stacked model
import numpy as np
import pandas as pd
import PIL as pil
import PIL
import matplotlib.pyplot as plt
import seaborn as sns
from os import listdir
from os.path import isfile, join
import tempfile
import pickle
import time
import gc
import skimage.filters
import cv2
import watermark
import joblib
import math
import sys
from skimage.measure import block_reduce
from image_preprocessing import standardize_image_dataset,resize_dataset,binarize_dataset,crop_dataset,process_dataset_blur,do_pooling_dataset
from pipeline import model_pipeline
from automate_optimal_model_dev import automate_optimal_model_dev
from eval_on_test import make_preds

from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier,GradientBoostingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import CategoricalNB,GaussianNB
from sklearn.svm import SVC
from xgboost import XGBClassifier
from sklearn.model_selection import GridSearchCV,KFold
from sklearn.metrics import accuracy_score,f1_score
from sklearn.base import clone
from sklearn.metrics import confusion_matrix

  from pandas import MultiIndex, Int64Index


# Read in Test Data

In [2]:
# Load the pickled held-out test set. The context manager closes the file
# handle as soon as the object has been read (the bare open() call leaked it).
# NOTE: pickle.load can execute arbitrary code -- only load trusted files.
with open('Amit/Labeled Data/test_data.pkl', 'rb') as test_file:
    test_data = pickle.load(test_file)

# Target values come from the 'label' column.
y = test_data['label']
# Features are every column except the last one
# (presumably 'label' is the last column -- TODO confirm).
x = test_data.iloc[:,:-1]

# Load in Optimal Layer 1 Models

In [3]:
%%capture
# Load the saved layer 1 model results. Each file is opened in a `with` block
# so its handle is closed right after unpickling instead of being leaked.
# NOTE: pickle.load can execute arbitrary code -- only load trusted files.
with open('KNN_auto_resize_results/knn_test_12.pickle', 'rb') as model_file:
    knn = pickle.load(model_file)
# Drop the stored 'features' entry; later cells only read 'best_model' and 'best_thresh'.
knn.pop('features')

with open('Partition Based Model Results/gbct1.pkl', 'rb') as model_file:
    gbct = pickle.load(model_file)
gbct.pop('features')

# The logistic regression results are kept whole (no 'features' entry is popped here).
with open('Linear Model Results/logistic_regression.pkl', 'rb') as model_file:
    log_reg = pickle.load(model_file)

# NOTE(review): gaussian_nb is not referenced again in the cells shown here;
# its test predictions are read from a separate pickle later on.
with open('nb_model_results/top_gaussian_model.pickle', 'rb') as model_file:
    gaussian_nb = pickle.load(model_file)
gaussian_nb.pop('features')

# Load in Optimal Layer 2 Models

In [4]:
# Load the saved final-layer (layer 2) model results. `with` blocks close each
# file handle after unpickling instead of leaking it.
# NOTE: pickle.load can execute arbitrary code -- only load trusted files.
with open('Stacked Model Results/logreg.pkl', 'rb') as model_file:
    log_reg_finallayer = pickle.load(model_file)
with open('Stacked Model Results/gnb.pkl', 'rb') as model_file:
    gnb_finallayer = pickle.load(model_file)

# Make Predictions From Layer 1 Models to Use as Input for Layer 2 Model

#### KNN Prediction

In [5]:
# Score the saved KNN model on the test set. The two 'resize' steps are passed
# to make_preds as the preprocessing pipeline (presumably applied in order --
# TODO confirm against eval_on_test.make_preds).
knn_preds = make_preds(
    x,
    y,
    [
        ('resize', [(256, 256), (16, 16)]),
        ('resize', [(16, 16), (16, 16)]),
    ],
    knn['best_model'],
    knn['best_thresh'],
    return_features=False,
)
# Show the full result dict (probabilities, predictions, and metrics).
print(knn_preds)
# Keep only the probabilities; they become a layer 2 input feature.
knn_preds = knn_preds['probs']

{'probs': array([0., 1., 0., ..., 1., 1., 0.]), 'preds': array([0, 1, 0, ..., 1, 1, 0]), 'f1 score': 0.9803389830508474, 'accuracy': 0.9789855072463768, 'confusion_matrix': array([[628,  18],
       [ 11, 723]], dtype=int64)}


#### Gradient Boosted Classification Trees Prediction

In [6]:
# Score the saved gradient boosted trees model on the test set. The two
# 'resize' steps are passed to make_preds as the preprocessing pipeline
# (presumably applied in order -- TODO confirm).
gbct_preds = make_preds(
    x,
    y,
    [
        ('resize', [(256, 256), (32, 32)]),
        ('resize', [(32, 32), (16, 16)]),
    ],
    gbct['best_model'],
    gbct['best_thresh'],
    return_features=False,
)
# Show the full result dict (probabilities, predictions, and metrics).
print(gbct_preds)
# Keep only the probabilities; they become a layer 2 input feature.
gbct_preds = gbct_preds['probs']

{'probs': array([0.06696427, 0.9965784 , 0.33431814, ..., 0.99881066, 0.98973255,
       0.56279354]), 'preds': array([0, 1, 0, ..., 1, 1, 1]), 'f1 score': 0.9663526244952894, 'accuracy': 0.9637681159420289, 'confusion_matrix': array([[612,  34],
       [ 16, 718]], dtype=int64)}


#### Log Reg Model Predictions

In [7]:
# Score the saved logistic regression model on the test set. Preprocessing is
# done up front here (resize_dataset, then do_pooling_dataset with a (2,2)
# argument and np.max, then a cast to uint8), so make_preds receives an empty
# step list. The nesting is kept so no intermediate arrays stay referenced.
logreg_preds = make_preds(
    do_pooling_dataset(
        resize_dataset(x, (256, 256), (256, 256)),
        (2, 2),
        np.max,
    ).astype('uint8'),
    y,
    [],
    log_reg['best_estimator'],
    log_reg['threshold_analysis']['best_thresh'],
    return_features=False,
)
# Show the full result dict (probabilities, predictions, and metrics).
print(logreg_preds)
# Keep only the probabilities; they become a layer 2 input feature.
logreg_preds = logreg_preds['probs']

{'probs': array([0.00440583, 0.99870898, 0.9974342 , ..., 0.99782436, 0.9999713 ,
       0.99999403]), 'preds': array([0, 1, 1, ..., 1, 1, 1]), 'f1 score': 0.9572192513368983, 'accuracy': 0.9536231884057971, 'confusion_matrix': array([[600,  46],
       [ 18, 716]], dtype=int64)}


#### Gaussian NB Model Predictions

In [8]:
# Unlike the other layer 1 models, the Gaussian NB test predictions are read
# from a previously saved pickle instead of being recomputed with make_preds.
# The `with` block closes the file handle after unpickling.
# NOTE: pickle.load can execute arbitrary code -- only load trusted files.
# NOTE(review): these rows must be in the same order as `x`, because the final
# feature table is assembled positionally -- confirm how this file was produced.
with open('nb_model_results/top_gaussian_model_predictions.pickle', 'rb') as preds_file:
    gnb_preds = pickle.load(preds_file)
# Show the full loaded dict (it also contains a 'features' table).
print(gnb_preds)
# Keep only the probabilities; they become a layer 2 input feature.
gnb_preds = gnb_preds['probs']

{'features':       0    1    2    3    4    5    6   7    8    9   ...   54   55   56   57  \
3949   0    0    0    0    0    0    0   0    5  126  ...  123   78    0    0   
230   96  110  124  127  127  125  126  98  110  127  ...  127   23  108   39   
354    0   30  114  127  125  126   13   0    0  127  ...  121    0    0    1   
2736   0    1    1   18   18    0    0   0    0    3  ...  117    1    0    0   
3540   0   10  126  124  127  126   11   0    0  126  ...  127    2    8  116   
...   ..  ...  ...  ...  ...  ...  ...  ..  ...  ...  ...  ...  ...  ...  ...   
1776   4   71  117  127  126  119    6   4    6  126  ...   93   26  125  126   
1791   9    4  102  125  115  118    4   5   12  105  ...   61    8    5   74   
2099   0    5    7   14    7    5    7   0    0   67  ...   10    0    0    4   
53     1    1  123  127  124  122    1   1    1  127  ...  127    1    1  124   
3857   9    9  115  127  124  125  122   8   17  113  ...  126  116   64   98   

       58   59

#### Create Test Feature Data For Final Layer Model

In [13]:
# Assemble the layer 2 input table: one column of layer 1 probabilities per
# model, with rows labelled by the test set index.
# np.asarray strips any pandas index from the inputs so values are placed
# strictly by position. Column-by-column assignment onto an empty frame would
# instead align on index labels if any input were a Series, silently
# producing NaN or misordered rows.
# The constructor raises ValueError if the lengths disagree with x.index.
final_features = pd.DataFrame(
    {
        'knn': np.asarray(knn_preds),
        'gbct': np.asarray(gbct_preds),
        'logreg': np.asarray(logreg_preds),
        'gaussian_nb': np.asarray(gnb_preds),
    },
    index=x.index,
)

# Evaluate the Final Layer Models on the Layer 1 Test Predictions to Measure Stacked Model Performance

#### LogReg as Final Layer

In [18]:
# Evaluate the stacked model with logistic regression as the final layer.
# The inputs are already layer 1 probabilities, so no preprocessing steps are
# passed. The call is left as the cell's last expression so the result dict
# is displayed.
make_preds(
    final_features,
    y,
    [],
    log_reg_finallayer['best_estimator'],
    log_reg_finallayer['threshold_analysis']['best_thresh'],
    return_features=False,
)

{'probs': array([0.01590537, 0.99368672, 0.21933773, ..., 0.97324205, 0.99349726,
        0.54924692]),
 'preds': array([0, 1, 0, ..., 1, 1, 1]),
 'f1 score': 0.979702300405954,
 'accuracy': 0.9782608695652174,
 'confusion_matrix': array([[626,  20],
        [ 10, 724]], dtype=int64)}

#### Gaussian NB as Final Layer

In [20]:
# Evaluate the stacked model with Gaussian NB as the final layer.
# The inputs are already layer 1 probabilities, so no preprocessing steps are
# passed. The call is left as the cell's last expression so the result dict
# is displayed.
make_preds(
    final_features,
    y,
    [],
    gnb_finallayer['best_estimator'],
    gnb_finallayer['threshold_analysis']['best_thresh'],
    return_features=False,
)

{'probs': array([5.19658991e-11, 1.00000000e+00, 3.07721155e-04, ...,
        9.99999998e-01, 1.00000000e+00, 2.76445234e-02]),
 'preds': array([0, 1, 0, ..., 1, 1, 0]),
 'f1 score': 0.981081081081081,
 'accuracy': 0.9797101449275363,
 'confusion_matrix': array([[626,  20],
        [  8, 726]], dtype=int64)}