In [2]:
# IMAGE PREPROCESSING FUNCTIONS FOR USE IN MODEL DEVELOPMENT, EVALUATION, AND PRODUCTION
import numpy as np
import pandas as pd
import PIL as pil
import PIL
import matplotlib.pyplot as plt
import seaborn as sns
from os import listdir
from os.path import isfile, join
import tempfile
import pickle
import time
import gc
import skimage.filters
import cv2
import watermark
import joblib
import math
from skimage.measure import block_reduce
from image_preprocessing import standardize_image_dataset,resize_dataset,binarize_dataset,crop_dataset,process_dataset_blur,do_pooling_dataset
from pipeline import model_pipeline

from sklearn.ensemble import RandomForestClassifier,GradientBoostingClassifier
from xgboost import XGBClassifier
from sklearn.model_selection import GridSearchCV,KFold
from sklearn.metrics import accuracy_score,f1_score
from sklearn.base import clone

1. Run standardize_image_dataset from image_preprocessing.py to standardize full dataset.

In [None]:
# standardize_image_dataset(all_pic_files,newdim=(256,256))


2. Run cells from Split Labeled Data.ipynb to split data into test and train

In [None]:
# all_data = pickle.load(open('Amit/Labeled Data/labeled_data.pkl','rb'))
# X,y = all_data.iloc[:,:-1], all_data['label']
# xtrain,xtest,ytrain,ytest = train_test_split(X,y,test_size=0.3,random_state=50)
# pickle.dump(pd.concat([xtrain,ytrain],axis=1),open('Amit/Labeled Data/train_data.pkl','wb'))
# pickle.dump(pd.concat([xtest,ytest],axis=1),open('Amit/Labeled Data/training_data.pkl','wb'))

# 3. Read in Training Data and Test Data

In [3]:
all_data = pickle.load(open('Amit/Labeled Data/train_data.pkl','rb'))
train_x,train_y = all_data.iloc[:,:-1],all_data.iloc[:,-1]
del all_data
gc.collect()

0

In [7]:
# test_data = pickle.load(open('Amit/Labeled Data/test_data.pkl','rb'))
# Annoying error with pickling. Comment the line below and uncomment the line above.
test_data = pickle.load(open('/Users/neilbhatia/GitHub/w207_final_project_LOCAL/Amit/Labeled Data/test_data.pkl','rb'))


FileNotFoundError: [Errno 2] No such file or directory: '/Users/neilbhatia/GitHub/w207_final_project_LOCAL/Amit/Labeled Data/test_data.pkl'

# 4. Resize Vectorized Image Dataset - Train and Test

In [4]:
train_x = resize_dataset(train_x,(256,256),(128,128))

In [53]:
test_x = resize_dataset(test_x,(256,256),(128,128))

Importing for model testing and development

In [60]:
from automate_optimal_model_dev import automate_optimal_model_dev
from sklearn.naive_bayes import BernoulliNB
from sklearn.naive_bayes import MultinomialNB
from sklearn.naive_bayes import GaussianNB


# Bernoulli Model Test 1: Identify optimal Bernoulli Model while sequentially identifying optimal settings for bin/crop, blur, and pooling [3,2,1]

In [14]:
test_1_bernoulli = automate_optimal_model_dev(X = train_x,y = train_y,
                                    model = BernoulliNB(),
                                    param_grid={'alpha': [0.01, 0.1, 0.5, 1.0, 10.0]},
                                    preprocessing_eval_order = ['pool','blur','bin/crop'])

with open('nb_model_results/nb_bernoilli_model_test1.pickle', 'wb') as handle:
    pickle.dump(test_1_bernoulli, handle, protocol=pickle.HIGHEST_PROTOCOL)
    

test_2_bernoulli = automate_optimal_model_dev(X = train_x,y = train_y,
                                    model = BernoulliNB(),
                                    param_grid={'alpha': [0.01, 0.1, 0.5, 1.0, 10.0]},
                                    preprocessing_eval_order = ['pool','bin/crop','blur'])

with open('nb_model_results/nb_bernoulli_model_test2.pickle', 'wb') as handle:
    pickle.dump(test_2_bernoulli, handle, protocol=pickle.HIGHEST_PROTOCOL)


test_3_bernoulli = automate_optimal_model_dev(X = train_x,y = train_y,
                                    model = BernoulliNB(),
                                    param_grid={'alpha': [0.01, 0.1, 0.5, 1.0, 10.0]},
                                    preprocessing_eval_order = ['blur','bin/crop','pool'])

with open('nb_model_results/nb_bernoulli_model_test3.pickle', 'wb') as handle:
    pickle.dump(test_3_bernoulli, handle, protocol=pickle.HIGHEST_PROTOCOL)


test_4_bernoulli = automate_optimal_model_dev(X = train_x,y = train_y,
                                    model = BernoulliNB(),
                                    param_grid={'alpha': [0.01, 0.1, 0.5, 1.0, 10.0]},
                                    preprocessing_eval_order = ['bin/crop','blur','pool'])

with open('nb_model_results/nb_bernoulli_model_test4.pickle', 'wb') as handle:
    pickle.dump(test_4_bernoulli, handle, protocol=pickle.HIGHEST_PROTOCOL)


test_5_bernoulli = automate_optimal_model_dev(X = train_x,y = train_y,
                                    model = BernoulliNB(),
                                    param_grid={'alpha': [0.01, 0.1, 0.5, 1.0, 10.0]},
                                    preprocessing_eval_order = ['bin/crop','pool','blur'])

with open('nb_model_results/nb_bernoulli_model_test5.pickle', 'wb') as handle:
    pickle.dump(test_5_bernoulli, handle, protocol=pickle.HIGHEST_PROTOCOL)


test_6_bernoulli = automate_optimal_model_dev(X = train_x,y = train_y,
                                    model = BernoulliNB(),
                                    param_grid={'alpha': [0.01, 0.1, 0.5, 1.0, 10.0]},
                                    preprocessing_eval_order = ['blur','pool','bin/crop'])

with open('nb_model_results/nb_bernoulli_model_test6.pickle', 'wb') as handle:
    pickle.dump(test_6_bernoulli, handle, protocol=pickle.HIGHEST_PROTOCOL)




KeyboardInterrupt: 

# Gaussian Model Test 1: Identify optimal Gaussian Model while sequentially identifying optimal settings for bin/crop, blur, and pooling [3,2,1]

In [62]:
# alphas = {'alpha': [1.0e-10, 0.0001, 0.001, 0.01, 0.1, 0.5, 1.0, 2.0, 10.0]}

test_1_gaussian = automate_optimal_model_dev(X = train_x,y = train_y,
                                    model = GaussianNB(),
                                    param_grid={'var_smoothing': np.linspace(0.00000000001,1,100)},
                                    preprocessing_eval_order = ['pool','blur','bin/crop'])



with open('nb_model_results/nb_gaussian_model_test1.pickle', 'wb') as handle:
    pickle.dump(test_1_gaussian, handle, protocol=pickle.HIGHEST_PROTOCOL)

Base Case (No Preprocessing Best Score): 0.4498242873877392
Better Model Identified W/ Binarization/Cropping, Score = 0.46515209857527917
Better Model Identified W/ Binarization/Cropping, Score = 0.5199576121511833
Better Model Identified W/ Binarization/Cropping, Score = 0.5849546044098574
Better Model Identified W/ Binarization/Cropping, Score = 0.7443428021184401


In [64]:
test_2_gaussian = automate_optimal_model_dev(X = train_x,y = train_y,
                                    model = GaussianNB(),
                                    param_grid={'var_smoothing': np.linspace(0.00000000001,1,100)},
                                    preprocessing_eval_order = ['pool','bin/crop','blur'])



with open('nb_model_results/nb_gaussian_model_test2.pickle', 'wb') as handle:
    pickle.dump(test_2_gaussian, handle, protocol=pickle.HIGHEST_PROTOCOL)


test_3_gaussian = automate_optimal_model_dev(X = train_x,y = train_y,
                                    model = GaussianNB(),
                                    param_grid={'var_smoothing': np.linspace(0.00000000001,1,100)},
                                    preprocessing_eval_order = ['blur','bin/crop','pool'])



with open('nb_model_results/nb_gaussian_model_test3.pickle', 'wb') as handle:
    pickle.dump(test_3_gaussian, handle, protocol=pickle.HIGHEST_PROTOCOL)


test_4_gaussian = automate_optimal_model_dev(X = train_x,y = train_y,
                                    model = GaussianNB(),
                                    param_grid={'var_smoothing': np.linspace(0.00000000001,1,100)},
                                    preprocessing_eval_order = ['bin/crop','blur','pool'])



with open('nb_model_results/nb_gaussian_model_test4.pickle', 'wb') as handle:
    pickle.dump(test_4_gaussian, handle, protocol=pickle.HIGHEST_PROTOCOL)


test_5_gaussian = automate_optimal_model_dev(X = train_x,y = train_y,
                                    model = GaussianNB(),
                                    param_grid={'var_smoothing': np.linspace(0.00000000001,1,100)},
                                    preprocessing_eval_order = ['bin/crop','pool','blur'])



with open('nb_model_results/nb_gaussian_model_test5.pickle', 'wb') as handle:
    pickle.dump(test_5_gaussian, handle, protocol=pickle.HIGHEST_PROTOCOL)


test_6_gaussian = automate_optimal_model_dev(X = train_x,y = train_y,
                                    model = GaussianNB(),
                                    param_grid={'var_smoothing': np.linspace(0.00000000001,1,100)},
                                    preprocessing_eval_order = ['blur','pool','bin/crop'])



with open('nb_model_results/nb_gaussian_model_test6.pickle', 'wb') as handle:
    pickle.dump(test_6_gaussian, handle, protocol=pickle.HIGHEST_PROTOCOL)




Base Case (No Preprocessing Best Score): 0.4498242873877392
Better Model Identified W/ Binarization/Cropping, Score = 0.46515209857527917
Better Model Identified W/ Binarization/Cropping, Score = 0.5199576121511833
Better Model Identified W/ Binarization/Cropping, Score = 0.5849546044098574
Better Model Identified W/ Binarization/Cropping, Score = 0.7443428021184401
Base Case (No Preprocessing Best Score): 0.4498242873877392
Better Model Identified W/ Blurring, Score = 0.46636085626911317
Better Model Identified W/ Blurring, Score = 0.46671767406273906
Better Model Identified W/ Binarization/Cropping, Score = 0.719572001783326
Better Model Identified W/ Binarization/Cropping, Score = 0.7197505197505197
Base Case (No Preprocessing Best Score): 0.4498242873877392
Better Model Identified W/ Binarization/Cropping, Score = 0.7091518926689027
Better Model Identified W/ Binarization/Cropping, Score = 0.7139567019806541
Better Model Identified W/ Binarization/Cropping, Score = 0.73779946761313

# Reading in the models

In [5]:
bernoulli_model_results_test1 = pickle.load(open('nb_model_results/nb_bernoulli_model_test1.pickle','rb'))
bernoulli_model_results_test2 = pickle.load(open('nb_model_results/nb_bernoulli_model_test2.pickle','rb'))
bernoulli_model_results_test3 = pickle.load(open('nb_model_results/nb_bernoulli_model_test3.pickle','rb'))
bernoulli_model_results_test4 = pickle.load(open('nb_model_results/nb_bernoulli_model_test4.pickle','rb'))
bernoulli_model_results_test5 = pickle.load(open('nb_model_results/nb_bernoulli_model_test5.pickle','rb'))
bernoulli_model_results_test6 = pickle.load(open('nb_model_results/nb_bernoulli_model_test6.pickle','rb'))

bernoulli_model_results_list = [bernoulli_model_results_test1,bernoulli_model_results_test2,bernoulli_model_results_test3,bernoulli_model_results_test4,bernoulli_model_results_test5,bernoulli_model_results_test6]




In [6]:
gaussian_model_results_test1 = pickle.load(open('nb_model_results/nb_gaussian_model_test1.pickle','rb'))
gaussian_model_results_test2 = pickle.load(open('nb_model_results/nb_gaussian_model_test2.pickle','rb'))
gaussian_model_results_test3 = pickle.load(open('nb_model_results/nb_gaussian_model_test3.pickle','rb'))
gaussian_model_results_test4 = pickle.load(open('nb_model_results/nb_gaussian_model_test4.pickle','rb'))
gaussian_model_results_test5 = pickle.load(open('nb_model_results/nb_gaussian_model_test5.pickle','rb'))
gaussian_model_results_test6 = pickle.load(open('nb_model_results/nb_gaussian_model_test6.pickle','rb'))

gaussian_model_results_list = [gaussian_model_results_test1,gaussian_model_results_test2,gaussian_model_results_test3,gaussian_model_results_test4,gaussian_model_results_test5,gaussian_model_results_test6]


In [33]:

for i,bernoulli_results in enumerate(bernoulli_model_results_list):
    print('Model',i+1,'------------------------------------')
    print("Best Threshold:",bernoulli_results['best_thresh'])
    print("F1 Score", bernoulli_results['best_score'])
    print(bernoulli_results['best_model'],":",bernoulli_results['best_preprocess'], bernoulli_results['best_thresh'])


bernoulli_model_f1_scores_list = [model['best_score'] for model in bernoulli_model_results_list]

top_bernoulli_model_number = bernoulli_model_f1_scores_list.index(max(bernoulli_model_f1_scores_list))+1

print("The top model for bernoulli is:",top_bernoulli_model_number)



with open('nb_model_results/top_bernoulli_model.pickle', 'wb') as handle:
    pickle.dump(bernoulli_model_results_test3, handle, protocol=pickle.HIGHEST_PROTOCOL)
#Picking the best model based on score 

Model 1 ------------------------------------
Best Threshold: 0.01
F1 Score 0.7340137471360132
BernoulliNB(alpha=10.0) : (Initial Standardization/Resizing)(Pool, pool_size = (8, 8), pooling_function = <function amax at 0x11194eca0>)(Blurring, Type = g, Dimension = (16, 16), Kernel = (5, 5), sigma_x = 0, sigma_y = 0) 0.01
Model 2 ------------------------------------
Best Threshold: 0.01
F1 Score 0.7340137471360132
BernoulliNB(alpha=10.0) : (Initial Standardization/Resizing)(Pool, pool_size = (8, 8), pooling_function = <function amax at 0x11194eca0>)(Blurring, Type = g, Dimension = (16, 16), Kernel = (5, 5), sigma_x = 0, sigma_y = 0) 0.01
Model 3 ------------------------------------
Best Threshold: 0.11
F1 Score 0.7346197502837684
BernoulliNB(alpha=10.0) : (Initial Standardization/Resizing)(Blurring, Type = b, Dimension = (128, 128), Kernel = (3, 3), sigma_x = 0, sigma_y = 0)(Pool, pool_size = (8, 8), pooling_function = <function amax at 0x11194eca0>) 0.11
Model 4 ------------------------

In [36]:

for i,gaussian_results in enumerate(gaussian_model_results_list):
    print('Model',i+1,'------------------------------------')
    print("Best Threshold:",gaussian_results['best_thresh'])
    print("F1 Score", gaussian_results['best_score'])
    print(gaussian_results['best_model'],":",gaussian_results['best_preprocess'], gaussian_results['best_thresh'])


gaussian_model_f1_scores_list = [model['best_score'] for model in gaussian_model_results_list]

top_gaussian_model_number = gaussian_model_f1_scores_list.index(max(gaussian_model_f1_scores_list))+1

print("The top model for gaussian is:",top_gaussian_model_number)

with open('nb_model_results/top_gaussian_model.pickle', 'wb') as handle:
    pickle.dump(gaussian_model_results_test1, handle, protocol=pickle.HIGHEST_PROTOCOL)
#Picking the best model based on score 



Model 1 ------------------------------------
Best Threshold: 0.060000000000000005
F1 Score 0.7443428021184401
GaussianNB(var_smoothing=1e-11) : (Initial Standardization/Resizing)(Pool, pool_size = (16, 16), pooling_function = <function amax at 0x11194eca0>) 0.060000000000000005
Model 2 ------------------------------------
Best Threshold: 0.060000000000000005
F1 Score 0.7443428021184401
GaussianNB(var_smoothing=1e-11) : (Initial Standardization/Resizing)(Pool, pool_size = (16, 16), pooling_function = <function amax at 0x11194eca0>) 0.060000000000000005
Model 3 ------------------------------------
Best Threshold: 0.13
F1 Score 0.7197505197505197
GaussianNB(var_smoothing=0.8787878787890909) : (Initial Standardization/Resizing)(Blurring, Type = g, Dimension = (128, 128), Kernel = (3, 3), sigma_x = 1, sigma_y = 0)(Binarization, Automate Threshold = True, Threshold = 0.3) (Crop, (128, 128), (128, 128))(Pool, pool_size = (8, 8), pooling_function = <function mean at 0x111956a60>) 0.13
Model 4 

# The top Gaussian Model was the 1st one, and the top Bernoulli Model was the 3rd one.

In [39]:
# bernoulli_model_results_test3
# gaussian_model_results_test1

In [43]:
print("Top Gaussian Model Score:",gaussian_model_results_test1['best_score'])

print("Top Bernoulli Model Score:",bernoulli_model_results_test3['best_score'])

print("Gaussian is a slightly better model")

Top Gaussian Model Score: 0.7443428021184401
Top Bernoulli Model Score: 0.7346197502837684
Gaussian is a slightly better model


In [52]:
# Preprocessing
print(bernoulli_model_results_test3['best_preprocess'])

print(gaussian_model_results_test1['best_preprocess'])


(Initial Standardization/Resizing)(Blurring, Type = b, Dimension = (128, 128), Kernel = (3, 3), sigma_x = 0, sigma_y = 0)(Pool, pool_size = (8, 8), pooling_function = <function amax at 0x11194eca0>)
(Initial Standardization/Resizing)(Pool, pool_size = (16, 16), pooling_function = <function amax at 0x11194eca0>)


# Now testing both against their test set

In [57]:
from eval_on_test import make_preds
#Bernoulli
best_bernoulli_model = bernoulli_model_results_test3


print(best_bernoulli_model['best_model'])
print(best_bernoulli_model['best_score'])
print(best_bernoulli_model['best_thresh'])
print(best_bernoulli_model['best_preprocess'])

# print('Model Accuracy:',best_bernoulli_model.score(test_x,test_y))


BernoulliNB(alpha=10.0)
0.7346197502837684
0.11
(Initial Standardization/Resizing)(Blurring, Type = b, Dimension = (128, 128), Kernel = (3, 3), sigma_x = 0, sigma_y = 0)(Pool, pool_size = (8, 8), pooling_function = <function amax at 0x11194eca0>)


In [62]:
y = test_data['label']
x = test_data.iloc[:,:-1]

In [63]:
top_bernoulli_predictions = make_preds(x,y,[('resize',[(256,256),(128,128)]),('blur',['b',(128,128),(3,3),0,0]),('pool',[(8,8),np.max])],
          best_bernoulli_model['best_model'],best_bernoulli_model['best_thresh'])

top_bernoulli_predictions

{'features':            0          1          2          3          4          5    \
 3949  0.000000   0.000000   0.000000   0.000000   0.000000   0.000000   
 230   0.000000  36.666668  33.000000  32.000000  48.222221  49.888889   
 354   0.000000   0.000000   0.000000   0.000000   9.000000  25.111111   
 2736  0.000000   0.000000   0.000000   0.000000   0.222222   0.333333   
 3540  0.000000   0.000000   0.000000   0.666667   9.888889  22.222221   
 ...        ...        ...        ...        ...        ...        ...   
 1776  2.000000  14.444445  31.000000   2.000000   2.444444   2.333333   
 1791  0.333333   2.000000   2.000000   1.777778   2.000000   2.000000   
 2099  0.000000   0.000000   2.666667   2.888889   3.222222   4.000000   
 53    1.000000   1.000000   1.000000   1.000000   1.000000   1.555556   
 3857  3.777778   2.444444   3.111111   4.111111   3.666667   3.333333   
 
             6          7          8          9    ...        246        247  \
 3949   0.000000  

# Testing on Gaussian

In [64]:
#Bernoulli
best_gaussian_model = gaussian_model_results_test1


print(best_gaussian_model['best_model'])
print(best_gaussian_model['best_score'])
print(best_gaussian_model['best_thresh'])
print(best_gaussian_model['best_preprocess'])




GaussianNB(var_smoothing=1e-11)
0.7443428021184401
0.060000000000000005
(Initial Standardization/Resizing)(Pool, pool_size = (16, 16), pooling_function = <function amax at 0x11194eca0>)


In [66]:
top_gaussian_predictions = make_preds(x,y,[('resize',[(256,256),(128,128)]),('pool',[(16,16),np.max])],
          best_gaussian_model['best_model'],best_gaussian_model['best_thresh'])

top_gaussian_predictions


{'features':       0    1    2    3    4    5    6   7    8    9   ...   54   55   56   57  \
 3949   0    0    0    0    0    0    0   0    5  126  ...  123   78    0    0   
 230   96  110  124  127  127  125  126  98  110  127  ...  127   23  108   39   
 354    0   30  114  127  125  126   13   0    0  127  ...  121    0    0    1   
 2736   0    1    1   18   18    0    0   0    0    3  ...  117    1    0    0   
 3540   0   10  126  124  127  126   11   0    0  126  ...  127    2    8  116   
 ...   ..  ...  ...  ...  ...  ...  ...  ..  ...  ...  ...  ...  ...  ...  ...   
 1776   4   71  117  127  126  119    6   4    6  126  ...   93   26  125  126   
 1791   9    4  102  125  115  118    4   5   12  105  ...   61    8    5   74   
 2099   0    5    7   14    7    5    7   0    0   67  ...   10    0    0    4   
 53     1    1  123  127  124  122    1   1    1  127  ...  127    1    1  124   
 3857   9    9  115  127  124  125  122   8   17  113  ...  126  116   64   98   
 
  

# Pickling Final Model Predictions

In [67]:

with open('nb_model_results/top_bernoulli_model_predictions.pickle', 'wb') as handle:
    pickle.dump(top_bernoulli_predictions, handle, protocol=pickle.HIGHEST_PROTOCOL)

with open('nb_model_results/top_gaussian_model_predictions.pickle', 'wb') as handle:
    pickle.dump(top_gaussian_predictions, handle, protocol=pickle.HIGHEST_PROTOCOL)

# Trying the top models again with new resizing ability

In [5]:
all_data = pickle.load(open('Amit/Labeled Data/train_data.pkl','rb'))
train_x,train_y = all_data.iloc[:,:-1],all_data.iloc[:,-1]
del all_data
gc.collect()

from automate_optimal_model_dev import automate_optimal_model_dev
from sklearn.naive_bayes import BernoulliNB
from sklearn.naive_bayes import MultinomialNB
from sklearn.naive_bayes import GaussianNB


In [6]:
bernoulli_retest_1 = automate_optimal_model_dev(X = resize_dataset(train_x,(256,256),(32,32)),y = train_y,
                                    model = BernoulliNB(),
                                    param_grid={'alpha': [0.01, 0.1, 0.5, 1.0, 10.0]},
                                    preprocessing_eval_order = ['blur','bin/crop','pool'])

Better Model Identified by Resizing Images to (16, 16): 0.6876712328767124
Better Model Identified W/ Pooling, Score = 0.7214936652589464
Better Model Identified W/ Pooling, Score = 0.724383103304057


In [7]:
bernoulli_retest_1

{'features':        0    1    2    3    4    5    6    7    8    9    10   11   12   13  \
 54    127  126  115 -107  126  122  106  127  127  125  126  127  127  122   
 2602 -104  -44  -20  -69  -43  119   42   68  -38  -32  118  -26  -47  -29   
 3433 -125  127  127 -125  127  -10   -7  127  125   -6   -7  125  104  120   
 235   124  114  116  -65  127  125  124  127  124  112  125  127  -87  123   
 1806 -125  126  127 -125  127  -56  -56   99  126  -37  -29  126  -31  -31   
 ...   ...  ...  ...  ...  ...  ...  ...  ...  ...  ...  ...  ...  ...  ...   
 3330 -128  127  127 -128  127  101  122  127 -119  103  117  127 -128  127   
 70    -89  -20  -25  -77  -65  -20  -20  -41  -71  110  120  -69  -93  -23   
 132   127  127  126  126  127   -1  126  127  127   -1   72  127  127   -1   
 2014    7  121   96   84  126  126  126  116  125  121  125  122  119  119   
 1931  123  125  106  125  -26  -13  -13  125  115  -13   78  120  114  107   
 
        14   15  
 54    126  122  
 2

In [12]:

test_data = pickle.load(open('Amit/Labeled Data/test_data.pkl','rb'))
y = test_data['label']
x = test_data.iloc[:,:-1]

In [13]:
from eval_on_test import make_preds

top_bernoulli_model_retest = make_preds(x,y,[('resize',[(256,256),(16,16)]),('pool',[(4,4),np.max])],
          bernoulli_retest_1['best_model'],bernoulli_retest_1['best_thresh'])

top_bernoulli_model_retest

{'features':        0    1    2    3    4    5    6    7    8    9    10   11   12   13  \
 3949   60   92   87   86   97   89  104   82   80   70   93  100   78   95   
 230   103  126  124  121  124  123  127  119  115  127  120  115  111  125   
 354   106  101  104   89  104  121  122  127  106  126  124   91   57  121   
 2736    1   95   83    1   53  118  125   88   58   89  126  126    1   74   
 3540   58  116  120   62   99  110  101  101  125  107  108  121  125  119   
 ...   ...  ...  ...  ...  ...  ...  ...  ...  ...  ...  ...  ...  ...  ...   
 1776   84   93   86   92   99   80   80   89  106   95   79   86   86   89   
 1791   11   79   85   42   67   76   92   66   60   86   98   50   41   87   
 2099    7   74   95   15   86  105  100   88   90   84   95   45    4   96   
 53     56  127  127   37  110  126  126  111  126  119  124  122  114  124   
 3857   66   88   96   88   86  101  107  108  101   93  111  120   70   77   
 
        14   15  
 3949  111   92  
 2

In [None]:
gaussian_model_retest1 = automate_optimal_model_dev(X = resize_dataset(train_x,(256,256),(32,32)),y = train_y,
                                    model = BernoulliNB(),
                                    param_grid={'alpha': [0.01, 0.1, 0.5, 1.0, 10.0]},
                                    preprocessing_eval_order = ['blur','bin/crop','pool'])