# Import Necessary Packages and Libraries

In [35]:
!pip install watermark

!pip install xgboost



In [36]:
# IMPORTS: THIRD-PARTY LIBRARIES PLUS PROJECT-LOCAL IMAGE PREPROCESSING, PIPELINE, AND EVALUATION HELPERS USED FOR MODEL DEVELOPMENT, EVALUATION, AND PRODUCTION
import numpy as np
import pandas as pd
import PIL as pil
import PIL
import matplotlib.pyplot as plt
import seaborn as sns
from os import listdir
from os.path import isfile, join
import tempfile
import pickle
import time
import gc
import skimage.filters
import cv2
import watermark
import joblib
import math
import sys
from skimage.measure import block_reduce
from image_preprocessing import standardize_image_dataset,resize_dataset,binarize_dataset,crop_dataset,process_dataset_blur,do_pooling_dataset
from pipeline import model_pipeline
from automate_optimal_model_dev import automate_optimal_model_dev
from eval_on_test import make_preds

from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier,GradientBoostingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import CategoricalNB,GaussianNB
from sklearn.svm import SVC
from xgboost import XGBClassifier
from sklearn.model_selection import GridSearchCV,KFold
from sklearn.metrics import accuracy_score,f1_score
from sklearn.base import clone
from sklearn.metrics import confusion_matrix

In [37]:
import warnings
warnings.filterwarnings('ignore')

# Read in Training Data

In [38]:
# Load the pickled training frame and split it into features (every column
# except the last) and the target (the last column).
# The context manager closes the file handle deterministically instead of
# leaving it to the garbage collector.
# NOTE: pickle.load can execute arbitrary code; only load trusted files.
with open('Amit/Labeled Data/train_data.pkl', 'rb') as train_file:
    training_data = pickle.load(train_file)
train_X, train_y = training_data.iloc[:, :-1], training_data.iloc[:, -1]

In [39]:
%%time
# Re-read the same pickle into a temporary frame, split it into features and
# target, and build a resized copy of the features.
# NOTE: this rebinds `train_y` and defines lowercase `train_x`; the modelling
# cells below use capital-X `train_X` from the previous cell.
# The context manager closes the file handle deterministically.
with open('Amit/Labeled Data/train_data.pkl', 'rb') as train_file:
    all_data = pickle.load(train_file)
train_x, train_y = all_data.iloc[:, :-1], all_data.iloc[:, -1]
# presumably resizes each 256x256 image to 16x16 -- TODO confirm against
# image_preprocessing.resize_dataset
resized_16_16_train_x = resize_dataset(train_x, (256, 256), (16, 16))
# Drop the temporary name and trigger a collection; the return value of
# gc.collect() is what the cell displays.
del all_data
gc.collect()


CPU times: user 1min 54s, sys: 549 ms, total: 1min 54s
Wall time: 1min 54s


0

### Logistic Regression Test Example

In [40]:
# Search grid shared by every logistic-regression experiment below: five
# values of C spanning 1e-5 to 1e-1, with the iteration cap fixed at 100.
param_grid = dict(
    C=[1e-5, 1e-4, 1e-3, 1e-2, 1e-1],
    max_iter=[100],
)

In [41]:
%%time

# Experiment 1: binarize (0.3) -> crop -> 'g' blur with a 5x5 kernel -> 2x2
# max pooling, then a LogisticRegression grid search over `param_grid`,
# optimising F1 with n_splits=5.
# ('g' is presumably Gaussian blur -- TODO confirm in image_preprocessing.)
test_model_1 = model_pipeline()
test_model_1_result = test_model_1.evaluate(
    train_X,
    train_y,
    preprocessing=[
        ('binarize', [True, 0.3]),
        ('crop', [(256, 256), (256, 256)]),
        ('blur', ['g', (256, 256), (5, 5), 0, 0]),
        ('pool', [(2, 2), np.max]),
    ],
    model=LogisticRegression(),
    param_grid=param_grid,
    optimizing_metric='f1',
    n_splits=5,
    # Request every optional output except the transformed feature matrix.
    return_transformed_features=False,
    return_grid=True,
    return_score=True,
    return_best_estimator=True,
    return_best_params=True,
    return_oos_pred=True,
    return_oos_prob=True,
    return_threshold_analysis=True,
)
                        
                           

CPU times: user 21min 18s, sys: 8min 12s, total: 29min 31s
Wall time: 8min 29s


In [42]:
# Display the full result dictionary returned by evaluate() for experiment 1.
test_model_1_result

{'grid_search': GridSearchCV(cv=5, estimator=LogisticRegression(),
              param_grid={'C': [1e-05, 0.0001, 0.001, 0.01, 0.1],
                          'max_iter': [100]},
              scoring='f1'),
 'best_estimator': LogisticRegression(C=0.001),
 'best_params': {'C': 0.001, 'max_iter': 100},
 'best_score': 0.8828109699751993,
 'oos_preds': 54      1
 2602    0
 3433    0
 235     1
 1806    1
        ..
 3330    0
 70      1
 132     1
 2014    1
 1931    1
 Name: label, Length: 3220, dtype: uint8,
 'oos_probs': 54      5.834433e-01
 2602    9.182161e-10
 3433    2.946750e-03
 235     9.999996e-01
 1806    9.979477e-01
             ...     
 3330    1.563207e-05
 70      6.152530e-01
 132     9.999999e-01
 2014    9.999998e-01
 1931    1.000000e+00
 Name: label, Length: 3220, dtype: float64,
 'threshold_analysis': {'best_thresh': 0.5,
  'best_score': 0.8864388092613009,
  'best_preds': 54      1
  2602    0
  3433    0
  235     1
  1806    1
         ..
  3330    0
  70     

In [43]:
# Persist the experiment-1 result dict. The context manager guarantees the
# file is flushed and closed, which a bare open() passed inline does not.
with open('linear_model_results/log_reg_test_model_1_result.pkl', 'wb') as result_file:
    pickle.dump(test_model_1_result, result_file)

In [44]:
%%time

# Experiment 2: same as experiment 1 but with a binarize value of 0.35
# instead of 0.3 (crop -> 'g' blur 5x5 -> 2x2 max pooling unchanged).
# NOTE(review): the recorded output of this run is identical to experiment
# 1's (same best_score and out-of-sample probabilities); verify that the
# binarize value actually reaches the preprocessing step.
test_model_2 = model_pipeline()
test_model_2_result = test_model_2.evaluate(
    train_X,
    train_y,
    preprocessing=[
        ('binarize', [True, 0.35]),
        ('crop', [(256, 256), (256, 256)]),
        ('blur', ['g', (256, 256), (5, 5), 0, 0]),
        ('pool', [(2, 2), np.max]),
    ],
    model=LogisticRegression(),
    param_grid=param_grid,
    optimizing_metric='f1',
    n_splits=5,
    # Request every optional output except the transformed feature matrix.
    return_transformed_features=False,
    return_grid=True,
    return_score=True,
    return_best_estimator=True,
    return_best_params=True,
    return_oos_pred=True,
    return_oos_prob=True,
    return_threshold_analysis=True,
)
                        
                           

CPU times: user 21min 17s, sys: 8min 14s, total: 29min 31s
Wall time: 8min 28s


In [45]:
# Display the full result dictionary returned by evaluate() for experiment 2.
test_model_2_result

{'grid_search': GridSearchCV(cv=5, estimator=LogisticRegression(),
              param_grid={'C': [1e-05, 0.0001, 0.001, 0.01, 0.1],
                          'max_iter': [100]},
              scoring='f1'),
 'best_estimator': LogisticRegression(C=0.001),
 'best_params': {'C': 0.001, 'max_iter': 100},
 'best_score': 0.8828109699751993,
 'oos_preds': 54      1
 2602    0
 3433    0
 235     1
 1806    1
        ..
 3330    0
 70      1
 132     1
 2014    1
 1931    1
 Name: label, Length: 3220, dtype: uint8,
 'oos_probs': 54      5.834433e-01
 2602    9.182161e-10
 3433    2.946750e-03
 235     9.999996e-01
 1806    9.979477e-01
             ...     
 3330    1.563207e-05
 70      6.152530e-01
 132     9.999999e-01
 2014    9.999998e-01
 1931    1.000000e+00
 Name: label, Length: 3220, dtype: float64,
 'threshold_analysis': {'best_thresh': 0.5,
  'best_score': 0.8864388092613009,
  'best_preds': 54      1
  2602    0
  3433    0
  235     1
  1806    1
         ..
  3330    0
  70     

In [46]:
# Persist the experiment-2 result dict. The context manager guarantees the
# file is flushed and closed, which a bare open() passed inline does not.
with open('linear_model_results/log_reg_test_model_2_result.pkl', 'wb') as result_file:
    pickle.dump(test_model_2_result, result_file)

In [47]:
%%time

# Experiment 3: binarize (0.3) -> crop -> 'g' blur with a smaller 3x3 kernel
# -> 2x2 max pooling; same LogisticRegression grid search, optimising F1
# with n_splits=5.
test_model_3 = model_pipeline()
test_model_3_result = test_model_3.evaluate(
    train_X,
    train_y,
    preprocessing=[
        ('binarize', [True, 0.3]),
        ('crop', [(256, 256), (256, 256)]),
        ('blur', ['g', (256, 256), (3, 3), 0, 0]),
        ('pool', [(2, 2), np.max]),
    ],
    model=LogisticRegression(),
    param_grid=param_grid,
    optimizing_metric='f1',
    n_splits=5,
    # Request every optional output except the transformed feature matrix.
    return_transformed_features=False,
    return_grid=True,
    return_score=True,
    return_best_estimator=True,
    return_best_params=True,
    return_oos_pred=True,
    return_oos_prob=True,
    return_threshold_analysis=True,
)


CPU times: user 20min 57s, sys: 7min 57s, total: 28min 55s
Wall time: 8min 23s


In [48]:
# Persist the experiment-3 result dict. The context manager guarantees the
# file is flushed and closed, which a bare open() passed inline does not.
with open('linear_model_results/log_reg_test_model_3_result.pkl', 'wb') as result_file:
    pickle.dump(test_model_3_result, result_file)

In [49]:
# Display the full result dictionary returned by evaluate() for experiment 3.
test_model_3_result

{'grid_search': GridSearchCV(cv=5, estimator=LogisticRegression(),
              param_grid={'C': [1e-05, 0.0001, 0.001, 0.01, 0.1],
                          'max_iter': [100]},
              scoring='f1'),
 'best_estimator': LogisticRegression(C=0.0001),
 'best_params': {'C': 0.0001, 'max_iter': 100},
 'best_score': 0.8849713112335371,
 'oos_preds': 54      1
 2602    0
 3433    0
 235     1
 1806    1
        ..
 3330    0
 70      1
 132     1
 2014    1
 1931    1
 Name: label, Length: 3220, dtype: uint8,
 'oos_probs': 54      0.548585
 2602    0.004237
 3433    0.010258
 235     0.977360
 1806    0.939866
           ...   
 3330    0.001018
 70      0.542912
 132     0.993052
 2014    0.999547
 1931    0.999249
 Name: label, Length: 3220, dtype: float64,
 'threshold_analysis': {'best_thresh': 0.5,
  'best_score': 0.8933813348103018,
  'best_preds': 54      1
  2602    0
  3433    0
  235     1
  1806    1
         ..
  3330    0
  70      1
  132     1
  2014    1
  1931    1
  N

In [50]:
%%time

# Experiment 4: binarize (0.3) -> crop -> 'b' blur with a 5x5 kernel -> 2x2
# max pooling; same LogisticRegression grid search, optimising F1 with
# n_splits=5.
# (The meaning of blur mode 'b' is defined in image_preprocessing -- TODO
# confirm which filter it selects.)
test_model_4 = model_pipeline()
test_model_4_result = test_model_4.evaluate(
    train_X,
    train_y,
    preprocessing=[
        ('binarize', [True, 0.3]),
        ('crop', [(256, 256), (256, 256)]),
        ('blur', ['b', (256, 256), (5, 5), 0, 0]),
        ('pool', [(2, 2), np.max]),
    ],
    model=LogisticRegression(),
    param_grid=param_grid,
    optimizing_metric='f1',
    n_splits=5,
    # Request every optional output except the transformed feature matrix.
    return_transformed_features=False,
    return_grid=True,
    return_score=True,
    return_best_estimator=True,
    return_best_params=True,
    return_oos_pred=True,
    return_oos_prob=True,
    return_threshold_analysis=True,
)

CPU times: user 20min 55s, sys: 8min 12s, total: 29min 7s
Wall time: 8min 21s


In [51]:
# Persist the experiment-4 result dict. The context manager guarantees the
# file is flushed and closed, which a bare open() passed inline does not.
with open('linear_model_results/log_reg_test_model_4_result.pkl', 'wb') as result_file:
    pickle.dump(test_model_4_result, result_file)

In [52]:
# Display the full result dictionary returned by evaluate() for experiment 4.
test_model_4_result

{'grid_search': GridSearchCV(cv=5, estimator=LogisticRegression(),
              param_grid={'C': [1e-05, 0.0001, 0.001, 0.01, 0.1],
                          'max_iter': [100]},
              scoring='f1'),
 'best_estimator': LogisticRegression(C=0.001),
 'best_params': {'C': 0.001, 'max_iter': 100},
 'best_score': 0.870340521755074,
 'oos_preds': 54      1
 2602    0
 3433    0
 235     1
 1806    1
        ..
 3330    0
 70      1
 132     1
 2014    1
 1931    1
 Name: label, Length: 3220, dtype: uint8,
 'oos_probs': 54      5.968287e-01
 2602    9.594204e-07
 3433    3.712843e-04
 235     9.839865e-01
 1806    9.999997e-01
             ...     
 3330    7.545973e-06
 70      5.705025e-01
 132     9.999999e-01
 2014    9.999999e-01
 1931    9.999979e-01
 Name: label, Length: 3220, dtype: float64,
 'threshold_analysis': {'best_thresh': 0.39,
  'best_score': 0.8730911005792522,
  'best_preds': array([1, 0, 0, ..., 1, 1, 1])}}

In [53]:
# Drop the experiment-4 result dict (it was pickled to disk in an earlier
# cell) and run a garbage collection; the value returned by gc.collect() is
# what the cell displays.
del test_model_4_result
gc.collect()

52

In [54]:
%%time

# Experiment 5: same as experiment 4 but with a binarize value of 0.35
# instead of 0.3 (crop -> 'b' blur 5x5 -> 2x2 max pooling unchanged).
# NOTE(review): the recorded output of this run is identical to experiment
# 4's (same best_score, probabilities and best threshold); verify that the
# binarize value actually reaches the preprocessing step.
test_model_5 = model_pipeline()
test_model_5_result = test_model_5.evaluate(
    train_X,
    train_y,
    preprocessing=[
        ('binarize', [True, 0.35]),
        ('crop', [(256, 256), (256, 256)]),
        ('blur', ['b', (256, 256), (5, 5), 0, 0]),
        ('pool', [(2, 2), np.max]),
    ],
    model=LogisticRegression(),
    param_grid=param_grid,
    optimizing_metric='f1',
    n_splits=5,
    # Request every optional output except the transformed feature matrix.
    return_transformed_features=False,
    return_grid=True,
    return_score=True,
    return_best_estimator=True,
    return_best_params=True,
    return_oos_pred=True,
    return_oos_prob=True,
    return_threshold_analysis=True,
)

CPU times: user 20min 56s, sys: 8min 4s, total: 29min 1s
Wall time: 8min 19s


In [55]:
# Persist the experiment-5 result dict, then display it. The context manager
# guarantees the file is flushed and closed, which a bare open() passed
# inline does not.
with open('linear_model_results/log_reg_test_model_5_result.pkl', 'wb') as result_file:
    pickle.dump(test_model_5_result, result_file)
test_model_5_result

{'grid_search': GridSearchCV(cv=5, estimator=LogisticRegression(),
              param_grid={'C': [1e-05, 0.0001, 0.001, 0.01, 0.1],
                          'max_iter': [100]},
              scoring='f1'),
 'best_estimator': LogisticRegression(C=0.001),
 'best_params': {'C': 0.001, 'max_iter': 100},
 'best_score': 0.870340521755074,
 'oos_preds': 54      1
 2602    0
 3433    0
 235     1
 1806    1
        ..
 3330    0
 70      1
 132     1
 2014    1
 1931    1
 Name: label, Length: 3220, dtype: uint8,
 'oos_probs': 54      5.968287e-01
 2602    9.594204e-07
 3433    3.712843e-04
 235     9.839865e-01
 1806    9.999997e-01
             ...     
 3330    7.545973e-06
 70      5.705025e-01
 132     9.999999e-01
 2014    9.999999e-01
 1931    9.999979e-01
 Name: label, Length: 3220, dtype: float64,
 'threshold_analysis': {'best_thresh': 0.39,
  'best_score': 0.8730911005792522,
  'best_preds': array([1, 0, 0, ..., 1, 1, 1])}}

In [56]:
# Drop the experiment-5 result dict (it was pickled to disk in the previous
# cell) and run a garbage collection; the value returned by gc.collect() is
# what the cell displays.
del test_model_5_result
gc.collect()

100

In [57]:
%%time

# Experiment 6: binarize (0.3) -> crop -> 'b' blur with a 3x3 kernel -> 2x2
# max pooling; same LogisticRegression grid search, optimising F1 with
# n_splits=5.
test_model_6 = model_pipeline()
test_model_6_result = test_model_6.evaluate(
    train_X,
    train_y,
    preprocessing=[
        ('binarize', [True, 0.3]),
        ('crop', [(256, 256), (256, 256)]),
        ('blur', ['b', (256, 256), (3, 3), 0, 0]),
        ('pool', [(2, 2), np.max]),
    ],
    model=LogisticRegression(),
    param_grid=param_grid,
    optimizing_metric='f1',
    n_splits=5,
    # Request every optional output except the transformed feature matrix.
    return_transformed_features=False,
    return_grid=True,
    return_score=True,
    return_best_estimator=True,
    return_best_params=True,
    return_oos_pred=True,
    return_oos_prob=True,
    return_threshold_analysis=True,
)

CPU times: user 21min 8s, sys: 8min 5s, total: 29min 13s
Wall time: 8min 23s


In [58]:
# Persist the experiment-6 result dict, then display it. The context manager
# guarantees the file is flushed and closed, which a bare open() passed
# inline does not.
with open('linear_model_results/log_reg_test_model_6_result.pkl', 'wb') as result_file:
    pickle.dump(test_model_6_result, result_file)
test_model_6_result

{'grid_search': GridSearchCV(cv=5, estimator=LogisticRegression(),
              param_grid={'C': [1e-05, 0.0001, 0.001, 0.01, 0.1],
                          'max_iter': [100]},
              scoring='f1'),
 'best_estimator': LogisticRegression(C=0.001),
 'best_params': {'C': 0.001, 'max_iter': 100},
 'best_score': 0.8819163218813474,
 'oos_preds': 54      1
 2602    0
 3433    0
 235     1
 1806    1
        ..
 3330    0
 70      1
 132     1
 2014    1
 1931    1
 Name: label, Length: 3220, dtype: uint8,
 'oos_probs': 54      6.011558e-01
 2602    1.396285e-06
 3433    1.010189e-03
 235     9.999736e-01
 1806    1.000000e+00
             ...     
 3330    7.213061e-07
 70      6.219768e-01
 132     1.000000e+00
 2014    1.000000e+00
 1931    1.000000e+00
 Name: label, Length: 3220, dtype: float64,
 'threshold_analysis': {'best_thresh': 0.51,
  'best_score': 0.8859674192626465,
  'best_preds': array([1, 0, 0, ..., 1, 1, 1])}}

In [59]:
# Drop the experiment-6 result dict (it was pickled to disk in the previous
# cell) and run a garbage collection; the value returned by gc.collect() is
# what the cell displays.
del test_model_6_result
gc.collect()

100

In [60]:
%%time

# Experiment 7 (ablation): 2x2 max pooling as the only preprocessing step;
# same LogisticRegression grid search, optimising F1 with n_splits=5.
test_model_7 = model_pipeline()
test_model_7_result = test_model_7.evaluate(
    train_X,
    train_y,
    preprocessing=[
        ('pool', [(2, 2), np.max]),
    ],
    model=LogisticRegression(),
    param_grid=param_grid,
    optimizing_metric='f1',
    n_splits=5,
    # Request every optional output except the transformed feature matrix.
    return_transformed_features=False,
    return_grid=True,
    return_score=True,
    return_best_estimator=True,
    return_best_params=True,
    return_oos_pred=True,
    return_oos_prob=True,
    return_threshold_analysis=True,
)

CPU times: user 15min 17s, sys: 8min 5s, total: 23min 23s
Wall time: 2min 14s


In [61]:
# Persist the experiment-7 result dict, then display it. The context manager
# guarantees the file is flushed and closed, which a bare open() passed
# inline does not.
with open('linear_model_results/log_reg_test_model_7_result.pkl', 'wb') as result_file:
    pickle.dump(test_model_7_result, result_file)
test_model_7_result

{'grid_search': GridSearchCV(cv=5, estimator=LogisticRegression(),
              param_grid={'C': [1e-05, 0.0001, 0.001, 0.01, 0.1],
                          'max_iter': [100]},
              scoring='f1'),
 'best_estimator': LogisticRegression(C=1e-05),
 'best_params': {'C': 1e-05, 'max_iter': 100},
 'best_score': 0.9415170535182782,
 'oos_preds': 54      1
 2602    0
 3433    0
 235     1
 1806    1
        ..
 3330    0
 70      1
 132     1
 2014    1
 1931    1
 Name: label, Length: 3220, dtype: uint8,
 'oos_probs': 54      0.999274
 2602    0.029690
 3433    0.018012
 235     0.987716
 1806    0.989018
           ...   
 3330    0.002314
 70      0.999996
 132     0.999717
 2014    0.936096
 1931    0.992751
 Name: label, Length: 3220, dtype: float64,
 'threshold_analysis': {'best_thresh': 0.33,
  'best_score': 0.9432703003337042,
  'best_preds': array([1, 0, 0, ..., 1, 1, 1])}}

In [62]:
# Drop the experiment-7 result dict (it was pickled to disk in the previous
# cell) and run a garbage collection; the value returned by gc.collect() is
# what the cell displays.
del test_model_7_result
gc.collect()

52

In [63]:
%%time

# Experiment 8 (ablation): binarize (0.3) as the only preprocessing step;
# same LogisticRegression grid search, optimising F1 with n_splits=5.
test_model_8 = model_pipeline()
test_model_8_result = test_model_8.evaluate(
    train_X,
    train_y,
    preprocessing=[
        ('binarize', [True, 0.3]),
    ],
    model=LogisticRegression(),
    param_grid=param_grid,
    optimizing_metric='f1',
    n_splits=5,
    # Request every optional output except the transformed feature matrix.
    return_transformed_features=False,
    return_grid=True,
    return_score=True,
    return_best_estimator=True,
    return_best_params=True,
    return_oos_pred=True,
    return_oos_prob=True,
    return_threshold_analysis=True,
)

CPU times: user 41min 19s, sys: 20min 37s, total: 1h 1min 56s
Wall time: 6min 53s


In [64]:
# Persist the experiment-8 result dict, then display it. The context manager
# guarantees the file is flushed and closed, which a bare open() passed
# inline does not.
with open('linear_model_results/log_reg_test_model_8_result.pkl', 'wb') as result_file:
    pickle.dump(test_model_8_result, result_file)
test_model_8_result

{'grid_search': GridSearchCV(cv=5, estimator=LogisticRegression(),
              param_grid={'C': [1e-05, 0.0001, 0.001, 0.01, 0.1],
                          'max_iter': [100]},
              scoring='f1'),
 'best_estimator': LogisticRegression(C=0.1),
 'best_params': {'C': 0.1, 'max_iter': 100},
 'best_score': 0.9349074893860319,
 'oos_preds': 54      1
 2602    0
 3433    0
 235     1
 1806    1
        ..
 3330    0
 70      1
 132     1
 2014    1
 1931    1
 Name: label, Length: 3220, dtype: uint8,
 'oos_probs': 54      0.999828
 2602    0.020692
 3433    0.037475
 235     0.979588
 1806    0.989154
           ...   
 3330    0.003911
 70      1.000000
 132     0.998097
 2014    0.931653
 1931    0.993893
 Name: label, Length: 3220, dtype: float64,
 'threshold_analysis': {'best_thresh': 0.27,
  'best_score': 0.9446128410030312,
  'best_preds': array([1, 0, 0, ..., 1, 1, 1])}}

In [65]:
# Drop the experiment-8 result dict (it was pickled to disk in the previous
# cell) and run a garbage collection; the value returned by gc.collect() is
# what the cell displays.
del test_model_8_result
gc.collect()

52

In [66]:
%%time

# Experiment 9 (ablation): crop as the only preprocessing step; same
# LogisticRegression grid search, optimising F1 with n_splits=5.
# NOTE(review): both size arguments passed to the crop step are (256, 256);
# confirm what the crop does when they match.
test_model_9 = model_pipeline()
test_model_9_result = test_model_9.evaluate(
    train_X,
    train_y,
    preprocessing=[
        ('crop', [(256, 256), (256, 256)]),
    ],
    model=LogisticRegression(),
    param_grid=param_grid,
    optimizing_metric='f1',
    n_splits=5,
    # Request every optional output except the transformed feature matrix.
    return_transformed_features=False,
    return_grid=True,
    return_score=True,
    return_best_estimator=True,
    return_best_params=True,
    return_oos_pred=True,
    return_oos_prob=True,
    return_threshold_analysis=True,
)

CPU times: user 54min 29s, sys: 27min 44s, total: 1h 22min 14s
Wall time: 8min 39s


In [67]:
# Persist the experiment-9 result dict, then display it. The context manager
# guarantees the file is flushed and closed, which a bare open() passed
# inline does not.
with open('linear_model_results/log_reg_test_model_9_result.pkl', 'wb') as result_file:
    pickle.dump(test_model_9_result, result_file)
test_model_9_result

{'grid_search': GridSearchCV(cv=5, estimator=LogisticRegression(),
              param_grid={'C': [1e-05, 0.0001, 0.001, 0.01, 0.1],
                          'max_iter': [100]},
              scoring='f1'),
 'best_estimator': LogisticRegression(C=1e-05),
 'best_params': {'C': 1e-05, 'max_iter': 100},
 'best_score': 0.9111585544037718,
 'oos_preds': 54      1
 2602    0
 3433    0
 235     1
 1806    1
        ..
 3330    0
 70      1
 132     1
 2014    1
 1931    1
 Name: label, Length: 3220, dtype: uint8,
 'oos_probs': 54      0.997341
 2602    0.008737
 3433    0.000528
 235     0.996021
 1806    0.948922
           ...   
 3330    0.006845
 70      1.000000
 132     0.994119
 2014    0.822032
 1931    0.998084
 Name: label, Length: 3220, dtype: float64,
 'threshold_analysis': {'best_thresh': 0.36000000000000004,
  'best_score': 0.9151797968707109,
  'best_preds': array([1, 0, 0, ..., 1, 1, 1])}}

In [68]:
# Drop the experiment-9 result dict (it was pickled to disk in the previous
# cell) and run a garbage collection; the value returned by gc.collect() is
# what the cell displays.
del test_model_9_result
gc.collect()

52

In [69]:
%%time

# Experiment 10 (ablation): 'b' blur with a 3x3 kernel as the only
# preprocessing step; same LogisticRegression grid search, optimising F1
# with n_splits=5.
test_model_10 = model_pipeline()
test_model_10_result = test_model_10.evaluate(
    train_X,
    train_y,
    preprocessing=[
        ('blur', ['b', (256, 256), (3, 3), 0, 0]),
    ],
    model=LogisticRegression(),
    param_grid=param_grid,
    optimizing_metric='f1',
    n_splits=5,
    # Request every optional output except the transformed feature matrix.
    return_transformed_features=False,
    return_grid=True,
    return_score=True,
    return_best_estimator=True,
    return_best_params=True,
    return_oos_pred=True,
    return_oos_prob=True,
    return_threshold_analysis=True,
)

CPU times: user 49min 55s, sys: 29min 3s, total: 1h 18min 59s
Wall time: 6min 26s


In [70]:
# Persist the experiment-10 result dict, then display it. The context manager
# guarantees the file is flushed and closed, which a bare open() passed
# inline does not.
with open('linear_model_results/log_reg_test_model_10_result.pkl', 'wb') as result_file:
    pickle.dump(test_model_10_result, result_file)
test_model_10_result

{'grid_search': GridSearchCV(cv=5, estimator=LogisticRegression(),
              param_grid={'C': [1e-05, 0.0001, 0.001, 0.01, 0.1],
                          'max_iter': [100]},
              scoring='f1'),
 'best_estimator': LogisticRegression(C=1e-05),
 'best_params': {'C': 1e-05, 'max_iter': 100},
 'best_score': 0.9363632355241519,
 'oos_preds': 54      1
 2602    0
 3433    0
 235     1
 1806    1
        ..
 3330    0
 70      1
 132     1
 2014    1
 1931    1
 Name: label, Length: 3220, dtype: uint8,
 'oos_probs': 54      0.999996
 2602    0.017433
 3433    0.007999
 235     0.999255
 1806    0.913698
           ...   
 3330    0.000116
 70      1.000000
 132     0.999974
 2014    0.925692
 1931    0.999954
 Name: label, Length: 3220, dtype: float64,
 'threshold_analysis': {'best_thresh': 0.23,
  'best_score': 0.9405555555555555,
  'best_preds': array([1, 0, 0, ..., 1, 1, 1])}}

In [71]:
# Drop the experiment-10 result dict (it was pickled to disk in the previous
# cell) and run a garbage collection; the value returned by gc.collect() is
# what the cell displays.
del test_model_10_result
gc.collect()

52

In [72]:
%%time

# Experiment 11 (ablation): 'g' blur with a 3x3 kernel as the only
# preprocessing step; same LogisticRegression grid search, optimising F1
# with n_splits=5.
test_model_11 = model_pipeline()
test_model_11_result = test_model_11.evaluate(
    train_X,
    train_y,
    preprocessing=[
        ('blur', ['g', (256, 256), (3, 3), 0, 0]),
    ],
    model=LogisticRegression(),
    param_grid=param_grid,
    optimizing_metric='f1',
    n_splits=5,
    # Request every optional output except the transformed feature matrix.
    return_transformed_features=False,
    return_grid=True,
    return_score=True,
    return_best_estimator=True,
    return_best_params=True,
    return_oos_pred=True,
    return_oos_prob=True,
    return_threshold_analysis=True,
)

CPU times: user 50min 34s, sys: 35min 56s, total: 1h 26min 30s
Wall time: 7min 11s


In [73]:
# Persist the experiment-11 result dict, then display it. The context manager
# guarantees the file is flushed and closed, which a bare open() passed
# inline does not.
with open('linear_model_results/log_reg_test_model_11_result.pkl', 'wb') as result_file:
    pickle.dump(test_model_11_result, result_file)
test_model_11_result

{'grid_search': GridSearchCV(cv=5, estimator=LogisticRegression(),
              param_grid={'C': [1e-05, 0.0001, 0.001, 0.01, 0.1],
                          'max_iter': [100]},
              scoring='f1'),
 'best_estimator': LogisticRegression(C=0.01),
 'best_params': {'C': 0.01, 'max_iter': 100},
 'best_score': 0.9364802452308506,
 'oos_preds': 54      1
 2602    0
 3433    0
 235     1
 1806    1
        ..
 3330    0
 70      1
 132     1
 2014    1
 1931    1
 Name: label, Length: 3220, dtype: uint8,
 'oos_probs': 54      1.000000e+00
 2602    4.392566e-09
 3433    7.836329e-08
 235     1.000000e+00
 1806    9.998951e-01
             ...     
 3330    1.395980e-14
 70      1.000000e+00
 132     1.000000e+00
 2014    9.895514e-01
 1931    1.000000e+00
 Name: label, Length: 3220, dtype: float64,
 'threshold_analysis': {'best_thresh': 0.01,
  'best_score': 0.9422863485016648,
  'best_preds': array([1, 0, 0, ..., 1, 1, 1])}}

In [74]:
# Drop the experiment-11 result dict (it was pickled to disk in the previous
# cell) and run a garbage collection; the value returned by gc.collect() is
# what the cell displays.
del test_model_11_result
gc.collect()

52