In [1]:
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC
from sklearn.linear_model import PassiveAggressiveClassifier
from sklearn.neural_network import MLPClassifier

In [2]:
import pickle
def save_model(path, model):
    """Pickle ``model`` to the file at ``path``.

    The parent directory of ``path`` is created when it does not exist yet,
    so saving into a fresh output folder does not fail with
    ``FileNotFoundError``.

    Args:
        path: Destination file path; an existing file is overwritten.
        model: Any picklable object.
    """
    import os  # local import so the function does not depend on another cell

    parent_dir = os.path.dirname(path)
    if parent_dir:
        # open(..., 'wb') creates the file but never its missing directories.
        os.makedirs(parent_dir, exist_ok=True)
    with open(path, 'wb') as fid:
        pickle.dump(model, fid)
        
def load_model(path):
    """Read the pickle file at ``path`` and return the object stored in it.

    Args:
        path: Path of a file previously written with ``pickle.dump``.

    Returns:
        The unpickled object.
    """
    # NOTE: unpickling can execute arbitrary code; only load trusted files.
    with open(path, 'rb') as source:
        return pickle.load(source)

In [3]:
# Frequency keys: every section below configures and saves one model per value.
freq_values = [
    1, 5, 10, 15, 25,
    50, 60, 75, 85, 100,
]

### Classifier

In [4]:
# Prefix prepended to every pickle file name written by the sections below.
# The trailing slash matters: file names are appended by plain concatenation.
MODEL_SAVE_PATH = "./models/classifier/"

##### KNeighborsClassifier

In [5]:
def process_hyperopt_params_kn(params_dict, freq):
    """Decode one hyperparameter record into KNeighborsClassifier arguments.

    The decoded values are written into the module-level ``kn_params[freq]``
    dict; nothing is returned.

    Args:
        params_dict: Mapping with the keys ``n_neighbors``, ``weights``,
            ``algorithm``, ``leaf_size`` and ``p``. ``weights``, ``algorithm``
            and ``p`` are list indices; the other two are numeric values.
        freq: Key of the ``kn_params`` entry to fill.
    """
    decoded = kn_params[freq]
    # The recorded values are floats (e.g. 8.0, 34.0), but scikit-learn
    # requires integer n_neighbors / leaf_size, so convert them here.
    decoded['n_neighbors'] = int(params_dict['n_neighbors'])
    decoded['weights'] = ['uniform', 'distance'][params_dict['weights']]
    decoded['algorithm'] = ['auto', 'ball_tree', 'kd_tree', 'brute'][params_dict['algorithm']]
    decoded['leaf_size'] = int(params_dict['leaf_size'])
    decoded['p'] = [1, 2][params_dict['p']]
    
# Per-frequency keyword arguments for KNeighborsClassifier. Every value starts
# as None and is filled in by process_hyperopt_params_kn below.
kn_params = {
    freq: dict.fromkeys(['n_neighbors', 'weights', 'algorithm', 'leaf_size', 'p'])
    for freq in freq_values
}

# Recorded hyperparameter values per frequency (presumably copied from a
# hyperopt search that is not part of this notebook). Categorical entries are
# list indices that process_hyperopt_params_kn decodes.
hyperopt_params = dict.fromkeys(freq_values)
hyperopt_params.update({
    1: {'algorithm': 2, 'leaf_size': 34.0, 'n_neighbors': 8.0, 'p': 0, 'weights': 1},
    5: {'algorithm': 0, 'leaf_size': 40.0, 'n_neighbors': 8.0, 'p': 0, 'weights': 1},
    10: {'algorithm': 3, 'leaf_size': 24.0, 'n_neighbors': 7.0, 'p': 0, 'weights': 1},
    15: {'algorithm': 1, 'leaf_size': 26.0, 'n_neighbors': 8.0, 'p': 0, 'weights': 1},
    25: {'algorithm': 1, 'leaf_size': 54.0, 'n_neighbors': 10.0, 'p': 0, 'weights': 1},
    50: {'algorithm': 2, 'leaf_size': 46.0, 'n_neighbors': 3.0, 'p': 0, 'weights': 1},
    60: {'algorithm': 0, 'leaf_size': 34.0, 'n_neighbors': 10.0, 'p': 0, 'weights': 1},
    75: {'algorithm': 2, 'leaf_size': 50.0, 'n_neighbors': 10.0, 'p': 0, 'weights': 1},
    85: {'algorithm': 0, 'leaf_size': 48.0, 'n_neighbors': 10.0, 'p': 0, 'weights': 1},
    100: {'algorithm': 2, 'leaf_size': 10.0, 'n_neighbors': 5.0, 'p': 0, 'weights': 1},
})

# Decode every record first ...
for freq in freq_values:
    process_hyperopt_params_kn(hyperopt_params[freq], freq)

# ... then build one unfitted estimator per frequency and pickle it.
for freq in freq_values:
    model = KNeighborsClassifier(**kn_params[freq])
    save_model(''.join([MODEL_SAVE_PATH, type(model).__name__, '_freq_', str(freq), '.pkl']), model)

##### DecisionTreeClassifier

In [6]:
def process_hyperopt_params_dt(params_dict, freq):
    """Decode one hyperparameter record into DecisionTreeClassifier arguments.

    The decoded values are written into the module-level ``dt_params[freq]``
    dict; nothing is returned.

    Args:
        params_dict: Mapping with the keys ``max_depth``, ``max_features``,
            ``criterion``, ``min_samples_split``, ``min_samples_leaf`` and
            ``min_weight_fraction_leaf``. ``max_features`` and ``criterion``
            are list indices; the rest are numeric values.
        freq: Key of the ``dt_params`` entry to fill.
    """
    decoded = dt_params[freq]
    decoded['max_depth'] = params_dict['max_depth']
    # Index 0 used to decode to 'auto'. For classifiers 'auto' meant
    # sqrt(n_features) and scikit-learn >= 1.3 rejects it, so it now decodes
    # to the equivalent 'sqrt'.
    decoded['max_features'] = ['sqrt', 'sqrt', 'log2'][params_dict['max_features']]
    decoded['criterion'] = ["gini", "entropy"][params_dict['criterion']]
    # The recorded sample counts are floats (e.g. 10.0). scikit-learn treats
    # a float here as a *fraction* of the samples, which is out of range for
    # these values, so convert them to the integer counts they represent.
    decoded['min_samples_split'] = int(params_dict['min_samples_split'])
    decoded['min_samples_leaf'] = int(params_dict['min_samples_leaf'])
    decoded['min_weight_fraction_leaf'] = params_dict['min_weight_fraction_leaf']

    
# Per-frequency keyword arguments for DecisionTreeClassifier. Every value
# starts as None and is filled in by process_hyperopt_params_dt below.
dt_params = {
    freq: dict.fromkeys([
        'max_depth', 'max_features', 'criterion',
        'min_samples_split', 'min_samples_leaf', 'min_weight_fraction_leaf',
    ])
    for freq in freq_values
}

# Recorded hyperparameter values per frequency (presumably copied from a
# hyperopt search that is not part of this notebook). 'criterion' and
# 'max_features' are list indices that process_hyperopt_params_dt decodes.
hyperopt_params = dict.fromkeys(freq_values)
hyperopt_params.update({
    1: {'criterion': 1, 'max_depth': 13, 'max_features': 0, 'min_samples_leaf': 2.0, 'min_samples_split': 10.0, 'min_weight_fraction_leaf': 0.008523324264696193},
    5: {'criterion': 1, 'max_depth': 15, 'max_features': 1, 'min_samples_leaf': 10.0, 'min_samples_split': 2.0, 'min_weight_fraction_leaf': 0.0009892337010474505},
    10: {'criterion': 1, 'max_depth': 12, 'max_features': 0, 'min_samples_leaf': 9.0, 'min_samples_split': 3.0, 'min_weight_fraction_leaf': 0.0028256473042533813},
    15: {'criterion': 1, 'max_depth': 17, 'max_features': 1, 'min_samples_leaf': 5.0, 'min_samples_split': 7.0, 'min_weight_fraction_leaf': 0.0006716572856625453},
    25: {'criterion': 1, 'max_depth': 10, 'max_features': 1, 'min_samples_leaf': 7.0, 'min_samples_split': 2.0, 'min_weight_fraction_leaf': 0.00038447265370684805},
    50: {'criterion': 1, 'max_depth': 13, 'max_features': 1, 'min_samples_leaf': 2.0, 'min_samples_split': 8.0, 'min_weight_fraction_leaf': 0.0003989811045009658},
    60: {'criterion': 0, 'max_depth': 15, 'max_features': 1, 'min_samples_leaf': 5.0, 'min_samples_split': 10.0, 'min_weight_fraction_leaf': 0.0005605568985052982},
    75: {'criterion': 0, 'max_depth': 13, 'max_features': 2, 'min_samples_leaf': 3.0, 'min_samples_split': 5.0, 'min_weight_fraction_leaf': 5.696645729779803e-05},
    85: {'criterion': 0, 'max_depth': 18, 'max_features': 2, 'min_samples_leaf': 3.0, 'min_samples_split': 7.0, 'min_weight_fraction_leaf': 0.0016728754976179257},
    100: {'criterion': 0, 'max_depth': 15, 'max_features': 1, 'min_samples_leaf': 7.0, 'min_samples_split': 3.0, 'min_weight_fraction_leaf': 0.001665172379950201},
})

# Decode every record first ...
for freq in freq_values:
    process_hyperopt_params_dt(hyperopt_params[freq], freq)

# ... then build one unfitted estimator per frequency and pickle it.
for freq in freq_values:
    model = DecisionTreeClassifier(**dt_params[freq])
    save_model(''.join([MODEL_SAVE_PATH, type(model).__name__, '_freq_', str(freq), '.pkl']), model)

##### RandomForest

In [11]:
def process_hyperopt_params_rf(params_dict, freq):
    """Decode one hyperparameter record into RandomForestClassifier arguments.

    The decoded values are written into the module-level ``rf_params[freq]``
    dict; nothing is returned.

    Args:
        params_dict: Mapping with the keys ``max_depth``, ``max_features``,
            ``criterion``, ``min_samples_split``, ``min_samples_leaf`` and
            ``min_weight_fraction_leaf``. ``max_features`` and ``criterion``
            are list indices; the rest are numeric values.
        freq: Key of the ``rf_params`` entry to fill.
    """
    decoded = rf_params[freq]
    decoded['max_depth'] = params_dict['max_depth']
    # Index 0 used to decode to 'auto'. For classifiers 'auto' meant
    # sqrt(n_features) and scikit-learn >= 1.3 rejects it, so it now decodes
    # to the equivalent 'sqrt'.
    decoded['max_features'] = ['sqrt', 'sqrt', 'log2'][params_dict['max_features']]
    decoded['criterion'] = ["gini", "entropy"][params_dict['criterion']]
    # The recorded sample counts are floats (e.g. 4.0). scikit-learn treats
    # a float here as a *fraction* of the samples, which is out of range for
    # these values, so convert them to the integer counts they represent.
    decoded['min_samples_split'] = int(params_dict['min_samples_split'])
    decoded['min_samples_leaf'] = int(params_dict['min_samples_leaf'])
    decoded['min_weight_fraction_leaf'] = params_dict['min_weight_fraction_leaf']

    
# Per-frequency keyword arguments for RandomForestClassifier. Every value
# starts as None and is filled in by process_hyperopt_params_rf below.
rf_params = {
    freq: dict.fromkeys([
        'max_depth', 'max_features', 'criterion',
        'min_samples_split', 'min_samples_leaf', 'min_weight_fraction_leaf',
    ])
    for freq in freq_values
}

# Recorded hyperparameter values per frequency (presumably copied from a
# hyperopt search that is not part of this notebook). 'criterion' and
# 'max_features' are list indices that process_hyperopt_params_rf decodes.
hyperopt_params = dict.fromkeys(freq_values)
hyperopt_params.update({
    1: {'criterion': 1, 'max_depth': 13, 'max_features': 0, 'min_samples_leaf': 1.0, 'min_samples_split': 4.0, 'min_weight_fraction_leaf': 0.000990811167603108},
    5: {'criterion': 1, 'max_depth': 8, 'max_features': 1, 'min_samples_leaf': 8.0, 'min_samples_split': 3.0, 'min_weight_fraction_leaf': 0.014591026678552194},
    10: {'criterion': 1, 'max_depth': 14, 'max_features': 1, 'min_samples_leaf': 7.0, 'min_samples_split': 2.0, 'min_weight_fraction_leaf': 0.00025398929532697645},
    15: {'criterion': 0, 'max_depth': 16, 'max_features': 0, 'min_samples_leaf': 9.0, 'min_samples_split': 9.0, 'min_weight_fraction_leaf': 0.0002623032626379749},
    25: {'criterion': 1, 'max_depth': 17, 'max_features': 1, 'min_samples_leaf': 2.0, 'min_samples_split': 10.0, 'min_weight_fraction_leaf': 7.505121043171701e-05},
    50: {'criterion': 0, 'max_depth': 15, 'max_features': 2, 'min_samples_leaf': 7.0, 'min_samples_split': 2.0, 'min_weight_fraction_leaf': 0.00013395028286673901},
    60: {'criterion': 0, 'max_depth': 13, 'max_features': 2, 'min_samples_leaf': 3.0, 'min_samples_split': 6.0, 'min_weight_fraction_leaf': 0.0025553382272804903},
    75: {'criterion': 0, 'max_depth': 15, 'max_features': 0, 'min_samples_leaf': 4.0, 'min_samples_split': 2.0, 'min_weight_fraction_leaf': 0.0001712406145939348},
    85: {'criterion': 1, 'max_depth': 16, 'max_features': 0, 'min_samples_leaf': 8.0, 'min_samples_split': 7.0, 'min_weight_fraction_leaf': 6.16395073401374e-05},
    100: {'criterion': 1, 'max_depth': 17, 'max_features': 0, 'min_samples_leaf': 6.0, 'min_samples_split': 7.0, 'min_weight_fraction_leaf': 0.003741673794972568},
})

# Decode every record first ...
for freq in freq_values:
    process_hyperopt_params_rf(hyperopt_params[freq], freq)

# ... then build one unfitted estimator per frequency and pickle it.
for freq in freq_values:
    model = RandomForestClassifier(verbose=0, **rf_params[freq])
    save_model(''.join([MODEL_SAVE_PATH, type(model).__name__, '_freq_', str(freq), '.pkl']), model)

##### GaussianNB

In [15]:
def process_hyperopt_params_nb(params_dict, freq):
    """Copy ``var_smoothing`` from ``params_dict`` into ``nb_params[freq]``.

    Args:
        params_dict: Mapping that contains a ``var_smoothing`` entry.
        freq: Key of the module-level ``nb_params`` entry to fill.
    """
    smoothing = params_dict['var_smoothing']
    nb_params[freq].update(var_smoothing=smoothing)
    
# Per-frequency keyword arguments for GaussianNB. The value starts as None
# and is filled in by process_hyperopt_params_nb below.
nb_params = {freq: dict.fromkeys(['var_smoothing']) for freq in freq_values}

# Recorded var_smoothing values per frequency (presumably copied from a
# hyperopt search that is not part of this notebook).
hyperopt_params = dict.fromkeys(freq_values)
hyperopt_params.update({
    1: {'var_smoothing': 1.2460184378655685e-09},
    5: {'var_smoothing': 1.0282014918470923e-10},
    10: {'var_smoothing': 6.377556397965757e-10},
    15: {'var_smoothing': 2.2962117400154974e-09},
    25: {'var_smoothing': 1.2908377701761542e-08},
    50: {'var_smoothing': 6.490487471670385e-09},
    60: {'var_smoothing': 5.976912618238698e-09},
    75: {'var_smoothing': 4.791540789108842e-10},
    85: {'var_smoothing': 1.676267544339988e-10},
    100: {'var_smoothing': 5.838586353515949e-09},
})

# Decode every record first ...
for freq in freq_values:
    process_hyperopt_params_nb(hyperopt_params[freq], freq)

# ... then build one unfitted estimator per frequency and pickle it.
for freq in freq_values:
    model = GaussianNB(**nb_params[freq])
    save_model(''.join([MODEL_SAVE_PATH, type(model).__name__, '_freq_', str(freq), '.pkl']), model)

##### SVC

##### PassiveAggressiveClassifier

In [19]:
def process_hyperopt_params_pa(params_dict, freq):
    """Decode one record into PassiveAggressiveClassifier arguments.

    The decoded values are written into the module-level ``pa_params[freq]``
    dict; nothing is returned.

    Args:
        params_dict: Mapping with ``C`` (stored unchanged) and
            ``fit_intercept`` (an index: 0 selects True, 1 selects False).
        freq: Key of the ``pa_params`` entry to fill.
    """
    intercept_choices = [True, False]
    entry = pa_params[freq]
    entry['C'] = params_dict['C']
    entry['fit_intercept'] = intercept_choices[params_dict['fit_intercept']]

    
# Per-frequency keyword arguments for PassiveAggressiveClassifier. Every value
# starts as None and is filled in by process_hyperopt_params_pa below.
pa_params = {freq: dict.fromkeys(['C', 'fit_intercept']) for freq in freq_values}

# Recorded hyperparameter values per frequency (presumably copied from a
# hyperopt search that is not part of this notebook). 'fit_intercept' is a
# list index that process_hyperopt_params_pa decodes.
hyperopt_params = dict.fromkeys(freq_values)
hyperopt_params.update({
    1: {'C': 0.024110225325426015, 'fit_intercept': 0},
    5: {'C': 0.4973703807068094, 'fit_intercept': 1},
    10: {'C': 0.13449519398904142, 'fit_intercept': 0},
    15: {'C': 0.14875857076238322, 'fit_intercept': 0},
    25: {'C': 0.2704430957411047, 'fit_intercept': 0},
    50: {'C': 0.5089077894613963, 'fit_intercept': 0},
    60: {'C': 0.7574288735094685, 'fit_intercept': 0},
    75: {'C': 0.9283106465514318, 'fit_intercept': 1},
    85: {'C': 0.8758158654804743, 'fit_intercept': 0},
    100: {'C': 0.5022782066219091, 'fit_intercept': 1},
})

# Decode every record first ...
for freq in freq_values:
    process_hyperopt_params_pa(hyperopt_params[freq], freq)

# ... then build one unfitted estimator per frequency and pickle it.
for freq in freq_values:
    model = PassiveAggressiveClassifier(**pa_params[freq])
    save_model(''.join([MODEL_SAVE_PATH, type(model).__name__, '_freq_', str(freq), '.pkl']), model)

##### MLP

In [None]:
def process_hyperopt_params_mlp(params_dict, freq):
    """Decode one hyperparameter record into the ``mlp_params[freq]`` entry.

    The previous body was a copy of the PassiveAggressive decoder and wrote
    ``C`` / ``fit_intercept``, which MLPClassifier does not use. It now fills
    the four keys that ``mlp_params`` declares and that the model-building
    loop consumes.

    Args:
        params_dict: Mapping with the keys ``first_size``, ``second_size``,
            ``activation`` and ``max_iter``.
        freq: Key of the module-level ``mlp_params`` entry to fill.

    Raises:
        KeyError: If ``params_dict`` lacks one of the four keys.
    """
    entry = mlp_params[freq]
    # Sizes and iteration counts must be integers for scikit-learn; the values
    # recorded for the other classifiers are floats (e.g. 34.0), so convert.
    entry['first_size'] = int(params_dict['first_size'])
    entry['second_size'] = int(params_dict['second_size'])
    # NOTE(review): stored as given. If the record holds a choice *index*
    # rather than the activation name, it must be decoded with the same option
    # list the search used -- that list is not visible in this notebook.
    entry['activation'] = params_dict['activation']
    entry['max_iter'] = int(params_dict['max_iter'])

    
# Per-frequency settings for MLPClassifier. Every value starts as None and is
# expected to be filled in by process_hyperopt_params_mlp below.
mlp_params = {freq:{'first_size': None, 'second_size': None, 'activation': None, 'max_iter': None,} for freq in freq_values}

hyperopt_params = {freq:None for freq in freq_values}

# FIXME: these records are identical to the PassiveAggressiveClassifier ones
# and contain none of the MLP keys (first_size, second_size, activation,
# max_iter). Replace them with the results of the MLP search.
hyperopt_params[1] = {'C': 0.024110225325426015, 'fit_intercept': 0}
hyperopt_params[5] = {'C': 0.4973703807068094, 'fit_intercept': 1}
hyperopt_params[10] = {'C': 0.13449519398904142, 'fit_intercept': 0}
hyperopt_params[15] = {'C': 0.14875857076238322, 'fit_intercept': 0}
hyperopt_params[25] = {'C': 0.2704430957411047, 'fit_intercept': 0}
hyperopt_params[50] = {'C': 0.5089077894613963, 'fit_intercept': 0}
hyperopt_params[60] = {'C': 0.7574288735094685, 'fit_intercept': 0}
hyperopt_params[75] = {'C': 0.9283106465514318, 'fit_intercept': 1}
hyperopt_params[85] = {'C': 0.8758158654804743, 'fit_intercept': 0}
hyperopt_params[100] = {'C': 0.5022782066219091, 'fit_intercept': 1}

for freq in freq_values:
    process_hyperopt_params_mlp(hyperopt_params[freq], freq)
    

for freq in freq_values:
    # The loop previously read an undefined name `space` (NameError); the
    # per-frequency settings live in mlp_params.
    settings = mlp_params[freq]
    unset = [name for name in ('first_size', 'second_size', 'activation', 'max_iter')
             if settings.get(name) is None]
    if unset:
        # Fail loudly rather than pickling an estimator configured with None.
        raise ValueError('MLP hyperparameters not set for freq ' + str(freq) + ': ' + ', '.join(unset))
    model = MLPClassifier(hidden_layer_sizes=(settings['first_size'], settings['second_size']),
                          activation=settings['activation'], batch_size=16,
                          max_iter=settings['max_iter'])
    save_model(MODEL_SAVE_PATH + type(model).__name__ + '_freq_' + str(freq) + '.pkl', model)