In [2]:
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC
from sklearn.linear_model import PassiveAggressiveClassifier
from sklearn.neural_network import MLPClassifier

In [3]:
import pickle
def save_model(path, model):
    with open(path, 'wb') as fid:
        pickle.dump(model, fid)  
        
def load_model(path):
    with open(path, 'rb') as fid:
        model = pickle.load(fid)
        return model

In [4]:
freq_values = [1, 5, 10, 15, 25, 50, 60, 75, 85, 100]

### Classifier

In [5]:
MODEL_SAVE_PATH = './models/classifier/'

###### KNeighborsClassifier

In [6]:
def process_hyperopt_params_kn(params_dict, freq):
    kn_params[freq]['n_neighbors'] = int(params_dict['n_neighbors'])
    kn_params[freq]['weights'] = ['uniform', 'distance'][params_dict['weights']]
    kn_params[freq]['algorithm'] = ['auto', 'ball_tree', 'kd_tree', 'brute'][params_dict['algorithm']]
    kn_params[freq]['leaf_size'] = int(params_dict['leaf_size'])
    kn_params[freq]['p'] = [1, 2][params_dict['p']]
    
kn_params = {freq:{'n_neighbors': None, 'weights': None, 'algorithm': None, 'leaf_size': None, 'p': None}\
                                                                                     for freq in freq_values}

hyperopt_params = {freq:None for freq in freq_values}

hyperopt_params[1] = {'algorithm': 3, 'leaf_size': 8.0, 'n_neighbors': 3.0, 'p': 1, 'weights': 1}
hyperopt_params[5] = {'algorithm': 1, 'leaf_size': 52.0, 'n_neighbors': 2.0, 'p': 1, 'weights': 1}
hyperopt_params[10] = {'algorithm': 1, 'leaf_size': 18.0, 'n_neighbors': 3.0, 'p': 1, 'weights': 1}
hyperopt_params[15] = {'algorithm': 2, 'leaf_size': 44.0, 'n_neighbors': 2.0, 'p': 1, 'weights': 1}
hyperopt_params[25] = {'algorithm': 0, 'leaf_size': 52.0, 'n_neighbors': 2.0, 'p': 1, 'weights': 1}
hyperopt_params[50] = {'algorithm': 2, 'leaf_size': 46.0, 'n_neighbors': 3.0, 'p': 0, 'weights': 1}
hyperopt_params[60] = {'algorithm': 0, 'leaf_size': 36.0, 'n_neighbors': 6.0, 'p': 0, 'weights': 1}
hyperopt_params[75] = {'algorithm': 3, 'leaf_size': 44.0, 'n_neighbors': 5.0, 'p': 1, 'weights': 1}
hyperopt_params[85] = {'algorithm': 0, 'leaf_size': 18.0, 'n_neighbors': 4.0, 'p': 1, 'weights': 1}
hyperopt_params[100] = {'algorithm': 2, 'leaf_size': 28.0, 'n_neighbors': 6.0, 'p': 0, 'weights': 1}


for freq in freq_values:
    process_hyperopt_params_kn(hyperopt_params[freq], freq)
    
for freq in freq_values:
    model = KNeighborsClassifier(n_neighbors=kn_params[freq]['n_neighbors'],\
                                 weights=kn_params[freq]['weights'], algorithm=kn_params[freq]['algorithm'],\
                                 leaf_size=kn_params[freq]['leaf_size'], p=kn_params[freq]['p'])
    
    save_model(MODEL_SAVE_PATH + type(model).__name__ + '_freq_' + str(freq) + '.pkl', model)

##### DecisionTreeClassifier

In [7]:
def process_hyperopt_params_dt(params_dict, freq):
    dt_params[freq]['max_depth'] = params_dict['max_depth']
    dt_params[freq]['max_features'] = ['auto', 'sqrt', 'log2'][params_dict['max_features']]
    dt_params[freq]['criterion'] = ["gini", "entropy"][params_dict['criterion']]
    dt_params[freq]['min_samples_split'] = int(params_dict['min_samples_split'])
    dt_params[freq]['min_samples_leaf'] = int(params_dict['min_samples_leaf'])
    dt_params[freq]['min_weight_fraction_leaf'] = params_dict['min_weight_fraction_leaf']

    
dt_params = {freq:{'max_depth': None, 'max_features': None, 'criterion': None, 'min_samples_split': None,\
                   'min_samples_leaf': None, 'min_weight_fraction_leaf': None} for freq in freq_values}

hyperopt_params = {freq:None for freq in freq_values}

hyperopt_params[1] = {'criterion': 0, 'max_depth': 8, 'max_features': 0, 'min_samples_leaf': 9.0, 'min_samples_split': 6.0, 'min_weight_fraction_leaf': 0.09669755317264396}
hyperopt_params[5] = {'criterion': 1, 'max_depth': 12, 'max_features': 1, 'min_samples_leaf': 7.0, 'min_samples_split': 2.0, 'min_weight_fraction_leaf': 0.1932229501392354}
hyperopt_params[10] = {'criterion': 0, 'max_depth': 14, 'max_features': 0, 'min_samples_leaf': 5.0, 'min_samples_split': 10.0, 'min_weight_fraction_leaf': 0.09115164175711465}
hyperopt_params[15] = {'criterion': 1, 'max_depth': 14, 'max_features': 1, 'min_samples_leaf': 2.0, 'min_samples_split': 6.0, 'min_weight_fraction_leaf': 0.17703706480738823}
hyperopt_params[25] = {'criterion': 0, 'max_depth': 9, 'max_features': 1, 'min_samples_leaf': 2.0, 'min_samples_split': 6.0, 'min_weight_fraction_leaf': 0.11617897743504145}
hyperopt_params[50] = {'criterion': 0, 'max_depth': 7, 'max_features': 0, 'min_samples_leaf': 7.0, 'min_samples_split': 4.0, 'min_weight_fraction_leaf': 0.09505871131836718}
hyperopt_params[60] = {'criterion': 0, 'max_depth': 18, 'max_features': 2, 'min_samples_leaf': 5.0, 'min_samples_split': 5.0, 'min_weight_fraction_leaf': 0.13469241672290375}
hyperopt_params[75] = {'criterion': 1, 'max_depth': 11, 'max_features': 0, 'min_samples_leaf': 8.0, 'min_samples_split': 5.0, 'min_weight_fraction_leaf': 0.15876728911621113}
hyperopt_params[85] = {'criterion': 0, 'max_depth': 9, 'max_features': 0, 'min_samples_leaf': 3.0, 'min_samples_split': 2.0, 'min_weight_fraction_leaf': 0.28478884096971646}
hyperopt_params[100] = {'criterion': 0, 'max_depth': 14, 'max_features': 1, 'min_samples_leaf': 4.0, 'min_samples_split': 5.0, 'min_weight_fraction_leaf': 0.11506559228265428}

for freq in freq_values:
    process_hyperopt_params_dt(hyperopt_params[freq], freq)
    
for freq in freq_values:
    model = DecisionTreeClassifier(max_depth=dt_params[freq]['max_depth'], max_features=dt_params[freq]['max_features'],\
                                  criterion=dt_params[freq]['criterion'], min_samples_split=dt_params[freq]['min_samples_split'],\
                                  min_samples_leaf=dt_params[freq]['min_samples_leaf'], min_weight_fraction_leaf = dt_params[freq]['min_weight_fraction_leaf'])
    
    save_model(MODEL_SAVE_PATH + type(model).__name__ + '_freq_' + str(freq) + '.pkl', model)

##### RandomForest

In [8]:
def process_hyperopt_params_rf(params_dict, freq):
    rf_params[freq]['max_depth'] = params_dict['max_depth']
    rf_params[freq]['max_features'] = ['auto', 'sqrt', 'log2'][params_dict['max_features']]
    rf_params[freq]['criterion'] = ["gini", "entropy"][params_dict['criterion']]
    rf_params[freq]['min_samples_split'] = int(params_dict['min_samples_split'])
    rf_params[freq]['min_samples_leaf'] = int(params_dict['min_samples_leaf'])
    rf_params[freq]['min_weight_fraction_leaf'] = params_dict['min_weight_fraction_leaf']

    
rf_params = {freq:{'max_depth': None, 'max_features': None, 'criterion': None, 'min_samples_split': None,\
                   'min_samples_leaf': None, 'min_weight_fraction_leaf': None} for freq in freq_values}

hyperopt_params = {freq:None for freq in freq_values}

hyperopt_params[1] = {'criterion': 1, 'max_depth': 16, 'max_features': 0, 'min_samples_leaf': 4.0, 'min_samples_split': 4.0, 'min_weight_fraction_leaf': 0.020520288218793627}
hyperopt_params[5] = {'criterion': 1, 'max_depth': 16, 'max_features': 0, 'min_samples_leaf': 10.0, 'min_samples_split': 3.0, 'min_weight_fraction_leaf': 0.07532931418243288}
hyperopt_params[10] = {'criterion': 0, 'max_depth': 15, 'max_features': 1, 'min_samples_leaf': 6.0, 'min_samples_split': 3.0, 'min_weight_fraction_leaf': 0.031246140414796442}
hyperopt_params[15] = {'criterion': 1, 'max_depth': 8, 'max_features': 0, 'min_samples_leaf': 10.0, 'min_samples_split': 5.0, 'min_weight_fraction_leaf': 0.05374194402571921}
hyperopt_params[25] = {'criterion': 0, 'max_depth': 15, 'max_features': 1, 'min_samples_leaf': 7.0, 'min_samples_split': 9.0, 'min_weight_fraction_leaf': 0.049138676021825575}
hyperopt_params[50] = {'criterion': 0, 'max_depth': 17, 'max_features': 2, 'min_samples_leaf': 3.0, 'min_samples_split': 8.0, 'min_weight_fraction_leaf': 0.008207334297320162}
hyperopt_params[60] = {'criterion': 0, 'max_depth': 16, 'max_features': 0, 'min_samples_leaf': 5.0, 'min_samples_split': 8.0, 'min_weight_fraction_leaf': 0.07134633413704286}
hyperopt_params[75] = {'criterion': 1, 'max_depth': 15, 'max_features': 1, 'min_samples_leaf': 7.0, 'min_samples_split': 10.0, 'min_weight_fraction_leaf': 0.04550394739849741}
hyperopt_params[85] = {'criterion': 0, 'max_depth': 10, 'max_features': 0, 'min_samples_leaf': 4.0, 'min_samples_split': 4.0, 'min_weight_fraction_leaf': 0.43523486121190685}
hyperopt_params[100] = {'criterion': 1, 'max_depth': 16, 'max_features': 1, 'min_samples_leaf': 5.0, 'min_samples_split': 5.0, 'min_weight_fraction_leaf': 0.023938782641354465}

for freq in freq_values:
    process_hyperopt_params_rf(hyperopt_params[freq], freq)
    

for freq in freq_values:
    model = RandomForestClassifier(max_depth=rf_params[freq]['max_depth'], max_features=rf_params[freq]['max_features'],\
                                  criterion=rf_params[freq]['criterion'], min_samples_split=rf_params[freq]['min_samples_split'],\
                                  min_samples_leaf=rf_params[freq]['min_samples_leaf'], min_weight_fraction_leaf = rf_params[freq]['min_weight_fraction_leaf'],\
                                  verbose=0)
    save_model(MODEL_SAVE_PATH + type(model).__name__ + '_freq_' + str(freq) + '.pkl', model)

##### GaussianNB

In [12]:
def process_hyperopt_params_nb(params_dict, freq):
    nb_params[freq]['var_smoothing'] = params_dict['var_smoothing']
    
nb_params = {freq:{'var_smoothing': None} for freq in freq_values}

hyperopt_params = {freq:None for freq in freq_values}

hyperopt_params[1] = {'var_smoothing': 1.2460184378655685e-09}
hyperopt_params[5] = {'var_smoothing': 1.0282014918470923e-10}
hyperopt_params[10] = {'var_smoothing': 6.377556397965757e-10}
hyperopt_params[15] = {'var_smoothing': 2.2962117400154974e-09}
hyperopt_params[25] = {'var_smoothing': 1.2908377701761542e-08}
hyperopt_params[50] = {'var_smoothing': 6.490487471670385e-09}
hyperopt_params[60] = {'var_smoothing': 5.976912618238698e-09}
hyperopt_params[75] = {'var_smoothing': 4.791540789108842e-10}
hyperopt_params[85] = {'var_smoothing': 1.676267544339988e-10}
hyperopt_params[100] = {'var_smoothing': 5.838586353515949e-09}

for freq in freq_values:
    process_hyperopt_params_nb(hyperopt_params[freq], freq)
    

for freq in freq_values:
    model = GaussianNB(var_smoothing=nb_params[freq]['var_smoothing'])
    save_model(MODEL_SAVE_PATH + type(model).__name__ + '_freq_' + str(freq) + '.pkl', model)

##### SVC

In [13]:
def process_hyperopt_params_svc(params_dict, freq):
    svc_params[freq]['C'] = params_dict['C']
    svc_params[freq]['degree'] = params_dict['degree']
    svc_params[freq]['gamma'] = params_dict['gamma']
    svc_params[freq]['kernel'] = ['linear', 'poly', 'rbf', 'sigmoid'][params_dict['kernel']]
    svc_params[freq]['shrinking'] = [True, False][params_dict['shrinking']]
    
svc_params = {freq:{'C': None, 'degree': None, 'gamma': None, 'kernel': None, 'shrinking': None} for freq in freq_values}

hyperopt_params = {freq:None for freq in freq_values}

hyperopt_params[1] = {'C': 0.9725968938886453, 'degree': 2.0, 'gamma': 0.2566760333046866, 'kernel': 2, 'shrinking': 0}
hyperopt_params[5] = {'C': 0.01038270074688483, 'degree': 4.0, 'gamma': 52.03531554151558, 'kernel': 3, 'shrinking': 0}
hyperopt_params[10] = {'C': 0.1315774700046271, 'degree': 3.0, 'gamma': 2.1043740247691365, 'kernel': 1, 'shrinking': 1}
hyperopt_params[15] = {'C': 0.6257805425780407, 'degree': 5.0, 'gamma': 3.5660531892814364, 'kernel': 3, 'shrinking': 1}
hyperopt_params[25] = {'C': 0.9613151284305586, 'degree': 4.0, 'gamma': 10.506608098218031, 'kernel': 2, 'shrinking': 1}
hyperopt_params[50] = {'C': 0.01038270074688483, 'degree': 4.0, 'gamma': 52.03531554151558, 'kernel': 3, 'shrinking': 0}
hyperopt_params[60] = {'C': 0.7534243940434421, 'degree': 4.0, 'gamma': 62.396831263681776, 'kernel': 3, 'shrinking': 1}
hyperopt_params[75] = {'C': 0.04443332765147431, 'degree': 3.0, 'gamma': 4.598165485840099, 'kernel': 0, 'shrinking': 0}
hyperopt_params[85] = {'C': 0.3152762964124343, 'degree': 3.0, 'gamma': 8.211242324780438, 'kernel': 2, 'shrinking': 1}
hyperopt_params[100] = {'C': 0.01038270074688483, 'degree': 4.0, 'gamma': 52.03531554151558, 'kernel': 3, 'shrinking': 0}

for freq in freq_values:
    process_hyperopt_params_svc(hyperopt_params[freq], freq)
    

for freq in freq_values:
    model = SVC(C=svc_params[freq]['C'], kernel=svc_params[freq]['kernel'], degree=svc_params[freq]['degree'],\
                            gamma=svc_params[freq]['gamma'], shrinking=svc_params[freq]['shrinking'])
    save_model(MODEL_SAVE_PATH + type(model).__name__ + '_freq_' + str(freq) + '.pkl', model)

##### PassiveAggressiveClassifier

In [9]:
def process_hyperopt_params_pa(params_dict, freq):
    pa_params[freq]['C'] = params_dict['C']
    pa_params[freq]['fit_intercept'] = [True, False][params_dict['fit_intercept']]

    
pa_params = {freq:{'C': None, 'fit_intercept': None} for freq in freq_values}

hyperopt_params = {freq:None for freq in freq_values}

hyperopt_params[1] = {'C': 0.09606937087264018, 'fit_intercept': 0}
hyperopt_params[5] = {'C': 0.7232433580937414, 'fit_intercept': 1}
hyperopt_params[10] = {'C': 0.14130229226935603, 'fit_intercept': 0}
hyperopt_params[15] = {'C': 0.14875857076238322, 'fit_intercept': 0}
hyperopt_params[25] = {'C': 0.2704430957411047, 'fit_intercept': 0}
hyperopt_params[50] = {'C': 0.5089077894613963, 'fit_intercept': 0}
hyperopt_params[60] = {'C': 0.7574288735094685, 'fit_intercept': 0}
hyperopt_params[75] = {'C': 0.9283106465514318, 'fit_intercept': 1}
hyperopt_params[85] = {'C': 0.8758158654804743, 'fit_intercept': 0}
hyperopt_params[100] = {'C': 0.5022782066219091, 'fit_intercept': 1}

for freq in freq_values:
    process_hyperopt_params_pa(hyperopt_params[freq], freq)
    

for freq in freq_values:
    model = PassiveAggressiveClassifier(C=pa_params[freq]['C'], fit_intercept=pa_params[freq]['fit_intercept'])
    save_model(MODEL_SAVE_PATH + type(model).__name__ + '_freq_' + str(freq) + '.pkl', model)

##### MLP

In [10]:
def process_hyperopt_params_mlp(params_dict, freq):
    mlp_params[freq]['C'] = params_dict['C']
    mlp_params[freq]['fit_intercept'] = [True, False][params_dict['fit_intercept']]

    
mlp_params = {freq:{'first_size': None, 'second_size': None, 'activation': None, 'max_iter': None,} for freq in freq_values}

hyperopt_params = {freq:None for freq in freq_values}

hyperopt_params[1] = {'C': 0.024110225325426015, 'fit_intercept': 0}
hyperopt_params[5] = {'C': 0.4973703807068094, 'fit_intercept': 1}
hyperopt_params[10] = {'C': 0.13449519398904142, 'fit_intercept': 0}
hyperopt_params[15] = {'C': 0.14875857076238322, 'fit_intercept': 0}
hyperopt_params[25] = {'C': 0.2704430957411047, 'fit_intercept': 0}
hyperopt_params[50] = {'C': 0.5089077894613963, 'fit_intercept': 0}
hyperopt_params[60] = {'C': 0.7574288735094685, 'fit_intercept': 0}
hyperopt_params[75] = {'C': 0.9283106465514318, 'fit_intercept': 1}
hyperopt_params[85] = {'C': 0.8758158654804743, 'fit_intercept': 0}
hyperopt_params[100] = {'C': 0.5022782066219091, 'fit_intercept': 1}

for freq in freq_values:
    process_hyperopt_params_mlp(hyperopt_params[freq], freq)
    

for freq in freq_values:
    model = MLPClassifier(hidden_layer_sizes=(space['first_size'], space['second_size']), 
                          activation=space['activation'], batch_size=16,
                         max_iter=space['max_iter'])
    save_model(MODEL_SAVE_PATH + type(model).__name__ + '_freq_' + str(freq) + '.pkl', model)

NameError: name 'space' is not defined

### Reference time regressor

In [1]:
from sklearn.linear_model import LinearRegression
from sklearn.pipeline import Pipeline
from sklearn.linear_model import HuberRegressor
from sklearn import linear_model
from sklearn import tree
from sklearn import svm
from sklearn.linear_model import PassiveAggressiveRegressor
import xgboost as xgb

In [2]:
MODEL_SAVE_PATH = './models/reference_time_regressor/'

##### Huber

In [None]:
def process_hyperopt_params_hb(params_dict, freq):
    hb_params[freq]['epsilon'] = params_dict['epsilon']
    hb_params[freq]['max_iter'] = params_dict['max_iter']
    hb_params[freq]['alpha'] = params_dict['alpha']
    
hb_params = {freq:{'epsilon': None, 'max_iter': None, 'alpha': None} for freq in freq_values}

hyperopt_params = {freq:None for freq in freq_values}

hyperopt_params[1] = {'var_smoothing': 1.2460184378655685e-09}
hyperopt_params[5] = {'var_smoothing': 1.0282014918470923e-10}
hyperopt_params[10] = {'var_smoothing': 6.377556397965757e-10}
hyperopt_params[15] = {'var_smoothing': 2.2962117400154974e-09}
hyperopt_params[25] = {'var_smoothing': 1.2908377701761542e-08}
hyperopt_params[50] = {'var_smoothing': 6.490487471670385e-09}
hyperopt_params[60] = {'var_smoothing': 5.976912618238698e-09}
hyperopt_params[75] = {'var_smoothing': 4.791540789108842e-10}
hyperopt_params[85] = {'var_smoothing': 1.676267544339988e-10}
hyperopt_params[100] = {'var_smoothing': 5.838586353515949e-09}

for freq in freq_values:
    process_hyperopt_params_hb(hyperopt_params[freq], freq)
    

for freq in freq_values:
    model = HuberRegressor(epsilon=hb_params[freq]['epsilon'], max_iter=hb_params[freq]['max_iter'],\
                           alpha=hb_params[freq]['alpha'])
    save_model(MODEL_SAVE_PATH + type(model).__name__ + '_freq_' + str(freq) + '.pkl', model)

##### Ridge

In [None]:
def process_hyperopt_params_ridge(params_dict, freq):
    ridge_params[freq]['alpha'] = params_dict['alpha']
    ridge_params[freq]['solver'] = ['auto', 'svd', 'cholesky', 'lsqr', 'sparse_cg', 'sag', 'saga'][params_dict['solver']]
    ridge_params[freq]['max_iter'] = params_dict['max_iter']
    
ridge_params = {freq:{'solver': None, 'max_iter': None, 'alpha': None} for freq in freq_values}

hyperopt_params = {freq:None for freq in freq_values}

hyperopt_params[1] = {'var_smoothing': 1.2460184378655685e-09}
hyperopt_params[5] = {'var_smoothing': 1.0282014918470923e-10}
hyperopt_params[10] = {'var_smoothing': 6.377556397965757e-10}
hyperopt_params[15] = {'var_smoothing': 2.2962117400154974e-09}
hyperopt_params[25] = {'var_smoothing': 1.2908377701761542e-08}
hyperopt_params[50] = {'var_smoothing': 6.490487471670385e-09}
hyperopt_params[60] = {'var_smoothing': 5.976912618238698e-09}
hyperopt_params[75] = {'var_smoothing': 4.791540789108842e-10}
hyperopt_params[85] = {'var_smoothing': 1.676267544339988e-10}
hyperopt_params[100] = {'var_smoothing': 5.838586353515949e-09}

for freq in freq_values:
    process_hyperopt_params_ridge(hyperopt_params[freq], freq)
    

for freq in freq_values:
    model = linear_model.Ridge(solver=ridge_params[freq]['solver'], max_iter=ridge_params[freq]['max_iter'],\
                                alpha=ridge_params[freq]['alpha'])
    save_model(MODEL_SAVE_PATH + type(model).__name__ + '_freq_' + str(freq) + '.pkl', model)

##### Lasso

In [None]:
def process_hyperopt_params_lasso(params_dict, freq):
    lasso_params[freq]['alpha'] = params_dict['alpha']
    lasso_params[freq]['normalize'] = [True, False][params_dict['normalize']]
    lasso_params[freq]['max_iter'] = params_dict['max_iter']
    
lasso_params = {freq:{'normalize': None, 'max_iter': None, 'alpha': None} for freq in freq_values}

hyperopt_params = {freq:None for freq in freq_values}

hyperopt_params[1] = {'var_smoothing': 1.2460184378655685e-09}
hyperopt_params[5] = {'var_smoothing': 1.0282014918470923e-10}
hyperopt_params[10] = {'var_smoothing': 6.377556397965757e-10}
hyperopt_params[15] = {'var_smoothing': 2.2962117400154974e-09}
hyperopt_params[25] = {'var_smoothing': 1.2908377701761542e-08}
hyperopt_params[50] = {'var_smoothing': 6.490487471670385e-09}
hyperopt_params[60] = {'var_smoothing': 5.976912618238698e-09}
hyperopt_params[75] = {'var_smoothing': 4.791540789108842e-10}
hyperopt_params[85] = {'var_smoothing': 1.676267544339988e-10}
hyperopt_params[100] = {'var_smoothing': 5.838586353515949e-09}

for freq in freq_values:
    process_hyperopt_params_lasso(hyperopt_params[freq], freq)
    

for freq in freq_values:
    model = linear_model.Lasso(max_iter=lasso_params[freq]['max_iter'], alpha=lasso_params[freq]['alpha'],\
                               normalize=lasso_params[freq]['normalize'])
    save_model(MODEL_SAVE_PATH + type(model).__name__ + '_freq_' + str(freq) + '.pkl', model)

##### Decision Tree

In [None]:
def process_hyperopt_params_dt(params_dict, freq):
    dt_params[freq]['max_depth'] = params_dict['max_depth']
    dt_params[freq]['max_features'] = ['auto', 'sqrt', 'log2'][params_dict['max_features']]
    dt_params[freq]['min_samples_split'] = int(params_dict['min_samples_split'])
    dt_params[freq]['min_samples_leaf'] = int(params_dict['min_samples_leaf'])
    dt_params[freq]['min_weight_fraction_leaf'] = params_dict['min_weight_fraction_leaf']

    
dt_params = {freq:{'max_depth': None, 'max_features': None, 'min_samples_split': None,\
                   'min_samples_leaf': None, 'min_weight_fraction_leaf': None} for freq in freq_values}

hyperopt_params = {freq:None for freq in freq_values}

hyperopt_params[1] = {'criterion': 1, 'max_depth': 16, 'max_features': 1, 'min_samples_leaf': 3.0, 'min_samples_split': 10.0, 'min_weight_fraction_leaf': 0.0011012880111856103}
hyperopt_params[5] = {'criterion': 0, 'max_depth': 13, 'max_features': 1, 'min_samples_leaf': 6.0, 'min_samples_split': 5.0, 'min_weight_fraction_leaf': 0.0007125292830519577}
hyperopt_params[10] = {'criterion': 1, 'max_depth': 13, 'max_features': 0, 'min_samples_leaf': 4.0, 'min_samples_split': 8.0, 'min_weight_fraction_leaf': 0.0023420736375462502}
hyperopt_params[15] = {'criterion': 1, 'max_depth': 18, 'max_features': 2, 'min_samples_leaf': 3.0, 'min_samples_split': 9.0, 'min_weight_fraction_leaf': 0.00029634263861464793}
hyperopt_params[25] = {'criterion': 0, 'max_depth': 16, 'max_features': 1, 'min_samples_leaf': 10.0, 'min_samples_split': 10.0, 'min_weight_fraction_leaf': 0.001057073698162895}
hyperopt_params[50] = {'criterion': 1, 'max_depth': 15, 'max_features': 0, 'min_samples_leaf': 2.0, 'min_samples_split': 8.0, 'min_weight_fraction_leaf': 0.008641651616123503}
hyperopt_params[60] = {'criterion': 0, 'max_depth': 17, 'max_features': 1, 'min_samples_leaf': 2.0, 'min_samples_split': 7.0, 'min_weight_fraction_leaf': 0.0035923695507163547}
hyperopt_params[75] = {'criterion': 0, 'max_depth': 9, 'max_features': 1, 'min_samples_leaf': 7.0, 'min_samples_split': 6.0, 'min_weight_fraction_leaf': 0.008486706928488966}
hyperopt_params[85] = {'criterion': 0, 'max_depth': 14, 'max_features': 0, 'min_samples_leaf': 2.0, 'min_samples_split': 7.0, 'min_weight_fraction_leaf': 0.0008832297964638193}
hyperopt_params[100] = {'criterion': 0, 'max_depth': 13, 'max_features': 2, 'min_samples_leaf': 5.0, 'min_samples_split': 3.0, 'min_weight_fraction_leaf': 0.002954853576859179}

for freq in freq_values:
    process_hyperopt_params_dt(hyperopt_params[freq], freq)
    
for freq in freq_values:
    model =  tree.DecisionTreeRegressor(max_depth=dt_params[freq]['max_depth'], max_features=dt_params[freq]['max_features'],\
                                  criterion=dt_params[freq]['criterion'], min_samples_split=dt_params[freq]['min_samples_split'],\
                                  min_samples_leaf=dt_params[freq]['min_samples_leaf'], min_weight_fraction_leaf = dt_params[freq]['min_weight_fraction_leaf'])
    
    save_model(MODEL_SAVE_PATH + type(model).__name__ + '_freq_' + str(freq) + '.pkl', model)

##### Passive Aggresive

In [None]:
def process_hyperopt_params_pa(params_dict, freq):
    pa_params[freq]['tol'] = params_dict['tol']
    pa_params[freq]['C'] = params_dict['C']
    pa_params[freq]['max_iter'] = params_dict['max_iter']
    
pa_params = {freq:{'tol': None, 'max_iter': None, 'C': None} for freq in freq_values}

hyperopt_params = {freq:None for freq in freq_values}

hyperopt_params[1] = {'var_smoothing': 1.2460184378655685e-09}
hyperopt_params[5] = {'var_smoothing': 1.0282014918470923e-10}
hyperopt_params[10] = {'var_smoothing': 6.377556397965757e-10}
hyperopt_params[15] = {'var_smoothing': 2.2962117400154974e-09}
hyperopt_params[25] = {'var_smoothing': 1.2908377701761542e-08}
hyperopt_params[50] = {'var_smoothing': 6.490487471670385e-09}
hyperopt_params[60] = {'var_smoothing': 5.976912618238698e-09}
hyperopt_params[75] = {'var_smoothing': 4.791540789108842e-10}
hyperopt_params[85] = {'var_smoothing': 1.676267544339988e-10}
hyperopt_params[100] = {'var_smoothing': 5.838586353515949e-09}

for freq in freq_values:
    process_hyperopt_params_pa(hyperopt_params[freq], freq)
    

for freq in freq_values:
    model = PassiveAggressiveRegressor(max_iter=pa_params[freq]['max_iter'], tol=pa_params[freq]['tol'], 
                                       C = pa_params[freq]['C'])
    save_model(MODEL_SAVE_PATH + type(model).__name__ + '_freq_' + str(freq) + '.pkl', model)

##### Xgboost

In [8]:
def process_hyperopt_params_xgboost(params_dict, freq):
    xgboost_params[freq]['booster'] = ['gbtree', 'gblinear', 'dart'][params_dict['booster']]
    xgboost_params[freq]['eta'] = params_dict['eta']
    xgboost_params[freq]['gamma'] = params_dict['gamma']
    xgboost_params[freq]['max_depth'] = int(params_dict['max_depth'])
    xgboost_params[freq]['lambda'] = params_dict['lambda']
    xgboost_params[freq]['alpha'] = params_dict['alpha']
    
xgboost_params = {freq:{'booster': None, 'eta': None, 'gamma': None, 'max_depth': None,\
                                        'lambda': None, 'alpha': None} for freq in freq_values}

hyperopt_params = {freq:None for freq in freq_values}

hyperopt_params[1] = {'alpha': 2.22456831562574, 'booster': 0, 'eta': 0.3973668207576586, 'gamma': 0.001076016879970481, 'lambda': 0.003034526844710481, 'max_depth': 30.0}
hyperopt_params[5] = {'alpha': 2.22456831562574, 'booster': 0, 'eta': 0.3973668207576586, 'gamma': 0.001076016879970481, 'lambda': 0.003034526844710481, 'max_depth': 30.0}
hyperopt_params[10] = {'alpha': 0.0015324479870338621, 'booster': 0, 'eta': 0.3926055790235545, 'gamma': 0.005251357110207934, 'lambda': 0.0019836344587500485, 'max_depth': 18.0}
hyperopt_params[15] = {'alpha': 2.190195481597179, 'booster': 2, 'eta': 0.012191516088636667, 'gamma': 0.0014854666707328121, 'lambda': 0.0015027090810100356, 'max_depth': 10.0}
hyperopt_params[25] = {'alpha': 0.0253714007844135, 'booster': 0, 'eta': 0.12683372051319142, 'gamma': 0.008784227550098927, 'lambda': 0.008299711765905143, 'max_depth': 14.0}
hyperopt_params[50] = {'alpha': 2.4685178375491375, 'booster': 0, 'eta': 0.0013604014366323481, 'gamma': 0.002834749664061706, 'lambda': 0.025493769481965915, 'max_depth': 34.0}
hyperopt_params[60] = {'alpha': 0.005612636034304831, 'booster': 2, 'eta': 0.08112377250707214, 'gamma': 0.018933891853825987, 'lambda': 0.06147556252955546, 'max_depth': 26.0}
hyperopt_params[75] = {'alpha': 0.3343461827542807, 'booster': 2, 'eta': 0.021903641855413544, 'gamma': 0.023073642695620584, 'lambda': 0.031558300328255844, 'max_depth': 16.0}
hyperopt_params[85] = {'alpha': 0.018277312948076187, 'booster': 2, 'eta': 0.05846120840596303, 'gamma': 0.0011657287010673463, 'lambda': 0.05454604868215682, 'max_depth': 40.0}
hyperopt_params[100] ={'alpha': 0.0013348880067320093, 'booster': 2, 'eta': 0.009090559751072305, 'gamma': 0.0020709201909402436, 'lambda': 0.055812087501417924, 'max_depth': 42.0}

for freq in freq_values:
    process_hyperopt_params_xgboost(hyperopt_params[freq], freq)
    

for freq in freq_values:
    model =  xgb.XGBRegressor(objective="reg:linear", booster=xgboost_params[freq]['booster'],\
                              eta=xgboost_params[freq]['eta'], gamma=xgboost_params[freq]['gamma'],\
                              max_depth=xgboost_params[freq]['max_depth'], reg_lambda=xgboost_params[freq]['lambda'],
                               alpha=xgboost_params[freq]['alpha'], verbosity=0)
    save_model(MODEL_SAVE_PATH + type(model).__name__ + '_freq_' + str(freq) + '.pkl', model)