In [1]:
import warnings
warnings.simplefilter("ignore")


import pandas as pd
import io
import requests
import sys
import json


sys.path.append("../modules")
from data_manager import DataManager
from trainer import TrainerManager


import pyspark.ml
import pyspark.sql.functions as f
import pyspark.sql.types as t

from pyspark.ml.tuning import CrossValidator
from pyspark.mllib.evaluation import MulticlassMetrics

import toniq

'''We Use hyperopt for Optimizations'''
import hyperopt
from hyperopt import hp
from hyperopt import fmin, tpe

from functools import partial

import tempfile
from shutil import make_archive




# Setup Config

In [2]:
# Single configuration dict handed to TrainerManager.
# - "hyperopt": "max_evals" is the trial budget read by the fmin call later in
#   this notebook; "metric" is presumably the score the trainer optimizes
#   (TODO confirm against TrainerManager).
# - "mlflow": experiment name and tags (consumed by the trainer; not shown here).
# - "data": one entry per split, keyed by split name, all following the same
#   naming pattern.
config = {
    "provider": "gcp",
    "hyperopt": {"metric": "fMeasure", "max_evals": 100},
    "mlflow": {"experiment_name": "EXP5", "tags": {"version": "0.1.0"}},
    "data": {
        split: {
            "name": f"income_transformed_data_{split}",
            "store": "feature",
            "partition": split,
        }
        for split in ("train", "test")
    },
}

## Initialize TrainerManager

In [3]:
# Build the trainer from the config dict defined above. Its `run_experiment`
# method is used as the objective function passed to hyperopt's `fmin` in the
# search cell further down.
trainer = TrainerManager(config)

s3_endpoint is 10.2.3.167:9000


In [None]:

# Search space: hyperopt first chooses one of two candidate models, then
# samples that candidate's hyper-parameters. Every range uses hp.quniform with
# a step of 1, so sampled values fall on whole numbers (hyperopt returns them
# as floats). The GBT labels carry a "_GBT" suffix because hyperopt requires
# every label in a space to be unique.
random_forest_candidate = {
    'type': 'classification',
    'name': 'RandomForestClassifier',
    'params': {
        "maxDepth": hp.quniform("maxDepth", 5, 10, 1),
        "maxBins": hp.quniform("maxBins", 45, 60, 1),
        "minInstancesPerNode": hp.quniform("minInstancesPerNode", 40, 60, 1),
        "numTrees": hp.quniform("numTrees", 40, 60, 1),
    },
}

gbt_candidate = {
    'type': 'classification',
    'name': 'GBTClassifier',
    'params': {
        "maxDepth": hp.quniform("maxDepth_GBT", 5, 10, 1),
        "maxBins": hp.quniform("maxBins_GBT", 45, 60, 1),
        "minInstancesPerNode": hp.quniform("minInstancesPerNode_GBT", 40, 60, 1),
    },
}

# Option order matters: `best["model"]` reports the index into this list.
space = hp.choice('model', [random_forest_candidate, gbt_candidate])

# Minimize the trainer's objective with TPE; the trial budget comes from the
# config cell.
max_evals = config["hyperopt"]["max_evals"]
best = fmin(trainer.run_experiment, space, algo=tpe.suggest, max_evals=max_evals)

# `best` maps labels to raw sampled values (the model choice as an index);
# space_eval resolves it back into the concrete candidate dict.
print(best)
print("Best ", hyperopt.space_eval(space, best))

 83%|████████▎ | 83/100 [21:55<04:15, 15.05s/trial, best loss: -0.691]