In [2]:
import h2o
import mlflow
from mlflow.tracking import MlflowClient
from mlflow.entities import ViewType

In [7]:
# Connect to an H2O instance, starting a local server if none is running
# (the captured output below shows a fresh local server being launched).
h2o.init()

Checking whether there is an H2O instance running at http://localhost:54321 ..... not found.
Attempting to start a local H2O server...
  Java Version: openjdk version "11.0.15" 2022-04-19; OpenJDK Runtime Environment (build 11.0.15+10-Ubuntu-0ubuntu0.20.04.1); OpenJDK 64-Bit Server VM (build 11.0.15+10-Ubuntu-0ubuntu0.20.04.1, mixed mode, sharing)
  Starting server from /home/duong/.cache/pypoetry/virtualenvs/cross-sell-TlDnGNar-py3.8/lib/python3.8/site-packages/h2o/backend/bin/h2o.jar
  Ice root: /tmp/tmp4kmwyhht
  JVM stdout: /tmp/tmp4kmwyhht/h2o_duong_started_from_python.out
  JVM stderr: /tmp/tmp4kmwyhht/h2o_duong_started_from_python.err
  Server is running at http://127.0.0.1:54321
Connecting to H2O server at http://127.0.0.1:54321 ... successful.


0,1
H2O_cluster_uptime:,01 secs
H2O_cluster_timezone:,Asia/Ho_Chi_Minh
H2O_data_parsing_timezone:,UTC
H2O_cluster_version:,3.36.1.2
H2O_cluster_version_age:,19 days
H2O_cluster_name:,H2O_from_python_duong_kn1x1c
H2O_cluster_total_nodes:,1
H2O_cluster_free_memory:,7.816 Gb
H2O_cluster_total_cores:,24
H2O_cluster_allowed_cores:,24


In [4]:
# Make sure the MLflow experiment used by this notebook exists, then select it.
client = MlflowClient()
experiment_name = 'h2o-automl'

# get_experiment_by_name returns None for an unknown name instead of raising,
# so test the result explicitly rather than relying on an exception handler.
experiment = client.get_experiment_by_name(experiment_name)
if experiment is None:
    mlflow.create_experiment(experiment_name)
    experiment = client.get_experiment_by_name(experiment_name)
experiment_id = experiment.experiment_id

# Last expression of the cell: activates the experiment and displays it.
mlflow.set_experiment(experiment_name)

<Experiment: artifact_location='file:///home/duong/Desktop/GitFolder/E2E-ML-pipeline-with-data/model_dev/mlruns/2', experiment_id='2', lifecycle_stage='active', name='h2o-automl', tags={}>

In [30]:
# Load the processed training data through H2O.
# The path is relative, so it resolves against the notebook's working directory.
train_df = h2o.import_file('data/processed/train.csv')

Parse progress: |████████████████████████████████████████████████████████████████| (done) 100%


In [31]:
# Name of the target column; every other column is used as a predictor.
y_col = 'Response'

# Convert the target to a factor (categorical) column before training.
train_df[y_col] = train_df[y_col].asfactor()

x_col = [name for name in train_df.columns if name != y_col]


In [32]:
from h2o.automl import H2OAutoML, get_leaderboard

# Train H2O AutoML and record the leader's metrics and model in one MLflow run.
# Everything that belongs to the run stays inside the `with` block: the context
# manager ends the run on exit, so logging after it would land in a separate,
# implicitly started run and leave this one empty.
with mlflow.start_run():
    clf = H2OAutoML(
        max_runtime_secs=3600,
        max_models=13,
        seed=42,
        balance_classes=True,
        sort_metric='logloss',
        verbosity='info',
        exclude_algos=['GLM', 'DRF'],
    )
    clf.train(x=x_col, y=y_col, training_frame=train_df)

    # log metrics
    # NOTE(review): logloss()/auc() are called without arguments; presumably
    # these are training metrics rather than cross-validation ones -- confirm
    # this is the figure that should drive model selection later on.
    mlflow.log_metric('log_loss', clf.leader.logloss())
    mlflow.log_metric('auc', clf.leader.auc())
    # mlflow.log_metric('f1', clf.leader.F1())

    # log best model
    mlflow.h2o.log_model(clf.leader, artifact_path="model")

    # Resolved while the run is still active so the URI points at this run.
    model_uri = mlflow.get_artifact_uri("model")
    print(model_uri)

AutoML progress: |
01:39:32.732: Project: AutoML_5_20220615_13932
01:39:32.732: 5-fold cross-validation will be used.
01:39:32.733: Setting stopping tolerance adaptively based on the training frame: 0.0033065124063248246
01:39:32.733: Build control seed: 42
01:39:32.733: training frame: Frame key: AutoML_5_20220615_13932_training_py_9_sid_8647    cols: 208    rows: 91466  chunks: 96    size: 3596833  checksum: -1078902510547953999
01:39:32.733: validation frame: NULL
01:39:32.733: leaderboard frame: NULL
01:39:32.733: blending frame: NULL
01:39:32.733: response column: Response
01:39:32.733: fold column: null
01:39:32.733: weights column: null
01:39:32.734: Loading execution steps: [{XGBoost : [def_2 (1g, 10w), def_1 (2g, 10w), def_3 (3g, 10w), grid_1 (4g, 90w), lr_search (7g, 30w)]}, {GLM : [def_1 (1g, 10w)]}, {DRF : [def_1 (2g, 10w), XRT (3g, 10w)]}, {GBM : [def_5 (1g, 10w), def_2 (2g, 10w), def_3 (2g, 10w), def_4 (2g, 10w), def_1 (3g, 10w), grid_1 (4g, 60w), lr_annealing (7g, 10w)]}

In [33]:
# Fetch the AutoML leaderboard with the extra columns requested
# (the output below includes training_time_ms, predict_time_per_row_ms, algo).
lb = get_leaderboard(clf, extra_columns='all')
# Show only the first rows rather than the whole board.
lb.head()

model_id,logloss,auc,aucpr,mean_per_class_error,rmse,mse,training_time_ms,predict_time_per_row_ms,algo
StackedEnsemble_AllModels_1_AutoML_5_20220615_13932,0.266317,0.857021,0.367406,0.228974,0.295279,0.0871899,9481,0.006747,StackedEnsemble
StackedEnsemble_BestOfFamily_1_AutoML_5_20220615_13932,0.266492,0.856847,0.367662,0.230014,0.295329,0.0872193,8076,0.001838,StackedEnsemble
XGBoost_grid_1_AutoML_5_20220615_13932_model_1,0.266941,0.855948,0.364598,0.23198,0.295559,0.0873553,779,0.000581,XGBoost
XGBoost_grid_1_AutoML_5_20220615_13932_model_2,0.267374,0.855214,0.359536,0.227673,0.29601,0.0876217,862,0.000567,XGBoost
GBM_grid_1_AutoML_5_20220615_13932_model_1,0.267426,0.855763,0.365725,0.231263,0.295726,0.087454,2386,0.001249,GBM
GBM_5_AutoML_5_20220615_13932,0.267435,0.855685,0.366115,0.231823,0.295762,0.0874752,2771,0.001311,GBM
GBM_2_AutoML_5_20220615_13932,0.267473,0.855818,0.366347,0.241591,0.295717,0.0874484,3005,0.001204,GBM
GBM_3_AutoML_5_20220615_13932,0.267691,0.855805,0.367635,0.23417,0.295743,0.0874638,2982,0.00125,GBM
GBM_1_AutoML_5_20220615_13932,0.268981,0.853547,0.361923,0.236233,0.296404,0.0878554,5024,0.001671,GBM
GBM_4_AutoML_5_20220615_13932,0.269303,0.854347,0.362843,0.230766,0.296299,0.0877933,3340,0.00118,GBM




In [43]:
# Display the top-ranked AutoML model together with its metric tables.
clf.leader

Model Details
H2OStackedEnsembleEstimator :  Stacked Ensemble
Model Key:  StackedEnsemble_AllModels_1_AutoML_5_20220615_13932

No model summary for this model

ModelMetricsBinomialGLM: stackedensemble
** Reported on train data. **

MSE: 0.08404998845418953
RMSE: 0.28991376037399386
LogLoss: 0.25697248180153504
Null degrees of freedom: 9974
Residual degrees of freedom: 9964
Null deviance: 7360.460475191961
Residual deviance: 5126.601011940624
AIC: 5148.601011940624
AUC: 0.8695502096952691
AUCPR: 0.4061097648176061
Gini: 0.7391004193905382

Confusion Matrix (Act/Pred) for max f1 @ threshold = 0.2735832950671382: 


Unnamed: 0,Unnamed: 1,0,1,Error,Rate
0,0,7250.0,1518.0,0.1731,(1518.0/8768.0)
1,1,356.0,851.0,0.2949,(356.0/1207.0)
2,Total,7606.0,2369.0,0.1879,(1874.0/9975.0)



Maximum Metrics: Maximum metrics at their respective thresholds


Unnamed: 0,metric,threshold,value,idx
0,max f1,0.273583,0.475951,168.0
1,max f2,0.165407,0.644397,255.0
2,max f0point5,0.307282,0.412774,135.0
3,max accuracy,0.410666,0.882306,40.0
4,max precision,0.505883,0.75,7.0
5,max recall,0.003224,1.0,390.0
6,max specificity,0.579751,0.999886,0.0
7,max absolute_mcc,0.211449,0.419941,221.0
8,max min_per_class_accuracy,0.241425,0.788663,197.0
9,max mean_per_class_accuracy,0.165407,0.806658,255.0



Gains/Lift Table: Avg response rate: 12.10 %, avg score: 12.51 %


Unnamed: 0,group,cumulative_data_fraction,lower_threshold,lift,cumulative_lift,response_rate,score,cumulative_response_rate,cumulative_score,capture_rate,cumulative_capture_rate,gain,cumulative_gain,kolmogorov_smirnov
0,1,0.010025,0.429303,5.123861,5.123861,0.62,0.455447,0.62,0.455447,0.051367,0.051367,412.386081,412.386081,0.047033
1,2,0.02005,0.408906,4.462717,4.793289,0.54,0.417489,0.58,0.436468,0.044739,0.096106,346.271748,379.328915,0.086526
2,3,0.030075,0.394657,3.305717,4.297432,0.4,0.401371,0.52,0.424769,0.03314,0.129246,230.571665,329.743165,0.112823
3,4,0.04,0.385796,4.257362,4.28749,0.515152,0.390234,0.518797,0.4162,0.042254,0.1715,325.736236,328.748964,0.149602
4,5,0.050025,0.378242,2.644573,3.958248,0.32,0.38175,0.478958,0.409296,0.026512,0.198012,164.457332,295.82479,0.168358
5,6,0.10005,0.347255,2.865175,3.411712,0.346693,0.362951,0.412826,0.386124,0.143331,0.341342,186.517526,241.171158,0.274508
6,7,0.150075,0.321654,3.014231,3.279218,0.364729,0.334209,0.396794,0.368819,0.150787,0.492129,201.423062,227.921792,0.389141
7,8,0.2,0.297035,2.555624,3.098592,0.309237,0.309663,0.374937,0.354052,0.127589,0.619718,155.562432,209.859155,0.477497
8,9,0.30005,0.223667,1.954281,2.717027,0.236473,0.263174,0.328767,0.323749,0.195526,0.815244,95.428139,171.702739,0.586116
9,10,0.4,0.130167,1.201928,2.338442,0.145436,0.17681,0.282957,0.287033,0.120133,0.935377,20.192807,133.844242,0.609077




ModelMetricsBinomialGLM: stackedensemble
** Reported on cross-validation data. **

MSE: 0.08718992504946824
RMSE: 0.2952794016680951
LogLoss: 0.2663165059098335
Null degrees of freedom: 91465
Residual degrees of freedom: 91453
Null deviance: 68369.8558411503
Residual deviance: 48717.811059097665
AIC: 48743.811059097665
AUC: 0.8570206668351826
AUCPR: 0.3674058068903915
Gini: 0.7140413336703653

Confusion Matrix (Act/Pred) for max f1 @ threshold = 0.23638010623671257: 


Unnamed: 0,Unnamed: 1,0,1,Error,Rate
0,0,62901.0,17274.0,0.2155,(17274.0/80175.0)
1,1,2738.0,8553.0,0.2425,(2738.0/11291.0)
2,Total,65639.0,25827.0,0.2188,(20012.0/91466.0)



Maximum Metrics: Maximum metrics at their respective thresholds


Unnamed: 0,metric,threshold,value,idx
0,max f1,0.23638,0.460855,198.0
1,max f2,0.119188,0.638248,290.0
2,max f0point5,0.305238,0.389641,136.0
3,max accuracy,0.670512,0.876566,0.0
4,max precision,0.670512,1.0,0.0
5,max recall,0.00067,1.0,397.0
6,max specificity,0.670512,1.0,0.0
7,max absolute_mcc,0.174254,0.404617,246.0
8,max min_per_class_accuracy,0.227315,0.77498,205.0
9,max mean_per_class_accuracy,0.119188,0.799267,290.0



Gains/Lift Table: Avg response rate: 12.34 %, avg score: 12.35 %


Unnamed: 0,group,cumulative_data_fraction,lower_threshold,lift,cumulative_lift,response_rate,score,cumulative_response_rate,cumulative_score,capture_rate,cumulative_capture_rate,gain,cumulative_gain,kolmogorov_smirnov
0,1,0.010004,0.424392,3.612155,3.612155,0.445902,0.4512,0.445902,0.4512,0.036135,0.036135,261.215476,261.215476,0.029811
1,2,0.020007,0.40469,3.603301,3.607728,0.444809,0.413873,0.445355,0.432536,0.036046,0.072181,260.330144,260.77281,0.059522
2,3,0.03,0.392369,3.545203,3.586901,0.437637,0.398375,0.442784,0.421157,0.035426,0.107608,254.520273,258.69015,0.088537
3,4,0.040004,0.382189,3.373115,3.53344,0.416393,0.387014,0.436185,0.412619,0.033744,0.141352,237.31151,253.344029,0.11562
4,5,0.050008,0.374311,3.328849,3.492513,0.410929,0.378031,0.431132,0.4057,0.033301,0.174652,232.88485,249.251299,0.142198
5,6,0.100004,0.345659,2.905159,3.198868,0.358627,0.359834,0.394884,0.38277,0.145248,0.319901,190.515913,219.886817,0.250864
6,7,0.150001,0.319993,2.667786,3.021854,0.329324,0.332313,0.373032,0.365952,0.133381,0.453281,166.778637,202.18538,0.345991
7,8,0.200009,0.295054,2.534374,2.899971,0.312855,0.307625,0.357986,0.351369,0.126738,0.580019,153.437428,189.99706,0.433527
8,9,0.300002,0.220194,2.077012,2.625671,0.256396,0.260827,0.324125,0.32119,0.207688,0.787707,107.701164,162.567094,0.556388
9,10,0.400007,0.128482,1.35943,2.309102,0.167815,0.173887,0.285047,0.284364,0.135949,0.923656,35.943041,130.910215,0.597395




Cross-Validation Metrics Summary: 


Unnamed: 0,Unnamed: 1,mean,sd,cv_1_valid,cv_2_valid,cv_3_valid,cv_4_valid,cv_5_valid
0,accuracy,0.78298,0.011607,0.770423,0.785275,0.77986,0.777913,0.801432
1,auc,0.857099,0.005759,0.850172,0.858423,0.856679,0.854427,0.865794
2,err,0.21702,0.011607,0.229577,0.214725,0.22014,0.222087,0.198568
3,err_count,3969.8,209.13799,4193.0,3943.0,4053.0,4028.0,3632.0
4,f0point5,0.374986,0.015975,0.359241,0.372234,0.369947,0.371541,0.401969
5,f1,0.461815,0.013598,0.447781,0.459937,0.458372,0.458457,0.48453
6,f2,0.601304,0.005723,0.594239,0.601706,0.602346,0.598456,0.609773
7,lift_top_group,3.644327,0.390903,3.435338,3.421163,3.669512,3.381819,4.313803
8,logloss,0.26633,0.003246,0.270012,0.262098,0.264568,0.269052,0.26592
9,max_per_class_error,0.246536,0.01015,0.240054,0.24267,0.238116,0.248568,0.263271



See the whole table with table.as_data_frame()




In [10]:
# predict
test_df = h2o.import_file('data/processed/test.csv')
X_test = test_df.drop('Response')
y_test = test_df['Response']


Parse progress: |████████████████████████████████████████████████████████████████| (done) 100%


In [12]:
# Show the artifact URI produced by the training cell.
# This only works if that cell ran in the current kernel session; the NameError
# captured below came from a session in which it had not been executed.
model_uri

NameError: name 'model_uri' is not defined

In [5]:
# Get dataframe of all runs
all_experiments = [exp.experiment_id for exp in client.list_experiments()]
runs = mlflow.search_runs(experiment_ids=all_experiments, run_view_type=ViewType.ALL)

# Identify best model (experiment id and run id) amongst all runs in the experiment
run_id, exp_id = runs.loc[runs['metrics.log_loss'].idxmin()]['run_id'], runs.loc[runs['metrics.log_loss'].idxmin()]['experiment_id']
run_id, exp_id

('c1ea60db0880418daff84df38bebb190', '2')

In [8]:
# Load the selected run's model through a `runs:/` URI so MLflow resolves the
# artifact location itself. This avoids hard-coding the local `mlruns/` layout,
# which depends on the working directory and on the default file store.
best_model = mlflow.h2o.load_model(f"runs:/{run_id}/model")

In [13]:
# Score the test features with the model loaded from MLflow.
preds = best_model.predict(X_test)

stackedensemble prediction progress: |███████████████████████████████████████████| (done) 100%


In [14]:
# Bring both H2O frames into local data frames and keep only the label columns.
y_true = y_test.as_data_frame()['Response']
y_pred = preds.as_data_frame()['predict']

In [15]:
from sklearn.metrics import confusion_matrix

# Keep the function and its result under different names: binding the result to
# the function's own alias makes the cell fail on a second execution because
# the name then refers to an array, not a callable.
cm = confusion_matrix(y_true, y_pred)
print(cm)

[[ 9241 10779]
 [    5  2842]]


In [16]:
# Flatten the 2x2 matrix row by row into a 1-D array.
# NOTE(review): with labels ordered (0, 1) this is presumably tn, fp, fn, tp -- confirm.
cm.ravel()

array([ 9241, 10779,     5,  2842])

In [17]:
# End the currently active MLflow run, if one is open. Runs opened implicitly by
# logging calls made outside a `with mlflow.start_run()` block stay active until
# this is called.
mlflow.end_run()

In [22]:
# Export the selected model in MLflow format to the local "models" directory.
# NOTE(review): save_model presumably refuses to write into an existing path, so
# re-running this cell may require removing the directory first -- confirm.
mlflow.h2o.save_model(best_model, "models")

