# Painless machine learning with AutoML

In [1]:
import h2o
from h2o.automl import H2OAutoML
from h2o.estimators import H2ODeepLearningEstimator
from h2o.explanation import explain
h2o.init()

Checking whether there is an H2O instance running at http://localhost:54321 . connected.


0,1
H2O_cluster_uptime:,15 hours 49 mins
H2O_cluster_timezone:,Europe/Warsaw
H2O_data_parsing_timezone:,UTC
H2O_cluster_version:,3.32.0.3
H2O_cluster_version_age:,1 month and 5 days
H2O_cluster_name:,H2O_from_python_root_7kueog
H2O_cluster_total_nodes:,1
H2O_cluster_free_memory:,3.028 Gb
H2O_cluster_total_cores:,12
H2O_cluster_allowed_cores:,12


## Loading the data

In [3]:
data = h2o.import_file('train.csv')
data["Survived"] = data["Survived"].asfactor()
data.show()

H2OResponseError: Server error water.exceptions.H2ONotFoundArgumentException:
  Error: File C:\Users\wojciech.blachowski\Documents\automl\painless-ml-with-automl\train.csv does not exist
  Request: POST /3/ImportFilesMulti
    data: {'paths': '[C:\\Users\\wojciech.blachowski\\Documents\\automl\\painless-ml-with-automl\\train.csv]'}


In [None]:
target = "Survived"
predictors = data.columns
for col in ["PassengerId", "Name", target]: predictors.remove(col)

In [None]:
train, test = data.split_frame(ratios = [0.8], seed=1234)

## Choosing a model ourselves

In [None]:
nn_model = H2ODeepLearningEstimator(seed=1234)
nn_model.train(x=predictors, y=target, training_frame=train, validation_frame=test)

In [None]:
accuracy = nn_model.model_performance(test).accuracy()
# The accuracy has [[threshold, max_accuracy]] format, printing only the actual accuracy:
print(f"Accuracy: {round(accuracy[0][1]*100, 2)}%") 

## Finding the best model using AutoML

In [None]:
aml = H2OAutoML(max_models=20, seed=1234)
aml.train(x=predictors, y=target, training_frame=train)

In [None]:
lb = aml.leaderboard
lb.head()

In [None]:
leader = aml.leader
accuracy = leader.model_performance(test).accuracy()
print(f"Accuracy: {round(accuracy[0][1]*100, 2)}%") 

## Bonus: Explainable AI

In [None]:
explain(leader, test)

In [None]:
explain(h2o.get_model('XGBoost_grid__1_AutoML_20210129_222349_model_3'), test)