In [1]:
!pwd
# /home/jovyan/work/my-notebooks    <- the directory this notebook is executed from

/home/jovyan/work/my-notebooks


In [2]:
!ls ..
# h2o_ai_cloud.py must be present in the parent directory

 Data		      h2o_ai_cloud.py   __pycache__
'Example Notebooks'   my-notebooks      README.md


In [3]:
# This notebook uses common methods for connecting to the H2O AI Cloud from the h2o_ai_cloud.py file.
# That file lives one directory above this notebook, so the parent directory is added to the import path.
# You could also directly use the code from the file in your notebooks.
import sys

# Guard the append so that re-running this cell does not keep adding duplicate
# "../" entries to sys.path.
if "../" not in sys.path:
    sys.path.append("../")

In [4]:
from h2o_ai_cloud import token_provider, steam_client   # imported from the h2o_ai_cloud.py file
from h2osteam.clients import DriverlessClient

In [5]:
# Build the Steam connection used by every cell below. As the cell output shows,
# this prompts interactively for a platform token.
steam = steam_client(token_provider())

Visit https://internal.dedicated.h2o.ai/auth/get-platform-token to get your platform token


Enter your platform token:  ·························································································································································································································································································································································································································································································································································································································································································································································································································································


In [6]:
# Display the connection object returned by steam_client.
steam

<h2osteam.backend.connection.SteamConnection at 0x7fef4d056340>

In [7]:
# Wrap the Steam connection in a Driverless AI client and display it.
DriverlessClient(steam)

<h2osteam.clients.driverless.driverless_client.DriverlessClient at 0x7fef6410c580>

ドキュメント: [h2osteam.clients.driverless.driverless_client.DriverlessClient](https://docs.h2o.ai/enterprise-steam/latest-stable/docs/python-docs/clients.html#h2osteam-clients-driverless)

In [8]:
# Driverless AI (DAI) instances on Steam
DriverlessClient(steam).get_instances()

[<h2osteam.clients.driverless.driverless_instance.DriverlessInstance at 0x7fef6814a640>,
 <h2osteam.clients.driverless.driverless_instance.DriverlessInstance at 0x7fef4dcae9d0>]

In [9]:
# Take the first Driverless AI instance returned by Steam.
dai_instances = DriverlessClient(steam).get_instances()
if not dai_instances:
    # Indexing an empty list would fail with a bare "list index out of range";
    # raise the same exception type with a message that says what to do next.
    raise IndexError(
        "No Driverless AI instances were returned by Steam; "
        "launch one (see the launch_instance cell) before continuing."
    )
dai_machine = dai_instances[0]
dai_machine

<h2osteam.clients.driverless.driverless_instance.DriverlessInstance at 0x7fef640eff10>

In [10]:
# Inspect the full instance details (uncomment the line below to run it)
#dai_machine.details()

In [11]:
# Check the DAI instance's name and status (e.g. 'running').
# details() is called once and reused, so both fields come from the same
# result instead of from two separate calls.
dai_details = dai_machine.details()
dai_details['name'], dai_details['status']

('DAI-Steam', 'running')

In [13]:
# Connect to the DAI instance (the one named DAI-Steam). This assumes the instance is already running.
dai = dai_machine.connect()
dai

<class 'driverlessai._core.Client'> https://steam.internal.dedicated.h2o.ai:443/proxy/driverless/571

ドキュメント: [driverlessai._core.Client](https://docs.h2o.ai/driverless-ai/pyclient/docs/html/api/client.html#client)

In [None]:
# Optional: launch a brand-new DAI instance instead of reusing an existing one.
# The launch code used to be disabled by wrapping it in a triple-quoted string,
# which is an expression and gets echoed as the cell's output. An explicit
# opt-in flag keeps the code readable and keeps "Run All" from starting an
# instance by accident.
LAUNCH_NEW_INSTANCE = False

if LAUNCH_NEW_INSTANCE:
    dai_machine = DriverlessClient(steam).launch_instance(
        name="my-automl",
        version=steam.get_driverless_engines()[-1]["version"],  # newest version of the AutoML software
        profile_name="default-driverless-kubernetes",
        max_uptime_h=1
    )
    dai = dai_machine.connect()

In [30]:
dai.connectors.list()   # data import methods this DAI instance is allowed to use

['upload',
 's3',
 'snow',
 'file',
 'feature_store',
 'recipe_file',
 'recipe_url',
 'h2o_drive',
 'feature_store']

In [33]:
dai.datasets.list()   # datasets currently on the DAI instance

    | Type    | Key                                  | Name
----+---------+--------------------------------------+-------------------
  0 | Dataset | e33146da-1551-11ee-b52d-f6d1fd8105b7 | from_pandasDF
  1 | Dataset | b7c901f8-154d-11ee-b52d-f6d1fd8105b7 | Telco_Churn
  2 | Dataset | 5247cede-1545-11ee-b52d-f6d1fd8105b7 | TitanicData2.csv
  3 | Dataset | 4fe255ce-1545-11ee-b52d-f6d1fd8105b7 | BostonHousing.csv

***

#### AI Notebook上のCSVをDAIへアップロード

In [29]:
!ls ../Data/
# churn.csv must be present at this path

BostonHousing.csv  churn.csv


In [30]:
# Running this cell uploads the local CSV to the DAI instance we are connected to.
telco_churn = dai.datasets.create(name="Telco_Churn", data="../Data/churn.csv")

Complete 100.00% - [4/4] Computed stats for column Account Length


In [31]:
# Display the Dataset object returned by the upload.
telco_churn

<class 'Dataset'> b7c901f8-154d-11ee-b52d-f6d1fd8105b7 Telco_Churn

ドキュメント: [Dataset](https://docs.h2o.ai/driverless-ai/pyclient/docs/html/api/objects.html#dataset)

***

#### AI Notebookのメモリ上にあるpandas.DataFrameをDAIへアップロード

In [17]:
import pandas as pd

In [36]:
# Small in-memory frame: three identical integer columns A, B and C holding 1..5.
df = pd.DataFrame({column: [1, 2, 3, 4, 5] for column in ('A', 'B', 'C')})

In [44]:
# Display the datasets accessor of the DAI client.
dai.datasets

<driverlessai._datasets.Datasets at 0x7fd34a881ca0>

ドキュメント: [driverlessai._datasets.Datasets](https://docs.h2o.ai/driverless-ai/pyclient/docs/html/api/client.html#datasets)

In [52]:
# Passing the DataFrame itself as `data` creates a DAI dataset from the in-memory table.
testdata = dai.datasets.create(name='from_pandasDF', data=df)

Complete 100.00% - [4/4] Computed stats for column A


In [51]:
# Column names of the dataset created from the DataFrame.
testdata.columns

['A', 'B', 'C']

In [None]:
# The available data_source values are:
# ['upload', 's3', 'snow', 'file', 'feature_store', 'recipe_file', 'recipe_url', 'h2o_drive', 'feature_store']

***

#### H2O Drive上のデータをDAIへアップロード

In [37]:
# Running this cell brings the H2O Drive file into the DAI instance we are connected to.
# `data` is the file's name/path on H2O Drive:
#   - file placed directly in the H2O Drive Home:  data="amazon_reviews_JP_Books_1000SAMPLE.csv"
#   - file inside a "mydata" folder created in Home (used here): data="mydata/BostonHousing.csv"
data_h2odrive = dai.datasets.create(
    data_source="h2o_drive",
    name="data_from_drive",
    data="mydata/BostonHousing.csv",
)

Complete 100.00% - [4/4] Computed stats for column CRIM


In [38]:
# Display the Dataset object created from the H2O Drive file.
data_h2odrive

<class 'Dataset'> 7182f5a6-16d2-11ee-93ee-0abcf8a3ed57 data_from_drive