Test code for deep learning architectures

In [2]:
import os
import json
import datetime

import pandas as pd
import numpy as np

import driverlessai
driverlessai.__version__

'1.10.1.3'

In [3]:
# Load the Driverless AI user name and passwords from a JSON file that lives
# three directories above the notebook.
# The encoding is pinned to UTF-8 so the read does not depend on the platform's
# default locale encoding.
with open(os.path.join('..', '..', '..', 'idpass.json'), encoding='utf-8') as f:
    idpass = json.load(f)

In [50]:
def get_dai_client(daiaddress, daipassword, daiusername=None) -> 'driverlessai._core.Client':
    '''
    Connect to a Driverless AI server and return the client object.

    Parameters
    ----------
    daiaddress : str
        Server address, passed to ``driverlessai.Client`` as ``address``.
    daipassword : str
        Password, passed to ``driverlessai.Client`` as ``password``.
    daiusername : str, optional
        User name, passed to ``driverlessai.Client`` as ``username``.
        When omitted, ``idpass['id']`` from the notebook-level credentials
        dict is used, which is what this function always did before the
        parameter existed.

    Returns
    -------
    driverlessai._core.Client
    '''
    print('----- start server connection : get_dai_client -----')
    if daiusername is None:
        # Backward-compatible fallback to the notebook-level credentials dict.
        daiusername = idpass['id']
    # Open the connection to the Driverless AI server.
    return driverlessai.Client(address=daiaddress, username=daiusername, password=daipassword)

def get_dataset(daiobj, dataname, dataurl, data_source='s3') -> 'driverlessai._datasets.Dataset':
    '''
    Return the dataset called ``dataname``, creating it on the server if needed.

    The datasets currently listed by the server are looked up by name. If
    ``dataname`` is among them, that dataset is fetched by its key; otherwise
    a new dataset is created from ``dataurl``.

    Parameters
    ----------
    daiobj : driverlessai._core.Client
        Client whose ``datasets.list``, ``datasets.get`` and
        ``datasets.create`` are used.
    dataname : str
        Dataset name to look for among the listed datasets.
    dataurl : str
        Location passed as ``data`` to ``datasets.create`` when the dataset
        is not listed yet.
    data_source : str, optional
        Passed as ``data_source`` to ``datasets.create``. Defaults to
        ``'s3'``, the value that used to be hard-coded.

    Returns
    -------
    driverlessai._datasets.Dataset
    '''
    print('----- start get data : get_dataset -----')
    # Map dataset name -> key for every dataset the server lists.
    uploaded_data = {i.name: i.key for i in daiobj.datasets.list()}
    print('Uploaded data name : key >> ', uploaded_data)

    if dataname in uploaded_data:
        print('Data is already uploaded in DAI')
        return daiobj.datasets.get(uploaded_data[dataname])

    print('Data is uploading to DAI.')
    return daiobj.datasets.create(data=dataurl, data_source=data_source)

def get_experiment(daiobj, splitdata, target_column, task, drop_columns, tensorflow_image_pretrained_models, test_mode)-> 'driverlessai._experiments.Experiment':
    '''
    Run an experiment and return the resulting Experiment object.

    Parameters
    ----------
    daiobj : driverlessai._core.Client
        Client whose ``experiments.create`` is called.
    splitdata : dict
        Dataset keyword arguments, unpacked into ``experiments.create``
        together with the settings built here.
    target_column : str
    task : str
    drop_columns : List[str]
    tensorflow_image_pretrained_models : str or List[str]
        A single name is wrapped into a one-element list; a list or tuple of
        names is passed through as a list. When the value is missing
        (``None`` or NaN, e.g. a blank CSV cell) the setting is left out of
        the call entirely.
    test_mode : bool
        When truthy, ``accuracy`` and ``time`` are both set to 1.

    Returns
    -------
    driverlessai._experiments.Experiment
    '''
    print('----- start experiment : get_experiment -----')
    # Experiment settings
    dai_settings = {
        'target_column': target_column,
        'task': task,
        'drop_columns': drop_columns,
    }

    models = tensorflow_image_pretrained_models
    # NaN is the only float that is not equal to itself.
    is_missing = models is None or (isinstance(models, float) and models != models)
    if not is_missing:
        if isinstance(models, (list, tuple)):
            models = list(models)   # already a collection: do not nest it
        else:
            models = [models]
        dai_settings['tensorflow_image_pretrained_models'] = models

    if test_mode:    # in test mode, run with accuracy=1 and time=1
        dai_settings['accuracy'] = 1
        dai_settings['time'] = 1

    # Run the experiment
    return daiobj.experiments.create(**splitdata, **dai_settings)

In [45]:
def _parse_drop_columns(value) -> list:
    '''Convert a comma-separated ``drop_columns`` cell into a list of column names.'''
    if isinstance(value, str):
        # Tolerate "a, b" and trailing commas: strip blanks, skip empty pieces.
        return [name.strip() for name in value.split(',') if name.strip()]
    if value is None or pd.isna(value):
        # Missing cell: no columns to drop.
        return []
    raise TypeError('drop_columns must be a comma-separated string or missing, got {!r}'.format(value))


def run_whole_experiments(dai_address: str, dai_password: str, df_expperiments_info: pd.DataFrame) -> None:
    '''
    Run one experiment per row of ``df_expperiments_info`` on a single dataset.

    The dataset name and its S3 location are taken from the first row only.
    The dataset is fetched (or created) once and split once into train/test
    with ``train_size=0.75``; every row then launches an experiment on that
    split. After each experiment a JSON summary is written to
    ``dlatest_<start time>.json`` in the current working directory.

    Parameters
    ----------
    dai_address : str
        Server address handed to ``get_dai_client``.
    dai_password : str
        Password handed to ``get_dai_client``.
    df_expperiments_info : pd.DataFrame
        Experiment table. Columns read here: ``data_name``, ``s3url``,
        ``tensorflow_image_pretrained_models``, ``target_column``, ``task``,
        ``drop_columns`` (comma-separated string or missing), ``test_mode``
        and ``try_n``.

    Raises
    ------
    ValueError
        If ``df_expperiments_info`` has no rows.
    '''
    if df_expperiments_info.empty:
        raise ValueError('df_expperiments_info must contain at least one experiment row')

    # Dataset information: positional access, so a non-default index also works.
    data_name = df_expperiments_info['data_name'].iloc[0]
    s3url = df_expperiments_info['s3url'].iloc[0]  # S3 source used when the data is not on the server yet
    print('#####-----  利用データ: ', data_name, '  -----#####')

    print('*************** DAIへ接続 ***************')
    dai = get_dai_client(daiaddress=dai_address, daipassword=dai_password)
    print(type(dai))
    print('DAIバージョン: {}'.format(dai.server.version))

    print('*************** データの取得 ***************')
    ds = get_dataset(daiobj=dai, dataname=data_name, dataurl=s3url)

    datasize_mb = ds.file_size/1024**2
    print(type(ds))
    print('Dataサイズ(byte): {}'.format(ds.file_size))
    print('Dataサイズ(mega byte): {}'.format(datasize_mb))
    print('Data shape: {}'.format(ds.shape))

    print('*************** データ分割 ***************')
    ds_split = ds.split_to_train_test(train_size=0.75, train_name=data_name+'_train', test_name=data_name+'_test')

    for _, row in df_expperiments_info.iterrows():
        # ---------- parameters of this experiment ----------
        tensorflow_image_pretrained_models = row['tensorflow_image_pretrained_models']
        start_time = datetime.datetime.now().strftime('%Y年%m月%d日%H時%M分%S秒')
        print('#####-----  開始時間: ', start_time, '  -----#####')
        print('#####-----  archtect: ', tensorflow_image_pretrained_models, '  -----#####')

        # Experiment settings
        target_column = row['target_column']
        task = row['task']    # 'regression', 'classification', or 'unsupervised'
        drop_columns = _parse_drop_columns(row['drop_columns'])
        test_mode = row['test_mode']

        print('*************** Experimentの実施 ***************')
        ex = get_experiment(daiobj=dai, splitdata=ds_split,
                            target_column=target_column, task=task, drop_columns=drop_columns,
                            tensorflow_image_pretrained_models=tensorflow_image_pretrained_models,
                            test_mode=test_mode)

        # Read each result once so the printed and the saved values agree.
        duration_sec = ex.run_duration
        experiment_size = ex.size
        metrics = ex.metrics()

        print(type(ex))   # was type(ds): the experiment object is what is being reported here
        print('学習時間（sec）：{}'.format(duration_sec))
        print('学習時間（min）：{}'.format(duration_sec/60))
        print('Experimentサイズ（byte）：{}'.format(experiment_size))
        print('Experimentサイズ（mega byte）：{}'.format(experiment_size/1024**2))
        print('精度：{}'.format(metrics))
        print('********** Experiment Summary **********')
        ex.summary()

        save_dict = dict(Data_Name=data_name,
                         Try=row['try_n'],
                         Datasize_mb=datasize_mb,
                         N_Observation=ds.shape[0],
                         # every column except the dropped ones and the target
                         N_features=ds.shape[1] - len(drop_columns) - 1,
                         Shape_Train=ds_split['train_dataset'].shape,
                         Shape_Test=ds_split['test_dataset'].shape,
                         Duration_min=duration_sec/60,
                         Experiment_Size_mb=experiment_size/1024**2,
                         Acc_Time_Interpret=(ex.settings['accuracy'], ex.settings['time'], ex.settings['interpretability']),
                         Metrics=metrics
                        )
        with open('dlatest_{}.json'.format(start_time), 'w') as f:
            json.dump(save_dict, f, indent=4)

In [46]:
# Load the experiment parameter table from CSV and display it.
# run_whole_experiments() consumes it one row per experiment.
df_expperiments_info = pd.read_csv('exp_params_test.csv')
df_expperiments_info

Unnamed: 0,try_n,data_name,s3url,target_column,task,drop_columns,enable_gpus,test_mode,tensorflow_image_pretrained_models
0,1,car_deals.zip,s3://h2o-public-test-data/bigdata/server/Image...,Price,regression,"Manufacturer,Model,Year,Category,Mileage,FuelT...",,True,resnet50
1,1,car_deals.zip,s3://h2o-public-test-data/bigdata/server/Image...,Price,regression,"Manufacturer,Model,Year,Category,Mileage,FuelT...",,True,seresnext50


In [47]:
# Show the dtype pandas inferred for each parameter column.
df_expperiments_info.dtypes

try_n                                   int64
data_name                              object
s3url                                  object
target_column                          object
task                                   object
drop_columns                           object
enable_gpus                           float64
test_mode                                bool
tensorflow_image_pretrained_models     object
dtype: object

In [48]:
# Driverless AI server information
# NOTE(review): the server address is hard-coded here; the password is read
# from the credentials dict loaded earlier in the notebook.
dai_address = 'http://35.172.135.60'
dai_password = idpass['pass11013gpu']

In [49]:
# Run every experiment listed in the parameter table against the configured server.
run_whole_experiments(dai_address=dai_address, dai_password=dai_password, df_expperiments_info=df_expperiments_info)

#####-----  利用データ:  car_deals.zip   -----#####
*************** DAIへ接続 ***************
----- start server connection : get_dai_client -----
<class 'driverlessai._core.Client'>
DAIバージョン: 1.10.1.3
*************** データの取得 ***************
----- start get data : get_dataset -----
Uploaded data name : key >>  {'histopathology_train.zip': '8ec3abf4-a407-11ec-8921-0242ac110002', 'car_deals.zip': '50119e5c-a407-11ec-8921-0242ac110002', 'UCI_Credit_Card3.csv': 'ac9c94bc-9f2d-11ec-ad99-0242ac110002', 'kaggle_train.csv': '056c70f0-9365-11ec-96ca-0242ac110002', 'talk4_customize_NLP検証用_h連携.csv': '9a306ad0-9364-11ec-96ca-0242ac110002', 'BostonHousing.csv': '6db78e82-91f9-11ec-a31a-0242ac110002'}
Data is already uploaded in DAI
<class 'driverlessai._datasets.Dataset'>
Dataサイズ(byte): 785613764
Dataサイズ(mega byte): 749.219669342041
Data shape: (34571, 16)
*************** データ分割 ***************
Complete
#####-----  開始時間:  2022年04月04日14時52分25秒   -----#####
#####-----  archtect:  resnet50   -----#####
********