# ローカルのnotebook開発から、学習ジョブ移行までを実施する

# 1. ローカルノートブックで学習を行う
・sklearnの乳がんデータを用いる（分類問題）
・LightGBMを使う

## 1-1.データの保存

In [1]:
from sklearn import datasets
import numpy as np
import pandas as pd

In [2]:
data = datasets.load_breast_cancer()

df = pd.DataFrame(np.append(data.data, data.target.reshape(-1,1), axis=1), columns=np.append(data.feature_names,
 'target'))

In [3]:
print(df.shape)
df.head()

(569, 31)


Unnamed: 0,mean radius,mean texture,mean perimeter,mean area,mean smoothness,mean compactness,mean concavity,mean concave points,mean symmetry,mean fractal dimension,...,worst texture,worst perimeter,worst area,worst smoothness,worst compactness,worst concavity,worst concave points,worst symmetry,worst fractal dimension,target
0,17.99,10.38,122.8,1001.0,0.1184,0.2776,0.3001,0.1471,0.2419,0.07871,...,17.33,184.6,2019.0,0.1622,0.6656,0.7119,0.2654,0.4601,0.1189,0.0
1,20.57,17.77,132.9,1326.0,0.08474,0.07864,0.0869,0.07017,0.1812,0.05667,...,23.41,158.8,1956.0,0.1238,0.1866,0.2416,0.186,0.275,0.08902,0.0
2,19.69,21.25,130.0,1203.0,0.1096,0.1599,0.1974,0.1279,0.2069,0.05999,...,25.53,152.5,1709.0,0.1444,0.4245,0.4504,0.243,0.3613,0.08758,0.0
3,11.42,20.38,77.58,386.1,0.1425,0.2839,0.2414,0.1052,0.2597,0.09744,...,26.5,98.87,567.7,0.2098,0.8663,0.6869,0.2575,0.6638,0.173,0.0
4,20.29,14.34,135.1,1297.0,0.1003,0.1328,0.198,0.1043,0.1809,0.05883,...,16.67,152.2,1575.0,0.1374,0.205,0.4,0.1625,0.2364,0.07678,0.0


In [4]:
df.to_csv('../input/breast_cancer.csv', index=False)

## 1-2.データ分割

In [5]:
from sklearn.model_selection import train_test_split

In [6]:
#x, y = dataset.data, dataset.target
x, y = df[data.feature_names], df['target']
# データセットを学習用とテスト用に分割する
train_x, test_x, train_y, test_y = train_test_split(x, y,
                                                    test_size=0.166,
                                                    shuffle=True,
                                                    random_state=42,
                                                    stratify=y)
# さらに学習用データを学習用とvalid用に分割する
tr_x, va_x, tr_y, va_y = train_test_split(train_x, train_y,
                                                    test_size=0.2,
                                                    shuffle=True,
                                                    random_state=42,
                                                    stratify=train_y)

In [23]:
print(tr_x.shape)
print(tr_y.shape)
print(va_x.shape)
print(va_y.shape)
print(test_x.shape)
print(test_y.shape)

(379, 30)
(379,)
(95, 30)
(95,)
(95, 30)
(95,)


In [8]:
pd.concat([tr_x,tr_y], axis=1).to_csv('../opt/ml/input/data/train/train.csv', index=False)
pd.concat([va_x,va_y], axis=1).to_csv('../opt/ml/input/data/valid/valid.csv', index=False)
pd.concat([test_x,test_y], axis=1).to_csv('../opt/ml/input/data/test/test.csv', index=False)

## 1-3.学習

In [10]:
import lightgbm as lgb
from sklearn.metrics import accuracy_score

In [11]:
# LightGBM が扱うデータセットの形式に直す
dtrain = lgb.Dataset(tr_x, label=tr_y)
dvalid = lgb.Dataset(va_x, label=va_y)
dtest = lgb.Dataset(test_x)

# 学習用のパラメータ
lgb_params = {
    # 二値分類問題
    'objective': 'binary',
    # 評価指標
    'metrics': 'binary_logloss',
}

In [12]:
# モデルを学習する
# バリデーションデータもモデルに渡し、学習の進行とともにスコアがどう変わるかモニタリングする
# watchlistには学習データおよびバリデーションデータをセットする
#watchlist = [(dtrain, 'train'), (dvalid, 'eval')]
model = lgb.train(lgb_params,
                dtrain,
                num_boost_round=50,  # 学習ラウンド数は適当
                #evals=watchlist
                valid_names=['train','valid'], valid_sets=[dtrain, dvalid]
                )

[LightGBM] [Info] Number of positive: 237, number of negative: 142
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3798
[LightGBM] [Info] Number of data points in the train set: 379, number of used features: 30
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.625330 -> initscore=0.512233
[LightGBM] [Info] Start training from score 0.512233
[1]	train's binary_logloss: 0.583245	valid's binary_logloss: 0.582221
[2]	train's binary_logloss: 0.518624	valid's binary_logloss: 0.519402
[3]	train's binary_logloss: 0.467531	valid's binary_logloss: 0.471914
[4]	train's binary_logloss: 0.423087	valid's binary_logloss: 0.426939
[5]	train's binary_logloss: 0.385211	valid's binary_logloss: 0.391658
[6]	train's binary_logloss: 0.349443	valid's binary_logloss: 0.356531
[7]	train's binary_logloss: 0.317067	valid's binary_logloss: 0.327033
[8]	train's binary_logloss: 0.289598	valid's binary_logloss: 0.299828
[9]	train's binary_logloss: 0.267131	valid's binary_loglos

## 1-4. 予測・評価

In [14]:
# 予測：検証用データが各クラスに分類される確率を計算する
pred_proba = model.predict(test_x)
# しきい値 0.5 で 0, 1 に丸める
pred = np.where(pred_proba > 0.5, 1, 0)
# 精度 (Accuracy) を検証する
acc = accuracy_score(test_y, pred)
print('Accuracy:', acc)

Accuracy: 0.9473684210526315


## 2.ローカル環境で学習ジョブ風に実行する（コンテナ未使用）
コンテナを導入する前に、ローカル環境でSageMaker学習ジョブのように動かします。.pyファイルの動作確認を高速で行うことが目的です。
ディレクトリ構造

## 2-1. データ配置

In [None]:
pd.concat([tr_x,tr_y], axis=1).to_csv('../opt/ml/input/data/train/train.csv', index=False)
pd.concat([va_x,va_y], axis=1).to_csv('../opt/ml/input/data/valid/valid.csv', index=False)
pd.concat([test_x,test_y], axis=1).to_csv('../opt/ml/input/data/test/test.csv', index=False)

## 2-2. ソースコードを準備
../opt/ml/input/data/src/train.pyに配置

In [24]:
!chmod +x ../opt/program/train

In [27]:
!chmod +x ../opt/ml/input/data/src/*

In [28]:
!pygmentize ../opt/ml/input/data/src/train.py

[37m#!/usr/bin/env python[39;49;00m
[34mimport[39;49;00m [04m[36mnumpy[39;49;00m [34mas[39;49;00m [04m[36mnp[39;49;00m
[34mimport[39;49;00m [04m[36mpandas[39;49;00m [34mas[39;49;00m [04m[36mpd[39;49;00m
[34mimport[39;49;00m [04m[36mlightgbm[39;49;00m [34mas[39;49;00m [04m[36mlgb[39;49;00m
[34mfrom[39;49;00m [04m[36msklearn.metrics[39;49;00m [34mimport[39;49;00m accuracy_score

[37m# データ読み込み[39;49;00m
train_df = pd.read_csv([33m'[39;49;00m[33m../opt/ml/input/data/train/train.csv[39;49;00m[33m'[39;49;00m)
valid_df = pd.read_csv([33m'[39;49;00m[33m../opt/ml/input/data/valid/valid.csv[39;49;00m[33m'[39;49;00m)
test_df = pd.read_csv([33m'[39;49;00m[33m../opt/ml/input/data/test/test.csv[39;49;00m[33m'[39;49;00m)

tr_x, tr_y = train_df.drop([[33m'[39;49;00m[33mtarget[39;49;00m[33m'[39;49;00m], axis=[34m1[39;49;00m), train_df[[33m'[39;49;00m[33mtarget[39;49;00m[33m'[39;49;00m]
va_x, va_y = valid_df.drop([[33m'[39;49;00m

In [26]:
!../opt/program/train

train running...
/Users/yshiy/github/sagemaker-byoc/notebook
[LightGBM] [Info] Number of positive: 237, number of negative: 142
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3798
[LightGBM] [Info] Number of data points in the train set: 379, number of used features: 30
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.625330 -> initscore=0.512233
[LightGBM] [Info] Start training from score 0.512233
[1]	train's binary_logloss: 0.583245	valid's binary_logloss: 0.582221
[2]	train's binary_logloss: 0.518624	valid's binary_logloss: 0.519402
[3]	train's binary_logloss: 0.467531	valid's binary_logloss: 0.471914
[4]	train's binary_logloss: 0.423087	valid's binary_logloss: 0.426939
[5]	train's binary_logloss: 0.385211	valid's binary_logloss: 0.391658
[6]	train's binary_logloss: 0.349443	valid's binary_logloss: 0.356531
[7]	train's binary_logloss: 0.317067	valid's binary_logloss: 0.327033
[8]	train's binary_logloss: 0.289598	valid's binary_logloss: 0.2998

# 3.ローカルモードで学習ジョブを実行

### ---------------

# 1.コンテナ準備
## 1-1.Dockerfile (確認のみ)

Dockerfileには、構築したいイメージが記述されています。これは、実行したいシステムの完全なオペレーティングシステムのインストールを記述していると考えることができます。しかし、Dockerコンテナの実行は、基本的な操作のためにホストマシン上のLinuxを利用するため、完全なオペレーティングシステムよりもかなり軽量です。

Pythonサイエンススタックでは、標準的なUbuntuのインストールから始めて、通常のツールを実行してscikit-learnで必要なものをインストールします。最後に、特定のアルゴリズムを実装したコードをコンテナに追加して、実行に適した環境を整えます。

その際、余分なスペースを整理します。これにより、コンテナは小さくなり、起動も速くなります。

例のDockerfileを見てみましょう。

imageには、trainやbacktestに必要なソースは含めないこととする。
（学習ジョブ実行時にS3からコピーする）
よって、dockerイメージ作成時に必要な資材はない。


In [33]:
!cat ../container/lgbm/Dockerfile

FROM python:3.7.5-slim
USER root

RUN apt-get update
RUN apt-get -y install locales && \
    localedef -f UTF-8 -i ja_JP ja_JP.UTF-8
ENV LANG ja_JP.UTF-8
ENV LANGUAGE ja_JP:ja
ENV LC_ALL ja_JP.UTF-8
ENV TZ JST-9
ENV TERM xterm

RUN apt-get install -y vim less
RUN pip install --upgrade pip
RUN pip install --upgrade setuptools

RUN apt-get -y install build-essential
RUN apt-get -y install wget

### install libraries
RUN pip install numpy pandas scikit-learn matplotlib seaborn lightgbm

# Set some environment variables. PYTHONUNBUFFERED keeps Python from buffering our standard
# output stream, which means that logs can be delivered to the user quickly. PYTHONDONTWRITEBYTECODE
# keeps Python from writing the .pyc files which are unnecessary in this case. We also update
# PATH so that the train and serve programs are found when the container is invoked.

ENV PYTHONUNBUFFERED=TRUE
ENV PYTHONDONTWRITEBYTECODE=TRUE
ENV PATH="/opt/program:${PATH}"

# Set up the program in the image
COPY program

## 1-2.Building and registering the container to ECR

以下のシェルコードは、`docker build`を使用してコンテナイメージをビルドし、`docker push`を使用してコンテナイメージをECRにプッシュする方法を示しています。このコードはシェルスクリプト `container/build-and-push.sh` としても提供されており、`build-and-push.sh decision_trees_sample` として実行することで、イメージ `decision_trees_sample` をビルドすることができます。

このコードは、使用しているアカウントと現在のデフォルトリージョン（SageMakerのノートブックインスタンスを使用している場合は、ノートブックインスタンスが作成されたリージョンになります）でECRリポジトリを探します。
リポジトリが存在しない場合、スクリプトはそれを作成します。

https://github.com/aws/amazon-sagemaker-examples/blob/master/advanced_functionality/scikit_bring_your_own/container/build_and_push.sh

以下は、build-and-push.shと同じ内容

In [34]:
%%sh

# The name of our algorithm
algorithm_name=test-trainingjob

cd ../container/lgbm

chmod +x program/train
#chmod +x decision_trees/serve

account=$(aws sts get-caller-identity --query Account --output text)

# Get the region defined in the current configuration (default to us-west-2 if none defined)
region=$(aws configure get region)
region=${region:-us-west-1}

fullname="${account}.dkr.ecr.${region}.amazonaws.com/${algorithm_name}:latest"

# If the repository doesn't exist in ECR, create it.
aws ecr describe-repositories --repository-names "${algorithm_name}" > /dev/null 2>&1

if [ $? -ne 0 ]
then
    aws ecr create-repository --repository-name "${algorithm_name}" > /dev/null
fi

# Get the login command from ECR and execute it directly
aws ecr get-login-password --region ${region}|docker login --username AWS --password-stdin ${fullname}

# Build the docker image locally with the image name and then push it to ECR
# with the full name.

docker build  -t ${algorithm_name} .
docker tag ${algorithm_name} ${fullname}

docker push ${fullname}

Login Succeeded
The push refers to repository [805433377179.dkr.ecr.us-east-1.amazonaws.com/test-trainingjob]
sh: line 5: cd: container/lgbm: No such file or directory
chmod: program/train: No such file or directory
#1 [internal] load build definition from Dockerfile
#1 sha256:d18e60c53e7e59f8b43609d7e6ff720d01c6520e4a3834900c4eb655bdd90dc1
#1 transferring dockerfile: 2B 0.0s done
#1 DONE 0.0s
failed to solve with frontend dockerfile.v0: failed to read dockerfile: open /var/lib/docker/tmp/buildkit-mount750953024/Dockerfile: no such file or directory
Error response from daemon: No such image: test-trainingjob:latest
An image does not exist locally with the tag: 805433377179.dkr.ecr.us-east-1.amazonaws.com/test-trainingjob


CalledProcessError: Command 'b'\n# The name of our algorithm\nalgorithm_name=test-trainingjob\n\ncd container/lgbm\n\nchmod +x program/train\n#chmod +x decision_trees/serve\n\naccount=$(aws sts get-caller-identity --query Account --output text)\n\n# Get the region defined in the current configuration (default to us-west-2 if none defined)\nregion=$(aws configure get region)\nregion=${region:-us-west-1}\n\nfullname="${account}.dkr.ecr.${region}.amazonaws.com/${algorithm_name}:latest"\n\n# If the repository doesn\'t exist in ECR, create it.\naws ecr describe-repositories --repository-names "${algorithm_name}" > /dev/null 2>&1\n\nif [ $? -ne 0 ]\nthen\n    aws ecr create-repository --repository-name "${algorithm_name}" > /dev/null\nfi\n\n# Get the login command from ECR and execute it directly\naws ecr get-login-password --region ${region}|docker login --username AWS --password-stdin ${fullname}\n\n# Build the docker image locally with the image name and then push it to ECR\n# with the full name.\n\ndocker build  -t ${algorithm_name} .\ndocker tag ${algorithm_name} ${fullname}\n\ndocker push ${fullname}\n'' returned non-zero exit status 1.

## 疑問：build and push したイメージの動作確認はローカルでできる？
できる。ローカルモード

# -----コンテナ準備完了-----

# 2. データとプログラムをS3にアップロード
・SageMaker　SDKを使う場合。  

https://sagemaker.readthedocs.io/en/stable/api/utility/session.html


・boto3でs3クライアントを使う場合。  

In [None]:
import sagemaker as sage
sess = sage.Session()

# S3 prefix
#prefix = 'DEMO-scikit-byo-iris'
prefix = 'test-rd/src'

In [None]:
#WORK_DIRECTORY = 'data'
#data_location = sess.upload_data(WORK_DIRECTORY, key_prefix=prefix)

src_location = sess.upload_data('opt/ml/input/data/src', bucket='work-aws-virginia', key_prefix=prefix)

# -----データ準備完了-----

# 3-1.学習ジョブ発行(ローカルモード)

In [None]:
#data_location = 's3://work-aws-virginia/test-rd/candles/'
data_location = 's3://work-aws-virginia/test-rd/train/'


In [None]:
account = sess.boto_session.client('sts').get_caller_identity()['Account']
region = sess.boto_session.region_name

#image = '{}.dkr.ecr.{}.amazonaws.com/sagemaker-decision-trees:latest'.format(account, region)

image = '{}.dkr.ecr.{}.amazonaws.com/test-rd:latest'.format(account, region)

In [None]:
image

In [None]:
hyperparameters={'candle_window': 15,
                 'horizon': 300,
                 'target': 'tgt_diff'}

In [None]:
role = 'arn:aws:iam::805433377179:role/sagemaker-sdk-for-local'

lgbm = sage.estimator.Estimator(image_uri=image,
                                #entasdfdfry_point1='run.sh',
                                #source_sddddddddddir='src',
                                role=role, 
                                instance_count=1,
                                instance_type='local',
                                #instance_type='ml.c4.2xlarge',
                                output_path="s3://{}/output".format(sess.default_bucket()),
                                hyperparameters=hyperparameters,
                                #sagemaker_session=sess)
)

In [None]:
lgbm.fit({'train':'s3://work-aws-virginia/test-rd/train/',
          'valid':'s3://work-aws-virginia/test-rd/train/',
          'test' :'s3://work-aws-virginia/test-rd/train/',
          'backtest':'s3://work-aws-virginia/test-rd/train/',
          'src':'s3://work-aws-virginia/test-rd/src/'})

# 3-2.学習ジョブ発行(SageMaker)

### 下準備

In [73]:
import sagemaker as sage
sess = sage.Session()

prefix = 'test-rd/src'

In [74]:
### ソースコードをアップロード
src_location = sess.upload_data('opt/ml/input/data/src', bucket='work-aws-virginia', key_prefix=prefix)

### ハイパーパラメータ設定（ローカルファイルから読み込み）
hyperparameters={"timeframe": 15,
                 "horizon" : 300,
                 "hist_row" : 2
}
print(hyperparameters)

{'timeframe': 15, 'horizon': 300, 'hist_row': 2}


In [75]:
HYPERPARAMETER_JSON_PATH = "../../../../../opt/ml/input/config/hyperparameters.json"
HYPERPARAMETER_JSON_PATH = "opt/ml/input/config/hyperparameters.json"

import json
### ハイパーパラメータ設定（ローカルファイルから読み込み）
with open(HYPERPARAMETER_JSON_PATH, "r") as f:
    hyperparameters = json.load(f)

print(hyperparameters)

{'timeframe': 15, 'horizon': 300, 'hist_row': 2, 'target_col': 'tgt_diff'}


In [76]:
role = 'arn:aws:iam::805433377179:role/sagemaker-sdk-for-local'
#role = 'arn:aws:iam::805433377179:role/service-role/AmazonSageMaker-ExecutionRole-20191212T111531'

account = sess.boto_session.client('sts').get_caller_identity()['Account']
region = sess.boto_session.region_name
image = '{}.dkr.ecr.{}.amazonaws.com/test-rd:latest'.format(account, region)

lgbm = sage.estimator.Estimator(image_uri=image,
                                #entasdfdfry_point1='run.sh',
                                #source_sddddddddddir='src',
                                role=role, 
                                instance_count=1,
                                #instance_type='local',
                                instance_type='ml.c4.2xlarge',
                                #output_path="s3://{}/output".format(sess.default_bucket()),
                                output_path='s3://work-aws-virginia/test-rd/output/',
                                #sagemaker_session=sess # ノートブックインスタンスで実行する場合にIAMを渡す
                                hyperparameters=hyperparameters,
                                )

In [77]:
lgbm.fit({'train':'s3://work-aws-virginia/test-rd/train/',
          'valid':'s3://work-aws-virginia/test-rd/valid/',
          'test' :'s3://work-aws-virginia/test-rd/test/',
          #'backtest':'s3://work-aws-virginia/test-rd/test/',
          'src':'s3://work-aws-virginia/test-rd/src/'},
          wait=False
)



https://sagemaker.readthedocs.io/en/stable/api/training/estimators.html#sagemaker.estimator.EstimatorBase.fit

・並行してジョブ発行する場合は、wait=Falseを使う

## 並列実行

In [4]:
import sagemaker as sage
sess = sage.Session()

prefix = 'test-rd/src'
### ソースコードをアップロード
src_location = sess.upload_data('opt/ml/input/data/src', bucket='work-aws-virginia', key_prefix=prefix)

In [5]:
hyperparam_list = [
    'opt/ml/input/config/hyperparameters_10_180_2_diff.json',
    'opt/ml/input/config/hyperparameters_10_300_2_diff.json',
    'opt/ml/input/config/hyperparameters_15_180_2_diff.json',
    'opt/ml/input/config/hyperparameters_15_300_2_diff.json',
    ]

In [6]:
role = 'arn:aws:iam::805433377179:role/sagemaker-sdk-for-local'
account = sess.boto_session.client('sts').get_caller_identity()['Account']
region = sess.boto_session.region_name
image = '{}.dkr.ecr.{}.amazonaws.com/test-rd:latest'.format(account, region)

import json
for hp_path in hyperparam_list:
    print(hp_path)

    ### ハイパーパラメータ設定（ローカルファイルから読み込み）
    with open(hp_path, "r") as f:
        hyperparameters = json.load(f)

    print(hyperparameters)

    lgbm = sage.estimator.Estimator(image_uri=image,
                                role=role, 
                                instance_count=1,
                                instance_type='ml.c4.2xlarge',
                                output_path='s3://work-aws-virginia/test-rd/output/',
                                hyperparameters=hyperparameters,
                                )
    lgbm.fit({'train':'s3://work-aws-virginia/test-rd/train/',
          'valid':'s3://work-aws-virginia/test-rd/valid/',
          'test' :'s3://work-aws-virginia/test-rd/test/',
          'src':'s3://work-aws-virginia/test-rd/src/'},
          wait=False
          )

opt/ml/input/config/hyperparameters_10_180_2_diff.json
{'timeframe': 10, 'horizon': 180, 'hist_row': 2, 'target_col': 'tgt_diff'}
opt/ml/input/config/hyperparameters_10_300_2_diff.json
{'timeframe': 10, 'horizon': 300, 'hist_row': 2, 'target_col': 'tgt_diff'}
opt/ml/input/config/hyperparameters_15_180_2_diff.json
{'timeframe': 15, 'horizon': 180, 'hist_row': 2, 'target_col': 'tgt_diff'}
opt/ml/input/config/hyperparameters_15_300_2_diff.json
{'timeframe': 15, 'horizon': 300, 'hist_row': 2, 'target_col': 'tgt_diff'}


# 上記のEstimatorを編集していく
https://sagemaker.readthedocs.io/en/stable/api/training/estimators.html#

classsagemaker.estimator.Estimator(image_uri, role, instance_count=None, instance_type=None, volume_size=30, volume_kms_key=None, max_run=86400, input_mode='File', output_path=None, output_kms_key=None, base_job_name=None, sagemaker_session=None, hyperparameters=None, tags=None, subnets=None, security_group_ids=None, model_uri=None, model_channel_name='model', metric_definitions=None, encrypt_inter_container_traffic=False, use_spot_instances=False, max_wait=None, checkpoint_s3_uri=None, checkpoint_local_path=None, enable_network_isolation=False, rules=None, debugger_hook_config=None, tensorboard_output_config=None, enable_sagemaker_metrics=None, profiler_config=None, disable_profiler=False, environment=None, **kwargs)

In [None]:
import sagemaker as sage

In [None]:
sage.__version__

In [None]:
!pip install sagemaker==2.33.0

SageMaker Python SDK version == 2.33.0　でOK



SageMaker Python SDKのローカルモードを利用して、ノートブックインスタンス以外の環境で学習ジョブを回してみる
https://dev.classmethod.jp/articles/sagemaker-python-sdk-localmode/

# 完了