# ローカルのnotebook開発から、学習ジョブ移行までを実施する

# 1. ローカルノートブックで学習を行う
・sklearnの乳がんデータを用いる（分類問題）
・LightGBMを使う

## よくあるノートブックでの、LightGBMの実行例

In [133]:
import numpy as np
import pandas as pd
from sklearn import datasets
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split

import lightgbm as lgb

dataset = datasets.load_breast_cancer()

x, y = dataset.data, dataset.target
# データセットを学習用とテスト用に分割する
train_x, test_x, train_y, test_y = train_test_split(x, y,
                                                    test_size=0.166,
                                                    shuffle=True,
                                                    random_state=42,
                                                    stratify=y)
# さらに学習用データを学習用とvalid用に分割する
tr_x, va_x, tr_y, va_y = train_test_split(train_x, train_y,
                                                    test_size=0.2,
                                                    shuffle=True,
                                                    random_state=42,
                                                    stratify=train_y)
# LightGBM が扱うデータセットの形式に直す
dtrain = lgb.Dataset(tr_x, label=tr_y)
dvalid = lgb.Dataset(va_x, label=va_y)
dtest = lgb.Dataset(test_x)

# 学習用のパラメータ
lgb_params = {
    # 二値分類問題
    'objective': 'binary',
    # 評価指標
    'metrics': 'binary_logloss',
}
# モデルを学習する
# バリデーションデータもモデルに渡し、学習の進行とともにスコアがどう変わるかモニタリングする
# watchlistには学習データおよびバリデーションデータをセットする
#watchlist = [(dtrain, 'train'), (dvalid, 'eval')]
model = lgb.train(lgb_params,
                dtrain,
                num_boost_round=50,  # 学習ラウンド数は適当
                #evals=watchlist
                valid_names=['train','valid'], valid_sets=[dtrain, dvalid]
                )
# 予測：検証用データが各クラスに分類される確率を計算する
pred_proba = model.predict(test_x)
# しきい値 0.5 で 0, 1 に丸める
pred = np.where(pred_proba > 0.5, 1, 0)
# 精度 (Accuracy) を検証する
acc = accuracy_score(test_y, pred)
print('Accuracy:', acc)


[LightGBM] [Info] Number of positive: 237, number of negative: 142
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3798
[LightGBM] [Info] Number of data points in the train set: 379, number of used features: 30
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.625330 -> initscore=0.512233
[LightGBM] [Info] Start training from score 0.512233
[1]	train's binary_logloss: 0.583245	valid's binary_logloss: 0.582221
[2]	train's binary_logloss: 0.518624	valid's binary_logloss: 0.519402
[3]	train's binary_logloss: 0.467531	valid's binary_logloss: 0.471914
[4]	train's binary_logloss: 0.423087	valid's binary_logloss: 0.426939
[5]	train's binary_logloss: 0.385211	valid's binary_logloss: 0.391658
[6]	train's binary_logloss: 0.349443	valid's binary_logloss: 0.356531
[7]	train's binary_logloss: 0.317067	valid's binary_logloss: 0.327033
[8]	train's binary_logloss: 0.289598	valid's binary_logloss: 0.299828
[9]	train's binary_logloss: 0.267131	valid's binary_loglos

## 1-1.データの保存

In [134]:
from sklearn import datasets
import numpy as np
import pandas as pd

In [135]:
data = datasets.load_breast_cancer()

df = pd.DataFrame(np.append(data.data, data.target.reshape(-1,1), axis=1), columns=np.append(data.feature_names,
 'target'))

In [137]:
print(df.shape)
df.head()

(569, 31)


Unnamed: 0,mean radius,mean texture,mean perimeter,mean area,mean smoothness,mean compactness,mean concavity,mean concave points,mean symmetry,mean fractal dimension,...,worst texture,worst perimeter,worst area,worst smoothness,worst compactness,worst concavity,worst concave points,worst symmetry,worst fractal dimension,target
0,17.99,10.38,122.8,1001.0,0.1184,0.2776,0.3001,0.1471,0.2419,0.07871,...,17.33,184.6,2019.0,0.1622,0.6656,0.7119,0.2654,0.4601,0.1189,0.0
1,20.57,17.77,132.9,1326.0,0.08474,0.07864,0.0869,0.07017,0.1812,0.05667,...,23.41,158.8,1956.0,0.1238,0.1866,0.2416,0.186,0.275,0.08902,0.0
2,19.69,21.25,130.0,1203.0,0.1096,0.1599,0.1974,0.1279,0.2069,0.05999,...,25.53,152.5,1709.0,0.1444,0.4245,0.4504,0.243,0.3613,0.08758,0.0
3,11.42,20.38,77.58,386.1,0.1425,0.2839,0.2414,0.1052,0.2597,0.09744,...,26.5,98.87,567.7,0.2098,0.8663,0.6869,0.2575,0.6638,0.173,0.0
4,20.29,14.34,135.1,1297.0,0.1003,0.1328,0.198,0.1043,0.1809,0.05883,...,16.67,152.2,1575.0,0.1374,0.205,0.4,0.1625,0.2364,0.07678,0.0


In [141]:
#df['target'] = int(df['target'])
df = df.astype({'target': int})

In [142]:
print(df.shape)
df.head()

(569, 31)


Unnamed: 0,mean radius,mean texture,mean perimeter,mean area,mean smoothness,mean compactness,mean concavity,mean concave points,mean symmetry,mean fractal dimension,...,worst texture,worst perimeter,worst area,worst smoothness,worst compactness,worst concavity,worst concave points,worst symmetry,worst fractal dimension,target
0,17.99,10.38,122.8,1001.0,0.1184,0.2776,0.3001,0.1471,0.2419,0.07871,...,17.33,184.6,2019.0,0.1622,0.6656,0.7119,0.2654,0.4601,0.1189,0
1,20.57,17.77,132.9,1326.0,0.08474,0.07864,0.0869,0.07017,0.1812,0.05667,...,23.41,158.8,1956.0,0.1238,0.1866,0.2416,0.186,0.275,0.08902,0
2,19.69,21.25,130.0,1203.0,0.1096,0.1599,0.1974,0.1279,0.2069,0.05999,...,25.53,152.5,1709.0,0.1444,0.4245,0.4504,0.243,0.3613,0.08758,0
3,11.42,20.38,77.58,386.1,0.1425,0.2839,0.2414,0.1052,0.2597,0.09744,...,26.5,98.87,567.7,0.2098,0.8663,0.6869,0.2575,0.6638,0.173,0
4,20.29,14.34,135.1,1297.0,0.1003,0.1328,0.198,0.1043,0.1809,0.05883,...,16.67,152.2,1575.0,0.1374,0.205,0.4,0.1625,0.2364,0.07678,0


In [144]:
df.to_csv('../input/breast_cancer.csv', index=False)

## 1-2.データ分割

In [5]:
from sklearn.model_selection import train_test_split

In [6]:
#x, y = dataset.data, dataset.target
x, y = df[data.feature_names], df['target']
# データセットを学習用とテスト用に分割する
train_x, test_x, train_y, test_y = train_test_split(x, y,
                                                    test_size=0.166,
                                                    shuffle=True,
                                                    random_state=42,
                                                    stratify=y)
# さらに学習用データを学習用とvalid用に分割する
tr_x, va_x, tr_y, va_y = train_test_split(train_x, train_y,
                                                    test_size=0.2,
                                                    shuffle=True,
                                                    random_state=42,
                                                    stratify=train_y)

In [23]:
print(tr_x.shape)
print(tr_y.shape)
print(va_x.shape)
print(va_y.shape)
print(test_x.shape)
print(test_y.shape)

(379, 30)
(379,)
(95, 30)
(95,)
(95, 30)
(95,)


In [8]:
pd.concat([tr_x,tr_y], axis=1).to_csv('../opt/ml/input/data/train/train.csv', index=False)
pd.concat([va_x,va_y], axis=1).to_csv('../opt/ml/input/data/valid/valid.csv', index=False)
pd.concat([test_x,test_y], axis=1).to_csv('../opt/ml/input/data/test/test.csv', index=False)

## 1-3.学習

In [10]:
import lightgbm as lgb
from sklearn.metrics import accuracy_score

In [11]:
# LightGBM が扱うデータセットの形式に直す
dtrain = lgb.Dataset(tr_x, label=tr_y)
dvalid = lgb.Dataset(va_x, label=va_y)
dtest = lgb.Dataset(test_x)

# 学習用のパラメータ
lgb_params = {
    # 二値分類問題
    'objective': 'binary',
    # 評価指標
    'metrics': 'binary_logloss',
}

In [12]:
# モデルを学習する
# バリデーションデータもモデルに渡し、学習の進行とともにスコアがどう変わるかモニタリングする
# watchlistには学習データおよびバリデーションデータをセットする
#watchlist = [(dtrain, 'train'), (dvalid, 'eval')]
model = lgb.train(lgb_params,
                dtrain,
                num_boost_round=50,  # 学習ラウンド数は適当
                #evals=watchlist
                valid_names=['train','valid'], valid_sets=[dtrain, dvalid]
                )

[LightGBM] [Info] Number of positive: 237, number of negative: 142
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3798
[LightGBM] [Info] Number of data points in the train set: 379, number of used features: 30
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.625330 -> initscore=0.512233
[LightGBM] [Info] Start training from score 0.512233
[1]	train's binary_logloss: 0.583245	valid's binary_logloss: 0.582221
[2]	train's binary_logloss: 0.518624	valid's binary_logloss: 0.519402
[3]	train's binary_logloss: 0.467531	valid's binary_logloss: 0.471914
[4]	train's binary_logloss: 0.423087	valid's binary_logloss: 0.426939
[5]	train's binary_logloss: 0.385211	valid's binary_logloss: 0.391658
[6]	train's binary_logloss: 0.349443	valid's binary_logloss: 0.356531
[7]	train's binary_logloss: 0.317067	valid's binary_logloss: 0.327033
[8]	train's binary_logloss: 0.289598	valid's binary_logloss: 0.299828
[9]	train's binary_logloss: 0.267131	valid's binary_loglos

## 1-4. 予測・評価

In [93]:
# 予測：検証用データが各クラスに分類される確率を計算する
pred_proba = model.predict(test_x)
# しきい値 0.5 で 0, 1 に丸める
pred = np.where(pred_proba > 0.5, 1, 0)
# 精度 (Accuracy) を検証する
acc = accuracy_score(test_y, pred)
print('Accuracy:', acc)

Accuracy: 0.9473684210526315


In [112]:
result = pd.DataFrame(np.append(test_y.reshape(-1,1), pred.reshape(-1,1), axis=1), columns=['target', 'pred'])

In [113]:
### test_yがseriesの場合
s_test_y = pd.Series(test_y)

In [115]:
type(s_test_y)

pandas.core.series.Series

In [119]:
result = pd.DataFrame(np.append(np.array(s_test_y).reshape(-1,1), pred.reshape(-1,1), axis=1), columns=['target', 'pred'])

In [103]:
result.to_csv('../opt/ml/model/result.csv', index=False)

In [105]:
import boto3
s3 = boto3.resource('s3') #S3オブジェクトを取得
s3.meta.client.upload_file('../opt/ml/model/result.csv', 'work-aws-virginia', 'test-trainingjob/output/result.csv')

#bucket = s3.Bucket('バケット名')
#bucket.upload_file('/opt/ml/model/result.csv', '保存先S3のpath')

## 2.ローカル環境で学習ジョブ風に実行する（コンテナ未使用）
コンテナを導入する前に、ローカル環境でSageMaker学習ジョブのように動かします。.pyファイルの動作確認を高速で行うことが目的です。
ディレクトリ構造

## 2-1. データ配置

In [None]:
pd.concat([tr_x,tr_y], axis=1).to_csv('../opt/ml/input/data/train/train.csv', index=False)
pd.concat([va_x,va_y], axis=1).to_csv('../opt/ml/input/data/valid/valid.csv', index=False)
pd.concat([test_x,test_y], axis=1).to_csv('../opt/ml/input/data/test/test.csv', index=False)

## 2-2. ソースコードを準備
../opt/ml/input/data/src/train.pyに配置

In [24]:
!chmod +x ../opt/program/train

In [27]:
!chmod +x ../opt/ml/input/data/src/*

In [28]:
!pygmentize ../opt/ml/input/data/src/train.py

[37m#!/usr/bin/env python[39;49;00m
[34mimport[39;49;00m [04m[36mnumpy[39;49;00m [34mas[39;49;00m [04m[36mnp[39;49;00m
[34mimport[39;49;00m [04m[36mpandas[39;49;00m [34mas[39;49;00m [04m[36mpd[39;49;00m
[34mimport[39;49;00m [04m[36mlightgbm[39;49;00m [34mas[39;49;00m [04m[36mlgb[39;49;00m
[34mfrom[39;49;00m [04m[36msklearn.metrics[39;49;00m [34mimport[39;49;00m accuracy_score

[37m# データ読み込み[39;49;00m
train_df = pd.read_csv([33m'[39;49;00m[33m../opt/ml/input/data/train/train.csv[39;49;00m[33m'[39;49;00m)
valid_df = pd.read_csv([33m'[39;49;00m[33m../opt/ml/input/data/valid/valid.csv[39;49;00m[33m'[39;49;00m)
test_df = pd.read_csv([33m'[39;49;00m[33m../opt/ml/input/data/test/test.csv[39;49;00m[33m'[39;49;00m)

tr_x, tr_y = train_df.drop([[33m'[39;49;00m[33mtarget[39;49;00m[33m'[39;49;00m], axis=[34m1[39;49;00m), train_df[[33m'[39;49;00m[33mtarget[39;49;00m[33m'[39;49;00m]
va_x, va_y = valid_df.drop([[33m'[39;49;00m

In [26]:
!../opt/program/train

train running...
/Users/yshiy/github/sagemaker-byoc/notebook
[LightGBM] [Info] Number of positive: 237, number of negative: 142
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3798
[LightGBM] [Info] Number of data points in the train set: 379, number of used features: 30
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.625330 -> initscore=0.512233
[LightGBM] [Info] Start training from score 0.512233
[1]	train's binary_logloss: 0.583245	valid's binary_logloss: 0.582221
[2]	train's binary_logloss: 0.518624	valid's binary_logloss: 0.519402
[3]	train's binary_logloss: 0.467531	valid's binary_logloss: 0.471914
[4]	train's binary_logloss: 0.423087	valid's binary_logloss: 0.426939
[5]	train's binary_logloss: 0.385211	valid's binary_logloss: 0.391658
[6]	train's binary_logloss: 0.349443	valid's binary_logloss: 0.356531
[7]	train's binary_logloss: 0.317067	valid's binary_logloss: 0.327033
[8]	train's binary_logloss: 0.289598	valid's binary_logloss: 0.2998

# 3.ローカルモードで学習ジョブを実行

### ---------------

# 1.コンテナ準備
## 1-1.Dockerfile (確認のみ)

Dockerfileには、構築したいイメージが記述されています。これは、実行したいシステムの完全なオペレーティングシステムのインストールを記述していると考えることができます。しかし、Dockerコンテナの実行は、基本的な操作のためにホストマシン上のLinuxを利用するため、完全なオペレーティングシステムよりもかなり軽量です。

Pythonサイエンススタックでは、標準的なUbuntuのインストールから始めて、通常のツールを実行してscikit-learnで必要なものをインストールします。最後に、特定のアルゴリズムを実装したコードをコンテナに追加して、実行に適した環境を整えます。

その際、余分なスペースを整理します。これにより、コンテナは小さくなり、起動も速くなります。

例のDockerfileを見てみましょう。

imageには、trainやbacktestに必要なソースは含めないこととする。
（学習ジョブ実行時にS3からコピーする）
よって、dockerイメージ作成時に必要な資材はない。


In [127]:
!cat ../container/lgbm/Dockerfile

FROM python:3.7.5-slim
USER root

RUN apt-get update
RUN apt-get -y install locales && \
    localedef -f UTF-8 -i ja_JP ja_JP.UTF-8
ENV LANG ja_JP.UTF-8
ENV LANGUAGE ja_JP:ja
ENV LC_ALL ja_JP.UTF-8
ENV TZ JST-9
ENV TERM xterm

RUN apt-get install -y vim less
RUN pip install --upgrade pip
RUN pip install --upgrade setuptools

RUN apt-get -y install build-essential
RUN apt-get -y install wget

### install libraries
RUN pip install numpy pandas scikit-learn matplotlib seaborn lightgbm boto3

# Set some environment variables. PYTHONUNBUFFERED keeps Python from buffering our standard
# output stream, which means that logs can be delivered to the user quickly. PYTHONDONTWRITEBYTECODE
# keeps Python from writing the .pyc files which are unnecessary in this case. We also update
# PATH so that the train and serve programs are found when the container is invoked.

ENV PYTHONUNBUFFERED=TRUE
ENV PYTHONDONTWRITEBYTECODE=TRUE
ENV PATH="/opt/program:${PATH}"

# Set up the program in the image
COPY p

## 1-2.Building and registering the container to ECR

以下のシェルコードは、`docker build`を使用してコンテナイメージをビルドし、`docker push`を使用してコンテナイメージをECRにプッシュする方法を示しています。このコードはシェルスクリプト `container/build-and-push.sh` としても提供されており、`build-and-push.sh decision_trees_sample` として実行することで、イメージ `decision_trees_sample` をビルドすることができます。

このコードは、使用しているアカウントと現在のデフォルトリージョン（SageMakerのノートブックインスタンスを使用している場合は、ノートブックインスタンスが作成されたリージョンになります）でECRリポジトリを探します。
リポジトリが存在しない場合、スクリプトはそれを作成します。

https://github.com/aws/amazon-sagemaker-examples/blob/master/advanced_functionality/scikit_bring_your_own/container/build_and_push.sh

以下は、build-and-push.shと同じ内容

In [128]:
%%sh

# The name of our algorithm
algorithm_name=test-trainingjob

cd ../container/lgbm

chmod +x program/train
#chmod +x decision_trees/serve

account=$(aws sts get-caller-identity --query Account --output text)

# Get the region defined in the current configuration (default to us-west-2 if none defined)
region=$(aws configure get region)
region=${region:-us-west-1}

fullname="${account}.dkr.ecr.${region}.amazonaws.com/${algorithm_name}:latest"

# If the repository doesn't exist in ECR, create it.
aws ecr describe-repositories --repository-names "${algorithm_name}" > /dev/null 2>&1

if [ $? -ne 0 ]
then
    aws ecr create-repository --repository-name "${algorithm_name}" > /dev/null
fi

# Get the login command from ECR and execute it directly
aws ecr get-login-password --region ${region}|docker login --username AWS --password-stdin ${fullname}

# Build the docker image locally with the image name and then push it to ECR
# with the full name.

docker build  -t ${algorithm_name} .
docker tag ${algorithm_name} ${fullname}

docker push ${fullname}

Login Succeeded

Use 'docker scan' to run Snyk tests against images to find vulnerabilities and learn how to fix them
The push refers to repository [805433377179.dkr.ecr.us-east-1.amazonaws.com/test-trainingjob]
5f70bf18a086: Preparing
cf45398a159b: Preparing
79a667ff6c52: Preparing
f3b1d227f587: Preparing
adcb570b8c76: Preparing
6accb6f2a916: Preparing
c4faa9f0fb36: Preparing
c84816008bc0: Preparing
6e7f4419aa67: Preparing
5c920f5933f2: Preparing
36c21e895230: Preparing
870ea4318145: Preparing
ca56b6fe98b7: Preparing
459d9d53a256: Preparing
831c5620387f: Preparing
6e7f4419aa67: Waiting
5c920f5933f2: Waiting
36c21e895230: Waiting
870ea4318145: Waiting
ca56b6fe98b7: Waiting
459d9d53a256: Waiting
831c5620387f: Waiting
6accb6f2a916: Waiting
c4faa9f0fb36: Waiting
c84816008bc0: Waiting
5f70bf18a086: Layer already exists
adcb570b8c76: Layer already exists
f3b1d227f587: Layer already exists
6accb6f2a916: Layer already exists
c84816008bc0: Layer already exists
c4faa9f0fb36: Layer already exist

## 疑問：build and push したイメージの動作確認はローカルでできる？
できる。ローカルモード

# -----コンテナ準備完了-----

# 2. データとプログラムをS3にアップロード
・SageMaker　SDKを使う場合。  

https://sagemaker.readthedocs.io/en/stable/api/utility/session.html


・boto3でs3クライアントを使う場合。  

In [54]:
import sagemaker as sage
sess = sage.Session()

# S3 prefix
#prefix = 'DEMO-scikit-byo-iris'
prefix = 'test-trainingjob/src'

In [75]:
#WORK_DIRECTORY = 'data'
#data_location = sess.upload_data(WORK_DIRECTORY, key_prefix=prefix)

train_location = sess.upload_data('../opt/ml/input/data/train', bucket='work-aws-virginia', key_prefix='test-trainingjob/train')
valid_location = sess.upload_data('../opt/ml/input/data/valid', bucket='work-aws-virginia', key_prefix='test-trainingjob/valid')
test_location = sess.upload_data('../opt/ml/input/data/test', bucket='work-aws-virginia', key_prefix='test-trainingjob/test')
src_location = sess.upload_data('../opt/ml/input/data/src', bucket='work-aws-virginia', key_prefix='test-trainingjob/src')

# -----データ準備完了-----

# 3-1.学習ジョブ発行(ローカルモード)

In [76]:
#data_location = 's3://work-aws-virginia/test-rd/candles/'
data_location = 's3://work-aws-virginia/test-trainingjob/train/'


In [77]:
account = sess.boto_session.client('sts').get_caller_identity()['Account']
region = sess.boto_session.region_name

#image = '{}.dkr.ecr.{}.amazonaws.com/sagemaker-decision-trees:latest'.format(account, region)

image = '{}.dkr.ecr.{}.amazonaws.com/test-trainingjob:latest'.format(account, region)

In [78]:
image

'805433377179.dkr.ecr.us-east-1.amazonaws.com/test-trainingjob:latest'

In [79]:
hyperparameters={'candle_window': 15,
                 'horizon': 300,
                 'target': 'tgt_diff'}

In [80]:
role = 'arn:aws:iam::805433377179:role/sagemaker-sdk-for-local'

lgbm = sage.estimator.Estimator(image_uri=image,
                                #entasdfdfry_point1='run.sh',
                                #source_sddddddddddir='src',
                                role=role, 
                                instance_count=1,
                                instance_type='local',
                                #instance_type='ml.c4.2xlarge',
                                output_path="s3://{}/output".format(sess.default_bucket()),
                                hyperparameters=hyperparameters,
                                #sagemaker_session=sess)
)

In [81]:
lgbm.fit({'train':'s3://work-aws-virginia/test-trainingjob/train/',
          'valid':'s3://work-aws-virginia/test-trainingjob/valid/',
          'test' :'s3://work-aws-virginia/test-trainingjob/test/',
          'src':'s3://work-aws-virginia/test-trainingjob/src/'})

Creating ow2fcchm91-algo-1-mzkfs ... 
Creating ow2fcchm91-algo-1-mzkfs ... done
Docker Compose is now in the Docker CLI, try `docker compose up`

Attaching to ow2fcchm91-algo-1-mzkfs
[36mow2fcchm91-algo-1-mzkfs |[0m [LightGBM] [Info] Number of positive: 237, number of negative: 142
[36mow2fcchm91-algo-1-mzkfs |[0m You can set `force_col_wise=true` to remove the overhead.
[36mow2fcchm91-algo-1-mzkfs |[0m [LightGBM] [Info] Total Bins 3798
[36mow2fcchm91-algo-1-mzkfs |[0m [LightGBM] [Info] Number of data points in the train set: 379, number of used features: 30
[36mow2fcchm91-algo-1-mzkfs |[0m [LightGBM] [Info] [binary:BoostFromScore]: pavg=0.625330 -> initscore=0.512233
[36mow2fcchm91-algo-1-mzkfs |[0m [LightGBM] [Info] Start training from score 0.512233
[36mow2fcchm91-algo-1-mzkfs |[0m [1]	train's binary_logloss: 0.583245	valid's binary_logloss: 0.582221
[36mow2fcchm91-algo-1-mzkfs |[0m [2]	train's binary_logloss: 0.518624	valid's binary_logloss: 0.519402
[36mow2fcchm91

# 3-2.学習ジョブ発行(SageMaker)

### 下準備

In [129]:
import sagemaker as sage
sess = sage.Session()

prefix = 'test-trainingjob/src'

In [130]:
### ソースコードをアップロード
src_location = sess.upload_data('../opt/ml/input/data/src', bucket='work-aws-virginia', key_prefix=prefix)

### ハイパーパラメータ設定（ローカルファイルから読み込み）
hyperparameters={"timeframe": 15,
                 "horizon" : 300,
                 "hist_row" : 2
}
print(hyperparameters)

{'timeframe': 15, 'horizon': 300, 'hist_row': 2}


In [122]:
HYPERPARAMETER_JSON_PATH = "../../../../../opt/ml/input/config/hyperparameters.json"
HYPERPARAMETER_JSON_PATH = "opt/ml/input/config/hyperparameters.json"

import json
### ハイパーパラメータ設定（ローカルファイルから読み込み）
with open(HYPERPARAMETER_JSON_PATH, "r") as f:
    hyperparameters = json.load(f)

print(hyperparameters)

FileNotFoundError: [Errno 2] No such file or directory: 'opt/ml/input/config/hyperparameters.json'

## processingはhyperparameter使えたか？

In [131]:
role = 'arn:aws:iam::805433377179:role/sagemaker-sdk-for-local'
#role = 'arn:aws:iam::805433377179:role/service-role/AmazonSageMaker-ExecutionRole-20191212T111531'

account = sess.boto_session.client('sts').get_caller_identity()['Account']
region = sess.boto_session.region_name
image = '{}.dkr.ecr.{}.amazonaws.com/test-trainingjob:latest'.format(account, region)

lgbm = sage.estimator.Estimator(image_uri=image,
                                #entasdfdfry_point1='run.sh',　### 存在しない引数を指定しても通ってします。
                                #source_sddddddddddir='src',
                                role=role, 
                                instance_count=1,
                                #instance_type='local',
                                instance_type='ml.c4.2xlarge',
                                #output_path="s3://{}/output".format(sess.default_bucket()),
                                output_path='s3://work-aws-virginia/test-trainingjob/output/',
                                #sagemaker_session=sess # ノートブックインスタンスで実行する場合にIAMを渡す
                                hyperparameters=hyperparameters,
                                )

In [132]:
for
                                hyperparameters=hyperparameters,


lgbm = sage.estimator.Estimator

lgbm.fit({'train':'s3://work-aws-virginia/test-trainingjob/train/',
          'valid':'s3://work-aws-virginia/test-trainingjob/valid/',
          'test' :'s3://work-aws-virginia/test-trainingjob/test/',
          'src':'s3://work-aws-virginia/test-trainingjob/src/'},
          wait=False
)

2021-05-27 14:24:18 Starting - Starting the training job...
2021-05-27 14:24:41 Starting - Launching requested ML instancesProfilerReport-1622125457: InProgress
......
2021-05-27 14:25:41 Starting - Preparing the instances for training......
2021-05-27 14:27:01 Downloading - Downloading input data
2021-05-27 14:27:01 Training - Downloading the training image...
2021-05-27 14:27:33 Uploading - Uploading generated training model
2021-05-27 14:27:33 Completed - Training job completed
[34m[LightGBM] [Info] Number of positive: 237, number of negative: 142[0m
[34mYou can set `force_row_wise=true` to remove the overhead.[0m
[34mAnd if memory is not enough, you can set `force_col_wise=true`.[0m
[34m[LightGBM] [Info] Total Bins 3798[0m
[34m[LightGBM] [Info] Number of data points in the train set: 379, number of used features: 30[0m
[34m[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.625330 -> initscore=0.512233[0m
[34m[LightGBM] [Info] Start training from score 0.512233[0m
[34m



https://sagemaker.readthedocs.io/en/stable/api/training/estimators.html#sagemaker.estimator.EstimatorBase.fit

・並行してジョブ発行する場合は、wait=Falseを使う