In [1]:
# Upgrade the SageMaker Python SDK to the latest release
!pip3 install -U sagemaker

Looking in indexes: https://pypi.org/simple, https://pip.repos.neuron.amazonaws.com


S3 버킷 주소와, 해당 버킷에 접근할 권한을 가진 IAM 역할(Role) ARN이 필요합니다.

In [2]:
import os
import boto3
import re
import sagemaker

 
# Build the SageMaker session once and reuse it for every lookup below,
# instead of constructing a fresh Session for each attribute.
sagemaker_session = sagemaker.Session()

role = sagemaker.get_execution_role()  # IAM role attached to this notebook / SageMaker session
region = sagemaker_session.boto_region_name  # region the session's boto configuration points at
bucket = sagemaker_session.default_bucket()  # default bucket (region + account based name), created automatically
prefix = "4N/TEST"  # key prefix under which this notebook stores its objects

# Resulting S3 location:
# sagemaker-ap-northeast-2-197..../4N/TEST/

In [3]:
def upload_to_s3(bucket, channel, filename):
    """Upload a local file to S3 under the notebook's key prefix.

    The object key is ``prefix + "/" + channel``, where ``prefix`` is the
    module-level constant defined in an earlier cell.

    Args:
        bucket: Name of the destination S3 bucket.
        channel: Key suffix (path and object name) appended to ``prefix``.
        filename: Path of the local file to upload.

    Returns:
        None.

    Raises:
        OSError: If ``filename`` cannot be opened for reading.
    """
    key = prefix + "/" + channel
    # The context manager closes the handle even when the upload raises;
    # the file used to be opened and never closed.
    with open(filename, "rb") as fobj:
        write_to_s3(fobj, bucket, key)

In [4]:
def write_to_s3(fobj, bucket, key):
    """Upload an open file object to ``bucket`` at object key ``key``.

    A boto3 session is created for the module-level ``region`` on each call.

    Args:
        fobj: File-like object handed to ``upload_fileobj``.
        bucket: Name of the destination S3 bucket.
        key: Object key (path) inside the bucket.

    Returns:
        Whatever ``upload_fileobj`` returns.
    """
    aws_session = boto3.Session(region_name=region)  # start a session for the configured region
    s3_resource = aws_session.resource("s3")  # S3 resource interface
    target_object = s3_resource.Bucket(bucket).Object(key)  # handle for bucket + key
    return target_object.upload_fileobj(fobj)  # send the file contents

In [5]:
# Local directory and file names of the abalone dataset splits.
DATA_DIR = "data"  # local folder that contains the files below
FILE_TRAIN_0 = "abalone.train_0"  # training split, part 0
FILE_TRAIN_1 = "abalone.train_1"  # training split, part 1
FILE_VALIDATION = "abalone.validation"  # validation split
FILE_TEST = "abalone.test"  # test split

In [6]:
s3 = boto3.client("s3")  # low-level client; the uploads below do not use it

# Upload each dataset split to its channel key under `prefix`. Local paths are
# built from the DATA_DIR / FILE_* constants rather than repeated literals, so
# renaming a file only needs one change.
for channel, local_name in (
    ("train/train_0.libsvm", FILE_TRAIN_0),
    ("train/train_1.libsvm", FILE_TRAIN_1),
    ("validation/validation.libsvm", FILE_VALIDATION),
    ("test/test.libsvm", FILE_TEST),
):
    upload_to_s3(bucket, channel, DATA_DIR + "/" + local_name)

In [7]:
%%writefile ./script.py

import argparse
import json
import logging
import os
import pandas as pd
import pickle as pkl

from sagemaker_containers import entry_point #교육 또는 추론을 위한 컨테이너 이미지를 생성하는 데 사용됩니다.
from sagemaker_xgboost_container.data_utils import get_dmatrix #dmatrix 형태 
from sagemaker_xgboost_container import distributed

import xgboost as xgb


def _xgb_train(params, dtrain, evals, num_boost_round, model_dir, is_master):

    booster = xgb.train(params=params,
                        dtrain=dtrain,
                        evals=evals,
                        num_boost_round=num_boost_round)

    if is_master:
        model_location = model_dir + '/xgboost-model'
        pkl.dump(booster, open(model_location, 'wb'))
        logging.info("Stored trained model at {}".format(model_location))


if __name__ == '__main__':
    # Training entry point: parse hyperparameters and SageMaker settings, load
    # the train/validation data, then train on one host or across several.
    parser = argparse.ArgumentParser()

    # Hyperparameters are described here.
    parser.add_argument('--max_depth', type=int)
    parser.add_argument('--eta', type=float)
    # gamma and min_child_weight are real-valued in XGBoost; parsing them as
    # float still accepts "4" / "6" and no longer rejects values such as "0.5".
    parser.add_argument('--gamma', type=float)
    parser.add_argument('--min_child_weight', type=float)
    parser.add_argument('--subsample', type=float)
    parser.add_argument('--verbosity', type=int)
    parser.add_argument('--objective', type=str)
    parser.add_argument('--num_round', type=int)
    parser.add_argument('--tree_method', type=str, default="auto")
    parser.add_argument('--predictor', type=str, default="auto")

    # Sagemaker specific arguments. Defaults are set in the environment variables.
    parser.add_argument('--output_data_dir', type=str, default=os.environ.get('SM_OUTPUT_DATA_DIR'))
    parser.add_argument('--model_dir', type=str, default=os.environ.get('SM_MODEL_DIR'))
    parser.add_argument('--train', type=str, default=os.environ.get('SM_CHANNEL_TRAIN'))
    parser.add_argument('--validation', type=str, default=os.environ.get('SM_CHANNEL_VALIDATION'))
    # JSON-formatted string listing every host that is part of the current
    # (possibly distributed) training job.
    parser.add_argument('--sm_hosts', type=str, default=os.environ.get('SM_HOSTS'))
    # Name of the host (machine/node/container) this script is running on.
    parser.add_argument('--sm_current_host', type=str, default=os.environ.get('SM_CURRENT_HOST'))

    # Unknown arguments are ignored rather than treated as errors.
    args, _ = parser.parse_known_args()

    # Get SageMaker host information from runtime environment variables
    sm_hosts = json.loads(args.sm_hosts)
    sm_current_host = args.sm_current_host

    dtrain = get_dmatrix(args.train, 'libsvm')
    dval = get_dmatrix(args.validation, 'libsvm')
    # Only evaluate on the validation set when one was actually loaded.
    watchlist = [(dtrain, 'train'), (dval, 'validation')] if dval is not None else [(dtrain, 'train')]

    train_hp = {
        'max_depth': args.max_depth,
        'eta': args.eta,
        'gamma': args.gamma,
        'min_child_weight': args.min_child_weight,
        'subsample': args.subsample,
        'verbosity': args.verbosity,
        'objective': args.objective,
        'tree_method': args.tree_method,
        'predictor': args.predictor,
    }

    # Keyword arguments for _xgb_train; 'is_master' is added below for the
    # single-host case and is not set here for the distributed case.
    xgb_train_args = dict(
        params=train_hp,
        dtrain=dtrain,
        evals=watchlist,
        num_boost_round=args.num_round,
        model_dir=args.model_dir)

    if len(sm_hosts) > 1:
        # Wait until all hosts are able to find each other
        entry_point._wait_hostname_resolution()

        # Execute training function after initializing rabit.
        distributed.rabit_run(
            exec_fun=_xgb_train,  # the training function defined above
            args=xgb_train_args,
            # Whether this machine/node/container should take part in the
            # distributed training run.
            include_in_training=(dtrain is not None),
            hosts=sm_hosts,
            current_host=sm_current_host,
            update_rabit_args=True
        )
    else:
        # If single node training, call training method directly.
        # Explicit None check, matching the `is not None` tests used above.
        if dtrain is not None:
            xgb_train_args['is_master'] = True
            _xgb_train(**xgb_train_args)
        else:
            raise ValueError("Training channel must have data to train model.")


def model_fn(model_dir):
    """Deserialize and return the fitted model stored in ``model_dir``.

    The file name must match the one used when the model is serialized in
    the ``_xgb_train`` function.

    Args:
        model_dir: Directory that contains the ``xgboost-model`` file.

    Returns:
        The unpickled booster object.
    """
    model_file = 'xgboost-model'
    # NOTE(security): unpickling can execute arbitrary code, so only load
    # model artifacts that come from a trusted training job.
    # The context manager closes the file handle once loading is done.
    with open(os.path.join(model_dir, model_file), 'rb') as model_fh:
        booster = pkl.load(model_fh)
    return booster

Overwriting ./script.py


In [8]:
# Hyperparameters for the training script; every value is given as a string.
hyperparams = dict(
    max_depth="5",
    eta="0.2",
    gamma="4",
    min_child_weight="6",
    subsample="0.7",
    objective="reg:squarederror",
    num_round="50",
    verbosity="2",
)

# Instance type, S3 output location and input content type for the job.
instance_type = "ml.m5.2xlarge"
output_path = "s3://{}/{}/{}/output".format(
    bucket, prefix, "result"
)  # i.e. 4N/TEST/result/...
content_type = "libsvm"

In [9]:
# Open Source distributed script mode
from sagemaker.session import Session
from sagemaker.inputs import TrainingInput
from sagemaker.xgboost.estimator import XGBoost

session = Session()

# Estimator that runs script.py as the training entry point.
xgb_script_mode_estimator = XGBoost(
    entry_point="script.py",
    framework_version="1.7-1",  # Note: framework_version is mandatory
    hyperparameters=hyperparams,
    role=role,
    instance_count=2,  # more than one instance; script.py's multi-host branch is expected to run
    instance_type=instance_type,  # reuse the configured value instead of repeating the literal
    output_path=output_path,  # 4N/TEST/result/...
)

# Input channels: everything under <prefix>/train/ and <prefix>/validation/.
train_input = TrainingInput(
    "s3://{}/{}/{}/".format(bucket, prefix, "train"), content_type=content_type
)
validation_input = TrainingInput(
    "s3://{}/{}/{}/".format(bucket, prefix, "validation"), content_type=content_type
)

In [10]:
# Start the training job with the "train" and "validation" input channels.
xgb_script_mode_estimator.fit({"train": train_input, "validation": validation_input})

INFO:sagemaker:Creating training-job with name: sagemaker-xgboost-2023-08-07-07-21-57-974


Using provided s3_resource
2023-08-07 07:21:58 Starting - Starting the training job...
2023-08-07 07:22:14 Starting - Preparing the instances for training......
2023-08-07 07:23:13 Downloading - Downloading input data...
2023-08-07 07:23:53 Training - Training image download completed. Training in progress...
2023-08-07 07:24:29 Uploading - Uploading generated training model[34m[2023-08-07 07:24:09.887 ip-10-0-146-147.ap-northeast-2.compute.internal:7 INFO utils.py:28] RULE_JOB_STOP_SIGNAL_FILENAME: None[0m
[34m[2023-08-07 07:24:09.910 ip-10-0-146-147.ap-northeast-2.compute.internal:7 INFO profiler_config_parser.py:111] User has disabled profiler.[0m
[34m[2023-08-07:07:24:10:INFO] Imported framework sagemaker_xgboost_container.training[0m
[34m[2023-08-07:07:24:10:INFO] No GPUs detected (normal if no gpus installed)[0m
[34m[2023-08-07:07:24:10:INFO] Invoking user training script.[0m
[34m[2023-08-07:07:24:10:INFO] Module script does not provide a setup.py. [0m
[34mGenerating

In [11]:
# Deploy the trained model to an endpoint backed by a single instance.
predictor = xgb_script_mode_estimator.deploy(
    initial_instance_count=1, instance_type="ml.m5.2xlarge"
)

INFO:sagemaker:Creating model with name: sagemaker-xgboost-2023-08-07-07-25-11-158
INFO:sagemaker:Creating endpoint-config with name sagemaker-xgboost-2023-08-07-07-25-11-158
INFO:sagemaker:Creating endpoint with name sagemaker-xgboost-2023-08-07-07-25-11-158


----!

In [12]:
# Read the local test split as text; it is used as the request body below.
test_file = DATA_DIR + "/" + FILE_TEST
with open(test_file, "r") as f:
    payload = f.read()

In [13]:
# Invoke the deployed endpoint through the session's runtime client,
# sending the test payload with a libsvm content type.
runtime_client = session.sagemaker_runtime_client
response = runtime_client.invoke_endpoint(
    EndpointName=predictor.endpoint_name, ContentType="text/libsvm", Body=payload
)
# The response body is a stream: read it fully and decode it to text.
result = response["Body"].read().decode("ascii")
print("Predicted values are {}.".format(result))

Predicted values are [9.646814346313477, 7.397919654846191, 13.632970809936523, 8.086695671081543, 12.005745887756348, 11.060908317565918, 11.607211112976074, 18.032690048217773, 9.34194564819336, 8.303901672363281, 9.559494018554688, 7.842275142669678, 10.882755279541016, 9.428963661193848, 14.658526420593262, 10.60384750366211, 12.915538787841797, 10.45955753326416, 8.745235443115234, 11.320341110229492, 6.683808326721191, 12.219032287597656, 8.360949516296387, 9.428153991699219, 12.644696235656738, 6.608384609222412, 10.417551040649414, 12.154109001159668, 11.532093048095703, 11.111688613891602, 11.405569076538086, 9.32242488861084, 7.12648868560791, 12.195525169372559, 10.428147315979004, 20.313919067382812, 10.755400657653809, 7.495514869689941, 10.846698760986328, 11.92020034790039, 9.596101760864258, 15.133988380432129, 10.422489166259766, 9.807592391967773, 6.729530334472656, 6.507246017456055, 8.679537773132324, 10.01765251159668, 10.528303146362305, 11.349781036376953, 14.336

In [14]:
# Clean up: remove the model and the endpoint created by deploy() above.
predictor.delete_model()
predictor.delete_endpoint()

INFO:sagemaker:Deleting model with name: sagemaker-xgboost-2023-08-07-07-25-11-158
INFO:sagemaker:Deleting endpoint configuration with name: sagemaker-xgboost-2023-08-07-07-25-11-158
INFO:sagemaker:Deleting endpoint with name: sagemaker-xgboost-2023-08-07-07-25-11-158
