# Imports

In [69]:
import logging
import boto3
from botocore.exceptions import ClientError
import pandas as pd

from sagemaker.amazon.amazon_estimator import get_image_uri
from sagemaker.session import s3_input
from sagemaker import get_execution_role
from sagemaker import Session
from sagemaker.estimator import Estimator

In [70]:
bucket_name = 'ye-1468'
region = 'eu-central-1'

In [71]:
session = boto3.Session(
    region_name=region
)

In [72]:
# Now we can create low-level clients or resource clients from our custom session
s3_client = boto3.client('s3')
s3 = boto3.resource('s3')

# Training

## Data

In [73]:
train_data = 's3://aida-project/niy/train.csv'
test_data = 's3://aida-project/niy/test.csv'

# create s3 paths for the objects
s3_input_train = s3_input(train_data, content_type='text/csv')
s3_input_test = s3_input(test_data, content_type='text/csv')

data_channels = {'train': s3_input_train, 'validation': s3_input_test}

's3_input' class will be renamed to 'TrainingInput' in SageMaker Python SDK v2.
's3_input' class will be renamed to 'TrainingInput' in SageMaker Python SDK v2.


In [75]:
# get execution role
role = get_execution_role()
# create session
sess = Session()

## K_Nearest_Neighbors

In [76]:
# Use a previously-built, AWS K_Nearest_Neighbors model for training
container = get_image_uri(region_name=region,
                          repo_name='knn')

'get_image_uri' method will be deprecated in favor of 'ImageURIProvider' class in SageMaker Python SDK v2.


In [77]:
import sagemaker
knn=sagemaker.estimator.Estimator(container,
        get_execution_role(),
    train_instance_count=1,
    train_instance_type='ml.m4.xlarge',
    output_path='s3://aida-project/niy/output',
    sagemaker_session=sagemaker.Session(),
    enable_sagemaker_metrics=True,
    base_job_name = 'knnigo'

)


knn.set_hyperparameters(**{
    'k': 10,
    'predictor_type': 'regressor',
    'sample_size': 5000
})


knn.fit({"train": s3_input_train, "test": s3_input_test}, wait=True)


Parameter image_name will be renamed to image_uri in SageMaker Python SDK v2.


2020-11-19 18:16:01 Starting - Starting the training job...
2020-11-19 18:16:04 Starting - Launching requested ML instances......
2020-11-19 18:17:26 Starting - Preparing the instances for training......
2020-11-19 18:18:28 Downloading - Downloading input data...
2020-11-19 18:18:43 Training - Downloading the training image...
2020-11-19 18:19:22 Training - Training image download completed. Training in progress.[34mDocker entrypoint called with argument(s): train[0m
[34mRunning default environment configuration script[0m
[34m[11/19/2020 18:19:26 INFO 140404386948928] Reading default configuration from /opt/amazon/lib/python2.7/site-packages/algorithm/resources/default-conf.json: {u'index_metric': u'L2', u'_tuning_objective_metric': u'', u'_num_gpus': u'auto', u'_log_level': u'info', u'feature_dim': u'auto', u'faiss_index_ivf_nlists': u'auto', u'epochs': u'1', u'index_type': u'faiss.Flat', u'_faiss_index_nprobe': u'5', u'_kvstore': u'dist_async', u'_num_kv_servers': u'1', u'mini_b

In [63]:
from sagemaker.analytics import TrainingJobAnalytics

training_job_name = 'knnigo-2020-11-19-12-13-12-216'
metrics_dataframe = TrainingJobAnalytics(training_job_name=training_job_name).dataframe()
metrics_dataframe



Unnamed: 0,timestamp,metric_name,value
0,0.0,test:absolute_loss,1.120176
1,0.0,train:progress,100.0
2,0.0,test:mse,2.081062
3,0.0,train:throughput,27211.454277


In [67]:
import math
print('rmse:', math.sqrt(2.081062))

rmse: 1.4425886454564933


https://medium.com/datadriveninvestor/k-nearest-neighbors-in-python-hyperparameters-tuning-716734bc557f