In [1]:
library(reticulate) 
sagemaker <- import('sagemaker')

In [2]:
# Open a SageMaker session through the Python SDK and ask it for its default
# S3 bucket; later cells reference both `session` and `bucket`.
session <- sagemaker$Session() 
bucket <- session$default_bucket()

In [3]:
# Execution role reported by the SDK; it is handed to the estimator as `role`.
# NOTE(review): presumably this only resolves when run inside a SageMaker
# notebook/instance -- confirm for other environments.
role_arn <- sagemaker$get_execution_role()

In [54]:
library(readr)
# Load the raw iris data. The file's own header row is skipped (skip = 1) and
# replaced with the explicit column names assigned below.
# Note: this object shadows the built-in `datasets::iris` for the rest of the
# session.
iris <- read_csv(file = 'iris.csv', col_names = FALSE, skip = 1)
names(iris) <- c("Sepal.Length", "Sepal.Width", "Petal.Length", "Petal.Width",
                 "Species")


Parsed with column specification:
cols(
  X1 = col_double(),
  X2 = col_double(),
  X3 = col_double(),
  X4 = col_double(),
  X5 = col_character()
)


In [55]:
# Replace the species labels with integer codes. The codes are the factor
# level positions, so they start at 1 and follow the order of the levels.
species_codes <- as.integer(as.factor(iris$Species))
iris$Species <- species_codes

# Preview the first rows to confirm the recoding.
head(iris)

Sepal.Length,Sepal.Width,Petal.Length,Petal.Width,Species
5.1,3.5,1.4,0.2,1
4.9,3.0,1.4,0.2,1
4.7,3.2,1.3,0.2,1
4.6,3.1,1.5,0.2,1
5.0,3.6,1.4,0.2,1
5.4,3.9,1.7,0.4,1


In [46]:
library(dplyr)
# Split the data 70% / 15% / 15% into train, test and validation sets.
#
# The split is done on shuffled row positions rather than with anti_join():
# a join on all columns also removes rows that merely have the same values as
# a sampled row, so duplicated observations would silently disappear and the
# three sets would no longer partition the data. Working on positions also
# leaves `iris` itself untouched, so this cell can be re-run safely.
set.seed(42L)  # fixed seed so the same split is produced on every run

n_rows <- nrow(iris)
shuffled_rows <- sample.int(n_rows)
position <- seq_len(n_rows)

# Same rounding as sample_frac(): 70% for training, then half of the rest.
n_train <- round(0.7 * n_rows)
n_test <- round(0.5 * (n_rows - n_train))

iris_train <- iris[shuffled_rows[position <= n_train], ]
iris_test <- iris[
  shuffled_rows[position > n_train & position <= n_train + n_test],
]
iris_valid <- iris[shuffled_rows[position > n_train + n_test], ]

Joining, by = c("Sepal.Length", "Sepal.Width", "Petal.Length", "Petal.Width", "Species")
Joining, by = c("Sepal.Length", "Sepal.Width", "Petal.Length", "Petal.Width", "Species")


In [47]:
# Write the training and validation sets as header-less CSV files.
#
# For CSV input the SageMaker XGBoost algorithm treats the FIRST column as the
# label, so the target (`Species`) is moved to the front before writing;
# otherwise the model would be trained to predict `Sepal.Length`.
target_first <- c("Species", setdiff(names(iris_train), "Species"))

write_csv(iris_train[, target_first], 'iris_train.csv', col_names = FALSE)
write_csv(iris_valid[, target_first], 'iris_valid.csv', col_names = FALSE)

In [48]:
# Upload a local file to the session bucket under the 'data' key prefix and
# return whatever the SDK call returns for it.
upload_to_data_prefix <- function(local_path) {
  session$upload_data(
    path = local_path,
    bucket = bucket,
    key_prefix = 'data'
  )
}

s3_train <- upload_to_data_prefix('iris_train.csv')
s3_valid <- upload_to_data_prefix('iris_valid.csv')

In [49]:
# Wrap an uploaded S3 location as a CSV input channel for the training job.
as_csv_channel <- function(s3_location) {
  sagemaker$s3_input(s3_data = s3_location, content_type = 'csv')
}

s3_train_input <- as_csv_channel(s3_train)
s3_valid_input <- as_csv_channel(s3_valid)

In [50]:
# Look up the XGBoost image registry for the session's region and append the
# image name and tag to form the full container URI.
region <- session$boto_region_name
registry <- sagemaker$amazon$amazon_estimator$registry(
  region,
  algorithm = 'xgboost'
)
container <- paste0(registry, '/xgboost:latest')

# Echo the resulting URI in the cell output.
container

In [51]:
# Location handed to the estimator as its output path: the 'output' prefix of
# the session bucket.
s3_output <- paste0('s3://', bucket, '/output')

# Configure the training job around the XGBoost container.
estimator <- sagemaker$estimator$Estimator(
  # What to run and under which role.
  image_name = container,
  role = role_arn,

  # Compute resources and limits (integers must be passed as R integers).
  train_instance_type = 'ml.m5.large',
  train_instance_count = 1L,
  train_volume_size = 30L,
  train_max_run = 3600L,

  # Data in / artifacts out.
  input_mode = 'File',
  output_path = s3_output,

  # Explicitly left unset (NULL is passed through to Python as None).
  output_kms_key = NULL,
  base_job_name = NULL,
  sagemaker_session = NULL
)

In [52]:
# Only the number of boosting rounds is configured here.
# NOTE(review): no objective is set, so the algorithm's default objective
# applies. `Species` is a class label -- confirm whether something like
# objective = 'multi:softmax' with num_class = 3 (which requires labels
# 0, 1, 2) was intended.
estimator$set_hyperparameters(num_round = 100L)

# Training job names must be unique, so the timestamp includes the date as
# well as the time; a time-only suffix collides with a job started at the same
# clock time on another day.
job_name <- paste('sagemaker-train-xgboost-iris',
                  format(Sys.time(), '%Y-%m-%d-%H-%M-%S'),
                  sep = '-')

# Channel names map to the uploaded training and validation data.
input_data <- list('train' = s3_train_input,
                   'validation' = s3_valid_input)

# Launch the training job with the inputs and job name defined above.
estimator$fit(inputs = input_data,
              job_name = job_name)

In [53]:
# Show the estimator's `model_data` attribute.
# NOTE(review): presumably the S3 location of the trained model artifact --
# confirm against the SDK documentation.
estimator$model_data