In [1]:
import pandas as pd
dataset = pd.read_csv('Boston.csv')

In [2]:
print(dataset.shape)

(506, 15)


In [3]:
dataset[:5]

Unnamed: 0.1,Unnamed: 0,crim,zn,indus,chas,nox,rm,age,dis,rad,tax,ptratio,black,lstat,medv
0,1,0.00632,18.0,2.31,0,0.538,6.575,65.2,4.09,1,296,15.3,396.9,4.98,24.0
1,2,0.02731,0.0,7.07,0,0.469,6.421,78.9,4.9671,2,242,17.8,396.9,9.14,21.6
2,3,0.02729,0.0,7.07,0,0.469,7.185,61.1,4.9671,2,242,17.8,392.83,4.03,34.7
3,4,0.03237,0.0,2.18,0,0.458,6.998,45.8,6.0622,3,222,18.7,394.63,2.94,33.4
4,5,0.06905,0.0,2.18,0,0.458,7.147,54.2,6.0622,3,222,18.7,396.9,5.33,36.2


In [4]:
dataset = pd.concat([dataset['medv'],
                     dataset.drop(['medv'], axis=1)],
                    axis=1)

In [5]:
from sklearn.model_selection import train_test_split
training_dataset, validation_dataset = train_test_split(dataset, test_size=0.1)

In [6]:
training_dataset.to_csv('training_dataset.csv',index=False, header=False)
validation_dataset.to_csv('validation_dataset.csv',index=False, header=False)

In [7]:
import sagemaker
sess = sagemaker.Session()
bucket = sess.default_bucket()

In [8]:
prefix = 'boston-housing'
training_data_path = sess.upload_data(path='training_dataset.csv',key_prefix=prefix + '/input/training')
validation_data_path = sess.upload_data(path='validation_dataset.csv',key_prefix=prefix + '/input/validation')
print(training_data_path)
print(validation_data_path)

s3://sagemaker-us-east-2-664224523979/boston-housing/input/training/training_dataset.csv
s3://sagemaker-us-east-2-664224523979/boston-housing/input/validation/validation_dataset.csv


In [9]:
import boto3
from sagemaker import image_uris
region = boto3.Session().region_name
container = image_uris.retrieve('linear-learner', region)

In [11]:
from sagemaker.estimator import Estimator
ll_estimator = Estimator(
    container,
    role=sagemaker.get_execution_role(),
    instance_count=1,
    instance_type='ml.m5.xlarge',
    output_path='s3://{}/{}/output'.format(bucket,
                                           prefix))

In [12]:
ll_estimator.set_hyperparameters(
    predictor_type='regressor',
    mini_batch_size=32)

In [13]:
training_data_channel = sagemaker.TrainingInput(
    s3_data=training_data_path,
    content_type='text/csv')

In [14]:
validation_data_channel = sagemaker.TrainingInput(
    s3_data=validation_data_path,
    content_type='text/csv')

In [15]:
ll_estimator.fit({'train': training_data_channel,'validation': validation_data_channel})

2021-04-04 12:10:00 Starting - Starting the training job...
2021-04-04 12:10:02 Starting - Launching requested ML instancesProfilerReport-1617538200: InProgress
......
2021-04-04 12:11:25 Starting - Preparing the instances for training......
2021-04-04 12:12:30 Downloading - Downloading input data
2021-04-04 12:12:30 Training - Downloading the training image...
2021-04-04 12:13:00 Uploading - Uploading generated training model[34mDocker entrypoint called with argument(s): train[0m
[34mRunning default environment configuration script[0m
[34m[04/04/2021 12:12:47 INFO 140225025931072] Reading default configuration from /opt/amazon/lib/python3.7/site-packages/algorithm/resources/default-input.json: {'mini_batch_size': '1000', 'epochs': '15', 'feature_dim': 'auto', 'use_bias': 'true', 'binary_classifier_model_selection_criteria': 'accuracy', 'f_beta': '1.0', 'target_recall': '0.8', 'target_precision': '0.8', 'num_models': 'auto', 'num_calibration_samples': '10000000', 'init_method': 'u

In [16]:
from time import strftime, gmtime
timestamp = strftime('%d-%H-%M-%S', gmtime())
endpoint_name = 'linear-learner-demo-'+timestamp
print(endpoint_name)

linear-learner-demo-04-12-13-53


In [18]:
ll_predictor = ll_estimator.deploy(
    endpoint_name=endpoint_name,
    initial_instance_count=1,
    instance_type='ml.t2.medium')

-------------------!

In [23]:
#ll_predictor.content_type = 'text/csv'
ll_predictor.serializer = sagemaker.serializers.CSVSerializer()
ll_predictor.deserializer = sagemaker.deserializers.CSVDeserializer()
test_sample = '0.00632, 18, 2.31, 0, 0.538, 6.575, 65.2, 4.09, 1, 296, 15.3, 396.9, 4.98, 24'
response = ll_predictor.predict(test_sample)
print(response)

[['-61.887508392333984']]


In [24]:
test_samples = ["0.14455, 12.5, 7.87, 0, 0.524, 6.172,96.1, 5.9505, 5, 311, 15.2, 396.9, 19.15, 27.1"]
response = ll_predictor.predict(test_samples)
print(response)

[['75.16258239746094']]


In [25]:
ll_predictor.delete_endpoint()