In [1]:
import sagemaker

# One shared SageMaker session; the region and default bucket are read from it.
sagemaker_session = sagemaker.Session()
region = sagemaker_session.boto_region_name

# S3 layout for this notebook: <default bucket>/<prefix>/...
prefix = "sagemaker/lung-cancer"
bucket = sagemaker_session.default_bucket()

# Execution role that is later handed to the estimator.
role = sagemaker.get_execution_role()

sagemaker.config INFO - Not applying SDK defaults from location: /etc/xdg/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /home/ec2-user/.config/sagemaker/config.yaml


In [2]:
import boto3
import pandas as pd
import numpy as np

In [3]:
# Read the patient data set directly from its s3:// URL.
df = pd.read_csv(
    filepath_or_buffer="s3://bdx-demo-sagemaker/cancer_patient_data_sets.csv"
)

severe performance issues, see also https://github.com/dask/dask/issues/10276

To fix, you should specify a lower version bound on s3fs, or
update the current installation.



In [4]:
from sklearn.preprocessing import LabelEncoder

In [5]:
# Encode the "Level" target column as integer class ids. The fitted encoder is
# kept so the ids can be mapped back to the original labels later.
label_encoding = LabelEncoder()
label_encoding.fit(df["Level"])
y = label_encoding.transform(df["Level"])

In [6]:
# Feature frame: every column except the row index, the patient id and the target.
x0 = df.drop(columns=["index", "Patient Id", "Level"])
# Same features as a float32 ndarray.
x = x0.to_numpy(dtype=np.float32)

In [7]:
# Sanity check: feature matrix shape, then label vector shape (one per line).
print(x.shape, y.shape, sep="\n")

(1000, 23)
(1000,)


In [8]:
# Single 2-D array with the label in column 0 followed by the feature columns.
data = np.column_stack((y, x))

In [9]:
from sklearn.model_selection import train_test_split

# Hold out 30% of the rows; the fixed random_state keeps the split reproducible.
training_data, test_data = train_test_split(
    data,
    random_state=42,
    test_size=0.3,
)

In [10]:
# Write both splits as CSV files without a header row or an index column.
for split, csv_name in ((training_data, "training.csv"), (test_data, "test.csv")):
    pd.DataFrame(split).to_csv(csv_name, header=False, index=False)

In [11]:
# Upload each CSV under its own key prefix in `bucket`; the returned values are
# kept as the S3 locations of the two splits.
train_path = sagemaker_session.upload_data(
    path="training.csv",
    bucket=bucket,
    key_prefix=f"{prefix}/train",
)
test_path = sagemaker_session.upload_data(
    path="test.csv",
    bucket=bucket,
    key_prefix=f"{prefix}/test",
)

In [12]:
# Display mlp_pytorch.py (the script used as the training entry point) with
# syntax highlighting.
!pygmentize mlp_pytorch.py

[34mimport[39;49;00m [04m[36margparse[39;49;00m[37m[39;49;00m
[34mimport[39;49;00m [04m[36mjson[39;49;00m[37m[39;49;00m
[34mimport[39;49;00m [04m[36mlogging[39;49;00m[37m[39;49;00m
[34mimport[39;49;00m [04m[36mos[39;49;00m[37m[39;49;00m
[34mimport[39;49;00m [04m[36msys[39;49;00m[37m[39;49;00m
[34mimport[39;49;00m [04m[36mpandas[39;49;00m [34mas[39;49;00m [04m[36mpd[39;49;00m[37m[39;49;00m
[34mimport[39;49;00m [04m[36mnumpy[39;49;00m [34mas[39;49;00m [04m[36mnp[39;49;00m[37m[39;49;00m
[37m[39;49;00m
[37m#import sagemaker_containers[39;49;00m[37m[39;49;00m
[34mimport[39;49;00m [04m[36mtorch[39;49;00m[37m[39;49;00m
[34mimport[39;49;00m [04m[36mtorch[39;49;00m[04m[36m.[39;49;00m[04m[36mdistributed[39;49;00m [34mas[39;49;00m [04m[36mdist[39;49;00m[37m[39;49;00m
[34mimport[39;49;00m [04m[36mtorch[39;49;00m[04m[36m.[39;49;00m[04m[36mnn[39;49;00m [34mas[39;49;00m [04m[36mnn[39;49;00m[37m

In [13]:
from sagemaker.pytorch import PyTorch

# Training job definition: runs mlp_pytorch.py on two ml.c4.xlarge instances
# with PyTorch 1.4.0 / Python 3. "epochs" and "backend" are passed to the
# script as hyperparameters.
estimator = PyTorch(
    entry_point="mlp_pytorch.py",
    role=role,
    py_version="py3",
    framework_version="1.4.0",
    instance_type="ml.c4.xlarge",
    instance_count=2,
    hyperparameters=dict(epochs=6, backend="gloo"),
)

In [14]:
# Show the S3 URIs of the uploaded training and test files (one per line).
print(train_path, test_path, sep="\n")

s3://sagemaker-us-east-1-925680695682/sagemaker/lung-cancer/train/training.csv
s3://sagemaker-us-east-1-925680695682/sagemaker/lung-cancer/test/test.csv


In [15]:
# Launch the training job with two input channels, "training" and "test",
# each pointing at the corresponding uploaded CSV.
estimator.fit(
    inputs={
        "training": train_path,
        "test": test_path,
    }
)

INFO:sagemaker.image_uris:image_uri is not presented, retrieving image_uri based on instance_type, framework etc.
INFO:sagemaker:Creating training-job with name: pytorch-training-2024-03-26-19-05-47-445


2024-03-26 19:05:48 Starting - Starting the training job...
2024-03-26 19:06:04 Starting - Preparing the instances for training...
2024-03-26 19:06:45 Downloading - Downloading input data...
2024-03-26 19:07:15 Downloading - Downloading the training image......
2024-03-26 19:08:06 Training - Training image download completed. Training in progress..[35mbash: cannot set terminal process group (-1): Inappropriate ioctl for device[0m
[35mbash: no job control in this shell[0m
[35m2024-03-26 19:08:14,357 sagemaker-training-toolkit INFO     Imported framework sagemaker_pytorch_container.training[0m
[35m2024-03-26 19:08:14,360 sagemaker-containers INFO     No GPUs detected (normal if no gpus installed)[0m
[35m2024-03-26 19:08:14,373 sagemaker_pytorch_container.training INFO     Block until all host DNS lookups succeed.[0m
[35m2024-03-26 19:08:14,376 sagemaker_pytorch_container.training INFO     Invoking user training script.[0m
[35m2024-03-26 19:08:14,555 sagemaker-containers INFO

In [16]:
# Host the trained model on a single ml.m4.xlarge endpoint instance; the
# returned predictor is the client used to send inference requests to it.
predictor = estimator.deploy(
    initial_instance_count=1,
    instance_type="ml.m4.xlarge",
)

INFO:sagemaker:Repacking model artifact (s3://sagemaker-us-east-1-925680695682/pytorch-training-2024-03-26-19-05-47-445/output/model.tar.gz), script artifact (s3://sagemaker-us-east-1-925680695682/pytorch-training-2024-03-26-19-05-47-445/source/sourcedir.tar.gz), and dependencies ([]) into single tar.gz file located at s3://sagemaker-us-east-1-925680695682/pytorch-training-2024-03-26-19-14-04-090/model.tar.gz. This may take some time depending on model size...
INFO:sagemaker:Creating model with name: pytorch-training-2024-03-26-19-14-04-090
INFO:sagemaker:Creating endpoint-config with name pytorch-training-2024-03-26-19-14-04-090
INFO:sagemaker:Creating endpoint with name pytorch-training-2024-03-26-19-14-04-090


--------!

In [18]:
# Send the full float32 feature matrix to the endpoint in one request.
# NOTE(review): x contains every row, including those in the training split,
# so these predictions are not a held-out evaluation.
response = predictor.predict(data=x)

In [21]:
# Index of the largest value along axis 1 for each row of the response
# (presumably the predicted class id; confirm against the model's output layer).
np.argmax(response, axis=1)

array([2, 2, 0, 0, 0, 0, 1, 2, 0, 2, 0, 2, 0, 0, 2, 1, 2, 0, 0, 2, 0, 2,
       1, 0, 0, 2, 2, 2, 0, 2, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2,
       2, 1, 0, 1, 2, 1, 2, 2, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 2, 2, 2,
       2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 1, 2, 2, 2, 2, 2, 2, 0,
       0, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 2, 2, 0, 0, 0, 2, 2, 0, 2, 0, 0, 0, 0, 2, 1, 2, 0, 0, 2, 0,
       2, 1, 0, 2, 2, 2, 2, 0, 2, 0, 2, 2, 2, 2, 0, 2, 2, 2, 2, 2, 2, 1,
       2, 2, 1, 2, 1, 2, 1, 2, 2, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2,
       0, 0, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 2, 0, 0, 0, 2, 2, 0, 2, 0, 0, 0, 2, 2, 1, 2, 0, 0, 2,
       0, 2, 1, 0, 2, 2, 2, 2, 0, 2, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       1, 2, 2, 1, 1, 1, 2, 1, 2, 2, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,

In [20]:
# Tear down everything estimator.deploy() created, not just the endpoint.
# Session.delete_endpoint() alone leaves the SageMaker model and the endpoint
# configuration behind in the account.
# The model is deleted first because the predictor resolves the model name(s)
# through the endpoint, which must therefore still exist at that point.
predictor.delete_model()
# Predictor.delete_endpoint() also removes the endpoint configuration by default.
predictor.delete_endpoint()

INFO:sagemaker:Deleting endpoint with name: pytorch-training-2024-03-26-18-51-47-909
