In [1]:
import sagemaker

# Execution role handed to the estimator when the training job is configured.
role = sagemaker.get_execution_role()

# Session object used for the S3 upload further down.
sagemaker_session = sagemaker.Session()

# S3 destination: the session's default bucket plus a key prefix for this demo.
prefix = 'pytorch-mnist'
bucket = sagemaker_session.default_bucket()

In [12]:
import torch
from torchvision import datasets, transforms

# Per-image preprocessing: convert to a tensor, then normalize with the
# constants 0.1307 and 0.3081 (passed as the mean and std arguments).
mnist_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.1307,), (0.3081,)),
])

# Download MNIST into the local 'data' directory (if needed) and wrap it.
# NOTE(review): no `train=` argument is given, so this presumably loads the
# training split -- confirm before treating samples from it as test data.
mnist_dataset = datasets.MNIST('data', download=True, transform=mnist_transform)

# One image per batch, so a single batch can be sent to the endpoint as-is.
data = torch.utils.data.DataLoader(mnist_dataset, batch_size=1)

In [1]:
# List the contents of the local `data` directory.
# NOTE(review): the recorded output lists wine CSV/JSON files and this cell's
# execution count (1) is out of sequence with its neighbours, so the output
# looks stale -- re-run after the MNIST download cell.
!ls data

[31mWine.csv[m[m           test_data1.json    wine_data.csv
test_data0.json    test_data2.json    wine_test_data.csv


In [13]:
# Upload the local 'data/MNIST' directory to `bucket` under `prefix`; the
# returned location is later passed to estimator.fit() as the 'training' channel.
data_dir = sagemaker_session.upload_data(
    path='data/MNIST',
    bucket=bucket,
    key_prefix=prefix,
)
data_dir

input spec (in this case, just an S3 path): s3://sagemaker-us-east-2-236912928450/sagemaker/DEMO-pytorch-mnist


In [14]:
from sagemaker.pytorch import PyTorch

# Hyperparameters handed to the estimator for the entry-point script.
# NOTE(review): 'backend' presumably selects the distributed backend used by
# mnist.py -- confirm against the script.
training_hyperparameters = {
    'epochs': 1,
    'backend': 'gloo',
}

# Training configuration: mnist.py on two ml.p3.8xlarge instances,
# PyTorch framework version 1.0.0.
estimator = PyTorch(
    entry_point='mnist.py',
    role=role,
    framework_version='1.0.0',
    train_instance_count=2,
    train_instance_type='ml.p3.8xlarge',
    hyperparameters=training_hyperparameters,
)

In [15]:
# Start training; the 'training' channel points at the data uploaded to S3.
training_channels = {'training': data_dir}
estimator.fit(training_channels)

INFO:sagemaker:Creating training-job with name: sagemaker-pytorch-2019-03-08-12-18-21-741


2019-03-08 12:18:22 Starting - Starting the training job...
2019-03-08 12:18:24 Starting - Launching requested ML instances......
2019-03-08 12:19:26 Starting - Preparing the instances for training......
2019-03-08 12:20:53 Downloading - Downloading input data
2019-03-08 12:20:53 Training - Downloading the training image.
[31mbash: cannot set terminal process group (-1): Inappropriate ioctl for device[0m
[31mbash: no job control in this shell[0m
[31m2019-03-08 12:20:56,957 sagemaker-containers INFO     Imported framework sagemaker_pytorch_container.training[0m
[31m2019-03-08 12:20:56,960 sagemaker-containers INFO     No GPUs detected (normal if no gpus installed)[0m
[31m2019-03-08 12:20:56,980 sagemaker_pytorch_container.training INFO     Block until all host DNS lookups succeed.[0m
[31m2019-03-08 12:20:56,984 sagemaker_pytorch_container.training INFO     Invoking user training script.[0m
[31m2019-03-08 12:20:57,214 sagemaker-containers INFO     Module mnist does not provi


[31mTest set: Average loss: 0.3243, Accuracy: 9093/10000 (91%)
[0m
[31mSaving the model.[0m
[31m2019-03-08 12:21:23,459 sagemaker-containers INFO     Reporting training SUCCESS[0m
[32mTest set: Average loss: 0.3243, Accuracy: 9093/10000 (91%)
[0m
[32mSaving the model.[0m
[32m2019-03-08 12:21:23,424 sagemaker-containers INFO     Reporting training SUCCESS[0m

2019-03-08 12:21:33 Uploading - Uploading generated training model
2019-03-08 12:21:33 Completed - Training job completed
Billable seconds: 97


In [16]:
# Deploy the trained model to an endpoint backed by one ml.m4.xlarge instance.
# NOTE(review): no endpoint cleanup is visible in this section -- make sure the
# endpoint is deleted once the notebook is finished to avoid ongoing charges.
predictor = estimator.deploy(
    instance_type='ml.m4.xlarge',
    initial_instance_count=1,
)


INFO:sagemaker:Creating model with name: sagemaker-pytorch-2019-03-08-12-18-21-741
INFO:sagemaker:Creating endpoint with name sagemaker-pytorch-2019-03-08-12-18-21-741


---------------------------------------------------------------------------!

In [33]:
# Pull a single batch from the loader and unpack it in one step. Creating two
# separate iterators (one for the features, one for the target) loads the
# first batch twice and only yields a matching pair while the loader is
# unshuffled; unpacking one batch guarantees both belong to the same sample.
test_features, test_target = next(iter(data))

test_features.shape, test_target

(torch.Size([1, 1, 28, 28]), tensor([5]))

In [34]:
# Send the sample to the deployed endpoint, then take the index of the largest
# value along axis 1 for the first (and only) row of the response.
response = predictor.predict(test_features)
predicted_label = response.argmax(axis=1)[0]
predicted_label

5