In [34]:
from sagemaker.tensorflow import TensorFlow

In [35]:
# Describe the SageMaker training job that will run sentiment_training.py
# on a single ml.p2.xlarge instance with the TensorFlow 1.14 / Python 3 image.
# NOTE(review): sentiment_training.py imports sibling modules
# (sentiment_dataset, sentiment_model_cnn, config_holder) and reads
# training_config.json; no source_dir/dependencies is passed here, so confirm
# those files actually reach the training container.
estimator = TensorFlow(
    entry_point="sentiment_training.py",
    framework_version="1.14",
    py_version="py3",
    role="arn:aws:iam::431525178180:role/hwk4-sagemaker",
    train_instance_count=1,
    train_instance_type="ml.p2.xlarge",
    # output_path is deliberately left unset; the location tried earlier was
    # s3://ai2020/hwk4/sagemaker_data/output
)

In [36]:
# Start the training job, exposing the S3 prefix below as the 'train' channel.
# NOTE(review): only a 'train' channel is supplied, while sentiment_training.py
# also reads --validation and --eval locations; confirm how those reach the job.
estimator.fit({'train': 's3://ai2020/hwk4/sagemaker_data/train/'})

ClientError: An error occurred (AccessDeniedException) when calling the CreateTrainingJob operation: User: arn:aws:sts::431525178180:assumed-role/hwk4-sagemaker/SageMaker is not authorized to perform: sagemaker:CreateTrainingJob on resource: arn:aws:sagemaker:us-east-1:431525178180:training-job/tensorflow-training-2020-02-23-03-05-31-864 with an explicit deny

In [39]:
from sagemaker import get_execution_role

# Look up the IAM role ARN this notebook is executing under, then display it.
role = get_execution_role()
role

'arn:aws:iam::431525178180:role/hwk4-sagemaker'

In [40]:
import sagemaker

# Create a SageMaker session and read the name of its default S3 bucket.
sagemaker_session = sagemaker.Session()
# NOTE: `bucket` is rebound to 'ai2020' in a later cell.
bucket = sagemaker_session.default_bucket()
# Execution-role lookup again, this time through the package namespace.
role = sagemaker.get_execution_role()


In [41]:
bucket

'sagemaker-us-east-1-431525178180'

In [42]:
import boto3
from sagemaker.estimator import Framework
from sagemaker.tensorflow import TensorFlow

class ScriptModeTensorFlow(Framework):
    """Stop-gap ``Framework`` estimator for TensorFlow Script Mode.

    Meant to be used only until the final version of Script Mode is released.
    Model creation is borrowed from the stock ``TensorFlow`` estimator.
    """

    __framework_name__ = "tensorflow-scriptmode"

    # Reuse the regular TensorFlow estimator's model-creation method as-is.
    create_model = TensorFlow.create_model

    def __init__(self, py_version='py3', **kwargs):
        """Initialise the estimator.

        Args:
            py_version: Stored on the instance as ``py_version``.
            **kwargs: Forwarded unchanged to ``Framework.__init__``.
        """
        super().__init__(**kwargs)
        # Assigned after the base initialiser has run, so these are the values
        # left on the instance.
        self.py_version = py_version
        self.image_name = None
        self.framework_version = '1.14'

In [47]:
# Execution role plus the S3 location of the training split.
role = get_execution_role()
# Rebinds `bucket` (previously the session's default bucket) to the bucket
# that holds the homework data.
bucket='ai2020'
data_key = 'hwk4/sagemaker_data/train'
# Resolves to 's3://ai2020/hwk4/sagemaker_data/train'.
data_location = 's3://{}/{}'.format(bucket, data_key)

In [53]:
import pandas as pd
# Read train.json from the S3 prefix as line-delimited JSON (lines=True) and
# display the resulting frame to inspect its columns.
pd.read_json(f"{data_location}/train.json", lines=True)

Unnamed: 0,features,sentiment,tweet
0,"[0, 0, 7, 906, 7, 2361, 7, 364, 21, 0, 1870, 0...",4,"@perrinc Hi Colleen, welcome, if you have any ..."
1,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 12, 12, 12, ...",0,Im Well Bored Just Sat Eyaa With Chris And Hi...
2,"[0, 128, 0, 3464, 209, 10, 366, 21, 185, 174, ...",0,Bummer of a day...spent $360 on something I re...
3,"[0, 7, 542, 7, 21, 1943, 526, 21, 4746, 0, 0, ...",4,"@kathyfoley @kieranmurphy Gosh, is she pretty,..."
4,"[409, 300, 321, 12, 146, 7491, 0, 0, 0, 0, 0, ...",0,@anberlin i wish i could be there tonight! i g...
5,"[21, 12, 600, 31914, 832, 12, 0, 556, 294, 221...",4,@hdcgf0 OMG! another nickelback fan! Not many ...
6,"[5115, 3236, 151, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0...",0,havent tweeted today
7,"[13327, 4169, 1083, 0, 0, 7, 344, 7, 120, 1092...",4,ooo keeping up to date with all the e3 anounce...
8,"[0, 3289, 6258, 99, 238, 4, 0, 21, 628, 347, 4...",4,Three spanish lessons in one night. Hopefully ...
9,"[0, 7, 735, 4, 0, 21, 644, 17, 21, 644, 0, 4, ...",4,"@khali_blache Egads, that is awesome. Can I vo..."


In [48]:
data_location

's3://ai2020/hwk4/sagemaker_data/train'

`sentiment_training.py`

In [1]:
"""
Main sentiment training script

Author pharnoux


"""

import os
import sys
import boto3
import argparse
import sentiment_dataset as sentiment_dataset
import sentiment_model_cnn as sentiment_model_cnn
import config_holder as config_holder

def main(args):
    """
    Run the whole pipeline: load config, build datasets, train, evaluate, save.

    ``args`` is the parsed command-line namespace. The attributes read from it
    are ``config_file``, ``num_epoch``, ``train``, ``validation``, ``eval`` and
    ``model_output_dir``.

    """

    print("Preparing for training...")

    # Start from the config file, then let the command line decide the epoch count.
    cfg = config_holder.ConfigHolder(args.config_file).config
    cfg["num_epoch"] = args.num_epoch

    # Each input function's result is indexed below as: [0] passed to the model
    # as x, [1] passed as y, [2] a mapping that carries "num_batches".
    train_set = sentiment_dataset.train_input_fn(args.train, cfg)
    val_set = sentiment_dataset.validation_input_fn(args.validation, cfg)
    test_set = sentiment_dataset.eval_input_fn(args.eval, cfg)

    net = sentiment_model_cnn.keras_model_fn(None, cfg)

    print("Starting training...")

    train_steps = train_set[2]["num_batches"]
    val_steps = val_set[2]["num_batches"]
    net.fit(
        x=train_set[0],
        y=train_set[1],
        steps_per_epoch=train_steps,
        epochs=cfg["num_epoch"],
        validation_data=(val_set[0], val_set[1]),
        validation_steps=val_steps)

    # Score the held-out evaluation split; index 0 is printed as the loss and
    # index 1 as the accuracy.
    test_steps = test_set[2]["num_batches"]
    metrics = net.evaluate(test_set[0], test_set[1], steps=test_steps, verbose=0)

    print("Test loss:{}".format(metrics[0]))
    print("Test accuracy:{}".format(metrics[1]))

    model_path = os.path.join(args.model_output_dir, "sentiment_model.h5")
    sentiment_model_cnn.save_model(net, model_path)

def get_arg_parser():
    """
    Build the command-line parser for the training script.

    Kept as its own function so the parser can be unit tested without
    running ``main``.

    The data and model directories fall back to the environment variables a
    SageMaker training container sets (``SM_CHANNEL_TRAIN``,
    ``SM_CHANNEL_VALIDATION``, ``SM_CHANNEL_EVAL``, ``SM_MODEL_DIR``) when they
    are present. Outside such a container the original local defaults apply,
    and ``--model_dir`` defaults to ``None``.

    Returns:
        argparse.ArgumentParser: parser exposing ``--train``, ``--validation``,
        ``--eval``, ``--model_output_dir``, ``--model_dir``, ``--num_epoch``
        and ``--config_file``.

    """

    parser = argparse.ArgumentParser()
    # Channel fallbacks assume the SageMaker input channels are named after the
    # flags ('train', 'validation', 'eval').
    parser.add_argument(
        "--train",
        type=str,
        required=False,
        default=os.environ.get("SM_CHANNEL_TRAIN", "training_data"),
        help="The directory where the training data is stored.")
    parser.add_argument(
        "--validation",
        type=str,
        required=False,
        default=os.environ.get("SM_CHANNEL_VALIDATION", "dev_data"),
        help="The directory where the validation data is stored.")
    parser.add_argument(
        "--eval",
        type=str,
        required=False,
        default=os.environ.get("SM_CHANNEL_EVAL", "eval_data"),
        help="The directory where the evaluation data is stored.")
    parser.add_argument(
        "--model_output_dir",
        type=str,
        required=False,
        default="s3://ai2020/hwk4/sagemaker_data/output/",
        help="The location the trained model file is written to.")
    # The original looked up the empty variable name, which always yields None;
    # SM_MODEL_DIR is the model directory variable inside a training container.
    parser.add_argument(
        "--model_dir",
        type=str,
        required=False,
        default=os.environ.get("SM_MODEL_DIR"),
        help="Model directory supplied by the training environment, if any.")
    parser.add_argument(
        "--num_epoch",
        type=int,
        default=10,
        help="The number of epochs to train for.")
    parser.add_argument(
        "--config_file",
        type=str,
        default="training_config.json",
        help="The path to the training config file.")

    return parser


if __name__ == "__main__":
    # Drop a stray '-f' flag if one is present (presumably the one a Jupyter
    # kernel launch puts in argv; confirm). Anything else the parser does not
    # recognise is tolerated by parse_known_args below.
    try:
        sys.argv.remove('-f')
    except ValueError:
        # No '-f' in argv: nothing to strip.
        pass
    PARSER = get_arg_parser()
    ARGS, unknown = PARSER.parse_known_args()
    main(ARGS)



Preparing for training...
Fetching train data...
Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where
Instructions for updating:
Use `for ... in dataset:` to iterate over a dataset. If using `tf.estimator`, return the `Dataset` object directly from your input function. As a last resort, you can use `tf.compat.v1.data.make_one_shot_iterator(dataset)`.
Fetching validation data...
Fetching eval data...
Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor
Instructions for updating:
If using Keras pass *_constraint arguments to layers.
Starting training...
Train on 85 samples, validate on ? samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Test loss:1.3393884658813477
Test accuracy:0.6840000152587891
INFO:tensorflow:Assets written to: s3://ai2020/hwk4/sagemaker_data/output/sentiment_model.h5/1/assets
Model successfull

In [21]:
%tb

SystemExit: 2

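I tried to run the training as a SageMaker job, but the `CreateTrainingJob` call was rejected with an `AccessDeniedException` (an explicit deny on the `hwk4-sagemaker` role). The training output shown above therefore comes from running `sentiment_training.py` directly in a notebook cell.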