### Introduction
Model training. The data has already been set up: `test_sentiment140.csv_noquotes.csv` and `training_sentiment140_noquotes.csv`.

An explanation of the hyperparameters is available here: https://docs.aws.amazon.com/sagemaker/latest/dg/blazingtext_hyperparameters.html

In [None]:
import json

import boto3
import nltk
import sagemaker
from sagemaker import get_execution_role

In [None]:
# SageMaker session object; later cells use it for uploads, training and endpoint cleanup.
sess = sagemaker.Session()

# IAM role that SageMaker uses to reach AWS resources (S3, CloudWatch) on your behalf.
role = get_execution_role()

# Bucket that receives the data and the training output. 'BUCKET_NAME' is meant to be
# replaced with your own bucket name; sess.default_bucket() is the alternative the
# original author left commented out.
bucket = 'BUCKET_NAME'

# Key prefix under which the data is stored; change it if you want a different location.
prefix = 'Sentiment140/supervised'

# Echo the resolved role and bucket so they can be checked before anything is uploaded.
print(role)
print(bucket)

In [None]:
%%time

# Key prefixes (inside the bucket) for the two data channels.
train_channel = '{}/train'.format(prefix)
validation_channel = '{}/validation'.format(prefix)

# Upload each local CSV under its channel prefix: the training file first,
# then the test file, which is used as the validation channel.
for local_csv, channel_prefix in (
    ('training_sentiment140_noquotes.csv', train_channel),
    ('test_sentiment140.csv_noquotes.csv', validation_channel),
):
    sess.upload_data(path=local_csv, bucket=bucket, key_prefix=channel_prefix)

# S3 URIs of the channel prefixes (not of the individual files).
s3_train_data = 's3://{bucket}/{key}'.format(bucket=bucket, key=train_channel)
s3_validation_data = 's3://{bucket}/{key}'.format(bucket=bucket, key=validation_channel)

In [None]:
# S3 location handed to the estimator as its output path: <bucket>/<prefix>/output.
s3_output_location = 's3://{bucket}/{prefix}/output'.format(bucket=bucket, prefix=prefix)

In [None]:
# Region of the default boto3 session; it is passed to the image lookup below.
region_name = boto3.Session().region_name

# Resolve the "latest" BlazingText algorithm image for that region.
container = sagemaker.amazon.amazon_estimator.get_image_uri(
    region_name, "blazingtext", "latest"
)
print(
    'Using SageMaker BlazingText container: {image} ({region})'.format(
        image=container, region=region_name
    )
)

### Train the Model

In [None]:
# Training-job settings, collected in one place so they are easy to tune.
training_job_settings = dict(
    train_instance_count=1,
    train_instance_type='ml.c4.4xlarge',
    train_volume_size=30,    # presumably GB of attached storage - confirm against the SDK docs
    train_max_run=360000,    # presumably seconds (i.e. 100 hours) - confirm against the SDK docs
    input_mode='File',
    output_path=s3_output_location,
    sagemaker_session=sess,
)

# Generic estimator built from the BlazingText image and the execution role.
bt_model = sagemaker.estimator.Estimator(container, role, **training_job_settings)

In [None]:
# BlazingText hyperparameters for supervised (text classification) mode.
# See the hyperparameter reference linked in the introduction for their meaning.
blazingtext_hyperparameters = {
    "mode": "supervised",
    "epochs": 10,
    "min_count": 2,
    "learning_rate": 0.05,
    "vector_dim": 10,
    "early_stopping": True,
    "patience": 4,
    "min_epochs": 5,
    "word_ngrams": 2,
}

bt_model.set_hyperparameters(**blazingtext_hyperparameters)

In [None]:
# Both channels share the same input settings; only the S3 prefix differs.
channel_options = dict(
    distribution='FullyReplicated',
    content_type='text/plain',
    s3_data_type='S3Prefix',
)

train_data = sagemaker.session.s3_input(s3_train_data, **channel_options)
validation_data = sagemaker.session.s3_input(s3_validation_data, **channel_options)

# Channel name -> input definition, in the form passed to fit().
data_channels = dict(train=train_data, validation=validation_data)

In [None]:
# Run the training job on the train/validation channels.
# logs=True presumably streams the job's logs into the cell output - confirm against the SDK docs.
bt_model.fit(
    logs=True,
    inputs=data_channels,
)

### Hosting

In [None]:
# Deploy the trained model on a single ml.m4.xlarge instance; the returned object
# is what the inference cell calls predict() on.
text_classifier = bt_model.deploy(
    instance_type='ml.m4.xlarge',
    initial_instance_count=1,
)

In [None]:
# NOTE: nltk is imported in the notebook's import cell at the top, so that all
# dependencies are visible (and fail fast) before any AWS resources are created.

# Sample sentences to classify with the deployed endpoint.
sentences = [
    "Hey, Connor. Money in Hand is good, money in head is bad",
    "what was he thinking?",
    "morning all... looks like its gonna be a rainy day ",
    "hard to be witty when one is at their wits end",
    "Crazy day tomorow. Meetings all day",
    "looking forward to the weekend",
]

# Use the same nltk tokenizer that was used during data preparation for training,
# then re-join the tokens with single spaces.
tokenized_sentences = [' '.join(nltk.word_tokenize(sent)) for sent in sentences]

# Request body: the tokenized sentences plus a configuration block.
# "k": 2 presumably asks for the top 2 labels per sentence - confirm against the
# BlazingText inference documentation.
payload = {
    "instances": tokenized_sentences,
    "configuration": {"k": 2},
}

# The request is sent as a JSON string and the response is parsed back from JSON.
response = text_classifier.predict(json.dumps(payload))

predictions = json.loads(response)
print(json.dumps(predictions, indent=2))

### Cleanup

In [None]:
# Tear down the endpoint that text_classifier points at once you are done with it.
# `.endpoint` is presumably the endpoint's name - confirm against the SDK version in use.
endpoint_name = text_classifier.endpoint
sess.delete_endpoint(endpoint_name)