

    ██████╗ ██╗██╗     ███████╗████████╗███╗   ███╗     ██████╗██████╗  █████╗  ██████╗██╗  ██╗███████╗██████╗ 
    ██╔══██╗██║██║     ██╔════╝╚══██╔══╝████╗ ████║    ██╔════╝██╔══██╗██╔══██╗██╔════╝██║ ██╔╝██╔════╝██╔══██╗
    ██████╔╝██║██║     ███████╗   ██║   ██╔████╔██║    ██║     ██████╔╝███████║██║     █████╔╝ █████╗  ██████╔╝
    ██╔══██╗██║██║     ╚════██║   ██║   ██║╚██╔╝██║    ██║     ██╔══██╗██╔══██║██║     ██╔═██╗ ██╔══╝  ██╔══██╗
    ██████╔╝██║███████╗███████║   ██║   ██║ ╚═╝ ██║    ╚██████╗██║  ██║██║  ██║╚██████╗██║  ██╗███████╗██║  ██║
    ╚═════╝ ╚═╝╚══════╝╚══════╝   ╚═╝   ╚═╝     ╚═╝     ╚═════╝╚═╝  ╚═╝╚═╝  ╚═╝ ╚═════╝╚═╝  ╚═╝╚══════╝╚═╝  ╚═╝
                                                                                                           



In [1]:
# import libraries
import boto3
import csv
import time
import random
import matplotlib.pyplot as plt
import os
import keras
import numpy as np
import tensorflow as tf

# sagemaker libraries
import sagemaker
from sagemaker.tensorflow import TensorFlow
from sagemaker.tuner      import IntegerParameter, CategoricalParameter, ContinuousParameter, HyperparameterTuner

Using TensorFlow backend.


In [2]:
# get the session information
# sess: SageMaker session object created with default settings
# NOTE(review): sess is not referenced again in the visible cells
sess = sagemaker.Session()
# role: execution role looked up by the SDK; it is handed to the estimator below
# (presumably the IAM role attached to this notebook instance -- confirm)
role = sagemaker.get_execution_role()

In [8]:
# define S3 settings
bucket = 'blstm-cracker'
prefix = 'test-run'

# define the local source file and the object name it receives in S3
data_path     = 'lstm_cracker/data/dump.csv'
data_name     = 'train.csv'

# S3 keys always use '/' as the separator, so join the parts explicitly
# instead of relying on os.path.join (whose separator is platform-dependent)
key           = '/'.join((prefix, 'train', data_name))
s3_train_data = 's3://{}/{}'.format(bucket, key)

# define the output path
out         = '/'.join((prefix, 'output'))
output_path = 's3://{}/{}'.format(bucket, out)

# upload the training data to S3; the context manager guarantees the local
# file handle is closed once the upload finishes (or fails)
print('Uploading data to: {}'.format(s3_train_data))
with open(data_path, 'rb') as data_file:
    boto3.resource('s3').Bucket(bucket).Object(key).put(Body=data_file)

# configure SageMaker input channel: a single 'training' channel pointing at
# the uploaded CSV, replicated in full to every training instance
input_data = {
    'training': sagemaker.session.s3_input(s3_train_data, distribution='FullyReplicated', content_type='text/csv')
}

Uploading data to: s3://blstm-cracker/test-run/train/train.csv


In [4]:
!pygmentize lstm_cracker/program/program.py

[37m# -*- coding: utf-8 -*-[39;49;00m
[33m"""[39;49;00m
[33m[39;49;00m
[33mThis module trains a bidirectional long short-term memory (LSTM) [39;49;00m
[33mnetwork on a dataset consisting solely of cleartext passwords.[39;49;00m
[33mThe trained network is then used to predict the most likely[39;49;00m
[33malterations and/or additions to a given sequence.[39;49;00m
[33m[39;49;00m
[33mExample[39;49;00m
[33m-------[39;49;00m
[33m    To run the program, include the dataset containing the cleartext [39;49;00m
[33m    passwords as the first argument. The code will handle the rest.[39;49;00m
[33m[39;49;00m
[33m    $ python3 program.py[39;49;00m
[33m[39;49;00m
[33mNotes[39;49;00m
[33m-----[39;49;00m
[33m    The dataset is assumed to contain two columns: the usernames and the [39;49;00m
[33m    cleartext passwords.[39;49;00m
[33m[39;49;00m
[33m    The network parameters (e.g., number of hidden units, embedding[39;49;00m
[33m    laye

[33m        tokenizer : [39;49;00m
[33m            The Keras tokenizer object.[39;49;00m
[33m        ix_to_character : [39;49;00m
[33m            The c-to-character dictionary.[39;49;00m
[33m        data : pd.DataFrame[39;49;00m
[33m            The dataset, including the tokenized passwords.[39;49;00m
[33m[39;49;00m
[33m        Returns[39;49;00m
[33m        -------[39;49;00m
[33m        float[39;49;00m
[33m            The probability of the password.[39;49;00m
[33m[39;49;00m
[33m        """[39;49;00m

        [37m# tokenize the password[39;49;00m
        token  = [36mself[39;49;00m.tokenizer.texts_to_sequences([password])[[34m0[39;49;00m]
        x_test = DataGenerator.slide_window(token)
        x_test = np.array(x_test)
        y_test = token - [34m1[39;49;00m

        [37m# determine the probabilities of the permutations of the words[39;49;00m
        probabilities = [36mself[39;49;00m.model.predict(x_test, verbose=[34m0[3

## generate estimator

In [9]:
# Build the TensorFlow estimator in script mode.
#   - code:     program.py inside lstm_cracker/program
#   - compute:  one 'local' instance (the log below shows a docker-compose
#               container being started on this machine)
#   - runtime:  TensorFlow 1.12 on Python 3
#   - hyperparameters are forwarded to the entry point; 'training' carries
#     the S3 URI of the uploaded dataset
tf_estimator = TensorFlow(
    # what to run
    source_dir='lstm_cracker/program',
    entry_point='program.py',
    script_mode=True,
    hyperparameters=dict(epochs=1, training=s3_train_data),
    # where to run it
    role=role,
    train_instance_type='local',
    train_instance_count=1,
    # runtime versions
    framework_version='1.12',
    py_version='py3',
    # model artefact location
    model_dir=output_path,
)

In [10]:
# start training, passing the input channel mapping (the 'training' channel
# defined above) to the estimator
tf_estimator.fit(input_data)

Creating tmpvs3dqwvv_algo-1-vws6y_1 ... 
[1BAttaching to tmpvs3dqwvv_algo-1-vws6y_12mdone[0m
[36malgo-1-vws6y_1  |[0m 2019-11-27 20:25:53,245 sagemaker-containers INFO     Imported framework sagemaker_tensorflow_container.training
[36malgo-1-vws6y_1  |[0m 2019-11-27 20:25:53,251 sagemaker-containers INFO     No GPUs detected (normal if no gpus installed)
[36malgo-1-vws6y_1  |[0m 2019-11-27 20:25:53,429 sagemaker-containers INFO     No GPUs detected (normal if no gpus installed)
[36malgo-1-vws6y_1  |[0m 2019-11-27 20:25:53,448 sagemaker-containers INFO     No GPUs detected (normal if no gpus installed)
[36malgo-1-vws6y_1  |[0m 2019-11-27 20:25:53,462 sagemaker-containers INFO     Invoking user script
[36malgo-1-vws6y_1  |[0m 
[36malgo-1-vws6y_1  |[0m Training Env:
[36malgo-1-vws6y_1  |[0m 
[36malgo-1-vws6y_1  |[0m {
[36malgo-1-vws6y_1  |[0m     "additional_framework_parameters": {},
[36malgo-1-vws6y_1  |[0m     "channel_input_dirs": {
[36malgo-1-vws6y_1  |[0m  

[36mtmpvs3dqwvv_algo-1-vws6y_1 exited with code 1
[0mAborting on container exit...


RuntimeError: Failed to run: ['docker-compose', '-f', '/tmp/tmpvs3dqwvv/docker-compose.yaml', 'up', '--build', '--abort-on-container-exit'], Process exited with code: 1

In [None]:
# make the endpoint name unique by suffixing the current UTC timestamp
tf_endpoint_name = 'keras-tf-fmnist-{}'.format(
    time.strftime("%Y-%m-%d-%H-%M-%S", time.gmtime())
)

# deploy the trained model to a single CPU instance with an Elastic Inference
# accelerator attached (ml.eia1.medium)
tf_predictor = tf_estimator.deploy(
    endpoint_name=tf_endpoint_name,
    initial_instance_count=1,
    instance_type='ml.c5.large',
    accelerator_type='ml.eia1.medium',
)

## perform predictions

In [None]:
 %matplotlib inline

# NOTE(review): x_val and y_val are not defined in any visible cell of this
# notebook -- they must be loaded before this cell can run. The 28x28 reshape
# below suggests image data; confirm it matches the deployed model.
num_samples = 5

# draw distinct random row indices; the range spans every row so that the
# last validation sample can be selected too
indices = random.sample(range(x_val.shape[0]), num_samples)
images = x_val[indices]/255  # presumably scales 8-bit pixel values to [0, 1] -- confirm
labels = y_val[indices]

# show the sampled images side by side, titled with their labels
for i, (image, label) in enumerate(zip(images, labels)):
    plt.subplot(1, num_samples, i + 1)
    plt.imshow(image.reshape(28, 28), cmap='gray')
    plt.title(label)
    plt.axis('off')

# send the batch to the endpoint as (num_samples, 28, 28, 1) and take the
# highest-scoring entry of each prediction row as the predicted label
prediction = tf_predictor.predict(images.reshape(num_samples, 28, 28, 1))['predictions']
prediction = np.array(prediction)
predicted_label = prediction.argmax(axis=1)
print('Predicted labels are: {}'.format(predicted_label))

## hyperparameter tuning

In [None]:
# objective: maximise the validation accuracy, scraped from the training log
# with the regex in metric_definitions
objective_metric_name = 'val_acc'
objective_type = 'Maximize'
metric_definitions = [
    {'Name': 'val_acc', 'Regex': 'val_acc: ([0-9\\.]+)'},
]

# search space explored by the tuner; the learning rate is sampled on a
# logarithmic scale
hyperparameter_ranges = dict([
    ('epochs',        IntegerParameter(20, 100)),
    ('learning-rate', ContinuousParameter(0.001, 0.1, scaling_type='Logarithmic')),
    ('batch-size',    IntegerParameter(32, 1024)),
    ('dense-layer',   IntegerParameter(128, 1024)),
    ('dropout',       ContinuousParameter(0.2, 0.6)),
])

# at most 10 training jobs in total, two of them running at any one time
tuner = HyperparameterTuner(
    estimator=tf_estimator,
    objective_metric_name=objective_metric_name,
    hyperparameter_ranges=hyperparameter_ranges,
    metric_definitions=metric_definitions,
    objective_type=objective_type,
    max_jobs=10,
    max_parallel_jobs=2,
)