In [1]:
import sagemaker
import boto3
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sagemaker.pytorch import PyTorch
from sagemaker.tensorflow import TensorFlow
import os
from io import BytesIO

boto_session = boto3.Session(region_name='us-east-1')
sagemaker_session = sagemaker.Session(boto_session=boto_session)

role = "arn:aws:iam::211125439249:role/service-role/AmazonSageMaker-ExecutionRole-20250314T153928"
role_name = role.split('/')[-1]  # Extract just the role name from the ARN

# Attach AdministratorAccess policy to your existing role
iam_client = boto3.client('iam')
iam_client.attach_role_policy(
    RoleName=role_name,
    PolicyArn="arn:aws:iam::aws:policy/AdministratorAccess"
)
print(f"Attached AdministratorAccess policy to role: {role}")

# Assume blood.csv is in S3 already - if not, upload it first
input_data_s3_uri = "s3://blue-blood-data/final_df.csv"

bucket_name = "blue-blood-data"
region = 'us-east-1'
file_key = 'final_df.csv'



sagemaker.config INFO - Not applying SDK defaults from location: /Library/Application Support/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /Users/ubaid/Library/Application Support/sagemaker/config.yaml


Attached AdministratorAccess policy to role: arn:aws:iam::211125439249:role/service-role/AmazonSageMaker-ExecutionRole-20250314T153928


In [2]:
# First, make sure train.py exists in the notebook directory
if not os.path.exists("train.py"):
    print("Error: train.py not found in the current directory")
else:
    print("Found train.py in the current directory")
    
    # Upload train.py to S3 to ensure latest version is used
    code_prefix = "code"
    s3_code_path = sagemaker_session.upload_data("train.py", bucket=bucket_name, key_prefix=code_prefix)
    print(f"Uploaded train.py to {s3_code_path}")

# Verify that the S3 data file exists
try:
    s3_client = boto3.client('s3', region_name=region)
    s3_client.head_object(Bucket=bucket_name, Key=file_key)
    print(f"Verified that s3://{bucket_name}/{file_key} exists")
except Exception as e:
    print(f"Warning: Error verifying S3 file: {e}")

Found train.py in the current directory
Uploaded train.py to s3://blue-blood-data/code/train.py
Verified that s3://blue-blood-data/final_df.csv exists


In [None]:
# Define hyperparameters for testing
epochs_list = [50, 100, 150]
lstm_units_list = [64, 128, 256]
learning_rates = [0.001, 0.01, 0.1] 
dropout_rates = [0.2, 0.3, 0.4]

print(f"Starting hyperparameter testing")

# test all combinations of hyperparameters
for epochs in epochs_list:
    for learning_rate in learning_rates:
        for lstm_units in lstm_units_list:
            for dropout_rate in dropout_rates:
                # Create a unique job name based on parameters
                job_name = f"lstm-e{epochs}-lr{learning_rate}-u{lstm_units}-d{int(dropout_rate*10)}-b1"
                job_name = job_name.replace('.', 'd')  # Replace dots with 'd' for valid job name
                print(f"\nStarting job: {job_name}")

                # Create the TensorFlow estimator with this set of hyperparameters
                estimator = TensorFlow(
                    entry_point='train.py',
                    role=role,
                    instance_count=1,
                    instance_type='ml.m5.4xlarge',
                    framework_version='2.9',
                    py_version='py39',
                    sagemaker_session=sagemaker_session,
                    hyperparameters={
                        'epochs': epochs,
                        'learning_rate': learning_rate,
                        'lstm_units': lstm_units,
                        'dropout_rate': dropout_rate,
                        'job_name': job_name
                    },
                )

                # Start training job
                estimator.fit({'train': input_data_s3_uri})



Starting hyperparameter testing

Starting job: lstm-e10-lr0d001-u32-d2-b1


2025-04-04 09:31:11 Starting - Starting the training job...
2025-04-04 09:31:26 Starting - Preparing the instances for training...
2025-04-04 09:32:07 Downloading - Downloading the training image......
2025-04-04 09:33:13 Training - Training image download completed. Training in progress..2025-04-04 09:33:18.342855: W tensorflow/core/profiler/internal/smprofiler_timeline.cc:460] Initializing the SageMaker Profiler.
2025-04-04 09:33:18.343024: W tensorflow/core/profiler/internal/smprofiler_timeline.cc:105] SageMaker Profiler is not enabled. The timeline writer thread will not be started, future recorded events will be dropped.
2025-04-04 09:33:18.368633: W tensorflow/core/profiler/internal/smprofiler_timeline.cc:460] Initializing the SageMaker Profiler.
2025-04-04 09:33:20,312 sagemaker-training-toolkit INFO     Imported framework sagemaker_tensorflow_container.training
2025-04-04 09:33:20,323 sagemaker-training-toolkit INFO     No GPUs detected (normal if no gpus installed)
2025-04-04 

2025-04-04 09:34:34 Starting - Starting the training job...
2025-04-04 09:34:49 Starting - Preparing the instances for training...
2025-04-04 09:35:31 Downloading - Downloading the training image......
2025-04-04 09:36:37 Training - Training image download completed. Training in progress..2025-04-04 09:36:42.360846: W tensorflow/core/profiler/internal/smprofiler_timeline.cc:460] Initializing the SageMaker Profiler.
2025-04-04 09:36:42.361029: W tensorflow/core/profiler/internal/smprofiler_timeline.cc:105] SageMaker Profiler is not enabled. The timeline writer thread will not be started, future recorded events will be dropped.
2025-04-04 09:36:42.386674: W tensorflow/core/profiler/internal/smprofiler_timeline.cc:460] Initializing the SageMaker Profiler.
2025-04-04 09:36:44,381 sagemaker-training-toolkit INFO     Imported framework sagemaker_tensorflow_container.training
2025-04-04 09:36:44,392 sagemaker-training-toolkit INFO     No GPUs detected (normal if no gpus installed)
2025-04-04 

2025-04-04 09:38:00 Starting - Starting the training job...
2025-04-04 09:38:34 Downloading - Downloading input data...
2025-04-04 09:38:54 Downloading - Downloading the training image......
2025-04-04 09:40:00 Training - Training image download completed. Training in progress..2025-04-04 09:40:06.136775: W tensorflow/core/profiler/internal/smprofiler_timeline.cc:460] Initializing the SageMaker Profiler.
2025-04-04 09:40:06.136946: W tensorflow/core/profiler/internal/smprofiler_timeline.cc:105] SageMaker Profiler is not enabled. The timeline writer thread will not be started, future recorded events will be dropped.
2025-04-04 09:40:06.163067: W tensorflow/core/profiler/internal/smprofiler_timeline.cc:460] Initializing the SageMaker Profiler.
2025-04-04 09:40:08,260 sagemaker-training-toolkit INFO     Imported framework sagemaker_tensorflow_container.training
2025-04-04 09:40:08,271 sagemaker-training-toolkit INFO     No GPUs detected (normal if no gpus installed)
2025-04-04 09:40:08,54

2025-04-04 09:41:31 Starting - Starting the training job...
2025-04-04 09:41:46 Starting - Preparing the instances for training...
2025-04-04 09:42:31 Downloading - Downloading the training image......
2025-04-04 09:43:32 Training - Training image download completed. Training in progress..2025-04-04 09:43:39.833722: W tensorflow/core/profiler/internal/smprofiler_timeline.cc:460] Initializing the SageMaker Profiler.
2025-04-04 09:43:39.833895: W tensorflow/core/profiler/internal/smprofiler_timeline.cc:105] SageMaker Profiler is not enabled. The timeline writer thread will not be started, future recorded events will be dropped.
2025-04-04 09:43:39.860010: W tensorflow/core/profiler/internal/smprofiler_timeline.cc:460] Initializing the SageMaker Profiler.
2025-04-04 09:43:41,993 sagemaker-training-toolkit INFO     Imported framework sagemaker_tensorflow_container.training
2025-04-04 09:43:42,004 sagemaker-training-toolkit INFO     No GPUs detected (normal if no gpus installed)
2025-04-04 

2025-04-04 09:44:56 Starting - Starting the training job...
2025-04-04 09:45:31 Downloading - Downloading input data...
2025-04-04 09:45:56 Downloading - Downloading the training image......
2025-04-04 09:46:52 Training - Training image download completed. Training in progress.2025-04-04 09:47:01.284303: W tensorflow/core/profiler/internal/smprofiler_timeline.cc:460] Initializing the SageMaker Profiler.
2025-04-04 09:47:01.284476: W tensorflow/core/profiler/internal/smprofiler_timeline.cc:105] SageMaker Profiler is not enabled. The timeline writer thread will not be started, future recorded events will be dropped.
2025-04-04 09:47:01.310377: W tensorflow/core/profiler/internal/smprofiler_timeline.cc:460] Initializing the SageMaker Profiler.
2025-04-04 09:47:03,333 sagemaker-training-toolkit INFO     Imported framework sagemaker_tensorflow_container.training
2025-04-04 09:47:03,344 sagemaker-training-toolkit INFO     No GPUs detected (normal if no gpus installed)
2025-04-04 09:47:03,683

2025-04-04 09:48:31 Starting - Starting the training job...
2025-04-04 09:49:06 Downloading - Downloading input data...
2025-04-04 09:49:32 Downloading - Downloading the training image......
2025-04-04 09:50:33 Training - Training image download completed. Training in progress..2025-04-04 09:50:38.128628: W tensorflow/core/profiler/internal/smprofiler_timeline.cc:460] Initializing the SageMaker Profiler.
2025-04-04 09:50:38.128783: W tensorflow/core/profiler/internal/smprofiler_timeline.cc:105] SageMaker Profiler is not enabled. The timeline writer thread will not be started, future recorded events will be dropped.
2025-04-04 09:50:38.154564: W tensorflow/core/profiler/internal/smprofiler_timeline.cc:460] Initializing the SageMaker Profiler.
2025-04-04 09:50:40,124 sagemaker-training-toolkit INFO     Imported framework sagemaker_tensorflow_container.training
2025-04-04 09:50:40,134 sagemaker-training-toolkit INFO     No GPUs detected (normal if no gpus installed)
2025-04-04 09:50:40,38

2025-04-04 09:51:57 Starting - Starting the training job...
2025-04-04 09:52:21 Starting - Preparing the instances for training...
2025-04-04 09:53:02 Downloading - Downloading the training image......
2025-04-04 09:54:03 Training - Training image download completed. Training in progress..2025-04-04 09:54:09.428250: W tensorflow/core/profiler/internal/smprofiler_timeline.cc:460] Initializing the SageMaker Profiler.
2025-04-04 09:54:09.428423: W tensorflow/core/profiler/internal/smprofiler_timeline.cc:105] SageMaker Profiler is not enabled. The timeline writer thread will not be started, future recorded events will be dropped.
2025-04-04 09:54:09.454858: W tensorflow/core/profiler/internal/smprofiler_timeline.cc:460] Initializing the SageMaker Profiler.
2025-04-04 09:54:11,438 sagemaker-training-toolkit INFO     Imported framework sagemaker_tensorflow_container.training
2025-04-04 09:54:11,448 sagemaker-training-toolkit INFO     No GPUs detected (normal if no gpus installed)
2025-04-04 

2025-04-04 09:55:27 Starting - Starting the training job...
2025-04-04 09:55:51 Starting - Preparing the instances for training...
2025-04-04 09:56:12 Downloading - Downloading input data...
2025-04-04 09:56:37 Downloading - Downloading the training image...
2025-04-04 09:57:34 Training - Training image download completed. Training in progress..2025-04-04 09:57:39.812775: W tensorflow/core/profiler/internal/smprofiler_timeline.cc:460] Initializing the SageMaker Profiler.
2025-04-04 09:57:39.812952: W tensorflow/core/profiler/internal/smprofiler_timeline.cc:105] SageMaker Profiler is not enabled. The timeline writer thread will not be started, future recorded events will be dropped.
2025-04-04 09:57:39.838862: W tensorflow/core/profiler/internal/smprofiler_timeline.cc:460] Initializing the SageMaker Profiler.
2025-04-04 09:57:41,890 sagemaker-training-toolkit INFO     Imported framework sagemaker_tensorflow_container.training
2025-04-04 09:57:41,902 sagemaker-training-toolkit INFO     N

2025-04-04 09:58:56 Starting - Starting the training job...
2025-04-04 09:59:30 Downloading - Downloading input data...
2025-04-04 09:59:55 Downloading - Downloading the training image......
2025-04-04 10:00:56 Training - Training image download completed. Training in progress..2025-04-04 10:01:01.352348: W tensorflow/core/profiler/internal/smprofiler_timeline.cc:460] Initializing the SageMaker Profiler.
2025-04-04 10:01:01.352571: W tensorflow/core/profiler/internal/smprofiler_timeline.cc:105] SageMaker Profiler is not enabled. The timeline writer thread will not be started, future recorded events will be dropped.
2025-04-04 10:01:01.379282: W tensorflow/core/profiler/internal/smprofiler_timeline.cc:460] Initializing the SageMaker Profiler.
2025-04-04 10:01:03,569 sagemaker-training-toolkit INFO     Imported framework sagemaker_tensorflow_container.training
2025-04-04 10:01:03,580 sagemaker-training-toolkit INFO     No GPUs detected (normal if no gpus installed)
2025-04-04 10:01:03,86

2025-04-04 10:02:24 Starting - Starting the training job...
2025-04-04 10:02:39 Starting - Preparing the instances for training...
2025-04-04 10:03:27 Downloading - Downloading the training image......
2025-04-04 10:04:22 Training - Training image download completed. Training in progress..2025-04-04 10:04:27.908844: W tensorflow/core/profiler/internal/smprofiler_timeline.cc:460] Initializing the SageMaker Profiler.
2025-04-04 10:04:27.909014: W tensorflow/core/profiler/internal/smprofiler_timeline.cc:105] SageMaker Profiler is not enabled. The timeline writer thread will not be started, future recorded events will be dropped.
2025-04-04 10:04:27.934226: W tensorflow/core/profiler/internal/smprofiler_timeline.cc:460] Initializing the SageMaker Profiler.
2025-04-04 10:04:29,877 sagemaker-training-toolkit INFO     Imported framework sagemaker_tensorflow_container.training
2025-04-04 10:04:29,888 sagemaker-training-toolkit INFO     No GPUs detected (normal if no gpus installed)
2025-04-04 

2025-04-04 10:05:45 Starting - Starting the training job...
2025-04-04 10:06:16 Starting - Preparing the instances for training...
2025-04-04 10:06:39 Downloading - Downloading input data...
2025-04-04 10:07:05 Downloading - Downloading the training image......
2025-04-04 10:08:06 Training - Training image download completed. Training in progress.2025-04-04 10:08:11.282714: W tensorflow/core/profiler/internal/smprofiler_timeline.cc:460] Initializing the SageMaker Profiler.
2025-04-04 10:08:11.282891: W tensorflow/core/profiler/internal/smprofiler_timeline.cc:105] SageMaker Profiler is not enabled. The timeline writer thread will not be started, future recorded events will be dropped.
2025-04-04 10:08:11.308942: W tensorflow/core/profiler/internal/smprofiler_timeline.cc:460] Initializing the SageMaker Profiler.
2025-04-04 10:08:13,360 sagemaker-training-toolkit INFO     Imported framework sagemaker_tensorflow_container.training
2025-04-04 10:08:13,370 sagemaker-training-toolkit INFO    

2025-04-04 10:09:12 Starting - Starting the training job...
2025-04-04 10:09:28 Starting - Preparing the instances for training...
2025-04-04 10:10:15 Downloading - Downloading the training image......
2025-04-04 10:11:12 Training - Training image download completed. Training in progress..2025-04-04 10:11:19.036062: W tensorflow/core/profiler/internal/smprofiler_timeline.cc:460] Initializing the SageMaker Profiler.
2025-04-04 10:11:19.036235: W tensorflow/core/profiler/internal/smprofiler_timeline.cc:105] SageMaker Profiler is not enabled. The timeline writer thread will not be started, future recorded events will be dropped.
2025-04-04 10:11:19.061737: W tensorflow/core/profiler/internal/smprofiler_timeline.cc:460] Initializing the SageMaker Profiler.
2025-04-04 10:11:21,079 sagemaker-training-toolkit INFO     Imported framework sagemaker_tensorflow_container.training
2025-04-04 10:11:21,090 sagemaker-training-toolkit INFO     No GPUs detected (normal if no gpus installed)
2025-04-04 

2025-04-04 10:12:36 Starting - Starting the training job...
2025-04-04 10:13:01 Starting - Preparing the instances for training...
2025-04-04 10:13:24 Downloading - Downloading input data...
2025-04-04 10:13:44 Downloading - Downloading the training image...
2025-04-04 10:14:44 Training - Training image download completed. Training in progress...2025-04-04 10:14:52.486863: W tensorflow/core/profiler/internal/smprofiler_timeline.cc:460] Initializing the SageMaker Profiler.
2025-04-04 10:14:52.487029: W tensorflow/core/profiler/internal/smprofiler_timeline.cc:105] SageMaker Profiler is not enabled. The timeline writer thread will not be started, future recorded events will be dropped.
2025-04-04 10:14:52.512865: W tensorflow/core/profiler/internal/smprofiler_timeline.cc:460] Initializing the SageMaker Profiler.
2025-04-04 10:14:54,512 sagemaker-training-toolkit INFO     Imported framework sagemaker_tensorflow_container.training
2025-04-04 10:14:54,523 sagemaker-training-toolkit INFO     

2025-04-04 10:16:06 Starting - Starting the training job...
2025-04-04 10:16:21 Starting - Preparing the instances for training...
2025-04-04 10:16:44 Downloading - Downloading input data...
2025-04-04 10:17:09 Downloading - Downloading the training image...
2025-04-04 10:18:10 Training - Training image download completed. Training in progress..2025-04-04 10:18:15.376046: W tensorflow/core/profiler/internal/smprofiler_timeline.cc:460] Initializing the SageMaker Profiler.
2025-04-04 10:18:15.376225: W tensorflow/core/profiler/internal/smprofiler_timeline.cc:105] SageMaker Profiler is not enabled. The timeline writer thread will not be started, future recorded events will be dropped.
2025-04-04 10:18:15.401113: W tensorflow/core/profiler/internal/smprofiler_timeline.cc:460] Initializing the SageMaker Profiler.
2025-04-04 10:18:17,329 sagemaker-training-toolkit INFO     Imported framework sagemaker_tensorflow_container.training
2025-04-04 10:18:17,339 sagemaker-training-toolkit INFO     N

2025-04-04 10:19:29 Starting - Starting the training job...
2025-04-04 10:19:43 Starting - Preparing the instances for training...
2025-04-04 10:20:29 Downloading - Downloading the training image......
2025-04-04 10:21:29 Training - Training image download completed. Training in progress..2025-04-04 10:21:35.129756: W tensorflow/core/profiler/internal/smprofiler_timeline.cc:460] Initializing the SageMaker Profiler.
2025-04-04 10:21:35.129927: W tensorflow/core/profiler/internal/smprofiler_timeline.cc:105] SageMaker Profiler is not enabled. The timeline writer thread will not be started, future recorded events will be dropped.
2025-04-04 10:21:35.155779: W tensorflow/core/profiler/internal/smprofiler_timeline.cc:460] Initializing the SageMaker Profiler.
2025-04-04 10:21:37,202 sagemaker-training-toolkit INFO     Imported framework sagemaker_tensorflow_container.training
2025-04-04 10:21:37,213 sagemaker-training-toolkit INFO     No GPUs detected (normal if no gpus installed)
2025-04-04 

2025-04-04 10:23:20 Starting - Starting the training job...
2025-04-04 10:23:45 Starting - Preparing the instances for training...
2025-04-04 10:24:09 Downloading - Downloading input data...
2025-04-04 10:24:34 Downloading - Downloading the training image.....2025-04-04 10:25:40.415466: W tensorflow/core/profiler/internal/smprofiler_timeline.cc:460] Initializing the SageMaker Profiler.
2025-04-04 10:25:40.415635: W tensorflow/core/profiler/internal/smprofiler_timeline.cc:105] SageMaker Profiler is not enabled. The timeline writer thread will not be started, future recorded events will be dropped.
2025-04-04 10:25:40.441412: W tensorflow/core/profiler/internal/smprofiler_timeline.cc:460] Initializing the SageMaker Profiler.
2025-04-04 10:25:42,468 sagemaker-training-toolkit INFO     Imported framework sagemaker_tensorflow_container.training
2025-04-04 10:25:42,479 sagemaker-training-toolkit INFO     No GPUs detected (normal if no gpus installed)
2025-04-04 10:25:42,774 sagemaker-trainin

2025-04-04 10:26:47 Starting - Starting the training job...
2025-04-04 10:27:19 Downloading - Downloading input data...
Starting job: lstm-e10-lr0d01-u128-d4-b1


2025-04-04 10:27:45 Starting - Starting the training job..

In [None]:
# Initialize the S3 client
s3 = boto3.client('s3')

# Define the bucket name and the file (graph) you want to fetch
s3_graph_key = 'models/lstm-e10-lr0d001-u64-d2-b1-0/training-validation-loss.png'

# Fetch the file from S3 into memory
response = s3.get_object(Bucket=bucket_name, Key=s3_graph_key)
image_data = response['Body'].read()
image = BytesIO(image_data)

# Display the image using matplotlib
img = plt.imread(image)
plt.imshow(img)
plt.axis('off')  # Hide axes
plt.show()