In [1]:
import sagemaker
import boto3
from sagemaker.tensorflow import TensorFlow
import os

boto_session = boto3.Session(region_name='us-east-1')
sagemaker_session = sagemaker.Session(boto_session=boto_session)

role = "arn:aws:iam::211125439249:role/service-role/AmazonSageMaker-ExecutionRole-20250314T153928"
role_name = role.split('/')[-1]  # Extract just the role name from the ARN

# Attach AdministratorAccess policy to your existing role
iam_client = boto3.client('iam')
iam_client.attach_role_policy(
    RoleName=role_name,
    PolicyArn="arn:aws:iam::aws:policy/AdministratorAccess"
)
print(f"Attached AdministratorAccess policy to role: {role}")

# Assume blood.csv is in S3 already - if not, upload it first
input_data_s3_uri = "s3://blue-blood-data/synthetic_data.csv"

bucket_name = "blue-blood-data"
region = 'us-east-1'
file_key = 'synthetic_data.csv'


print(sagemaker.__version__)
print(dir(sagemaker))  # Check if 'utils' is listed




sagemaker.config INFO - Not applying SDK defaults from location: C:\ProgramData\sagemaker\sagemaker\config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: C:\Users\advay\AppData\Local\sagemaker\sagemaker\config.yaml


Attached AdministratorAccess policy to role: arn:aws:iam::211125439249:role/service-role/AmazonSageMaker-ExecutionRole-20250314T153928
2.243.0
['AlgorithmEstimator', 'AutoML', 'AutoMLDataChannel', 'AutoMLImageClassificationConfig', 'AutoMLInput', 'AutoMLJob', 'AutoMLJobV2', 'AutoMLTabularConfig', 'AutoMLTextClassificationConfig', 'AutoMLTextGenerationConfig', 'AutoMLTimeSeriesForecastingConfig', 'AutoMLV2', 'CandidateEstimator', 'CandidateStep', 'FactorizationMachines', 'FactorizationMachinesModel', 'FactorizationMachinesPredictor', 'FileSource', 'HyperparameterTuningJobAnalytics', 'IPInsights', 'IPInsightsModel', 'IPInsightsPredictor', 'KMeans', 'KMeansModel', 'KMeansPredictor', 'KNN', 'KNNModel', 'KNNPredictor', 'LDA', 'LDAModel', 'LDAPredictor', 'LinearLearner', 'LinearLearnerModel', 'LinearLearnerPredictor', 'LocalAutoMLDataChannel', 'LocalSession', 'MetricsSource', 'Model', 'ModelMetrics', 'ModelPackage', 'NTM', 'NTMModel', 'NTMPredictor', 'Object2Vec', 'Object2VecModel', 'PCA', '

In [2]:
# First, make sure train.py exists in the notebook directory
if not os.path.exists("train.py"):
    print("Error: train.py not found in the current directory")
else:
    print("Found train.py in the current directory")
    
    # Upload train.py to S3 to ensure latest version is used
    code_prefix = "code"
    s3_code_path = sagemaker_session.upload_data("train.py", bucket=bucket_name, key_prefix=code_prefix)
    print(f"Uploaded train.py to {s3_code_path}")

# Verify that the S3 data file exists
try:
    s3_client = boto3.client('s3', region_name=region)
    s3_client.head_object(Bucket=bucket_name, Key=file_key)
    print(f"Verified that s3://{bucket_name}/{file_key} exists")
except Exception as e:
    print(f"Warning: Error verifying S3 file: {e}")

Found train.py in the current directory
Uploaded train.py to s3://blue-blood-data/code/train.py
Verified that s3://blue-blood-data/synthetic_data.csv exists


In [9]:
# Define hyperparameters for testing
# epochs_list = [75, 150, 300]
# lstm_units_list = [64, 128, 256]
# learning_rates = [0.0001, 0.0005, 0.001] 
# dropout_rates = [0.1, 0.25, 0.35]

epochs_list = [150]
learning_rates = [0.0001]
lstm_units_list = [128]
dropout_rates = [0.1]

print(f"Starting hyperparameter testing")

# test all combinations of hyperparameters
for epochs in epochs_list:
    for learning_rate in learning_rates:
        for lstm_units in lstm_units_list:
            for dropout_rate in dropout_rates:
                # Create a unique job name based on parameters
                job_name = f"BB-{epochs}Epochs-{learning_rate}LearningRate-{lstm_units}LSTM_Units-{dropout_rate}DropoutRate"
                job_name = job_name.replace('.', 'd')  # Replace dots with 'd' for valid job name
                print(f"\nStarting job: {job_name}")

                # Create the TensorFlow estimator with this set of hyperparameters
                estimator = TensorFlow(
                    entry_point='train.py',
                    role=role,
                    instance_count=1,
                    instance_type='ml.m5.4xlarge',
                    framework_version='2.9',
                    py_version='py39',
                    sagemaker_session=sagemaker_session,
                    hyperparameters={
                        'epochs': epochs,
                        'learning_rate': learning_rate,
                        'lstm_units': lstm_units,
                        'dropout_rate': dropout_rate,
                        'job_name': job_name
                    },
                )

                # Start training job
                estimator.fit({'train': input_data_s3_uri})

Starting hyperparameter testing

Starting job: BB-150Epochs-0d0001LearningRate-128LSTM_Units-0d1DropoutRate


2025-04-17 04:24:09 Starting - Starting the training job...
2025-04-17 04:24:30 Starting - Preparing the instances for training...
2025-04-17 04:25:06 Downloading - Downloading the training image......
2025-04-17 04:26:07 Training - Training image download completed. Training in progress....2025-04-17 04:26:34.681131: W tensorflow/core/profiler/internal/smprofiler_timeline.cc:460] Initializing the SageMaker Profiler.
2025-04-17 04:26:34.681282: W tensorflow/core/profiler/internal/smprofiler_timeline.cc:105] SageMaker Profiler is not enabled. The timeline writer thread will not be started, future recorded events will be dropped.
2025-04-17 04:26:34.706751: W tensorflow/core/profiler/internal/smprofiler_timeline.cc:460] Initializing the SageMaker Profiler.
2025-04-17 04:26:36,765 sagemaker-training-toolkit INFO     Imported framework sagemaker_tensorflow_container.training
2025-04-17 04:26:36,776 sagemaker-training-toolkit INFO     No GPUs detected (normal if no gpus installed)
2025-04-1