In [None]:
import sagemaker
import boto3
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sagemaker.pytorch import PyTorch
from sagemaker.tensorflow import TensorFlow
import os
from io import BytesIO

boto_session = boto3.Session(region_name='us-east-1')
sagemaker_session = sagemaker.Session(boto_session=boto_session)

role = "arn:aws:iam::211125439249:role/service-role/AmazonSageMaker-ExecutionRole-20250314T153928"
role_name = role.split('/')[-1]  # Extract just the role name from the ARN

# Attach AdministratorAccess policy to your existing role
iam_client = boto3.client('iam')
iam_client.attach_role_policy(
    RoleName=role_name,
    PolicyArn="arn:aws:iam::aws:policy/AdministratorAccess"
)
print(f"Attached AdministratorAccess policy to role: {role}")

# Assume blood.csv is in S3 already - if not, upload it first
input_data_s3_uri = "s3://blue-blood-data/final_df.csv"

bucket_name = "blue-blood-data"
region = 'us-east-1'
file_key = 'final_df.csv'



sagemaker.config INFO - Not applying SDK defaults from location: /Library/Application Support/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /Users/ubaid/Library/Application Support/sagemaker/config.yaml


Attached AdministratorAccess policy to role: arn:aws:iam::211125439249:role/service-role/AmazonSageMaker-ExecutionRole-20250314T153928


In [2]:
# First, make sure train.py exists in the notebook directory
if not os.path.exists("train.py"):
    print("Error: train.py not found in the current directory")
else:
    print("Found train.py in the current directory")
    
    # Upload train.py to S3 to ensure latest version is used
    code_prefix = "code"
    s3_code_path = sagemaker_session.upload_data("train.py", bucket=bucket_name, key_prefix=code_prefix)
    print(f"Uploaded train.py to {s3_code_path}")

# Verify that the S3 data file exists
try:
    s3_client = boto3.client('s3', region_name=region)
    s3_client.head_object(Bucket=bucket_name, Key=file_key)
    print(f"Verified that s3://{bucket_name}/{file_key} exists")
except Exception as e:
    print(f"Warning: Error verifying S3 file: {e}")

Found train.py in the current directory
Uploaded train.py to s3://blue-blood-data/code/train.py
Verified that s3://blue-blood-data/final_df.csv exists


In [4]:
# Define parameters for initial test run
test_epochs = 10
test_learning_rate = 0.001
test_lstm_units = 64
test_dropout_rate = 0.2

# Create a unique job name for the test run
test_job_name = f"lstm-med-test-e{test_epochs}-lr{test_learning_rate}-u{test_lstm_units}-d{int(test_dropout_rate*10)}"
print(f"Starting test job: {test_job_name}")

# Create the TensorFlow estimator
estimator = TensorFlow(
    entry_point='train.py',
    role=role,
    instance_count=1,
    instance_type='ml.m5.4xlarge',  # Using GPU instance
    framework_version='2.9',  # TensorFlow version
    py_version='py39',
    sagemaker_session=sagemaker_session,
    hyperparameters={
        'epochs': test_epochs,
        'learning_rate': test_learning_rate,
        'lstm_units': test_lstm_units,
        'dropout_rate': test_dropout_rate,
        'batch_size': 1
    },
    job_name=test_job_name
)

# Start training job
estimator.fit({'train': input_data_s3_uri})


Starting test job: lstm-med-test-e10-lr0.001-u64-d2


2025-04-09 04:56:27 Starting - Starting the training job...
2025-04-09 04:56:40 Starting - Preparing the instances for training...
2025-04-09 04:57:18 Downloading - Downloading the training image......
2025-04-09 04:58:13 Training - Training image download completed. Training in progress...2025-04-09 04:58:39.690693: W tensorflow/core/profiler/internal/smprofiler_timeline.cc:460] Initializing the SageMaker Profiler.
2025-04-09 04:58:39.690851: W tensorflow/core/profiler/internal/smprofiler_timeline.cc:105] SageMaker Profiler is not enabled. The timeline writer thread will not be started, future recorded events will be dropped.
2025-04-09 04:58:39.716530: W tensorflow/core/profiler/internal/smprofiler_timeline.cc:460] Initializing the SageMaker Profiler.
2025-04-09 04:58:41,746 sagemaker-training-toolkit INFO     Imported framework sagemaker_tensorflow_container.training
2025-04-09 04:58:41,758 sagemaker-training-toolkit INFO     No GPUs detected (normal if no gpus installed)
2025-04-09

In [None]:
# Initialize the S3 client
s3 = boto3.client('s3')

# Define the bucket name and the file (graph) you want to fetch
s3_key = 'baseline_model/baseline-training-validation-loss.png'  # Adjust the path as needed

# Fetch the file from S3 into memory
response = s3.get_object(Bucket=bucket_name, Key=s3_key)
image_data = response['Body'].read()
image = BytesIO(image_data)

# Display the image using matplotlib
img = plt.imread(image)
plt.imshow(img)
plt.axis('off')  # Hide axes
plt.show()
