In [79]:
import boto3
import pandas as pd

# Low-level SageMaker API client shared by the cells below.
sm_client = boto3.client(service_name='sagemaker')

In [80]:
# Collect training jobs whose `model` tag equals "free_trial_xgboost".
search_params = {
    "Resource": "TrainingJob",
    "SearchExpression": {
        "Filters": [
            {
                "Name": "Tags.model",
                "Operator": "Equals",
                "Value": "free_trial_xgboost",
            }
        ]
    },
}

# A single search() call returns only one page of matches (bounded by
# MaxResults) and signals further pages through NextToken. Walk every page
# with the paginator so that no tagged job is silently dropped.
results = [
    record
    for page in sm_client.get_paginator('search').paginate(**search_params)
    for record in page['Results']
]

In [81]:
# Column order of the summary frame; must stay in sync with the list returned
# by _training_job_row().
TRAINING_JOB_COLUMNS = [
    'TrainingJobName', 'TrainingJobArn', 'S3ModelArtifact', 'HyperParameters',
    'TrainingImage', 'RoleArn', 'TrainData', 'TestData', 'TrainAUC', 'TestAUC',
    'InstanceType', 'InstanceCount', 'VolumeSizeInGB', 'TrainingTimeInSeconds',
    'BillableTimeInSeconds', 'TrainingStartTime',
]


def _channel_s3_uri(job_detail, channel_name):
    """Return the S3 URI of the first input channel named `channel_name`.

    Returns None when the job has no such channel (or the channel carries no
    S3 data source) instead of raising, so one unusual job does not abort the
    whole collection.
    """
    return next(
        (
            channel['DataSource'].get('S3DataSource', {}).get('S3Uri')
            for channel in job_detail.get('InputDataConfig', [])
            if channel['ChannelName'] == channel_name
        ),
        None,
    )


def _final_metric_value(job_detail, metric_name):
    """Return the value of the first final metric named `metric_name`.

    Returns None when the job reports no metric with that name.
    """
    return next(
        (
            metric['Value']
            for metric in job_detail.get('FinalMetricDataList', [])
            if metric['MetricName'] == metric_name
        ),
        None,
    )


def _training_job_row(job_detail):
    """Flatten one `TrainingJob` search record into a row.

    The returned list follows the order of TRAINING_JOB_COLUMNS.
    """
    resource_config = job_detail['ResourceConfig']
    return [
        job_detail['TrainingJobName'],
        job_detail['TrainingJobArn'],
        job_detail['ModelArtifacts']['S3ModelArtifacts'],
        job_detail['HyperParameters'],
        job_detail['AlgorithmSpecification']['TrainingImage'],
        job_detail['RoleArn'],
        _channel_s3_uri(job_detail, 'train'),
        # The "validation" channel is what the frame reports as TestData.
        _channel_s3_uri(job_detail, 'validation'),
        _final_metric_value(job_detail, 'train:auc'),
        _final_metric_value(job_detail, 'validation:auc'),
        resource_config['InstanceType'],
        resource_config['InstanceCount'],
        resource_config['VolumeSizeInGB'],
        job_detail['TrainingTimeInSeconds'],
        job_detail['BillableTimeInSeconds'],
        job_detail['TrainingStartTime'].strftime("%Y-%m-%d %H:%M:%S"),
    ]


# Collect training job detail; only jobs with status "Completed" are kept.
training_rows = [
    _training_job_row(job['TrainingJob'])
    for job in results
    if job['TrainingJob']['TrainingJobStatus'] == 'Completed'
]

# NOTE(review): ascending order places the LOWEST validation AUC first (rows
# with a missing TestAUC sort last). Confirm this is the intended ranking.
df = pd.DataFrame(training_rows, columns=TRAINING_JOB_COLUMNS).sort_values(
    by='TestAUC', ascending=True
)

In [82]:
# Preview the first five rows of the summary frame.
df.head(n=5)

Unnamed: 0,TrainingJobName,TrainingJobArn,S3ModelArtifact,HyperParameters,TrainingImage,RoleArn,TrainData,TestData,TrainAUC,TestAUC,InstanceType,InstanceCount,VolumeSizeInGB,TrainingTimeInSeconds,BillableTimeInSeconds,TrainingStartTime
0,free-trial-train-xgboost-2020-08-07-16-29-57,arn:aws:sagemaker:us-east-1:613630599026:train...,s3://hbomax-datascience-development-dev/free_t...,"{'alpha': '1.218487609', 'eta': '0.225242353',...",613630599026.dkr.ecr.us-east-1.amazonaws.com/t...,arn:aws:iam::613630599026:role/hbomax-datascie...,s3://hbomax-datascience-development-dev/free_t...,s3://hbomax-datascience-development-dev/free_t...,0.94327,0.86269,ml.m4.4xlarge,1,30,111,111,2020-08-07 16:40:34
1,free-trial-train-xgboost-2020-08-07-15-17-36,arn:aws:sagemaker:us-east-1:613630599026:train...,s3://hbomax-datascience-development-dev/free_t...,"{'alpha': '1.218487609', 'eta': '0.225242353',...",613630599026.dkr.ecr.us-east-1.amazonaws.com/t...,arn:aws:iam::613630599026:role/hbomax-datascie...,s3://hbomax-datascience-development-dev/free_t...,s3://hbomax-datascience-development-dev/free_t...,0.94327,0.86269,ml.m4.4xlarge,1,30,107,107,2020-08-07 15:28:08
