In [1]:
import boto3
import pandas as pd

sm_client = boto3.client('sagemaker')

In [2]:
# collect traning jobs
search_params={
   "Resource": "TrainingJob",
   "SearchExpression": { 
      "Filters": [{ 
            "Name": "Tags.model",
            "Operator": "Equals",
            "Value": "free_trial_xgboost"
         }]}
}

results = sm_client.search(**search_params)['Results']

In [4]:
training_rows = []

# collect training job detail 
for job in results:
    response = job['TrainingJob']
    if response['TrainingJobStatus'] == 'Completed':
        TrainingJobName = response['TrainingJobName']
        TrainingJobArn = response['TrainingJobArn']
        S3ModelArtifacts = response['ModelArtifacts']['S3ModelArtifacts']
        HyperParameters = response['HyperParameters']
        TrainingImage = response['AlgorithmSpecification']['TrainingImage']
        RoleArn = response['RoleArn']
        TrainData = [c['DataSource']['S3DataSource']['S3Uri'] for c in response['InputDataConfig'] if c['ChannelName'] == 'train'][0]
        TestData = [c['DataSource']['S3DataSource']['S3Uri'] for c in response['InputDataConfig'] if c['ChannelName'] == 'validation'][0]
        TrainAUC = [m['Value'] for m in response['FinalMetricDataList'] if m['MetricName'] == 'train:auc'][0]
        TestAUC = [m['Value'] for m in response['FinalMetricDataList'] if m['MetricName'] == 'validation:auc'][0]
        InstanceType = response['ResourceConfig']['InstanceType']
        InstanceCount = response['ResourceConfig']['InstanceCount']
        VolumeSizeInGB = response['ResourceConfig']['VolumeSizeInGB']
        TrainingTimeInSeconds = response['TrainingTimeInSeconds']
        BillableTimeInSeconds = response['BillableTimeInSeconds']
        TrainingStartTime = response['TrainingStartTime'].strftime("%Y-%m-%d %H:%M:%S")

        row = [TrainingJobName, TrainingJobArn, S3ModelArtifacts, HyperParameters, TrainingImage, RoleArn, TrainData, TestData, TrainAUC, TestAUC, InstanceType, InstanceCount, VolumeSizeInGB, TrainingTimeInSeconds, BillableTimeInSeconds, TrainingStartTime]
        training_rows.append(row)

    
df = pd.DataFrame(training_rows, columns=['TrainingJobName', 'TrainingJobArn', 'S3ModelArtifact', 'HyperParameters', 'TrainingImage', 'RoleArn', 'TrainData', 'TestData', 'TrainAUC', 'TestAUC', 'InstanceType', 'InstanceCount', 'VolumeSizeInGB', 'TrainingTimeInSeconds', 'BillableTimeInSeconds', 'TrainingStartTime'])
df = df.sort_values(by='TestAUC',ascending=True)

KeyError: 'FinalMetricDataList'

In [21]:
df.head()

Unnamed: 0,TrainingJobName,TrainingJobArn,S3ModelArtifact,HyperParameters,TrainingImage,RoleArn,TrainData,TestData,TrainAUC,TestAUC,InstanceType,InstanceCount,VolumeSizeInGB,TrainingTimeInSeconds,BillableTimeInSeconds,TrainingStartTime
0,free-trial-train-xgboost-2020-08-04-22-54-30,arn:aws:sagemaker:us-east-1:613630599026:train...,s3://hbomax-datascience-deployment-dev/lifecyc...,"{'alpha': '1.218487609', 'eta': '0.225242353',...",811284229777.dkr.ecr.us-east-1.amazonaws.com/x...,arn:aws:iam::613630599026:role/hbomax-datascie...,s3://hbomax-datascience-deployment-dev/lifecyc...,s3://hbomax-datascience-deployment-dev/lifecyc...,0.892016,0.877707,ml.m4.4xlarge,1,30,54,54,2020-08-04 23:04:01
1,free-trial-train-xgboost-2020-08-04-22-29-25,arn:aws:sagemaker:us-east-1:613630599026:train...,s3://hbomax-datascience-deployment-dev/lifecyc...,"{'alpha': '1.218487609', 'eta': '0.225242353',...",811284229777.dkr.ecr.us-east-1.amazonaws.com/x...,arn:aws:iam::613630599026:role/hbomax-datascie...,s3://hbomax-datascience-deployment-dev/lifecyc...,s3://hbomax-datascience-deployment-dev/lifecyc...,0.892016,0.877707,ml.m4.4xlarge,1,30,50,50,2020-08-04 22:40:52
2,free-trial-train-xgboost-2020-08-04-16-29-06,arn:aws:sagemaker:us-east-1:613630599026:train...,s3://hbomax-datascience-deployment-dev/lifecyc...,"{'alpha': '1.218487609', 'eta': '0.225242353',...",811284229777.dkr.ecr.us-east-1.amazonaws.com/x...,arn:aws:iam::613630599026:role/hbomax-datascie...,s3://hbomax-datascience-deployment-dev/lifecyc...,s3://hbomax-datascience-deployment-dev/lifecyc...,0.892016,0.877707,ml.m4.4xlarge,1,30,48,48,2020-08-04 16:39:51
3,free-trial-train-xgboost-2020-08-04-14-55-14,arn:aws:sagemaker:us-east-1:613630599026:train...,s3://hbomax-datascience-deployment-dev/lifecyc...,"{'alpha': '1.218487609', 'eta': '0.225242353',...",811284229777.dkr.ecr.us-east-1.amazonaws.com/x...,arn:aws:iam::613630599026:role/hbomax-datascie...,s3://hbomax-datascience-deployment-dev/lifecyc...,s3://hbomax-datascience-deployment-dev/lifecyc...,0.892016,0.877707,ml.m4.4xlarge,1,30,55,55,2020-08-04 15:06:44
4,free-trial-train-xgboost-2020-07-30-21-50-07,arn:aws:sagemaker:us-east-1:613630599026:train...,s3://hbomax-datascience-deployment-dev/lifecyc...,"{'alpha': '1.218487609', 'eta': '0.225242353',...",811284229777.dkr.ecr.us-east-1.amazonaws.com/x...,arn:aws:iam::613630599026:role/hbomax-datascie...,s3://hbomax-datascience-deployment-dev/lifecyc...,s3://hbomax-datascience-deployment-dev/lifecyc...,0.892016,0.877707,ml.m4.4xlarge,1,30,51,51,2020-07-30 22:00:45
