In [2]:
## Batch Transform: process multiple images at once
# This notebook is meant to be run once every day.

In [3]:
from datetime import datetime

# Today's date as an ISO "YYYY-MM-DD" string; its three components are used
# further down to build the dated S3 folder names.
date_today = datetime.today().strftime('%Y-%m-%d')
year, month, day = date_today.split('-')

In [None]:
import sagemaker
from sagemaker import get_execution_role, image_uris

# Execution role of the current SageMaker environment; it is passed to the
# Model constructor further down.
role = get_execution_role()

In [None]:
# Default SageMaker session; its region selects which regional image is returned.
sess = sagemaker.Session()

# Container image URI for the "object-detection" framework, version 1.
training_image = image_uris.retrieve(
    framework="object-detection",
    region=sess.boto_region_name,
    version="1",
)

print(training_image)

In [None]:
# Combine the container image, the trained model artifact and the execution
# role into a SageMaker Model object.
model = sagemaker.model.Model(
    image_uri=training_image,
    model_data='',  # TODO: paste the S3 URI of the trained artifact (.../model.tar.gz)
    role=role,
)

In [None]:
# Batch-transform configuration: a single ml.m4.xlarge instance.
#
# Results are written to the "batch-output" folder of the
# plastic-detection-batch-transform-2023 bucket (create both in S3 first),
# inside a dated <year>/<month>/<day> sub-folder. This is the same location
# that the "Loop over output files" cell lists with the prefix
# batch-output/{year}/{month}/{day}, so the two cells must stay in sync.
transformer = model.transformer(
    instance_count=1,
    instance_type='ml.m4.xlarge',
    output_path=f's3://plastic-detection-batch-transform-2023/batch-output/{year}/{month}/{day}',
)

In [None]:
# Bucket that holds the input images (the same bucket named in the
# transformer's output-path comment).
sample_data_bucket = 'plastic-detection-batch-transform-2023'

# Key prefix of today's images: images/<year>/<month>/<day>.
# Create a new top-level folder called "images" in the bucket for this.
input_file_path = '/'.join(['images', year, month, day])

In [None]:
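### Note: some images must be uploaded to the input file path on S3 before the transform can run.
### Create the dated folders manually, e.g. images --> 2023 --> 09 --> 03, and upload the images there.
### Then run the transformer cell below.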

In [None]:
# S3 prefix containing today's input images.
input_s3_uri = f"s3://{sample_data_bucket}/{input_file_path}"

# Run the batch-transform job over every JPEG under that prefix.
transformer.transform(
    input_s3_uri,
    content_type="image/jpeg",
)

In [None]:
# Show the S3 location configured as the transformer's output path.
print(transformer.output_path)

### Loop over output files

In [4]:
from datetime import datetime

# Recompute today's date so this half of the notebook can be run on its own.
date_today = datetime.today().strftime('%Y-%m-%d')

# Components of the "YYYY-MM-DD" string, used for the dated S3 prefixes below.
year, month, day = date_today.split('-')

print(date_today)

2023-09-01


In [None]:
import boto3  # required for the S3 listing below

s3 = boto3.resource('s3')

my_bucket = s3.Bucket('plastic-detection-batch-transform-2023')

# Key prefix of today's batch-transform results. The trailing slash restricts
# the listing to objects inside the dated folder, and lets the prefix length
# be used to strip the folder part from every key (instead of a hard-coded 24).
output_prefix = f'batch-output/{year}/{month}/{day}/'

# Names of the result objects relative to today's folder.
files = [
    object_summary.key[len(output_prefix):]
    for object_summary in my_bucket.objects.filter(Prefix=output_prefix)
    # A folder created by hand shows up as a key equal to the prefix itself;
    # it is not a result file and would otherwise yield an empty name.
    if object_summary.key != output_prefix
]

### Visualize batch transformation results

In [None]:
!mkdir -p Batch_test_images

In [None]:
import json
import random
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
from sagemaker.s3 import S3Downloader


# Minimum confidence score a detection needs in order to be drawn and counted.
thresh = 0.3

# S3 location of today's batch-transform results: the same bucket and key
# prefix from which `files` was listed.
results_s3_prefix = f's3://plastic-detection-batch-transform-2023/batch-output/{year}/{month}/{day}'

# Number of above-threshold detections in the image currently being processed.
num_detections = 0

for i in files:
    # S3Downloader.read_file needs a complete s3:// URI, not a bare key.
    file_key = f'{results_s3_prefix}/{i}'

    # Result objects carry a 4-character ".out" suffix after the image file
    # name; dropping it gives the name of the local copy of the image.
    image_name = i[:-len('.out')] if i.endswith('.out') else i
    file_name_in_sagemaker = f'Batch_test_images/{image_name}'

    output = S3Downloader.read_file(file_key)
    detections = json.loads(output)
    detection_results = detections['prediction']

    # The image must already be present locally in Batch_test_images/.
    img = mpimg.imread(file_name_in_sagemaker)
    plt.imshow(img)
    ax = plt.gca()

    # Image size in pixels. These must stay untouched for the whole detection
    # loop because every box is scaled by them.
    width = img.shape[1]
    height = img.shape[0]

    colors = {}  # one random RGB colour per class id, chosen on first use
    num_detections = 0
    for det in detection_results:
        # Each detection unpacks to: class id, score, then the two box corners.
        klass, score, x0, y0, x1, y1 = det
        if score < thresh:
            continue
        num_detections += 1

        cls_id = int(klass)
        if cls_id not in colors:
            colors[cls_id] = (random.random(), random.random(), random.random())

        # Corner coordinates are fractions of the image size; convert to pixels.
        xmin = int(x0 * width)
        ymin = int(y0 * height)
        xmax = int(x1 * width)
        ymax = int(y1 * height)

        # Separate names for the box size, so the image width/height above are
        # not overwritten and later boxes keep the right scale.
        box_width = xmax - xmin
        box_height = ymax - ymin

        rect = plt.Rectangle(
            (xmin, ymin),
            box_width,
            box_height,
            fill=False,
            edgecolor=colors[cls_id],
            linewidth=3.5,
        )
        ax.add_patch(rect)

        # Score label just above the top-left corner of the box.
        ax.text(
            xmin,
            ymin - 2,
            "{:.3f}".format(score),
            bbox=dict(facecolor=colors[cls_id], alpha=0.5),
            fontsize=12,
            color="white",
        )

    print(f"Number of detections: {num_detections}")
    plt.show()