# (04a) MODEL: Evaluation and Baseline Performance

In [1]:
import subprocess
import sys
import os

# Location of the repository root, relative to this notebook
repo_root = '../'

# Make the project's `src` folder importable. The entry is only added when it
# is missing, so re-running this cell does not pile up duplicates in sys.path.
src_path = os.path.join(repo_root, 'src')
if sys.path.count(src_path) == 0:
    sys.path.insert(0, src_path)

In [2]:
# try importing src/utils
from utils.utils import parse_s3_uri
from utils.utils import generate_manifest_file


In [3]:
import boto3
import sagemaker
from sagemaker import get_execution_role
import pandas as pd
import json
from sagemaker.transformer import Transformer


# --- Project storage layout ---
# Project bucket and the prefix that holds the train/validation split
bucket_name = "aai-590-tmp2"
dev_split = "data_split/train_val"

# Full S3 URI of the label_mapping.json that was used during training
s3_label_map_uri = f"s3://{bucket_name}/{dev_split}/label_mapping.json"

# --- AWS / SageMaker handles shared by the cells below ---
role = get_execution_role()
region = boto3.Session().region_name
s3_client = boto3.client("s3")
sm_client = boto3.client("sagemaker")
sess = sagemaker.Session()


sagemaker.config INFO - Not applying SDK defaults from location: /etc/xdg/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /home/sagemaker-user/.config/sagemaker/config.yaml


In [4]:
from sagemaker import ModelPackage

# ARN of the registered model package (comes from notebook 03)
model_package_arn = 'arn:aws:sagemaker:us-east-1:324183265896:model-package/wildscan-image-classifier-fixed-locs/1'

# Build the model object from the package ARN, bound to this notebook's role and session
model = ModelPackage(
    sagemaker_session=sess,
    role=role,
    model_package_arn=model_package_arn,
)

In [5]:
# Create the SageMaker model from the package defined above.
# Later cells pass `model.name` to Transformer, so this presumably has to run
# first for that name to be populated — TODO confirm.
model.create()

In [19]:
# Fetch the trained model archive; the following cells read its contents from ./model_dir.
# NOTE(review): `download_and_extract_model` is not defined or imported in any visible
# cell, so this line raises NameError on a fresh kernel — restore its definition/import.
# NOTE(review): the URI points at bucket `aai-590-tmp`, while the config cell sets
# bucket_name to `aai-590-tmp2` — confirm which bucket is intended.
download_and_extract_model('s3://aai-590-tmp/sg-ic-transfer-learning/space_time_split/output/image-classification-2025-07-10-13-02-40-529/output/model.tar.gz')

  tar.extractall(path=extract_path)


Extracted to: ./model_dir


In [21]:
!pip install mxnet

Collecting mxnet
  Downloading mxnet-1.9.1-py3-none-manylinux2014_x86_64.whl.metadata (3.4 kB)
Collecting graphviz<0.9.0,>=0.8.1 (from mxnet)
  Downloading graphviz-0.8.4-py2.py3-none-any.whl.metadata (6.4 kB)
Downloading mxnet-1.9.1-py3-none-manylinux2014_x86_64.whl (49.1 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m49.1/49.1 MB[0m [31m191.5 MB/s[0m eta [36m0:00:00[0m00:01[0m
[?25hDownloading graphviz-0.8.4-py2.py3-none-any.whl (16 kB)
Installing collected packages: graphviz, mxnet
[2K  Attempting uninstall: graphviz
[2K    Found existing installation: graphviz 0.20.3
[2K    Uninstalling graphviz-0.20.3:
[2K      Successfully uninstalled graphviz-0.20.3
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2/2[0m [mxnet]32m1/2[0m [mxnet]
[1A[2KSuccessfully installed graphviz-0.8.4 mxnet-1.9.1


In [24]:
import numpy as np

# Workaround for mxnet plotting: expose the builtin `bool` under the name `np.bool`
setattr(np, "bool", bool)

In [25]:
import mxnet as mx

# Load the network symbol and its parameters from the checkpoint unpacked in ./model_dir.
# The second argument to load_checkpoint is the checkpoint epoch number.
checkpoint_prefix = './model_dir/image-classification'
checkpoint_epoch = 5
sym, arg_params, aux_params = mx.model.load_checkpoint(checkpoint_prefix, checkpoint_epoch)

[09:09:37] ../src/nnvm/legacy_json_util.cc:208: Loading symbol saved by previous version v1.4.1. Attempting to upgrade...
[09:09:37] ../src/nnvm/legacy_json_util.cc:216: Symbol successfully upgraded!


In [31]:
# Read the shape metadata shipped with the model and pull out the shape of its first entry
with open('./model_dir/model-shapes.json') as shapes_file:
    shape_info = json.load(shapes_file)

input_shape = shape_info[0]['shape']
input_shape

[128, 3, 224, 224]

In [32]:
# Display the complete content loaded from model-shapes.json
shape_info

[{'name': 'data', 'shape': [128, 3, 224, 224]}]

In [33]:
# Plot the network using the input shape read from model-shapes.json
data_shape = tuple(input_shape)
viz = mx.viz.plot_network(symbol=sym, shape={'data': data_shape})

# Write the diagram to disk as a PNG; the cell displays the value returned by render()
viz.format = 'png'
viz.render('sg-finetuned-architecture')



'sg-finetuned-architecture.png'

In [35]:
from PIL import Image

# Save a vertically mirrored copy of the rendered architecture diagram.
# The `with` block closes the source image's file handle once the copy has been
# written, instead of leaving it open for the rest of the kernel session.
with Image.open('sg-finetuned-architecture.png') as img:
    flipped_img = img.transpose(Image.FLIP_TOP_BOTTOM)
    flipped_img.save('sg-finetuned-architecture_flipped.png')


In [36]:
# Record the installed mxnet version alongside the results
print(mx.__version__)

1.9.1


In [48]:
# Alternative rendering built straight from the symbol file
# (layout settings adapted from a Perplexity suggestion).
symbol_file = './model_dir/image-classification-symbol.json'

sym2 = mx.sym.load(symbol_file)
graph = mx.viz.plot_network(
    symbol=sym2,
    shape={'data': (1, 3, 224, 224)},
    node_attrs={
        "width": "2.0",        # Width in inches
        "height": ".5",        # Height in inches
        "fixedsize": "true",   # Force exact size (optional)
    },
)

# Graph-wide node defaults have to go through graphviz's `node_attr` dict;
# assigning to any other attribute name (e.g. `node_attrs`) is silently ignored.
# NOTE(review): attributes set on individual nodes take precedence over these
# defaults — check the rendered PNG to confirm the fill colour is as intended.
graph.node_attr.update({"fillcolor": "lightgray"})

graph.graph_attr = {
    'rankdir': 'BT',
    'ranksep': '0.3',   # Reduce spacing between ranks
    'nodesep': '0.2',   # Reduce spacing between nodes
    'fontsize': '10',   # Smaller font for graph title
}
graph.format = 'png'
graph.render('./model_dir/model_graph')

[10:15:11] ../src/nnvm/legacy_json_util.cc:208: Loading symbol saved by previous version v1.4.1. Attempting to upgrade...
[10:15:11] ../src/nnvm/legacy_json_util.cc:216: Symbol successfully upgraded!


'./model_dir/model_graph.png'

In [None]:
# Build manifest files from the validation and test metadata CSVs so both sets
# can be evaluated via Batch Transform.
# NOTE(review): s3_validation_path, val_key, s3_test_path and test_key are not
# assigned in any visible cell — they must be defined before this cell can run.
cct_images_uri = f"s3://{bucket_name}/cct_resized/"
generate_manifest_file(
    s3_input_csv=f"{s3_validation_path}/{val_key}",
    s3_images_loc=cct_images_uri,
)
generate_manifest_file(
    s3_input_csv=f"{s3_test_path}/{test_key}",
    s3_images_loc=cct_images_uri,
)


-----
### Batch Transform Validation Set

In [6]:
# Locate the validation split and its metadata CSV, then build the manifest from that CSV
s3_newdata_dir = f's3://{bucket_name}/{dev_split}/validation'
s3_newdata_csv = s3_newdata_dir + '/val-meta.csv'

# NOTE(review): images are referenced from the `aai-540-data` bucket rather than
# `bucket_name` — confirm this is intentional.
generate_manifest_file(
    s3_images_loc="s3://aai-540-data/cct_resized/",
    s3_input_csv=s3_newdata_csv,
)


File uploaded to s3://aai-590-tmp2/data_split/train_val/validation/val-meta.manifest


In [7]:
# Batch Transform setup for the validation set (transformed before the test set):
# the manifest to read and the prefix where predictions are written.
s3_transform_manifest = f"{s3_newdata_dir}/val-meta.manifest"
s3_transform_out = f"{s3_newdata_dir}/batch_transform_out"

# Initialize the Transformer for the model created from the model package
transformer = Transformer(
    model_name=model.name,
    sagemaker_session=sess,
    # Compute
    instance_type="ml.g4dn.xlarge",
    instance_count=1,
    # Request batching: MultiRecord is used for faster processing; in a real-world
    # setting ml.m5.xlarge with a single-record strategy would be fine.
    strategy="MultiRecord",
    max_payload=10,  # Max payload size (MB)
    max_concurrent_transforms=10,
    # Output
    output_path=s3_transform_out,
    # NOTE(review): 'txt/csv' is not a standard MIME type (the usual spelling is
    # 'text/csv') — confirm what the model container expects before changing it.
    # A previous evaluation run logged 6833 prediction files, so these settings
    # do not merge the predictions into a single output file.
    accept='txt/csv',
    assemble_with='Line',  # one line per prediction
)

In [9]:
# Run Batch Transform over every image listed in the validation manifest
transformer.transform(
    data=s3_transform_manifest,
    # The manifest provides the list of S3 URIs of the objects to transform
    data_type='ManifestFile',
    # Each object is a single image file, so no record splitting is needed
    split_type='None',
    content_type='application/x-image',
    # Block until the job finishes and stream its logs into the notebook
    wait=True,
    logs=True,
)


INFO:sagemaker:Creating transform job with name: wildscan-image-classifier-fixed-locs-20-2025-07-14-12-18-55-929


.

In [10]:
# Evaluate the prediction results with a SageMaker Processing job
from sagemaker.processing import ScriptProcessor, ProcessingInput, ProcessingOutput

# Look up the scikit-learn container image for this region
# (other frameworks such as 'xgboost' or 'pytorch' could be requested the same way)
image_uri = sagemaker.image_uris.retrieve(
    framework='sklearn',
    version='1.2-1',
    py_version='py3',
    region=region,
)

print(image_uri)

# Processor that runs a Python script inside the image retrieved above
script_processor = ScriptProcessor(
    image_uri=image_uri,
    command=['python3'],
    role=role,
    instance_type='ml.m5.xlarge',
    instance_count=1,
)

INFO:sagemaker.image_uris:Defaulting to only supported image scope: cpu.


683313688378.dkr.ecr.us-east-1.amazonaws.com/sagemaker-scikit-learn:1.2-1-cpu-py3


In [11]:

# Run the evaluation script on the validation predictions
s3_evaluation_out = f"{s3_newdata_dir}/sc-ig-transfer-learning"
s3_true_meta_uri = s3_newdata_csv

# Inputs for the job, each copied from S3 to a local folder of the container
evaluation_inputs = [
    # Batch Transform prediction files
    ProcessingInput(
        source=s3_transform_out,
        destination='/opt/ml/processing/input_predictions',
    ),
    # Ground-truth labels for the images in this set
    ProcessingInput(
        source=s3_true_meta_uri,
        destination='/opt/ml/processing/true_labels',
    ),
    # Label mapping
    ProcessingInput(
        source=s3_label_map_uri,
        destination='/opt/ml/processing/label_mapping',
    ),
]

# Whatever the script writes to the local output folder is uploaded to S3
evaluation_outputs = [
    ProcessingOutput(
        source='/opt/ml/processing/output',
        destination=s3_evaluation_out,
    ),
]

script_processor.run(
    code='../src/evaluation/evaluate.py',
    inputs=evaluation_inputs,
    outputs=evaluation_outputs,
)

INFO:sagemaker:Creating processing-job with name sagemaker-scikit-learn-2025-07-14-12-20-23-602


..............[34m[2025-07-14 12:22:38.490753] Script has started.[0m
[34mINFO:root:Script has started at 2025-07-14 12:22:38.490787.[0m
[34mNumber of files in /opt/ml/processing/input_predictions: 6833[0m
[34mpred probs df shape:(6833, 2)
 (step 1) pred probs df shape: (6833, 2)
 (step 2) True Labels data shape: (6833, 15)
 (step3) Merged data shape : (6833, 16)
 (step4) Merged data shape with preds: (6833, 18)
 (step5) METRICS CALCULATION
 (step5.1) Class-Restricted metrics - Evaluate only on labels available during training
 (step5.1) class_restriced df shape : (6833, 18)[0m
  _warn_prf(average, modifier, msg_start, len(result))[0m
  _warn_prf(average, modifier, msg_start, len(result))[0m
  _warn_prf(average, modifier, msg_start, len(result))
              precision    recall  f1-score   support
      badger       0.00      0.00      0.00         4
        bird       0.95      0.37      0.53       429
      bobcat       0.80      0.70      0.75       355
         car      

-----
### Batch Transform Test Set, then Evaluate Performance

In [None]:
# Batch Transform for the test set: the manifest to read and the prefix where
# predictions are written.
# NOTE(review): `s3_test_path` is not assigned in any visible cell — it must be
# defined before this cell can run.
s3_transform_manifest = f"{s3_test_path}/test-meta.manifest"
s3_transform_out = f"{s3_test_path}/batch_transform_out"

# Initialize a Transformer for the test set (same settings as the validation run)
transformer1 = Transformer(
    model_name=model.name,
    sagemaker_session=sess,
    # Compute
    instance_type="ml.g4dn.xlarge",
    instance_count=1,
    # Request batching: MultiRecord is used for faster processing; in a real-world
    # setting ml.m5.xlarge with a single-record strategy would be fine.
    strategy="MultiRecord",
    max_payload=10,  # Max payload size (MB)
    max_concurrent_transforms=10,
    # Output
    output_path=s3_transform_out,
    # NOTE(review): 'txt/csv' is not a standard MIME type (the usual spelling is
    # 'text/csv') — confirm what the model container expects before changing it.
    accept='txt/csv',
    assemble_with='Line',  # one line per prediction
)

# Run Batch Transform over every image listed in the test manifest
transformer1.transform(
    data=s3_transform_manifest,
    # The manifest provides the list of S3 URIs of the objects to transform
    data_type='ManifestFile',
    # Each object is a single image file, so no record splitting is needed
    split_type='None',
    content_type='application/x-image',
    # Block until the job finishes and stream its logs into the notebook
    wait=True,
    logs=True,
)

In [None]:
# Run the evaluation script on the test set predictions.
# Look up the scikit-learn container image for this region
# (other frameworks such as 'xgboost' or 'pytorch' could be requested the same way)
image_uri = sagemaker.image_uris.retrieve(
    framework='sklearn',
    version='1.2-1',
    py_version='py3',
    region=region,
)
print(image_uri)

# Processor that runs a Python script inside the image retrieved above
script_processor1 = ScriptProcessor(
    image_uri=image_uri,
    command=['python3'],
    role=role,
    instance_type='ml.m5.xlarge',
    instance_count=1,
)

# Where the test-set evaluation is written and where its ground truth lives.
# NOTE(review): `s3_test_path` and `test_key` are not assigned in any visible
# cell — they must be defined before this cell can run.
s3_evaluation_out = f"{s3_test_path}/evaluation"
s3_true_meta_uri = f"{s3_test_path}/{test_key}"

# Inputs for the job, each copied from S3 to a local folder of the container
test_evaluation_inputs = [
    # Batch Transform prediction files
    ProcessingInput(
        source=s3_transform_out,
        destination='/opt/ml/processing/input_predictions',
    ),
    # Ground-truth labels for the images in this set
    ProcessingInput(
        source=s3_true_meta_uri,
        destination='/opt/ml/processing/true_labels',
    ),
    # Label mapping
    ProcessingInput(
        source=s3_label_map_uri,
        destination='/opt/ml/processing/label_mapping',
    ),
]

# Whatever the script writes to the local output folder is uploaded to S3
test_evaluation_outputs = [
    ProcessingOutput(
        source='/opt/ml/processing/output',
        destination=s3_evaluation_out,
    ),
]

script_processor1.run(
    code='../src/evaluation/evaluate.py',
    inputs=test_evaluation_inputs,
    outputs=test_evaluation_outputs,
)