# (04-d) CUSTOM MODEL (with NO Temporal Features, Margin Loss) EVALUATION

In [1]:
import subprocess
import sys
import os

# Location of the repository root, relative to this notebook
repo_root = '../'

# Prepend the project's `src` directory to the import path (once only) so the
# project modules can be imported by the cells below
src_path = os.path.join(repo_root, 'src')
already_importable = src_path in sys.path
if not already_importable:
    sys.path[:0] = [src_path]

In [2]:
from custom_models import AnimalTemporalClassifier
from custom_models import AnimalClassifier
from custom_datasets import S3ImageWithTimeFeatureDataset
import torch
from torch.utils.data import DataLoader

In [3]:
import time
import torch.nn.functional as F

In [4]:
import os
import pandas as pd
from sklearn.metrics import accuracy_score, classification_report, f1_score

In [5]:
import boto3
import tarfile
from io import BytesIO
import json

def download_and_extract_files_from_s3(bucket, key, files_to_extract):
    """Download a gzipped tar archive from S3 and return selected members.

    The archive is read fully into memory; nothing is written to disk.

    Args:
        bucket: Name of the S3 bucket holding the archive.
        key: Object key of the ``.tar.gz`` archive inside ``bucket``.
        files_to_extract: Iterable of member names to pull out of the archive.
            Names are compared after path normalisation, so a member stored as
            ``./model.pth`` matches a request for ``model.pth``.

    Returns:
        dict keyed by the requested names that were found. Values are ``str``
        (UTF-8 decoded) for names ending in ``.json`` and ``bytes`` otherwise.
        Requested names that are not in the archive are simply absent.
    """
    import posixpath  # tar member names always use forward slashes

    s3 = boto3.client('s3')
    obj = s3.get_object(Bucket=bucket, Key=key)
    buffer = BytesIO(obj['Body'].read())

    # Normalised member name -> the exact name the caller asked for
    wanted = {posixpath.normpath(name): name for name in files_to_extract}

    extracted_files = {}
    with tarfile.open(fileobj=buffer, mode='r:gz') as tar:
        for member in tar.getmembers():
            requested_name = wanted.get(posixpath.normpath(member.name))
            if requested_name is None:
                continue
            f = tar.extractfile(member)
            if f is None:
                # Directories and special members carry no payload
                continue
            with f:
                payload = f.read()
            # For JSON, decode as text; everything else (e.g. .pth) stays as bytes
            if requested_name.endswith('.json'):
                extracted_files[requested_name] = payload.decode('utf-8')
            else:
                extracted_files[requested_name] = payload
    return extracted_files



In [6]:
# Fetch the label mapping and the trained weights from the training job's model archive
bucket = 'aai-590-tmp2'
key = 'Resnet18_with_No_Temporal_Margin_Loss/output/pytorch-training-2025-07-17-09-22-39-143/output/model.tar.gz'
files_to_extract = ['label2idx.json', 'model.pth']

extracted_files = download_and_extract_files_from_s3(bucket, key, files_to_extract)
label_map_name, weights_name = files_to_extract

# Label mapping: the number of entries is the number of classes.
# NOTE: `json_data` and `num_classes` are only assigned here when the JSON was found.
json_content = extracted_files.get(label_map_name)
if not json_content:
    print('JSON file not found')
else:
    json_data = json.loads(json_content)
    num_classes = len(json_data)
    print(f'Number of classes: {num_classes}')

# Weights are kept as raw bytes; wrap them in BytesIO when passing to torch.load
pth_content = extracted_files.get(weights_name)
if not pth_content:
    print('.pth file not found')
else:
    print(f'.pth file size: {len(pth_content)} bytes')

JSON file not found
.pth file size: 44813397 bytes


In [7]:
# Load the class-name -> class-index mapping stored with the train/validation split
bucket_name = "aai-590-tmp2"
train_val_dir = "data_split/train_val"
s3_label2idx = f's3://{bucket_name}/{train_val_dir}/label_mapping.json'

label_series = pd.read_json(s3_label2idx, typ='series')
label2idx = label_series.to_dict()

# One model output per entry in the mapping
num_classes = len(label2idx)
num_classes

17

In [8]:
# Build the image-only classifier with one output per class in `label2idx`
# and place it on the GPU when one is available, otherwise on the CPU.
# (No placeholder assignment: if construction fails, `model` must not be left
# bound to an empty list that later cells would trip over.)
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = AnimalClassifier(num_classes).to(device)



In [9]:
# Deserialize the checkpoint bytes downloaded from S3 onto the active device.
# NOTE(review): torch.load unpickles its input — only load archives from a trusted bucket.
state_dict = torch.load(BytesIO(pth_content), map_location=device)
model.load_state_dict(state_dict)

# Switch to evaluation mode; as the cell's last expression this also displays the model
model.eval()

AnimalClassifier(
  (cnn): ResNet(
    (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
    (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu): ReLU(inplace=True)
    (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
    (layer1): Sequential(
      (0): BasicBlock(
        (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu): ReLU(inplace=True)
        (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      )
      (1): BasicBlock(
        (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, trac

In [None]:
!pip install torchviz torchview graphviz

In [None]:
# Display which torch device (cuda or cpu) was selected when the model was built
device

In [None]:
from torchview import draw_graph

# Trace the model with one input of shape (1, 3, 224, 224) and draw its layer graph,
# expanding nested modules
input_sizes = (1, 3, 224, 224)
model_graph = draw_graph(model, input_size=input_sizes, expand_nested=True)

# Render the graph to disk using this base file name
model_graph.visual_graph.render(filename='custom_model_noTimeVector_architecture')

In [None]:
from IPython.display import IFrame

# Embed the model-architecture PDF inline
# (presumably the file written by the render call above — confirm the output format)
IFrame('custom_model_noTimeVector_architecture.pdf', width=600, height=400)


In [10]:
# Invert the mapping so predicted class indices can be turned back into class names
idx2label = dict((int(index), name) for name, index in label2idx.items())
idx2label

{0: 'car',
 1: 'coyote',
 2: 'deer',
 3: 'bobcat',
 4: 'dog',
 5: 'skunk',
 6: 'empty',
 7: 'cat',
 8: 'opossum',
 9: 'squirrel',
 10: 'raccoon',
 11: 'rodent',
 12: 'rabbit',
 13: 'bird',
 14: 'badger',
 15: 'fox',
 16: 'lizard'}

In [11]:
# S3 locations used for evaluation on the `validation2` split
bucket_name = bucket
_train_val_prefix = f's3://{bucket_name}/data_split/train_val'
s3_newdata_dir = f'{_train_val_prefix}/validation2'
s3_newdata_csv = f'{s3_newdata_dir}/val-meta.csv'  # used only to extract annotations later
s3_newdata_manifest = f'{s3_newdata_dir}/val-meta.manifest'  # presumably produced by the preprocessing pipeline — confirm
s3_label_map_uri = f"{_train_val_prefix}/label_mapping.json"

In [13]:
# Evaluate on the validation set.
# Use a single URI for both the dataset and the later ground-truth lookup, so
# predictions and true labels are guaranteed to come from the same CSV.
s3_newdata_set = s3_newdata_csv

new_dataset = S3ImageWithTimeFeatureDataset(s3_newdata_set)
print(f"Number of images: {len(new_dataset)}")

# shuffle=False keeps the loader in dataset order; the metric cells index
# predictions by CSV row position (assumes the dataset preserves CSV order — confirm)
new_dataset_loader = DataLoader(new_dataset, batch_size=128, shuffle=False, num_workers=0)
print(f"Number of batches: {len(new_dataset_loader)}")


DEBUG INFO: No Label Encoding needed for this dataset
Number of images: 6833
Number of batches: 54


In [16]:
import time

# Run the model over the whole loader and collect, per image, the predicted
# class name and the raw output scores.
pred_labels = []
pred_probs = []  # raw model outputs, one tensor per image; softmax is applied in a later cell
batch_id = 0

start_time = time.time()

with torch.no_grad():
    # Only the image batch is fed to this model; the other two loader outputs are unused here
    for images_batch, features_batch, scalars_batch in new_dataset_loader:
        images = images_batch.to(device)
        # Move results to host memory so later cells can call .numpy() on them
        # regardless of which device the model ran on
        outputs = model(images).cpu()
        _, predicted = torch.max(outputs, 1)

        # .tolist() yields plain ints in one transfer instead of one int() per element
        pred_labels_batch = [idx2label[idx] for idx in predicted.tolist()]

        if batch_id % 20 == 0:
            # Report the time since the previous report, then restart the timer
            end_time = time.time()
            elapsed_time = end_time - start_time
            print(f"batch: {batch_id} of {len(new_dataset_loader)}")
            print(f"Elapsed time: {elapsed_time:.4f} seconds")
            start_time = time.time()

        batch_id += 1
        pred_labels.extend(pred_labels_batch)
        pred_probs.extend(outputs)

batch: 0 of 54
Elapsed time: 9.4315 seconds
batch: 20 of 54
Elapsed time: 231.0758 seconds
batch: 40 of 54
Elapsed time: 234.3784 seconds


In [19]:
# Inspect the raw model output for the first image (one score per class, not yet normalised)
pred_probs[0]

tensor([-2.2092, -3.4855, -4.0129, -2.7377,  0.9568, -2.2611, -2.3972, -2.0869,
        12.7367, -3.0021, -1.0022, -2.8003, -3.0363, -3.0994, -1.8258, -4.1770,
        -4.2084])

In [21]:
import torch.nn.functional as F

# Stack the per-image score vectors into one (images x classes) tensor, then
# normalise each row into probabilities
pred_probs_linear = torch.stack(pred_probs)
pred_probs_softmax = pred_probs_linear.softmax(dim=1)


In [25]:
import pandas as pd
import numpy as np

# Persist the per-class probabilities, one column per class.
# Columns are ordered by class index (not by dict insertion order) so every
# column name matches the softmax column it labels; .cpu() keeps the conversion
# working when the probabilities live on a GPU.
class_names_by_index = sorted(label2idx, key=label2idx.get)
pred_probs_df = pd.DataFrame(pred_probs_softmax.cpu().numpy(), columns=class_names_by_index)
pred_probs_df.to_csv('pred_proba_no_time_margin_loss.csv', index = False)

In [None]:
# Dump the predictions to text files under the results folder, one item per line
results_dir = 'custom_loss_results/'
os.makedirs(results_dir, exist_ok=True)

outputs_by_file = (
    ('pred_labels.txt', pred_labels),
    ('pred_probs.txt', pred_probs),
    ('pred_probs_softmax.txt', pred_probs_softmax),
)
for file_name, items in outputs_by_file:
    file_path = os.path.join(results_dir, file_name)
    with open(file_path, 'w') as f:
        f.writelines(f"{item}\n" for item in items)


In [None]:
# Score the predictions against the ground-truth labels of the evaluated CSV
new_data_csv = pd.read_csv(s3_newdata_set)
true_labels = new_data_csv['label'].tolist()

# Only rows whose true label is present in the label mapping can be scored.
# The known classes are taken from `label2idx`, which is always loaded above;
# `json_data` only exists when label2idx.json was found in the model archive.
known_labels = list(label2idx.keys())
restricted_indices = new_data_csv.index[new_data_csv['label'].isin(known_labels)].tolist()
restricted_true_labels = [true_labels[i] for i in restricted_indices]
restricted_pred_labels = [pred_labels[i] for i in restricted_indices]

class_report = classification_report(restricted_true_labels, restricted_pred_labels)
print(class_report)

In [None]:
# Summary metrics over the rows with known labels (F1 uses the 'weighted' average)
accuracy_restricted = accuracy_score(
    y_true=restricted_true_labels,
    y_pred=restricted_pred_labels,
)
f1_score_restricted = f1_score(
    y_true=restricted_true_labels,
    y_pred=restricted_pred_labels,
    average='weighted',
)

In [None]:
import torch.nn.functional as F

# Recompute the probabilities from the raw per-image scores: stack them into one
# (images x classes) tensor and apply softmax along the class dimension
pred_probs_linear = torch.stack(pred_probs)
pred_probs_softmax = pred_probs_linear.softmax(dim=1)

In [None]:
# NOVELTY RATIO: share of images whose maximum softmax probability (MSP)
# falls below the threshold
threshold = 0.7  # Example threshold for MSP
max_probs = pred_probs_softmax.max(dim=1).values
is_novel = (max_probs < threshold).sum()  # number of low-confidence images
novelty_ratio = (is_novel / len(pred_probs_softmax)).item()
novelty_ratio

In [None]:
# NOTE(review): `report_dict` is first assigned in the cell below, so on a fresh
# kernel (Restart & Run All) this cell raises NameError — run it after that cell.
report_dict

In [None]:
# Bundle the summary metrics into the JSON report layout and save it locally
file_name = 'multiclass_metrics.json'
file_path = os.path.join(results_dir, file_name)

metric_values = {
    "accuracy": accuracy_restricted,
    "f1-weighted": f1_score_restricted,
    "novelty_ratio": novelty_ratio,
}
# No spread is estimated, so every metric carries the placeholder string "NaN"
report_dict = {
    "multiclass_classification_metrics": {
        metric: {"value": value, "standard_deviation": "NaN"}
        for metric, value in metric_values.items()
    }
}
with open(file_path, "w") as f:
    json.dump(report_dict, f)

In [None]:
# upload all results to S3
# NOTE(review): at this point in the notebook `dev_split` has not been assigned yet
# (it is set in a later cell) and `year_month_test` is only ever assigned inside
# `evaluate_year_month`, so on a fresh kernel this cell raises NameError.
# The destination keys also point at a test/<year_month> folder although the files
# written above come from the validation run — confirm the intended prefix.
s3_client = boto3.client("s3")
s3_client.upload_file(f'{results_dir}/pred_labels.txt', bucket_name, f"{dev_split}/test/{year_month_test}/evaluation/pred_labels.txt")
s3_client.upload_file(f'{results_dir}/pred_probs.txt', bucket_name, f"{dev_split}/test/{year_month_test}/evaluation/pred_probs.txt")
s3_client.upload_file(f'{results_dir}/pred_probs_softmax.txt', bucket_name, f"{dev_split}/test/{year_month_test}/evaluation/pred_probs_softmax.txt")
s3_client.upload_file(f'{results_dir}/multiclass_metrics.json', bucket_name, f"{dev_split}/test/{year_month_test}/evaluation/multiclass_metrics.json")


In [None]:
# Bucket and top-level prefix read by `evaluate_year_month` (defined below) to
# locate each month's test CSV and to build the S3 keys for its results
bucket_name = "aai-590-tmp2"
dev_split = "data_split"

# define a function that does all of the above given the year-month
def evaluate_year_month(year_month = "2012-03", time_features = True):
    """Run inference on one month of test data, compute metrics and upload them.

    Reads ``s3://{bucket_name}/{dev_split}/test/{year_month}/new_dataset.csv``,
    predicts a class for every image with the notebook-level ``model``, scores the
    rows whose true label appears in ``label2idx``, writes predictions and metrics
    under ``{dev_split}/custom_model_results/`` and uploads those files to
    ``{dev_split}/test/{year_month}/eval_results_{With|No}TimeFeatures/``.

    Relies on these notebook globals: ``bucket_name``, ``dev_split``, ``model``,
    ``device``, ``idx2label`` and ``label2idx``.

    Args:
        year_month: Month folder to evaluate, e.g. ``"2013-04"``.
        time_features: When true the model is called as ``model(images, features)``,
            otherwise as ``model(images)``. Also selects the S3 results folder.

    Returns:
        None. Results are printed, written to local files and uploaded to S3.
    """
    print(f"\n=================MODEL INFERENCE===========================")
    print(f"YEAR-MONTH: {year_month}")
    year_month_test = year_month
    s3_newdata_set = f's3://{bucket_name}/{dev_split}/test/{year_month_test}/new_dataset.csv'

    new_dataset = S3ImageWithTimeFeatureDataset(s3_newdata_set)
    print(f"Total Number of images: {len(new_dataset)}")

    # shuffle=False keeps predictions in dataset order; labels are matched by row position below
    new_dataset_loader = DataLoader(new_dataset, batch_size=32, shuffle=False, num_workers=0)
    print(f"Number of batches: {len(new_dataset_loader)}")

    pred_labels = []
    pred_probs = []  # raw model outputs, one CPU tensor per image
    start_time = time.time()

    with torch.no_grad():
        for batch_id, (images_batch, features_batch, scalars_batch) in enumerate(new_dataset_loader):
            images = images_batch.to(device)
            if time_features:
                # Only transfer the feature batch when the model actually consumes it
                outputs = model(images, features_batch.to(device))
            else:
                outputs = model(images)
            # Keep results on the host so they can be stacked, written and converted
            # independently of the device the model ran on
            outputs = outputs.cpu()
            _, predicted = torch.max(outputs, 1)

            pred_labels_batch = [idx2label[idx] for idx in predicted.tolist()]

            if batch_id % 32 == 0:
                # Report the time since the previous report, then restart the timer
                elapsed_time = time.time() - start_time
                print(f"batch_id: {batch_id}")
                print(f"Elapsed time: {elapsed_time:.4f} seconds")
                start_time = time.time()

            pred_labels.extend(pred_labels_batch)
            pred_probs.extend(outputs)

    new_data_csv = pd.read_csv(s3_newdata_set)
    true_labels = new_data_csv['label'].tolist()

    # Score only rows whose true label is a known class. `.keys()` works whether
    # `label2idx` is a plain dict (as built in this notebook) or a pandas Series.
    known_labels = list(label2idx.keys())
    restricted_indices = new_data_csv.index[new_data_csv['label'].isin(known_labels)].tolist()
    restricted_true_labels = [true_labels[i] for i in restricted_indices]
    restricted_pred_labels = [pred_labels[i] for i in restricted_indices]

    class_report = classification_report(restricted_true_labels, restricted_pred_labels)
    print(class_report)

    accuracy_restricted = accuracy_score(restricted_true_labels, restricted_pred_labels)
    f1_score_restricted = f1_score(restricted_true_labels, restricted_pred_labels, average='weighted')

    # Stack per-image scores into (images x classes) and normalise each row
    pred_probs_linear = torch.stack(pred_probs)
    pred_probs_softmax = F.softmax(pred_probs_linear, dim=1)

    # NOVELTY RATIO: share of images whose maximum softmax probability is below the threshold
    threshold = 0.7  # Example threshold for MSP
    is_novel = (pred_probs_softmax.max(dim=1)[0] < threshold).sum()
    novelty_ratio = (is_novel / len(pred_probs_softmax)).item()

    # Save to local files (one item per line); the folder is reused for every month
    results_dir = f'{dev_split}/custom_model_results/'
    os.makedirs(results_dir, exist_ok=True)

    text_outputs = (
        ('pred_labels.txt', pred_labels),
        ('pred_probs.txt', pred_probs),
        ('pred_probs_softmax.txt', pred_probs_softmax),
    )
    for file_name, items in text_outputs:
        with open(os.path.join(results_dir, file_name), 'w') as f:
            for item in items:
                f.write(f"{item}\n")

    report_dict = {
        "multiclass_classification_metrics": {
            "accuracy": {"value": accuracy_restricted, "standard_deviation": "NaN"},
            "f1-weighted": {"value": f1_score_restricted, "standard_deviation": "NaN"},
            "novelty_ratio": {"value": novelty_ratio, "standard_deviation": "NaN"}
        }
    }
    print(json.dumps(report_dict, indent=2))
    with open(os.path.join(results_dir, 'multiclass_metrics.json'), "w") as f:
        json.dump(report_dict, f)

    # Upload all results to S3, in a folder that records which model variant was used
    if time_features:
        eval_results = 'eval_results_WithTimeFeatures'
    else:
        eval_results = 'eval_results_NoTimeFeatures'

    s3_client = boto3.client("s3")
    s3_prefix = f"{dev_split}/test/{year_month_test}/{eval_results}"
    for file_name in ('pred_labels.txt', 'pred_probs.txt', 'pred_probs_softmax.txt', 'multiclass_metrics.json'):
        s3_client.upload_file(os.path.join(results_dir, file_name), bucket_name, f"{s3_prefix}/{file_name}")

    

In [None]:
# Evaluate a single month first; time_features=False selects the image-only
# call path (model(images)) and the 'eval_results_NoTimeFeatures' S3 folder
evaluate_year_month("2013-04", time_features=False)

In [None]:
# Metadata for the whole test split. The URI is built from the same
# `bucket_name` / `dev_split` that `evaluate_year_month` uses, so the list of
# months and the per-month data always come from the same location.
s3_test_csv = f's3://{bucket_name}/{dev_split}/test/test-meta.csv'
test_meta_df = pd.read_csv(s3_test_csv)
test_meta_df.head()

In [None]:
# Distinct year-month values in the test metadata, in order of first appearance
year_month_column = test_meta_df['year_month']
test_df_year_months = year_month_column.drop_duplicates().tolist()
test_df_year_months

In [None]:
# Evaluate every month found in the test metadata with the image-only call path
for year_month in test_df_year_months:
    evaluate_year_month(year_month, time_features=False)

In [None]:
# Loop variable left over from the cell above: shows the last month the loop reached
year_month