# MODEL TRAINING: Fuse ResNet18 Image Vector with Temporal Vector

In [1]:
from sagemaker.pytorch import PyTorch
from sagemaker.inputs import TrainingInput
from sagemaker import get_execution_role

# Naming used to build the S3 URIs further down: the model name (output
# prefix), the key prefix holding the train/validation split, and the bucket.
model_name = "Resnet18_with_Temporal"
train_val_dir = "data_split/train_val"
bucket_name = "aai-590-tmp2"

# Role returned by the SageMaker SDK; it is handed to the estimator below.
role = get_execution_role()


sagemaker.config INFO - Not applying SDK defaults from location: /etc/xdg/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /home/sagemaker-user/.config/sagemaker/config.yaml


In [2]:

# ---------------------------------------------------------------------------
# S3 locations
# ---------------------------------------------------------------------------
# Inputs live under s3://<bucket_name>/<train_val_dir>/ : two metadata CSVs
# (train / validation) and the label -> index mapping stored as JSON.
s3_train = f's3://{bucket_name}/{train_val_dir}/train-meta.csv'
s3_val =  f's3://{bucket_name}/{train_val_dir}/val-meta.csv'
s3_label2idx = f's3://{bucket_name}/{train_val_dir}/label_mapping.json'

# Training job output goes under s3://<bucket_name>/<model_name>/output.
s3_output_location = f"s3://{bucket_name}/{model_name}/output"

# ---------------------------------------------------------------------------
# Input channels (one TrainingInput per S3 object)
# ---------------------------------------------------------------------------
label2idx_input = TrainingInput(s3_data=s3_label2idx, content_type='application/json')
train_input = TrainingInput(s3_data=s3_train, content_type='text/csv')
val_input = TrainingInput(s3_data=s3_val, content_type='text/csv')

# ---------------------------------------------------------------------------
# Estimator
# ---------------------------------------------------------------------------
# Values a reader is most likely to tune, kept together in one place.
training_hyperparameters = {
    'epochs': 3,
    'batch_size': 32,
    'lr': 1e-4,
}

estimator = PyTorch(
    # Training script and the directory it is shipped from
    # (source_dir is expected to also hold model.py, dataset.py, etc.).
    entry_point='train.py',
    source_dir='../src',
    role=role,
    # Container selection: PyTorch 1.13 on Python 3.9.
    framework_version='1.13',
    py_version='py39',
    # Compute: a single ml.p3.2xlarge instance; swap for another type if needed.
    instance_count=1,
    instance_type='ml.p3.2xlarge',
    hyperparameters=training_hyperparameters,
    output_path=s3_output_location,
)



In [3]:

# Launch the training job with all three input channels:
# the train and validation metadata CSVs plus the label-mapping JSON.
input_channels = {
    'train': train_input,
    'validation': val_input,
    'label2idx': label2idx_input,
}
estimator.fit(input_channels)

INFO:sagemaker.image_uris:image_uri is not presented, retrieving image_uri based on instance_type, framework etc.
INFO:sagemaker:Creating training-job with name: pytorch-training-2025-07-12-13-20-18-380


2025-07-12 13:20:23 Starting - Starting the training job
2025-07-12 13:20:23 Pending - Training job waiting for capacity......
2025-07-12 13:21:24 Downloading - Downloading input data...
2025-07-12 13:21:45 Downloading - Downloading the training image.....................
2025-07-12 13:24:58 Training - Training image download completed. Training in progress.[34mbash: cannot set terminal process group (-1): Inappropriate ioctl for device[0m
[34mbash: no job control in this shell[0m
  "cipher": algorithms.TripleDES,[0m
  "class": algorithms.TripleDES,[0m
[34m2025-07-12 13:25:14,445 sagemaker-training-toolkit INFO     Imported framework sagemaker_pytorch_container.training[0m
[34m2025-07-12 13:25:14,466 sagemaker-training-toolkit INFO     No Neurons detected (normal if no neurons installed)[0m
[34m2025-07-12 13:25:14,481 sagemaker_pytorch_container.training INFO     Block until all host DNS lookups succeed.[0m
[34m2025-07-12 13:25:14,483 sagemaker_pytorch_container.training I

In [6]:
import subprocess
import sys
import os

# Location of the repository root relative to this notebook's working
# directory, i.e. the folder that contains the `src` package imported below.
repo_root_rel = '../'

# Resolve to an absolute path once, so the sys.path entry keeps pointing at the
# repository root even if the kernel's working directory changes later.
src_path = os.path.abspath(repo_root_rel)

# Check for the very same value that gets inserted, so re-running this cell
# never adds a duplicate entry.
if src_path not in sys.path:
    sys.path.insert(0, src_path)


import src.eda.eda_utils as eda
import importlib # for debug
from src.custom_datasets import S3ImageWithTimeFeatureDataset

In [7]:
# Build the training dataset from the train metadata CSV URI and the
# label-mapping JSON URI defined earlier in the notebook.
# NOTE(review): presumably the constructor reads both objects from S3 (and may
# download the referenced images) - confirm in src/custom_datasets.
train_dataset = S3ImageWithTimeFeatureDataset(s3_train, s3_label2idx)

In [9]:
import torch

# torch.save does not create missing parent directories, so make sure the
# output folder exists before writing (no-op when it is already there).
os.makedirs('./data_split', exist_ok=True)

# NOTE(review): this serializes the whole dataset object (via pickle), not bare
# tensors. Loading it back requires src.custom_datasets to be importable, and
# the file should only ever be loaded from a trusted source.
torch.save(train_dataset, './data_split/train_tensors.pt')

In [10]:
# Build the validation dataset from the validation metadata CSV URI and the
# label-mapping JSON URI defined earlier in the notebook.
val_dataset = S3ImageWithTimeFeatureDataset(s3_val, s3_label2idx)

# torch.save does not create missing parent directories; creating the folder
# here keeps this cell runnable on its own (no-op when it already exists).
os.makedirs('./data_split', exist_ok=True)

# NOTE(review): the whole dataset object is pickled, not bare tensors; only
# load this file from a trusted source and with src.custom_datasets importable.
torch.save(val_dataset, './data_split/val_tensors.pt')