# (03c) CUSTOM MODEL TRAINING: Fine-Tuned ResNet18 without Temporal Features (same architecture as 03b for easy comparison)

In [1]:
from sagemaker.pytorch import PyTorch
from sagemaker.inputs import TrainingInput
from sagemaker import get_execution_role

# Dataset location: the S3 bucket and the key prefix of the data split in use.
# Both are interpolated into every S3 URI built further down.
dev_split = "space_time_split"
bucket_name = "aai-590-tmp"

# Execution role of the current SageMaker environment; handed to the
# estimator below as its `role` argument.
role = get_execution_role()


sagemaker.config INFO - Not applying SDK defaults from location: /etc/xdg/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /home/sagemaker-user/.config/sagemaker/config.yaml


In [2]:

# --- S3 locations, all rooted at the bucket / split configured above --------
s3_train = f"s3://{bucket_name}/{dev_split}/train-meta.csv"
s3_val = f"s3://{bucket_name}/{dev_split}/validation/val-meta.csv"
s3_label2idx = f"s3://{bucket_name}/{dev_split}/label_mapping.json"
# Destination prefix given to the estimator as its output path.
s3_output_location = f"s3://{bucket_name}/custom_model2/{dev_split}/output"

# --- Input channels ----------------------------------------------------------
# One TrainingInput per S3 object, each tagged with its content type.
train_input = TrainingInput(
    s3_data=s3_train,
    content_type="text/csv",
)
val_input = TrainingInput(
    s3_data=s3_val,
    content_type="text/csv",
)
label2idx_input = TrainingInput(
    s3_data=s3_label2idx,
    content_type="application/json",
)

# --- Estimator ---------------------------------------------------------------
estimator = PyTorch(
    # Code to run: the training script and the directory it is packaged from.
    source_dir="../src",
    entry_point="train.py",
    # Container runtime: PyTorch / Python versions of the training image.
    framework_version="1.13",
    py_version="py39",
    # Infrastructure: permissions and a single GPU training instance.
    role=role,
    instance_type="ml.p3.2xlarge",
    instance_count=1,
    # Settings forwarded to the training script.
    # NOTE(review): 'custom_model' presumably selects the model class inside
    # train.py -- confirm against the script.
    hyperparameters={
        "epochs": 3,
        "batch_size": 32,
        "lr": 1e-4,
        "custom_model": "AnimalClassifier",
    },
    output_path=s3_output_location,
)



In [3]:

# Launch the training job with three named input channels: the training
# metadata, the validation metadata, and the label-to-index mapping.
estimator.fit(
    inputs=dict(
        train=train_input,
        validation=val_input,
        label2idx=label2idx_input,
    )
)

INFO:sagemaker.image_uris:image_uri is not presented, retrieving image_uri based on instance_type, framework etc.
INFO:sagemaker:Creating training-job with name: pytorch-training-2025-07-11-11-39-55-224


2025-07-11 11:39:55 Starting - Starting the training job......
2025-07-11 11:40:44 Pending - Training job waiting for capacity...
2025-07-11 11:41:08 Pending - Preparing the instances for training...
2025-07-11 11:41:57 Downloading - Downloading the training image.....................
2025-07-11 11:45:05 Training - Training image download completed. Training in progress.[34mbash: cannot set terminal process group (-1): Inappropriate ioctl for device[0m
[34mbash: no job control in this shell[0m
  "cipher": algorithms.TripleDES,[0m
  "class": algorithms.TripleDES,[0m
[34m2025-07-11 11:45:19,481 sagemaker-training-toolkit INFO     Imported framework sagemaker_pytorch_container.training[0m
[34m2025-07-11 11:45:19,502 sagemaker-training-toolkit INFO     No Neurons detected (normal if no neurons installed)[0m
[34m2025-07-11 11:45:19,517 sagemaker_pytorch_container.training INFO     Block until all host DNS lookups succeed.[0m
[34m2025-07-11 11:45:19,519 sagemaker_pytorch_contain