# EXPERIMENTS

In [1]:
import subprocess
import sys
import os

# Relative path from the notebook's working directory to the repository root;
# the `src` package imported further down is resolved through this entry.
repo_root_rel = '../'

# os.path.join(..., '') only guarantees a trailing path separator on the root.
src_path = os.path.join(repo_root_rel, '')
# Prepend src_path to sys.path unless that exact entry is already present.
# The membership test deliberately uses src_path -- the same value that gets
# inserted -- so the guard and the insert cannot drift apart if the two names
# ever hold different strings.
if src_path not in sys.path:
    sys.path.insert(0, src_path)


import src.eda.eda_utils as eda
import importlib # for debug

In [2]:

from src.custom_datasets import S3ImageWithTimeFeatureDataset

In [3]:
from sagemaker.pytorch import PyTorch
from sagemaker.inputs import TrainingInput
from sagemaker import get_execution_role

# S3 location of the train/validation split: bucket name and key prefix.
bucket_name, train_val_dir = "aai-590-tmp2", "data_split/train_val"

# Execution role reported by the SageMaker SDK for the current environment.
role = get_execution_role()



sagemaker.config INFO - Not applying SDK defaults from location: /etc/xdg/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /home/sagemaker-user/.config/sagemaker/config.yaml


In [4]:
# Every split artefact lives under the same S3 prefix, so build that prefix
# once and append the three file names to it.
_s3_prefix = f's3://{bucket_name}/{train_val_dir}'
s3_train, s3_val, s3_label2idx = (
    f'{_s3_prefix}/{file_name}'
    for file_name in ('train-meta.csv', 'val-meta.csv', 'label_mapping.json')
)

In [5]:
# Build the training dataset from the training metadata CSV and the
# label-mapping JSON, both referenced by their S3 URIs.
train_dataset = S3ImageWithTimeFeatureDataset(s3_train, s3_label2idx)

In [6]:
# Sanity check: number of samples in the training dataset.
len(train_dataset)

22003

In [7]:
# Build the validation dataset from the validation metadata CSV, sharing the
# same label-mapping JSON as the training dataset, then show its sample count.
val_dataset = S3ImageWithTimeFeatureDataset(s3_val, s3_label2idx)
len(val_dataset)

6833

In [15]:
from torch.utils.data import DataLoader
import torch
import numpy as np
import gzip


In [19]:
BATCH_SIZE = 256

# Both loaders use identical settings: fixed sample order (no shuffling) and
# loading in the main process (num_workers=0).
# NOTE(review): train_loader is also iterated by the training loop further
# down; with shuffle=False every epoch sees the batches in the same order --
# confirm that this is intended for training.
_loader_kwargs = dict(batch_size=BATCH_SIZE, shuffle=False, num_workers=0)
train_loader = DataLoader(train_dataset, **_loader_kwargs)
val_loader = DataLoader(val_dataset, **_loader_kwargs)


In [20]:
# Dump every training batch to local disk as three files per batch:
#   images_batch_<n>.pt.gz  (gzip-compressed), features_batch_<n>.pt, labels_batch_<n>.pt
# The target directory is created up front so the first write does not fail
# with FileNotFoundError when ./data_split/train does not exist yet.
# (`os` is imported in the first cell of the notebook.)
train_dump_dir = './data_split/train'
os.makedirs(train_dump_dir, exist_ok=True)

for batch_no, (images, features, labels) in enumerate(train_loader):
    # Image tensors are written through gzip; features and labels are saved
    # uncompressed directly by torch.save.
    with gzip.open(f'{train_dump_dir}/images_batch_{batch_no}.pt.gz', 'wb') as f:
        torch.save(images, f)
    torch.save(features, f'{train_dump_dir}/features_batch_{batch_no}.pt')
    torch.save(labels, f'{train_dump_dir}/labels_batch_{batch_no}.pt')
    # Progress line on every 10th batch index (0, 10, 20, ...).
    if batch_no % 10 == 0:
        print(f"Processed {batch_no} batches so far...")


Processed 0 batches so far...
Processed 10 batches so far...
Processed 20 batches so far...
Processed 30 batches so far...
Processed 40 batches so far...
Processed 50 batches so far...
Processed 60 batches so far...
Processed 70 batches so far...
Processed 80 batches so far...


In [26]:
import torch.optim as optim
import torch.nn as nn
import torch
import pandas as pd
from src.custom_models import AnimalTemporalClassifier

In [23]:
# Prefer the GPU when PyTorch can see one; otherwise fall back to the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
device

device(type='cpu')

In [27]:
# Read the label-mapping JSON from S3 as a pandas Series; its length is used
# as the number of output classes of the classifier.
label2idx = pd.read_json(s3_label2idx, typ='series')
num_classes = len(label2idx)
print("DEBUG CHECK Num Classes in JSON: ", num_classes)

# Instantiate the classifier, then move its parameters to the selected device.
model = AnimalTemporalClassifier(num_classes=num_classes)
model = model.to(device)

DEBUG CHECK Num Classes in JSON:  17




In [28]:
# Loss: cross-entropy with its default settings.
criterion = nn.CrossEntropyLoss()
# Optimiser: Adam over all model parameters with a learning rate of 1e-4.
optimizer = optim.Adam(params=model.parameters(), lr=1e-4)

In [29]:
# Debug probe: parameters() returns a generator, so the cell output is only
# the generator's repr, not the parameters themselves.
model.parameters()

<generator object Module.parameters at 0x7f05de6d84a0>

In [30]:
# Number of full passes over the training data performed by the training loop.
NUM_EPOCHS = 5

In [34]:
# Number of batches the training loader yields per epoch.
# NOTE(review): the recorded output (688) does not match BATCH_SIZE = 256 with
# 22003 training samples (that would be 86 batches); the loader was presumably
# rebuilt with a smaller batch size in a cell that is no longer in the
# notebook -- confirm and make the notebook reproducible from a fresh kernel.
len(train_loader)

688

In [35]:
def _run_epoch(model, loader, criterion, device, optimizer=None):
    """Run one full pass over ``loader`` and return ``(mean_loss, accuracy)``.

    When ``optimizer`` is given the pass is a training pass: the model is put
    in train mode and a backward pass plus optimiser step follows every batch.
    Without an optimiser the pass is an evaluation pass: the model is put in
    eval mode and gradient tracking is disabled.

    Args:
        model: module called as ``model(images, features)``.
        loader: iterable yielding ``(images, features, labels)`` batches.
        criterion: loss callable taking ``(outputs, labels)``; assumed to
            return the batch-mean loss (true for the default
            ``nn.CrossEntropyLoss()`` used in this notebook).
        device: device every batch tensor is moved to before the forward pass.
        optimizer: optimiser to step after each batch, or ``None`` to evaluate.

    Returns:
        Tuple ``(mean_loss, accuracy)``, both averaged over the number of
        samples actually seen during the pass.

    Raises:
        ValueError: if ``loader`` yields no samples.
    """
    training = optimizer is not None
    model.train(training)  # train(False) is equivalent to eval()

    loss_sum = 0.0
    n_correct = 0
    n_seen = 0
    with torch.set_grad_enabled(training):
        for images, features, labels in loader:
            images = images.to(device)
            features = features.to(device)
            labels = labels.to(device)

            if training:
                optimizer.zero_grad()
            outputs = model(images, features)
            loss = criterion(outputs, labels)
            if training:
                loss.backward()
                optimizer.step()

            batch_size = labels.size(0)
            # Undo the per-batch averaging so a smaller last batch is weighted correctly.
            loss_sum += loss.item() * batch_size
            # Predicted class = index of the largest score along dim 1.
            n_correct += (outputs.argmax(dim=1) == labels).sum().item()
            n_seen += batch_size

    if n_seen == 0:
        raise ValueError("loader yielded no samples; cannot compute loss/accuracy")
    # Loss and accuracy share the same denominator (samples actually seen).
    return loss_sum / n_seen, n_correct / n_seen


for epoch in range(NUM_EPOCHS):
    # =========DEBUG==================
    print(f"EPOCH {epoch+1}=================")
    print("TRAINING start....")
    train_loss, train_acc = _run_epoch(model, train_loader, criterion, device, optimizer)
    print("TRAINING end....")

    # Validation
    print("\nVALIDATION start....")
    val_loss, val_acc = _run_epoch(model, val_loader, criterion, device)
    print("VALIDATION end....")

    # print accuracy and loss values
    print(f"Epoch {epoch+1}: train_loss={train_loss:.4f}, train_acc={train_acc:.4f}, val_loss={val_loss:.4f}, val_acc={val_acc:.4f}")

TRAINING start....


In [None]:
# Re-reads the label mapping from S3 as a pandas Series; this is the same
# statement that the model-setup cell already executes.
label2idx = pd.read_json(s3_label2idx, typ='series')

In [None]:
# Look up the mapping entry stored under the label 'empty'.
label2idx['empty']

In [15]:
# Shape of the first element (index 0) of training sample 1.
train_dataset[1][0].shape

torch.Size([3, 224, 224])

In [38]:
# Shape of the first element (index 0) of training sample 0.
train_dataset[0][0].shape

torch.Size([3, 224, 224])

In [39]:
# Shape of the first element (index 0) of training sample 1 (repeat of an earlier probe).
train_dataset[1][0].shape

torch.Size([3, 224, 224])

In [None]:
# Eagerly fetch every training sample by index and keep them all in one list.
# NOTE(review): with 22003 samples and [3, 224, 224] image tensors (per the
# recorded outputs) the images alone would take roughly 13 GB if they are
# float32 -- the dtype is not visible here; confirm this fits in memory.
sample_indices = range(len(train_dataset))
train_tensor_list = [train_dataset[idx] for idx in sample_indices]