## MODEL

In [14]:
#import os
#os.chdir('/content/drive/MyDrive/VCTF')

In [15]:
import torch

# Pick the GPU when CUDA is usable and fall back to the CPU otherwise, so the
# device choice actually reflects the availability check instead of assuming
# a GPU is present.
DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Last expression of the cell: displays whether CUDA is available.
torch.cuda.is_available()

True

In [16]:
from definitions import *

In [17]:
from dataset_preparations import get_img_paths, get_labels
from torch.utils.data import Subset


# Training split: image paths from STANFORD_TRAIN_PATH and labels read from
# "anno_train.csv" under STANFORD_SET_PATH.
stanford_training_paths, training_labels = (
    get_img_paths(STANFORD_TRAIN_PATH),
    get_labels(STANFORD_SET_PATH, "anno_train.csv"),
)

# Validation split: image paths from STANFORD_VALIDATION_PATH and labels read
# from "anno_test.csv" under STANFORD_SET_PATH.
stanford_validation_paths, validation_labels = (
    get_img_paths(STANFORD_VALIDATION_PATH),
    get_labels(STANFORD_SET_PATH, "anno_test.csv"),
)

In [18]:
from dataset_preparations import get_dataset

# Pair each split's image paths with its labels to build the two datasets
# (training first, then validation).
training_dataset, validation_dataset = (
    get_dataset(stanford_training_paths, training_labels),
    get_dataset(stanford_validation_paths, validation_labels),
)

In [19]:
from dataset_preparations import create_data_loader

# Optional: uncomment to build 500-sample subsets of each dataset (they must
# then be passed to create_data_loader below instead of the full datasets).
# training_subset = Subset(training_dataset, indices=range(500))
# validation_subset = Subset(validation_dataset, indices=range(500))

# The second positional argument is True for training and False for
# validation -- presumably a shuffle flag; confirm against create_data_loader.
training_data_loader, validation_data_loader = (
    create_data_loader(training_dataset, True),
    create_data_loader(validation_dataset, False),
)

In [20]:
from dataset_preparations import create_model

# Build the detector for NUM_CLASSES classes and place it on DEVICE.
model = create_model(NUM_CLASSES).to(DEVICE)

# Final expression of the cell: displays the model architecture.
model



FasterRCNN(
  (transform): GeneralizedRCNNTransform(
      Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
      Resize(min_size=(800,), max_size=1333, mode='bilinear')
  )
  (backbone): BackboneWithFPN(
    (body): IntermediateLayerGetter(
      (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
      (bn1): FrozenBatchNorm2d(64, eps=0.0)
      (relu): ReLU(inplace=True)
      (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
      (layer1): Sequential(
        (0): Bottleneck(
          (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn1): FrozenBatchNorm2d(64, eps=0.0)
          (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
          (bn2): FrozenBatchNorm2d(64, eps=0.0)
          (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn3): FrozenBatchNorm2d(256, eps=0.0)
          (relu): ReLU(

In [21]:
# Quick inspection of the container type returned by get_img_paths.
type(stanford_training_paths)

dict

In [22]:
# Hand the optimizer only the parameters that still require gradients.
params = list(filter(lambda p: p.requires_grad, model.parameters()))

# Plain SGD with momentum and weight decay.
optimizer = torch.optim.SGD(
    params,
    lr=0.001,
    momentum=0.9,
    weight_decay=0.0005,
)

In [23]:
from classes import Averager

# Global batch counters; train() and validate() increment them once per batch.
train_itr = val_itr = 1

# Per-batch loss values, appended to by train() and validate().
train_loss_list, val_loss_list = [], []

# Running loss trackers, fed one value per batch via .send().
train_loss_avg, val_loss_avg = Averager(), Averager()

In [24]:
from tqdm.auto import tqdm

# function for running training iterations
def train(train_data_loader, model):
    """Run one optimisation pass over ``train_data_loader``.

    Relies on notebook-level globals: ``optimizer`` (stepped once per batch),
    ``DEVICE``, ``train_loss_avg``, ``train_loss_list`` and ``train_itr``.

    Args:
        train_data_loader: Sized iterable yielding ``(images, targets)``
            batches, where ``images`` is an iterable of tensors and
            ``targets`` an iterable of dicts whose values support
            ``.to(DEVICE)``.
        model: Module called as ``model(images, targets)``; it must return a
            dict of loss tensors.

    Returns:
        The global ``train_loss_list`` with one float appended per batch.
        The list is never cleared here, so it accumulates across calls.
    """
    print('Training')
    global train_itr
    global train_loss_list

    # torchvision detection models only return the loss dict in training
    # mode; in eval mode they return detections and `.values()` below would
    # fail. Setting the mode here makes the function safe to call after any
    # inference/eval step.
    model.train()

    # initialize tqdm progress bar
    prog_bar = tqdm(train_data_loader, total=len(train_data_loader))

    for images, targets in prog_bar:
        optimizer.zero_grad()

        # move the whole batch onto the training device
        images = [image.to(DEVICE) for image in images]
        targets = [{k: v.to(DEVICE) for k, v in t.items()} for t in targets]

        # total loss is the sum of every component the model reports
        loss_dict = model(images, targets)
        losses = sum(loss_dict.values())
        loss_value = losses.item()
        train_loss_list.append(loss_value)
        train_loss_avg.send(loss_value)

        losses.backward()
        optimizer.step()
        train_itr += 1

        # update the loss value beside the progress bar for each iteration
        prog_bar.set_description(desc=f"Loss: {loss_value:.4f}")
    return train_loss_list

In [25]:
# function for running validation iterations
def validate(valid_data_loader, model):
    """Compute the loss for every batch of ``valid_data_loader`` without gradients.

    Uses the notebook-level globals ``DEVICE``, ``val_loss_avg``,
    ``val_loss_list`` and ``val_itr``. The model's mode is not changed here;
    ``model(images, targets)`` must return a dict of loss tensors.

    Args:
        valid_data_loader: Sized iterable yielding ``(images, targets)``
            batches; each target is a dict whose values support
            ``.to(DEVICE)``.
        model: Module called as ``model(images, targets)``.

    Returns:
        The global ``val_loss_list`` with one float appended per batch.
    """
    global val_itr, val_loss_list
    print('Validating')

    # progress bar over the validation batches
    progress = tqdm(valid_data_loader, total=len(valid_data_loader))

    for batch_images, batch_targets in progress:
        device_images = [img.to(DEVICE) for img in batch_images]
        device_targets = [
            {key: value.to(DEVICE) for key, value in target.items()}
            for target in batch_targets
        ]

        # forward pass only: no autograd graph is recorded
        with torch.no_grad():
            batch_losses = model(device_images, device_targets)
            loss_value = sum(batch_losses.values()).item()

        val_loss_list.append(loss_value)
        val_loss_avg.send(loss_value)
        val_itr += 1

        # show the latest batch loss next to the progress bar
        progress.set_description(desc=f"Loss: {loss_value:.4f}")
    return val_loss_list

In [26]:
import time

NUM_EPOCHS = 10  # number of epochs to train for
for epoch in range(NUM_EPOCHS):
    # 1-based epoch label, used consistently in every log line below
    epoch_number = epoch + 1
    print(f"\nEPOCH {epoch_number} of {NUM_EPOCHS}")

    # reset the running loss trackers so they only cover the current epoch
    train_loss_avg.reset()
    val_loss_avg.reset()
    # start timer and carry out training and validation
    start = time.time()
    # train()/validate() return the global per-batch loss lists, which keep
    # growing across epochs
    train_loss = train(training_data_loader, model)
    val_loss = validate(validation_data_loader, model)
    print(f"Epoch #{epoch_number} train loss: {train_loss_avg.value:.3f}")
    print(f"Epoch #{epoch_number} validation loss: {val_loss_avg.value:.3f}")
    end = time.time()
    print(f"Took {((end - start) / 60):.3f} minutes for epoch {epoch_number}")
    # save the best model till now if we have the least loss in the...
    # ... current epoch
    # save_best_model(
    #     train_loss_avg.value, epoch, model, optimizer
    # )
    # save the current epoch model
    # save_model(epoch, model, optimizer)
    # save loss plot
    # save_loss_plot(OUT_DIR, train_loss, val_loss)

    # pause for 2 seconds after each epoch
    time.sleep(2)


EPOCH 1 of 10
Training


  0%|          | 0/1018 [00:00<?, ?it/s]

KeyboardInterrupt: 

In [None]:
import time

NUM_EPOCHS = 10  # number of epochs to train for
for epoch in range(NUM_EPOCHS):
    # 1-based epoch label, used consistently in every log line below
    epoch_number = epoch + 1
    print(f"\nEPOCH {epoch_number} of {NUM_EPOCHS}")

    # reset the running loss trackers so they only cover the current epoch
    train_loss_avg.reset()
    val_loss_avg.reset()
    # start timer and carry out training and validation
    start = time.time()
    # train()/validate() return the global per-batch loss lists, which keep
    # growing across epochs
    train_loss = train(training_data_loader, model)
    val_loss = validate(validation_data_loader, model)
    print(f"Epoch #{epoch_number} train loss: {train_loss_avg.value:.3f}")
    print(f"Epoch #{epoch_number} validation loss: {val_loss_avg.value:.3f}")
    end = time.time()
    print(f"Took {((end - start) / 60):.3f} minutes for epoch {epoch_number}")
    # save the best model till now if we have the least loss in the...
    # ... current epoch
    # save_best_model(
    #     train_loss_avg.value, epoch, model, optimizer
    # )
    # save the current epoch model
    # save_model(epoch, model, optimizer)
    # save loss plot
    # save_loss_plot(OUT_DIR, train_loss, val_loss)

    # pause for 2 seconds after each epoch
    time.sleep(2)

In [27]:
import time

NUM_EPOCHS = 10  # number of epochs to train for
for epoch in range(NUM_EPOCHS):
    # 1-based epoch label, used consistently in every log line below
    epoch_number = epoch + 1
    print(f"\nEPOCH {epoch_number} of {NUM_EPOCHS}")

    # reset the running loss trackers so they only cover the current epoch
    train_loss_avg.reset()
    val_loss_avg.reset()
    # start timer and carry out training and validation
    start = time.time()
    # train()/validate() return the global per-batch loss lists, which keep
    # growing across epochs
    train_loss = train(training_data_loader, model)
    val_loss = validate(validation_data_loader, model)
    print(f"Epoch #{epoch_number} train loss: {train_loss_avg.value:.3f}")
    print(f"Epoch #{epoch_number} validation loss: {val_loss_avg.value:.3f}")
    end = time.time()
    print(f"Took {((end - start) / 60):.3f} minutes for epoch {epoch_number}")
    # save the best model till now if we have the least loss in the...
    # ... current epoch
    # save_best_model(
    #     train_loss_avg.value, epoch, model, optimizer
    # )
    # save the current epoch model
    # save_model(epoch, model, optimizer)
    # save loss plot
    # save_loss_plot(OUT_DIR, train_loss, val_loss)

    # pause for 2 seconds after each epoch
    time.sleep(2)

    # checkpoint the weights after each epoch; every epoch writes to the same
    # MODEL_SAVE_PATH, so only the most recent weights are kept
    torch.save(model.state_dict(), MODEL_SAVE_PATH)


EPOCH 1 of 10
Training


  0%|          | 0/1018 [00:00<?, ?it/s]

OutOfMemoryError: CUDA out of memory. Tried to allocate 314.00 MiB (GPU 0; 11.76 GiB total capacity; 8.25 GiB already allocated; 221.62 MiB free; 8.95 GiB reserved in total by PyTorch) If reserved memory is >> allocated memory try setting max_split_size_mb to avoid fragmentation.  See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF