In [1]:
import torch
import torchvision
import torchvision.transforms as transforms
from torchvision.models import inception_v3
from torch.utils.data import DataLoader
from torch.utils.tensorboard import SummaryWriter
import torch.optim as optim
import torch.nn as nn
import time
import os
from torch.utils.data import Subset
import random
import argparse

def get_random_subset_indices(num_samples, dataset_size, seed=None):
    """Draw ``num_samples`` distinct indices from ``range(dataset_size)``.

    Args:
        num_samples: Number of indices to draw.
        dataset_size: Size of the index space; returned indices lie in
            ``[0, dataset_size)``.
        seed: Optional seed. When given, a private ``random.Random(seed)`` is
            used, so the same subset is returned on every call and the global
            ``random`` state is left untouched. When ``None`` (the default) the
            module-level generator is used.

    Returns:
        A list of ``num_samples`` unique integer indices in random order.

    Raises:
        ValueError: If ``num_samples`` is negative or larger than the number
            of available indices.
    """
    population = range(dataset_size)
    if not 0 <= num_samples <= len(population):
        raise ValueError(
            f"num_samples must be between 0 and {len(population)}, got {num_samples}"
        )
    # A dedicated generator keeps seeded draws reproducible without reseeding
    # the shared module-level RNG that other code may rely on.
    rng = random if seed is None else random.Random(seed)
    return rng.sample(population, num_samples)


def create_dataset(data_root="/lus/eagle/projects/datascience/ImageNet/ILSVRC/Data/CLS-LOC", num_samples=10000):
    """Build a random training subset of an ``ImageFolder`` dataset.

    Images are read from ``<data_root>/train`` and passed through the training
    augmentation pipeline (random 299x299 resized crop, random horizontal flip,
    tensor conversion, per-channel normalization).

    Args:
        data_root: Directory that contains the ``train`` folder. Defaults to
            the path this notebook was originally written against.
        num_samples: Number of images to keep in the subset. Capped at the
            number of images actually found.

    Returns:
        A ``torch.utils.data.Subset`` of the ``ImageFolder`` training set.
    """
    # Data augmentation and normalization for training
    train_transform = transforms.Compose([
        transforms.RandomResizedCrop(299),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ])

    train_dataset = torchvision.datasets.ImageFolder(os.path.join(data_root, "train"), transform=train_transform)

    # Sampling more indices than exist raises ValueError, so cap the request
    # at the dataset size instead of failing on small datasets.
    num_samples = min(num_samples, len(train_dataset))

    train_indices = get_random_subset_indices(num_samples, len(train_dataset))
    return Subset(train_dataset, train_indices)

def build_model():
    """Assemble the Inception v3 network plus its loss, optimizer and device.

    The network is created without pretrained weights and with the auxiliary
    classifier enabled. Both classifier heads (auxiliary first, then main) are
    replaced by fresh 1000-way linear layers. The optimizer is SGD with
    lr=0.1, momentum=0.9 and weight_decay=1e-4. The model is moved to CUDA
    when available (CPU otherwise) and wrapped in ``nn.DataParallel``.

    Returns:
        Tuple ``(model, criterion, optimizer, device)``.
    """
    target = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    net = inception_v3(pretrained=False, aux_logits=True)
    # Swap the auxiliary head first and the main head second.
    for owner in (net.AuxLogits, net):
        owner.fc = nn.Linear(owner.fc.in_features, 1000)

    loss_fn = nn.CrossEntropyLoss()
    sgd = optim.SGD(net.parameters(), lr=0.1, momentum=0.9, weight_decay=1e-4)

    wrapped = nn.DataParallel(net.to(target))
    return wrapped, loss_fn, sgd, target


def create_dataLoader(batch_size=128, workers=4):
    """Wrap the dataset from ``create_dataset()`` in a training ``DataLoader``.

    Args:
        batch_size: Number of samples per batch.
        workers: Number of loader worker processes (``num_workers``).

    Returns:
        A ``DataLoader`` with shuffling and pinned memory enabled.
    """
    loader_options = {
        "batch_size": batch_size,
        "num_workers": workers,
        "shuffle": True,
        "pin_memory": True,
    }
    training_subset = create_dataset()
    return DataLoader(training_subset, **loader_options)

def select_device(selected_gpus):
    """Restrict the GPUs visible to this process via ``CUDA_VISIBLE_DEVICES``.

    Args:
        selected_gpus: Iterable of GPU ids. They are joined with commas, so
            ``[0, 1]`` becomes ``"0,1"``; an empty iterable yields an empty
            string.

    Warns:
        RuntimeWarning: If CUDA has already been initialized in this process.
            The variable is still written, but it is only read when CUDA
            starts, so the selection will not take effect until the kernel is
            restarted.
    """
    import warnings  # local import: only needed for the late-call warning

    if torch.cuda.is_initialized():
        warnings.warn(
            "CUDA is already initialized in this process; changing "
            "CUDA_VISIBLE_DEVICES now has no effect. Restart the kernel "
            "before selecting a different set of GPUs.",
            RuntimeWarning,
            stacklevel=2,
        )
    os.environ["CUDA_VISIBLE_DEVICES"] = ",".join(map(str, selected_gpus))

def train_one_epoch(model, criterion, optimizer, data_loader, device):
    """Run one training pass over ``data_loader`` and report throughput.

    The model is expected to return a ``(outputs, aux_outputs)`` pair. The
    loss is ``criterion(outputs) + 0.4 * criterion(aux_outputs)``.

    Args:
        model: Network to train; switched to training mode.
        criterion: Loss callable taking ``(predictions, labels)``.
        optimizer: Optimizer stepped once per batch.
        data_loader: Iterable yielding ``(inputs, labels)`` batches.
        device: Device the batches are moved to.

    Returns:
        Tuple ``(images_per_second, duration)``: throughput truncated to an
        ``int``, and the epoch wall time in seconds.
    """
    model.train()

    # CUDA kernels run asynchronously; without a final synchronize the timer
    # could stop while the last batch is still executing on the GPU.
    on_cuda = str(device).startswith("cuda") and torch.cuda.is_available()

    total_images = 0
    start_time = time.perf_counter()  # monotonic, unaffected by clock changes
    for inputs, labels in data_loader:
        inputs, labels = inputs.to(device), labels.to(device)
        optimizer.zero_grad()

        outputs, aux_outputs = model(inputs)
        loss1 = criterion(outputs, labels)
        loss2 = criterion(aux_outputs, labels)
        # The auxiliary-head loss is down-weighted relative to the main head.
        loss = loss1 + 0.4 * loss2

        loss.backward()
        optimizer.step()

        total_images += inputs.size(0)

    if on_cuda:
        torch.cuda.synchronize()
    duration = time.perf_counter() - start_time

    # Guard against a zero-length interval (e.g. an empty loader on a coarse clock).
    images_per_second = total_images / duration if duration > 0 else 0
    return int(images_per_second), duration


def train(batch_size=256, GPU_selection=(0, 1), epoch=5, num_workers=8):
    """Run the throughput benchmark and print one summary line per epoch.

    Args:
        batch_size: Batch size passed to the data loader. The model is wrapped
            in ``nn.DataParallel``, which splits each batch across the visible
            GPUs.
        GPU_selection: Iterable of GPU ids exported through
            ``CUDA_VISIBLE_DEVICES`` before the model is built.
        epoch: Number of epochs to run.
        num_workers: Number of data-loader worker processes.

    Returns:
        None. Results are printed as
        ``Epoch [i/N],Duration: ...s, Images/s: ...``.
    """
    num_epochs = epoch  # Adjust this value according to your needs

    # Restrict GPU visibility before any other setup so that nothing can
    # initialize CUDA with the previous device set.
    select_device(GPU_selection)
    train_loader = create_dataLoader(batch_size, num_workers)
    model, criterion, optimizer, device = build_model()

    # A separate loop variable avoids shadowing the ``epoch`` parameter.
    for epoch_index in range(num_epochs):
        images_per_second, epoch_duration = train_one_epoch(model, criterion, optimizer, train_loader, device)

        print(f"Epoch [{epoch_index + 1}/{num_epochs}],Duration: {epoch_duration:.2f}s, Images/s: {images_per_second}")
         

In [2]:
# Benchmark run: GPU 0 only, batch size 256, 4 loader workers, 5 epochs.
# NOTE(review): train() sets CUDA_VISIBLE_DEVICES; presumably this must run in a fresh kernel to take effect — confirm.
train(batch_size=256, GPU_selection=[0], epoch=5, num_workers=4)



Epoch [1/5],Duration: 70.04s, Images/s: 142
Epoch [2/5],Duration: 19.60s, Images/s: 510
Epoch [3/5],Duration: 19.41s, Images/s: 515
Epoch [4/5],Duration: 19.39s, Images/s: 515
Epoch [5/5],Duration: 19.48s, Images/s: 513


In [2]:
# Benchmark run: GPUs 0 and 1, batch size 256, 4 loader workers, 5 epochs.
# NOTE(review): train() sets CUDA_VISIBLE_DEVICES; presumably this must run in a fresh kernel to take effect — confirm.
train(batch_size=256, GPU_selection=[0,1], epoch=5, num_workers=4)



Epoch [1/5],Duration: 70.06s, Images/s: 142
Epoch [2/5],Duration: 11.67s, Images/s: 856
Epoch [3/5],Duration: 11.84s, Images/s: 844
Epoch [4/5],Duration: 11.70s, Images/s: 854
Epoch [5/5],Duration: 11.69s, Images/s: 855


In [2]:
# Benchmark run: GPUs 0, 1 and 2, batch size 256, 4 loader workers, 5 epochs.
# NOTE(review): train() sets CUDA_VISIBLE_DEVICES; presumably this must run in a fresh kernel to take effect — confirm.
train(batch_size=256, GPU_selection=[0,1,2], epoch=5, num_workers=4)



Epoch [1/5],Duration: 68.61s, Images/s: 145
Epoch [2/5],Duration: 11.19s, Images/s: 893
Epoch [3/5],Duration: 10.88s, Images/s: 919
Epoch [4/5],Duration: 11.52s, Images/s: 867
Epoch [5/5],Duration: 10.85s, Images/s: 921


In [2]:
# Benchmark run: GPUs 0, 1, 2 and 3, batch size 256, 4 loader workers, 5 epochs.
# NOTE(review): train() sets CUDA_VISIBLE_DEVICES; presumably this must run in a fresh kernel to take effect — confirm.
train(batch_size=256, GPU_selection=[0,1,2,3], epoch=5, num_workers=4)



Epoch [1/5],Duration: 70.13s, Images/s: 142
Epoch [2/5],Duration: 11.03s, Images/s: 906
Epoch [3/5],Duration: 10.78s, Images/s: 927
Epoch [4/5],Duration: 11.25s, Images/s: 889
Epoch [5/5],Duration: 10.94s, Images/s: 914
