In [None]:
import os
import gc
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from torchvision import datasets, transforms, models
import matplotlib.pyplot as plt
import numpy as np
from tqdm import tqdm

In [None]:
# Pick the GPU when one is visible to PyTorch, otherwise fall back to the CPU
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print(f"Using device: {device}")

# On a GPU: release cached allocator blocks, then report the baseline footprint in MB
if device.type == "cuda":
    torch.cuda.empty_cache()
    print(f"Initial GPU memory allocated: {torch.cuda.memory_allocated() / 1024**2:.2f} MB")
    print(f"Initial GPU memory cached: {torch.cuda.memory_reserved() / 1024**2:.2f} MB")

# Seed NumPy and PyTorch from one shared value for reproducibility
seed = 42
np.random.seed(seed)
torch.manual_seed(seed)

In [None]:
# Training hyperparameters, grouped by what they control

# Model
arch = 'resnet101'  # Options: 'resnet18', 'resnet34', 'resnet50', 'resnet101'
pretrained = False  # False -> the network is built without ImageNet weights

# Optimisation (SGD)
epochs = 100
learning_rate = 0.01
momentum = 0.9
weight_decay = 5e-4

# Data loading
batch_size = 128
test_batch_size = 128
num_workers = 2

# Checkpointing
save_model = True

In [None]:
# Helper function to print GPU memory usage
def print_gpu_memory():
    """Print the allocated and reserved CUDA memory in MB; do nothing without CUDA."""
    if not torch.cuda.is_available():
        return
    print(f"GPU memory allocated: {torch.cuda.memory_allocated() / 1024**2:.2f} MB")
    print(f"GPU memory cached: {torch.cuda.memory_reserved() / 1024**2:.2f} MB")

# Helper function to clear memory
def clear_memory():
    """Run the Python garbage collector, then empty the CUDA allocator cache if CUDA is available."""
    gc.collect()
    if not torch.cuda.is_available():
        return
    torch.cuda.empty_cache()

In [None]:
# Get ResNet model
def get_model(architecture, num_classes=100, use_pretrained=True):
    """
    Build a torchvision ResNet and replace its classifier head.

    Args:
        architecture: One of 'resnet18', 'resnet34', 'resnet50', 'resnet101'
        num_classes: Output size of the replacement fully connected layer
        use_pretrained: If True, request the 'IMAGENET1K_V1' weights;
            otherwise the constructor is called with weights=None

    Returns:
        The ResNet model with `fc` swapped for a fresh nn.Linear

    Raises:
        ValueError: If `architecture` is not one of the supported names
    """
    supported = ('resnet18', 'resnet34', 'resnet50', 'resnet101')
    if architecture not in supported:
        raise ValueError(f"Unsupported architecture: {architecture}")

    # The supported names match the torchvision constructor names one-to-one
    builder = getattr(models, architecture)
    weights = 'IMAGENET1K_V1' if use_pretrained else None
    model = builder(weights=weights)

    # Swap the final fully connected layer so the output width equals num_classes
    model.fc = nn.Linear(model.fc.in_features, num_classes)
    return model

In [None]:
def train(model, device, train_loader, optimizer, criterion, epoch, scheduler=None, scaler=None):
    """
    Run one training epoch and report the mean batch loss and the accuracy.

    Args:
        model: PyTorch model (put into train mode here)
        device: Device the batches are moved to (cuda/cpu)
        train_loader: Iterable yielding (data, target) batches
        optimizer: PyTorch optimizer
        criterion: Loss function called as criterion(output, target)
        epoch: Zero-based epoch index (only used for the progress-bar label)
        scheduler: Learning rate scheduler, stepped once after the epoch (optional)
        scaler: Gradient scaler; when given, the forward pass runs under
            torch.cuda.amp.autocast() and backward/step go through the scaler (optional)

    Returns:
        average_loss, accuracy: mean of the per-batch losses and accuracy in percent.
        (0.0, 0.0) is returned when the loader yields no samples instead of
        raising ZeroDivisionError.
    """
    model.train()
    running_loss = 0.0
    correct = 0
    total = 0
    num_batches = 0

    progress_bar = tqdm(train_loader, desc=f'Epoch {epoch+1}')
    for batch_idx, (data, target) in enumerate(progress_bar):
        # No torch.cuda.empty_cache() here: calling it every batch hands the cached
        # blocks back to the driver only for them to be re-allocated on the next
        # forward pass, which slows training without making more memory usable.
        data, target = data.to(device), target.to(device)

        # Zero gradients (set_to_none avoids writing zeros into the grad buffers)
        optimizer.zero_grad(set_to_none=True)

        # Mixed precision training if scaler is provided
        if scaler is not None:
            with torch.cuda.amp.autocast():
                output = model(data)
                loss = criterion(output, target)

            # Use scaler for backpropagation and optimization
            scaler.scale(loss).backward()
            scaler.step(optimizer)
            scaler.update()
        else:
            output = model(data)
            loss = criterion(output, target)
            loss.backward()
            optimizer.step()

        # Update statistics
        running_loss += loss.item()
        _, predicted = output.max(1)
        total += target.size(0)
        correct += predicted.eq(target).sum().item()
        num_batches = batch_idx + 1

        # Drop references to this batch's tensors before the next batch is loaded
        del output, loss, data, target

        # Update progress bar with the running averages
        progress_bar.set_postfix({
            'loss': running_loss / num_batches,
            'acc': 100. * correct / max(total, 1)
        })

    # Update learning rate (once per epoch)
    if scheduler is not None:
        scheduler.step()

    # An empty loader has nothing to average
    if num_batches == 0 or total == 0:
        return 0.0, 0.0

    return running_loss / num_batches, 100. * correct / total

def test(model, device, test_loader, criterion, scaler=None):
    """
    Evaluation function
    
    Args:
        model: PyTorch model
        device: Device to test on (cuda/cpu)
        test_loader: DataLoader for test data
        criterion: Loss function
        scaler: Gradient scaler for mixed precision (optional)
    
    Returns:
        average_loss, accuracy
    """
    model.eval()
    test_loss = 0
    correct = 0
    total = 0
    
    with torch.no_grad():
        progress_bar = tqdm(test_loader, desc='Test')
        for data, target in progress_bar:
            # Clear cache
            if torch.cuda.is_available():
                torch.cuda.empty_cache()
                
            data, target = data.to(device), target.to(device)
            
            # Mixed precision if scaler is provided
            if scaler is not None:
                with torch.cuda.amp.autocast():
                    output = model(data)
                    loss = criterion(output, target)
            else:
                output = model(data)
                loss = criterion(output, target)
            
            # Update statistics
            test_loss += loss.item()
            _, predicted = output.max(1)
            total += target.size(0)
            correct += predicted.eq(target).sum().item()
            
            # Clean up tensors
            del output, loss, data, target
            
            # Update progress bar
            progress_bar.set_postfix({
                'loss': test_loss / (progress_bar.n + 1),
                'acc': 100. * correct / total
            })
    
    return test_loss / len(test_loader), 100. * correct / total

# Extra DataLoader options: worker processes and pinned host memory are only
# requested when CUDA is available; otherwise the DataLoader defaults apply
if torch.cuda.is_available():
    kwargs = {'num_workers': num_workers, 'pin_memory': True}
else:
    kwargs = {}

# Training pipeline: augment at the native 32x32 size (padded random crop + flip),
# then upscale to 224, convert to a tensor and normalise per channel
# (the constants are presumably the CIFAR-100 channel mean/std -- confirm)
transform_train = transforms.Compose([
    transforms.RandomCrop(32, padding=4),
    transforms.RandomHorizontalFlip(),
    transforms.Resize(224),
    transforms.ToTensor(),
    transforms.Normalize(
        (0.5071, 0.4867, 0.4408),
        (0.2675, 0.2565, 0.2761),
    ),
])

# Evaluation pipeline: same resize/normalisation, no augmentation
transform_test = transforms.Compose([
    transforms.Resize(224),
    transforms.ToTensor(),
    transforms.Normalize(
        (0.5071, 0.4867, 0.4408),
        (0.2675, 0.2565, 0.2761),
    ),
])

# CIFAR-100 splits, downloaded into ./data when not already present
train_dataset = datasets.CIFAR100(
    root='./data',
    train=True,
    download=True,
    transform=transform_train,
)
test_dataset = datasets.CIFAR100(
    root='./data',
    train=False,
    download=True,
    transform=transform_test,
)

# Shuffle only the training split
train_loader = DataLoader(
    dataset=train_dataset, batch_size=batch_size, shuffle=True, **kwargs)
test_loader = DataLoader(
    dataset=test_dataset, batch_size=test_batch_size, shuffle=False, **kwargs)

# Build the configured network and move it onto the selected device
print(f"Using {arch} architecture")
model = get_model(arch, num_classes=100, use_pretrained=pretrained).to(device)

# Show the layer layout
print(model)

# Cross-entropy loss optimised with SGD (momentum + weight decay)
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(
    model.parameters(),
    lr=learning_rate,
    momentum=momentum,
    weight_decay=weight_decay,
)

# Cosine-annealed learning rate spanning the whole run
scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=epochs)

# Gradient scaler for mixed precision; created disabled when CUDA is unavailable
scaler = torch.cuda.amp.GradScaler(enabled=torch.cuda.is_available())

# Per-epoch history, filled by the training loop
train_losses, test_losses = [], []
train_accs, test_accs = [], []

# Highest test accuracy seen so far
best_acc = 0.0

# Training loop: one train pass and one test pass per epoch, with checkpointing
for epoch in range(epochs):
    # Start each epoch from a clean allocator state and log it
    clear_memory()
    print(f"Memory state before Epoch {epoch+1}:")
    print_gpu_memory()

    # Train, free cached memory, then evaluate
    train_loss, train_acc = train(
        model, device, train_loader, optimizer, criterion, epoch,
        scheduler=scheduler, scaler=scaler)
    clear_memory()
    test_loss, test_acc = test(model, device, test_loader, criterion, scaler=scaler)

    # Record history
    for history, value in (
        (train_losses, train_loss),
        (test_losses, test_loss),
        (train_accs, train_acc),
        (test_accs, test_acc),
    ):
        history.append(value)

    # Print results
    print(f'Epoch: {epoch+1}/{epochs}')
    print(f'Train Loss: {train_loss:.4f} | Train Acc: {train_acc:.2f}%')
    print(f'Test Loss: {test_loss:.4f} | Test Acc: {test_acc:.2f}%')
    print('-' * 70)

    # Decide which files (if any) this epoch should write
    improved = test_acc > best_acc
    if improved:
        best_acc = test_acc
    periodic = (epoch + 1) % 10 == 0

    if save_model and (improved or periodic):
        # Both kinds of file carry the same payload, so build it once
        snapshot = {
            'epoch': epoch,
            'model_state_dict': model.state_dict(),
            'optimizer_state_dict': optimizer.state_dict(),
            'scheduler_state_dict': scheduler.state_dict(),
            'best_acc': best_acc,
        }

        # New best test accuracy: overwrite the "best" file
        if improved:
            save_path = f'cifar100_{arch}_best.pth'
            torch.save(snapshot, save_path)
            print(f'Saved best model to {save_path} [Accuracy: {best_acc:.2f}%]')

        # Every 10th epoch: write a numbered checkpoint
        if periodic:
            save_path = f'cifar100_{arch}_checkpoint_epoch{epoch+1}.pth'
            torch.save(snapshot, save_path)
            print(f'Saved checkpoint to {save_path}')

    # Clear memory after each epoch and log the result
    clear_memory()
    print(f"Memory state after Epoch {epoch+1}:")
    print_gpu_memory()

# Plot training history: loss on the left panel, accuracy on the right
plt.figure(figsize=(12, 5))

curve_panels = (
    (train_losses, 'Train Loss', test_losses, 'Test Loss', 'Loss', 'Loss Curves'),
    (train_accs, 'Train Accuracy', test_accs, 'Test Accuracy', 'Accuracy (%)', 'Accuracy Curves'),
)
for position, (train_curve, train_label, test_curve, test_label, y_label, heading) in enumerate(
        curve_panels, start=1):
    plt.subplot(1, 2, position)
    plt.plot(train_curve, label=train_label)
    plt.plot(test_curve, label=test_label)
    plt.xlabel('Epochs')
    plt.ylabel(y_label)
    plt.legend()
    plt.title(heading)

# Write the figure to disk before showing it
plt.tight_layout()
plt.savefig(f'cifar100_{arch}_training_curves.png')
plt.show()

# Load best model and evaluate
# The "best" file is only written by the training loop when save_model is True,
# so check for it instead of letting torch.load fail (or pick up a stale file
# left behind by an earlier run).
best_checkpoint_path = f'cifar100_{arch}_best.pth'
if save_model and os.path.exists(best_checkpoint_path):
    print("Evaluating best model...")
    # map_location lets a checkpoint saved on the GPU be reloaded on whatever device is active now
    checkpoint = torch.load(best_checkpoint_path, map_location=device)
    model.load_state_dict(checkpoint['model_state_dict'])
    model.eval()

    # No scaler is passed, so this evaluation runs without autocast
    test_loss, test_acc = test(model, device, test_loader, criterion)
    print(f"Best model accuracy: {test_acc:.2f}%")
else:
    print(f"No best-model checkpoint available at {best_checkpoint_path}; skipping evaluation.")

# Function to change architecture and retrain
def change_architecture(new_arch):
    """
    Build a fresh model for a different ResNet architecture and record it in
    the global `arch`.

    The global is only updated after the model has been created, so an
    unsupported name leaves `arch` at its previous value.

    Args:
        new_arch: New architecture to use ('resnet18', 'resnet34', 'resnet50', 'resnet101')

    Returns:
        New model, already moved to `device`. Any existing optimizer or
        scheduler is untouched by this function and still refers to the
        previous model's parameters.

    Raises:
        ValueError: Propagated from get_model when `new_arch` is not supported
    """
    global arch
    print(f"Switching to {new_arch} architecture")

    # Clear memory before allocating the new network
    clear_memory()

    # Create the new model first; get_model raises for unknown names
    new_model = get_model(new_arch, num_classes=100, use_pretrained=pretrained)
    new_model = new_model.to(device)

    # Commit the architecture name only once the model exists
    arch = new_arch

    return new_model

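# Example of how to switch architecture:
# To use a different architecture, uncomment and run:
# model = change_architecture('resnet34')
# Then re-create the optimizer, scheduler and scaler for the new model (the existing
# ones are still bound to the old model's parameters) and re-run the training loop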

Using device: cuda
Initial GPU memory allocated: 479.21 MB
Initial GPU memory cached: 916.00 MB
Files already downloaded and verified
Files already downloaded and verified
Using resnet101 architecture
ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): Bottleneck(
      (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): Batch

Epoch 1: 100%|██████████| 391/391 [01:50<00:00,  3.55it/s, loss=4.31, acc=5.52]
Test: 100%|██████████| 79/79 [00:05<00:00, 13.89it/s, loss=3.88, acc=10.7]


Epoch: 1/100
Train Loss: 4.3136 | Train Acc: 5.52%
Test Loss: 3.8296 | Test Acc: 10.66%
----------------------------------------------------------------------
Saved best model to cifar100_resnet101_best.pth [Accuracy: 10.66%]
Memory state after Epoch 1:
GPU memory allocated: 706.38 MB
GPU memory cached: 1182.00 MB
Memory state before Epoch 2:
GPU memory allocated: 706.38 MB
GPU memory cached: 1182.00 MB


Epoch 2: 100%|██████████| 391/391 [01:52<00:00,  3.46it/s, loss=3.7, acc=12.6] 
Test: 100%|██████████| 79/79 [00:05<00:00, 13.79it/s, loss=3.55, acc=15.8]


Epoch: 2/100
Train Loss: 3.7029 | Train Acc: 12.60%
Test Loss: 3.5058 | Test Acc: 15.85%
----------------------------------------------------------------------
Saved best model to cifar100_resnet101_best.pth [Accuracy: 15.85%]
Memory state after Epoch 2:
GPU memory allocated: 705.25 MB
GPU memory cached: 1148.00 MB
Memory state before Epoch 3:
GPU memory allocated: 705.25 MB
GPU memory cached: 1148.00 MB


Epoch 3: 100%|██████████| 391/391 [01:51<00:00,  3.49it/s, loss=3.39, acc=18.1]
Test: 100%|██████████| 79/79 [00:05<00:00, 13.85it/s, loss=3.3, acc=21]   


Epoch: 3/100
Train Loss: 3.3872 | Train Acc: 18.06%
Test Loss: 3.2556 | Test Acc: 21.01%
----------------------------------------------------------------------
Saved best model to cifar100_resnet101_best.pth [Accuracy: 21.01%]
Memory state after Epoch 3:
GPU memory allocated: 705.84 MB
GPU memory cached: 1202.00 MB
Memory state before Epoch 4:
GPU memory allocated: 705.84 MB
GPU memory cached: 1202.00 MB


Epoch 4: 100%|██████████| 391/391 [01:48<00:00,  3.61it/s, loss=3.11, acc=22.7]
Test: 100%|██████████| 79/79 [00:05<00:00, 14.77it/s, loss=3.07, acc=25.9]


Epoch: 4/100
Train Loss: 3.1143 | Train Acc: 22.65%
Test Loss: 3.0337 | Test Acc: 25.93%
----------------------------------------------------------------------
Saved best model to cifar100_resnet101_best.pth [Accuracy: 25.93%]
Memory state after Epoch 4:
GPU memory allocated: 705.53 MB
GPU memory cached: 1462.00 MB
Memory state before Epoch 5:
GPU memory allocated: 705.53 MB
GPU memory cached: 1462.00 MB


Epoch 5: 100%|██████████| 391/391 [01:49<00:00,  3.56it/s, loss=2.86, acc=27.9]
Test: 100%|██████████| 79/79 [00:05<00:00, 13.83it/s, loss=2.86, acc=29.9]


Epoch: 5/100
Train Loss: 2.8572 | Train Acc: 27.87%
Test Loss: 2.8252 | Test Acc: 29.91%
----------------------------------------------------------------------
Saved best model to cifar100_resnet101_best.pth [Accuracy: 29.91%]
Memory state after Epoch 5:
GPU memory allocated: 705.84 MB
GPU memory cached: 1202.00 MB
Memory state before Epoch 6:
GPU memory allocated: 705.84 MB
GPU memory cached: 1202.00 MB


Epoch 6: 100%|██████████| 391/391 [01:52<00:00,  3.46it/s, loss=2.61, acc=32.7]
Test: 100%|██████████| 79/79 [00:05<00:00, 13.84it/s, loss=2.87, acc=31]  


Epoch: 6/100
Train Loss: 2.6065 | Train Acc: 32.65%
Test Loss: 2.8320 | Test Acc: 31.01%
----------------------------------------------------------------------
Saved best model to cifar100_resnet101_best.pth [Accuracy: 31.01%]
Memory state after Epoch 6:
GPU memory allocated: 706.38 MB
GPU memory cached: 1182.00 MB
Memory state before Epoch 7:
GPU memory allocated: 706.38 MB
GPU memory cached: 1182.00 MB


Epoch 7: 100%|██████████| 391/391 [01:50<00:00,  3.53it/s, loss=2.37, acc=37.4]
Test: 100%|██████████| 79/79 [00:05<00:00, 14.81it/s, loss=2.63, acc=35.8]


Epoch: 7/100
Train Loss: 2.3719 | Train Acc: 37.43%
Test Loss: 2.5928 | Test Acc: 35.76%
----------------------------------------------------------------------
Saved best model to cifar100_resnet101_best.pth [Accuracy: 35.76%]
Memory state after Epoch 7:
GPU memory allocated: 705.62 MB
GPU memory cached: 1460.00 MB
Memory state before Epoch 8:
GPU memory allocated: 705.62 MB
GPU memory cached: 1460.00 MB


Epoch 8: 100%|██████████| 391/391 [01:52<00:00,  3.48it/s, loss=2.16, acc=42.1]
Test: 100%|██████████| 79/79 [00:05<00:00, 13.82it/s, loss=2.58, acc=37.7]


Epoch: 8/100
Train Loss: 2.1584 | Train Acc: 42.08%
Test Loss: 2.5472 | Test Acc: 37.66%
----------------------------------------------------------------------
Saved best model to cifar100_resnet101_best.pth [Accuracy: 37.66%]
Memory state after Epoch 8:
GPU memory allocated: 705.25 MB
GPU memory cached: 1184.00 MB
Memory state before Epoch 9:
GPU memory allocated: 705.25 MB
GPU memory cached: 1184.00 MB


Epoch 9: 100%|██████████| 391/391 [01:51<00:00,  3.50it/s, loss=1.98, acc=46]  
Test: 100%|██████████| 79/79 [00:05<00:00, 13.91it/s, loss=2.41, acc=42.1]


Epoch: 9/100
Train Loss: 1.9791 | Train Acc: 45.96%
Test Loss: 2.3823 | Test Acc: 42.15%
----------------------------------------------------------------------
Saved best model to cifar100_resnet101_best.pth [Accuracy: 42.15%]
Memory state after Epoch 9:
GPU memory allocated: 705.25 MB
GPU memory cached: 1184.00 MB
Memory state before Epoch 10:
GPU memory allocated: 705.25 MB
GPU memory cached: 1184.00 MB


Epoch 10: 100%|██████████| 391/391 [01:50<00:00,  3.54it/s, loss=1.81, acc=50.1]
Test: 100%|██████████| 79/79 [00:05<00:00, 14.79it/s, loss=2.05, acc=47.1]


Epoch: 10/100
Train Loss: 1.8092 | Train Acc: 50.10%
Test Loss: 2.0240 | Test Acc: 47.13%
----------------------------------------------------------------------
Saved best model to cifar100_resnet101_best.pth [Accuracy: 47.13%]
Saved checkpoint to cifar100_resnet101_checkpoint_epoch10.pth
Memory state after Epoch 10:
GPU memory allocated: 705.62 MB
GPU memory cached: 1460.00 MB
Memory state before Epoch 11:
GPU memory allocated: 705.62 MB
GPU memory cached: 1460.00 MB


Epoch 11: 100%|██████████| 391/391 [01:52<00:00,  3.49it/s, loss=1.68, acc=53.3]
Test: 100%|██████████| 79/79 [00:05<00:00, 13.86it/s, loss=1.95, acc=48.9]


Epoch: 11/100
Train Loss: 1.6805 | Train Acc: 53.33%
Test Loss: 1.9223 | Test Acc: 48.94%
----------------------------------------------------------------------
Saved best model to cifar100_resnet101_best.pth [Accuracy: 48.94%]
Memory state after Epoch 11:
GPU memory allocated: 705.25 MB
GPU memory cached: 1184.00 MB
Memory state before Epoch 12:
GPU memory allocated: 705.25 MB
GPU memory cached: 1184.00 MB


Epoch 12: 100%|██████████| 391/391 [01:49<00:00,  3.56it/s, loss=1.54, acc=56.3]
Test: 100%|██████████| 79/79 [00:05<00:00, 14.79it/s, loss=1.9, acc=50.6] 


Epoch: 12/100
Train Loss: 1.5432 | Train Acc: 56.33%
Test Loss: 1.8750 | Test Acc: 50.63%
----------------------------------------------------------------------
Saved best model to cifar100_resnet101_best.pth [Accuracy: 50.63%]
Memory state after Epoch 12:
GPU memory allocated: 705.62 MB
GPU memory cached: 1460.00 MB
Memory state before Epoch 13:
GPU memory allocated: 705.62 MB
GPU memory cached: 1460.00 MB


Epoch 13: 100%|██████████| 391/391 [01:50<00:00,  3.53it/s, loss=1.41, acc=59.5]
Test: 100%|██████████| 79/79 [00:05<00:00, 14.94it/s, loss=1.73, acc=54.6]


Epoch: 13/100
Train Loss: 1.4132 | Train Acc: 59.47%
Test Loss: 1.7032 | Test Acc: 54.56%
----------------------------------------------------------------------
Saved best model to cifar100_resnet101_best.pth [Accuracy: 54.56%]
Memory state after Epoch 13:
GPU memory allocated: 705.09 MB
GPU memory cached: 1444.00 MB
Memory state before Epoch 14:
GPU memory allocated: 705.09 MB
GPU memory cached: 1444.00 MB


Epoch 14: 100%|██████████| 391/391 [01:51<00:00,  3.50it/s, loss=1.32, acc=62]  
Test: 100%|██████████| 79/79 [00:05<00:00, 13.98it/s, loss=1.86, acc=52.4]


Epoch: 14/100
Train Loss: 1.3189 | Train Acc: 61.99%
Test Loss: 1.8377 | Test Acc: 52.41%
----------------------------------------------------------------------
Memory state after Epoch 14:
GPU memory allocated: 705.94 MB
GPU memory cached: 1164.00 MB
Memory state before Epoch 15:
GPU memory allocated: 705.94 MB
GPU memory cached: 1164.00 MB


Epoch 15: 100%|██████████| 391/391 [01:50<00:00,  3.53it/s, loss=1.23, acc=64.1]
Test: 100%|██████████| 79/79 [00:05<00:00, 14.90it/s, loss=1.8, acc=54.6] 


Epoch: 15/100
Train Loss: 1.2308 | Train Acc: 64.05%
Test Loss: 1.7789 | Test Acc: 54.65%
----------------------------------------------------------------------
Saved best model to cifar100_resnet101_best.pth [Accuracy: 54.65%]
Memory state after Epoch 15:
GPU memory allocated: 705.09 MB
GPU memory cached: 1480.00 MB
Memory state before Epoch 16:
GPU memory allocated: 705.09 MB
GPU memory cached: 1480.00 MB


Epoch 16: 100%|██████████| 391/391 [01:49<00:00,  3.56it/s, loss=1.14, acc=66.7]
Test: 100%|██████████| 79/79 [00:05<00:00, 14.98it/s, loss=1.69, acc=56.6]


Epoch: 16/100
Train Loss: 1.1416 | Train Acc: 66.73%
Test Loss: 1.6708 | Test Acc: 56.64%
----------------------------------------------------------------------
Saved best model to cifar100_resnet101_best.pth [Accuracy: 56.64%]
Memory state after Epoch 16:
GPU memory allocated: 705.62 MB
GPU memory cached: 1424.00 MB
Memory state before Epoch 17:
GPU memory allocated: 705.62 MB
GPU memory cached: 1424.00 MB


Epoch 17: 100%|██████████| 391/391 [01:50<00:00,  3.54it/s, loss=1.06, acc=68.7]
Test: 100%|██████████| 79/79 [00:05<00:00, 14.79it/s, loss=1.63, acc=57.7]


Epoch: 17/100
Train Loss: 1.0569 | Train Acc: 68.71%
Test Loss: 1.6101 | Test Acc: 57.67%
----------------------------------------------------------------------
Saved best model to cifar100_resnet101_best.pth [Accuracy: 57.67%]
Memory state after Epoch 17:
GPU memory allocated: 705.53 MB
GPU memory cached: 1462.00 MB
Memory state before Epoch 18:
GPU memory allocated: 705.53 MB
GPU memory cached: 1462.00 MB


Epoch 18: 100%|██████████| 391/391 [01:51<00:00,  3.51it/s, loss=0.984, acc=70.5]
Test: 100%|██████████| 79/79 [00:05<00:00, 13.89it/s, loss=1.51, acc=60.8]


Epoch: 18/100
Train Loss: 0.9844 | Train Acc: 70.52%
Test Loss: 1.4934 | Test Acc: 60.79%
----------------------------------------------------------------------
Saved best model to cifar100_resnet101_best.pth [Accuracy: 60.79%]
Memory state after Epoch 18:
GPU memory allocated: 705.25 MB
GPU memory cached: 1184.00 MB
Memory state before Epoch 19:
GPU memory allocated: 705.25 MB
GPU memory cached: 1184.00 MB


Epoch 19: 100%|██████████| 391/391 [01:49<00:00,  3.58it/s, loss=0.915, acc=72.6]
Test: 100%|██████████| 79/79 [00:05<00:00, 14.81it/s, loss=1.61, acc=58.8]


Epoch: 19/100
Train Loss: 0.9147 | Train Acc: 72.58%
Test Loss: 1.5934 | Test Acc: 58.79%
----------------------------------------------------------------------
Memory state after Epoch 19:
GPU memory allocated: 705.53 MB
GPU memory cached: 1462.00 MB
Memory state before Epoch 20:
GPU memory allocated: 705.53 MB
GPU memory cached: 1462.00 MB


Epoch 20: 100%|██████████| 391/391 [01:50<00:00,  3.52it/s, loss=0.855, acc=74.3]
Test: 100%|██████████| 79/79 [00:05<00:00, 13.80it/s, loss=1.61, acc=59.4]


Epoch: 20/100
Train Loss: 0.8550 | Train Acc: 74.25%
Test Loss: 1.5863 | Test Acc: 59.39%
----------------------------------------------------------------------
Saved checkpoint to cifar100_resnet101_checkpoint_epoch20.pth
Memory state after Epoch 20:
GPU memory allocated: 705.25 MB
GPU memory cached: 1184.00 MB
Memory state before Epoch 21:
GPU memory allocated: 705.25 MB
GPU memory cached: 1184.00 MB


Epoch 21: 100%|██████████| 391/391 [01:49<00:00,  3.56it/s, loss=0.793, acc=76]  
Test: 100%|██████████| 79/79 [00:05<00:00, 14.96it/s, loss=1.6, acc=61.2] 


Epoch: 21/100
Train Loss: 0.7934 | Train Acc: 76.01%
Test Loss: 1.5754 | Test Acc: 61.23%
----------------------------------------------------------------------
Saved best model to cifar100_resnet101_best.pth [Accuracy: 61.23%]
Memory state after Epoch 21:
GPU memory allocated: 705.53 MB
GPU memory cached: 1462.00 MB
Memory state before Epoch 22:
GPU memory allocated: 705.53 MB
GPU memory cached: 1462.00 MB


Epoch 22: 100%|██████████| 391/391 [01:50<00:00,  3.53it/s, loss=0.739, acc=77.6]
Test: 100%|██████████| 79/79 [00:05<00:00, 13.99it/s, loss=1.55, acc=61.7]


Epoch: 22/100
Train Loss: 0.7388 | Train Acc: 77.57%
Test Loss: 1.5328 | Test Acc: 61.73%
----------------------------------------------------------------------
Saved best model to cifar100_resnet101_best.pth [Accuracy: 61.73%]
Memory state after Epoch 22:
GPU memory allocated: 705.25 MB
GPU memory cached: 1184.00 MB
Memory state before Epoch 23:
GPU memory allocated: 705.25 MB
GPU memory cached: 1184.00 MB


Epoch 23: 100%|██████████| 391/391 [01:50<00:00,  3.55it/s, loss=0.678, acc=79.2]
Test: 100%|██████████| 79/79 [00:05<00:00, 14.78it/s, loss=1.6, acc=61]   


Epoch: 23/100
Train Loss: 0.6777 | Train Acc: 79.21%
Test Loss: 1.5800 | Test Acc: 61.00%
----------------------------------------------------------------------
Memory state after Epoch 23:
GPU memory allocated: 705.53 MB
GPU memory cached: 1462.00 MB
Memory state before Epoch 24:
GPU memory allocated: 705.53 MB
GPU memory cached: 1462.00 MB


Epoch 24: 100%|██████████| 391/391 [01:51<00:00,  3.51it/s, loss=0.629, acc=80.6]
Test: 100%|██████████| 79/79 [00:05<00:00, 13.76it/s, loss=1.58, acc=61.5]


Epoch: 24/100
Train Loss: 0.6290 | Train Acc: 80.61%
Test Loss: 1.5569 | Test Acc: 61.55%
----------------------------------------------------------------------
Memory state after Epoch 24:
GPU memory allocated: 705.25 MB
GPU memory cached: 1184.00 MB
Memory state before Epoch 25:
GPU memory allocated: 705.25 MB
GPU memory cached: 1184.00 MB


Epoch 25: 100%|██████████| 391/391 [01:50<00:00,  3.55it/s, loss=0.577, acc=82.2]
Test: 100%|██████████| 79/79 [00:05<00:00, 14.87it/s, loss=1.8, acc=59.8] 


Epoch: 25/100
Train Loss: 0.5767 | Train Acc: 82.25%
Test Loss: 1.7761 | Test Acc: 59.81%
----------------------------------------------------------------------
Memory state after Epoch 25:
GPU memory allocated: 705.53 MB
GPU memory cached: 1462.00 MB
Memory state before Epoch 26:
GPU memory allocated: 705.53 MB
GPU memory cached: 1462.00 MB


Epoch 26: 100%|██████████| 391/391 [01:51<00:00,  3.51it/s, loss=0.531, acc=83.5]
Test: 100%|██████████| 79/79 [00:05<00:00, 13.94it/s, loss=1.49, acc=63.9]


Epoch: 26/100
Train Loss: 0.5314 | Train Acc: 83.50%
Test Loss: 1.4709 | Test Acc: 63.85%
----------------------------------------------------------------------
Saved best model to cifar100_resnet101_best.pth [Accuracy: 63.85%]
Memory state after Epoch 26:
GPU memory allocated: 705.25 MB
GPU memory cached: 1184.00 MB
Memory state before Epoch 27:
GPU memory allocated: 705.25 MB
GPU memory cached: 1184.00 MB


Epoch 27: 100%|██████████| 391/391 [01:50<00:00,  3.54it/s, loss=0.491, acc=84.6]
Test: 100%|██████████| 79/79 [00:05<00:00, 14.73it/s, loss=1.78, acc=60.5]


Epoch: 27/100
Train Loss: 0.4915 | Train Acc: 84.63%
Test Loss: 1.7568 | Test Acc: 60.45%
----------------------------------------------------------------------
Memory state after Epoch 27:
GPU memory allocated: 705.53 MB
GPU memory cached: 1462.00 MB
Memory state before Epoch 28:
GPU memory allocated: 705.53 MB
GPU memory cached: 1462.00 MB


Epoch 28: 100%|██████████| 391/391 [01:50<00:00,  3.53it/s, loss=0.454, acc=85.9]
Test: 100%|██████████| 79/79 [00:05<00:00, 13.91it/s, loss=1.56, acc=63.1]


Epoch: 28/100
Train Loss: 0.4538 | Train Acc: 85.87%
Test Loss: 1.5403 | Test Acc: 63.12%
----------------------------------------------------------------------
Memory state after Epoch 28:
GPU memory allocated: 705.25 MB
GPU memory cached: 1184.00 MB
Memory state before Epoch 29:
GPU memory allocated: 705.25 MB
GPU memory cached: 1184.00 MB


Epoch 29: 100%|██████████| 391/391 [01:50<00:00,  3.54it/s, loss=0.408, acc=87.2]
Test: 100%|██████████| 79/79 [00:05<00:00, 14.85it/s, loss=1.5, acc=65]   


Epoch: 29/100
Train Loss: 0.4080 | Train Acc: 87.24%
Test Loss: 1.4795 | Test Acc: 64.95%
----------------------------------------------------------------------
Saved best model to cifar100_resnet101_best.pth [Accuracy: 64.95%]
Memory state after Epoch 29:
GPU memory allocated: 705.53 MB
GPU memory cached: 1462.00 MB
Memory state before Epoch 30:
GPU memory allocated: 705.53 MB
GPU memory cached: 1462.00 MB


Epoch 30: 100%|██████████| 391/391 [01:51<00:00,  3.50it/s, loss=0.371, acc=88.6]
Test: 100%|██████████| 79/79 [00:05<00:00, 13.90it/s, loss=1.61, acc=63.6]


Epoch: 30/100
Train Loss: 0.3708 | Train Acc: 88.59%
Test Loss: 1.5912 | Test Acc: 63.61%
----------------------------------------------------------------------
Saved checkpoint to cifar100_resnet101_checkpoint_epoch30.pth
Memory state after Epoch 30:
GPU memory allocated: 705.25 MB
GPU memory cached: 1184.00 MB
Memory state before Epoch 31:
GPU memory allocated: 705.25 MB
GPU memory cached: 1184.00 MB


Epoch 31: 100%|██████████| 391/391 [01:50<00:00,  3.54it/s, loss=0.338, acc=89.6]
Test: 100%|██████████| 79/79 [00:05<00:00, 14.69it/s, loss=1.56, acc=64.2]


Epoch: 31/100
Train Loss: 0.3383 | Train Acc: 89.58%
Test Loss: 1.5365 | Test Acc: 64.25%
----------------------------------------------------------------------
Memory state after Epoch 31:
GPU memory allocated: 705.53 MB
GPU memory cached: 1462.00 MB
Memory state before Epoch 32:
GPU memory allocated: 705.53 MB
GPU memory cached: 1462.00 MB


Epoch 32: 100%|██████████| 391/391 [01:51<00:00,  3.51it/s, loss=0.308, acc=90.5]
Test: 100%|██████████| 79/79 [00:05<00:00, 13.93it/s, loss=1.61, acc=64.3]


Epoch: 32/100
Train Loss: 0.3085 | Train Acc: 90.49%
Test Loss: 1.5878 | Test Acc: 64.29%
----------------------------------------------------------------------
Memory state after Epoch 32:
GPU memory allocated: 705.25 MB
GPU memory cached: 1184.00 MB
Memory state before Epoch 33:
GPU memory allocated: 705.25 MB
GPU memory cached: 1184.00 MB


Epoch 33: 100%|██████████| 391/391 [01:50<00:00,  3.55it/s, loss=0.274, acc=91.6]
Test: 100%|██████████| 79/79 [00:05<00:00, 14.78it/s, loss=1.59, acc=64.9]


Epoch: 33/100
Train Loss: 0.2741 | Train Acc: 91.57%
Test Loss: 1.5682 | Test Acc: 64.93%
----------------------------------------------------------------------
Memory state after Epoch 33:
GPU memory allocated: 705.53 MB
GPU memory cached: 1462.00 MB
Memory state before Epoch 34:
GPU memory allocated: 705.53 MB
GPU memory cached: 1462.00 MB


Epoch 34: 100%|██████████| 391/391 [01:51<00:00,  3.52it/s, loss=0.243, acc=92.7]
Test: 100%|██████████| 79/79 [00:05<00:00, 13.91it/s, loss=1.52, acc=65.9]


Epoch: 34/100
Train Loss: 0.2430 | Train Acc: 92.65%
Test Loss: 1.4983 | Test Acc: 65.91%
----------------------------------------------------------------------
Saved best model to cifar100_resnet101_best.pth [Accuracy: 65.91%]
Memory state after Epoch 34:
GPU memory allocated: 705.25 MB
GPU memory cached: 1184.00 MB
Memory state before Epoch 35:
GPU memory allocated: 705.25 MB
GPU memory cached: 1184.00 MB


Epoch 35: 100%|██████████| 391/391 [01:49<00:00,  3.56it/s, loss=0.227, acc=93.3]
Test: 100%|██████████| 79/79 [00:05<00:00, 14.87it/s, loss=1.65, acc=63.6]


Epoch: 35/100
Train Loss: 0.2265 | Train Acc: 93.30%
Test Loss: 1.6318 | Test Acc: 63.64%
----------------------------------------------------------------------
Memory state after Epoch 35:
GPU memory allocated: 705.53 MB
GPU memory cached: 1462.00 MB
Memory state before Epoch 36:
GPU memory allocated: 705.53 MB
GPU memory cached: 1462.00 MB


Epoch 36: 100%|██████████| 391/391 [01:51<00:00,  3.50it/s, loss=0.193, acc=94.5]
Test: 100%|██████████| 79/79 [00:05<00:00, 13.84it/s, loss=1.47, acc=66.7]


Epoch: 36/100
Train Loss: 0.1928 | Train Acc: 94.50%
Test Loss: 1.4526 | Test Acc: 66.67%
----------------------------------------------------------------------
Saved best model to cifar100_resnet101_best.pth [Accuracy: 66.67%]
Memory state after Epoch 36:
GPU memory allocated: 705.25 MB
GPU memory cached: 1184.00 MB
Memory state before Epoch 37:
GPU memory allocated: 705.25 MB
GPU memory cached: 1184.00 MB


Epoch 37: 100%|██████████| 391/391 [01:50<00:00,  3.54it/s, loss=0.163, acc=95.5]
Test: 100%|██████████| 79/79 [00:05<00:00, 14.80it/s, loss=1.57, acc=66.2]


Epoch: 37/100
Train Loss: 0.1635 | Train Acc: 95.50%
Test Loss: 1.5472 | Test Acc: 66.25%
----------------------------------------------------------------------
Memory state after Epoch 37:
GPU memory allocated: 705.53 MB
GPU memory cached: 1462.00 MB
Memory state before Epoch 38:
GPU memory allocated: 705.53 MB
GPU memory cached: 1462.00 MB


Epoch 38: 100%|██████████| 391/391 [01:51<00:00,  3.51it/s, loss=0.149, acc=96]  
Test: 100%|██████████| 79/79 [00:05<00:00, 13.86it/s, loss=1.51, acc=66.4]


Epoch: 38/100
Train Loss: 0.1491 | Train Acc: 95.95%
Test Loss: 1.4951 | Test Acc: 66.39%
----------------------------------------------------------------------
Memory state after Epoch 38:
GPU memory allocated: 705.25 MB
GPU memory cached: 1184.00 MB
Memory state before Epoch 39:
GPU memory allocated: 705.25 MB
GPU memory cached: 1184.00 MB


Epoch 39: 100%|██████████| 391/391 [01:50<00:00,  3.55it/s, loss=0.129, acc=96.6]
Test: 100%|██████████| 79/79 [00:05<00:00, 14.93it/s, loss=1.49, acc=68.1]


Epoch: 39/100
Train Loss: 0.1285 | Train Acc: 96.58%
Test Loss: 1.4723 | Test Acc: 68.07%
----------------------------------------------------------------------
Saved best model to cifar100_resnet101_best.pth [Accuracy: 68.07%]
Memory state after Epoch 39:
GPU memory allocated: 705.53 MB
GPU memory cached: 1462.00 MB
Memory state before Epoch 40:
GPU memory allocated: 705.53 MB
GPU memory cached: 1462.00 MB


Epoch 40: 100%|██████████| 391/391 [01:50<00:00,  3.53it/s, loss=0.109, acc=97.3] 
Test: 100%|██████████| 79/79 [00:05<00:00, 13.84it/s, loss=1.46, acc=67.9]


Epoch: 40/100
Train Loss: 0.1088 | Train Acc: 97.29%
Test Loss: 1.4369 | Test Acc: 67.91%
----------------------------------------------------------------------
Saved checkpoint to cifar100_resnet101_checkpoint_epoch40.pth
Memory state after Epoch 40:
GPU memory allocated: 705.25 MB
GPU memory cached: 1184.00 MB
Memory state before Epoch 41:
GPU memory allocated: 705.25 MB
GPU memory cached: 1184.00 MB


Epoch 41: 100%|██████████| 391/391 [01:50<00:00,  3.54it/s, loss=0.0927, acc=97.8]
Test: 100%|██████████| 79/79 [00:05<00:00, 14.73it/s, loss=1.45, acc=68.7]


Epoch: 41/100
Train Loss: 0.0927 | Train Acc: 97.77%
Test Loss: 1.4333 | Test Acc: 68.66%
----------------------------------------------------------------------
Saved best model to cifar100_resnet101_best.pth [Accuracy: 68.66%]
Memory state after Epoch 41:
GPU memory allocated: 705.53 MB
GPU memory cached: 1462.00 MB
Memory state before Epoch 42:
GPU memory allocated: 705.53 MB
GPU memory cached: 1462.00 MB


Epoch 42: 100%|██████████| 391/391 [01:51<00:00,  3.52it/s, loss=0.0821, acc=98.1]
Test: 100%|██████████| 79/79 [00:05<00:00, 13.82it/s, loss=1.39, acc=69.6]


Epoch: 42/100
Train Loss: 0.0821 | Train Acc: 98.12%
Test Loss: 1.3679 | Test Acc: 69.56%
----------------------------------------------------------------------
Saved best model to cifar100_resnet101_best.pth [Accuracy: 69.56%]
Memory state after Epoch 42:
GPU memory allocated: 705.25 MB
GPU memory cached: 1184.00 MB
Memory state before Epoch 43:
GPU memory allocated: 705.25 MB
GPU memory cached: 1184.00 MB


Epoch 43: 100%|██████████| 391/391 [01:49<00:00,  3.57it/s, loss=0.0685, acc=98.6]
Test: 100%|██████████| 79/79 [00:05<00:00, 14.73it/s, loss=1.4, acc=69.7] 


Epoch: 43/100
Train Loss: 0.0685 | Train Acc: 98.58%
Test Loss: 1.3851 | Test Acc: 69.73%
----------------------------------------------------------------------
Saved best model to cifar100_resnet101_best.pth [Accuracy: 69.73%]
Memory state after Epoch 43:
GPU memory allocated: 705.53 MB
GPU memory cached: 1462.00 MB
Memory state before Epoch 44:
GPU memory allocated: 705.53 MB
GPU memory cached: 1462.00 MB


Epoch 44: 100%|██████████| 391/391 [01:51<00:00,  3.51it/s, loss=0.0603, acc=98.7]
Test: 100%|██████████| 79/79 [00:05<00:00, 13.89it/s, loss=1.38, acc=69.8]


Epoch: 44/100
Train Loss: 0.0603 | Train Acc: 98.75%
Test Loss: 1.3673 | Test Acc: 69.79%
----------------------------------------------------------------------
Saved best model to cifar100_resnet101_best.pth [Accuracy: 69.79%]
Memory state after Epoch 44:
GPU memory allocated: 705.25 MB
GPU memory cached: 1184.00 MB
Memory state before Epoch 45:
GPU memory allocated: 705.25 MB
GPU memory cached: 1184.00 MB


Epoch 45: 100%|██████████| 391/391 [01:50<00:00,  3.54it/s, loss=0.0553, acc=98.9]
Test: 100%|██████████| 79/79 [00:05<00:00, 14.96it/s, loss=1.42, acc=69.8]


Epoch: 45/100
Train Loss: 0.0553 | Train Acc: 98.93%
Test Loss: 1.4019 | Test Acc: 69.81%
----------------------------------------------------------------------
Saved best model to cifar100_resnet101_best.pth [Accuracy: 69.81%]
Memory state after Epoch 45:
GPU memory allocated: 705.53 MB
GPU memory cached: 1462.00 MB
Memory state before Epoch 46:
GPU memory allocated: 705.53 MB
GPU memory cached: 1462.00 MB


Epoch 46: 100%|██████████| 391/391 [01:51<00:00,  3.51it/s, loss=0.0447, acc=99.2]
Test: 100%|██████████| 79/79 [00:05<00:00, 13.85it/s, loss=1.37, acc=70.5]


Epoch: 46/100
Train Loss: 0.0447 | Train Acc: 99.25%
Test Loss: 1.3486 | Test Acc: 70.50%
----------------------------------------------------------------------
Saved best model to cifar100_resnet101_best.pth [Accuracy: 70.50%]
Memory state after Epoch 46:
GPU memory allocated: 705.25 MB
GPU memory cached: 1184.00 MB
Memory state before Epoch 47:
GPU memory allocated: 705.25 MB
GPU memory cached: 1184.00 MB


Epoch 47: 100%|██████████| 391/391 [01:50<00:00,  3.53it/s, loss=0.0367, acc=99.4]
Test: 100%|██████████| 79/79 [00:05<00:00, 14.66it/s, loss=1.35, acc=70.8]


Epoch: 47/100
Train Loss: 0.0367 | Train Acc: 99.41%
Test Loss: 1.3327 | Test Acc: 70.75%
----------------------------------------------------------------------
Saved best model to cifar100_resnet101_best.pth [Accuracy: 70.75%]
Memory state after Epoch 47:
GPU memory allocated: 705.53 MB
GPU memory cached: 1462.00 MB
Memory state before Epoch 48:
GPU memory allocated: 705.53 MB
GPU memory cached: 1462.00 MB


Epoch 48: 100%|██████████| 391/391 [01:51<00:00,  3.51it/s, loss=0.0329, acc=99.6]
Test: 100%|██████████| 79/79 [00:05<00:00, 13.97it/s, loss=1.33, acc=71.3]


Epoch: 48/100
Train Loss: 0.0329 | Train Acc: 99.57%
Test Loss: 1.3126 | Test Acc: 71.34%
----------------------------------------------------------------------
Saved best model to cifar100_resnet101_best.pth [Accuracy: 71.34%]
Memory state after Epoch 48:
GPU memory allocated: 705.25 MB
GPU memory cached: 1184.00 MB
Memory state before Epoch 49:
GPU memory allocated: 705.25 MB
GPU memory cached: 1184.00 MB


Epoch 49: 100%|██████████| 391/391 [01:50<00:00,  3.55it/s, loss=0.0261, acc=99.7]
Test: 100%|██████████| 79/79 [00:05<00:00, 14.69it/s, loss=1.29, acc=71.5]


Epoch: 49/100
Train Loss: 0.0261 | Train Acc: 99.69%
Test Loss: 1.2769 | Test Acc: 71.55%
----------------------------------------------------------------------
Saved best model to cifar100_resnet101_best.pth [Accuracy: 71.55%]
Memory state after Epoch 49:
GPU memory allocated: 705.53 MB
GPU memory cached: 1462.00 MB
Memory state before Epoch 50:
GPU memory allocated: 705.53 MB
GPU memory cached: 1462.00 MB


Epoch 50: 100%|██████████| 391/391 [01:50<00:00,  3.53it/s, loss=0.0224, acc=99.8]
Test: 100%|██████████| 79/79 [00:05<00:00, 13.90it/s, loss=1.3, acc=71.7] 


Epoch: 50/100
Train Loss: 0.0224 | Train Acc: 99.77%
Test Loss: 1.2840 | Test Acc: 71.69%
----------------------------------------------------------------------
Saved best model to cifar100_resnet101_best.pth [Accuracy: 71.69%]
Saved checkpoint to cifar100_resnet101_checkpoint_epoch50.pth
Memory state after Epoch 50:
GPU memory allocated: 705.25 MB
GPU memory cached: 1184.00 MB
Memory state before Epoch 51:
GPU memory allocated: 705.25 MB
GPU memory cached: 1184.00 MB


Epoch 51: 100%|██████████| 391/391 [01:50<00:00,  3.54it/s, loss=0.0207, acc=99.8]
Test: 100%|██████████| 79/79 [00:05<00:00, 14.82it/s, loss=1.31, acc=71.7]


Epoch: 51/100
Train Loss: 0.0207 | Train Acc: 99.80%
Test Loss: 1.2967 | Test Acc: 71.65%
----------------------------------------------------------------------
Memory state after Epoch 51:
GPU memory allocated: 705.53 MB
GPU memory cached: 1462.00 MB
Memory state before Epoch 52:
GPU memory allocated: 705.53 MB
GPU memory cached: 1462.00 MB


Epoch 52: 100%|██████████| 391/391 [01:51<00:00,  3.51it/s, loss=0.0154, acc=99.9]
Test: 100%|██████████| 79/79 [00:05<00:00, 13.88it/s, loss=1.27, acc=72.4]


Epoch: 52/100
Train Loss: 0.0154 | Train Acc: 99.88%
Test Loss: 1.2547 | Test Acc: 72.40%
----------------------------------------------------------------------
Saved best model to cifar100_resnet101_best.pth [Accuracy: 72.40%]
Memory state after Epoch 52:
GPU memory allocated: 705.25 MB
GPU memory cached: 1184.00 MB
Memory state before Epoch 53:
GPU memory allocated: 705.25 MB
GPU memory cached: 1184.00 MB


Epoch 53: 100%|██████████| 391/391 [01:50<00:00,  3.54it/s, loss=0.0145, acc=99.9]
Test: 100%|██████████| 79/79 [00:05<00:00, 14.79it/s, loss=1.26, acc=72.5]


Epoch: 53/100
Train Loss: 0.0145 | Train Acc: 99.91%
Test Loss: 1.2413 | Test Acc: 72.47%
----------------------------------------------------------------------
Saved best model to cifar100_resnet101_best.pth [Accuracy: 72.47%]
Memory state after Epoch 53:
GPU memory allocated: 705.53 MB
GPU memory cached: 1462.00 MB
Memory state before Epoch 54:
GPU memory allocated: 705.53 MB
GPU memory cached: 1462.00 MB


Epoch 54: 100%|██████████| 391/391 [01:51<00:00,  3.51it/s, loss=0.0147, acc=99.9]
Test: 100%|██████████| 79/79 [00:05<00:00, 13.86it/s, loss=1.27, acc=72.2]


Epoch: 54/100
Train Loss: 0.0147 | Train Acc: 99.88%
Test Loss: 1.2585 | Test Acc: 72.25%
----------------------------------------------------------------------
Memory state after Epoch 54:
GPU memory allocated: 705.25 MB
GPU memory cached: 1184.00 MB
Memory state before Epoch 55:
GPU memory allocated: 705.25 MB
GPU memory cached: 1184.00 MB


Epoch 55: 100%|██████████| 391/391 [01:49<00:00,  3.56it/s, loss=0.0138, acc=99.9]
Test: 100%|██████████| 79/79 [00:05<00:00, 14.81it/s, loss=1.25, acc=72.5]


Epoch: 55/100
Train Loss: 0.0138 | Train Acc: 99.89%
Test Loss: 1.2325 | Test Acc: 72.53%
----------------------------------------------------------------------
Saved best model to cifar100_resnet101_best.pth [Accuracy: 72.53%]
Memory state after Epoch 55:
GPU memory allocated: 705.53 MB
GPU memory cached: 1462.00 MB
Memory state before Epoch 56:
GPU memory allocated: 705.53 MB
GPU memory cached: 1462.00 MB


Epoch 56: 100%|██████████| 391/391 [01:51<00:00,  3.52it/s, loss=0.012, acc=99.9] 
Test: 100%|██████████| 79/79 [00:05<00:00, 13.86it/s, loss=1.24, acc=72.7]


Epoch: 56/100
Train Loss: 0.0120 | Train Acc: 99.92%
Test Loss: 1.2287 | Test Acc: 72.67%
----------------------------------------------------------------------
Saved best model to cifar100_resnet101_best.pth [Accuracy: 72.67%]
Memory state after Epoch 56:
GPU memory allocated: 705.25 MB
GPU memory cached: 1184.00 MB
Memory state before Epoch 57:
GPU memory allocated: 705.25 MB
GPU memory cached: 1184.00 MB


Epoch 57: 100%|██████████| 391/391 [01:50<00:00,  3.55it/s, loss=0.0108, acc=100] 
Test: 100%|██████████| 79/79 [00:05<00:00, 14.79it/s, loss=1.23, acc=72.7]


Epoch: 57/100
Train Loss: 0.0108 | Train Acc: 99.95%
Test Loss: 1.2176 | Test Acc: 72.65%
----------------------------------------------------------------------
Memory state after Epoch 57:
GPU memory allocated: 705.53 MB
GPU memory cached: 1462.00 MB
Memory state before Epoch 58:
GPU memory allocated: 705.53 MB
GPU memory cached: 1462.00 MB


Epoch 58: 100%|██████████| 391/391 [01:51<00:00,  3.51it/s, loss=0.0107, acc=99.9]
Test: 100%|██████████| 79/79 [00:05<00:00, 13.92it/s, loss=1.22, acc=72.8]


Epoch: 58/100
Train Loss: 0.0107 | Train Acc: 99.94%
Test Loss: 1.2044 | Test Acc: 72.77%
----------------------------------------------------------------------
Saved best model to cifar100_resnet101_best.pth [Accuracy: 72.77%]
Memory state after Epoch 58:
GPU memory allocated: 705.25 MB
GPU memory cached: 1184.00 MB
Memory state before Epoch 59:
GPU memory allocated: 705.25 MB
GPU memory cached: 1184.00 MB


Epoch 59: 100%|██████████| 391/391 [01:50<00:00,  3.55it/s, loss=0.0106, acc=99.9] 
Test: 100%|██████████| 79/79 [00:05<00:00, 14.75it/s, loss=1.21, acc=72.8]


Epoch: 59/100
Train Loss: 0.0106 | Train Acc: 99.93%
Test Loss: 1.1996 | Test Acc: 72.75%
----------------------------------------------------------------------
Memory state after Epoch 59:
GPU memory allocated: 705.53 MB
GPU memory cached: 1462.00 MB
Memory state before Epoch 60:
GPU memory allocated: 705.53 MB
GPU memory cached: 1462.00 MB


Epoch 60: 100%|██████████| 391/391 [01:51<00:00,  3.50it/s, loss=0.00984, acc=100]
Test: 100%|██████████| 79/79 [00:05<00:00, 13.87it/s, loss=1.2, acc=72.9] 


Epoch: 60/100
Train Loss: 0.0098 | Train Acc: 99.96%
Test Loss: 1.1825 | Test Acc: 72.94%
----------------------------------------------------------------------
Saved best model to cifar100_resnet101_best.pth [Accuracy: 72.94%]
Saved checkpoint to cifar100_resnet101_checkpoint_epoch60.pth
Memory state after Epoch 60:
GPU memory allocated: 705.25 MB
GPU memory cached: 1184.00 MB
Memory state before Epoch 61:
GPU memory allocated: 705.25 MB
GPU memory cached: 1184.00 MB


Epoch 69: 100%|██████████| 391/391 [01:50<00:00,  3.54it/s, loss=0.00765, acc=100]
Test: 100%|██████████| 79/79 [00:05<00:00, 14.82it/s, loss=1.18, acc=72.9]


Epoch: 69/100
Train Loss: 0.0076 | Train Acc: 99.96%
Test Loss: 1.1663 | Test Acc: 72.94%
----------------------------------------------------------------------
Memory state after Epoch 69:
GPU memory allocated: 705.53 MB
GPU memory cached: 1462.00 MB
Memory state before Epoch 70:
GPU memory allocated: 705.53 MB
GPU memory cached: 1462.00 MB


Epoch 70: 100%|██████████| 391/391 [01:51<00:00,  3.52it/s, loss=0.00769, acc=100] 
Test: 100%|██████████| 79/79 [00:05<00:00, 13.90it/s, loss=1.18, acc=73.1]


Epoch: 70/100
Train Loss: 0.0077 | Train Acc: 99.96%
Test Loss: 1.1609 | Test Acc: 73.07%
----------------------------------------------------------------------
Saved checkpoint to cifar100_resnet101_checkpoint_epoch70.pth
Memory state after Epoch 70:
GPU memory allocated: 705.25 MB
GPU memory cached: 1184.00 MB
Memory state before Epoch 71:
GPU memory allocated: 705.25 MB
GPU memory cached: 1184.00 MB


Epoch 71:  91%|█████████ | 356/391 [01:39<00:09,  3.59it/s, loss=0.00726, acc=100]