In [111]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision
import torchvision.transforms as transforms
import torch.optim as optim
import time

In [112]:
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
print(device)

cuda:0


In [113]:
pre_epoch = 0
BATCH_SIZE = 128
LR = 0.01

In [114]:
transform_train = transforms.Compose([
    transforms.RandomCrop(32, padding=4),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
])

transform_test = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
])

trainset = torchvision.datasets.CIFAR10(root='./data', train=True, download=True, transform=transform_train) 
trainloader = torch.utils.data.DataLoader(trainset, batch_size=BATCH_SIZE, shuffle=True, num_workers=2) 

testset = torchvision.datasets.CIFAR10(root='./data', train=False, download=True, transform=transform_test)
testloader = torch.utils.data.DataLoader(testset, batch_size=BATCH_SIZE, shuffle=False, num_workers=2)

Files already downloaded and verified
Files already downloaded and verified


# Instrumentation et évaluation "en continu" du système #

## Modifier les fonctions pour calculer à chaque étape le nombre d'opérations flottantes effectuées, séparément pour les additions, les multiplications, les maximums et le total. ##

In [115]:
class Net(nn.Module):
    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(3, 6, 5)
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(6, 16, 5)
        self.fc1 = nn.Linear(16 * 5 * 5, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)

    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = torch.flatten(x, 1)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x

    def count_maxpool_operations(self, input, output, pool_layer, out_channels):
        kernel_maxpooling = pool_layer.kernel_size
        stride = pool_layer.stride
        padding = pool_layer.padding
        output_height = output.shape[2]
        output_width = output.shape[3]
        out_channels =  output.shape[1]
        num_max = output_height * output_width * (kernel_maxpooling**2 -1) * out_channels
        return num_max

    def count_conv_operations(self, input, output, output_pooled, conv_layer, pool_layer):
        out_channels, in_channels = output.size(1), conv_layer.in_channels
        output_height, output_width = output.size(2), output.size(3)
        filter_size = conv_layer.kernel_size[0]
        stride = conv_layer.stride[0]
        padding = conv_layer.padding[0]
        num_mults = output_height * output_width * in_channels * filter_size ** 2 * out_channels
        num_adds = output_height * output_width * in_channels * filter_size ** 2 * out_channels
        num_maxs = self.count_maxpool_operations(output, output_pooled, pool_layer, out_channels)
        total_ops = num_mults + num_adds + num_maxs
        return num_mults, num_adds, num_maxs, total_ops

    def count_operations(self, x):
        conv1_out = self.conv1(x)
        conv1_out_pooled = self.pool(F.relu(conv1_out))
        conv2_out = self.conv2(conv1_out_pooled)
        conv2_out_pooled = self.pool(F.relu(conv2_out))
        conv1_ops = self.count_conv_operations(x, conv1_out, conv1_out_pooled, self.conv1, self.pool)
        conv2_ops = self.count_conv_operations(conv1_out_pooled, conv2_out, conv2_out_pooled, self.conv2, self.pool)
        return conv1_ops, conv2_ops        

    def count_fc_operations(self, input, fc_layer):
        in_features = fc_layer.in_features
        out_features = fc_layer.out_features
        num_mults = out_features * in_features
        num_adds = out_features * in_features
        num_maxs = 0    
        total_ops = num_mults + num_adds
        return num_mults, num_adds, num_maxs, total_ops

    def count_total_operations(self, x):
        conv1_ops, conv2_ops = self.count_operations(x)
        fc1_ops = self.count_fc_operations(x, self.fc1)
        fc2_ops = self.count_fc_operations(x, self.fc2)
        fc3_ops = self.count_fc_operations(x, self.fc3)
        total_ops = sum(op[3] for op in [conv1_ops, conv2_ops, fc1_ops, fc2_ops, fc3_ops])
        return total_ops


## Dans la partie entraînement du réseau CNN, lister les différentes couches et sous-couches. ##

### La couche de convolution : ###
La couche de convolution applique un ensemble de filtres convolutifs aux images en entrée, chacun d'entre eux activant certaines caractéristiques des images. 

- `self.conv1` Une première couche de convolution prend en entrée des images RGB et produit 6 filtres de convolution de taille 5×5 sans padding.
- `self.conv2` Une deuxième couche de convolution prend 6 entrées et produit 16 filtres de convolution de taille 5×5 sans padding.

### Les couches entièrement connectées : ###
Chaque couche entièrement connectée effectue une transformation linéaire suivie d'une activation ReLU, qui introduit de la non-linéarité dans le réseau.  Du coup, dans chacune des couches entièrement connectées (`self.fc1`, `self.fc2` et `self.fc3`), il y a une sous-couche linéaire suivie d'une sous-couche non linéaire. Ces couches sont responsables de la combinaison des caractéristiques extraites par les couches de convolution précédentes pour effectuer la tâche de classification finale.

`self.fc1`: 
- Il y a 120 neurones dans la couche entièrement connectée, chacun connecté à une entrée de taille 400.
- Sous-couche linéaire : Elle effectue une transformation linéaire des caractéristiques d'entrée.
- Sous-couche non linéaire : Suite à la transformation linéaire, une activation ReLU est appliquée. Cela introduit de la non-linéarité dans la sortie de la couche.

`self.fc2`:
- 84 neurones dans la deuxième couche entièrement connectée, chacun connecté aux 120 neurones de la couche précédente.
- Sous-couche linéaire : Elle effectue une transformation linéaire des caractéristiques d'entrée.
- Sous-couche non linéaire : Suite à la transformation linéaire, une activation ReLU est appliquée. Cela introduit de la non-linéarité dans la sortie de la couche.

`self.fc3`:
- 10 neurones dans la dernière couche entièrement connectée, chacun connecté aux 84 neurones de la couche précédente.
- Sous-couche linéaire : Il s'agit de la dernière transformation linéaire qui produit la sortie finale du réseau sans RELU.

### La couche de pooling : ###
L'opération de pooling consiste à réduire la taille des images, tout en préservant leurs caractéristiques importantes. Elle est utilisée après chaque couche de convolution.
- `self.pool` Cette couche en effectuant une opération de max pooling avec une fenêtre de taille 2x2 et un pas de 2. 

## Donner la taille des différents tenseurs de données Xn et de poids Wn le long du calcul. ##

### Convolutional Layer 1 (conv1): ###
- X1 = 3×32×32 = 3072
- Poids W1:
    - [6, 3, 5, 5]: Poids de convolution: 6×3×5×5=450
    - Bias: [6]

### Convolutional Layer 2 (conv2): ###
- X2 = 6×14×14 = 1176
- Poids W2:
    - [16, 6, 5, 5]: Poids de convolution: 16×6×5×5=2400
    - Bias: [16]

### Fully Connected Layer 1 (fc1): ###
- X3 = 16×5×5 = 400
- Poids W3:
    - [120, 400]: Poids de convolution: 120×400 = 48000
    - Bias: [120]

### Fully Connected Layer 2 (fc2): ###
- X4 = 120
- Poids W4:
    - [84, 120]: Poids de convolution: 84×120=10080
    - Bias: [84]
  
### Fully Connected Layer 3 (fc3): ###
- X5 = 84
- Poids W5:
    - [10, 84]: Poids de convolution: 10×84=840
    - Bias: [10]

Fonction spécifique pour le calcul de la précision globale, de l'erreur, du nombre d'opérations flottantes effectuées et du nombre total d'opérations flottantes par secondesur l'ensemble de test

In [116]:
class ResidualBlock(nn.Module):
    def __init__(self, inchannel, outchannel, stride=1):
        super(ResidualBlock, self).__init__()
        self.conv1 = nn.Conv2d(inchannel, outchannel, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(outchannel)
        self.relu = nn.ReLU(inplace=True)
        self.conv2 = nn.Conv2d(outchannel, outchannel, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(outchannel)
        self.shortcut = nn.Sequential()
        if stride != 1 or inchannel != outchannel:
            self.shortcut = nn.Sequential(
                nn.Conv2d(inchannel, outchannel, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(outchannel)
            )

    def forward(self, x):
        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)
        out = self.conv2(out)
        out = self.bn2(out)
        out += self.shortcut(x)
        out = self.relu(out)
        return out

In [117]:
class ResNet(nn.Module):
    def __init__(self, block, num_blocks, num_classes=10):
        super(ResNet, self).__init__()
        self.in_planes = 64
        self.conv1 = nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        self.relu = nn.ReLU(inplace=True)
        self.layer1 = self.make_layer(block, 64, num_blocks[0], stride=1)
        self.fc = nn.Linear(1024, num_classes)

    def make_layer(self, block, channels, num_blocks, stride):
        strides = [stride] + [1] * (num_blocks - 1)
        layers = []
        inchannel = 64 
        for stride in strides:
            layers.append(block(inchannel, channels, stride))
            inchannel = channels
        return nn.Sequential(*layers)

    def forward(self, x):
        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)
        out = self.layer1(out)
        out = F.avg_pool2d(out, 8)
        out = torch.flatten(out, 1)
        out = self.fc(out)
        return out

    def count_conv_operations(self, input, output, output_pooled, stride, padding, kernel_size):
        out_channels = output.size(1)
        output_height, output_width = output.size(2), output.size(3)
        num_mults = output_height * output_width * input.size(1) * kernel_size ** 2 * out_channels
        num_adds = output_height * output_width * input.size(1) * kernel_size ** 2 * out_channels
        num_maxs = self.count_maxpool_operations(output, output_pooled)
        total_ops = num_mults + num_adds + num_maxs
        return num_mults, num_adds, num_maxs, total_ops

    def count_maxpool_operations(self, output, output_pooled):
        kernel_size = 2 
        stride = 2 
        padding = 0
        output_height = output.shape[2]
        output_width = output.shape[3]
        out_channels = output.shape[1]
        num_max = ((output_height + 2 * padding - kernel_size) // stride + 1) * ((output_width + 2 * padding - kernel_size) // stride + 1) * (kernel_size ** 2 - 1) * out_channels
        return num_max


    def count_fc_operations(self, input, fc_layer):
        in_features = fc_layer.in_features
        out_features = fc_layer.out_features    
        num_mults = out_features * in_features
        num_adds = out_features * in_features
        num_maxs = 0
        total_ops = num_mults + num_adds
        return num_mults, num_adds, num_maxs, total_ops

    def count_total_operations(self, x):
        conv1_out = self.conv1(x)
        conv1_out_pooled = self.relu(self.bn1(conv1_out))
        conv1_ops = self.count_conv_operations(x, conv1_out, conv1_out_pooled, self.conv1, None, kernel_size=3)

        layer1_out = self.layer1(conv1_out_pooled)
        layer1_out_pooled = F.avg_pool2d(layer1_out, 8)
        layer1_ops = self.count_conv_operations(conv1_out_pooled, layer1_out, layer1_out_pooled, stride=1, padding=1, kernel_size=3)

        fc_ops = self.count_fc_operations(x, self.fc)

        total_ops = sum(op[3] for op in [conv1_ops, layer1_ops, fc_ops])
        return total_ops

In [118]:
def ResNet18():
    return ResNet(ResidualBlock, [2, 2])

In [119]:
class AlexNet(nn.Module):
    def __init__(self):
        super(AlexNet, self).__init__()
  
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=16, kernel_size=5, stride=1, padding=2)
        self.maxpool1 = nn.MaxPool2d(kernel_size=3, stride=2, padding=0)
        self.conv2 = nn.Conv2d(in_channels=16, out_channels=48, kernel_size=3, padding=1)
        self.batchnorm = nn.BatchNorm2d(48)
        self.maxpool2 = nn.MaxPool2d(kernel_size=3, stride=2, padding=0)
        self.conv3 = nn.Conv2d(in_channels=48, out_channels=64, kernel_size=3, padding=1)
        self.conv4 = nn.Conv2d(in_channels=64, out_channels=64, kernel_size=3, padding=1)
        self.conv5 = nn.Conv2d(in_channels=64, out_channels=48, kernel_size=3, padding=1)
        self.maxpool3 = nn.MaxPool2d(kernel_size=3, stride=2, padding=0)

        self.fc1 = nn.Linear(in_features=3*3*48, out_features=128)
        self.dropout = nn.Dropout(p=0.5)
        self.fc2 = nn.Linear(128, 128)
        self.fc3 = nn.Linear(128, 10)

    def forward(self, x):
        x = F.relu(self.conv1(x))
        x = self.maxpool1(x)
        x = F.relu(self.conv2(x))
        x = self.batchnorm(x)
        x = self.maxpool2(x)
        x = F.relu(self.conv3(x))
        x = F.relu(self.conv4(x))
        x = F.relu(self.conv5(x))
        x = self.maxpool3(x)
        x = torch.flatten(x, 1)
        x = F.relu(self.fc1(x))
        x = self.dropout(x)
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x

    def count_maxpool_operations(self, input, output, pool_layer):
        if pool_layer:
            kernel_size = pool_layer.kernel_size
            stride = pool_layer.stride
            padding = pool_layer.padding
            output_height = output.shape[2]
            output_width = output.shape[3]
            out_channels = output.shape[1]
            num_max = ((output_height + 2 * padding - kernel_size) // stride + 1) * ((output_width + 2 * padding - kernel_size) // stride + 1) * (kernel_size ** 2 - 1) * out_channels  
        else:
            num_max = 0
        return num_max

    def count_conv_operations(self, input, output, output_pooled, conv_layer, pool_layer):
        out_channels, in_channels = output.size(1), conv_layer.in_channels
        output_height, output_width = output.size(2), output.size(3)
        filter_size = conv_layer.kernel_size[0]
        stride = conv_layer.stride[0]
        padding = conv_layer.padding[0]
        num_mults = output_height * output_width * in_channels * filter_size ** 2 * out_channels
        num_adds = output_height * output_width * in_channels * filter_size ** 2 * out_channels
        num_maxs = self.count_maxpool_operations(output, output_pooled, pool_layer)
        if pool_layer:
            num_maxs = self.count_maxpool_operations(output, output_pooled, pool_layer)
        else:
            num_maxs = 0
        total_ops = num_mults + num_adds + num_maxs
        return num_mults, num_adds, num_maxs, total_ops

    def count_operations(self, x):
        conv1_out = F.relu(self.conv1(x))
        conv1_out_pooled = self.maxpool1(conv1_out)
        conv2_out = F.relu(self.conv2(conv1_out_pooled))
        conv2_out_norm = self.batchnorm(conv2_out)
        conv2_out_pooled = self.maxpool2(conv2_out_norm)
        conv3_out = F.relu(self.conv3(conv2_out_pooled))
        conv4_out = F.relu(self.conv4(conv3_out))
        conv5_out = F.relu(self.conv5(conv4_out))
        conv5_out_pooled = self.maxpool3(conv5_out)
        
        conv1_ops = self.count_conv_operations(x, conv1_out, conv1_out_pooled, self.conv1, self.maxpool1)
        conv2_ops = self.count_conv_operations(conv1_out_pooled, conv2_out, conv2_out_pooled, self.conv2, self.maxpool2)
        conv3_ops = self.count_conv_operations(conv2_out_pooled, conv3_out, conv3_out, self.conv3, None)
        conv4_ops = self.count_conv_operations(conv3_out, conv4_out, conv4_out, self.conv4, None)
        conv5_ops = self.count_conv_operations(conv4_out, conv5_out, conv5_out_pooled, self.conv5, self.maxpool3)
        
        return conv1_ops, conv2_ops, conv3_ops, conv4_ops, conv5_ops
        
    def count_fc_operations(self, input, fc_layer):
        in_features = fc_layer.in_features
        out_features = fc_layer.out_features
        num_mults = out_features * in_features
        num_adds = out_features * in_features
        num_maxs = 0
        total_ops = num_mults + num_adds
        return num_mults, num_adds, num_maxs, total_ops
    
    def count_total_operations(self, x):
        conv1_ops, conv2_ops, conv3_ops, conv4_ops, conv5_ops = self.count_operations(x)
        fc1_ops = self.count_fc_operations(x, self.fc1)
        fc2_ops = self.count_fc_operations(x, self.fc2)
        fc3_ops = self.count_fc_operations(x, self.fc3)
        total_ops = sum(op[3] for op in [conv1_ops, conv2_ops, conv3_ops, conv4_ops, conv5_ops, fc1_ops, fc2_ops, fc3_ops])
        return total_ops


In [120]:
class VGG(nn.Module):
    def __init__(self):
        super(VGG, self).__init__()
        self.conv1 = nn.Conv2d(3, 64, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(64, 128, kernel_size=3, padding=1)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
        self.fc1 = nn.Linear(128 * 8 * 8, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)

    def forward(self, x):
        x = F.relu(self.conv1(x))
        x = self.pool(x)
        x = F.relu(self.conv2(x))
        x = self.pool(x)
        x = torch.flatten(x, 1)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x

    def count_maxpool_operations(self, input, output, pool_layer, out_channels):
        kernel_maxpooling = pool_layer.kernel_size
        stride = pool_layer.stride
        padding = pool_layer.padding
        output_height = output.shape[2]
        output_width = output.shape[3]
        out_channels =  output.shape[1]
        num_max = output_height * output_width * (kernel_maxpooling**2 -1) * out_channels
        return num_max

    def count_conv_operations(self, input, output, output_pooled, conv_layer, pool_layer):
        out_channels, in_channels = output.size(1), conv_layer.in_channels
        output_height, output_width = output.size(2), output.size(3)
        filter_size = conv_layer.kernel_size[0]
        stride = conv_layer.stride[0]
        padding = conv_layer.padding[0]
        num_mults = output_height * output_width * in_channels * filter_size ** 2 * out_channels
        num_adds = output_height * output_width * in_channels * filter_size ** 2 * out_channels
        num_maxs = self.count_maxpool_operations(output, output_pooled, pool_layer, out_channels)
        total_ops = num_mults + num_adds + num_maxs
        return num_mults, num_adds, num_maxs, total_ops

    def count_operations(self, x):
        conv1_out = self.conv1(x)
        conv1_out_pooled = self.pool(F.relu(conv1_out))
        conv2_out = self.conv2(conv1_out_pooled)
        conv2_out_pooled = self.pool(F.relu(conv2_out))
        conv1_ops = self.count_conv_operations(x, conv1_out, conv1_out_pooled, self.conv1, self.pool)
        conv2_ops = self.count_conv_operations(conv1_out_pooled, conv2_out, conv2_out_pooled, self.conv2, self.pool)
        return conv1_ops, conv2_ops        

    def count_fc_operations(self, input, fc_layer):
        in_features = fc_layer.in_features
        out_features = fc_layer.out_features
        num_mults = out_features * in_features
        num_adds = out_features * in_features
        num_maxs = 0    
        total_ops = num_mults + num_adds
        return num_mults, num_adds, num_maxs, total_ops

    def count_total_operations(self, x):
        conv1_ops, conv2_ops = self.count_operations(x)
        fc1_ops = self.count_fc_operations(x, self.fc1)
        fc2_ops = self.count_fc_operations(x, self.fc2)
        fc3_ops = self.count_fc_operations(x, self.fc3)
        total_ops = sum(op[3] for op in [conv1_ops, conv2_ops, fc1_ops, fc2_ops, fc3_ops])
        return total_ops

In [121]:
def evaluate_model(net, testloader, criterion, device):
    net.eval()
    correct = 0
    total_samples = 0 
    total_loss = 0.0
    total_test_ops = 0

    with torch.no_grad():
        start_time = time.time()
        for images, labels in testloader:
            images, labels = images.to(device), labels.to(device)
            outputs = net(images)
            loss = criterion(outputs, labels)
            total_loss += loss.item()

            _, predicted = torch.max(outputs.data, 1)
            total_samples += labels.size(0)
            correct += (predicted == labels).sum().item()
            total_test_ops += net.count_total_operations(images)

        evaluate_time = time.time() - start_time

    test_accuracy = 100. * correct / total_samples
    total_loss /= len(testloader)
    ops_per_second = total_test_ops / evaluate_time

    return test_accuracy, total_loss, total_test_ops, evaluate_time, ops_per_second

In [122]:
def train(epoch_nums, net, trainloader, testloader, optimizer, criterion, device):
    total_time = 0.0
    total_ops = 0
    
    for epoch in range(epoch_nums + 1):
        net.train()
        sum_loss = 0.0
        correct = 0.0
        total = 0.0
        iteration_num = 0
        total_train_ops = 0

        if epoch > 0: 
            for i, data in enumerate(trainloader, 0):
                inputs, labels = data
                inputs = inputs.to(device)
                labels = labels.to(device)
                optimizer.zero_grad()

                if epoch == 1 and i == 0: 
                    batch_train_ops = net.count_total_operations(inputs)
                    batch_train_ops = inputs.shape[0] * batch_train_ops

                if torch.cuda.is_available(): 
                    torch.cuda.synchronize()
                batch_train_start = time.time()
                outputs = net(inputs)
                loss = criterion(outputs, labels)
                loss.backward()
                optimizer.step()
                sum_loss += loss.item()

                if torch.cuda.is_available(): 
                    torch.cuda.synchronize()
                batch_train_time = time.time() - batch_train_start

                _, predicted = torch.max(outputs.data, 1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()
                iteration_num += 1

                batch_train_ops = net.count_total_operations(inputs)
                batch_train_ops = inputs.shape[0] * batch_train_ops
                total_train_ops += batch_train_ops

            epoch_train_ops = total_train_ops * iteration_num

        test_accuracy, total_loss, total_test_ops, evaluate_time, ops_per_second = evaluate_model(net, testloader, criterion, device)
        total_time += evaluate_time

        if epoch == 0: 
            print('[Epoch:%d] Test Acc: %.3f%%  | Loss: %.3f%% | Train Ops: %d | Test Ops: %d | Time: %.6fs | Ops/Sec : %d ' % (
                0, 
                test_accuracy, 
                total_loss,
                0,
                total_test_ops,
                evaluate_time,
                ops_per_second
            ))
        else:
            print('[Epoch:%d] Test Acc: %.3f%% | Loss: %.3f%% | Train Ops: %d | Test Ops: %d | Time: %.6fs | Ops/Sec : %d ' % (
                epoch, 
                test_accuracy,
                total_loss,
                total_train_ops,
                total_test_ops,
                evaluate_time,
                ops_per_second
            ))

        if epoch > 0: 
            total_ops += total_test_ops + epoch_train_ops

    ops_per_second = total_ops / total_time 
    total_params = sum(p.numel() for p in net.parameters())
    print('Time elapsed: %.3fs | Total ops: %d | Ops/Second : %d | Total Parameters : %d ' % (
        total_time,
        total_ops,
        ops_per_second,
        total_params,
    ))
    print('Finished Training')


In [123]:
net = Net().to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9)    
epoch_nums = 5

train(epoch_nums, net, trainloader, testloader, optimizer, criterion, device)

[Epoch:0] Test Acc: 10.020%  | Loss: 2.305% | Train Ops: 0 | Test Ops: 103345272 | Time: 1.164539s | Ops/Sec : 88743515 
[Epoch:1] Test Acc: 10.810% | Loss: 2.298% | Train Ops: 65408400000 | Test Ops: 103345272 | Time: 1.143528s | Ops/Sec : 90374102 
[Epoch:2] Test Acc: 19.330% | Loss: 2.277% | Train Ops: 65408400000 | Test Ops: 103345272 | Time: 1.167405s | Ops/Sec : 88525627 
[Epoch:3] Test Acc: 24.330% | Loss: 2.098% | Train Ops: 65408400000 | Test Ops: 103345272 | Time: 1.139183s | Ops/Sec : 90718796 
[Epoch:4] Test Acc: 28.360% | Loss: 1.988% | Train Ops: 65408400000 | Test Ops: 103345272 | Time: 1.481369s | Ops/Sec : 69763343 
[Epoch:5] Test Acc: 30.490% | Loss: 1.918% | Train Ops: 65408400000 | Test Ops: 103345272 | Time: 1.188130s | Ops/Sec : 86981423 
Time elapsed: 7.284s | Total ops: 127873938726360 | Ops/Second : 17555085188352 | Total Parameters : 62006 
Finished Training


In [124]:
netAlex = AlexNet().to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(netAlex.parameters(), lr=LR, momentum=0.9, weight_decay=5e-4)
epoch_nums = 5

train(epoch_nums, netAlex, trainloader, testloader, optimizer, criterion, device)

[Epoch:0] Test Acc: 10.000%  | Loss: 2.304% | Train Ops: 0 | Test Ops: 1165701248 | Time: 1.168787s | Ops/Sec : 997359662 
[Epoch:1] Test Acc: 33.910% | Loss: 1.694% | Train Ops: 737785600000 | Test Ops: 1165701248 | Time: 1.211577s | Ops/Sec : 962135742 
[Epoch:2] Test Acc: 49.690% | Loss: 1.307% | Train Ops: 737785600000 | Test Ops: 1165701248 | Time: 1.151581s | Ops/Sec : 1012261785 
[Epoch:3] Test Acc: 55.820% | Loss: 1.218% | Train Ops: 737785600000 | Test Ops: 1165701248 | Time: 1.466966s | Ops/Sec : 794634303 
[Epoch:4] Test Acc: 57.490% | Loss: 1.217% | Train Ops: 737785600000 | Test Ops: 1165701248 | Time: 1.169889s | Ops/Sec : 996420611 
[Epoch:5] Test Acc: 64.270% | Loss: 0.997% | Train Ops: 737785600000 | Test Ops: 1165701248 | Time: 1.456298s | Ops/Sec : 800454966 
Time elapsed: 7.625s | Total ops: 1442376676506240 | Ops/Second : 189161735179061 | Total Parameters : 173834 
Finished Training


In [125]:
netVGG = VGG().to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(netVGG.parameters(), lr=LR, momentum=0.9, weight_decay=5e-4)
epoch_nums = 5

train(epoch_nums, netVGG, trainloader, testloader, optimizer, criterion, device)

[Epoch:0] Test Acc: 9.750%  | Loss: 2.304% | Train Ops: 0 | Test Ops: 3424596912 | Time: 1.182159s | Ops/Sec : 2896899388 
[Epoch:1] Test Acc: 45.770% | Loss: 1.504% | Train Ops: 2167466400000 | Test Ops: 3424596912 | Time: 1.208653s | Ops/Sec : 2833400209 
[Epoch:2] Test Acc: 53.290% | Loss: 1.288% | Train Ops: 2167466400000 | Test Ops: 3424596912 | Time: 1.166970s | Ops/Sec : 2934606946 
[Epoch:3] Test Acc: 56.500% | Loss: 1.178% | Train Ops: 2167466400000 | Test Ops: 3424596912 | Time: 1.402603s | Ops/Sec : 2441601172 
[Epoch:4] Test Acc: 62.720% | Loss: 1.047% | Train Ops: 2167466400000 | Test Ops: 3424596912 | Time: 1.469724s | Ops/Sec : 2330094589 
[Epoch:5] Test Acc: 67.040% | Loss: 0.938% | Train Ops: 2167466400000 | Test Ops: 3424596912 | Time: 1.164321s | Ops/Sec : 2941281372 
Time elapsed: 7.594s | Total ops: 4237413934984560 | Ops/Second : 557963360750485 | Total Parameters : 1069822 
Finished Training


In [126]:
netRes = ResNet18().to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(netRes.parameters(), lr=LR, momentum=0.9, weight_decay=5e-4)
epoch_nums = 5

train(epoch_nums, netRes, trainloader, testloader, optimizer, criterion, device)

[Epoch:0] Test Acc: 11.330%  | Loss: 2.300% | Train Ops: 0 | Test Ops: 6253260800 | Time: 1.384133s | Ops/Sec : 4517818318 
[Epoch:1] Test Acc: 58.360% | Loss: 1.154% | Train Ops: 3957760000000 | Test Ops: 6253260800 | Time: 1.196241s | Ops/Sec : 5227423922 
[Epoch:2] Test Acc: 63.720% | Loss: 1.031% | Train Ops: 3957760000000 | Test Ops: 6253260800 | Time: 1.176614s | Ops/Sec : 5314622542 
[Epoch:3] Test Acc: 67.900% | Loss: 0.902% | Train Ops: 3957760000000 | Test Ops: 6253260800 | Time: 1.489056s | Ops/Sec : 4199480972 
[Epoch:4] Test Acc: 72.060% | Loss: 0.795% | Train Ops: 3957760000000 | Test Ops: 6253260800 | Time: 1.318741s | Ops/Sec : 4741840342 
[Epoch:5] Test Acc: 73.420% | Loss: 0.748% | Train Ops: 3957760000000 | Test Ops: 6253260800 | Time: 1.311442s | Ops/Sec : 4768231466 
Time elapsed: 7.876s | Total ops: 7737452066304000 | Ops/Second : 982380424751392 | Total Parameters : 160074 
Finished Training
