In [51]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision
import torchvision.transforms as transforms
import torch.optim as optim
import time
!pip install torchsummary
from torchsummary import summary

/var/lib/oar/.batch_job_bashrc: line 5: /home/ziwang/.bashrc: No such file or directory


In [52]:
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
print(device)

cuda:0


In [53]:
pre_epoch = 0
BATCH_SIZE = 128
LR = 0.01

In [54]:
transform_train = transforms.Compose([
    transforms.RandomCrop(32, padding=4),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
])

transform_test = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
])

trainset = torchvision.datasets.CIFAR10(root='./data', train=True, download=True, transform=transform_train) 
trainloader = torch.utils.data.DataLoader(trainset, batch_size=BATCH_SIZE, shuffle=True, num_workers=2) 

testset = torchvision.datasets.CIFAR10(root='./data', train=False, download=True, transform=transform_test)
testloader = torch.utils.data.DataLoader(testset, batch_size=BATCH_SIZE, shuffle=False, num_workers=2)
# classes = ('plane', 'car', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck')

Files already downloaded and verified
Files already downloaded and verified


# Instrumentation et évaluation "en continu" du système #

## Modifier les fonctions pour calculer à chaque étape le nombre d'opérations flottantes effectuées, séparément pour les additions, les multiplications, les maximums et le total. ##

In [55]:
class Net(nn.Module):
    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(3, 6, 5)
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(6, 16, 5)
        self.fc1 = nn.Linear(16 * 5 * 5, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)

    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = torch.flatten(x, 1)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x

    def count_maxpool_operations(self, input, output, pool_layer, out_channels):
        kernel_maxpooling = pool_layer.kernel_size
        stride = pool_layer.stride
        padding = pool_layer.padding
        output_height = output.shape[2]
        output_width = output.shape[3]
        out_channels =  output.shape[1]
        num_max = output_height * output_width * (kernel_maxpooling**2 -1) * out_channels
        return num_max

    def count_conv_operations(self, input, output, output_pooled, conv_layer, pool_layer):
        out_channels, in_channels = output.size(1), conv_layer.in_channels
        output_height, output_width = output.size(2), output.size(3)
        filter_size = conv_layer.kernel_size[0]
        stride = conv_layer.stride[0]
        padding = conv_layer.padding[0]
        num_mults = output_height * output_width * in_channels * filter_size ** 2 * out_channels
        num_adds = output_height * output_width * in_channels * filter_size ** 2 * out_channels
        num_maxs = self.count_maxpool_operations(output, output_pooled, pool_layer, out_channels)
        total_ops = num_mults + num_adds + num_maxs
        return num_mults, num_adds, num_maxs, total_ops

    def count_operations(self, x):
        conv1_out = self.conv1(x)
        conv1_out_pooled = self.pool(F.relu(conv1_out))
        conv2_out = self.conv2(conv1_out_pooled)
        conv2_out_pooled = self.pool(F.relu(conv2_out))
        conv1_ops = self.count_conv_operations(x, conv1_out, conv1_out_pooled, self.conv1, self.pool)
        conv2_ops = self.count_conv_operations(conv1_out_pooled, conv2_out, conv2_out_pooled, self.conv2, self.pool)
        return conv1_ops, conv2_ops        

    def count_fc_operations(self, input, fc_layer):
        in_features = fc_layer.in_features
        out_features = fc_layer.out_features
        num_mults = out_features * in_features
        num_adds = out_features * in_features
        num_maxs = 0    
        total_ops = num_mults + num_adds
        return num_mults, num_adds, num_maxs, total_ops

    def count_total_operations(self, x):
        conv1_ops, conv2_ops = self.count_operations(x)
        fc1_ops = self.count_fc_operations(x, self.fc1)
        fc2_ops = self.count_fc_operations(x, self.fc2)
        fc3_ops = self.count_fc_operations(x, self.fc3)
        total_ops = sum(op[3] for op in [conv1_ops, conv2_ops, fc1_ops, fc2_ops, fc3_ops])
        return total_ops


## Dans la partie entraînement du réseau CNN, lister les différentes couches et sous-couches. ##

### La couche de convolution : ###
La couche de convolution applique un ensemble de filtres convolutifs aux images en entrée, chacun d'entre eux activant certaines caractéristiques des images. 

- `self.conv1` Une première couche de convolution prend en entrée des images RGB et produit 6 filtres de convolution de taille 5×5 sans padding.
- `self.conv2` Une deuxième couche de convolution prend 6 entrées et produit 16 filtres de convolution de taille 5×5 sans padding.

### Les couches entièrement connectées : ###
Chaque couche entièrement connectée effectue une transformation linéaire suivie d'une activation ReLU, qui introduit de la non-linéarité dans le réseau.  Du coup, dans chacune des couches entièrement connectées (`self.fc1`, `self.fc2` et `self.fc3`), il y a une sous-couche linéaire suivie d'une sous-couche non linéaire. Ces couches sont responsables de la combinaison des caractéristiques extraites par les couches de convolution précédentes pour effectuer la tâche de classification finale.

`self.fc1`: 
- Il y a 120 neurones dans la couche entièrement connectée, chacun connecté à une entrée de taille 400.
- Sous-couche linéaire : Elle effectue une transformation linéaire des caractéristiques d'entrée.
- Sous-couche non linéaire : Suite à la transformation linéaire, une activation ReLU est appliquée. Cela introduit de la non-linéarité dans la sortie de la couche.

`self.fc2`:
- 84 neurones dans la deuxième couche entièrement connectée, chacun connecté aux 120 neurones de la couche précédente.
- Sous-couche linéaire : Elle effectue une transformation linéaire des caractéristiques d'entrée.
- Sous-couche non linéaire : Suite à la transformation linéaire, une activation ReLU est appliquée. Cela introduit de la non-linéarité dans la sortie de la couche.

`self.fc3`:
- 10 neurones dans la dernière couche entièrement connectée, chacun connecté aux 84 neurones de la couche précédente.
- Sous-couche linéaire : Il s'agit de la dernière transformation linéaire qui produit la sortie finale du réseau sans RELU.

### La couche de pooling : ###
L'opération de pooling consiste à réduire la taille des images, tout en préservant leurs caractéristiques importantes. Elle est utilisée après chaque couche de convolution.
- `self.pool` Cette couche en effectuant une opération de max pooling avec une fenêtre de taille 2x2 et un pas de 2. 

## Donner la taille des différents tenseurs de données Xn et de poids Wn le long du calcul. ##

### Convolutional Layer 1 (conv1): ###
- X1 = 3×32×32 = 3072
- Poids W1:
    - [6, 3, 5, 5]: Poids de convolution: 6×3×5×5=450
    - Bias: [6]

### Convolutional Layer 2 (conv2): ###
- X2 = 6×14×14 = 1176
- Poids W2:
    - [16, 6, 5, 5]: Poids de convolution: 16×6×5×5=2400
    - Bias: [16]

### Fully Connected Layer 1 (fc1): ###
- X3 = 16×5×5 = 400
- Poids W3:
    - [120, 400]: Poids de convolution: 120×400 = 48000
    - Bias: [120]

### Fully Connected Layer 2 (fc2): ###
- X4 = 120
- Poids W4:
    - [84, 120]: Poids de convolution: 84×120=10080
    - Bias: [84]
  
### Fully Connected Layer 3 (fc3): ###
- X5 = 84
- Poids W5:
    - [10, 84]: Poids de convolution: 10×84=840
    - Bias: [10]

## Modifier le programme pour faire l'évaluation après chaque époque et aussi avant la première (faire une fonction spécialisée). Supprimer les autres affichages intermédiaires. ##

In [56]:
def evaluate_model(net, dataloader, criterion, epoch, device, iteration_num, i):
    # correct_pred = {classname: 0 for classname in classes}
    # total_pred = {classname: 0 for classname in classes}
    epoch_evaluate_ops = 0
    with torch.no_grad(): 
        eval_loss = 0.0
        correct = 0
        total = 0
        iteration_num = 0
        batch_evaluate_ops = 0
        if torch.cuda.is_available(): torch.cuda.synchronize()
        epoch_evaluate_start = time.time()
        
        for data in dataloader:
            net.eval()
            images, labels = data
            images, labels = images.to(device), labels.to(device)
            batch_evaluate_ops = net.count_total_operations(images)
            batch_evaluate_ops = images.shape[0] * batch_evaluate_ops # batch_size * total_operation
            epoch_evaluate_ops += batch_evaluate_ops
            
            outputs = net(images)
            loss = criterion(outputs, labels)
            eval_loss += loss.item()
            _, predicted = torch.max(outputs.data, 1)
            
            
            # for label, prediction in zip(labels, predicted):
            #     if label == prediction.item():
            #         correct_pred[classes[label]] += 1
            #     total_pred[classes[label]] += 1
            
            
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
            iteration_num += 1
            
        accuracy = 100. * correct / total
        eval_loss /= len(dataloader)
        if(epoch == 0):
            epoch_evaluate_ops = batch_evaluate_ops * iteration_num
        if torch.cuda.is_available(): torch.cuda.synchronize()
        epoch_evaluate_time = time.time() - epoch_evaluate_start
        epoch_evaluate_ops_per_second = epoch_evaluate_ops / epoch_evaluate_time


    # for classname, correct_count in correct_pred.items():
    #     af = 100 * float(correct_count) / total_pred[classname]
    #     print(f'Accuracy for class: {classname:5s} is {af:.1f} %')
    
    return accuracy, eval_loss, batch_evaluate_ops, epoch_evaluate_time, epoch_evaluate_ops_per_second



In [57]:
def train(epoch_nums, net, trainloader, testloader, optimizer, criterion, device):
    if torch.cuda.is_available(): torch.cuda.synchronize()
    total_time_start = time.time()
    for epoch in range(epoch_nums):
        net.train()
        sum_loss = 0.0
        correct = 0.0
        total = 0.0
        iteration_num = 0

        for i, data in enumerate(trainloader, 0):
            inputs, labels = data
            inputs = inputs.to(device)
            labels = labels.to(device)
            optimizer.zero_grad()

            if epoch == 0 & i == 0:
                batch_train_ops = net.count_total_operations(inputs)
                batch_train_ops = inputs.shape[0] * batch_train_ops
            if torch.cuda.is_available(): torch.cuda.synchronize()
            batch_train_start = time.time()
            outputs = net(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            sum_loss += loss.item()

            if torch.cuda.is_available(): torch.cuda.synchronize()
            batch_train_time = time.time() - batch_train_start
            
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
            # batch_train_ops_per_second = batch_train_ops / batch_train_time
            iteration_num += 1
            
        if(epoch == 0):
            epoch_train_ops = batch_train_ops * iteration_num
        
        accuracy, eval_loss, batch_evaluate_ops, epoch_evaluate_time, epoch_evaluate_ops_per_second = evaluate_model(net, testloader, criterion, epoch, device, iteration_num, i)
        print('[Epoch:%d] Validation Acc: %.3f%% | Loss: %.3f%% | Ops: %d | Time: %.6fs | Ops/Sec : %d ' % (
            epoch + 1, 
            accuracy, 
            eval_loss,
            batch_evaluate_ops, 
            epoch_evaluate_time,
            epoch_evaluate_ops_per_second
        ))
    print('Finished Training')

In [58]:
class ResidualBlock(nn.Module):
    def __init__(self, inchannel, outchannel, stride=1):
        super(ResidualBlock, self).__init__()
        self.conv1 = nn.Conv2d(inchannel, outchannel, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(outchannel)
        self.relu = nn.ReLU(inplace=True)
        self.conv2 = nn.Conv2d(outchannel, outchannel, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(outchannel)
        self.shortcut = nn.Sequential()
        if stride != 1 or inchannel != outchannel:
            self.shortcut = nn.Sequential(
                nn.Conv2d(inchannel, outchannel, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(outchannel)
            )

    def forward(self, x):
        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)
        out = self.conv2(out)
        out = self.bn2(out)
        out += self.shortcut(x)
        out = self.relu(out)
        return out

In [59]:
class ResNet(nn.Module):
    def __init__(self, block, num_blocks, num_classes=10):
        super(ResNet, self).__init__()
        self.in_planes = 64
        self.conv1 = nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        self.relu = nn.ReLU(inplace=True)
        self.layer1 = self.make_layer(block, 64, num_blocks[0], stride=1)
        self.fc = nn.Linear(1024, num_classes)

    def make_layer(self, block, channels, num_blocks, stride):
        strides = [stride] + [1] * (num_blocks - 1)
        layers = []
        inchannel = 64  # 初始化输入通道数
        for stride in strides:
            layers.append(block(inchannel, channels, stride))
            inchannel = channels  # 更新输入通道数为当前通道数
        return nn.Sequential(*layers)

    def forward(self, x):
        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)
        out = self.layer1(out)
        out = F.avg_pool2d(out, 8)
        out = torch.flatten(out, 1)
        out = self.fc(out)
        return out

    def count_conv_operations(self, input, output, output_pooled, stride, padding, kernel_size):
        out_channels = output.size(1)
        output_height, output_width = output.size(2), output.size(3)
        num_mults = output_height * output_width * input.size(1) * kernel_size ** 2 * out_channels
        num_adds = output_height * output_width * input.size(1) * kernel_size ** 2 * out_channels
        num_maxs = self.count_maxpool_operations(output, output_pooled)
        total_ops = num_mults + num_adds + num_maxs
        return num_mults, num_adds, num_maxs, total_ops



    def count_maxpool_operations(self, output, output_pooled):
        kernel_size = 2  # 例如，假设池化层的核大小为 2
        stride = 2  # 假设步长也为 2
        padding = 0  # 假设填充为 0
        output_height = output.shape[2]
        output_width = output.shape[3]
        out_channels = output.shape[1]
        num_max = ((output_height + 2 * padding - kernel_size) // stride + 1) * ((output_width + 2 * padding - kernel_size) // stride + 1) * (kernel_size ** 2 - 1) * out_channels
        return num_max


    def count_fc_operations(self, input, fc_layer):
        in_features = fc_layer.in_features
        out_features = fc_layer.out_features    
        num_mults = out_features * in_features
        num_adds = out_features * in_features
        num_maxs = 0
        total_ops = num_mults + num_adds
        return num_mults, num_adds, num_maxs, total_ops

    def count_total_operations(self, x):
        conv1_out = self.conv1(x)
        conv1_out_pooled = self.relu(self.bn1(conv1_out))
        conv1_ops = self.count_conv_operations(x, conv1_out, conv1_out_pooled, self.conv1, None, kernel_size=3)

        layer1_out = self.layer1(conv1_out_pooled)
        layer1_out_pooled = F.avg_pool2d(layer1_out, 8)
        layer1_ops = self.count_conv_operations(conv1_out_pooled, layer1_out, layer1_out_pooled, stride=1, padding=1, kernel_size=3)

        fc_ops = self.count_fc_operations(x, self.fc)

        total_ops = sum(op[3] for op in [conv1_ops, layer1_ops, fc_ops])
        return total_ops

In [60]:
def ResNet18():
    return ResNet(ResidualBlock, [2, 2])

In [61]:
class AlexNet(nn.Module):
    def __init__(self):
        super(AlexNet, self).__init__()
  
        self.features = nn.Sequential(
            nn.Conv2d(in_channels=3,out_channels=16,kernel_size=5,stride=1,padding=2),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=3,stride=2,padding=0),
            nn.Conv2d(in_channels=16,out_channels=48,kernel_size=3,padding=1),
            nn.ReLU(),
            nn.BatchNorm2d(48),
            nn.MaxPool2d(kernel_size=3,stride=2,padding=0),
            nn.Conv2d(in_channels=48,out_channels=64,kernel_size=3,padding=1),
            nn.ReLU(),
            nn.Conv2d(in_channels=64,out_channels=64,kernel_size=3,padding=1),
            nn.ReLU(),
            nn.Conv2d(in_channels=64,out_channels=48,kernel_size=3,padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=3,stride=2,padding=0),
        )

        self.classifier = nn.Sequential(
            nn.Linear(in_features=3*3*48,out_features=128),
            nn.ReLU(),
            nn.Dropout(p=0.5),
            nn.Linear(128,128),
            nn.ReLU(),
            nn.Linear(128,10)
        )

    def forward(self,x):
        x = self.features(x)
        x = torch.flatten(x, 1)
        result = self.classifier(x)
        return result


    def count_maxpool_operations(self, input, output, pool_layer):
        if pool_layer:
            kernel_size = pool_layer.kernel_size
            stride = pool_layer.stride
            padding = pool_layer.padding
            output_height = output.shape[2]
            output_width = output.shape[3]
            out_channels = output.shape[1]
            num_max = ((output_height + 2 * padding - kernel_size) // stride + 1) * ((output_width + 2 * padding - kernel_size) // stride + 1) * (kernel_size ** 2 - 1) * out_channels  
        else:
            num_max = 0
        return num_max
             
    def count_conv_operations(self, input, output, output_pooled, conv_layer, pool_layer):
        out_channels, in_channels = output.size(1), conv_layer.in_channels
        output_height, output_width = output.size(2), output.size(3)
        filter_size = conv_layer.kernel_size[0]
        stride = conv_layer.stride[0]
        padding = conv_layer.padding[0]
        num_mults = output_height * output_width * in_channels * filter_size ** 2 * out_channels
        num_adds = output_height * output_width * in_channels * filter_size ** 2 * out_channels
        num_maxs = self.count_maxpool_operations(output, output_pooled, pool_layer)
        if pool_layer:
            num_maxs = self.count_maxpool_operations(output, output_pooled, pool_layer)
        else:
            num_maxs = 0
        total_ops = num_mults + num_adds + num_maxs
        return num_mults, num_adds, num_maxs, total_ops

    def count_operations(self, x):
        conv1_out = self.features[0](x)
        conv1_out_pooled = self.features[2](F.relu(conv1_out))
        conv2_out = self.features[3](conv1_out_pooled)
        conv2_out_pooled = self.features[6](F.relu(conv2_out))
        
        conv3_out = self.features[7](conv2_out_pooled)
        conv4_out = self.features[9](F.relu(conv3_out))
        conv5_out = self.features[11](F.relu(conv4_out))
        conv5_out_pooled = self.features[13](F.relu(conv5_out))
        
        conv1_ops = self.count_conv_operations(x, conv1_out, conv1_out_pooled, self.features[0], self.features[2])
        conv2_ops = self.count_conv_operations(conv1_out_pooled, conv2_out, conv2_out_pooled, self.features[3], self.features[6])
        conv3_ops = self.count_conv_operations(conv2_out_pooled, conv3_out, conv3_out, self.features[7], None)
        conv4_ops = self.count_conv_operations(conv3_out, conv4_out, conv4_out, self.features[9], None)
        conv5_ops = self.count_conv_operations(conv4_out, conv5_out, conv5_out_pooled, self.features[11], self.features[13])
        return conv1_ops, conv2_ops, conv3_ops, conv4_ops, conv5_ops


    def count_fc_operations(self, input, fc_layer):
        in_features = fc_layer.in_features
        out_features = fc_layer.out_features    
        num_mults = out_features * in_features
        num_adds = out_features * in_features
        num_maxs = 0
        total_ops = num_mults + num_adds
        return num_mults, num_adds, num_maxs, total_ops
    
    def count_total_operations(self, x):
        conv1_ops, conv2_ops, conv3_ops, conv4_ops, conv5_ops  = self.count_operations(x)
        fc1_ops = self.count_fc_operations(x, self.classifier[0])
        fc2_ops = self.count_fc_operations(x, self.classifier[3])
        fc3_ops = self.count_fc_operations(x, self.classifier[5])
        total_ops = sum(op[3] for op in [conv1_ops, conv2_ops, conv3_ops, conv4_ops, conv5_ops, fc1_ops, fc2_ops, fc3_ops])
        return total_ops

In [62]:
class VGG(nn.Module):
    def __init__(self):
        super(VGG, self).__init__()
        self.conv1 = nn.Conv2d(3, 64, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(64, 128, kernel_size=3, padding=1)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
        self.fc1 = nn.Linear(128 * 8 * 8, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)

    def forward(self, x):
        x = F.relu(self.conv1(x))
        x = self.pool(x)
        x = F.relu(self.conv2(x))
        x = self.pool(x)
        x = torch.flatten(x, 1)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x

    def count_maxpool_operations(self, input, output, pool_layer, out_channels):
        kernel_maxpooling = pool_layer.kernel_size
        stride = pool_layer.stride
        padding = pool_layer.padding
        output_height = output.shape[2]
        output_width = output.shape[3]
        out_channels =  output.shape[1]
        num_max = output_height * output_width * (kernel_maxpooling**2 -1) * out_channels
        return num_max

    def count_conv_operations(self, input, output, output_pooled, conv_layer, pool_layer):
        out_channels, in_channels = output.size(1), conv_layer.in_channels
        output_height, output_width = output.size(2), output.size(3)
        filter_size = conv_layer.kernel_size[0]
        stride = conv_layer.stride[0]
        padding = conv_layer.padding[0]
        num_mults = output_height * output_width * in_channels * filter_size ** 2 * out_channels
        num_adds = output_height * output_width * in_channels * filter_size ** 2 * out_channels
        num_maxs = self.count_maxpool_operations(output, output_pooled, pool_layer, out_channels)
        total_ops = num_mults + num_adds + num_maxs
        return num_mults, num_adds, num_maxs, total_ops

    def count_operations(self, x):
        conv1_out = self.conv1(x)
        conv1_out_pooled = self.pool(F.relu(conv1_out))
        conv2_out = self.conv2(conv1_out_pooled)
        conv2_out_pooled = self.pool(F.relu(conv2_out))
        conv1_ops = self.count_conv_operations(x, conv1_out, conv1_out_pooled, self.conv1, self.pool)
        conv2_ops = self.count_conv_operations(conv1_out_pooled, conv2_out, conv2_out_pooled, self.conv2, self.pool)
        return conv1_ops, conv2_ops        

    def count_fc_operations(self, input, fc_layer):
        in_features = fc_layer.in_features
        out_features = fc_layer.out_features
        num_mults = out_features * in_features
        num_adds = out_features * in_features
        num_maxs = 0    
        total_ops = num_mults + num_adds
        return num_mults, num_adds, num_maxs, total_ops

    def count_total_operations(self, x):
        conv1_ops, conv2_ops = self.count_operations(x)
        fc1_ops = self.count_fc_operations(x, self.fc1)
        fc2_ops = self.count_fc_operations(x, self.fc2)
        fc3_ops = self.count_fc_operations(x, self.fc3)
        total_ops = sum(op[3] for op in [conv1_ops, conv2_ops, fc1_ops, fc2_ops, fc3_ops])
        return total_ops

In [67]:
net = Net().to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9)    
epoch_nums = 20

train(epoch_nums, net, trainloader, testloader, optimizer, criterion, device)

[Epoch:1] Validation Acc: 10.000% | Loss: 2.300% | Ops: 20930688 | Time: 1.119046s | Ops/Sec : 1477619994 
[Epoch:2] Validation Acc: 16.920% | Loss: 2.292% | Ops: 20930688 | Time: 1.153205s | Ops/Sec : 11343762501 
[Epoch:3] Validation Acc: 18.450% | Loss: 2.226% | Ops: 20930688 | Time: 0.945565s | Ops/Sec : 13834783225 
[Epoch:4] Validation Acc: 23.600% | Loss: 2.038% | Ops: 20930688 | Time: 1.150473s | Ops/Sec : 11370695931 
[Epoch:5] Validation Acc: 28.420% | Loss: 1.960% | Ops: 20930688 | Time: 1.151441s | Ops/Sec : 11361141658 
[Epoch:6] Validation Acc: 31.720% | Loss: 1.861% | Ops: 20930688 | Time: 1.152841s | Ops/Sec : 11347342496 
[Epoch:7] Validation Acc: 35.710% | Loss: 1.750% | Ops: 20930688 | Time: 1.141953s | Ops/Sec : 11455528060 
[Epoch:8] Validation Acc: 37.880% | Loss: 1.675% | Ops: 20930688 | Time: 1.460503s | Ops/Sec : 8956969775 
[Epoch:9] Validation Acc: 40.500% | Loss: 1.626% | Ops: 20930688 | Time: 1.150513s | Ops/Sec : 11370297712 
[Epoch:10] Validation Acc: 41.

In [68]:
netAlex = AlexNet().to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(netAlex.parameters(), lr=LR, momentum=0.9, weight_decay=5e-4)
epoch_nums = 20

train(epoch_nums, netAlex, trainloader, testloader, optimizer, criterion, device)

[Epoch:1] Validation Acc: 38.500% | Loss: 1.573% | Ops: 236091392 | Time: 1.436174s | Ops/Sec : 12986741899 
[Epoch:2] Validation Acc: 45.810% | Loss: 1.365% | Ops: 236091392 | Time: 1.335401s | Ops/Sec : 110496463028 
[Epoch:3] Validation Acc: 56.780% | Loss: 1.174% | Ops: 236091392 | Time: 1.147871s | Ops/Sec : 128548519612 
[Epoch:4] Validation Acc: 61.170% | Loss: 1.088% | Ops: 236091392 | Time: 1.180489s | Ops/Sec : 124996625884 
[Epoch:5] Validation Acc: 65.840% | Loss: 0.957% | Ops: 236091392 | Time: 1.163759s | Ops/Sec : 126793563872 
[Epoch:6] Validation Acc: 68.770% | Loss: 0.882% | Ops: 236091392 | Time: 1.360822s | Ops/Sec : 108432328570 
[Epoch:7] Validation Acc: 68.880% | Loss: 0.890% | Ops: 236091392 | Time: 1.181145s | Ops/Sec : 124927139622 
[Epoch:8] Validation Acc: 69.550% | Loss: 0.863% | Ops: 236091392 | Time: 1.164897s | Ops/Sec : 126669700952 
[Epoch:9] Validation Acc: 73.790% | Loss: 0.755% | Ops: 236091392 | Time: 1.312687s | Ops/Sec : 112408463688 
[Epoch:10] 

In [69]:
netVGG = VGG().to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(netVGG.parameters(), lr=LR, momentum=0.9, weight_decay=5e-4)
epoch_nums = 20

train(epoch_nums, netVGG, trainloader, testloader, optimizer, criterion, device)

[Epoch:1] Validation Acc: 45.400% | Loss: 1.496% | Ops: 693589248 | Time: 1.166674s | Ops/Sec : 46965599775 
[Epoch:2] Validation Acc: 52.410% | Loss: 1.306% | Ops: 693589248 | Time: 1.148276s | Ops/Sec : 377516595129 
[Epoch:3] Validation Acc: 59.090% | Loss: 1.138% | Ops: 693589248 | Time: 1.341359s | Ops/Sec : 323174657376 
[Epoch:4] Validation Acc: 64.670% | Loss: 0.997% | Ops: 693589248 | Time: 1.140669s | Ops/Sec : 380034366134 
[Epoch:5] Validation Acc: 66.960% | Loss: 0.956% | Ops: 693589248 | Time: 1.238172s | Ops/Sec : 350107533092 
[Epoch:6] Validation Acc: 69.650% | Loss: 0.874% | Ops: 693589248 | Time: 0.804577s | Ops/Sec : 538784325753 
[Epoch:7] Validation Acc: 71.160% | Loss: 0.832% | Ops: 693589248 | Time: 1.426240s | Ops/Sec : 303941233589 
[Epoch:8] Validation Acc: 71.430% | Loss: 0.822% | Ops: 693589248 | Time: 1.165017s | Ops/Sec : 372091689359 
[Epoch:9] Validation Acc: 73.620% | Loss: 0.754% | Ops: 693589248 | Time: 1.157795s | Ops/Sec : 374412748852 
[Epoch:10] 

In [70]:
netRes = ResNet18().to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(netRes.parameters(), lr=LR, momentum=0.9, weight_decay=5e-4)
epoch_nums = 20

train(epoch_nums, netRes, trainloader, testloader, optimizer, criterion, device)

[Epoch:1] Validation Acc: 58.140% | Loss: 1.174% | Ops: 1266483200 | Time: 1.217599s | Ops/Sec : 82171716113 
[Epoch:2] Validation Acc: 62.710% | Loss: 1.039% | Ops: 1266483200 | Time: 1.151975s | Ops/Sec : 687126214742 
[Epoch:3] Validation Acc: 68.810% | Loss: 0.870% | Ops: 1266483200 | Time: 0.882500s | Ops/Sec : 896943086374 
[Epoch:4] Validation Acc: 67.330% | Loss: 0.942% | Ops: 1266483200 | Time: 0.999935s | Ops/Sec : 791603524105 
[Epoch:5] Validation Acc: 71.870% | Loss: 0.821% | Ops: 1266483200 | Time: 1.143902s | Ops/Sec : 691975467417 
[Epoch:6] Validation Acc: 71.730% | Loss: 0.816% | Ops: 1266483200 | Time: 1.209887s | Ops/Sec : 654236547558 
[Epoch:7] Validation Acc: 71.020% | Loss: 0.865% | Ops: 1266483200 | Time: 1.354752s | Ops/Sec : 584278128212 
[Epoch:8] Validation Acc: 77.350% | Loss: 0.661% | Ops: 1266483200 | Time: 1.176023s | Ops/Sec : 673075540710 
[Epoch:9] Validation Acc: 78.180% | Loss: 0.637% | Ops: 1266483200 | Time: 1.167971s | Ops/Sec : 677715655799 
[E