In [45]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision
import torchvision.transforms as transforms
import torch.optim as optim
import time
!pip install torchsummary
from torchsummary import summary

/var/lib/oar/.batch_job_bashrc: line 5: /home/ziwang/.bashrc: No such file or directory


In [46]:
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
print(device)

cuda:0


In [47]:
pre_epoch = 0  # 定义已经遍历数据集的次数
BATCH_SIZE = 128      #批处理尺寸(batch_size)
LR = 0.01        #学习率

In [48]:
transform_train = transforms.Compose([
    transforms.RandomCrop(32, padding=4),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
])

transform_test = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
])

trainset = torchvision.datasets.CIFAR10(root='./data', train=True, download=True, transform=transform_train) 
trainloader = torch.utils.data.DataLoader(trainset, batch_size=BATCH_SIZE, shuffle=True, num_workers=2) 

testset = torchvision.datasets.CIFAR10(root='./data', train=False, download=True, transform=transform_test)
testloader = torch.utils.data.DataLoader(testset, batch_size=BATCH_SIZE, shuffle=False, num_workers=2)
# classes = ('plane', 'car', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck')

Files already downloaded and verified
Files already downloaded and verified


In [49]:
class Net(nn.Module):
    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(3, 6, 5)
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(6, 16, 5)
        self.fc1 = nn.Linear(16 * 5 * 5, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)

    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = torch.flatten(x, 1) # flatten all dimensions except batch
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x

    def count_maxpool_operations(self, input, output, pool_layer, out_channels):
        # input output 的维度均为 N,C,H,W
        kernel_maxpooling = pool_layer.kernel_size
        stride = pool_layer.stride
        padding = pool_layer.padding
        output_height = output.shape[2]
        output_width = output.shape[3]
        out_channels =  output.shape[1]
        num_max = output_height * output_width * (kernel_maxpooling**2 -1) * out_channels
        return num_max

    def count_conv_operations(self, input, output, output_pooled, conv_layer, pool_layer):
        # batch_size = input.size(0)
        out_channels, in_channels = output.size(1), conv_layer.in_channels
        output_height, output_width = output.size(2), output.size(3)
        filter_size = conv_layer.kernel_size[0]
        stride = conv_layer.stride[0]
        padding = conv_layer.padding[0]
        # Compute number of operations for convolution
        # print(str(output_height) + "*" +  str(output_width) + "*" + str(in_channels) + "*" + str(filter_size ** 2) + "*" + str(out_channels))
        num_mults = output_height * output_width * in_channels * filter_size ** 2 * out_channels
        num_adds = output_height * output_width * in_channels * filter_size ** 2 * out_channels
        # num_maxs = output_height * output_width * out_channels
        # print("num_mults" + str(num_mults))
        num_maxs = self.count_maxpool_operations(output, output_pooled, pool_layer, out_channels)
        # print("num_maxs" + str(num_maxs))
        total_ops = num_mults + num_adds + num_maxs
        return num_mults, num_adds, num_maxs, total_ops

    def count_operations(self, x):
        conv1_out = self.conv1(x)
        conv1_out_pooled = self.pool(F.relu(conv1_out))  # Apply max pooling after the first convolution
        conv2_out = self.conv2(conv1_out_pooled)
        conv2_out_pooled = self.pool(F.relu(conv2_out))  # Apply max pooling after the first convolution
        # Count operations for convolutional layer 1
        conv1_ops = self.count_conv_operations(x, conv1_out, conv1_out_pooled, self.conv1, self.pool)
        # Count operations for convolutional layer 2
        conv2_ops = self.count_conv_operations(conv1_out_pooled, conv2_out, conv2_out_pooled, self.conv2, self.pool)
        return conv1_ops, conv2_ops        

    def count_fc_operations(self, input, fc_layer):
        # Get the number of input features for the fully connected layer
        in_features = fc_layer.in_features
        # Get the number of output features for the fully connected layer
        out_features = fc_layer.out_features    
        # Compute number of operations for fully connected layer
        # print(str(out_features) + " * " + str(in_features))
        num_mults = out_features * in_features
        num_adds = out_features * in_features
        num_maxs = 0    
        total_ops = num_mults + num_adds
        return num_mults, num_adds, num_maxs, total_ops

    def count_total_operations(self, x):
        conv1_ops, conv2_ops = self.count_operations(x)
        fc1_ops = self.count_fc_operations(x, self.fc1)
        fc2_ops = self.count_fc_operations(x, self.fc2)
        fc3_ops = self.count_fc_operations(x, self.fc3)
        total_ops = sum(op[3] for op in [conv1_ops, conv2_ops, fc1_ops, fc2_ops, fc3_ops])
        return total_ops


In [50]:
net = Net().to(device)

In [51]:
def evaluate_model(net, dataloader, criterion, epoch, device, iteration_num, i):
    # correct_pred = {classname: 0 for classname in classes}
    # total_pred = {classname: 0 for classname in classes}
    
    with torch.no_grad(): 
        eval_loss = 0.0
        correct = 0
        total = 0
        iteration_num = 0
        batch_evaluate_ops = 0
        if torch.cuda.is_available(): torch.cuda.synchronize()
        epoch_evaluate_start = time.time()
        
        for data in dataloader:
            net.eval()
            images, labels = data
            images, labels = images.to(device), labels.to(device)
            batch_evaluate_ops = net.count_total_operations(images)
            batch_evaluate_ops = images.shape[0] * batch_evaluate_ops
            
            outputs = net(images)
            loss = criterion(outputs, labels)
            eval_loss += loss.item()
            _, predicted = torch.max(outputs.data, 1)
            
            
            # for label, prediction in zip(labels, predicted):
            #     if label == prediction.item():
            #         correct_pred[classes[label]] += 1
            #     total_pred[classes[label]] += 1
            
            
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
            iteration_num += 1
            
        accuracy = 100. * correct / total
        eval_loss /= len(dataloader)
        if(epoch == 0):
            epoch_evaluate_ops = batch_evaluate_ops * iteration_num 
        if torch.cuda.is_available(): torch.cuda.synchronize()
        epoch_evaluate_time = time.time() - epoch_evaluate_start

    # for classname, correct_count in correct_pred.items():
    #     af = 100 * float(correct_count) / total_pred[classname]
    #     print(f'Accuracy for class: {classname:5s} is {af:.1f} %')
    
    return accuracy, eval_loss, batch_evaluate_ops, epoch_evaluate_time



In [52]:
def train(epoch_nums, net, trainloader, testloader, optimizer, criterion, device):
    if torch.cuda.is_available(): torch.cuda.synchronize()
    total_time_start = time.time()
    for epoch in range(epoch_nums):
        net.train()
        sum_loss = 0.0
        correct = 0.0
        total = 0.0
        iteration_num = 0

        for i, data in enumerate(trainloader, 0):
            inputs, labels = data
            inputs = inputs.to(device)
            labels = labels.to(device)
            optimizer.zero_grad()

            if epoch == 0 & i == 0:
                batch_train_ops = net.count_total_operations(inputs)
                batch_train_ops = inputs.shape[0] * batch_train_ops
            if torch.cuda.is_available(): torch.cuda.synchronize()
            batch_train_start = time.time()
            outputs = net(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            sum_loss += loss.item()

            if torch.cuda.is_available(): torch.cuda.synchronize()
            batch_train_time = time.time() - batch_train_start
            
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
            # batch_train_ops_per_second = batch_train_ops / batch_train_time
            iteration_num += 1
            
        if(epoch == 0):
            epoch_train_ops = batch_train_ops * iteration_num
        
        accuracy, eval_loss, batch_evaluate_ops, epoch_evaluate_time = evaluate_model(net, testloader, criterion, epoch, device, iteration_num, i)
        print('[Epoch:%d] Validation Acc: %.3f%% | Loss: %.3f%% | Ops: %d | Time: %.6fs ' % (
            epoch + 1, 
            accuracy, 
            eval_loss,
            batch_evaluate_ops, 
            epoch_evaluate_time,
        ))


In [53]:
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9)    

epoch_nums = 10  # or any number of epochs you want
train(epoch_nums, net, trainloader, testloader, optimizer, criterion, device)
print('Finished Training')

[Epoch:1] Validation Acc: 10.320% | Loss: 2.302% | Ops: 20930688 | Time: 1.060341s 
[Epoch:2] Validation Acc: 15.490% | Loss: 2.298% | Ops: 20930688 | Time: 0.851083s 
[Epoch:3] Validation Acc: 15.250% | Loss: 2.273% | Ops: 20930688 | Time: 1.492649s 
[Epoch:4] Validation Acc: 21.950% | Loss: 2.172% | Ops: 20930688 | Time: 0.821721s 
[Epoch:5] Validation Acc: 26.130% | Loss: 2.022% | Ops: 20930688 | Time: 1.394150s 
[Epoch:6] Validation Acc: 29.920% | Loss: 1.920% | Ops: 20930688 | Time: 1.213524s 
[Epoch:7] Validation Acc: 33.520% | Loss: 1.830% | Ops: 20930688 | Time: 1.499590s 
[Epoch:8] Validation Acc: 37.140% | Loss: 1.744% | Ops: 20930688 | Time: 1.192477s 
[Epoch:9] Validation Acc: 38.000% | Loss: 1.678% | Ops: 20930688 | Time: 1.194107s 
[Epoch:10] Validation Acc: 39.730% | Loss: 1.632% | Ops: 20930688 | Time: 1.312217s 
Finished Training


In [54]:
input_size = (3, 32, 32)
net = net.to(device)
summary(net, input_size = input_size)
# Number of Parameters CONVOL1 = out_channels × (in_channels × kernel_size ** 2 + 1) = 6 * (3 * 25 + 1) = 6 * 76 = 456
# Taille de sortie après 1 layer convolution: ((size_input - kernel_size) + 1 // stride) =  (32 - 5) + 1 = 28
# Maxpooling (2 * 2) = Taille de sortie // 2 = 28 // 2 = 14
# MaxPool2d 层不包含可学习的参数 = 0
# Taille de sortie après 2 layer convolution: ((14 - 5) + 1) // 1 = 10
# Number of Parameters CONVOL2 = out_channels × (in_channels × kernel_size ** 2 + 1) = 16 * (6 * 25 + 1) = 2416
# Maxpooling (2 * 2) = Taille de sortie // 2 = 10 // 2 = 5
# Number of Parameters FC1 =(in_channels + 1) × out_channels = ((16 * 5 * 5) + 1) * 120 = 48120
# Number of Parameters FC2 =(in_channels + 1) × out_channels = ((120) + 1) * 84 = 10164
# Number of Parameters FC3 =(in_channels + 1) × out_channels = ((84) + 1) * 10 = 850


----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1            [-1, 6, 28, 28]             456
         MaxPool2d-2            [-1, 6, 14, 14]               0
            Conv2d-3           [-1, 16, 10, 10]           2,416
         MaxPool2d-4             [-1, 16, 5, 5]               0
            Linear-5                  [-1, 120]          48,120
            Linear-6                   [-1, 84]          10,164
            Linear-7                   [-1, 10]             850
Total params: 62,006
Trainable params: 62,006
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.01
Forward/backward pass size (MB): 0.06
Params size (MB): 0.24
Estimated Total Size (MB): 0.31
----------------------------------------------------------------


=================================================================================================

In [55]:
class ResidualBlock(nn.Module):
    def __init__(self, inchannel, outchannel, stride=1):
        super(ResidualBlock, self).__init__()
        self.left = nn.Sequential(
            nn.Conv2d(inchannel, outchannel, kernel_size=3, stride=stride, padding=1, bias=False),
            nn.BatchNorm2d(outchannel),
            nn.ReLU(inplace=True),
            nn.Conv2d(outchannel, outchannel, kernel_size=3, stride=1, padding=1, bias=False),
            nn.BatchNorm2d(outchannel)
        )
        self.shortcut = nn.Sequential()
        if stride != 1 or inchannel != outchannel:
            self.shortcut = nn.Sequential(
                nn.Conv2d(inchannel, outchannel, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(outchannel)
            )

    def forward(self, x):
        out = self.left(x)
        out += self.shortcut(x)
        out = F.relu(out)
        return out

In [56]:
class ResNet(nn.Module):
    def __init__(self, ResidualBlock, num_classes=10):
        super(ResNet, self).__init__()
        self.inchannel = 64
        self.conv1 = nn.Sequential(
            nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1, bias=False),
            nn.BatchNorm2d(64),
            nn.ReLU(),
        )
        self.layer1 = self.make_layer(ResidualBlock, 64,  2, stride=1)
        #self.layer2 = self.make_layer(ResidualBlock, 128, 2, stride=2)
        #self.layer3 = self.make_layer(ResidualBlock, 256, 2, stride=2)
        #self.layer4 = self.make_layer(ResidualBlock, 512, 2, stride=2)
        #self.fc = nn.Linear(512, num_classes)
        #self.fc = nn.Linear(128, num_classes)
        self.fc = nn.Linear(1024, num_classes)

    def make_layer(self, block, channels, num_blocks, stride):
        strides = [stride] + [1] * (num_blocks - 1)   #strides=[1,1]
        layers = []
        for stride in strides:
            layers.append(block(self.inchannel, channels, stride))
            self.inchannel = channels
        return nn.Sequential(*layers)

    def forward(self, x):
        out = self.conv1(x)
        out = self.layer1(out)
        #out = self.layer2(out)
        #out = self.layer3(out)
        #out = self.layer4(out)
        #out = F.avg_pool2d(out, 4)
        #out = F.avg_pool2d(out, 16)
        out = F.avg_pool2d(out, 8)
        out = out.view(out.size(0), -1)
        out = self.fc(out)
        return out


In [57]:
def ResNet18():
    return ResNet(ResidualBlock)

In [58]:
class My_AlexNet(nn.Module):
    def __init__(self):
        super(My_AlexNet, self).__init__()
        # 特征提取
        self.features = nn.Sequential(
            nn.Conv2d(in_channels=3,out_channels=16,kernel_size=5,stride=1,padding=2),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=3,stride=2,padding=0),
            nn.Conv2d(in_channels=16,out_channels=48,kernel_size=3,padding=1),
            nn.ReLU(),
            nn.BatchNorm2d(48),
            nn.MaxPool2d(kernel_size=3,stride=2,padding=0),
            nn.Conv2d(in_channels=48,out_channels=64,kernel_size=3,padding=1),
            nn.ReLU(),
            nn.Conv2d(in_channels=64,out_channels=64,kernel_size=3,padding=1),
            nn.ReLU(),
            nn.Conv2d(in_channels=64,out_channels=48,kernel_size=3,padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=3,stride=2,padding=0),
        )
        # 全连接层
        self.classifier = nn.Sequential(
            nn.Linear(in_features=3*3*48,out_features=128),
            nn.ReLU(),
            nn.Dropout(p=0.5),
            nn.Linear(128,128),
            nn.ReLU(),
            nn.Linear(128,10)
        )

    # 前向算法
    def forward(self,x):
        x = self.features(x)
        x = torch.flatten(x, 1)
        result = self.classifier(x)
        return result


    def count_maxpool_operations(self, input, output, pool_layer):
        if pool_layer:
            kernel_size = pool_layer.kernel_size
            stride = pool_layer.stride
            padding = pool_layer.padding
            output_height = output.shape[2]
            output_width = output.shape[3]
            out_channels = output.shape[1]
            num_max = ((output_height + 2 * padding - kernel_size) // stride + 1) * ((output_width + 2 * padding - kernel_size) // stride + 1) * (kernel_size ** 2 - 1) * out_channels  
        else:
            num_max = 0
        return num_max
             
    def count_conv_operations(self, input, output, output_pooled, conv_layer, pool_layer):
        out_channels, in_channels = output.size(1), conv_layer.in_channels
        output_height, output_width = output.size(2), output.size(3)
        filter_size = conv_layer.kernel_size[0]
        stride = conv_layer.stride[0]
        padding = conv_layer.padding[0]
        num_mults = output_height * output_width * in_channels * filter_size ** 2 * out_channels
        num_adds = output_height * output_width * in_channels * filter_size ** 2 * out_channels
        num_maxs = self.count_maxpool_operations(output, output_pooled, pool_layer)
        if pool_layer:
            num_maxs = self.count_maxpool_operations(output, output_pooled, pool_layer)
        else:
            num_maxs = 0
        total_ops = num_mults + num_adds + num_maxs
        return num_mults, num_adds, num_maxs, total_ops

    def count_operations(self, x):
        conv1_out = self.features[0](x)
        conv1_out_pooled = self.features[2](F.relu(conv1_out))
        conv2_out = self.features[3](conv1_out_pooled)
        conv2_out_pooled = self.features[6](F.relu(conv2_out))
        
        conv3_out = self.features[7](conv2_out_pooled)
        conv4_out = self.features[9](F.relu(conv3_out))
        conv5_out = self.features[11](F.relu(conv4_out))
        conv5_out_pooled = self.features[13](F.relu(conv5_out))
        
        conv1_ops = self.count_conv_operations(x, conv1_out, conv1_out_pooled, self.features[0], self.features[2])
        conv2_ops = self.count_conv_operations(conv1_out_pooled, conv2_out, conv2_out_pooled, self.features[3], self.features[6])
        conv3_ops = self.count_conv_operations(conv2_out_pooled, conv3_out, conv3_out, self.features[7], None)
        conv4_ops = self.count_conv_operations(conv3_out, conv4_out, conv4_out, self.features[9], None)
        conv5_ops = self.count_conv_operations(conv4_out, conv5_out, conv5_out_pooled, self.features[11], self.features[13])
        return conv1_ops, conv2_ops, conv3_ops, conv4_ops, conv5_ops


    def count_fc_operations(self, input, fc_layer):
        in_features = fc_layer.in_features
        out_features = fc_layer.out_features    
        num_mults = out_features * in_features
        num_adds = out_features * in_features
        num_maxs = 0
        total_ops = num_mults + num_adds
        return num_mults, num_adds, num_maxs, total_ops
    
    def count_total_operations(self, x):
        conv1_ops, conv2_ops, conv3_ops, conv4_ops, conv5_ops  = self.count_operations(x)
        fc1_ops = self.count_fc_operations(x, self.classifier[0])
        fc2_ops = self.count_fc_operations(x, self.classifier[3])
        fc3_ops = self.count_fc_operations(x, self.classifier[5])
        total_ops = sum(op[3] for op in [conv1_ops, conv2_ops, conv3_ops, conv4_ops, conv5_ops, fc1_ops, fc2_ops, fc3_ops])
        return total_ops

In [59]:
#net = ResNet18().to(device)
net = My_AlexNet().to(device)

# 定义损失函数和优化方式
criterion = nn.CrossEntropyLoss()  #损失函数为交叉熵，多用于多分类问题
optimizer = optim.SGD(net.parameters(), lr=LR, momentum=0.9, weight_decay=5e-4) #优化方式为mini-batch momentum-SGD，并采用L2正则化（权重衰减）

epoch_nums = 40  # or any number of epochs you want
train(epoch_nums, net, trainloader, testloader, optimizer, criterion, device)
print('Finished Training')

[Epoch:1] Validation Acc: 31.850% | Loss: 1.780% | Ops: 236091392 | Time: 1.190848s 
[Epoch:2] Validation Acc: 50.120% | Loss: 1.343% | Ops: 236091392 | Time: 1.169844s 
[Epoch:3] Validation Acc: 55.780% | Loss: 1.219% | Ops: 236091392 | Time: 1.467693s 
[Epoch:4] Validation Acc: 58.070% | Loss: 1.168% | Ops: 236091392 | Time: 1.405068s 
[Epoch:5] Validation Acc: 64.850% | Loss: 0.981% | Ops: 236091392 | Time: 1.443400s 
[Epoch:6] Validation Acc: 67.210% | Loss: 0.942% | Ops: 236091392 | Time: 1.450630s 
[Epoch:7] Validation Acc: 64.420% | Loss: 1.037% | Ops: 236091392 | Time: 1.300340s 
[Epoch:8] Validation Acc: 69.470% | Loss: 0.892% | Ops: 236091392 | Time: 1.394092s 
[Epoch:9] Validation Acc: 70.990% | Loss: 0.830% | Ops: 236091392 | Time: 1.186645s 
[Epoch:10] Validation Acc: 73.370% | Loss: 0.773% | Ops: 236091392 | Time: 1.347919s 
[Epoch:11] Validation Acc: 72.960% | Loss: 0.794% | Ops: 236091392 | Time: 1.484970s 
[Epoch:12] Validation Acc: 74.120% | Loss: 0.775% | Ops: 236091