In [1]:
import torch
from torchvision import datasets, transforms
from random import random
import numpy as np

In [2]:
batch_size = 4

In [3]:
transform = transforms.ToTensor()

cifar10 = datasets.CIFAR10(
    root="./data",
    train=True,
    download=True,
    transform=transform
)

def raw_data(batch_size=4):
    data_loader = torch.utils.data.DataLoader(
        cifar10,
        batch_size=batch_size,
        shuffle=False
    )
    images, labels = next(iter(data_loader))
    return images, labels


In [None]:
# for each batch
def ConvLayer(input, kernel):
    """input have shape (B, Cin, H, W)
    kernel have shape (Cout, Cin, K, K) # 共享权重"""
    temp = input.shape # (B, Cin, H, W)
    B = temp[0]
    Cin = temp[1]
    H = temp[2]
    W = temp[3]

    temp = kernel.shape # (Cout, Cin, K, K) # 共享权重
    Cout = temp[0]
    K = temp[2]

    res = np.zeros((B, Cout, H-K+1, W-K+1)) # 卷积结果
    for b in range(B):
        for k in range(len(kernel)): # k have shape (Cin, K, K)
            for i in range(H-K+1): # scan image
                for j in range(W-K+1):
                    res[b, k, i, j] = (input[b, :, i:i+K, j:j+K] * kernel[k]).sum()
    return res

def ReULayer(input):
    return np.maximum(0, input)

def MaxPoolLayer(input, pool_size=2):
    """input have shape (B, C, H, W)"""
    temp = input.shape
    B = temp[0]
    C = temp[1]
    H = temp[2]
    W = temp[3]

    for b in range(B):
        for c in range(C):
            for i in range(H//2):
                for j in range(W//2):
                    input[b, c, i, j] = input[b, c, 2*i:2*i+2, 2*j:2*j+2].max()
    return input

def Block(input, kernel):
    """input have shape (B, Cin, H, W)
    kernel have shape (Cout, Cin, K, K) # 共享权重"""
    conv_out = ConvLayer(input, kernel)
    relu_out = ReULayer(conv_out)
    pool_out = MaxPoolLayer(relu_out)
    return pool_out

def FCLayer(input, weight):
    """input have shape (B, C, H, W) 展平后为 (B, C*H*W),
    weight have shape (len(label), C*H*W)"""
    temp = input.shape
    B = temp[0]
    Cin = temp[1]
    H = temp[2]
    W = temp[3]

    x = input.reshape(B, -1) # 展平为 (B, C*H*W)
    # out = x @ weight.T # (B, C*H*W) * (C*H*W, len(label)) = (B, len(label))
    
    # return out

def SoftmaxLayer(input):
    """input have shape (B, len(label))"""
    exp_input = np.exp(input - np.max(input, axis=1, keepdims=True)) # 防止数值溢出
    return exp_input / exp_input.sum(axis=1, keepdims=True)

def CrossEntropyLoss(pred, label):
    """pred have shape (B, len(label)), label have shape (B,)"""
    B = pred.shape[0]
    loss = 0.0
    for b in range(B):
        loss -= np.log(pred[b, label[b]] + 1e-10) # 加上一个小数防止log(0)
    return loss / B

In [None]:
images, labels = raw_data()

images_np = images.numpy()
labels_np = labels.numpy()
print("Images shape:", images_np.shape)
print("Labels shape:", labels_np.shape)

def MyVGG(input, kernels, weight):
    """input have shape (B, 3, 32, 32)"""
    # blocks
    temp = input
    for i in range(len(kernels)):
        temp = Block(temp, kernels[i])
    
    fc_out = FCLayer(temp, weight)
    print("5")
    return fc_out


kernel1 = np.random.rand(64, 3, 3, 3) # (Cout, Cin, K, K)
kernel2 = np.random.rand(128, 64, 3, 3)
kernel3 = np.random.rand(256, 128, 3, 3)
weight = np.random.rand(10, 64*26*26) # (len(label), C*H*W)

Block_out1 = Block(input=images_np, kernel=kernel1)
Block_out2 = Block(input=Block_out1, kernel=kernel2)
Block_out3 = Block(input=Block_out2, kernel=kernel3)

Images shape: (4, 3, 32, 32)
Labels shape: (4,)


In [40]:
print("Block_out3 shape:", Block_out3.shape) # (B, 256, 4, 4)
x = Block_out3.reshape(4, -1) # 展平为 (B, C*H*W)
print("x shape:", x.shape) # (B, C*H*W) = (4, 4096)
np.dot(weight, x[0]) # (len(label), C*H*W) * (C*H*W,) = (len(label),)

Block_out3 shape: (4, 256, 26, 26)
x shape: (4, 173056)


ValueError: shapes (10,16384) and (173056,) not aligned: 16384 (dim 1) != 173056 (dim 0)

In [None]:
256*