In [1]:
import numpy as np
import torch
import torch.nn as nn
import os
import torchvision
from torch.utils.data import DataLoader, Dataset, random_split
from PIL import Image

In [45]:
transform = torchvision.transforms.Compose({
    torchvision.transforms.ToTensor(),
    # torchvision.transforms.Resize([224, 224])
})


class CustomDataset(Dataset):
    def __init__(self, data_dir, transform=None):
        self.data_dir = data_dir
        self.transform = transform
        self.classes = os.listdir(self.data_dir)
        self.img_paths, self.labels = [], []

        for i, cls in enumerate(self.classes):
            cls_dir = os.path.join(self.data_dir, cls)
            img_list = os.listdir(cls_dir)
            self.img_paths.extend([os.path.join(cls_dir, img) for img in img_list])
            self.labels.extend([i] * len(img_list))
    
    def __len__(self):
        return len(self.labels)
    
    def __getitem__(self, index):
        img_path = self.img_paths[index]
        img = Image.open(img_path).convert('RGB')
        label = self.labels[index]

        if self.transform:
            img = self.transform(img)

        return img, label

    def labelsname(self, i):
        """返回 0 ~ 9 号所对应的标签名"""
        return self.classes[i]


DATA_PATH = '../../DATASETS/animal10_classification/raw-img/'
dataset = CustomDataset(DATA_PATH, transform)
train_data, valid_data = random_split(dataset, [0.8, 0.2])  # generator类用于管理生成器随机种子
# train: 20944, valid: 5235

In [74]:
class Bottleneck(nn.Module):
    expansion = 4

    def __init__(self, in_channels, out_channels, downsample=False):
        super().__init__()
        self.conv1 = nn.Conv2d(in_channels, out_channels, kernel_size=(1, 1), stride=1, bias=False)
        self.bn1 = nn.BatchNorm2d(out_channels)
        self.conv2 = nn.Conv2d(out_channels, out_channels, kernel_size=(3, 3), stride=1, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(out_channels)
        self.conv3 = nn.Conv2d(out_channels, out_channels * self.expansion, kernel_size=(1, 1), stride=1, bias=False)
        self.bn3 = nn.BatchNorm2d(out_channels * self.expansion)
        self.relu = nn.ReLU(inplace=True)
        if downsample:
            if out_channels == 64:
                stride = 1
                self.downsample = nn.Sequential(
                    nn.Conv2d(in_channels, out_channels * self.expansion, kernel_size=(1, 1), stride=stride, bias=False),
                    nn.BatchNorm2d(out_channels * self.expansion)
                    )
            else:
                self.downsample = nn.Sequential(
                    nn.Conv2d(in_channels, out_channels * self.expansion, kernel_size=(1, 1), stride=1, bias=False),
                    nn.BatchNorm2d(out_channels * self.expansion)
                    )
        else:
            self.downsample = None

    def forward(self, x):
        identity = x

        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)

        out = self.conv2(out)
        out = self.bn2(out)
        out = self.relu(out)

        out = self.conv3(out)
        out = self.bn3(out)

        if self.downsample is not None:
            identity = self.downsample(x)
        
        out += identity  # 残差连接
        out = self.relu(out)

        return out


class resnet50(nn.Module):
    def __init__(self, in_channels, backbone=False, num_classes=0):
        super().__init__()
        self.conv1 = nn.Conv2d(in_channels, 64, kernel_size=(7, 7), stride=2, padding=(3, 3), bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        self.relu = nn.ReLU(inplace=True)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)

        self.layer1 = nn.Sequential(
            Bottleneck(64, 64, True),
            Bottleneck(256, 64, False),
            Bottleneck(256, 64, False)
        )
        self.layer2 = nn.Sequential(
            Bottleneck(256, 128, True),
            Bottleneck(512, 128, False),
            Bottleneck(512, 128, False),
            Bottleneck(512, 128, False)
        )
        self.layer3 = nn.Sequential(
            Bottleneck(512, 256, True),
            Bottleneck(1024, 256, False),
            Bottleneck(1024, 256, False),
            Bottleneck(1024, 256, False),
            Bottleneck(1024, 256, False),
            Bottleneck(1024, 256, False)
        )
        self.layer4 = nn.Sequential(
            Bottleneck(1024, 512, True),
            Bottleneck(2048, 512, False),
            Bottleneck(2048, 512, False)
        )

        if not backbone:
            self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
            # [B, C, H, W] -> [B, C, 1, 1] 即一个批次中，对每份样本，在不同的通道层面上进行二维平均池化
            self.fc = nn.Linear(in_features=2048, out_features=num_classes, bias=True)
            self.softmax = nn.Softmax(dim=3)
        else:
            self.fc = None
        
    def forward(self, x):
        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)
        out = self.maxpool(out)

        out = self.layer1(out)
        out = self.layer2(out)
        out = self.layer3(out)
        out = self.layer4(out)
        print(out.shape)
        out = self.avgpool(out)
        print(out.shape)
        if self.fc is not None:
            out = out.permute(0, 3, 2, 1)
            out = self.fc(out)
            out = self.softmax(out)
        
        return out

In [75]:
model = resnet50(3, num_classes=10, backbone=False)

x = 10
print(train_data[x][0].shape)
pred = model(train_data[x][0].expand((1, 3, 169, 300)))
print(pred)

torch.Size([3, 169, 300])
torch.Size([1, 2048, 43, 75])
torch.Size([1, 2048, 1, 1])
tensor([[[[0.0632, 0.0537, 0.0959, 0.1539, 0.2726, 0.0602, 0.0641, 0.0443,
           0.1108, 0.0813]]]], grad_fn=<SoftmaxBackward0>)


In [84]:
a = torch.arange(20, dtype=torch.float16).reshape([1, 5, 2, 2])
print(a)
b = nn.AdaptiveAvgPool2d((1, 1))(a)
print(b)

tensor([[[[ 0.,  1.],
          [ 2.,  3.]],

         [[ 4.,  5.],
          [ 6.,  7.]],

         [[ 8.,  9.],
          [10., 11.]],

         [[12., 13.],
          [14., 15.]],

         [[16., 17.],
          [18., 19.]]]], dtype=torch.float16)
tensor([[[[ 1.5000]],

         [[ 5.5000]],

         [[ 9.5000]],

         [[13.5000]],

         [[17.5000]]]], dtype=torch.float16)
