In [2]:
import numpy as np
import torch
import torch.nn as nn
import os
import torchvision
from torch.utils.data import DataLoader, Dataset, random_split
from PIL import Image

In [3]:
# Preprocessing pipeline applied to every image: PIL image -> torch tensor.
# Compose takes an ordered *list* of transforms. The previous set literal
# ({...}) has no defined iteration order, so adding a second transform would
# have made the order of the pipeline arbitrary.
# NOTE(review): no resize/crop is applied here; batching with the default
# DataLoader collate requires equally sized images -- confirm downstream.
transform = torchvision.transforms.Compose([
    torchvision.transforms.ToTensor(),
])


class CustomDataset(Dataset):
    """Image-classification dataset read from a folder with one sub-folder per class.

    Expected layout::

        data_dir/
            <class_a>/<image files>
            <class_b>/<image files>

    Class names are the sub-folder names, sorted alphabetically; the integer
    label of a sample is the index of its class in ``self.classes``.

    Args:
        data_dir: Path of the root folder that contains the class sub-folders.
        transform: Optional callable applied to the RGB ``PIL.Image`` before it
            is returned from ``__getitem__``.

    Attributes:
        classes: Sorted list of class (sub-folder) names.
        img_paths: Path of every image file, grouped by class.
        labels: Integer label of every image, aligned with ``img_paths``.
    """

    def __init__(self, data_dir, transform=None):
        self.data_dir = data_dir
        self.transform = transform
        # os.listdir returns entries in arbitrary order, so sort to make the
        # class-name -> label mapping identical on every run and machine.
        # Only directories are classes; stray files such as .DS_Store would
        # otherwise make os.listdir(cls_dir) below fail.
        self.classes = sorted(
            name for name in os.listdir(self.data_dir)
            if os.path.isdir(os.path.join(self.data_dir, name))
        )
        self.img_paths, self.labels = [], []

        for i, cls in enumerate(self.classes):
            cls_dir = os.path.join(self.data_dir, cls)
            # Sorted for a reproducible sample order; nested folders are skipped
            # because they cannot be opened as images.
            img_list = sorted(
                name for name in os.listdir(cls_dir)
                if os.path.isfile(os.path.join(cls_dir, name))
            )
            self.img_paths.extend(os.path.join(cls_dir, img) for img in img_list)
            self.labels.extend([i] * len(img_list))

    def __len__(self):
        """Return the number of images in the dataset."""
        return len(self.labels)

    def __getitem__(self, index):
        """Load the sample at ``index``.

        Returns:
            A tuple ``(img, label)`` where ``img`` is the RGB image (after
            ``transform`` if one was given) and ``label`` is its integer class index.
        """
        img_path = self.img_paths[index]
        label = self.labels[index]

        # Image.open keeps the file handle open until the image is closed;
        # convert() returns an independent in-memory copy, so the handle can
        # be released right away instead of leaking one per sample.
        with Image.open(img_path) as raw:
            img = raw.convert('RGB')

        if self.transform is not None:
            img = self.transform(img)

        return img, label

    def labelsname(self, i):
        """Return the class (folder) name that corresponds to integer label ``i``."""
        return self.classes[i]


# Root folder of the raw images (one sub-folder per class), relative to the
# notebook's working directory.
DATA_PATH = '../../DATASETS/animal10_classification/raw-img/'

# Build the full dataset; every image goes through `transform` when accessed.
dataset = CustomDataset(data_dir=DATA_PATH, transform=transform)

26179

In [3]:
class Bottleneck(nn.Module):
    """ResNet bottleneck residual block: 1x1 -> 3x3 -> 1x1 convolutions plus a shortcut.

    The block maps ``(N, in_channels, H, W)`` to
    ``(N, out_channels * expansion, ceil(H / stride), ceil(W / stride))``.

    Args:
        in_channels: Number of channels of the input tensor.
        out_channels: Width of the two inner convolutions; the block outputs
            ``out_channels * expansion`` channels.
        downsample: If True, the shortcut is a 1x1 convolution + batch norm that
            projects the input to the output shape. If False, the shortcut is
            the identity, which requires ``in_channels == out_channels * expansion``
            and ``stride == 1``.
        stride: Spatial stride of the block, applied in the 3x3 convolution and
            in the projection shortcut. When None (default) it is inferred the
            way ResNet-50 uses it: 2 for a downsampling block whose width is
            not 64 (first block of stages 2-4), otherwise 1.

    Raises:
        ValueError: If ``downsample`` is False but the identity shortcut could
            not match the shape of the main branch.
    """

    expansion = 4

    def __init__(self, in_channels, out_channels, downsample=False, stride=None):
        super().__init__()
        expanded_channels = out_channels * self.expansion

        if stride is None:
            # The 64-wide stage follows the stem's max-pool and only changes the
            # channel count; the later stages halve the resolution.
            stride = 2 if (downsample and out_channels != 64) else 1

        if not downsample and (stride != 1 or in_channels != expanded_channels):
            raise ValueError(
                "identity shortcut needs stride == 1 and in_channels == "
                f"out_channels * {self.expansion}; got in_channels={in_channels}, "
                f"out_channels={out_channels}, stride={stride}"
            )

        self.conv1 = nn.Conv2d(in_channels, out_channels, kernel_size=(1, 1), stride=1, bias=False)
        self.bn1 = nn.BatchNorm2d(out_channels)
        # padding=1 keeps the spatial size of the 3x3 convolution aligned with
        # the shortcut; without it the residual addition fails on shape mismatch.
        self.conv2 = nn.Conv2d(out_channels, out_channels, kernel_size=(3, 3), stride=stride,
                               padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(out_channels)
        self.conv3 = nn.Conv2d(out_channels, expanded_channels, kernel_size=(1, 1), stride=1, bias=False)
        self.bn3 = nn.BatchNorm2d(expanded_channels)
        self.relu = nn.ReLU(inplace=True)

        if downsample:
            # Projection shortcut: same stride as the main branch so both
            # branches end up with identical shapes.
            self.downsample = nn.Sequential(
                nn.Conv2d(in_channels, expanded_channels, kernel_size=(1, 1), stride=stride, bias=False),
                nn.BatchNorm2d(expanded_channels),
            )
        else:
            self.downsample = None

    def forward(self, x):
        """Apply the residual block to ``x`` and return the activated sum."""
        identity = x

        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)

        out = self.conv2(out)
        out = self.bn2(out)
        out = self.relu(out)

        out = self.conv3(out)
        out = self.bn3(out)

        if self.downsample is not None:
            identity = self.downsample(x)

        out += identity  # residual connection
        out = self.relu(out)

        return out


class resnet50(nn.Module):
    """ResNet-50 built from ``Bottleneck`` blocks (3, 4, 6, 3 blocks per stage).

    Args:
        in_channels: Number of channels of the input images.
        backbone: If True, no classification head is created and ``forward``
            returns the pooled feature map.
        num_classes: Number of outputs of the fully connected head. Only used
            when ``backbone`` is False.

    ``forward`` returns a tensor of shape ``(N, num_classes)`` when the head is
    present, otherwise the pooled features of shape ``(N, 2048, 1, 1)``.
    """

    def __init__(self, in_channels, backbone=False, num_classes=0):
        super().__init__()
        # Stem: 7x7 stride-2 convolution followed by a stride-2 max-pool.
        self.conv1 = nn.Conv2d(in_channels, 64, kernel_size=(7, 7), stride=2, padding=(3, 3), bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        self.relu = nn.ReLU(inplace=True)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)

        # In every stage only the first block changes the channel count and
        # therefore needs a projection shortcut (third argument True).
        self.layer1 = nn.Sequential(
            Bottleneck(64, 64, True),
            Bottleneck(256, 64, False),
            Bottleneck(256, 64, False)
        )
        self.layer2 = nn.Sequential(
            Bottleneck(256, 128, True),
            Bottleneck(512, 128, False),
            Bottleneck(512, 128, False),
            Bottleneck(512, 128, False)
        )
        self.layer3 = nn.Sequential(
            Bottleneck(512, 256, True),
            Bottleneck(1024, 256, False),
            Bottleneck(1024, 256, False),
            Bottleneck(1024, 256, False),
            Bottleneck(1024, 256, False),
            Bottleneck(1024, 256, False)
        )
        self.layer4 = nn.Sequential(
            Bottleneck(1024, 512, True),
            Bottleneck(2048, 512, False),
            Bottleneck(2048, 512, False)
        )

        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        if not backbone:
            self.fc = nn.Linear(in_features=2048, out_features=num_classes, bias=True)
        else:
            self.fc = None

    def forward(self, x):
        """Run the network on a batch ``x`` of shape ``(N, in_channels, H, W)``."""
        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)
        out = self.maxpool(out)

        out = self.layer1(out)
        out = self.layer2(out)
        out = self.layer3(out)
        out = self.layer4(out)

        out = self.avgpool(out)
        if self.fc is not None:
            # avgpool yields (N, 2048, 1, 1); nn.Linear works on the last
            # dimension, so collapse to (N, 2048) before the classifier.
            out = torch.flatten(out, 1)
            out = self.fc(out)

        return out

In [4]:
# RGB input (3 channels) and a 10-way classification head.
model = resnet50(in_channels=3, num_classes=10)