In [1]:
import torch
import os
from torch import nn
import torch.nn.functional as F 
import pickle
from torchsummary import summary
import numpy as np
import torchvision.transforms as transforms
import torch.nn as nn
from torch.utils.data import Dataset
from matplotlib import pyplot as plt
from torch.utils.data import DataLoader, Subset
from torch.optim.lr_scheduler import MultiStepLR
import torch.optim as optim
from torch.optim.lr_scheduler import StepLR, ReduceLROnPlateau
from tqdm import tqdm
# from model.res3net import Res3Net, BasicBlock
# from model.res3netpre import Res3NetPre, BasicBlockPre
from torch.optim.lr_scheduler import CosineAnnealingLR
from torch.cuda.amp import GradScaler, autocast

In [2]:
def unpickle(file):
    with open(file, 'rb') as fo:
        dict = pickle.load(fo, encoding='bytes')
    return dict

def make_data_label(lst_of_dic, do_label=True):
    data = torch.empty((0, 3, 32, 32))
    if do_label:
        label = torch.empty(0, dtype=torch.long)
    for dic in lst_of_dic:
        # Convert the data to a tensor and reshape
        cur_data = torch.tensor(dic[b'data']).reshape(10000, 3, 32, 32)
        data = torch.cat((data, cur_data), dim=0)
        if do_label:
            cur_label = torch.tensor(dic[b'labels'], dtype=torch.long).reshape(10000)
            label = torch.cat((label, cur_label), dim=0)
    # print(data[1])
    if do_label:
        return data, label
    else:
        return data

class cifar_dataset(Dataset):
    def __init__(self, data, label, transform=None):
        """
        Args:
            data (Tensor): The raw image data with shape (n, 3, 32, 32) and pixel values in [0, 255].
            label (Tensor): The corresponding labels.
            transform (callable, optional): Optional transform to be applied on a sample.
        """
        # Convert data to float and normalize pixel values to [0, 1]
        self.transform = transforms.Compose([transforms.Normalize(mean = [0.4914, 0.4822, 0.4465], std = [0.2470, 0.2435, 0.2616])])
        self.data = data / 255
        self.label = label

    def __getitem__(self, index):
        img, lbl = self.data[index], self.label[index]
        return img, lbl

    def __len__(self):
        return self.data.shape[0]

In [1]:
root = r'deep-learning-spring-2025-project-1\cifar-10-python\cifar-10-batches-py\data_batch_'
lst = []
for i in range(5):
    lst.append(unpickle(root+str(i+1)))
#lst

NameError: name 'unpickle' is not defined

In [4]:
import numpy as np
import pandas as pd
import os
import pickle
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms as transforms
import torchvision.datasets as datasets
from torch.utils.data import DataLoader, random_split, TensorDataset
from torch.optim.lr_scheduler import StepLR, MultiStepLR
from PIL import Image
import torch.optim.lr_scheduler as lr_scheduler

# auto. choose CPU or GPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Using device:", device)

# Function to load CIFAR-10 dataset
def load_cifar_batch(file):
    with open(file, 'rb') as fo:
        dict = pickle.load(fo, encoding='bytes')
    return dict

# Specify the directory containing CIFAR-10 batches
cifar10_dir = 'deep-learning-spring-2025-project-1/cifar-10-python/cifar-10-batches-py'

# Load metadata (labels)
meta_data_dict = load_cifar_batch(os.path.join(cifar10_dir, 'batches.meta'))
label_names = [label.decode('utf-8') for label in meta_data_dict[b'label_names']]

# Load training data
train_data = []
train_labels = []
for i in range(1, 6):
    batch = load_cifar_batch(os.path.join(cifar10_dir, f'data_batch_{i}'))
    train_data.append(batch[b'data'])
    train_labels += batch[b'labels']

train_data = np.vstack(train_data).reshape(-1, 3, 32, 32).transpose(0, 2, 3, 1)  # Convert to HWC format
train_labels = np.array(train_labels)

# Data augmentation and normalization
transform = transforms.Compose([
    transforms.ToPILImage(),
    transforms.RandomCrop(32, padding=4),
    transforms.RandomHorizontalFlip(p=0.5),
    transforms.ToTensor(),
    transforms.Normalize((0.4914, 0.4822, 0.4465), (0.247, 0.243, 0.261)),
])

# Convert to TensorDataset and apply transformations
class CustomCIFAR10Dataset(Dataset):
    def __init__(self, images, labels, transform):
        self.images = images
        self.labels = torch.tensor(labels, dtype=torch.long)  # 保持为numpy数组
        self.transform = transform

    def __len__(self):
        return len(self.images)

    def __getitem__(self, idx):
        # img = Image.fromarray(self.images[idx])  # 直接转PIL图像（更高效）
        img = self.transform(self.images[idx])  # 应用transform
        label = self.labels[idx]
        return img, label

train_dataset = CustomCIFAR10Dataset(train_data, train_labels, transform=transform)

# Split into training and validation sets
#train_size = int(0.9 * len(train_dataset))
#val_size = len(train_dataset) - train_size
#train_dataset, val_dataset = random_split(train_dataset, [train_size, val_size])

test_transform = transforms.Compose([
    # transforms.ToPILImage(),  # Convert numpy array to PIL Image
    transforms.ToTensor(),
    transforms.Normalize((0.4914, 0.4822, 0.4465), (0.247, 0.243, 0.261))
])

batch_test_dict = load_cifar_batch(os.path.join(cifar10_dir, 'test_batch'))
val_images = batch_test_dict[b'data'].reshape(-1, 3, 32, 32).transpose(0, 2, 3, 1)
val_labels = np.array(batch_test_dict[b'labels'])

val_dataset = CustomCIFAR10Dataset(val_images, val_labels, transform=test_transform)

# DataLoaders
# train_loader = DataLoader(train_dataset, batch_size=128, shuffle=True, num_workers=8, pin_memory=True, persistent_workers=True)
# valid_loader = DataLoader(val_dataset, batch_size=128, shuffle=False, num_workers=8, pin_memory=True, persistent_workers=True)
train_loader = DataLoader(train_dataset, batch_size=128, shuffle=True, pin_memory=True)
valid_loader = DataLoader(val_dataset, batch_size=128, shuffle=False, pin_memory=True)

# Load test dataset
cifar_test_path = 'deep-learning-spring-2025-project-1/cifar_test_nolabel.pkl'
test_batch = load_cifar_batch(cifar_test_path)
test_images = test_batch[b'data'].astype(np.float32) / 255.0

# Convert test dataset to Tensor
test_dataset = [(test_transform(img),) for img in test_images]
test_loader = DataLoader(test_dataset, batch_size=1, shuffle=False)

#train_dataset[0][0] == transform(train_data[0])

Using device: cuda


In [5]:
train_dataset[0][1].dtype

torch.int64

In [4]:
import math
from model.resnet32 import ResNet32, BasicBlock
import torch.nn as nn
import torch.nn.functional as F
from torchsummary import summary
import torch
class BasicBlock(nn.Module):
    expansion = 1


    def __init__(self, in_planes, planes, stride=1):
        super(BasicBlock, self).__init__()

        DROPOUT = 0.1

        self.conv1 = nn.Conv2d(
            in_planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)

        self.bn1 = nn.BatchNorm2d(planes)
        self.dropout = nn.Dropout(DROPOUT)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3,
                               stride=1, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)

        self.shortcut = nn.Sequential()
        if stride != 1 or in_planes != self.expansion*planes:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_planes, self.expansion*planes,
                          kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(self.expansion*planes),
                nn.Dropout(DROPOUT)
            )

    def forward(self, x):
        out = F.relu(self.dropout(self.bn1(self.conv1(x))))
        out = F.relu(self.dropout(self.bn2(self.conv2(out))))
        out = out + self.shortcut(x)
        out = F.relu(out)
        return out


class ResNet(nn.Module):
    def __init__(self, block, num_blocks=[2, 2, 1, 1], num_classes=10):
        super(ResNet, self).__init__()
        self.in_planes = 64

        self.conv1 = nn.Conv2d(3, 64, kernel_size=3,
                               stride=1, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        self.layer1 = self._make_layer(block, 64, num_blocks[0], stride=1)
        self.layer2 = self._make_layer(block, 64, num_blocks[1], stride=2)
        self.layer3 = self._make_layer(block, 256, num_blocks[2], stride=2)
        self.layer4 = self._make_layer(block, 512, num_blocks[3], stride=2)
        self.linear1 = nn.Linear(512*block.expansion, 10)
        # self.linear2 = nn.Linear(128, num_classes)

    def _make_layer(self, block, planes, num_blocks, stride):
        strides = [stride] + [1]*(num_blocks-1)
        layers = []
        for stride in strides:
            layers.append(block(self.in_planes, planes, stride))
            self.in_planes = planes * block.expansion
        return nn.Sequential(*layers)

    def forward(self, x):
        out = F.relu(self.bn1(self.conv1(x)))
        out = self.layer1(out)
        out = self.layer2(out)
        out = self.layer3(out)
        out = self.layer4(out)
        out = F.avg_pool2d(out, 4)
        out = out.view(out.size(0), -1)
        out = self.linear1(out)
        # out = F.relu(out)
        # out = self.linear2(out)
        return F.log_softmax(out, dim=-1)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# Importing Model and printing Summary
model = ResNet32(num_classes=10).to(device)
summary(model, input_size=(3,32,32))


----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1           [-1, 16, 32, 32]             432
       BatchNorm2d-2           [-1, 16, 32, 32]              32
            Conv2d-3           [-1, 16, 32, 32]           2,304
       BatchNorm2d-4           [-1, 16, 32, 32]              32
            Conv2d-5           [-1, 16, 32, 32]           2,304
       BatchNorm2d-6           [-1, 16, 32, 32]              32
        BasicBlock-7           [-1, 16, 32, 32]               0
            Conv2d-8           [-1, 16, 32, 32]           2,304
       BatchNorm2d-9           [-1, 16, 32, 32]              32
           Conv2d-10           [-1, 16, 32, 32]           2,304
      BatchNorm2d-11           [-1, 16, 32, 32]              32
       BasicBlock-12           [-1, 16, 32, 32]               0
           Conv2d-13           [-1, 16, 32, 32]           2,304
      BatchNorm2d-14           [-1, 16,

In [7]:
model(train_dataset[0][0].to(device).unsqueeze(0))

tensor([[-2.4823, -2.5297, -2.1597, -2.2404, -2.4353, -2.2294, -2.1318, -2.1364,
         -2.3743, -2.4075]], device='cuda:0', grad_fn=<LogSoftmaxBackward0>)

In [1]:
# Function to load CIFAR-10 batch files
def unpickle(file):
    with open(file, 'rb') as fo:
        dict = pickle.load(fo, encoding='bytes')
    return dict

# Load one batch
batch = unpickle(r'deep-learning-spring-2025-project-1/cifar_test_nolabel.pkl')
# batch = unpickle(r'deep-learning-spring-2025-project-1/cifar-10-python/cifar-10-batches-py/data_batch_1')
images = test_images
# labels = batch[b'labels']

# Reshape images
# images = images.reshape(-1, 3, 32, 32)
print(images.shape)
#images  = images.transpose(0, 2, 3, 1)
# images = images[:,:,:,torch.tensor([0,1,2])]
# CIFAR-10 class labels
class_names = ['airplane', 'automobile', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck']

# Plot some images
fig, axes = plt.subplots(2, 5, figsize=(10, 5))
for i in range(10):
    ax = axes[i // 5, i % 5]
    ax.imshow(images[i+110])
    # ax.set_title(class_names[labels[i]])
    ax.axis('off')

plt.show()


NameError: name 'pickle' is not defined

In [14]:
torch.cuda.empty_cache()

In [13]:
import os
cwd = os.getcwd()
def model_testing(model, device, test_dataloader, test_acc, test_losses, misclassified = []):

    model.eval()
    test_loss = 0
    correct = 0
    class_correct = list(0. for i in range(10))
    class_total = list(0. for i in range(10))
    # label = 0
    classes = ('plane', 'car', 'bird', 'cat',
           'deer', 'dog', 'frog', 'horse', 'ship', 'truck')

    with torch.no_grad():

        for index, (data, target) in enumerate(test_dataloader):
            data, target = data.to(device), target.to(device)
            output = model(data)
            pred = output.argmax(dim=1, keepdim=True)

            for d,i,j in zip(data, pred, target):
                if i != j:
                    misclassified.append([d.cpu(),i[0].cpu(),j.cpu()])

            test_loss += F.nll_loss(output, target, reduction='sum').item()
            correct += pred.eq(target.view_as(pred)).sum().item()
    test_loss /= len(test_dataloader.dataset)
    test_losses.append(test_loss)

    print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.2f}%)\n'.format(
        test_loss, correct, len(test_dataloader.dataset),
        100. * correct / len(test_dataloader.dataset)))

    test_acc.append(100. * correct / len(test_dataloader.dataset))
    return misclassified

# def model_training(model, device, train_dataloader, optimizer, train_acc, train_losses, criterion):
#
#     model.train()
#     # pbar = tqdm(train_dataloader)
#     correct = 0
#     processed = 0
#     running_loss = 0.0
#
#     for batch_idx, (data, target) in enumerate(train_dataloader):
#         data, target = data.to(device), target.to(device)
#         optimizer.zero_grad()
#         with autocast(enabled=True):
#             y_pred = model(data)
#             loss = F.nll_loss(y_pred, target)
#
#
#         train_losses.append(loss)
#         loss.backward()
#         optimizer.step()
#
#         pred = y_pred.argmax(dim=1, keepdim=True)
#         correct += pred.eq(target.view_as(pred)).sum().item()
#         processed += len(data)
#         # print statistics
#         running_loss += loss.item()
#         # pbar.set_description(desc=f'Loss={loss.item():.4f} Batch_id={batch_idx} Accuracy={100*correct/processed:0.2f}')
#         train_acc.append(100*correct/processed)
#     print(f'Loss={loss.item():.4f} Batch_id={batch_idx} Accuracy={100*correct/processed:0.2f}')
def model_training(model, device, train_dataloader, optimizer, train_acc, train_losses, criterion):
    model.train()
    correct = 0
    processed = 0
    scaler = GradScaler(enabled=True)  # 确保启用Scaler
    running_loss = 0.0
    PBAR = tqdm(train_dataloader)

    for batch_idx, (data, target) in enumerate(PBAR):
        # print(batch_idx)
        data, target = data.to(device), target.to(device)
        optimizer.zero_grad()

        with autocast(enabled=True):
            y_pred = model(data)
            loss = criterion(y_pred, target)  # 使用CrossEntropyLoss而非NLLLoss

        scaler.scale(loss).backward()  # 使用scaler缩放梯度
        scaler.step(optimizer)
        scaler.update()

        pred = y_pred.argmax(dim=1, keepdim=True)
        correct += pred.eq(target.view_as(pred)).sum().item()
        processed += len(data)
        # print statistics
        running_loss += loss.item()
        # pbar.set_description(desc=f'Loss={loss.item():.4f} Batch_id={batch_idx} Accuracy={100*correct/processed:0.2f}')
        train_acc.append(100*correct/processed)

    for param_group in optimizer.param_groups:
        lr = param_group['lr']
        break
    print(f'Loss={loss.item():.4f} lr={lr} Accuracy={100*correct/processed:0.2f}')
    
def lr_warmup(current_epoch):
    if current_epoch < 5:  # 前5轮预热
        return (0.01 + (0.1 - 0.01) * (current_epoch / 5))
    else:
        return 1.0  # 后续由余弦退火接管

In [15]:
criterion = nn.CrossEntropyLoss()
from torch.optim.lr_scheduler import LambdaLR
from torch import optim

optimizer = optim.SGD(model.parameters(), lr=0.2, momentum=0.9, weight_decay=5e-4)

# 定义预热函数
warmup_epochs = 5
lr_lambda = lambda epoch: (epoch + 1) / warmup_epochs if epoch < warmup_epochs else 0.5 * (1 + np.cos(np.pi * (epoch - warmup_epochs) / (EPOCHS - warmup_epochs)))
scheduler = LambdaLR(optimizer, lr_lambda=lr_lambda)

train_acc = []
train_losses = []
valid_acc = []
valid_losses = []

EPOCHS = 100

for i in range(EPOCHS):

    print(f'EPOCHS : {i}')
    model_training(model, device, train_loader, optimizer, train_acc, train_losses, criterion)
    scheduler.step()
    misclassified = model_testing(model, device, valid_loader, valid_acc, valid_losses)

    if i < 5:
        lr_scale = lr_warmup(i)
        for param_group in optimizer.param_groups:
            param_group['lr'] = 0.1 * lr_scale
    else:
        scheduler.step()

  scaler = GradScaler(enabled=True)  # 确保启用Scaler


EPOCHS : 0


  with autocast(enabled=True):
100%|████████████████████████████████████████████████████████████████████████████████| 391/391 [00:20<00:00, 19.22it/s]


Loss=1.3060 lr=0.04000000000000001 Accuracy=40.65

Test set: Average loss: 1.5500, Accuracy: 4751/10000 (47.51%)

EPOCHS : 1


100%|████████████████████████████████████████████████████████████████████████████████| 391/391 [00:19<00:00, 19.96it/s]


Loss=0.9255 lr=0.001 Accuracy=60.04

Test set: Average loss: 1.0374, Accuracy: 6225/10000 (62.25%)

EPOCHS : 2


100%|████████████████████████████████████████████████████████████████████████████████| 391/391 [00:20<00:00, 19.34it/s]


Loss=0.8412 lr=0.0028000000000000004 Accuracy=63.91

Test set: Average loss: 0.9588, Accuracy: 6564/10000 (65.64%)

EPOCHS : 3


100%|████████████████████████████████████████████████████████████████████████████████| 391/391 [00:20<00:00, 19.27it/s]


Loss=0.8177 lr=0.004600000000000001 Accuracy=66.52

Test set: Average loss: 0.9067, Accuracy: 6801/10000 (68.01%)

EPOCHS : 4


100%|████████████████████████████████████████████████████████████████████████████████| 391/391 [00:20<00:00, 19.54it/s]


Loss=0.7065 lr=0.0064 Accuracy=69.46

Test set: Average loss: 0.8349, Accuracy: 7153/10000 (71.53%)

EPOCHS : 5


100%|████████████████████████████████████████████████████████████████████████████████| 391/391 [00:21<00:00, 18.56it/s]


Loss=0.7024 lr=0.0082 Accuracy=72.22

Test set: Average loss: 0.8372, Accuracy: 7148/10000 (71.48%)

EPOCHS : 6


100%|████████████████████████████████████████████████████████████████████████████████| 391/391 [00:19<00:00, 19.58it/s]


Loss=1.7420 lr=0.19978136272187746 Accuracy=30.81

Test set: Average loss: 2.8126, Accuracy: 3273/10000 (32.73%)

EPOCHS : 7


100%|████████████████████████████████████████████████████████████████████████████████| 391/391 [00:19<00:00, 19.59it/s]


Loss=1.2751 lr=0.19912640693269754 Accuracy=50.09

Test set: Average loss: 1.3664, Accuracy: 5274/10000 (52.74%)

EPOCHS : 8


100%|████████████████████████████████████████████████████████████████████████████████| 391/391 [00:19<00:00, 19.59it/s]


Loss=0.9909 lr=0.19803799658748095 Accuracy=61.14

Test set: Average loss: 1.0419, Accuracy: 6312/10000 (63.12%)

EPOCHS : 9


100%|████████████████████████████████████████████████████████████████████████████████| 391/391 [00:19<00:00, 19.61it/s]


Loss=0.8278 lr=0.19652089102773487 Accuracy=67.03

Test set: Average loss: 1.0087, Accuracy: 6641/10000 (66.41%)

EPOCHS : 10


100%|████████████████████████████████████████████████████████████████████████████████| 391/391 [00:20<00:00, 19.51it/s]


Loss=0.9432 lr=0.19458172417006347 Accuracy=70.96

Test set: Average loss: 0.9889, Accuracy: 6755/10000 (67.55%)

EPOCHS : 11


 26%|████████████████████▋                                                           | 101/391 [00:05<00:15, 19.00it/s]


KeyboardInterrupt: 

In [None]:

test_loader = DataLoader(test, batch_size=1, shuffle=False)
valid_loader2 = DataLoader(valid_set, batch_size=1, shuffle=False)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
result = []
for img, _ in test_loader:
    model.eval()
    img = img.to(device)
    y_pred = model(img)
    pred = int(y_pred.argmax(dim=1, keepdim=True))
    result.append(pred)


In [10]:
test[0]

(tensor([[[0.0745, 0.0039, 0.0039,  ..., 0.0039, 0.0706, 0.0000],
          [0.0000, 0.0706, 0.0000,  ..., 0.4118, 0.4039, 0.4471],
          [0.4157, 0.4078, 0.3412,  ..., 0.4196, 0.3882, 0.3804],
          ...,
          [0.0039, 0.0000, 0.0549,  ..., 0.0471, 0.0078, 0.0039],
          [0.0824, 0.0078, 0.0039,  ..., 0.5843, 0.6510, 0.6627],
          [0.5686, 0.4588, 0.4431,  ..., 0.0235, 0.0235, 0.0549]],
 
         [[0.0039, 0.0000, 0.0588,  ..., 0.0471, 0.0078, 0.0039],
          [0.0745, 0.0039, 0.0000,  ..., 0.5412, 0.5961, 0.6588],
          [0.6275, 0.6510, 0.6745,  ..., 0.0745, 0.0706, 0.0588],
          ...,
          [0.7020, 0.5608, 0.6549,  ..., 0.3961, 0.2902, 0.2392],
          [0.2235, 0.1725, 0.3843,  ..., 0.0863, 0.0392, 0.0196],
          [0.1059, 0.0745, 0.0235,  ..., 0.7137, 0.6549, 0.7294]],
 
         [[0.7255, 0.6588, 0.7373,  ..., 0.4941, 0.3804, 0.3216],
          [0.3020, 0.2353, 0.2235,  ..., 0.4941, 0.4824, 0.4275],
          [0.0941, 0.0588, 0.0353,  ...,

In [11]:
def make_output(result):
    out = ['ID,Labels']
    for i in range(len(result)):
        cur = str(i) + str(',') + str(int(result[i]))
        out.append(cur)
    return '\n'.join(out)
with open("output.csv", "w", encoding="utf-8") as f:
    f.write(make_output(result))