 # DataLoader加载器

In [None]:
# DataLoader常见参数设置
# dataset (Dataset) – dataset from which to load the data.
# batch_size (int, optional) – how many samples per batch to load (default: 1).
# shuffle (bool, optional) – set to True to have the data reshuffled at every epoch; False keeps the dataset order (default: False).
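# sampler (Sampler or Iterable, optional) – the strategy used to draw samples from the dataset.
    # The sampler decides the order in which DataLoader returns samples within each epoch.
    # - No sampler given and shuffle=True -> DataLoader falls back to RandomSampler.
    # - Sampler given -> the sampler alone decides the order, so do not also pass shuffle=True
    #   (the two options are mutually exclusive).
    # PyTorch already ships several commonly used samplers:
        # SequentialSampler: draws samples one by one in dataset order (for validation or unshuffled training; the default when shuffle=False).
        # RandomSampler: draws samples in a random order (for shuffled training; the default when shuffle=True).
        # WeightedRandomSampler: draws samples according to the given weights, with replacement by default (useful for class-imbalanced data).
        # SubsetRandomSampler: draws samples randomly from a given subset of indices (useful for train/validation splits).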
# num_workers (int, optional) – how many subprocesses to use for data loading. 0 means that the data will be loaded in the main process. (default: 0)
# drop_last (bool, optional) – whether to drop the final batch when it holds fewer than batch_size samples.
    # set to True to drop the last incomplete batch, if the dataset size is not divisible by the batch size. If False and the size of dataset is not divisible by the batch size, then the last batch will be smaller. (default: False)

In [1]:
import torchvision
from torch.utils.data import DataLoader
from torch.utils.tensorboard import SummaryWriter


In [3]:
# Build the CIFAR10 test split.
# train=False selects the test set instead of the training set, and
# ToTensor() converts every image to a Tensor when it is read.
test_data = torchvision.datasets.CIFAR10(
    root="./data/CIFAR10",
    train=False,
    transform=torchvision.transforms.ToTensor(),
)

# Indexing the dataset returns the (img, target) pair stored at that index;
# look at the first one: the image shape (C, H, W), then its label.
img, target = test_data[0]
print(img.shape, target, sep="\n")


torch.Size([3, 32, 32])
3


In [8]:
from itertools import islice

# Wrap the test dataset in a DataLoader.
#   batch_size=64    -> every batch holds 64 samples
#   shuffle=True     -> the sample order is shuffled
#   num_workers=0    -> loading runs in the main process
#   drop_last=False  -> the final batch is kept even if it is smaller than batch_size
test_loader = DataLoader(
    dataset=test_data,
    batch_size=64,
    shuffle=True,
    num_workers=0,
    drop_last=False,
)

# SummaryWriter that writes the TensorBoard log files to this directory.
writer = SummaryWriter("./logs/5_DataLoader")

# Only the first 10 batches are logged. islice ends the iteration after the
# 10th batch, so the loader is never asked for a further batch that would
# only be thrown away.
for step, (imgs, targets) in enumerate(islice(test_loader, 10)):
    print(step, ":")  # index of the current batch, starting at 0
    # Each item from the loader is an (imgs, targets) pair; the whole image
    # batch is written under one tag, with the batch index as the global step.
    writer.add_images("test_data", imgs, step)
    print(imgs.shape)  # (batch_size, C, H, W)
    print(targets)  # labels of the samples in this batch


0 :
torch.Size([64, 3, 32, 32])
tensor([2, 2, 5, 3, 2, 7, 4, 0, 4, 3, 9, 5, 3, 8, 2, 6, 9, 7, 0, 8, 0, 3, 1, 6,
        0, 5, 0, 2, 9, 5, 4, 7, 5, 1, 0, 7, 9, 3, 0, 8, 7, 2, 6, 4, 4, 9, 5, 2,
        4, 7, 5, 0, 8, 0, 7, 0, 4, 7, 9, 7, 1, 8, 4, 5])
1 :
torch.Size([64, 3, 32, 32])
tensor([6, 9, 9, 1, 2, 1, 1, 6, 5, 6, 2, 5, 0, 4, 7, 5, 9, 1, 1, 7, 6, 4, 4, 3,
        2, 9, 7, 5, 6, 1, 4, 6, 6, 9, 1, 3, 5, 1, 9, 7, 5, 0, 6, 6, 5, 2, 8, 7,
        0, 2, 2, 7, 1, 6, 4, 9, 5, 8, 1, 8, 0, 7, 9, 5])
2 :
torch.Size([64, 3, 32, 32])
tensor([2, 8, 0, 7, 4, 6, 5, 6, 8, 4, 4, 6, 5, 8, 0, 9, 5, 4, 4, 6, 8, 9, 8, 9,
        1, 9, 2, 1, 6, 2, 1, 2, 3, 4, 1, 7, 8, 8, 2, 8, 7, 8, 5, 3, 0, 8, 5, 5,
        7, 5, 9, 2, 5, 7, 5, 8, 3, 6, 0, 8, 1, 6, 9, 3])
3 :
torch.Size([64, 3, 32, 32])
tensor([9, 4, 9, 4, 8, 0, 2, 8, 1, 5, 9, 1, 8, 5, 1, 7, 8, 5, 4, 8, 9, 3, 3, 4,
        0, 0, 1, 7, 8, 5, 8, 0, 3, 7, 8, 8, 3, 5, 5, 8, 3, 7, 2, 1, 5, 7, 5, 2,
        2, 3, 3, 9, 7, 2, 3, 9, 9, 3, 3, 0, 4, 7, 5, 7])
4 :


In [9]:
# Close the SummaryWriter to release its resources.
writer.close()