In [1]:
import numpy as np
import torch
import pandas as pd
from torch.utils.data import Dataset, DataLoader
import torch.nn as nn

In [None]:
# Report the installed PyTorch build and whether a CUDA device is usable.
print(
    f"PyTorch 版本: {torch.__version__}",
    f"CUDA 可用: {torch.cuda.is_available()}",
    sep="\n",
)

PyTorch 版本: 2.9.1
CUDA 可用: False


In [7]:
# Custom map-style Dataset wrapping in-memory features and integer labels
class CustomDataset(Dataset):
    """
    Map-style dataset over in-memory features and integer labels.

    Subclasses ``torch.utils.data.Dataset`` so instances can be passed to a
    ``DataLoader``.
    """
    def __init__(self, data, labels, transform=None):
        """
        Convert the inputs to tensors and store them.

        Args:
            data: array-like of input features; stored as a float32 tensor.
            labels: array-like of labels; stored as an int64 tensor.
            transform: optional callable applied to each sample on access.

        Raises:
            ValueError: if ``data`` and ``labels`` have different lengths.
        """
        # torch.as_tensor with an explicit dtype replaces the legacy
        # FloatTensor/LongTensor constructors and also accepts existing tensors.
        self.data = torch.as_tensor(data, dtype=torch.float32)
        self.labels = torch.as_tensor(labels, dtype=torch.long)
        # Fail at construction time instead of with an IndexError mid-epoch
        # (or silently ignoring surplus labels).
        if len(self.data) != len(self.labels):
            raise ValueError(
                f"data and labels must have the same length, "
                f"got {len(self.data)} and {len(self.labels)}"
            )
        self.transform = transform

    def __len__(self):
        """Return the number of samples."""
        return len(self.data)

    def __getitem__(self, idx):
        """Return the ``(sample, label)`` pair at ``idx``, applying ``transform`` to the sample if one was given."""
        sample = self.data[idx]
        label = self.labels[idx]

        # Compare against None so a callable that happens to be falsy still runs.
        if self.transform is not None:
            sample = self.transform(sample)

        return sample, label

# Build toy data: 100 samples with 10 features each, labels drawn from 3 classes.
np.random.seed(42)  # fixed seed so the printed values are reproducible
X = np.random.standard_normal(size=(100, 10))
y = np.random.randint(low=0, high=3, size=100)

# Wrap the arrays in a CustomDataset instance
dataset = CustomDataset(data=X, labels=y)

print(
    f"数据集大小: {len(dataset)}",
    f"第一个样本的形状: {dataset[0][0].shape}",
    f"第一个样本的标签: {dataset[0][1]}",
    sep="\n",
)

数据集大小: 100
第一个样本的形状: torch.Size([10])
第一个样本的标签: 2


In [5]:
# Build the DataLoader.
#   batch_size  - number of samples per mini-batch
#   shuffle     - whether to shuffle the data
#   num_workers - number of loader processes (0 = load in the main process;
#                 avoids problems on Windows and some other environments)
dataloader = DataLoader(
    dataset=dataset,
    num_workers=0,
    shuffle=True,
    batch_size=32,
)

print(
    f"DataLoader 创建的批次数: {len(dataloader)}",
    f"批次大小: {dataloader.batch_size}",
    sep="\n",
)

DataLoader 创建的批次数: 4
批次大小: 32


In [10]:
# Walk the DataLoader for two simulated epochs, showing only the first 3 batches of each.
for epoch in range(2):
    print(f"\n=== Epoch {epoch + 1} ===")
    for batch_idx, (data, labels) in enumerate(dataloader):
        print(
            f"批次 {batch_idx + 1}:",
            f"  数据形状: {data.shape}",  # [batch_size, features]
            f"  标签形状: {labels.shape}",  # [batch_size]
            f"  标签值: {labels[:5].tolist()}",  # first 5 labels only
            sep="\n",
        )

        # A real training step would go here:
        # output = model(data)
        # loss = criterion(output, labels)
        # ...

        if batch_idx == 2:  # third batch printed; stop this epoch
            break


=== Epoch 1 ===
批次 1:
  数据形状: torch.Size([32, 10])
  标签形状: torch.Size([32])
  标签值: [2, 2, 0, 2, 2]
批次 2:
  数据形状: torch.Size([32, 10])
  标签形状: torch.Size([32])
  标签值: [2, 1, 2, 2, 1]
批次 3:
  数据形状: torch.Size([32, 10])
  标签形状: torch.Size([32])
  标签值: [1, 1, 1, 2, 1]

=== Epoch 2 ===
批次 1:
  数据形状: torch.Size([32, 10])
  标签形状: torch.Size([32])
  标签值: [2, 2, 2, 1, 0]
批次 2:
  数据形状: torch.Size([32, 10])
  标签形状: torch.Size([32])
  标签值: [2, 0, 0, 1, 1]
批次 3:
  数据形状: torch.Size([32, 10])
  标签形状: torch.Size([32])
  标签值: [1, 1, 1, 0, 1]


In [11]:
# Autograd check: z is the sum of squared entries of x, so dz/dx = 2 * x.
x = torch.tensor([[1.0, 0.0], [-1.0, 1.0]], requires_grad=True)
z = torch.sum(torch.pow(x, 2))
z.backward()  # fills x.grad
print(x.grad)

tensor([[ 2.,  0.],
        [-2.,  2.]])


In [5]:
# Inspect the parameter shapes of a 32 -> 64 linear layer with a bias term.
layer = torch.nn.Linear(in_features=32, out_features=64, bias=True)
print(layer.weight.shape, layer.bias.shape, sep="\n")

torch.Size([64, 32])
torch.Size([64])


In [None]:
# Load the training CSV as a NumPy array.
# Note: .values is an attribute, not a method, so it takes no parentheses.
train_data = pd.read_csv('./covid.train_new.csv').values
# The last column becomes the target; all preceding columns become features.
x_train, y_train = train_data[:, :-1], train_data[:, -1]

class Covid19Dataset(Dataset):
    """
    Dataset of feature rows with optional targets.

    When ``y`` is omitted, items are bare feature tensors; otherwise items
    are ``(features, target)`` pairs.
    """

    def __init__(self, x, y=None):
        """
        Convert the inputs to float32 tensors and store them.

        Args:
            x: array-like of features.
            y: optional array-like of targets, one per row of ``x``.

        Raises:
            ValueError: if ``y`` is given and its length differs from ``x``.
        """
        # torch.as_tensor with an explicit dtype replaces the legacy
        # torch.FloatTensor constructor and also accepts existing tensors.
        self.x = torch.as_tensor(x, dtype=torch.float32)
        if y is None:
            self.y = None
        else:
            self.y = torch.as_tensor(y, dtype=torch.float32)
            # __len__ is driven by x alone, so catch a mismatch up front.
            if len(self.y) != len(self.x):
                raise ValueError(
                    f"x and y must have the same length, "
                    f"got {len(self.x)} and {len(self.y)}"
                )

    def __getitem__(self, idx):
        """Return ``x[idx]`` alone when no targets were given, else ``(x[idx], y[idx])``."""
        if self.y is None:
            return self.x[idx]
        return self.x[idx], self.y[idx]

    def __len__(self):
        """Return the number of feature rows."""
        return len(self.x)

covid_dataset = Covid19Dataset(x_train, y_train)
# Only request pinned host memory when a CUDA device exists: on a CPU-only
# machine it brings no benefit and recent PyTorch versions warn about it.
train_loader = DataLoader(
    covid_dataset,
    batch_size=32,
    shuffle=True,
    pin_memory=torch.cuda.is_available(),
)

class MyModel(nn.Module):
    """Fully connected regressor: input_dim -> 64 -> 128 -> 1 with ReLU activations."""

    def __init__(self, input_dim):
        """
        Args:
            input_dim: number of input features per sample.
        """
        super().__init__()
        self.layers = nn.Sequential(
            nn.Linear(input_dim, 64),
            nn.ReLU(),
            nn.Linear(64, 128),
            nn.ReLU(),
            nn.Linear(128, 1)
        )

    def forward(self, x):
        """
        Run the network and drop the trailing size-1 output dimension.

        Args:
            x: input tensor whose last dimension is ``input_dim``.

        Returns:
            Tensor with the input's leading dimensions, e.g. ``(batch,)`` for
            a ``(batch, input_dim)`` input.
        """
        x = self.layers(x)
        # Squeeze only the last dimension: a bare squeeze() would also collapse
        # a batch of size 1 to a 0-d tensor, mismatching a (1,)-shaped target.
        return x.squeeze(-1)


# Build the model, loss and optimizer; the model is placed on the CPU.
model = MyModel(input_dim=x_train.shape[1])
model = model.to('cpu')
criterion = nn.MSELoss(reduction='mean')
optimizer = torch.optim.SGD(params=model.parameters(), momentum=0.9, lr=0.01)

# Mini-batch gradient descent over the training loader.
n_epochs = 10
for epoch in range(n_epochs):
    model.train()  # put the module in training mode at the start of each epoch
    for batch_idx, (x, y) in enumerate(train_loader):
        x, y = x.to('cpu'), y.to('cpu')

        optimizer.zero_grad()  # clear gradients left over from the previous step
        output = model(x)
        loss = criterion(output, y)
        loss.backward()
        optimizer.step()
        

TypeError: 'numpy.ndarray' object is not callable

In [None]:
{
  "env": {
    "ANTHROPIC_AUTH_TOKEN": "<REDACTED: do not commit credentials; revoke the exposed token and supply a new one via an environment variable or secret store>",
    "ANTHROPIC_BASE_URL": "https://code.newcli.com/claude/droid",
    "CLAUDE_CODE_DISABLE_NONESSENTIAL_TRAFFIC": 1
  },
  "permissions": {
    "allow": [],
    "deny": []
  }
}