In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import pandas as pd
import numpy as np
from torchvision import models
from sklearn.preprocessing import LabelEncoder
import os

# 设置随机种子以确保可重复性
def set_seed(seed=42):
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    np.random.seed(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

set_seed(42)

# 数据集类
class NPYDataset(Dataset):
    def __init__(self, csv_file, root_dir):
        try:
            self.annotations = pd.read_csv(csv_file, encoding='utf-8')
        except UnicodeDecodeError:
            self.annotations = pd.read_csv(csv_file, encoding='gbk')
        self.root_dir = root_dir
        self.le = LabelEncoder()
        self.annotations['labels'] = self.annotations['labels'].apply(lambda x: x.strip("[]'"))
        self.annotations['labels'] = self.le.fit_transform(self.annotations['labels'])

    def __len__(self):
        return len(self.annotations)

    def __getitem__(self, index):
        img_name = os.path.join(self.root_dir, str(self.annotations.iloc[index, -1]) + '.npy')
        image = np.load(img_name)
        image = torch.tensor(image, dtype=torch.float32).permute(2, 0, 1)
        label = self.annotations.iloc[index, 2]
        return image, label

# 模型定义
class DenseNet2D(nn.Module):
    def __init__(self, num_classes):
        super(DenseNet2D, self).__init__()
        # 使用预训练的DenseNet模型
        self.densenet = models.densenet121(pretrained=True)
        self.dropout = nn.Dropout(0.5)  # 添加Dropout层
        
        # DenseNet的分类器部分是一个名为classifier的线性层
        # 我们需要用新的线性层替换它，以匹配我们的类别数目
        self.densenet.classifier = nn.Linear(self.densenet.classifier.in_features, num_classes)
        
        # 由于我们已经在上面的行中将原始的分类器替换掉了，
        # 所以这里不需要再替换fc层为Identity

    def forward(self, x):
        # 通过DenseNet模型
        x = self.densenet(x)
        # Dropout层现在不是必需的，因为我们在最后一层之前已经包含了dropout
        # x = self.dropout(x)
        # 由于我们已经将分类器层替换为自定义的线性层，以下步骤也不再需要
        # x = self.fc(x)
        return x

# 训练和评估参数
num_epochs = 10
batch_size = 16
learning_rate = 0.0005

# 设备配置
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# 数据加载
train_dataset = NPYDataset(csv_file='/kaggle/input/d-n-classification/train.csv', root_dir='/kaggle/input/d-n-classification/normalized_train_images/normalized_train_images')
train_loader = DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)

validation_dataset = NPYDataset(csv_file='/kaggle/input/d-n-classification/validation.csv', root_dir='/kaggle/input/d-n-classification/normalized_validation_images/normalized_validation_images')
validation_loader = DataLoader(dataset=validation_dataset, batch_size=batch_size, shuffle=False)

test_dataset = NPYDataset(csv_file='/kaggle/input/d-n-classification/test.csv', root_dir='/kaggle/input/d-n-classification/normalized_test_images/normalized_test_images')
test_loader = DataLoader(dataset=test_dataset, batch_size=batch_size, shuffle=False)

# 模型初始化
num_classes = len(np.unique(train_dataset.annotations['labels']))

# 创建一个权重数组
# 这里假设类别标签已经编码为0, 1
weights = torch.tensor([1.0, 1.0], dtype=torch.float32).to(device)

# 使用加权损失函数
criterion = nn.CrossEntropyLoss(weight=weights)

model = DenseNet2D(num_classes=num_classes).to(device)
optimizer = optim.AdamW(model.parameters(), lr=learning_rate, weight_decay=1e-3)  # 增加权重衰减

# 学习率调度器
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.1)

# 训练模型的代码...
# 请根据您的具体需求添加训练循环和验证/测试循环

Downloading: "https://download.pytorch.org/models/densenet121-a639ec97.pth" to /root/.cache/torch/hub/checkpoints/densenet121-a639ec97.pth
100%|██████████| 30.8M/30.8M [00:00<00:00, 130MB/s] 


In [2]:
import torch
import numpy as np
from sklearn.metrics import precision_score, recall_score, f1_score

# 确保模型在GPU上
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

# 确保模型处于训练模式
model.train()

# 可能需要调整学习率
#optimizer = torch.optim.AdamW(model.parameters(), lr=0.001)  # 示例学习率

for epoch in range(0, 30):  # 继续训练过程
    for images, labels in train_loader:
        images, labels = images.to(device), labels.to(device).long()
        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
    
    # 每个epoch结束后打印损失
    print(f'Epoch [{epoch+1}/{30}], Loss: {loss.item():.4f}')
    
    # 每个epoch结束后在训练集上评估模型
    model.eval()  # 切换到评估模式
    with torch.no_grad():
        train_preds = []
        train_labels = []
        for images, labels in train_loader:
            images, labels = images.to(device), labels.to(device).long()
            outputs = model(images)
            _, predicted = torch.max(outputs.data, 1)
            train_preds.extend(predicted.view(-1).cpu().numpy())
            train_labels.extend(labels.view(-1).cpu().numpy())
        train_accuracy = 100 * np.sum(np.array(train_preds) == np.array(train_labels)) / len(train_labels)
        print(f'Train Accuracy: {train_accuracy:.2f} %')

    # 每个epoch结束后在验证集上评估模型
    with torch.no_grad():
        val_preds = []
        val_labels = []
        for images, labels in validation_loader:
            images, labels = images.to(device), labels.to(device).long()
            outputs = model(images)
            _, predicted = torch.max(outputs.data, 1)
            val_preds.extend(predicted.view(-1).cpu().numpy())
            val_labels.extend(labels.view(-1).cpu().numpy())
        val_accuracy = 100 * np.sum(np.array(val_preds) == np.array(val_labels)) / len(val_labels)
        print(f'Validation Accuracy: {val_accuracy:.2f} %')
    
    # 每个epoch结束后在测试集上评估模型
    with torch.no_grad():
        test_preds = []
        test_labels = []
        for images, labels in test_loader:
            images, labels = images.to(device), labels.to(device).long()
            outputs = model(images)
            _, predicted = torch.max(outputs.data, 1)
            test_preds.extend(predicted.view(-1).cpu().numpy())
            test_labels.extend(labels.view(-1).cpu().numpy())
    
        test_accuracy = 100 * np.sum(np.array(test_preds) == np.array(test_labels)) / len(test_labels)
        print(f'Test Accuracy: {test_accuracy:.2f} %')

        # 计算精确率、召回率和F1分数
        precision = precision_score(test_labels, test_preds, average='weighted')
        recall = recall_score(test_labels, test_preds, average='weighted')
        f1 = f1_score(test_labels, test_preds, average='weighted')
    
        print(f'Precision: {precision:.4f}')
        print(f'Recall: {recall:.4f}')
        print(f'F1 Score: {f1:.4f}')
    
    model.train()  # 切换回训练模式

Epoch [1/30], Loss: 0.6285
Train Accuracy: 53.14 %
Validation Accuracy: 54.67 %
Test Accuracy: 54.00 %
Precision: 0.7589
Recall: 0.5400
F1 Score: 0.4109
Epoch [2/30], Loss: 0.5363
Train Accuracy: 72.43 %
Validation Accuracy: 66.67 %
Test Accuracy: 68.67 %
Precision: 0.7542
Recall: 0.6867
F1 Score: 0.6626
Epoch [3/30], Loss: 0.5165
Train Accuracy: 79.29 %
Validation Accuracy: 70.00 %
Test Accuracy: 75.33 %
Precision: 0.7805
Recall: 0.7533
F1 Score: 0.7479
Epoch [4/30], Loss: 0.6072
Train Accuracy: 93.14 %
Validation Accuracy: 79.33 %
Test Accuracy: 78.67 %
Precision: 0.7876
Recall: 0.7867
F1 Score: 0.7866
Epoch [5/30], Loss: 0.8036
Train Accuracy: 76.14 %
Validation Accuracy: 69.33 %
Test Accuracy: 70.67 %
Precision: 0.7615
Recall: 0.7067
F1 Score: 0.6918
Epoch [6/30], Loss: 0.2684
Train Accuracy: 91.57 %
Validation Accuracy: 72.67 %
Test Accuracy: 80.67 %
Precision: 0.8333
Recall: 0.8067
F1 Score: 0.8032
Epoch [7/30], Loss: 0.3546
Train Accuracy: 93.14 %
Validation Accuracy: 72.67 %
Te

In [3]:
import torch
import numpy as np
from sklearn.metrics import precision_score, recall_score, f1_score

# 确保模型在GPU上
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

# 确保模型处于训练模式
model.train()

# 可能需要调整学习率
#optimizer = torch.optim.AdamW(model.parameters(), lr=0.001)  # 示例学习率

for epoch in range(30, 50):  # 继续训练过程
    for images, labels in train_loader:
        images, labels = images.to(device), labels.to(device).long()
        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
    
    # 每个epoch结束后打印损失
    print(f'Epoch [{epoch+1}/{50}], Loss: {loss.item():.4f}')
    
    # 每个epoch结束后在训练集上评估模型
    model.eval()  # 切换到评估模式
    with torch.no_grad():
        train_preds = []
        train_labels = []
        for images, labels in train_loader:
            images, labels = images.to(device), labels.to(device).long()
            outputs = model(images)
            _, predicted = torch.max(outputs.data, 1)
            train_preds.extend(predicted.view(-1).cpu().numpy())
            train_labels.extend(labels.view(-1).cpu().numpy())
        train_accuracy = 100 * np.sum(np.array(train_preds) == np.array(train_labels)) / len(train_labels)
        print(f'Train Accuracy: {train_accuracy:.2f} %')

    # 每个epoch结束后在验证集上评估模型
    with torch.no_grad():
        val_preds = []
        val_labels = []
        for images, labels in validation_loader:
            images, labels = images.to(device), labels.to(device).long()
            outputs = model(images)
            _, predicted = torch.max(outputs.data, 1)
            val_preds.extend(predicted.view(-1).cpu().numpy())
            val_labels.extend(labels.view(-1).cpu().numpy())
        val_accuracy = 100 * np.sum(np.array(val_preds) == np.array(val_labels)) / len(val_labels)
        print(f'Validation Accuracy: {val_accuracy:.2f} %')
    
    # 每个epoch结束后在测试集上评估模型
    with torch.no_grad():
        test_preds = []
        test_labels = []
        for images, labels in test_loader:
            images, labels = images.to(device), labels.to(device).long()
            outputs = model(images)
            _, predicted = torch.max(outputs.data, 1)
            test_preds.extend(predicted.view(-1).cpu().numpy())
            test_labels.extend(labels.view(-1).cpu().numpy())
    
        test_accuracy = 100 * np.sum(np.array(test_preds) == np.array(test_labels)) / len(test_labels)
        print(f'Test Accuracy: {test_accuracy:.2f} %')

        # 计算精确率、召回率和F1分数
        precision = precision_score(test_labels, test_preds, average='weighted')
        recall = recall_score(test_labels, test_preds, average='weighted')
        f1 = f1_score(test_labels, test_preds, average='weighted')
    
        print(f'Precision: {precision:.4f}')
        print(f'Recall: {recall:.4f}')
        print(f'F1 Score: {f1:.4f}')
    
    model.train()  # 切换回训练模式

Epoch [31/50], Loss: 0.0045
Train Accuracy: 99.57 %
Validation Accuracy: 76.00 %
Test Accuracy: 80.67 %
Precision: 0.8166
Recall: 0.8067
F1 Score: 0.8054
Epoch [32/50], Loss: 0.0040
Train Accuracy: 99.29 %
Validation Accuracy: 76.67 %
Test Accuracy: 78.00 %
Precision: 0.7866
Recall: 0.7800
F1 Score: 0.7790
Epoch [33/50], Loss: 0.0026
Train Accuracy: 100.00 %
Validation Accuracy: 75.33 %
Test Accuracy: 86.00 %
Precision: 0.8607
Recall: 0.8600
F1 Score: 0.8600
Epoch [34/50], Loss: 0.0021
Train Accuracy: 100.00 %
Validation Accuracy: 76.00 %
Test Accuracy: 82.67 %
Precision: 0.8270
Recall: 0.8267
F1 Score: 0.8267
Epoch [35/50], Loss: 0.0004
Train Accuracy: 100.00 %
Validation Accuracy: 74.00 %
Test Accuracy: 84.00 %
Precision: 0.8402
Recall: 0.8400
F1 Score: 0.8399
Epoch [36/50], Loss: 0.0033
Train Accuracy: 100.00 %
Validation Accuracy: 74.00 %
Test Accuracy: 82.67 %
Precision: 0.8267
Recall: 0.8267
F1 Score: 0.8267
Epoch [37/50], Loss: 0.0077
Train Accuracy: 100.00 %
Validation Accuracy

  _warn_prf(average, modifier, msg_start, len(result))


Epoch [44/50], Loss: 0.1361
Train Accuracy: 85.14 %
Validation Accuracy: 69.33 %
Test Accuracy: 74.00 %
Precision: 0.7710
Recall: 0.7400
F1 Score: 0.7332


KeyboardInterrupt: 