In [9]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import pandas as pd
import numpy as np
from torchvision import models
from sklearn.preprocessing import LabelEncoder
import os

# Seed every random number generator in use so that runs are repeatable.
def set_seed(seed=42):
    """Seed Python, NumPy and PyTorch RNGs and put cuDNN in deterministic mode.

    Args:
        seed: Integer seed applied to every generator. Defaults to 42.
    """
    # Imported locally so the helper is self-contained; the notebook's import
    # cell does not import `random`.
    import random

    # Python's own RNG was previously left unseeded, so anything relying on
    # `random` (directly or through a library) differed between runs.
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    # Trade some speed for repeatable cuDNN kernel selection.
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

set_seed(42)

# Dataset class
class NPYDataset(Dataset):
    """Dataset of images stored as one ``.npy`` file per sample.

    The annotation CSV must contain a ``labels`` column. Its values are
    stripped of surrounding ``[``, ``]`` and ``'`` characters and then
    integer-encoded. The last CSV column is used as the file stem of the
    sample (``<root_dir>/<stem>.npy``).

    Args:
        csv_file: Path to the annotation CSV (read as UTF-8, falling back to GBK).
        root_dir: Directory that contains the ``.npy`` files.
        label_encoder: Optional, already fitted encoder exposing ``transform``.
            When given, it is used as-is so that several splits share one
            class-name -> id mapping. When omitted, a new ``LabelEncoder`` is
            fitted on this CSV alone (the original behaviour), which means
            ids are only comparable across splits that contain exactly the
            same set of class names.

    Attributes are documented inline below.
    """

    def __init__(self, csv_file, root_dir, label_encoder=None):
        try:
            self.annotations = pd.read_csv(csv_file, encoding='utf-8')
        except UnicodeDecodeError:
            # Some annotation files are saved with a Chinese locale encoding.
            self.annotations = pd.read_csv(csv_file, encoding='gbk')
        self.root_dir = root_dir

        # Turn strings such as "['N']" into the bare class name "N".
        cleaned_labels = self.annotations['labels'].apply(lambda x: x.strip("[]'"))

        if label_encoder is None:
            # Encoder fitted on this split only.
            self.le = LabelEncoder()
            self.annotations['labels'] = self.le.fit_transform(cleaned_labels)
        else:
            # Shared encoder: reuse its mapping instead of refitting.
            self.le = label_encoder
            self.annotations['labels'] = self.le.transform(cleaned_labels)

    def __len__(self):
        """Return the number of annotated samples."""
        return len(self.annotations)

    def __getitem__(self, index):
        """Load one sample.

        Returns:
            A ``(image, label)`` pair where ``image`` is a float32 tensor with
            the array's last axis moved to the front.
        """
        img_name = os.path.join(self.root_dir, str(self.annotations.iloc[index, -1]) + '.npy')
        image = np.load(img_name)
        # assumes the stored array is channel-last (H, W, C) — TODO confirm
        image = torch.tensor(image, dtype=torch.float32).permute(2, 0, 1)
        # NOTE(review): the label is read by position (column 2); presumably
        # that is the encoded 'labels' column — confirm against the CSV schema.
        label = self.annotations.iloc[index, 2]
        return image, label

# Model definition
class ResNet2DFeatures(nn.Module):
    """Frozen, pretrained ResNet-50 with its last child module removed.

    The forward pass returns the flattened output of the truncated network,
    so the module acts as a fixed feature extractor.

    Args:
        num_classes: Accepted for signature compatibility; not used by this class.
    """

    def __init__(self, num_classes=4):
        super().__init__()
        self.resnet = models.resnet50(pretrained=True)

        # Freeze every backbone weight; nothing in this module is trained.
        for frozen_param in self.resnet.parameters():
            frozen_param.requires_grad = False

        # Keep every child except the last one (the final fully connected
        # layer) so the network yields features.
        backbone_layers = list(self.resnet.children())
        self.features = nn.Sequential(*backbone_layers[:-1])

    def forward(self, x):
        """Return one flat feature vector per input sample."""
        # Flatten everything after the batch dimension into a vector.
        return torch.flatten(self.features(x), 1)

# Device configuration. Defined first: the original cell moved the feature
# extractor onto `device` before `device` existed, which raises NameError on a
# fresh kernel.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Instantiate the feature-extraction model
feature_extractor = ResNet2DFeatures().to(device)

# The optimizer, learning-rate scheduler and training loop below can stay unchanged

# Training and evaluation parameters
num_epochs = 10
batch_size = 32
learning_rate = 0.001

# Data loading
train_dataset = NPYDataset(csv_file=r'C:\Users\HP\Desktop\指南者\项目\眼病预测\train.csv', root_dir=r'C:\Users\HP\Desktop\指南者\项目\眼病预测\normalized_gaus_train')
train_loader = DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)

test_dataset = NPYDataset(csv_file=r'C:\Users\HP\Desktop\指南者\项目\眼病预测\test.csv', root_dir=r'C:\Users\HP\Desktop\指南者\项目\眼病预测\normalized_gaus_test')
test_loader = DataLoader(dataset=test_dataset, batch_size=batch_size, shuffle=False)

validation_dataset = NPYDataset(csv_file=r'C:\Users\HP\Desktop\指南者\项目\眼病预测\validation.csv', root_dir=r'C:\Users\HP\Desktop\指南者\项目\眼病预测\normalized_gaus_validation')
validation_loader = DataLoader(dataset=validation_dataset, batch_size=batch_size, shuffle=False)

# Every dataset above fits its own LabelEncoder, so integer label ids are only
# comparable across splits when all splits contain the same class names.
for split_name, split_dataset in (('test', test_dataset), ('validation', validation_dataset)):
    assert list(split_dataset.le.classes_) == list(train_dataset.le.classes_), (
        f'{split_name} split has a different label set than the training split'
    )

# Model initialisation
num_classes = len(np.unique(train_dataset.annotations['labels']))  # expected to evaluate to 4

# Per-class loss weights, one entry per encoded label 0 .. num_classes - 1.
# Sized from num_classes instead of a hard-coded list of four so the loss
# cannot disagree with the model's output size.
weights = torch.tensor([1.0] * num_classes, dtype=torch.float32).to(device)

# Weighted loss function
criterion = nn.CrossEntropyLoss(weight=weights)

# NOTE(review): ResNet2D is not defined in the visible part of this notebook;
# it must already exist in the kernel before this cell runs.
model = ResNet2D(num_classes=num_classes).to(device)
optimizer = optim.AdamW(model.parameters(), lr=learning_rate, weight_decay=1e-3)  # added weight decay

# Learning-rate scheduler
# NOTE(review): step_size equals num_epochs; if the scheduler is stepped once
# per epoch the rate only drops after the final epoch — confirm this is intended.
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.1)

# Training code goes here...
# Add the training loop and the validation/test loops according to your needs

In [10]:
model_path = r'C:\Users\HP\Desktop\指南者\项目\眼病预测\trained_model.pth'  # path where the trained model is saved

In [11]:
def extract_features(loader, model, device):
    """Run ``model`` over every batch of ``loader`` and collect the results.

    The model is switched to evaluation mode and gradients are disabled for
    the whole pass.

    Args:
        loader: Iterable yielding ``(images, targets)`` tensor batches.
        model: Module called on each image batch.
        device: Device the image batches are moved to before the forward pass.

    Returns:
        A ``(features, labels)`` pair of NumPy arrays, each the concatenation
        of the per-batch outputs / targets in iteration order.
    """
    model.eval()
    feature_chunks, label_chunks = [], []

    with torch.no_grad():
        for batch_images, batch_targets in loader:
            batch_output = model(batch_images.to(device))
            # Bring results back to host memory before converting to NumPy.
            feature_chunks.append(batch_output.cpu().numpy())
            label_chunks.append(batch_targets.cpu().numpy())

    return np.concatenate(feature_chunks), np.concatenate(label_chunks)

# Extract features for the training, validation and test sets (in that order).
(
    (train_features, train_labels),
    (validation_features, validation_labels),
    (test_features, test_labels),
) = [
    extract_features(split_loader, feature_extractor, device)
    for split_loader in (train_loader, validation_loader, test_loader)
]

In [3]:
# Correct number of classes; according to the earlier error message it is 4
num_classes = 4

# Create the model with the correct number of classes and place it on the
# device that the evaluation cell sends its batches to. Leaving it on the CPU
# fails as soon as `device` is a GPU.
# NOTE(review): ResNet2D is not defined in the visible part of this notebook.
model = ResNet2D(num_classes=num_classes).to(device)

# Loading now succeeds because the class count matches the checkpoint.
# map_location remaps the stored tensors so the checkpoint also opens on a
# machine without the device it was saved from.
model.load_state_dict(torch.load(model_path, map_location=device))

# Call model.train() or model.eval() afterwards depending on what you need

<All keys matched successfully>

In [4]:
import torch  
import numpy as np  
from sklearn.metrics import confusion_matrix, precision_score, recall_score, f1_score  
  
# Assumes model, test_loader, device etc. were defined and initialised by earlier cells

model.eval()
all_preds = []
all_labels = []
with torch.no_grad():  # only the forward passes need gradients disabled
    for images, labels in test_loader:
        images, labels = images.to(device), labels.to(device).long()
        outputs = model(images)
        _, predicted = torch.max(outputs, 1)

        # Collect all predictions and labels
        all_preds.extend(predicted.view(-1).cpu().numpy())
        all_labels.extend(labels.view(-1).cpu().numpy())

# Overall accuracy
accuracy = 100 * np.sum(np.array(all_preds) == np.array(all_labels)) / len(all_labels)
print(f'Accuracy of the model on the test images: {accuracy:.2f} %')

# Precision, recall and F1 score
precision = precision_score(all_labels, all_preds, average='weighted')
recall = recall_score(all_labels, all_preds, average='weighted')
f1 = f1_score(all_labels, all_preds, average='weighted')

print(f'Precision: {precision:.4f}')
print(f'Recall: {recall:.4f}')
print(f'F1 Score: {f1:.4f}')

# Confusion matrix. The row/column order is made explicit so that each row can
# be reported under its real class id; by default the matrix only covers the
# classes that occur in the labels or predictions, so a bare row index is not
# necessarily the class id.
class_ids = sorted(set(all_labels) | set(all_preds))
cm = confusion_matrix(all_labels, all_preds, labels=class_ids)

# Per-class accuracy: correctly predicted samples of a class divided by the
# number of true samples of that class. Classes that were predicted but have
# no true samples get NaN instead of triggering a divide-by-zero warning.
true_counts = cm.sum(axis=1)
class_accuracy = np.array([
    hits / total if total else float('nan')
    for hits, total in zip(cm.diagonal(), true_counts)
])

# Print the accuracy of every class
for class_id, class_acc in zip(class_ids, class_accuracy):
    print(f'Accuracy for class {class_id}: {class_acc:.2f}')

# Print all predictions (if needed)
# print('All Predictions:', all_preds)

Accuracy of the model on the test images: 90.27 %
Precision: 0.9146
Recall: 0.9027
F1 Score: 0.9039
Accuracy for class 0: 0.98
Accuracy for class 1: 0.77
Accuracy for class 2: 0.94
Accuracy for class 3: 0.92


In [6]:
# Show which integer id the training dataset's fitted LabelEncoder assigned to
# each class name.
print("Label mapping:")
for class_index, class_name in enumerate(train_dataset.le.classes_):
    print(f"{class_name}: {class_index}")

Label mapping:
C: 0
D: 1
M: 2
N: 3


In [13]:
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score

# Train the SVM (linear kernel) on the extracted training features
svm_clf = SVC(kernel='linear', C=1).fit(train_features, train_labels)

# Validate the SVM
validation_predictions_svm = svm_clf.predict(validation_features)
validation_accuracy_svm = accuracy_score(
    y_true=validation_labels,
    y_pred=validation_predictions_svm,
)
print(f'SVM 验证准确率: {validation_accuracy_svm:.4f}')

# Test the SVM
test_predictions_svm = svm_clf.predict(test_features)
test_accuracy_svm = accuracy_score(
    y_true=test_labels,
    y_pred=test_predictions_svm,
)
print(f'SVM 测试准确率: {test_accuracy_svm:.4f}')


SVM 验证准确率: 0.8221
SVM 测试准确率: 0.8389


In [14]:
from sklearn.neural_network import MLPClassifier

# Train the MLP (two hidden layers, fixed random_state) on the extracted training features
mlp_clf = MLPClassifier(
    hidden_layer_sizes=(128, 64),
    max_iter=500,
    random_state=42,
).fit(train_features, train_labels)

# Validate the MLP
validation_predictions_mlp = mlp_clf.predict(validation_features)
validation_accuracy_mlp = accuracy_score(
    y_true=validation_labels,
    y_pred=validation_predictions_mlp,
)
print(f'MLP 验证准确率: {validation_accuracy_mlp:.4f}')

# Test the MLP
test_predictions_mlp = mlp_clf.predict(test_features)
test_accuracy_mlp = accuracy_score(
    y_true=test_labels,
    y_pred=test_predictions_mlp,
)
print(f'MLP 测试准确率: {test_accuracy_mlp:.4f}')

MLP 验证准确率: 0.8322
MLP 测试准确率: 0.8087


In [22]:
!pip install xgboost -i https://pypi.tuna.tsinghua.edu.cn/simple

Looking in indexes: https://pypi.tuna.tsinghua.edu.cn/simple
Collecting xgboost
  Downloading https://pypi.tuna.tsinghua.edu.cn/packages/24/14/d9ecb9fa86727f51bfb35f1c2b0428ebc6cd5ffde24c5e2dc583d3575a6f/xgboost-1.6.2-py3-none-win_amd64.whl (125.4 MB)
     ------------------------------------- 125.4/125.4 MB 29.8 MB/s eta 0:00:00
Installing collected packages: xgboost
Successfully installed xgboost-1.6.2


DEPRECATION: pandas 0.24.2 has a non-standard dependency specifier pytz>=2011k. pip 24.1 will enforce this behaviour change. A possible replacement is to upgrade to a newer version of pandas or contact the author to suggest that they release a version with a conforming dependency specifiers. Discussion can be found at https://github.com/pypa/pip/issues/12063


In [23]:
import xgboost as xgb

# Convert each split to XGBoost's DMatrix format
dtrain = xgb.DMatrix(train_features, label=train_labels)
dvalid = xgb.DMatrix(validation_features, label=validation_labels)
dtest = xgb.DMatrix(test_features, label=test_labels)

# XGBoost parameters
params = dict(
    objective='multi:softmax',  # multi-class classification
    num_class=num_classes,      # number of classes
    eval_metric='mlogloss',     # evaluate with log loss
    max_depth=6,                # maximum tree depth
    eta=0.1,                    # learning rate
    seed=42,                    # random seed
)

# Train XGBoost, reporting the metric on the validation split every round
xgb_clf = xgb.train(
    params=params,
    dtrain=dtrain,
    num_boost_round=100,
    evals=[(dvalid, 'validation')],
)

# Validate XGBoost
validation_predictions_xgb = xgb_clf.predict(dvalid)
validation_accuracy_xgb = accuracy_score(
    y_true=validation_labels,
    y_pred=validation_predictions_xgb,
)
print(f'XGBoost 验证准确率: {validation_accuracy_xgb:.4f}')

# Test XGBoost
test_predictions_xgb = xgb_clf.predict(dtest)
test_accuracy_xgb = accuracy_score(
    y_true=test_labels,
    y_pred=test_predictions_xgb,
)
print(f'XGBoost 测试准确率: {test_accuracy_xgb:.4f}')

[0]	validation-mlogloss:1.27678
[1]	validation-mlogloss:1.18596
[2]	validation-mlogloss:1.10482
[3]	validation-mlogloss:1.04226
[4]	validation-mlogloss:0.98213
[5]	validation-mlogloss:0.93216
[6]	validation-mlogloss:0.88883
[7]	validation-mlogloss:0.84934
[8]	validation-mlogloss:0.81408
[9]	validation-mlogloss:0.78427
[10]	validation-mlogloss:0.75769
[11]	validation-mlogloss:0.73029
[12]	validation-mlogloss:0.70713
[13]	validation-mlogloss:0.68498
[14]	validation-mlogloss:0.66780
[15]	validation-mlogloss:0.65096
[16]	validation-mlogloss:0.63548
[17]	validation-mlogloss:0.62268
[18]	validation-mlogloss:0.60741
[19]	validation-mlogloss:0.59614
[20]	validation-mlogloss:0.58516
[21]	validation-mlogloss:0.57563
[22]	validation-mlogloss:0.56677
[23]	validation-mlogloss:0.55617
[24]	validation-mlogloss:0.54900
[25]	validation-mlogloss:0.54136
[26]	validation-mlogloss:0.53531
[27]	validation-mlogloss:0.52940
[28]	validation-mlogloss:0.52493
[29]	validation-mlogloss:0.51896
[30]	validation-mlog

In [24]:
from sklearn.svm import SVC
from sklearn.neural_network import MLPClassifier
import xgboost as xgb
from sklearn.metrics import accuracy_score

# Create and train the SVM; probability=True so it can output probabilities
svm_clf = SVC(kernel='linear', C=1, probability=True).fit(train_features, train_labels)

# Create and train the MLP
mlp_clf = MLPClassifier(
    hidden_layer_sizes=(128, 64),
    max_iter=500,
    random_state=42,
).fit(train_features, train_labels)

# Create and train XGBoost
dtrain = xgb.DMatrix(train_features, label=train_labels)
dvalid = xgb.DMatrix(validation_features, label=validation_labels)
dtest = xgb.DMatrix(test_features, label=test_labels)

params = dict(
    objective='multi:softprob',  # softprob so the booster outputs probabilities
    num_class=num_classes,
    eval_metric='mlogloss',
    max_depth=6,
    eta=0.1,
    seed=42,
)

xgb_clf = xgb.train(
    params=params,
    dtrain=dtrain,
    num_boost_round=100,
    evals=[(dvalid, 'validation')],
)

# Predicted probabilities of each model on the test set
svm_probs = svm_clf.predict_proba(test_features)
mlp_probs = mlp_clf.predict_proba(test_features)
xgb_probs = xgb_clf.predict(dtest)

[0]	validation-mlogloss:1.27678
[1]	validation-mlogloss:1.18596
[2]	validation-mlogloss:1.10482
[3]	validation-mlogloss:1.04226
[4]	validation-mlogloss:0.98213
[5]	validation-mlogloss:0.93216
[6]	validation-mlogloss:0.88883
[7]	validation-mlogloss:0.84934
[8]	validation-mlogloss:0.81408
[9]	validation-mlogloss:0.78427
[10]	validation-mlogloss:0.75769
[11]	validation-mlogloss:0.73029
[12]	validation-mlogloss:0.70713
[13]	validation-mlogloss:0.68498
[14]	validation-mlogloss:0.66780
[15]	validation-mlogloss:0.65096
[16]	validation-mlogloss:0.63548
[17]	validation-mlogloss:0.62268
[18]	validation-mlogloss:0.60741
[19]	validation-mlogloss:0.59614
[20]	validation-mlogloss:0.58516
[21]	validation-mlogloss:0.57563
[22]	validation-mlogloss:0.56677
[23]	validation-mlogloss:0.55617
[24]	validation-mlogloss:0.54900
[25]	validation-mlogloss:0.54136
[26]	validation-mlogloss:0.53531
[27]	validation-mlogloss:0.52940
[28]	validation-mlogloss:0.52493
[29]	validation-mlogloss:0.51896
[30]	validation-mlog

In [25]:
from sklearn.ensemble import VotingClassifier
from sklearn.preprocessing import LabelEncoder

# To let VotingClassifier use XGBoost, wrap it in its scikit-learn style estimator
xgb_member = xgb.XGBClassifier(
    objective='multi:softprob',
    n_estimators=100,
    max_depth=6,
    learning_rate=0.1,
)

# Create the VotingClassifier
ensemble_members = [
    ('svm', svm_clf),
    ('mlp', mlp_clf),
    ('xgb', xgb_member),
]
voting_clf = VotingClassifier(
    estimators=ensemble_members,
    voting='soft',      # soft voting
    weights=[1, 1, 1],  # adjust the weights as needed
)

# Fit on the training set
voting_clf.fit(train_features, train_labels)

# Predict on the test set
test_predictions_voting = voting_clf.predict(test_features)

# Compute the accuracy
voting_accuracy = accuracy_score(y_true=test_labels, y_pred=test_predictions_voting)
print(f'Voting Classifier Test Accuracy: {voting_accuracy:.4f}')

Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  from ._gradient_boosting import predict_stages
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  from ._gradient_boosting import predict_stages


Voting Classifier Test Accuracy: 0.8289
