In [1]:
# -*- coding: utf-8 -*-
import os, re, warnings
warnings.filterwarnings("ignore")

import numpy as np
import cv2
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import cohen_kappa_score

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader, DistributedSampler
from torchvision.models import resnet34, resnet50

# 如果你有自定义 transforms（保持你原来的 API）
import transforms as trans   # 若没有这份文件，可自行改用 torchvision.transforms

# ========= 基本配置 =========
batchsize    = 4
oct_img_size = [512, 512]
image_size   = 256
iters        = 10000
val_ratio    = 0.2
trainset_root = "/home/yanggq/project/grading/Glaucoma_grading/training/multi-modality_images"
label_xlsx    = "/home/yanggq/project/grading/Glaucoma_grading/training/glaucoma_grading_training_GT.xlsx"
num_workers   = 4
init_lr       = 1e-4
save_dir      = "trained_models_torch"

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("device:", device)

# ========= 数据集 =========
class GAMMA_sub1_dataset(Dataset):
    """
    输出:
      fundus_img: uint8, shape (3, H, W) - RGB
      oct_img   : uint8, shape (D, H, W) - 灰度体数据 (把 D 当做通道数用在 2D ResNet 上)
      label     : int64 标量（0..C-1）
    """
    def __init__(self, img_transforms, oct_transforms, dataset_root,
                 label_file='', filelists=None, num_classes=3, mode='train'):
        self.dataset_root   = dataset_root
        self.img_transforms = img_transforms
        self.oct_transforms = oct_transforms
        self.mode           = mode.lower()
        self.num_classes    = num_classes

        if self.mode == 'train':
            # 你原始脚本从 xlsx 里读 one-hot/prob 向量，这里保持一致
            # 注意：确保读到的是 float 数组
            label_map = {
                int(row['data']): np.asarray(row[1:].values, dtype=np.float32)
                for _, row in pd.read_excel(label_file).iterrows()
            }
            self.file_list = [[f, label_map[int(f)]] for f in os.listdir(dataset_root)]
        else:
            self.file_list = [[f, None] for f in os.listdir(dataset_root)]

        if filelists is not None:
            name_set = set(filelists)
            self.file_list = [it for it in self.file_list if it[0] in name_set]

    def _oct_sort_key(self, name: str):
        # 你原脚本用 int(x.strip("_")[0]) 会按第一个字符排序，10 与 2 会乱序
        stem = os.path.splitext(name)[0]
        m = re.search(r'(\d+)$', stem)
        return int(m.group(1)) if m else stem  # 更稳妥
        # 参考你原代码位置：oct_series_list 的排序逻辑。:contentReference[oaicite:3]{index=3}

    def __getitem__(self, idx):
        real_index, label_vec = self.file_list[idx]

        fundus_img_path = os.path.join(self.dataset_root, real_index, real_index + ".jpg")
        series_dir = os.path.join(self.dataset_root, real_index, real_index)
        oct_series_list = sorted(os.listdir(series_dir), key=self._oct_sort_key)

        fundus_img = cv2.imread(fundus_img_path)[:, :, ::-1]  # BGR->RGB
        # 装载一组 OCT 切片
        oct0 = cv2.imread(os.path.join(series_dir, oct_series_list[0]), cv2.IMREAD_GRAYSCALE)
        D, H, W = len(oct_series_list), oct0.shape[0], oct0.shape[1]
        oct_img = np.zeros((D, H, W, 1), dtype="uint8")
        for k, p in enumerate(oct_series_list):
            oct_img[k] = cv2.imread(os.path.join(series_dir, p), cv2.IMREAD_GRAYSCALE)[..., np.newaxis]

        # 可选的图像增强
        if self.img_transforms is not None:
            fundus_img = self.img_transforms(fundus_img)
        if self.oct_transforms is not None:
            oct_img = self.oct_transforms(oct_img)

        # NHWC -> NCHW
        fundus_img = fundus_img.transpose(2, 0, 1)  # (H,W,C)->(C,H,W)
        oct_img    = oct_img.squeeze(-1)            # (D,H,W,1)->(D,H,W)

        if self.mode == 'test':
            return fundus_img, oct_img, real_index

        # —— 标签：只 argmax 一次，返回 np.int64 标量（修复你原代码的“双重 argmax”问题）——
        # 参考处：你原脚本 L61-L64 同时做了 label.argmax() 和 np.argmax(label)，会出错。:contentReference[oaicite:4]{index=4}
        class_id = np.int64(np.argmax(label_vec))
        return fundus_img, oct_img, class_id

    def __len__(self):
        return len(self.file_list)

# ========= collate：显式堆叠，避免默认逻辑把标签搞坏 =========
def my_collate(batch):
    f_list, o_list, y_list = zip(*batch)
    f = np.stack(f_list, axis=0).astype('uint8')     # [N,3,H,W]
    o = np.stack(o_list, axis=0).astype('uint8')     # [N,D,H,W]
    y = np.asarray(y_list, dtype=np.int64)           # [N]
    return f, o, y

# ========= transforms（按你原脚本）=========
img_train_transforms = trans.Compose([
    trans.RandomResizedCrop(image_size, scale=(0.90, 1.1), ratio=(0.90, 1.1)),
    trans.RandomHorizontalFlip(),
    trans.RandomVerticalFlip(),
    trans.RandomRotation(30),
])
oct_train_transforms = trans.Compose([
    trans.CenterCrop([256] + oct_img_size),
    trans.RandomHorizontalFlip(),
    trans.RandomVerticalFlip(),
])
img_val_transforms = trans.Compose([
    trans.CropCenterSquare(),
    trans.Resize((image_size, image_size)),
])
oct_val_transforms = trans.Compose([
    trans.CenterCrop([256] + oct_img_size),
])

# ========= 划分数据 =========
filelists = os.listdir(trainset_root)
#train_filelists, val_filelists = train_test_split(filelists, test_size=val_ratio, random_state=12)

# 最后 20 个作为测试集
val_filelists = filelists[-20:]
# 其余的作为训练集
train_filelists = filelists[:-20]

print(f"Total Nums: {len(filelists)}, train: {len(train_filelists)}, val: {len(val_filelists)}")


train_dataset = GAMMA_sub1_dataset(dataset_root=trainset_root, 
                                   img_transforms=img_train_transforms,
                                   oct_transforms=oct_train_transforms,
                                   filelists=train_filelists,
                                   label_file=label_xlsx)

val_dataset = GAMMA_sub1_dataset(dataset_root=trainset_root, 
                                 img_transforms=img_val_transforms,
                                 oct_transforms=oct_val_transforms,
                                 filelists=val_filelists,
                                 label_file=label_xlsx)

train_loader = DataLoader(train_dataset, batch_size=batchsize, shuffle=True,
                          num_workers=num_workers, collate_fn=my_collate, pin_memory=True)
val_loader   = DataLoader(val_dataset,   batch_size=batchsize, shuffle=False,
                          num_workers=num_workers, collate_fn=my_collate, pin_memory=True)

# ========= 模型（PyTorch 版）=========
class ModelRes34(nn.Module):
    """
    两分支：
      - fundus_branch: resnet34 输入 3 通道
      - oct_branch   : resnet34 输入 256 通道（把 OCT D 当通道）
      - 拼接后接 linear 输出 3 类
    """
    def __init__(self, num_classes=3):
        super().__init__()
        self.fundus_branch = resnet34(weights="IMAGENET1K_V1")  # 预训练
        self.oct_branch    = resnet34(weights="IMAGENET1K_V1")

        # 去掉原 fc，改成输出特征
        self.fundus_branch.fc = nn.Identity()
        self.oct_branch.fc    = nn.Identity()

        # 替换 OCT 分支第一层为 256 输入通道（与你 Paddle 版一致）:contentReference[oaicite:5]{index=5}
        self.oct_branch.conv1 = nn.Conv2d(256, 64, kernel_size=7, stride=2, padding=3, bias=False)

        # resnet34 的全局池化+fc 前输出 512 维；两分支拼接后变 1024
        self.fc = nn.Linear(512 * 2, num_classes)

    def forward(self, fundus_img, oct_img):
        # fundus_img: [N,3,H,W] float
        # oct_img   : [N,256,H,W] float
        b1 = self.fundus_branch(fundus_img)  # [N,512]
        b2 = self.oct_branch(oct_img)        # [N,512]
        logit = self.fc(torch.cat([b1, b2], dim=1))  # [N,3]
        return logit

model = ModelRes34(num_classes=3).to(device)

# ========= 优化器 / 损失 =========
optimizer = torch.optim.Adam(model.parameters(), lr=init_lr)
criterion = nn.CrossEntropyLoss()

# ========= 训练 / 验证 =========
def tensorize_batch(batch):
    f, o, y = batch  # numpy
    # 转 float32 Tensor；归一化到 [0,1]
    fundus = torch.from_numpy(f).float().div_(255.0).to(device, non_blocking=True)  # [N,3,H,W]
    octv   = torch.from_numpy(o).float().div_(255.0).to(device, non_blocking=True)  # [N,D,H,W]
    # 把 D 作为通道，输入 2D ResNet：[N,256,H,W]
    # 你的 Dataset 已经输出 [D,H,W]，collate 后是 [N,D,H,W]，无需再转置
    labels = torch.from_numpy(y).long().to(device, non_blocking=True)               # [N]
    return fundus, octv, labels

@torch.no_grad()
def evaluate():
    model.eval()
    losses = []
    preds_all, gts_all = [], []
    for batch in val_loader:
        fundus, octv, labels = tensorize_batch(batch)
        logits = model(fundus, octv)
        loss = criterion(logits, labels)
        losses.append(loss.item())
        preds = torch.argmax(logits, dim=1).cpu().numpy()
        gts   = labels.cpu().numpy()
        preds_all.append(preds); gts_all.append(gts)
    preds_all = np.concatenate(preds_all, 0)
    gts_all   = np.concatenate(gts_all, 0)
    kappa = cohen_kappa_score(preds_all, gts_all, weights='quadratic')
    return float(np.mean(losses)), float(kappa)

def train(num_iters=iters, log_interval=10, eval_interval=100):
    os.makedirs(save_dir, exist_ok=True)
    model.train()
    best_kappa = -1e9
    avg_loss_buf = []
    kappa_buf_p, kappa_buf_g = [], []

    it = 0
    while it < num_iters:
        for batch in train_loader:
            it += 1
            fundus, octv, labels = tensorize_batch(batch)

            logits = model(fundus, octv)  # [N,3]
            # 断言标签范围（你之前的致命问题）
            with torch.no_grad():
                lb_min, lb_max = int(labels.min().item()), int(labels.max().item())
                assert 0 <= lb_min and lb_max < logits.size(1), \
                    f"label out of range [{lb_min},{lb_max}] vs C={logits.size(1)}"

            loss = criterion(logits, labels)
            loss.backward()
            optimizer.step()
            optimizer.zero_grad()

            avg_loss_buf.append(loss.item())
            with torch.no_grad():
                pred = torch.argmax(logits, dim=1).cpu().numpy()
                gts  = labels.cpu().numpy()
                kappa_buf_p.append(pred); kappa_buf_g.append(gts)

            if it % log_interval == 0:
                avg_loss = float(np.mean(avg_loss_buf)); avg_loss_buf.clear()
                p = np.concatenate(kappa_buf_p, 0); g = np.concatenate(kappa_buf_g, 0)
                kappa = cohen_kappa_score(p, g, weights='quadratic')
                kappa_buf_p.clear(); kappa_buf_g.clear()
                print(f"[TRAIN] iter={it}/{num_iters} avg_loss={avg_loss:.4f} avg_kappa={kappa:.4f}")

            if it % eval_interval == 0:
                vloss, vkappa = evaluate()
                print(f"[EVAL ] iter={it}/{num_iters} avg_loss={vloss:.4f} kappa={vkappa:.4f}")
                if vkappa >= best_kappa:
                    best_kappa = vkappa
                    tag = f"best_model_{best_kappa:.4f}"
                    out_dir = os.path.join(save_dir, tag)
                    os.makedirs(out_dir, exist_ok=True)
                    torch.save(model.state_dict(), os.path.join(out_dir, "model.pt"))
                    torch.save(optimizer.state_dict(), os.path.join(out_dir, "optimizer.pt"))
                    print(f"[SAVE ] {out_dir}")
                model.train()

            if it >= num_iters:
                break

# ========= 先抽查一个 batch，确认标签健康 =========
f0, o0, y0 = next(iter(train_loader))
print("labels sample:", y0, "dtype:", y0.dtype, "shape:", y0.shape)  # 应为 int64、一维、0..2

# ========= 开始训练 =========
train()

device: cuda
Total Nums: 100, train: 80, val: 20
labels sample: [1 1 2 2] dtype: int64 shape: (4,)
[TRAIN] iter=10/10000 avg_loss=0.8657 avg_kappa=0.3056
[TRAIN] iter=20/10000 avg_loss=0.8393 avg_kappa=0.5690
[TRAIN] iter=30/10000 avg_loss=0.6135 avg_kappa=0.6937
[TRAIN] iter=40/10000 avg_loss=0.7508 avg_kappa=0.6881
[TRAIN] iter=50/10000 avg_loss=0.5900 avg_kappa=0.6784
[TRAIN] iter=60/10000 avg_loss=0.5306 avg_kappa=0.7363
[TRAIN] iter=70/10000 avg_loss=0.5428 avg_kappa=0.7674
[TRAIN] iter=80/10000 avg_loss=0.3897 avg_kappa=0.7684
[TRAIN] iter=90/10000 avg_loss=0.7558 avg_kappa=0.6840
[TRAIN] iter=100/10000 avg_loss=0.4271 avg_kappa=0.7998
[EVAL ] iter=100/10000 avg_loss=0.5818 kappa=0.5769
[SAVE ] trained_models_torch/best_model_0.5769
[TRAIN] iter=110/10000 avg_loss=0.4256 avg_kappa=0.8656
[TRAIN] iter=120/10000 avg_loss=0.4548 avg_kappa=0.7710
[TRAIN] iter=130/10000 avg_loss=0.3941 avg_kappa=0.7739
[TRAIN] iter=140/10000 avg_loss=0.5128 avg_kappa=0.6822
[TRAIN] iter=150/10000 avg_

In [None]:
# infer_torch.py
import os, re
import numpy as np
import pandas as pd
import cv2
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader

# ==== 配置 ====
image_size   = 256
oct_img_size = [512, 512]
num_classes  = 3

# 路径（按需修改）
best_model_path = "/home/yanggq/project/grading/GlaucomaRecognition-main/CodeOfTask1/trained_models_torch/best_model_0.7523/model.pt"  # PyTorch 权重
testset_root    = "/home/yanggq/project/grading/Glaucoma_grading/training/multi-modality_images"                    # 测试数据根目录

# ==== 轻量 transforms（与你之前的一致风格）====
import transforms as trans
img_test_transforms = trans.Compose([
    trans.CropCenterSquare(),
    trans.Resize((image_size, image_size)),
])
oct_test_transforms = trans.Compose([
    trans.CenterCrop([256] + oct_img_size),
])

# ==== 测试数据集 ====
class GAMMA_sub1_dataset_test(Dataset):
    """
    输出:
      fundus_img: uint8, (3, H, W)  RGB
      oct_img   : uint8, (D, H, W)  灰度体
      idx       : 样本 ID (str)
    """
    def __init__(self, dataset_root, img_transforms=None, oct_transforms=None):
        self.dataset_root   = dataset_root
        self.img_transforms = img_transforms
        self.oct_transforms = oct_transforms
        self.file_list = sorted(os.listdir(dataset_root))

    def _oct_sort_key(self, name: str):
        stem = os.path.splitext(name)[0]
        m = re.search(r'(\d+)$', stem)
        return int(m.group(1)) if m else stem

    def __getitem__(self, idx):
        real_index = self.file_list[idx]
        fundus_img_path = os.path.join(self.dataset_root, real_index, real_index + ".jpg")
        series_dir = os.path.join(self.dataset_root, real_index, real_index)
        oct_series_list = sorted(os.listdir(series_dir), key=self._oct_sort_key)

        # 读图
        fundus_img = cv2.imread(fundus_img_path)[:, :, ::-1]  # BGR->RGB
        oct0 = cv2.imread(os.path.join(series_dir, oct_series_list[0]), cv2.IMREAD_GRAYSCALE)
        D, H, W = len(oct_series_list), oct0.shape[0], oct0.shape[1]
        oct_img = np.zeros((D, H, W, 1), dtype="uint8")
        for k, p in enumerate(oct_series_list):
            oct_img[k] = cv2.imread(os.path.join(series_dir, p), cv2.IMREAD_GRAYSCALE)[..., np.newaxis]

        # transforms
        if self.img_transforms is not None:
            fundus_img = self.img_transforms(fundus_img)
        if self.oct_transforms is not None:
            oct_img = self.oct_transforms(oct_img)

        # NHWC->CHW / DHWC->DHW
        fundus_img = fundus_img.transpose(2, 0, 1)
        oct_img    = oct_img.squeeze(-1)

        return fundus_img, oct_img, real_index

    def __len__(self):
        return len(self.file_list)

# ===== collate（显式堆叠，保持 dtype）=====
def collate_test(batch):
    f_list, o_list, idx_list = zip(*batch)
    f = np.stack(f_list, axis=0).astype("uint8")  # [N,3,H,W]
    o = np.stack(o_list, axis=0).astype("uint8")  # [N,D,H,W]
    return f, o, list(idx_list)

# ===== 模型（与你训练用的两分支 ResNet34 对齐）=====
from torchvision.models import resnet34

class Model(nn.Module):
    def __init__(self, num_classes=3):
        super().__init__()
        self.fundus_branch = resnet34(weights="IMAGENET1K_V1")
        self.oct_branch    = resnet34(weights="IMAGENET1K_V1")

        # 去掉 fc，只取 512 维特征
        self.fundus_branch.fc = nn.Identity()
        self.oct_branch.fc    = nn.Identity()

        # OCT 分支第一层改为 256 输入通道（把 D 当通道）
        self.oct_branch.conv1 = nn.Conv2d(256, 64, kernel_size=7, stride=2, padding=3, bias=False)

        self.fc = nn.Linear(512 * 2, num_classes)

    def forward(self, fundus_img, oct_img):
        b1 = self.fundus_branch(fundus_img)  # [N,512]
        b2 = self.oct_branch(oct_img)        # [N,512]
        return self.fc(torch.cat([b1, b2], dim=1))  # [N, num_classes]

# ===== 推理 =====
def tensorize_batch(batch, device):
    f, o, idx_list = batch
    fundus = torch.from_numpy(f).float().div_(255.0).to(device, non_blocking=True)  # [N,3,H,W]
    octv   = torch.from_numpy(o).float().div_(255.0).to(device, non_blocking=True)  # [N,D,H,W]
    return fundus, octv, idx_list

def main():
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print("device:", device)

    # 数据
    test_dataset = GAMMA_sub1_dataset_test(
        dataset_root=testset_root,
        img_transforms=img_test_transforms,
        oct_transforms=oct_test_transforms
    )
    test_loader = DataLoader(test_dataset, batch_size=8, shuffle=False,
                             num_workers=2, collate_fn=collate_test, pin_memory=True)

    # 模型 & 权重
    model = Model(num_classes=num_classes).to(device)
    state = torch.load(best_model_path, map_location=device)
    model.load_state_dict(state)
    model.eval()

    rows = []
    with torch.no_grad():
        for batch in test_loader:
            fundus, octv, idx_list = tensorize_batch(batch, device)
            logits = model(fundus, octv)                   # [N,3]
            preds  = torch.argmax(logits, dim=1).cpu().numpy()  # [N]

            # 组装提交格式
            for i, idx in enumerate(idx_list):
                p = int(preds[i])
                rows.append([
                    idx,
                    int(p == 0),  # non
                    int(p == 1),  # early
                    int(p == 2),  # mid_advanced
                ])

    df = pd.DataFrame(rows, columns=["data", "non", "early", "mid_advanced"])
    df.to_csv("Classification_Results.csv", index=False)
    print("Saved: Classification_Results.csv")

if __name__ == "__main__":
    main()

In [None]:
import pandas as pd
from sklearn.metrics import cohen_kappa_score

# 读取文件
gt_df = pd.read_csv("/home/yanggq/project/grading/GlaucomaRecognition-main/CodeOfTask1/Classification_Results.csv")  # ground truth
pred_df = pd.read_excel("/home/yanggq/project/grading/Glaucoma_grading/training/glaucoma_grading_training_GT.xlsx")  # predictions

# 提取标签（取最大概率对应的类别）
gt_labels = gt_df[['non', 'early', 'mid_advanced']].values.argmax(axis=1)
pred_labels = pred_df[['non', 'early', 'mid_advanced']].values.argmax(axis=1)

# 计算 Cohen's Kappa
kappa = cohen_kappa_score(gt_labels, pred_labels)

print("Cohen's Kappa:", kappa)

In [20]:
import pandas as pd
import numpy as np
from sklearn.metrics import cohen_kappa_score

# 路径按需修改
gt_path   = "/home/yanggq/project/grading/GlaucomaRecognition-main/CodeOfTask1/Classification_Results.csv"     # 真实标注（Excel）
pred_path = "/home/yanggq/project/grading/Glaucoma_grading/training/glaucoma_grading_training_GT.xlsx"            # 你的预测（CSV）
out_path  = "per_sample_compare.csv"                # 输出逐样本对比

# 1) 读取
gt_df   = pd.read_csv(gt_path)                    # 列: data, non, early, mid_advanced
pred_df = pd.read_excel(pred_path)                    # 列: data, non, early, mid_advanced

# 2) 按 data 对齐（只保留两边都出现过的样本）
cols = ["data", "non", "early", "mid_advanced"]
df = gt_df[cols].merge(pred_df[cols], on="data", suffixes=("_gt", "_pred"))

# 3) 计算逐样本标签与是否正确
gt_labels  = df[["non_gt", "early_gt", "mid_advanced_gt"]].to_numpy().argmax(axis=1)
pred_labels= df[["non_pred","early_pred","mid_advanced_pred"]].to_numpy().argmax(axis=1)

per_sample = pd.DataFrame({
    "data": df["data"],
    "gt_label":  gt_labels,
    "pred_label":pred_labels,
    "is_correct": (gt_labels == pred_labels).astype(int)  # 1=正确，0=错误
})

# 4) 统计指标（可选）
acc   = per_sample["is_correct"].mean()
kappa = cohen_kappa_score(gt_labels, pred_labels)
print(f"Accuracy = {acc:.4f}, Cohen's Kappa = {kappa:.4f}")

# 5) 保存逐样本对比
per_sample.to_csv(out_path, index=False)
print(f"Saved per-sample comparison -> {out_path}")

# 想看错误样本：
errors = per_sample.query("is_correct == 0")
print("Wrong samples (head):")
print(errors.head())

Accuracy = 0.9670, Cohen's Kappa = 0.9470
Saved per-sample comparison -> per_sample_compare.csv
Wrong samples (head):
    data  gt_label  pred_label  is_correct
60    70         2           1           0
80    90         2           1           0
87    97         2           1           0


In [27]:
import pandas as pd
from sklearn.metrics import roc_auc_score

# 读取CSV文件
gt_path   = "/home/yanggq/project/grading/GlaucomaRecognition-main/CodeOfTask1/Classification_Results2.csv"     # 真实标注（Excel）
pred_path = "/home/yanggq/project/grading/Glaucoma_grading/training/glaucoma_grading_training_GT.xlsx"            # 你的预测（CSV）
# 1) 读取
gt_df   = pd.read_csv(gt_path)                    # 列: data, non, early, mid_advanced
pred_df = pd.read_excel(pred_path)                    # 列: data, non, early, mid_advanced

# 提取真实标签和预测概率
y_true = gt_df[['non', 'early', 'mid_advanced']].values
y_pred = pred_df[['non', 'early', 'mid_advanced']].values

# 计算 macro-AUC
macro_auc = roc_auc_score(y_true, y_pred, average="macro", multi_class="ovr")
print("Macro-AUC:", macro_auc)

Macro-AUC: 0.9006438029109599
