In [42]:
import torch
import torch.nn as nn
from torch.utils.data import DataLoader
from torch.autograd import Variable
import torchvision.transforms.functional as VF
from torchvision import transforms

import sys, argparse, os, copy, itertools, glob, datetime
import pandas as pd
import numpy as np
from sklearn.utils import shuffle
from sklearn.metrics import roc_curve, roc_auc_score, precision_recall_fscore_support,confusion_matrix
from sklearn.datasets import load_svmlight_file
from collections import OrderedDict
import random

In [43]:
features = np.load('../../data/pretrained_resnet18/10X_full_slide_features_PtRes18.npy',allow_pickle=True).item()
full = np.load('../../config/data_segmentation_csv/10X_full.npy',allow_pickle=True).item()
full_index = list(full['full_list'].index)
grouping = np.load('../../config/data_segmentation_csv/10X_grouping.npy',allow_pickle=True).item()
test_index = grouping['test_list'].index

index_dict = {ind:i for i,ind in enumerate(full_index)}
label_dict = {'L':0,'H':1}

In [44]:
class args():
    def __init__(self):
        self.dataset = 'TCGA_BLCA'
        self.num_classes = 2
        self.feats_size = 512
        self.average = False
        self.dropout_node = 0
        self.non_linearity = 1
        self.path = './hyperparam_select_batch_9/weights/lrwdT_0.00044_6.4000000000000006e-06_50/85best_score.pth'
        self.lr = 0.28
        self.weight_decay = 1e-6

    def __getattribute__(self, __name: str):
        return object.__getattribute__(self, __name)
    
args = args()

In [45]:
def get_bag_feats(index, args):
    global features
    global index_dict
    
    feats_og = pd.DataFrame(features[f'index{index_dict[index]}'][2])
    feats = shuffle(feats_og).reset_index(drop=True).to_numpy()
    label_og = label_dict[features[f'index{index_dict[index]}'][1]]   # transformed label in form of int,[0,1]
    
    label = np.zeros(args.num_classes)
    if args.num_classes==1:
        label[0] = label_og
    else:
        if int(label_og)<=(len(label)-1):
            label[int(label_og)] = 1
    return label,feats
'''
example of label/feature : get_bag_feats(test_index[2],args)
label:[1. 0.]
feature:[[2.5330880e+00 1.6554745e-01 8.0470458e-02 ... 1.3655423e+00
  5.7236932e-02 5.2817654e-02]
 [2.8611205e+00 2.9038048e-01 6.0187571e-02 ... 1.8358935e+00
  5.3482568e-01 6.2871813e-03]
 [3.3081994e+00 7.3715396e-02 1.2616467e+00 ... 1.9259404e+00
  1.4681002e-01 2.8594225e-03]
 ...
 [2.9693909e+00 3.2868910e-01 1.3055435e-01 ... 2.4260533e+00
  1.7651926e-01 1.2930447e-01]
 [2.8800142e+00 3.0109720e-02 8.2876140e-01 ... 2.4528553e+00
  5.6700967e-03 0.0000000e+00]
 [1.4685658e+00 1.6393182e-01 6.0487707e-04 ... 1.2453270e+00
  0.0000000e+00 4.1464632e-03]]
'''
def set_seed(seed=10):
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed) # 为了禁止hash随机化，使得实验可复现
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed) # if you are using multi-GPU.
    print('seed set')
set_seed()

seed set


# eval -- test（func）

In [46]:
def multi_label_roc(labels, predictions, num_classes, pos_label=1):
    fprs = []
    tprs = []
    thresholds = []
    thresholds_optimal = []
    aucs = []
    if len(predictions.shape)==1:
        predictions = predictions[:, None]
    for c in range(0, num_classes):
        label = labels[:, c]
        prediction = predictions[:, c]
        fpr, tpr, threshold = roc_curve(label, prediction, pos_label=1)
        fpr_optimal, tpr_optimal, threshold_optimal = optimal_thresh(fpr, tpr, threshold)
        c_auc = roc_auc_score(label, prediction)
        aucs.append(c_auc)
        thresholds.append(threshold)
        thresholds_optimal.append(threshold_optimal)
    return aucs, thresholds, thresholds_optimal

def optimal_thresh(fpr, tpr, thresholds, p=0):
    loss = (fpr - tpr) - p * tpr / (fpr + tpr + 1)
    idx = np.argmin(loss, axis=0)
    return fpr[idx], tpr[idx], thresholds[idx]

In [50]:
'''
demo of values of intermediate variables:

original label:tensor([[0., 1.]], device='cuda:0'),shape of feats:torch.Size([1, 53, 512])
shape of feats after view:torch.Size([53, 512])
shape of ins_pred:torch.Size([53, 2]),original bag_pred:tensor([[-2.9899,  2.7934],
        [-3.7077,  3.5751],
        [-3.6781,  3.7495],
        [-3.8786,  3.5615]], device='cuda:0'),shape of original bag_pred:torch.Size([4, 2])
max pred:tensor([ 0.9809, -2.4702], device='cuda:0'),bag pred after mean:tensor([-3.5636,  3.4199], device='cuda:0')
 Testing bag [0/73] bag loss: 0.9777 
test laels:[0. 1.],test prediction : [array([0.02755601, 0.9683193 ], dtype=float32)]
first 5 class_pred_bag:[1. 0. 0. 0. 0.],
first 5 test_pred:[[0. 1.]
 [0. 0.]
 [1. 0.]
 [1. 0.]
 [1. 0.]],
 first 5 labels:[[0. 1.]
 [1. 0.]
 [1. 0.]
 [1. 0.]
 [1. 0.]]
'''
#load net
import dsmil
i_classifier = dsmil.FCLayer(in_size=args.feats_size, out_size=args.num_classes).cuda()
b_classifier = dsmil.BClassifier(input_size=args.feats_size, output_class=args.num_classes, dropout_v=args.dropout_node, nonlinear=args.non_linearity).cuda()
milnet = dsmil.MILNet(i_classifier, b_classifier).cuda()
milnet = torch.nn.DataParallel(milnet)  #若开启dataparallel，计算得到4个bag pred，使用torch.mean得到最终的bag pred
milnet = milnet.cuda()
#load params
state_dict_weights = torch.load(args.path)
try:
    milnet.load_state_dict(state_dict_weights, strict=False)
except:
    del state_dict_weights['b_classifier.v.1.weight']
    del state_dict_weights['b_classifier.v.1.bias']
    milnet.load_state_dict(state_dict_weights, strict=False)

test_df = test_index
criterion = criterion = nn.BCEWithLogitsLoss()
optimizer = optimizer = torch.optim.Adam(milnet.parameters(), lr=args.lr, betas=(0.5, 0.9), weight_decay=args.weight_decay)

milnet.eval()
total_loss = 0
test_labels = []
test_predictions = []
Tensor = torch.cuda.FloatTensor
#     Tensor = torch.FloatTensor
with torch.no_grad():
    for i in range(len(test_df)):
        label, feats = get_bag_feats(test_df[i], args)
        bag_label = Variable(Tensor([label]))
        bag_feats = Variable(Tensor([feats]))
    #     sys.stdout.write(f'original label:{bag_label},shape of feats:{bag_feats.shape}\n')
        bag_feats = bag_feats.view(-1, args.feats_size)
    #     sys.stdout.write(f'shape of feats after view:{bag_feats.shape}\n')
        ins_prediction, bag_prediction, _, _ = milnet(bag_feats)
    #     sys.stdout.write(f'shape of ins_pred:{ins_prediction.shape},original bag_pred:{bag_prediction},shape of original bag_pred:{bag_prediction.shape}\n')
        max_prediction, _ = torch.max(ins_prediction, 0)  
        bag_prediction = torch.mean(bag_prediction,dim=0)
    #     sys.stdout.write(f'max pred:{max_prediction},bag pred after mean:{bag_prediction}\n')
        bag_loss = criterion(bag_prediction.view(1, -1), bag_label.view(1, -1))
        max_loss = criterion(max_prediction.view(1, -1), bag_label.view(1, -1))
        loss = 0.5*bag_loss + 0.5*max_loss
        total_loss = total_loss + loss.item()
    #     sys.stdout.write('\r Testing bag [%d/%d] bag loss: %.4f \n' % (i, len(test_df), loss.item()))
        test_labels.extend([label])
        #test predictions equals to sigmoid（bag prediction）
        if args.average:
            test_predictions.extend([(0.5*torch.sigmoid(max_prediction)+0.5*torch.sigmoid(bag_prediction)).squeeze().cpu().numpy()])
        else: test_predictions.extend([(0.0*torch.sigmoid(max_prediction)+1.0*torch.sigmoid(bag_prediction)).squeeze().cpu().numpy()])
#     sys.stdout.write(f'test laels:{label},test prediction : {test_predictions}')
test_labels = np.array(test_labels)
test_predictions = np.array(test_predictions)
auc_value, _, thresholds_optimal = multi_label_roc(test_labels, test_predictions, args.num_classes, pos_label=1)
sys.stdout.write(f'auc_value:{auc_value},threshold_optimal:{thresholds_optimal}')
if args.num_classes==1:
    class_prediction_bag = copy.deepcopy(test_predictions)
    class_prediction_bag[test_predictions>=thresholds_optimal[0]] = 1
    class_prediction_bag[test_predictions<thresholds_optimal[0]] = 0
    test_predictions = class_prediction_bag
    test_labels = np.squeeze(test_labels)
else:        
    for i in range(args.num_classes):
        class_prediction_bag = copy.deepcopy(test_predictions[:, i])
        class_prediction_bag[test_predictions[:, i]>=thresholds_optimal[i]] = 1
        class_prediction_bag[test_predictions[:, i]<thresholds_optimal[i]] = 0
        test_predictions[:, i] = class_prediction_bag
sys.stdout.write(f'first 5 class_pred_bag:{class_prediction_bag[:5]},first 5 test_pred:{test_predictions[:5]},first 5 labels:{test_labels[:5]}')

#todo:将labels / preds改成单个数字，获得混淆矩阵

c = confusion_matrix(test_labels,test_predictions).ravel()
tn, fp, fn, tp = c.ravel()
bag_score = 0
for i in range(0, len(test_df)):
    bag_score = np.array_equal(test_labels[i], test_predictions[i]) + bag_score       
avg_score = bag_score / len(test_df)



auc_value:[0.6512820512820513, 0.6487179487179487],threshold_optimal:[0.6584029, 0.34987283]first 5 class_pred_bag:[1. 0. 0. 0. 0.],first 5 test_pred:[[0. 1.]
 [1. 0.]
 [1. 0.]
 [1. 0.]
 [1. 0.]],first 5 labels:[[0. 1.]
 [1. 0.]
 [1. 0.]
 [1. 0.]
 [1. 0.]]

In [48]:
import torch
from torch.utils.tensorboard import SummaryWriter
writter = SummaryWriter('test/abc')
writter.add_scalar('test',torch.tensor([0,1]))

AssertionError: scalar should be 0D

In [30]:
import os

def file_remover(folder):
    files = os.listdir(f'./hyperparam_select_batch_9/weights/{folder}')
    #change working path
    os.chdir(f'./hyperparam_select_batch_9/weights/{folder}')
    best_score = []
    best_auc = []
    best_avg_auc = []

    for name in files:
        first_num = int(name.split('b')[0])
        if 'avg' in name:
            best_avg_auc.append(first_num) 
        elif 'score' in name:
            best_score.append(first_num)
        elif 'auc' in name:
            best_auc.append(first_num)
    max_score = max(best_score)  
    max_auc = max(best_auc)
    max_avg_auc = max(best_avg_auc)
#     print(os.getcwd())
#     print(f'{max_score}best_score.pth')  
#     print(f'{max_auc}best_auc.pth')
#     print(f'{max_avg_auc}best_avg_auc.pth')
    
    #remove files that not satisfy the best
    for name in files:
        if name not in [f'{max_score}best_score.pth',f'{max_auc}best_auc.pth',f'{max_avg_auc}best_avg_auc.pth']:
            os.remove(name)

In [31]:
#initialize path
os.chdir(f'/home/wangyh/uro_biomarker/patho_AI/processing/mil classifier')
folder_ls = os.listdir('./hyperparam_select_batch_9/weights/')

for folder in folder_ls:
    file_remover(folder)
    #reset path
    os.chdir(f'/home/wangyh/uro_biomarker/patho_AI/processing/mil classifier')

88best_score.pth
88best_auc.pth
79best_avg_auc.pth
101best_score.pth
101best_auc.pth
101best_avg_auc.pth
163best_score.pth
163best_auc.pth
163best_avg_auc.pth
108best_score.pth
108best_auc.pth
108best_avg_auc.pth
73best_score.pth
75best_auc.pth
75best_avg_auc.pth
79best_score.pth
79best_auc.pth
79best_avg_auc.pth
30best_score.pth
30best_auc.pth
30best_avg_auc.pth
85best_score.pth
85best_auc.pth
85best_avg_auc.pth
93best_score.pth
30best_auc.pth
26best_avg_auc.pth
163best_score.pth
163best_auc.pth
163best_avg_auc.pth
