From 6447256268bd80bc1c32a3a427700431cdd1ec5e Mon Sep 17 00:00:00 2001
From: ilikewind <280458666@qq.com>
Date: Tue, 12 Mar 2019 00:32:58 +0800
Subject: [PATCH] Debug camelyon16

---
 README.md                                  |   6 +-
 camelyon16/bin/Evaluation_FROC.py          |   2 +-
 camelyon16/bin/camelyon16xml2json.py       |   2 +-
 camelyon16/bin/extract_feature_probsmap.py |   9 +-
 camelyon16/bin/json2camelyon16xml.py       |   3 +-
 camelyon16/bin/nms.py                      |   2 +-
 camelyon16/bin/non_tumor_mask.py           |   3 +-
 camelyon16/bin/patch_gen.py                |   2 +-
 camelyon16/bin/probs_map.py                |  13 ++-
 camelyon16/bin/sampled_spot_gen.py         |   2 +
 camelyon16/bin/tissue_mask.py              |   2 +-
 camelyon16/bin/train.py                    |  66 ++++++++----
 camelyon16/bin/tumor_mask.py               |   2 +-
 camelyon16/configs/Densnet.json            |   0
 camelyon16/configs/cnn.json                |  13 +++
 camelyon16/data/image_producer.py          |  81 +++++++++++++++
 camelyon16/data/probs_ops.py               | 111 +++++++++++++++++++++
 requirements.txt                           |  26 +++++
 18 files changed, 308 insertions(+), 37 deletions(-)
 delete mode 100644 camelyon16/configs/Densnet.json
 create mode 100644 camelyon16/configs/cnn.json
 create mode 100644 camelyon16/data/image_producer.py
 create mode 100644 camelyon16/data/probs_ops.py

diff --git a/README.md b/README.md
index 1f3a6e8..358bfae 100644
--- a/README.md
+++ b/README.md
@@ -13,8 +13,10 @@
 
 ## Notes
 
-- [x] **extras/CNNRF is a similar project modeled with keras**
-- [ ] Debug camelyon16
+- [x] extras/CNNRF is a similar project trained with keras
+- [x] Debug camelyon16
+  - [ ] extract\_feature\_probsmap.py
+  - [ ] wsi_classification.py
 - [ ] Debug extras/CNNRF
 
 ## Requisetes
diff --git a/camelyon16/bin/Evaluation_FROC.py b/camelyon16/bin/Evaluation_FROC.py
index 8fbde84..78d714c 100644
--- a/camelyon16/bin/Evaluation_FROC.py
+++ b/camelyon16/bin/Evaluation_FROC.py
@@ -256,4 +256,4 @@ def plotFROC(total_FPs, total_sensitivity):
         print('Avg FP = ', str(eval_threshold[i]))
         print('Sensitivity = ', str(eval_TPs[i]))
 
-    print('Avg Sensivity = ', np.mean(eval_TPs))
\ No newline at end of file
+    print('Avg Sensivity = ', np.mean(eval_TPs))
diff --git a/camelyon16/bin/camelyon16xml2json.py b/camelyon16/bin/camelyon16xml2json.py
index 7cc8dd1..c25bfa3 100644
--- a/camelyon16/bin/camelyon16xml2json.py
+++ b/camelyon16/bin/camelyon16xml2json.py
@@ -5,7 +5,7 @@
 
 sys.path.append(os.path.dirname(os.path.abspath(__file__)) + '/../../../')
 
-from nnxgb.data.annotation import Formatter  # noqa
+from camelyon16.data.annotation import Formatter  # noqa
 
 parser = argparse.ArgumentParser(description='Convert Camelyon16 xml format to'
                                  'internal json format')
diff --git a/camelyon16/bin/extract_feature_probsmap.py b/camelyon16/bin/extract_feature_probsmap.py
index 9fc58ce..4db8009 100644
--- a/camelyon16/bin/extract_feature_probsmap.py
+++ b/camelyon16/bin/extract_feature_probsmap.py
@@ -9,7 +9,7 @@
 
 sys.path.append(os.path.dirname(os.path.abspath(__file__) + '/../../'))
 
-from data.probs_ops import extract_features
+from camelyon16.data.probs_ops import extractor_features
 
 parser = argparse.ArgumentParser(description='Extract features from probability map'
                                  'for slide classification')
@@ -34,6 +34,7 @@ def compute_features(extractor):
 
     probs_map_threshold_p90 = extractor.probs_map_set_p(0.9)
     probs_map_threshold_p50 = extractor.probs_map_set_p(0.5)
+
     region_props_p90 = extractor.get_region_props(probs_map_threshold_p90)
     region_props_p50 = extractor.get_region_props(probs_map_threshold_p50)
 
@@ -44,7 +45,7 @@ def compute_features(extractor):
     features.append(f_percentage_tumor_over_tissue_region)
 
     largest_tumor_region_index_t50 = extractor.get_largest_tumor_index(region_props_p50)
-    f_area_largest_tumor_region_t50 = extractor.region_props_t50[largest_tumor_region_index_t50].area  # 3
+    f_area_largest_tumor_region_t50 = region_props_p50[largest_tumor_region_index_t50].area  # 3
     features.append(f_area_largest_tumor_region_t50)
 
     f_longest_axis_largest_tumor_region_t50 = extractor.get_longest_axis_in_largest_tumor_region(region_props_p50,
@@ -76,11 +77,11 @@ def compute_features(extractor):
 
 
 def run(args):
-    slide = openslide.OpenSlide(args.wsi_path)
+    slide_path = args.wsi_path
 
     probs_map = np.load(args.probs_map_path)
 
-    extractor = extract_features(probs_map, slide)
+    extractor = extractor_features(probs_map, slide_path)
 
     features = compute_features(extractor)
 
diff --git a/camelyon16/bin/json2camelyon16xml.py b/camelyon16/bin/json2camelyon16xml.py
index 1efc878..b70ed63 100644
--- a/camelyon16/bin/json2camelyon16xml.py
+++ b/camelyon16/bin/json2camelyon16xml.py
@@ -4,7 +4,7 @@
 
 sys.path.append(os.path.dirname(os.path.abspath(__file__)) + '/../../')
 
-from nnxgb.data.annotation import Formatter  # noqa
+from camelyon16.data.annotation import Formatter  # noqa
 
 parser = argparse.ArgumentParser(description='Convert My json format to'
                                  'ASAP json format')
@@ -15,6 +15,7 @@ parser.add_argument('color', default=None, metavar='COLOR', nargs='+',
                     type=str, help='The polygon part of color')
 
+
 
 def run(args):
     with open(args.json_path) as f:
         dict = json.load(f)
diff --git a/camelyon16/bin/nms.py b/camelyon16/bin/nms.py
index 2abba4c..47f76fb 100644
--- a/camelyon16/bin/nms.py
+++ b/camelyon16/bin/nms.py
@@ -63,4 +63,4 @@ def main():
 
 
 if __name__ == '__main__':
-    main()
\ No newline at end of file
+    main()
diff --git a/camelyon16/bin/non_tumor_mask.py b/camelyon16/bin/non_tumor_mask.py
index d111324..fad4fe7 100644
--- a/camelyon16/bin/non_tumor_mask.py
+++ b/camelyon16/bin/non_tumor_mask.py
@@ -31,5 +31,6 @@ def main():
     args = parser.parse_args()
     run(args)
 
+
 if __name__ == "__main__":
-    main()
\ No newline at end of file
+    main()
diff --git a/camelyon16/bin/patch_gen.py b/camelyon16/bin/patch_gen.py
index 49d7fad..5e8ac9c 100644
--- a/camelyon16/bin/patch_gen.py
+++ b/camelyon16/bin/patch_gen.py
@@ -18,7 +18,7 @@
                     type=str, help='Path to the input list of coordinates')
 parser.add_argument('patch_path', default=None, metavar='PATCH_PATH',
                     type=str, help='Path to the output directory of patch images')
-parser.add_argument('--patch_size', default=768, type=int, help='patch size, '
+parser.add_argument('--patch_size', default=256, type=int, help='patch size, '
                     'default 768')
 parser.add_argument('--level', default=0, type=int, help='level for WSI, to '
                     'generate patches, default 0')
diff --git a/camelyon16/bin/probs_map.py b/camelyon16/bin/probs_map.py
index 1295d0c..cc7a09c 100644
--- a/camelyon16/bin/probs_map.py
+++ b/camelyon16/bin/probs_map.py
@@ -11,6 +11,7 @@
 from torch.utils.data import DataLoader
 from torch.autograd import Variable
 from torchvision import models
+from torch import nn
 
 sys.path.append(os.path.dirname(os.path.abspath(__file__)) + '/../../')
 
@@ -42,6 +43,14 @@
                     ' default 0, which means disabled')
 
 
+def chose_model(mod):
+    if mod == 'resnet18':
+        model = models.resnet18(pretrained=False)
+    else:
+        raise Exception("I have not added any models.")
+    return model
+
+
 def get_probs_map(model, dataloader):
     probs_map = np.zeros(dataloader.dataset._mask.shape)
     num_batch = len(dataloader)
@@ -96,7 +105,9 @@ def run(args):
     mask = np.load(args.mask_path)
     ckpt = torch.load(args.ckpt_path)
 
-    model = models[cnn['model']]()
+    model = chose_model(cnn['model'])
+    fc_features = model.fc.in_features
+    model.fc = nn.Linear(fc_features, 1)
     model.load_state_dict(ckpt['state_dict'])
     model = model.cuda().eval()
 
diff --git a/camelyon16/bin/sampled_spot_gen.py b/camelyon16/bin/sampled_spot_gen.py
index 885fc94..b415e47 100644
--- a/camelyon16/bin/sampled_spot_gen.py
+++ b/camelyon16/bin/sampled_spot_gen.py
@@ -58,11 +58,13 @@ def run(args):
     with open(txt_path, "a") as f:
         np.savetxt(f, center_points, fmt="%s", delimiter=",")
 
+
 def main():
     logging.basicConfig(level=logging.INFO)
     args = parser.parse_args()
     run(args)
 
+
 if __name__ == "__main__":
     main()
 
diff --git a/camelyon16/bin/tissue_mask.py b/camelyon16/bin/tissue_mask.py
index 41a094d..b75be6b 100644
--- a/camelyon16/bin/tissue_mask.py
+++ b/camelyon16/bin/tissue_mask.py
@@ -55,4 +55,4 @@ def main():
 
 
 if __name__ == '__main__':
-    main()
\ No newline at end of file
+    main()
diff --git a/camelyon16/bin/train.py b/camelyon16/bin/train.py
index a2d6ab0..0b1989f 100644
--- a/camelyon16/bin/train.py
+++ b/camelyon16/bin/train.py
@@ -11,12 +11,13 @@
 from torch.nn import BCEWithLogitsLoss, DataParallel
 from torch.optim import SGD
 from torchvision import models
-from torchvision.datasets import ImageFolder
+from torch import nn
 
 from tensorboardX import SummaryWriter
 
 sys.path.append(os.path.dirname(os.path.abspath(__file__)) + '/../../')
 
+from camelyon16.data.image_producer import ImageDataset
 
 torch.manual_seed(0)
 torch.cuda.manual_seed_all(0)
@@ -33,6 +34,14 @@
                     ' and GPU_1, default 0.')
 
 
+def chose_model(cnn):
+    if cnn['model'] == 'resnet18':
+        model = models.resnet18(pretrained=False)
+    else:
+        raise Exception("I have not added any models.")
+    return model
+
+
 def train_epoch(summary, summary_writer, cnn, model, loss_fn, optimizer,
                 dataloader_train):
     model.train()
@@ -44,10 +53,11 @@ def train_epoch(summary, summary_writer, cnn, model, loss_fn, optimizer,
     time_now = time.time()
     for step in range(steps):
         data_train, target_train = next(dataiter_train)
-        data_train = Variable(data_train.cuda(async=True))
-        target_train = Variable(target_train.cuda(async=True))
+        data_train = Variable(data_train.float().cuda(async=True))
+        target_train = Variable(target_train.float().cuda(async=True))
 
         output = model(data_train)
+        output = torch.squeeze(output)  # noqa
         loss = loss_fn(output, target_train)
 
         optimizer.zero_grad()
@@ -56,9 +66,10 @@ def train_epoch(summary, summary_writer, cnn, model, loss_fn, optimizer,
         loss.backward()
         optimizer.step()
         probs = output.sigmoid()
         predicts = (probs >= 0.5).type(torch.cuda.FloatTensor)
+
         acc_data = (predicts == target_train).type(
-            torch.cuda.FloatTensor).sum().data[0] * 1.0 / batch_size
-        loss_data = loss.data[0]
+            torch.cuda.FloatTensor).sum().data * 1.0 / batch_size
+        loss_data = loss.data
         time_spent = time.time() - time_now
         logging.info(
@@ -74,9 +85,9 @@ def train_epoch(summary, summary_writer, cnn, model, loss_fn, optimizer,
             summary_writer.add_scalar('train/loss', loss_data, summary['step'])
             summary_writer.add_scalar('train/acc', acc_data, summary['step'])
 
-        summary['step'] += 1
+    summary['epoch'] += 1
 
-    return summary
+    return summary
 
 
 def valid_epoch(summary, model, loss_fn,
@@ -91,20 +102,21 @@ def valid_epoch(summary, model, loss_fn,
     acc_sum = 0
     for step in range(steps):
         data_valid, target_valid = next(dataiter_valid)
-        data_valid = Variable(data_valid.cuda(async=True), volatile=True)
-        target_valid = Variable(target_valid.cuda(async=True))
+        data_valid = Variable(data_valid.float().cuda(async=True), volatile=True)
+        target_valid = Variable(target_valid.float().cuda(async=True))
 
         output = model(data_valid)
+        output = torch.squeeze(output)  # important
         loss = loss_fn(output, target_valid)
 
         probs = output.sigmoid()
         predicts = (probs >= 0.5).type(torch.cuda.FloatTensor)
-        acc_data = (predicts == target_valid).tpye(
-            torch.cuda.FloatTensor).sum().data[0] * 1.0 / batch_size
-        loss_data = loss.data[0]
+        acc_data = (predicts == target_valid).type(
+            torch.cuda.FloatTensor).sum().data * 1.0 / batch_size
+        loss_data = loss.data
 
         loss_sum += loss_data
-        acc_sum += loss_data
+        acc_sum += acc_data
 
     summary['loss'] = loss_sum / steps
     summary['acc'] = acc_sum / steps
@@ -113,7 +125,7 @@
 
 
 def run(args):
-    with open(args.cnn_path) as f:
+    with open(args.cnn_path, 'r') as f:
         cnn = json.load(f)
 
     if not os.path.exists(args.save_path):
@@ -128,14 +140,24 @@ def run(args):
     batch_size_valid = cnn['batch_size'] * num_GPU
     num_workers = args.num_workers * num_GPU
-    model = models.cnn['model'](pretrained=True)
+    model = chose_model(cnn)
+    fc_features = model.fc.in_features
+    model.fc = nn.Linear(fc_features, 1)  # note: single output logit
     model = DataParallel(model, device_ids=None)
     model = model.cuda()
 
     loss_fn = BCEWithLogitsLoss().cuda()
-    optimizer = SGD(model.parameters(), lr=cnn['lf'], momentum=cnn['momentum'])
-
-    dataset_train = ImageFolder(cnn['data_path_train'])
-    dataset_valid = ImageFolder(cnn['data_path_valid'])
+    optimizer = SGD(model.parameters(), lr=cnn['lr'], momentum=cnn['momentum'])
+
+    # dataset_train = ImageFolder(cnn['data_path_train'])
+    # dataset_valid = ImageFolder(cnn['data_path_valid'])
+    dataset_train = ImageDataset(cnn['data_path_train'],
+                                 cnn['image_size'],
+                                 cnn['crop_size'],
+                                 cnn['normalize'])
+    dataset_valid = ImageDataset(cnn['data_path_valid'],
+                                 cnn['image_size'],
+                                 cnn['crop_size'],
+                                 cnn['normalize'])
 
     dataloader_train = DataLoader(dataset_train,
                                   batch_size=batch_size_train,
@@ -145,9 +167,9 @@ def run(args):
                                   num_workers=num_workers)
 
     summary_train = {'epoch': 0, 'step': 0}
-    summary_valid = {'loss': float('int'), 'acc': 0}
+    summary_valid = {'loss': float('inf'), 'acc': 0}
     summary_writer = SummaryWriter(args.save_path)
-    loss_valid_best = float('int')
+    loss_valid_best = float('inf')
     for epoch in range(cnn['epoch']):
         summary_train = train_epoch(summary_train, summary_writer, cnn, model,
                                     loss_fn, optimizer,
@@ -170,7 +192,7 @@ def run(args):
                     summary_valid['acc'], time_spent))
 
         summary_writer.add_scalar('valid/loss',
-                                  summary_valid['loss'],summary_train['step'])
+                                  summary_valid['loss'], summary_train['step'])
         summary_writer.add_scalar('valid/acc',
                                   summary_valid['acc'], summary_train['step'])
 
diff --git a/camelyon16/bin/tumor_mask.py b/camelyon16/bin/tumor_mask.py
index 05919ff..60d1092 100644
--- a/camelyon16/bin/tumor_mask.py
+++ b/camelyon16/bin/tumor_mask.py
@@ -18,7 +18,7 @@
                     help='Path to the JSON file')
 parser.add_argument('npy_path', default=None, metavar='NPY_PATH', type=str,
                     help='Path to the output npy mask file')
-parser.add_argument('--level', default=2, type=int, help='at which WSI level'
+parser.add_argument('--level', default=6, type=int, help='at which WSI level'
                     ' to obtain the mask, default 6')
 
 
diff --git a/camelyon16/configs/Densnet.json b/camelyon16/configs/Densnet.json
deleted file mode 100644
index e69de29..0000000
diff --git a/camelyon16/configs/cnn.json b/camelyon16/configs/cnn.json
new file mode 100644
index 0000000..890f9be
--- /dev/null
+++ b/camelyon16/configs/cnn.json
@@ -0,0 +1,13 @@
+{
+    "model": "resnet18",
+    "batch_size": 10,
+    "image_size": 256,
+    "crop_size": 224,
+    "normalize": "True",
+    "lr": 0.001,
+    "momentum": 0.9,
+    "data_path_train": "/home/qianslab/likewind/github/CAMELYON16/wpatch/",
+    "data_path_valid": "/home/qianslab/likewind/github/CAMELYON16/wpatch/",
+    "epoch": 50,
+    "log_every": 5
+}
\ No newline at end of file
diff --git a/camelyon16/data/image_producer.py b/camelyon16/data/image_producer.py
new file mode 100644
index 0000000..d2b63ac
--- /dev/null
+++ b/camelyon16/data/image_producer.py
@@ -0,0 +1,81 @@
+import os
+import sys
+
+import numpy as np
+import random
+from torch.utils.data import Dataset
+from PIL import Image
+
+np.random.seed(0)
+
+from torchvision import transforms  # noqa
+
+
+class ImageDataset(Dataset):
+
+    def __init__(self, data_path, img_size,
+                 crop_size=224, normalize=True):
+        self._data_path = data_path
+        self._img_size = img_size
+        self._crop_size = crop_size
+        self._normalize = normalize
+        self._color_jitter = transforms.ColorJitter(64.0/255, 0.75, 0.25, 0.04)
+        self._pre_process()
+
+    def _pre_process(self):
+        # find classes
+        if sys.version_info >= (3, 5):
+            # Faster and available in python 3.5 and above
+            classes = [d.name for d in os.scandir(self._data_path) if d.is_dir()]
+        else:
+            classes = [d for d in os.listdir(self._data_path) if os.path.isdir(os.path.join(self._data_path, d))]
+        classes.sort()
+        class_to_idx = {classes[i]: i for i in range(len(classes))}
+
+        # make dataset
+        self._items = []
+        for target in sorted(class_to_idx.keys()):
+            d = os.path.join(self._data_path, target)
+            if not os.path.isdir(d):
+                continue
+
+            for root, _, fnames in sorted(os.walk(d)):
+                for fname in sorted(fnames):
+                    if fname.split('.')[-1] == 'png':
+                        path = os.path.join(root, fname)
+                        item = (path, class_to_idx[target])
+                        self._items.append(item)
+
+        random.shuffle(self._items)
+
+        self._num_images = len(self._items)
+
+    def __len__(self):
+        return self._num_images
+
+    def __getitem__(self, idx):
+        path, label = self._items[idx]
+        label = np.array(label, dtype=float)
+
+        img = Image.open(path)
+
+        # color jitter
+        img = self._color_jitter(img)
+
+        # use left_right flip
+        if np.random.rand() > 0.5:
+            img = img.transpose(Image.FLIP_LEFT_RIGHT)
+
+        # use rotate
+        num_rotate = np.random.randint(0, 4)
+        img = img.rotate(90 * num_rotate)
+
+        # PIL image: H W C
+        # torch image: C H W
+        img = np.array(img, dtype=np.float32).transpose((2, 0, 1))
+
+        if self._normalize:
+            img = (img - 128.0) / 128.0
+
+        return img, label
+
diff --git a/camelyon16/data/probs_ops.py b/camelyon16/data/probs_ops.py
new file mode 100644
index 0000000..eea4e13
--- /dev/null
+++ b/camelyon16/data/probs_ops.py
@@ -0,0 +1,111 @@
+import cv2
+import numpy as np
+import scipy.stats.stats as st
+
+from skimage.measure import label
+from skimage.measure import regionprops
+from openslide import OpenSlide
+from openslide import OpenSlideUnsupportedFormatError
+
+MAX, MEAN, VARIANCE, SKEWNESS, KURTOSIS = 0, 1, 2, 3, 4
+
+
+class extractor_features(object):
+    def __init__(self, probs_map, slide_path):
+        self._probs_map = probs_map
+        self._slide = get_image_open(slide_path)
+
+    def get_region_props(self, probs_map_threshold):
+        labeled_img = label(probs_map_threshold)
+        return regionprops(labeled_img, intensity_image=self._probs_map)
+
+    def probs_map_set_p(self, threshold):
+        probs_map_threshold = np.array(self._probs_map)
+
+        probs_map_threshold[probs_map_threshold < threshold] = 0
+        probs_map_threshold[probs_map_threshold >= threshold] = 1
+
+        return probs_map_threshold
+
+    def get_num_probs_region(self, region_probs):
+        return len(region_probs)
+
+    def get_tumor_region_to_tissue_ratio(self, region_props):
+        tissue_area = cv2.countNonZero(self._slide)
+        tumor_area = 0
+
+        n_regions = len(region_props)
+        for index in range(n_regions):
+            tumor_area += region_props[index]['area']
+
+        return float(tumor_area) / tissue_area
+
+    def get_largest_tumor_index(self, region_props):
+
+        largest_tumor_index = -1
+
+        largest_tumor_area = -1
+
+        n_regions = len(region_props)
+        for index in range(n_regions):
+            if region_props[index]['area'] > largest_tumor_area:
+                largest_tumor_area = region_props[index]['area']
+                largest_tumor_index = index
+
+        return largest_tumor_index
+
+    def f_area_largest_tumor_region_t50(self):
+        pass
+
+    def get_longest_axis_in_largest_tumor_region(self,
+                                                 region_props,
+                                                 largest_tumor_region_index):
+        largest_tumor_region = region_props[largest_tumor_region_index]
+        return max(largest_tumor_region['major_axis_length'],
+                   largest_tumor_region['minor_axis_length'])
+
+    def get_average_prediction_across_tumor_regions(self, region_props):
+        # close 255
+        region_mean_intensity = [region.mean_intensity for region in region_props]
+        return np.mean(region_mean_intensity)
+
+    def get_feature(self, region_props, n_region, feature_name):
+        feature = [0] * 5
+        if n_region > 0:
+            feature_values = [region[feature_name] for region in region_props]
+            feature[MAX] = format_2f(np.max(feature_values))
+            feature[MEAN] = format_2f(np.mean(feature_values))
+            feature[VARIANCE] = format_2f(np.var(feature_values))
+            feature[SKEWNESS] = format_2f(st.skew(np.array(feature_values)))
+            feature[KURTOSIS] = format_2f(st.kurtosis(np.array(feature_values)))
+
+        return feature
+
+
+def format_2f(number):
+    return float("{0:.2f}".format(number))
+
+
+def get_image_open(wsi_path):
+    try:
+        wsi_image = OpenSlide(wsi_path)
+        level_used = wsi_image.level_count - 1
+        rgb_image = np.array(wsi_image.read_region((0, 0), level_used,
+                             wsi_image.level_dimensions[level_used]))
+        wsi_image.close()
+    except OpenSlideUnsupportedFormatError:
+        raise ValueError('Exception: OpenSlideUnsupportedFormatError for %s' % wsi_path)
+
+    # hsv -> 3 channel
+    hsv = cv2.cvtColor(rgb_image, cv2.COLOR_BGR2HSV)
+    lower_red = np.array([20, 20, 20])
+    upper_red = np.array([200, 200, 200])
+    # mask -> 1 channel
+    mask = cv2.inRange(hsv, lower_red, upper_red)
+
+    close_kernel = np.ones((20, 20), dtype=np.uint8)
+    image_close = cv2.morphologyEx(np.array(mask), cv2.MORPH_CLOSE, close_kernel)
+    open_kernel = np.ones((5, 5), dtype=np.uint8)
+    image_open = cv2.morphologyEx(np.array(image_close), cv2.MORPH_OPEN, open_kernel)
+
+    return image_open
\ No newline at end of file
diff --git a/requirements.txt b/requirements.txt
index e69de29..f36c458 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -0,0 +1,26 @@
+
+certifi==2016.2.28
+cloudpickle==0.8.0
+cycler==0.10.0
+dask==1.1.4
+decorator==4.3.2
+kiwisolver==1.0.1
+matplotlib==3.0.3
+networkx==2.2
+numpy==1.16.2
+opencv-python==4.0.0.21
+openslide-python==1.1.1
+pandas==0.24.1
+Pillow==5.4.1
+protobuf==3.7.0
+pyparsing==2.3.1
+python-dateutil==2.8.0
+pytz==2018.9
+PyWavelets==1.0.2
+scikit-image==0.14.2
+scipy==1.2.1
+six==1.12.0
+tensorboardX==1.6
+toolz==0.9.0
+torch==1.0.1.post2
+torchvision==0.2.2.post3
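
For orientation, the sketch below shows how the pieces this patch introduces fit together: the cnn.json config, the ImageDataset from camelyon16/data/image_producer.py, and the resnet18-with-a-single-logit head used by train.py and probs_map.py. It is an illustrative example, not part of the patch; the config path, the class sub-directory names and the dummy 224x224 batch are assumptions made only for this sketch.

# Editor's sketch (not part of the patch). Assumes cnn.json and the patch
# folders referenced by data_path_train exist; adjust paths to your setup.
import json

import torch
from torch import nn
from torch.utils.data import DataLoader
from torchvision import models

from camelyon16.data.image_producer import ImageDataset

with open('camelyon16/configs/cnn.json', 'r') as f:
    cnn = json.load(f)

# Same head replacement as in train.py / probs_map.py: resnet18 reduced to a
# single logit so it can be trained with BCEWithLogitsLoss on tumor vs. normal patches.
model = models.resnet18(pretrained=False)
model.fc = nn.Linear(model.fc.in_features, 1)
loss_fn = nn.BCEWithLogitsLoss()

# Shape check with a dummy batch at the crop_size from cnn.json (224x224).
dummy = torch.randn(4, 3, 224, 224)
logits = torch.squeeze(model(dummy))      # one logit per patch, shape (4,)
loss = loss_fn(logits, torch.zeros(4))    # targets are 0/1 floats per patch

# ImageDataset expects one sub-directory per class (e.g. tumor/ and normal/)
# containing png patches; labels come from the sorted directory names.
dataset_train = ImageDataset(cnn['data_path_train'],
                             cnn['image_size'],
                             cnn['crop_size'],
                             cnn['normalize'])
loader = DataLoader(dataset_train, batch_size=cnn['batch_size'], shuffle=True)
imgs, labels = next(iter(loader))         # imgs: float32, (N, 3, H, W), roughly in [-1, 1]

Note that, as written, ImageDataset applies colour jitter, flips and rotations but does not itself resize or crop to crop_size, so the png patches are fed through at their stored size.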