In [37]:
import os 
import openslide
import cv2
import numpy as np
import time
from xml.etree.ElementTree import parse
import matplotlib.pyplot as plt

# Label / colour constants. TUMOR_COLOR and PATTERN3_COLOR are the ones passed
# as the `color` argument of `make_mask` further down in this notebook.
# NOTE(review): PATTERN3_COLOR and TUMOR_COLOR used to be assigned twice (an
# integer first, then an RGB tuple). Only the RGB tuple ever took effect, so the
# shadowed integer assignments (1 and 4) are removed. The remaining constants
# are still plain integers — confirm whether they should become RGB tuples too.
NORMAL_COLOR = 0
PATTERN3_COLOR = (255, 255, 0)
PATTERN4_COLOR = 2
PATTERN5_COLOR = 3
TUMOR_COLOR = (255, 0, 0)

class Annotation:
    """Pair a whole-slide image with its XML annotation at one pyramid level.

    The instance opens the slide, parses the XML, and keeps a thumbnail whose
    requested size is the dimensions of the chosen level. It can then scale the
    annotated polygons to that level and rasterise them into a mask.

    The slide handle stays open until `close()` is called; the class can also
    be used as a context manager (`with Annotation(...) as an:`).

    Attributes:
        slide_path: path handed to ``openslide.OpenSlide``.
        xml_path: path of the annotation XML file.
        level: pyramid level index used for the thumbnail size, the coordinate
            scaling and the mask size. Negative values index from the end.
        slide: the opened ``openslide.OpenSlide`` handle.
        xml: root element of the parsed annotation XML.
        slide_thumbnail: RGB ``numpy`` array of the thumbnail.
    """

    def __init__(self, slide_path, xml_path, level = -1):
        """Open the slide, parse the annotation XML and build the thumbnail.

        Args:
            slide_path: path of the slide file.
            xml_path: path of the annotation XML file.
            level: pyramid level index (default -1, the last level).
        """
        self.slide_path = slide_path
        self.xml_path = xml_path
        self.level = level

        self.slide = openslide.OpenSlide(slide_path)
        self.xml = parse(xml_path).getroot()

        slide_thumbnail = self.slide.get_thumbnail(self.slide.level_dimensions[self.level])
        slide_thumbnail = slide_thumbnail.convert('RGB')
        self.slide_thumbnail = np.array(slide_thumbnail)

    def close(self):
        """Release the underlying slide handle."""
        self.slide.close()

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        # Returning None lets any exception raised inside the `with` body propagate.
        self.close()

    def get_coordinates(self, target_class = 'Pattern5', non_target_class = 'Pattern3'):
        """Read the annotation polygons, scaled down to `self.level`.

        For every ``Annotation`` element the first child is skipped; each
        remaining child is read as one polygon whose own children carry the
        ``x`` / ``y`` attributes. Coordinates are floor-divided by the rounded
        downsample factor of `self.level`.

        Args:
            target_class: value of the ``class`` attribute collected into the
                first returned list (default 'Pattern5').
            non_target_class: value of the ``class`` attribute collected into
                the second returned list (default 'Pattern3').

        Returns:
            ``(annotations, non_target_annotations)``. Each is a list with one
            entry per matching ``Annotation`` element; an entry is a list of
            polygons, and a polygon is a list of ``[x, y]`` integer pairs.
        """
        slide_mask_ratio = round(self.slide.level_downsamples[self.level])

        annotations = []
        non_target_annotations = []

        for anno in self.xml.iter('Annotation'):
            pattern = anno.get('class')
            annotation = []
            for i, coors in enumerate(anno):
                if i == 0:
                    # The first child is not a polygon container and is skipped.
                    continue
                coordinates = [
                    [
                        round(float(coor.get('x')) // slide_mask_ratio),
                        round(float(coor.get('y')) // slide_mask_ratio),
                    ]
                    for coor in coors
                ]
                annotation.append(coordinates)
            if pattern == target_class:
                annotations.append(annotation)
            if pattern == non_target_class:
                non_target_annotations.append(annotation)

        return annotations, non_target_annotations

    def make_mask(self, annotations, color = 255):
        """Rasterise annotations into a single-channel mask at `self.level`.

        Args:
            annotations: list of annotations as returned by `get_coordinates`.
            color: value used to fill every polygon (default 255).

        Returns:
            ``uint8`` array of shape ``(height, width)`` taken from
            ``self.slide.level_dimensions[self.level]``; polygons are filled
            with `color`, everything else is 0.
        """
        width, height = self.slide.level_dimensions[self.level]
        # Allocate as uint8 directly: np.zeros(...).astype(np.uint8) would first
        # build a float64 array eight times larger than the final mask.
        mask = np.zeros((height, width), dtype=np.uint8)

        for anno in annotations:
            # Empty polygons are dropped; the rest are given the explicit int32
            # (N, 1, 2) layout that OpenCV uses for contours.
            contours = [
                np.asarray(coors, dtype=np.int32).reshape(-1, 1, 2)
                for coors in anno
                if len(coors) > 0
            ]
            if contours:
                cv2.drawContours(mask, contours, -1, color, -1)

        return mask

# Input/output locations: slides sit directly in `slide_dir`, their XML
# annotations in the `annotation` sub-directory, and the overlay check images
# are written to the `check` sub-directory.
slide_dir = '/mnt/hdd1/c-MET_datasets/SLIDE_DATA/1차annotation'
label_dir = os.path.join(slide_dir, 'annotation')
plot_save_dir = os.path.join(slide_dir, 'check')

os.makedirs(plot_save_dir, exist_ok = True)

# Match on the file extension rather than a substring, so names that merely
# contain "svs"/"xml" are not picked up. Both lists are sorted because the
# loops below pair them positionally.
slide_list = sorted(name for name in os.listdir(slide_dir) if name.endswith('.svs'))
label_list = sorted(name for name in os.listdir(label_dir) if name.endswith('.xml'))

In [38]:
# Show the sorted slide file names found in `slide_dir`.
slide_list

['S-LC0027-MET.svs',
 'S-LC0032-MET.svs',
 'S-LC0047-MET.svs',
 'S-LC0059-MET.svs',
 'S-LC0080-MET.svs',
 'S-LC0087-MET.svs',
 'S-LC0090-MET.svs',
 'S-LC0094-MET.svs',
 'S-LC0106-MET.svs',
 'S-LC0107-MET.svs']

In [39]:
# Show the sorted annotation file names found in `label_dir`.
label_list

['S-LC0027-MET.xml',
 'S-LC0032-MET.xml',
 'S-LC0047-MET.xml',
 'S-LC0059-MET.xml',
 'S-LC0080-MET.xml',
 'S-LC0087-MET.xml',
 'S-LC0090-MET.xml',
 'S-LC0094-MET.xml',
 'S-LC0106-MET.xml',
 'S-LC0107-MET.xml']

In [41]:
# For every slide/annotation pair, draw the annotation mask over the thumbnail
# of the last pyramid level (level = -1) and save the overlay as a check image.
total_time = 0
for slide_file, anno_file in zip(slide_list, label_list):
    start_time = time.time()

    slide_name = os.path.splitext(slide_file)[0]
    # The two sorted listings are paired by position, so stop at the first
    # pair whose base names disagree.
    if slide_name != os.path.splitext(anno_file)[0]:
        print("Check the pairness between slide and ROI annotation")
        break

    slide_path = os.path.join(slide_dir, slide_file)
    label_path = os.path.join(label_dir, anno_file)

    AN = Annotation(slide_path = slide_path, xml_path = label_path, level = -1)
    try:
        annotations, non_target_annotations = AN.get_coordinates()
        tumor_label = AN.make_mask(annotations=annotations, color = 255)

        # Merge the non-target mask (value 50) into the tumour mask (value 255)
        # when there is one, so a single overlay call covers both cases.
        label = tumor_label
        if len(non_target_annotations) != 0:
            non_target_label = AN.make_mask(annotations=non_target_annotations, color = 50)
            label = cv2.bitwise_or(tumor_label, non_target_label)

        fig, ax = plt.subplots(figsize=(30, 20))
        try:
            ax.imshow(AN.slide_thumbnail)
            ax.imshow(label, cmap = 'jet', vmax = 255, vmin=0, alpha = 0.3)
            fig.savefig(f'{plot_save_dir}/{slide_name}_tumor_anno_overlay.jpg', bbox_inches = 'tight')
        finally:
            # Always release the figure, even when saving fails.
            plt.close(fig)

        # Read the ratio while the slide handle is still open.
        slide_mask_ratio = int(AN.slide.level_downsamples[-1])
    finally:
        # One handle is opened per iteration; release it before the next slide.
        AN.slide.close()

    end_time = time.time()
    taken = end_time - start_time
    print(f'{slide_name} | slide/mask ratio: {slide_mask_ratio} | size: {tumor_label.shape} | time: {round(taken, 2)} sec')
    total_time += taken

print(f'total time: {round(total_time, 2)} sec')

S-LC0027-MET | slide/mask ratio: 32 | size: (1568, 3237) | time: 3.01 sec
S-LC0032-MET | slide/mask ratio: 64 | size: (854, 2116) | time: 1.7 sec
S-LC0047-MET | slide/mask ratio: 64 | size: (1019, 2365) | time: 2.58 sec
S-LC0059-MET | slide/mask ratio: 32 | size: (2970, 3485) | time: 5.9 sec
S-LC0080-MET | slide/mask ratio: 64 | size: (725, 2334) | time: 1.17 sec
S-LC0087-MET | slide/mask ratio: 64 | size: (1090, 2365) | time: 2.03 sec
S-LC0090-MET | slide/mask ratio: 64 | size: (1306, 2458) | time: 1.97 sec
S-LC0094-MET | slide/mask ratio: 64 | size: (1266, 2458) | time: 2.65 sec
S-LC0106-MET | slide/mask ratio: 64 | size: (793, 2054) | time: 1.99 sec
S-LC0107-MET | slide/mask ratio: 64 | size: (1377, 2365) | time: 2.15 sec
total time: 25.15 sec


In [47]:
# Sanity check: does the annotation directory exist?
os.path.isdir('/mnt/hdd1/c-MET_datasets/SLIDE_DATA/1차annotation/annotation')

True

level = 0 에서는 시간이 너무 오래 걸림

추정: 소요 시간이 slide/mask ratio의 제곱에 비례해 늘어난다고 가정하면

ex) S-LC0106-MET, level 3에서 약 2 초 소요

- level 0 (level 3 대비 64배): 2 sec * (64)**2  = 8192 sec = 136.5 min = 2 h 16 min.....
- level 1 (level 3 대비 16배인 경우): 2 sec * (16)**2  = 512 sec = 8.5 min
- level 1 (level 3 대비 8배인 경우): 2 sec * (8)**2  = 128 sec = 약 2.1 min
- level 2 (level 3 대비 2배인 경우): 2 sec * (2)**2 = 8 sec


In [62]:
# Scratch arithmetic: 64 * 2 seconds expressed in minutes.
64*2/60

2.1333333333333333

In [12]:
# For every slide/annotation pair, build the annotation masks at pyramid level 0
# and save the overlay as a check image. The recorded run of this cell ended in
# a KeyboardInterrupt.
# NOTE(review): `SlideHandler` is not defined in any cell visible here — it has
# to be defined or imported before this cell can run on a fresh kernel.
MASK_LEVEL = 0  # single place to change the level used for coordinates and masks

total_time = 0
for slide_file, anno_file in zip(slide_list, label_list):
    start_time = time.time()

    slide_name = os.path.splitext(slide_file)[0]
    # The two sorted listings are paired by position, so stop at the first
    # pair whose base names disagree.
    if slide_name != os.path.splitext(anno_file)[0]:
        print("Check the pairness between slide and ROI annotation")
        break

    slide_path = os.path.join(slide_dir, slide_file)
    label_path = os.path.join(label_dir, anno_file)

    SH = SlideHandler(slide_path = slide_path, xml_path = label_path)
    annotations, non_target_annotations = SH.get_coordinates(level = MASK_LEVEL)
    slide_thumbnail, tumor_label = SH.make_mask(annotations=annotations, color = TUMOR_COLOR, level = MASK_LEVEL)

    fig, ax = plt.subplots(figsize=(30, 20))
    try:
        ax.imshow(slide_thumbnail)
        ax.imshow(tumor_label, alpha = 0.3)

        if len(non_target_annotations) != 0:
            _, non_target_label = SH.make_mask(annotations=non_target_annotations, color = PATTERN3_COLOR, level = MASK_LEVEL)
            ax.imshow(non_target_label, alpha = 0.3)

        fig.savefig(f'{plot_save_dir}/{slide_name}_tumor_anno_overlay_level_0.jpg', bbox_inches = 'tight')
    finally:
        # Always release the figure, even when drawing or saving fails.
        plt.close(fig)

    end_time = time.time()
    taken = end_time - start_time
    # Report the downsample of the level the mask was actually built at; the
    # previous `[-1]` index reported the last level's factor instead.
    print(f'{slide_name} | label/mask ratio: {int(SH.slide.level_downsamples[MASK_LEVEL])} | size: {tumor_label.shape} | time: {round(taken, 2)} sec')
    total_time += taken

print(f'total time: {round(total_time, 2)} sec')

KeyboardInterrupt: 

In [4]:
# Open the first slide of the sorted listing for interactive inspection.
# NOTE(review): this handle is never closed in the visible cells.
slide_path = os.path.join(slide_dir, slide_list[0])
slide = openslide.OpenSlide(slide_path)

# Bare last expression so the notebook displays the handle's repr.
slide

OpenSlide('/mnt/hdd1/c-MET_datasets/SLIDE_DATA/1차annotation/S-LC0027-MET.svs')

In [12]:
# Number of pyramid levels stored in the opened slide.
slide.level_count

4

In [5]:
# Per-level sizes; `make_mask` unpacks one entry of this as (width, height).
slide.level_dimensions

((103584, 50191), (25896, 12547), (6474, 3136), (3237, 1568))

In [9]:
# NOTE(review): the recorded output is an AttributeError saying a numpy.ndarray
# has no `level_downsamples`, so `slide` was no longer an OpenSlide handle when
# this cell ran (presumably rebound by a cell not shown here). Re-run the cell
# that opens the slide before this one.
slide.level_downsamples

AttributeError: 'numpy.ndarray' object has no attribute 'level_downsamples'