# Script to generate binary masks of each cell-type
# Created by Ruchika Verma

This code will create separate folder for each patient and subfolders for annotated images under each patient's folder.

Each sub-folder corresponding to sub-images under each patient will contain 4 sub-sub-folders (Epithelial, Lymphocyte, Neutrophil and Macrophage) to save their corresponding binary-masks with value 255 for each cell-type and background 0

# Input
data_path: Specify the path of downloaded images

destination_path = Specify the path to save corresponding binary masks

# Output
MoNuSAC_masks directory in the destination_path

Binary masks will be saved in each sub-sub-folder

Folder -- Patient name

Sub-folder -- Sub-images under each patient

Sub-Sub-folder -- Annotated cell-type on each sub-image


In [7]:
import sys
sys.path.append('/')
from config import *

ModuleNotFoundError: No module named 'config'

In [4]:
#Process whole slide images
import os
import openslide
from xml.dom import minidom
import numpy as np
import openslide
from openslide import open_slide  
from glob import glob
import cv2
import matplotlib.pyplot as plt
import scipy.io as sio
from PIL import Image
import scipy
import scipy.ndimage
from shapely.geometry import Polygon
from skimage import draw
import xml.etree.ElementTree as ET

In [51]:
# Read svs files from the desired path
count = 0
data_path = MONUSAC_DATA #Path to read data from
destination_path = MONUSAC_FOLDER.rstrip('/') # Path to save binary masks corresponding to xml files
os.chdir(destination_path)

try:
    os.mkdir(destination_path+'/MoNuSAC_masks')
except OSError:
    print ("Creation of the mask directory %s failed" % destination_path)
    
os.chdir(destination_path+'/MoNuSAC_masks')#Create folder named as MoNuSAC_masks
patients = [x[0] for x in os.walk(data_path)]#Total patients in the data_path
if data_path in patients: patients.remove(data_path)
len(patients)

46

In [52]:
full_dest_path = destination_path + '/MoNuSAC_masks/'

In [54]:
for patient_loc in patients[1:]:
    patient_name = patient_loc[len(data_path)+1:]#Patient name
    print(patient_name)
        
    ## To make patient's name directory in the destination folder
    try:
        os.mkdir(full_dest_path + patient_name)
    except OSError:
        print ("\n Creation of the patient's directory %s failed" % patient_name)
        
    ## Read sub-images of each patient in the data path        
    sub_images = glob(patient_loc+'/*.svs')
    for sub_image_loc in sub_images:
        sub_image_name = sub_image_loc[len(data_path)+len(patient_name)+1:-4]        
        print(sub_image_name)
        
        ## To make sub_image directory under the patient's folder
        sub_image = full_dest_path + patient_name+'/'+sub_image_name #Destination path
        try:
            os.mkdir(sub_image)
        except OSError:
            print ("\n Creation of the patient's directory %s failed" % sub_image)
            
        image_name = sub_image_loc
        img = openslide.OpenSlide(image_name)
                                  
        # If svs image needs to save in tif
        cv2.imwrite(sub_image_loc[:-4]+'.tif', np.array(img.read_region((0,0),0,img.level_dimensions[0])))      
   
        # Read xml file
        xml_file_name  = image_name[:-4]
        xml_file_name = xml_file_name+'.xml'
        tree = ET.parse(xml_file_name)
        root = tree.getroot()
        
        #Generate binary mask for each cell-type                         
        for k in range(len(root)):
            label = [x.attrib['Name'] for x in root[k][0]]
            label = label[0]
            
            for child in root[k]:
                for x in child:
                    r = x.tag
                    if r == 'Attribute':
                        count = count+1
                        print(count)
                        label = x.attrib['Name']
                        binary_mask = np.transpose(np.zeros((img.read_region((0,0),0,img.level_dimensions[0]).size), dtype='uint8')) 
                        print(label)
                        
                        # Create directory for each label
                        sub_path = sub_image+'/'+label
                        
                        try:
                            os.mkdir(sub_path)
                        except OSError:
                            print ("Creation of the directory %s failed" % label)
                        else:
                            print ("Successfully created the directory %s " % label) 
                                          
                        
                    if r == 'Region':
                        regions = []
                        vertices = x[1]
                        coords = np.zeros((len(vertices), 2))
                        for i, vertex in enumerate(vertices):
                            coords[i][0] = vertex.attrib['X']
                            coords[i][1] = vertex.attrib['Y']        
                        regions.append(coords)
                        poly = Polygon(regions[0])  
                        
                        vertex_row_coords = regions[0][:,0]
                        vertex_col_coords = regions[0][:,1]
                        fill_row_coords, fill_col_coords = draw.polygon(vertex_col_coords, vertex_row_coords, binary_mask.shape)
                        binary_mask[fill_row_coords, fill_col_coords] = 255
                        mask_path = sub_path+'/'+str(count)+'_mask.tif'
                        cv2.imwrite(mask_path, binary_mask)                       
    

TCGA-55-1594-01Z-00-DX1
/TCGA-55-1594-01Z-00-DX1_005
17
Epithelial
Successfully created the directory Epithelial 
18
Lymphocyte
Successfully created the directory Lymphocyte 
19
Neutrophil
Successfully created the directory Neutrophil 
20
Macrophage
Successfully created the directory Macrophage 
/TCGA-55-1594-01Z-00-DX1_004
21
Epithelial
Successfully created the directory Epithelial 
22
Lymphocyte
Successfully created the directory Lymphocyte 
23
Neutrophil
Successfully created the directory Neutrophil 
24
Macrophage
Successfully created the directory Macrophage 
/TCGA-55-1594-01Z-00-DX1_001
25
Epithelial
Successfully created the directory Epithelial 
26
Lymphocyte
Successfully created the directory Lymphocyte 
27
Neutrophil
Successfully created the directory Neutrophil 
28
Macrophage
Successfully created the directory Macrophage 
/TCGA-55-1594-01Z-00-DX1_003
29
Epithelial
Successfully created the directory Epithelial 
30
Lymphocyte
Successfully created the directory Lymphocyte 
31
Neu

135
Neutrophil
Successfully created the directory Neutrophil 
136
Macrophage
Successfully created the directory Macrophage 
TCGA-B9-A8YI-01Z-00-DX1
/TCGA-B9-A8YI-01Z-00-DX1_1
137
Epithelial
Successfully created the directory Epithelial 
138
Lymphocyte
Successfully created the directory Lymphocyte 
139
Neutrophil
Successfully created the directory Neutrophil 
140
Macrophage
Successfully created the directory Macrophage 
/TCGA-B9-A8YI-01Z-00-DX1_3
141
Epithelial
Successfully created the directory Epithelial 
142
Lymphocyte
Successfully created the directory Lymphocyte 
143
Neutrophil
Successfully created the directory Neutrophil 
144
Macrophage
Successfully created the directory Macrophage 
/TCGA-B9-A8YI-01Z-00-DX1_2
145
Epithelial
Successfully created the directory Epithelial 
146
Lymphocyte
Successfully created the directory Lymphocyte 
147
Neutrophil
Successfully created the directory Neutrophil 
148
Macrophage
Successfully created the directory Macrophage 
/TCGA-B9-A8YI-01Z-00-DX1_5


251
Epithelial
Successfully created the directory Epithelial 
252
Lymphocyte
Successfully created the directory Lymphocyte 
253
Neutrophil
Successfully created the directory Neutrophil 
254
Macrophage
Successfully created the directory Macrophage 
/TCGA-G7-A8LD-01Z-00-DX1_7
255
Epithelial
Successfully created the directory Epithelial 
256
Lymphocyte
Successfully created the directory Lymphocyte 
257
Neutrophil
Successfully created the directory Neutrophil 
258
Macrophage
Successfully created the directory Macrophage 
/TCGA-G7-A8LD-01Z-00-DX1_6
259
Epithelial
Successfully created the directory Epithelial 
260
Lymphocyte
Successfully created the directory Lymphocyte 
261
Neutrophil
Successfully created the directory Neutrophil 
262
Macrophage
Successfully created the directory Macrophage 
/TCGA-G7-A8LD-01Z-00-DX1_2
263
Epithelial
Successfully created the directory Epithelial 
264
Lymphocyte
Successfully created the directory Lymphocyte 
265
Neutrophil
Successfully created the directory N

371
Epithelial
Successfully created the directory Epithelial 
372
Lymphocyte
Successfully created the directory Lymphocyte 
373
Neutrophil
Successfully created the directory Neutrophil 
374
Macrophage
Successfully created the directory Macrophage 
/TCGA-J4-A67T-01Z-00-DX1-1
375
Epithelial
Successfully created the directory Epithelial 
376
Lymphocyte
Successfully created the directory Lymphocyte 
377
Neutrophil
Successfully created the directory Neutrophil 
378
Macrophage
Successfully created the directory Macrophage 
/TCGA-J4-A67T-01Z-00-DX1-2
379
Epithelial
Successfully created the directory Epithelial 
380
Lymphocyte
Successfully created the directory Lymphocyte 
381
Neutrophil
Successfully created the directory Neutrophil 
382
Macrophage
Successfully created the directory Macrophage 
/TCGA-J4-A67T-01Z-00-DX1-3
383
Epithelial
Successfully created the directory Epithelial 
384
Lymphocyte
Successfully created the directory Lymphocyte 
385
Neutrophil
Successfully created the directory N

Lymphocyte
Successfully created the directory Lymphocyte 
489
Neutrophil
Successfully created the directory Neutrophil 
490
Macrophage
Successfully created the directory Macrophage 
/TCGA-BH-A18T-01Z-00-DX1_2
491
Epithelial
Successfully created the directory Epithelial 
492
Lymphocyte
Successfully created the directory Lymphocyte 
493
Neutrophil
Successfully created the directory Neutrophil 
494
Macrophage
Successfully created the directory Macrophage 
/TCGA-BH-A18T-01Z-00-DX1_6
495
Epithelial
Successfully created the directory Epithelial 
496
Lymphocyte
Successfully created the directory Lymphocyte 
497
Neutrophil
Successfully created the directory Neutrophil 
498
Macrophage
Successfully created the directory Macrophage 
/TCGA-BH-A18T-01Z-00-DX1_5
499
Epithelial
Successfully created the directory Epithelial 
500
Lymphocyte
Successfully created the directory Lymphocyte 
501
Neutrophil
Successfully created the directory Neutrophil 
502
Macrophage
Successfully created the directory Macro

606
Macrophage
Successfully created the directory Macrophage 
/TCGA-EJ-5505-01Z-00-DX1-5
607
Epithelial
Successfully created the directory Epithelial 
608
Lymphocyte
Successfully created the directory Lymphocyte 
609
Neutrophil
Successfully created the directory Neutrophil 
610
Macrophage
Successfully created the directory Macrophage 
/TCGA-EJ-5505-01Z-00-DX1-4
611
Epithelial
Successfully created the directory Epithelial 
612
Lymphocyte
Successfully created the directory Lymphocyte 
613
Neutrophil
Successfully created the directory Neutrophil 
614
Macrophage
Successfully created the directory Macrophage 
/TCGA-EJ-5505-01Z-00-DX1-1
615
Epithelial
Successfully created the directory Epithelial 
616
Lymphocyte
Successfully created the directory Lymphocyte 
617
Neutrophil
Successfully created the directory Neutrophil 
618
Macrophage
Successfully created the directory Macrophage 
/TCGA-EJ-5505-01Z-00-DX1-3
619
Epithelial
Successfully created the directory Epithelial 
620
Lymphocyte
Successfu

/TCGA-78-7220-01Z-00-DX1_003
723
Epithelial
Successfully created the directory Epithelial 
724
Lymphocyte
Successfully created the directory Lymphocyte 
725
Neutrophil
Successfully created the directory Neutrophil 
726
Macrophage
Successfully created the directory Macrophage 
/TCGA-78-7220-01Z-00-DX1_002
727
Epithelial
Successfully created the directory Epithelial 
728
Lymphocyte
Successfully created the directory Lymphocyte 
729
Neutrophil
Successfully created the directory Neutrophil 
730
Macrophage
Successfully created the directory Macrophage 
/TCGA-78-7220-01Z-00-DX1_004
731
Epithelial
Successfully created the directory Epithelial 
732
Lymphocyte
Successfully created the directory Lymphocyte 
733
Neutrophil
Successfully created the directory Neutrophil 
734
Macrophage
Successfully created the directory Macrophage 
TCGA-G9-6342-01Z-00-DX1
/TCGA-G9-6342-01Z-00-DX1-1
735
Epithelial
Successfully created the directory Epithelial 
736
Lymphocyte
Successfully created the directory Lympho

In [11]:
from shutil import copyfile
image_dest_path = MONUSAC_IMGS

for patient_loc in patients:
    images = glob(patient_loc+'/*.tif')
    for image in images:
        copyfile(image, image_dest_path + image.split('/').pop())

In [87]:
mask_dest_path = MONUSAC_MASKS

for idx, patient_loc in enumerate(patients):
    
    patient_name = patient_loc.split('/').pop()
    patient_folder = full_dest_path + patient_name
    sub_images = glob(patient_folder + '/*')
    
    for sub_image in sub_images: 
        result = np.empty([1, 1], dtype='uint8')
        mask_paths = [os.path.join(dp, f) for dp, dn, fn in os.walk(sub_image) for f in fn if f.endswith('.tif') and not dp.endswith('/Macrophage')]
        
        for mask_path in mask_paths:
            img = cv2.imread(mask_path)
            if result.shape == (1,1): 
                result = img
            else:
                result = result + img
        
        cv2.imwrite(mask_dest_path + '/' + sub_image.split('/').pop() + '.tif', result)
        
    print(str(idx) + ': ' + patient_name)

0: TCGA-73-4668-01Z-00-DX1
1: TCGA-55-1594-01Z-00-DX1
2: TCGA-EV-5903-01Z-00-DX1
3: TCGA-YL-A9WY-01Z-00-DX1
4: TCGA-A2-A0ES-01Z-00-DX1
5: TCGA-DW-7841-01Z-00-DX1
6: TCGA-KK-A6E0-01Z-00-DX1
7: TCGA-UZ-A9PU-01Z-00-DX1
8: TCGA-B9-A8YI-01Z-00-DX1
9: TCGA-D8-A1X5-01Z-00-DX2
10: TCGA-EJ-5517-01Z-00-DX1
11: TCGA-J4-A67Q-01Z-00-DX1
12: TCGA-EW-A6SD-01Z-00-DX1
13: TCGA-E9-A22B-01Z-00-DX1
14: TCGA-G7-A8LD-01Z-00-DX1
15: TCGA-B6-A0WZ-01Z-00-DX1
16: TCGA-E9-A22G-01Z-00-DX1
17: TCGA-A2-A0CV-01Z-00-DX1
18: TCGA-KK-A59X-01Z-00-DX1
19: TCGA-J4-A67T-01Z-00-DX1
20: TCGA-69-A59K-01Z-00-DX1
21: TCGA-MH-A560-01Z-00-DX2
22: TCGA-UZ-A9PO-01Z-00-DX1
23: TCGA-G9-6499-01Z-00-DX1
24: TCGA-P4-AAVK-01Z-00-DX1
25: TCGA-BH-A18T-01Z-00-DX1
26: TCGA-69-7760-01Z-00-DX1
27: TCGA-86-8672-01Z-00-DX1
28: TCGA-MP-A4SY-01Z-00-DX1
29: TCGA-MP-A4T7-01Z-00-DX1
30: TCGA-KK-A7AW-01Z-00-DX1
31: TCGA-E2-A154-01Z-00-DX1
32: TCGA-EJ-5505-01Z-00-DX1
33: TCGA-SX-A7SR-01Z-00-DX1
34: TCGA-V1-A9O9-01Z-00-DX1
35: TCGA-B9-A44B-01Z-00-DX1
36