<a href="https://colab.research.google.com/github/vaguiar/camelyon_dl_2019/blob/cc_explore/working_with_starter_code_cc_modified.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [0]:
# Install the OpenSlide C library and Python bindings
!apt-get install openslide-tools
!pip install openslide-python

Reading package lists... Done
Building dependency tree       
Reading state information... Done
The following package was automatically installed and is no longer required:
  libnvidia-common-410
Use 'apt autoremove' to remove it.
The following additional packages will be installed:
  libopenslide0
Suggested packages:
  libtiff-tools
The following NEW packages will be installed:
  libopenslide0 openslide-tools
0 upgraded, 2 newly installed, 0 to remove and 11 not upgraded.
Need to get 92.5 kB of archives.
After this operation, 268 kB of additional disk space will be used.
Get:1 http://archive.ubuntu.com/ubuntu bionic/universe amd64 libopenslide0 amd64 3.4.1+dfsg-2 [79.8 kB]
Get:2 http://archive.ubuntu.com/ubuntu bionic/universe amd64 openslide-tools amd64 3.4.1+dfsg-2 [12.7 kB]
Fetched 92.5 kB in 1s (80.4 kB/s)
Selecting previously unselected package libopenslide0.
(Reading database ... 130812 files and directories currently installed.)
Preparing to unpack .../libopenslide0_3.4.1+dfsg-

In [0]:
%matplotlib inline
import matplotlib.pyplot as plt
import numpy as np
from openslide import open_slide, __library_version__ as openslide_version
import os
from PIL import Image
from skimage.color import rgb2gray

In [0]:
## Load data from Google Drive: mount the Drive at /content/drive
from google.colab import drive
drive.mount('/content/drive')

Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3Aietf%3Awg%3Aoauth%3A2.0%3Aoob&scope=email%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdocs.test%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdrive%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdrive.photos.readonly%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fpeopleapi.readonly&response_type=code

Enter your authorization code:
··········
Mounted at /content/drive


In [0]:
# Folder that holds the slide images together with their tumor masks.
base_dir = '/content/drive/My Drive/slides/'

# Output folders for the extracted tiles. Written as absolute paths (like
# base_dir) so saving does not depend on the notebook's working directory.
CANCER_PATH = '/content/drive/My Drive/training_data/cancer'
NON_CANCER_PATH = '/content/drive/My Drive/training_data/non_cancer'

# A mask is named after its slide: "<slide>_mask.tif" belongs to "<slide>.tif".
MASK_SUFFIX = '_mask.tif'
# Slides that are explicitly left out of the pairing.
EXCLUDED_SLIDES = {'tumor_038.tif'}

images = os.listdir(base_dir)

# Only real .tif files are considered (a substring test would also match
# names such as "x.tif.bak").
tif_files = {s for s in images if s.endswith('.tif')}

# Pair each mask with the slide it is named after instead of relying on two
# independently sorted lists happening to line up. The result is a list, so it
# can be indexed and iterated as often as needed (a zip object is exhausted
# after its first use).
masks_slides = []
for mask_name in sorted(s for s in tif_files if s.endswith(MASK_SUFFIX)):
  slide_name = mask_name[:-len(MASK_SUFFIX)] + '.tif'
  if slide_name in tif_files and slide_name not in EXCLUDED_SLIDES:
    masks_slides.append((mask_name, slide_name))

# Aligned, sorted views of the pairs: masks[i] belongs to slides[i].
masks = [pair[0] for pair in masks_slides]
slides = [pair[1] for pair in masks_slides]

In [0]:
# Pick one (mask, slide) pair to experiment with. The pairs are rebuilt from
# the `masks` and `slides` lists so this cell gives the same result every time
# it is run (a zip iterator can only be consumed once).
test = list(zip(masks, slides))[17]
tumor_mask_file, slide_file = test

('tumor_094_mask.tif', 'tumor_094.tif')

In [0]:
def getSlide_Mask(slide_file, tumor_mask_file):
  """Open a slide and its tumor mask and check that the two line up.

  Args:
    slide_file: file name of the slide, relative to ``base_dir``.
    tumor_mask_file: file name of the tumor mask, relative to ``base_dir``.

  Returns:
    ``(slide, tumor_mask)`` as returned by ``open_slide``.

  Raises:
    AssertionError: if the mask and slide dimensions differ on a level they
      both have, or if the downsample factor of the checked level does not
      reproduce the level-0 dimensions.
  """
  slide_path = os.path.join(base_dir, slide_file)
  tumor_mask_path = os.path.join(base_dir, tumor_mask_file)

  slide = open_slide(slide_path)
  tumor_mask = open_slide(tumor_mask_path)

  # Only levels that exist in both pyramids can be compared; indexing by the
  # slide's level count alone fails when the mask has fewer levels.
  shared_levels = min(len(slide.level_dimensions), len(tumor_mask.level_dimensions))
  for i in range(shared_levels):
    assert tumor_mask.level_dimensions[i][0] == slide.level_dimensions[i][0], \
        "width mismatch between mask and slide at level %d" % i
    assert tumor_mask.level_dimensions[i][1] == slide.level_dimensions[i][1], \
        "height mismatch between mask and slide at level %d" % i

  # Verify downsampling works as expected. Level 7 is checked when the slide
  # has it; otherwise the deepest level that is available.
  check_level = min(7, len(slide.level_dimensions) - 1)
  width, height = slide.level_dimensions[check_level]
  factor = slide.level_downsamples[check_level]
  assert width * factor == slide.level_dimensions[0][0], \
      "level %d width does not scale back to level 0" % check_level
  assert height * factor == slide.level_dimensions[0][1], \
      "level %d height does not scale back to level 0" % check_level

  return slide, tumor_mask

def read_slide(slide, x, y, level, width, height, as_float=False):
    """Read one rectangular region of a slide as an RGB numpy array.

    ``(x, y)``, ``level`` and ``(width, height)`` are handed unchanged to
    ``slide.read_region``. The alpha channel of the result is dropped.

    Returns an array of shape ``(height, width, 3)``; its dtype is float32
    when ``as_float`` is true and whatever ``np.asarray`` yields otherwise.
    """
    rgb_region = slide.read_region((x, y), level, (width, height)).convert('RGB')
    pixels = np.asarray(rgb_region, dtype=np.float32) if as_float else np.asarray(rgb_region)
    assert pixels.shape == (height, width, 3)
    return pixels
  

  
## use a sliding window filter of size (slide_height, slide_width)
## save in cancer folder if more than 30% of pixels are cancerous
## save in non_cancer folder otherwise


def getTraining_Data(level, slide, tumor_mask, slide_width, slide_height, slide_file, min_x = 0, min_y = 0, cancer_threshold_pct = 30.0):
  """Cut a slide into tiles and save each one into a cancer / non-cancer folder.

  A window of ``slide_width`` x ``slide_height`` pixels (measured at ``level``)
  is moved over the slide without overlap. Tiles on the right and bottom edge
  are clipped to the slide. For every tile the slide image is drawn with the
  mask overlaid and the figure is saved under ``CANCER_PATH`` when at least
  ``cancer_threshold_pct`` percent of the tile's mask pixels are non-zero, and
  under ``NON_CANCER_PATH`` otherwise.

  NOTE(review): the saved figure includes the mask overlay - confirm that this
  is what the training data is meant to contain.

  Args:
    level: pyramid level the tiles are read from.
    slide: opened slide.
    tumor_mask: opened tumor mask of the same slide.
    slide_width: tile width in pixels of ``level``.
    slide_height: tile height in pixels of ``level``.
    slide_file: slide file name; used as the suffix of every saved file name.
    min_x: first tile column, in pixels of ``level``.
    min_y: first tile row, in pixels of ``level``.
    cancer_threshold_pct: minimum percentage (0-100) of tumor pixels for a
      tile to be stored as cancerous.
  """
  down_sample = int(slide.level_downsamples[level])
  level_width, level_height = slide.level_dimensions[level]
  # Extent of the level expressed in the coordinates used for x / y below.
  pixel_width = int(down_sample * level_width)
  pixel_height = int(down_sample * level_height)

  # savefig does not create missing folders.
  for out_dir in (CANCER_PATH, NON_CANCER_PATH):
    os.makedirs(out_dir, exist_ok=True)

  for x in range(min_x*down_sample, pixel_width, slide_width*down_sample):
    for y in range(min_y*down_sample, pixel_height, slide_height*down_sample):

      # Clip the window at the right / bottom border. The height is bounded
      # by slide_height (not slide_width).
      tile_width = int(min(slide_width, level_width - x // down_sample))
      tile_height = int(min(slide_height, level_height - y // down_sample))

      slide_image = read_slide(slide,
                               x=x,
                               y=y,
                               level=level,
                               width=tile_width,
                               height=tile_height)

      mask_image = read_slide(tumor_mask,
                              x=x,
                              y=y,
                              level=level,
                              width=tile_width,
                              height=tile_height)

      # Only the first channel of the mask is used.
      mask_image = mask_image[:,:,0]

      # Percentage of tumor pixels relative to the tile that was actually
      # read, so clipped border tiles are not under-counted. Counting non-zero
      # pixels keeps the value a true percentage whatever value marks tumor.
      pcnt_cancer = 100.0 * np.count_nonzero(mask_image) / (tile_width * tile_height)

      if pcnt_cancer >= cancer_threshold_pct:
        name = CANCER_PATH + "/" + str(x) + "_" + str(y)+ "_" + slide_file
      else:
        name = NON_CANCER_PATH + "/" + str(x) + "_" + str(y)+ "_" + slide_file

      print(name)
      fig = plt.figure(figsize=(10,10), dpi=100)
      try:
        plt.imshow(slide_image)
        plt.imshow(mask_image, cmap='jet', alpha=0.5) # Red regions contains cancer.
        plt.savefig(name)
      finally:
        # One figure per tile: close it so open figures do not pile up.
        plt.close(fig)



In [0]:
# Open the selected slide and its tumor mask; getSlide_Mask also asserts that
# their level dimensions agree.
slide, tumor_mask = getSlide_Mask(slide_file, tumor_mask_file)

In [0]:
# Tiling parameters handed to getTraining_Data.
level = 6  # pyramid level the tiles are read from
slide_width = 300  # tile width, in pixels of that level
slide_height = 300  # tile height, in pixels of that level

In [0]:
# Tile the slide from its top-left corner and save every tile (with the mask
# overlaid) into the cancer / non_cancer folders.
getTraining_Data(level, slide, tumor_mask, slide_width, slide_height, slide_file)

In [0]:

# Downsample factor of level 7. It converts the level-7 pixel offsets below
# into the coordinates passed to read_slide. It has to be defined here: the
# variable of the same name inside getTraining_Data is local to that function.
down_sample = int(slide.level_downsamples[7])

# Example: read a 200x100 region of the slide at level 7.
slide_image = read_slide(slide,
                         x=550*down_sample,
                         y=180*down_sample,
                         level=7,
                         width=200,
                         height=100)

# Example: read a 20x10 patch of the mask at the same zoom level.
mask_image = read_slide(tumor_mask,
                        x=610*down_sample,
                        y=220*down_sample,
                        level=7,
                        width=20,
                        height=10)

# Note: the program provided by the dataset authors generates a mask with R,G,B channels.
# The mask info we need is in the first channel only.
# If you skip this step, the mask will be displayed as all black.
mask_image = mask_image[:,:,0]

plt.figure(figsize=(10,10), dpi=100)
plt.imshow(mask_image)

In [0]:
# Sum of the mask values divided by the number of pixels in the patch, taken
# from the array itself instead of a hard-coded 200. This is the tumor
# fraction if mask pixels are 0/1 - TODO confirm.
np.sum(mask_image) / mask_image.size

In [0]:
# Number of pixels that make up 1% of a 200x100 region.
(200*100)/100.0

In [0]:
# Draw the mask on top of the slide region. The semi-transparent mask marks
# the part of the slide that contains cancerous cells.
fig, ax = plt.subplots(figsize=(10,10), dpi=100)
ax.imshow(slide_image)
ax.imshow(mask_image, cmap='jet', alpha=0.5) # Red regions contain cancer.

In [0]:
# Example: cut a 50x50 region out of the level-7 (L7) downsampled mask.
# The x,y coordinates are multiplied by the downsample factor (128 here).
# This math can be tricky to get right, debug carefully.
# Here, we're "aiming" for the top right blob in the image above.
region = read_slide(
    tumor_mask,
    x=350 * 128,
    y=120 * 128,
    level=7,
    width=50,
    height=50,
)
region = region[:,:,0]  # keep the first channel only
plt.imshow(region)

In [0]:
# As mentioned in class, we can improve efficiency by ignoring non-tissue areas 
# of the slide. We'll find these by looking for all gray regions.
def find_tissue_pixels(image, intensity=0.8):
    """Return the (row, col) coordinates of the pixels treated as tissue.

    The image is converted to grayscale with ``rgb2gray`` and every pixel
    whose gray value is at most ``intensity`` is reported.

    Args:
        image: array whose first two axes are rows and columns.
        intensity: upper bound on the gray value of a tissue pixel.

    Returns:
        A list of ``(row, col)`` tuples of Python ints. It is a list rather
        than a one-shot ``zip`` iterator, so callers can take ``len()`` of it
        and iterate over it more than once.
    """
    im_gray = rgb2gray(image)
    assert im_gray.shape == (image.shape[0], image.shape[1])
    rows, cols = np.where(im_gray <= intensity)
    return list(zip(rows.tolist(), cols.tolist()))

# Materialise the coordinates once so they can be counted here and still be
# iterated afterwards.
tissue_pixels = list(find_tissue_pixels(slide_image))
num_pixels = len(tissue_pixels)
# Percentage relative to the full image area (rows * columns).
percent_tissue = num_pixels / float(slide_image.shape[0] * slide_image.shape[1]) * 100
print ("%d tissue_pixels pixels (%.1f percent of the image)" % (num_pixels , percent_tissue)) 

def apply_mask(im, mask, color=(255,0,0)):
    """Return a copy of ``im`` with the pixels listed in ``mask`` set to ``color``.

    ``mask`` is an iterable of ``(row, col)`` pairs; ``im`` itself is left
    untouched.
    """
    painted = np.copy(im)
    coords = list(mask)
    if coords:
        rows, cols = zip(*coords)
        painted[list(rows), list(cols)] = color
    return painted

# Pass freshly computed coordinates: an iterator that has already been
# consumed (for example by counting its items) would yield nothing and leave
# the image unmarked.
tissue_regions = apply_mask(slide_image, list(find_tissue_pixels(slide_image)))
plt.imshow(tissue_regions)