In [2]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import DataLoader,Dataset
import tensorflow as tf
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt 
import argparse
import cv2
from google.colab.patches import cv2_imshow
import pathlib
import PIL
import os
import json
import random

In [5]:
from google.colab import drive
# Mount Google Drive into the Colab filesystem at /content/drive.
drive.mount('/content/drive')
# Make the project folder on Drive the working directory.
os.chdir('/content/drive/MyDrive/WIKM')

Mounted at /content/drive


Helper Functions

In [6]:
# put mask over image
def maskOverImage(source, mask):
  """Apply `mask` to `source` with a per-pixel bitwise AND.

  The mask is first resized to the width/height of `source`, so the two
  inputs do not need to share a resolution.

  Args:
    source: image array, e.g. as returned by cv2.imread.
    mask: mask image array. Assumed to have the same channel count and
      dtype as `source` (cv2.bitwise_and requires compatible operands).

  Returns:
    The result of cv2.bitwise_and(source, resized mask).

  Raises:
    ValueError: if either input is None (cv2.imread returns None when a
      file cannot be read).
  """
  if source is None or mask is None:
    raise ValueError("maskOverImage needs two loaded images; got None "
                     "(cv2.imread returns None for unreadable files)")

  # shape[1::-1] turns (rows, cols, ...) into the (width, height) order
  # that cv2.resize expects for its target size.
  targetSize = source.shape[1::-1]

  # Nearest-neighbour keeps the mask's own values. The default bilinear
  # interpolation blends mask edges into intermediate values, which the
  # bitwise AND below would turn into corrupted pixel values.
  resizedMask = cv2.resize(mask, targetSize, interpolation=cv2.INTER_NEAREST)

  return cv2.bitwise_and(source, resizedMask)


#create folder
def createFolderandChdir(pathToFolder):
  """Make sure `pathToFolder` exists, then make it the working directory.

  Args:
    pathToFolder: directory path (str or path-like). Missing parent
      directories are created as well.
  """
  # exist_ok=True replaces the separate os.path.exists check, which could
  # race with another process creating the folder in between.
  os.makedirs(pathToFolder, exist_ok=True)
  os.chdir(pathToFolder)

### JSON structure 

In [None]:
import pathlib
import os
import json
# os.chdir('/content/drive/My Drive/WIKM')
datasetPath = pathlib.Path("/content/drive/My Drive/WIKM/data/LungCancer")
pathDataset = []  # one {'patientN': {'slideM': {...}}} dict per patient folder

# Colour-code "serials" read from annotation file names, grouped by the
# region type they are filed under.
inSituSerials = {"R000G000B255", "R001G000B255", "R002G000B255"}
invasiveSerials = {"R000G255B000", "R001G255B000", "R002G255B000", "R003G255B000",
                   "R004G255B000", "R005G255B000", "R006G255B000"}
bothSerial = "R255G000B000"


def findMaskForSlide(maskDir, maskNames, imageName):
  """Return the absolute path of the mask file matching `imageName`, or "".

  Args:
    maskDir: directory that contains the candidate mask files.
    maskNames: file names inside `maskDir`.
    imageName: file name of the slide image.

  Returns:
    Absolute path (str) of the last matching entry of `maskNames`, or an
    empty string when nothing matches.
  """
  imageLower = imageName.lower()
  matchedPath = ""
  for maskName in maskNames:
    maskLower = maskName.lower()
    # Two accepted matches (case-insensitive); "some tif some png":
    #  - image name minus its last character equals the mask name
    #    (e.g. 'x.tiff' vs 'x.tif'), or
    #  - image name minus 5 trailing characters equals mask name minus 4
    #    (presumably '.tiff' vs a 3-letter extension -- TODO confirm).
    if imageLower[:-1] == maskLower or imageLower[:-5] == maskLower[:-4]:
      matchedPath = str(pathlib.Path(maskDir, maskName).absolute())
  return matchedPath


# Paths of all the patient folders. Non-directory entries are skipped because
# the per-patient os.listdir calls below would fail on them.
patientDirs = sorted(
    str(pathlib.Path(datasetPath, patient))
    for patient in os.listdir(datasetPath)
    if patient != "FinalPublishedResults" and pathlib.Path(datasetPath, patient).is_dir()
)

for patientNumber, patientDir in enumerate(patientDirs, 1):
  slidesImages = sorted(os.listdir(str(patientDir) + '/images'))
  maskImages = sorted(os.listdir(str(patientDir) + '/annotations'))

  # Airway / blood masks are looked up under
  # FinalPublishedResults/<patient>/histology/masks; some patients don't have
  # them. These paths and listings do not depend on the slide, so they are
  # built once per patient.
  patientDirPathObj = pathlib.Path(patientDir)
  masksRoot = pathlib.Path(patientDirPathObj.parent, 'FinalPublishedResults',
                           patientDirPathObj.name, 'histology', 'masks')
  airPath = pathlib.Path(masksRoot, 'airways')
  bloodPath = pathlib.Path(masksRoot, 'blood')
  airNames = os.listdir(airPath) if os.path.isdir(airPath) else []
  bloodNames = os.listdir(bloodPath) if os.path.isdir(bloodPath) else []

  slideDict = {}

  for slideNum, images in enumerate(slidesImages, 1):
    slideKey = f'slide{slideNum}'

    # Key order matters: the masking cell iterates this dict with .items()
    # and needs 'imgPath' before the mask lists.
    slideEntry = {
        'imgPath': str(pathlib.Path(str(patientDir), 'images', images).absolute()),
        'invasive': [],
        'in_situ': [],
        'both': [],
        'airway': findMaskForSlide(airPath, airNames, images),
        'blood': findMaskForSlide(bloodPath, bloodNames, images),
    }

    for annotations in maskImages:
      # An annotation belongs to this slide when its first 18 characters
      # equal the slide file name minus its last 5 characters
      # (assumes fixed-width slide names with a 5-char suffix -- TODO confirm).
      if annotations[:18] != images[:-5]:
        continue

      pathToAnnotation = str(pathlib.Path(patientDir, 'annotations', annotations))
      # The colour serial sits at a fixed offset in the annotation file name.
      serial = annotations[33:-5]

      if serial in inSituSerials:
        slideEntry['in_situ'].append(pathToAnnotation)
      elif serial in invasiveSerials:
        slideEntry['invasive'].append(pathToAnnotation)
      elif serial == bothSerial:
        slideEntry['both'].append(pathToAnnotation)

    slideDict[slideKey] = slideEntry

  patientDict = {}
  patientDict['patient{}'.format(patientNumber)] = slideDict
  pathDataset.append(patientDict)

#print(pathDataset)
#print(json.dumps(pathDataset, indent = 5))

### Masking out the normal region (keeping the invasive, in situ, and both annotations)

In [None]:
wikmMaskedImagesFolder = '//content//drive//My Drive//WIKM//maskedImages'
# Create the output root on a first run instead of failing on chdir.
createFolderandChdir(wikmMaskedImagesFolder)

# (slide-dict key / sub-folder name, label used in the output file name),
# in the order the regions are written.
regionLabels = (('invasive', 'INVASIVE'), ('in_situ', 'IN_SITU'), ('both', 'BOTH'))

# took about 10 minutes 20 seconds
# Output layout: maskedImages/<patient>/<slide>/<region>/<patient>_<slide>_<LABEL>_<n>.tiff
for patientDict in pathDataset:
  for patientNum, slides in patientDict.items():
    patientFolder = wikmMaskedImagesFolder + '//' + patientNum
    createFolderandChdir(patientFolder)

    for slideNum, slideDict in slides.items():
      slideFolder = patientFolder + '//' + slideNum
      createFolderandChdir(slideFolder)

      # Load the slide by key so the result does not depend on the
      # insertion order of the slide dict.
      slideImage = cv2.imread(slideDict['imgPath'])

      for regionKey, regionLabel in regionLabels:
        # The region folder is created even when the slide has no masks of
        # that type; images are written relative to it via the chdir.
        regionFolder = slideFolder + '//' + regionKey
        createFolderandChdir(regionFolder)

        for imageNum, maskPath in enumerate(slideDict[regionKey], 1):
          # cv2.imread returns None instead of raising on unreadable files.
          if slideImage is None:
            raise FileNotFoundError(f'could not read slide image: {slideDict["imgPath"]}')
          imageToMask = cv2.imread(maskPath)
          if imageToMask is None:
            raise FileNotFoundError(f'could not read mask image: {maskPath}')

          imageName = f'{patientNum}_{slideNum}_{regionLabel}_{imageNum}.tiff'
          maskedSlide = maskOverImage(slideImage, imageToMask)
          # cv2.imwrite reports failure through its return value only.
          if not cv2.imwrite(imageName, maskedSlide):
            print('--WRITE FAILED--', regionFolder + '//' + imageName)

        imageToMask = None  # drop the reference to the last mask of this region


### Install Patchify

In [None]:
!pip install patchify

### Patching from Invasive, Both, and In Situ regions

In [None]:
from patchify import patchify, unpatchify
from PIL import Image

wikmMaskedImagesFolder = '//content//drive//My Drive//WIKM//maskedImages'

patchedImagesPNG256x256 = '/content/drive/MyDrive/WIKM/patchedImagesPNG256x256'

os.chdir(wikmMaskedImagesFolder)
wikmMaskedImagesPath = pathlib.Path(wikmMaskedImagesFolder)

patchSize = 256  # patch height/width in pixels; also the step, so patches do not overlap
regionFolders = ('both', 'in_situ', 'invasive')

# NOTE(review): the masking cell writes maskedImages/<patient>/<slide>/<region>,
# but this loop looks for the region folders directly below each top-level
# folder. Confirm the folder layout this cell is meant to run on.

patchNum = 1  # running counter shared by all images and folders
# Folders are visited in sorted order so the patch numbering is reproducible.
for patientFolder in sorted(wikmMaskedImagesPath.iterdir()):
  os.chdir(wikmMaskedImagesFolder)
  if not patientFolder.is_dir():
    continue  # stray files would make the nested iterdir() fail

  for slideFolder in sorted(patientFolder.iterdir()):
    # Only the three region folders are patched.
    if slideFolder.name not in regionFolders or not slideFolder.is_dir():
      continue

    outputFolder = patchedImagesPNG256x256 + '//' + patientFolder.name + '//' + slideFolder.name

    for maskedImage in sorted(slideFolder.iterdir()): # "Patching" the image
      print('--ACCESSING IMAGE--', maskedImage.absolute())
      image = cv2.imread(str(maskedImage.absolute()))

      # cv2.imread returns None for files it cannot decode.
      if image is None:
        print('--SKIPPING (unreadable image)--', maskedImage.absolute())
        continue
      # An image smaller than one patch cannot be split.
      if image.shape[0] < patchSize or image.shape[1] < patchSize:
        print('--SKIPPING (smaller than one patch)--', maskedImage.absolute())
        continue

      # 3 is the colour-channel count; the result is indexed as
      # patches[row, col, 0] -> one patchSize x patchSize x 3 patch.
      patches = patchify(image, (patchSize, patchSize, 3), step=patchSize)

      os.makedirs(outputFolder, exist_ok=True)
      os.chdir(outputFolder)  # patches are written relative to this folder

      for i in range(patches.shape[0]):
        for j in range(patches.shape[1]):
          singlePatch = patches[i, j, 0, :, :, :]

          if np.any(singlePatch): # ignore all-black patches
            cv2.imwrite(f'Patch{patchNum}.png', singlePatch)
            patchNum += 1

### Reversing Mask (masking invasive, in situ, both)

In [None]:
# Folder the working directory is switched to below.
wikmMaskedImagesFolder = '//content//drive//My Drive//WIKM//maskedImages'
# Folder the loop further down switches to before writing the *_normal.png images.
wikmPatchedImagesFolderNorm = '//content//drive//My Drive//WIKM//normMaskedImages'

os.chdir(wikmMaskedImagesFolder)
# Path object for the same folder; not referenced again in this cell.
wikmMaskedImagesPath = pathlib.Path(wikmMaskedImagesFolder)

def combineAllMasks(masksList):
  """Merge several mask image files into one mask with a bitwise OR.

  Args:
    masksList: paths of the mask images to combine. All masks are assumed
      to share one shape (cv2.bitwise_or needs equally sized operands).

  Returns:
    The bitwise OR of all masks, as an image array loaded by cv2.imread.

  Raises:
    ValueError: if `masksList` is empty.
    FileNotFoundError: if a mask file cannot be read.
  """
  if not masksList:
    raise ValueError("combineAllMasks needs at least one mask path")

  combinedMask = None
  for maskPath in masksList:
    mask = cv2.imread(maskPath)
    if mask is None:  # cv2.imread signals failure by returning None
      raise FileNotFoundError(f"could not read mask image: {maskPath}")
    # The first mask seeds the result; every later one is OR-ed in.
    combinedMask = mask if combinedMask is None else cv2.bitwise_or(mask, combinedMask)

  return combinedMask


# (patient, slide) pairs that are left out; the reason is not recorded in
# this notebook.
skippedSlides = {('patient5', 'slide3')}

# Make sure the output folder exists before the loop switches to it.
os.makedirs(wikmPatchedImagesFolderNorm, exist_ok=True)

for patientDict in pathDataset: # remove [0] to do all patients
  for patientKey, slideDict in patientDict.items(): # 1 value only
    for slideKey, slideInfo in slideDict.items():
      if (patientKey, slideKey) in skippedSlides:
        continue

      if len(slideInfo['both']) == 0:
        # no 'both' annotation: combine the in_situ and invasive masks
        masksList = list(slideInfo['in_situ']) + list(slideInfo['invasive'])
      else:
        # a 'both' annotation exists: only those masks are used
        masksList = slideInfo['both']

      # A slide without any annotation has nothing to remove; skip it
      # instead of failing inside combineAllMasks.
      if not masksList:
        print('--SKIPPING (no annotation masks)--', patientKey, slideKey)
        continue

      print('--WRITING NORMAL REGION--', patientKey, slideKey)

      # Combine the masks and invert the result, so the annotated regions
      # become 0 and everything else stays set.
      combinedMasks = combineAllMasks(masksList)
      combinedMasksInv = cv2.bitwise_not(combinedMasks)

      slideImg = cv2.imread(slideInfo['imgPath'])
      if slideImg is None:  # cv2.imread returns None for unreadable files
        raise FileNotFoundError(f'could not read slide image: {slideInfo["imgPath"]}')

      assert combinedMasksInv.shape == slideImg.shape, ("not the same size!")

      # AND with the inverted mask blacks out the annotated regions.
      normal = cv2.bitwise_and(slideImg, combinedMasksInv)

      os.chdir(wikmPatchedImagesFolderNorm)

      cv2.imwrite(f'{patientKey}_{slideKey}_normal.png', normal) # patient1_slide1_normal.png

### Patching Normal region

In [None]:
wikmPatchedImagesFolderNorm = '//content//drive//My Drive//WIKM//normMaskedImages'
wikmTrainNormalPatches = '/content/drive/MyDrive/WIKM/patchedImagesPNG256x256/Train/normal'
wikmTestNormalPatches = '//content//drive//My Drive//WIKM//normalPatches//Test'
wikmValidationNormalPatches = '//content//drive//My Drive//WIKM//normalPatches//Validation'
os.chdir(wikmPatchedImagesFolderNorm)

# Every kept patch goes to the training folder. A random split that sent
# roughly 1 in 10 patches to the Test folder and 1 in 10 to the Validation
# folder (random.randint(1, 10)) was sketched here before but is disabled.
os.makedirs(wikmTrainNormalPatches, exist_ok=True)

normalPatchSize = 256  # patch height/width in pixels; also the step, so patches do not overlap

patchNum = 1
# Sorted so the patch numbering is the same on every run.
for imageName in sorted(os.listdir(wikmPatchedImagesFolderNorm)):
  image = cv2.imread(os.path.join(wikmPatchedImagesFolderNorm, imageName))

  # cv2.imread returns None for files it cannot decode.
  if image is None:
    print('--SKIPPING (unreadable image)--', imageName)
    continue
  # An image smaller than one patch cannot be split.
  if image.shape[0] < normalPatchSize or image.shape[1] < normalPatchSize:
    print('--SKIPPING (smaller than one patch)--', imageName)
    continue

  # 3 is the colour-channel count; the result is indexed as
  # patches[row, col, 0] -> one patch.
  patches = patchify(image, (normalPatchSize, normalPatchSize, 3), step=normalPatchSize)

  # Patches are written relative to the training folder; one chdir per
  # image is enough.
  os.chdir(wikmTrainNormalPatches)

  for i in range(patches.shape[0]):
    for j in range(patches.shape[1]):
      singlePatch = patches[i, j, 0, :, :, :]

      isAllBlack = not np.any(singlePatch)
      isAllWhite = np.all(singlePatch == 255)
      if not (isAllBlack or isAllWhite):
        cv2.imwrite(f'Patch{patchNum}.png', singlePatch) # train

      # The counter advances for discarded patches too, so the saved file
      # numbers have gaps (the tumour-region patching cell only counts
      # saved patches).
      patchNum += 1