In [3]:
import os
import sys
sys.path = ['/nfs/gns/homes/willj/anaconda3/envs/GTEx/lib/python3.5/site-packages'] + sys.path
GTEx_directory = '/hps/nobackup/research/stegle/users/willj/GTEx'
import openslide
from openslide.deepzoom import DeepZoomGenerator
from openslide import open_slide
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
import scipy
import h5py

tissue = 'Lung'
# ID
tissue_filepath = os.path.join(GTEx_directory,'data','raw',tissue)
tissue_images = os.listdir(tissue_filepath)
tissue_IDs = [x.split('.')[0] for x in tissue_images]


# Choose a lung image slice

In [4]:
ID = 'GTEX-145MF-0726'
with h5py.File(GTEx_directory + '/data/better_covering_patches/{}_256.hdf5'.format(ID),'r') as f:
    patches = f['patches'].value

with h5py.File(GTEx_directory + '/data/retrained_inceptionet_features.hdf5','r') as f:
    features = f['lung']['features']['256'][ID].value

# Look at the maximally activating patches for associated features from Leo's analysis

In [None]:
h = 70
feature = 895
feature_vector = features[:,feature]
ordered_idx = np.argsort(feature_vector)[::-1]
ordered_patches = patches[ordered_idx]
f,a = plt.subplots(int(h/10),10, figsize=(10,10))
for i in range(h):
    a.flatten()[i].imshow(ordered_patches[i])
    a.flatten()[i].set_title(feature_vector[ordered_idx[i]])
    a.flatten()[i].axis('off')
    
normalised_feature_vector = np.clip(500*feature_vector / max(feature_vector), 0,255)

This feature seems to activate in whitespace

# Visualising activation areas on the tissue slice

I construct the mask with the same parameters as when I first extract patches. This results in the same patch centers as. We know this because len(mask_coords) = len(patches)

In [None]:
print (len(mask_coords))

In [None]:
print (len(features))

In [None]:
import cv2
from scipy.misc import imresize
import mahotas
import os
image_filepath = os.path.join(GTEx_directory,'data','raw',tissue, ID + '.svs')

image_slide = open_slide(image_filepath)
# cv2.line(image_slide, (0,128), (200,128),color=[255,255,255], thickness=1)
toplevel = image_slide.level_count - 1
topdim = image_slide.level_dimensions[-1]
topdownsample = image_slide.level_downsamples[-1]
topdownsampleint = int(topdownsample)

toplevelslide = image_slide.read_region((0,0), toplevel, topdim)
toplevelslide = np.array(toplevelslide)
toplevelslide = toplevelslide[:,:,0:3]
slide = toplevelslide

blurredslide = cv2.GaussianBlur(slide, (51,51),0)
blurredslide = cv2.cvtColor(blurredslide, cv2.COLOR_BGR2GRAY)
T_otsu = mahotas.otsu(blurredslide)

mask = np.zeros_like(slide)
mask = mask[:,:,0]
mask[blurredslide < T_otsu] = 255

# plt.imshow(cv2.bitwise_and(slide, slide, mask=mask))

patchsize = 256
downsampledpatchsize = patchsize / topdownsampleint
xlimit = int(topdim[1] / downsampledpatchsize)
ylimit = int(topdim[0] / downsampledpatchsize)


# Find downsampled coords
coords = []
for i in range(xlimit):
    for j in range(ylimit):
        x = int(downsampledpatchsize/2 + i*downsampledpatchsize)
        y = int(downsampledpatchsize/2 + j*downsampledpatchsize)
        coords.append((x,y))

# Find coords in downsampled mask
mask_coords = []
for c in coords:
    x = c[0]
    y = c[1]
    if mask[x,y] > 0:
        mask_coords.append(c)
        
slidemarkings = slide.copy()
for (j,c) in enumerate(mask_coords):
    x = c[0]
    y = c[1]
    
    slidemarkings[x-2:x+2,y-2:y+2] = [normalised_feature_vector[j],0,0]
    
plt.figure(figsize=(100,100))
plt.imshow(cv2.bitwise_and(slidemarkings, slidemarkings, mask=mask))

Displaying the activations across the entire image slice suggests that the feature picks up areas of white space in the image

# Zooming in

In [None]:
plt.figure(figsize=(100,100))
plt.imshow(cv2.bitwise_and(slidemarkings, slidemarkings, mask=mask)[500:1500,1000:2000])

The feature seems to localise to airhole pockets.

In [47]:
# Convert downsampled masked coords to full masked coords
full_mask_coords = []
for c in mask_coords:
    x = c[0]
    y = c[1]
    full_x = int(topdownsample*x)
    full_y = int(topdownsample*y)
    full_mask_coords.append((full_x,full_y))

# Repeat for feature 783

In [None]:
h = 70
feature = 783
feature_vector = features[:,feature]
ordered_idx = np.argsort(feature_vector)[::-1]
ordered_patches = patches[ordered_idx]
f,a = plt.subplots(int(h/10),10, figsize=(10,10))
for i in range(h):
    a.flatten()[i].imshow(ordered_patches[i])
    a.flatten()[i].set_title(feature_vector[ordered_idx[i]])
    a.flatten()[i].axis('off')
    
normalised_feature_vector = np.clip(500*feature_vector / max(feature_vector), 0,255)

In [1]:
import cv2
from scipy.misc import imresize
import mahotas
import os
image_filepath = os.path.join(GTEx_directory,'data','raw',tissue, ID + '.svs')

image_slide = open_slide(image_filepath)
# cv2.line(image_slide, (0,128), (200,128),color=[255,255,255], thickness=1)
toplevel = image_slide.level_count - 1
topdim = image_slide.level_dimensions[-1]
topdownsample = image_slide.level_downsamples[-1]
topdownsampleint = int(topdownsample)

toplevelslide = image_slide.read_region((0,0), toplevel, topdim)
toplevelslide = np.array(toplevelslide)
toplevelslide = toplevelslide[:,:,0:3]
slide = toplevelslide

blurredslide = cv2.GaussianBlur(slide, (51,51),0)
blurredslide = cv2.cvtColor(blurredslide, cv2.COLOR_BGR2GRAY)
T_otsu = mahotas.otsu(blurredslide)

mask = np.zeros_like(slide)
mask = mask[:,:,0]
mask[blurredslide < T_otsu] = 255

# plt.imshow(cv2.bitwise_and(slide, slide, mask=mask))

patchsize = 256
downsampledpatchsize = patchsize / topdownsampleint
xlimit = int(topdim[1] / downsampledpatchsize)
ylimit = int(topdim[0] / downsampledpatchsize)


# Find downsampled coords
coords = []
for i in range(xlimit):
    for j in range(ylimit):
        x = int(downsampledpatchsize/2 + i*downsampledpatchsize)
        y = int(downsampledpatchsize/2 + j*downsampledpatchsize)
        coords.append((x,y))

# Find coords in downsampled mask
mask_coords = []
for c in coords:
    x = c[0]
    y = c[1]
    if mask[x,y] > 0:
        mask_coords.append(c)
        
slidemarkings = slide.copy()
for (j,c) in enumerate(mask_coords):
    x = c[0]
    y = c[1]
    
    slidemarkings[x-2:x+2,y-2:y+2] = [normalised_feature_vector[j],0,0]
    
plt.figure(figsize=(100,100))
plt.imshow(cv2.bitwise_and(slidemarkings, slidemarkings, mask=mask))

NameError: name 'GTEx_directory' is not defined

In [2]:
plt.figure(figsize=(100,100))
plt.imshow(cv2.bitwise_and(slidemarkings, slidemarkings, mask=mask)[1500:2500,500:1500])

NameError: name 'plt' is not defined

Harder to interpret these regions