# Explore Organoid Screen Data

In [None]:
# load third-party Python modules
import javabridge
import bioformats as bf
import skimage
import numpy as np
import matplotlib.pyplot as plt
import sys
import os
import seaborn as sn
import pandas as pd

# Start the Java VM with the Bio-Formats JARs on its class path.
# NOTE(review): no matching javabridge.kill_vm() is visible in this notebook.
javabridge.start_vm(class_path=bf.JARS)

In [None]:
# Directory of the raw breast organoid images; sub-directories such as
# 'BT549' are joined onto it in the cells below.
path = '../data/BortenMA_OrganoSeg_FileS3/Breast Raw Images/'

In [None]:
# Collect the BT549 image file names (any name containing 'tiff'), sorted.
imgpath = os.path.join(path, 'BT549')
fnames = sorted(f for f in os.listdir(imgpath) if 'tiff' in f)

In [None]:
from bioimg import read_image, plot_channels

In [None]:
# Read every listed file; `imgs` is ordered like `fnames`.
imgs = [read_image(os.path.join(imgpath, f), verbose=False) for f in fnames]

In [None]:
# Inspect the dimensions of the first loaded image.
imgs[0].shape

In [None]:
# Show each channel (last axis) of the first BT549 image in its own panel.
# The channel count is read from the image being plotted; the bare name
# `img` is never bound at module level and would raise NameError here.
plot_channels([imgs[0][:,:,i] for i in range(imgs[0].shape[-1])],
              scale_x=6, scale_y=6,
              nrow=1, ncol=3, cmap='gray')
plt.show()

In [None]:
# Collect the DCIS.CoM image file names (any name containing 'tiff'), sorted.
imgpath = os.path.join(path, 'DCIS.CoM')
fnames = sorted(f for f in os.listdir(imgpath) if 'tiff' in f)

In [None]:
# Read every listed DCIS.CoM file; this rebinds `imgs`, ordered like `fnames`.
imgs = [read_image(os.path.join(imgpath, f), verbose=False) for f in fnames]

In [None]:
# Show each channel (last axis) of the second DCIS.CoM image in its own panel.
# The channel count is read from the image being plotted; the bare name
# `img` is never bound at module level and would raise NameError here.
plot_channels([imgs[1][:,:,i] for i in range(imgs[1].shape[-1])],
              scale_x=6, scale_y=6,
              nrow=1, ncol=3, cmap='gray')
plt.show()

## Train an Unsupervised Segmentation-Free Model
Load 3 randomly sampled images for each breast cancer cell line.

In [None]:
# Every entry under `path` whose name does not start with '.' is treated as
# a cell line.
cell_lines = [f for f in os.listdir(path) if not f.startswith('.')]

In [None]:
import random

In [None]:
def load_cell_line(cell_line, n_files=3, root=None):
    """Pick a random subset of image file names for one cell line.

    Parameters
    ----------
    cell_line : str
        Name of the sub-directory that holds the cell line's images.
    n_files : int, optional
        How many file names to draw (default 3).
    root : str, optional
        Directory containing the cell-line sub-directories. When omitted,
        the notebook-level ``path`` is used.

    Returns
    -------
    list of str
        ``n_files`` distinct file names containing ``'tiff'``.

    Raises
    ------
    ValueError
        Raised by ``random.sample`` when fewer than ``n_files`` files match.
    """
    imgpath = os.path.join(path if root is None else root, cell_line)
    # os.listdir returns entries in arbitrary order; sorting first makes a
    # seeded draw select the same files on every machine.
    fnames = sorted(f for f in os.listdir(imgpath) if 'tiff' in f)
    # sample randomly n_files files and hand them back to the caller
    return random.sample(fnames, n_files)

In [None]:
# Draw three images per cell line and collect them with their file names.
# os.listdir returns entries in arbitrary order, so the cell lines and the
# file names are sorted before sampling; otherwise the fixed seed would not
# select the same files on every machine.
random.seed(2207)
imgs_br = []
titles = []
for cl in sorted(cell_lines):
    imgpath = os.path.join(path, cl)
    fnames = sorted(f for f in os.listdir(imgpath) if 'tiff' in f)
    # sample randomly 3 files
    fnames = random.sample(fnames, 3)
    imgs = [read_image(os.path.join(imgpath, f), verbose=False) for f in fnames]
    # imgs_br and titles grow in lockstep, so titles[k] names imgs_br[k]
    imgs_br += imgs
    titles += fnames

In [None]:
from skimage.filters import gaussian

In [None]:
def subtract_background(img, sigma):
    """Remove smooth background by subtracting a Gaussian-blurred copy.

    Parameters
    ----------
    img : array-like
        Image to correct.
    sigma : float
        Standard deviation of the Gaussian used to estimate the background.

    Returns
    -------
    ndarray
        ``img`` minus its blurred version, as floating point.
    """
    img = np.asanyarray(img)
    if not np.issubdtype(img.dtype, np.floating):
        # Work in floating point so the subtraction cannot wrap around on
        # unsigned integer data.
        img = img.astype(np.float64)
    # preserve_range=True keeps the blurred image on the same scale as `img`.
    # By default skimage rescales integer input to [0, 1], which would make
    # the subtraction meaningless for raw integer images.
    return img - gaussian(img, sigma=sigma, preserve_range=True)

In [None]:
# Background-subtract each channel (last axis) of every sampled image
# separately with sigma=50, then restack the channels along the last axis.
img_bgsub = [np.stack([subtract_background(img[:,:,i], sigma=50) for i in range(img.shape[-1])], axis=-1) for img in imgs_br]

In [None]:
def minmax_scale(a):
    """Linearly rescale an array so its minimum maps to 0 and its maximum to 1.

    Parameters
    ----------
    a : ndarray
        Non-empty array to rescale.

    Returns
    -------
    ndarray
        Rescaled array with the same shape as ``a``. A constant array
        (maximum equal to minimum) yields all zeros instead of NaN.
    """
    lo = a.min()
    span = a.max() - lo
    if span == 0:
        # A flat array has no range to stretch; avoid dividing by zero.
        return np.zeros_like(a, dtype=float)
    return (a - lo) / span

def normalize_channels(img):
    """Min-max scale each channel (last axis) of ``img`` independently.

    Each slice ``img[:, :, k]`` is passed through ``minmax_scale`` and the
    results are stacked back along the last axis.
    """
    scaled = []
    for channel in range(img.shape[-1]):
        scaled.append(minmax_scale(img[:, :, channel]))
    return np.stack(scaled, axis=-1)

In [None]:
# Rescale the channels of every background-subtracted image.
imgs_norm = [normalize_channels(img) for img in img_bgsub ]

In [None]:
from bioimg import SegfreeProfiler
# Profiler configured for 30x30 tiles, 20 block types, 20 super-block types
# and 50 components.
segf = SegfreeProfiler(tile_size=(30,30),
                       n_block_types=20,
                       n_supblock_types=20,
                       n_components=50)

In [None]:
# Tile the normalized images with the profiler.
# Presumably one entry per image; the next cell indexes tiles[0] — TODO confirm.
tiles = segf.tile_color_images(imgs_norm)

In [None]:
# Plot every item along the first axis of tiles[0] on a 25-row x 34-column grid.
# NOTE(review): the grid size is hard-coded; presumably it matches the tile
# count of this image — confirm.
plot_channels([tiles[0][i] for i in range(tiles[0].shape[0])], 
              ncol=34, nrow=25, wspace=0.05, hspace=0.05)
plt.show()

In [None]:
# Fit the 30x30 profiler on the normalized images and keep its output.
segf_prof30 = segf.fit_transform(imgs_norm)

In [None]:
# Cumulative explained-variance ratio of the profiler's PCA, with a dotted
# reference line at 1.
plt.plot(np.cumsum(segf.pca.explained_variance_ratio_), linewidth=3)
sn.despine()
plt.axhline(y=1, color='black', linestyle=':')
plt.xlabel('Number of principal components')
plt.ylabel('Cumulative explained variance')

In [None]:
# number of optical sections: 3
nstacks = 3
# Reshape the PCA components to (n_components, *tile_size, nstacks) so each
# component can be viewed as a tile.
eigentiles = segf.pca.components_.reshape((segf.n_components, *segf.tile_size, nstacks))

In [None]:
# Collapse each eigentile with a maximum over its last axis and show all
# n_components of them on a 5 x 10 grid.
plot_channels([np.max(eigentiles[i], axis=-1) for i in range(segf.n_components)],
              nrow=5, ncol=10, scale_x=2, scale_y=2)
plt.show()

In [None]:
# run at different scales and concatenate
# run at different scales and concatenate
# NOTE: this rebinds `segf`; from here on `segf.pca` belongs to the 60x60 model.
segf = SegfreeProfiler(tile_size=(60,60),
                       n_block_types=20,
                       n_supblock_types=20,
                       n_components=50)
segf_prof60 = segf.fit_transform(imgs_norm)

In [None]:
# run at different scales and concatenate
# NOTE: this rebinds `segf`; from here on `segf.pca` belongs to the 15x15 model.
segf = SegfreeProfiler(tile_size=(15,15),
                       n_block_types=20,
                       n_supblock_types=20,
                       n_components=50)
segf_prof15 = segf.fit_transform(imgs_norm)

In [None]:
# Join the 15-, 30- and 60-pixel profiles column-wise into one table.
segf_prof = pd.concat([segf_prof15, segf_prof30, segf_prof60], axis=1)

In [None]:
# Label the rows with the sampled file names, with '.tiff' removed.
segf_prof.index = [t.replace('.tiff', '') for t in titles]

In [None]:
from sklearn.feature_selection import VarianceThreshold
# Keep only the columns that pass the 1e-4 variance threshold, then draw a
# clustered heatmap of them using Euclidean distance.
sel = VarianceThreshold(threshold=1e-4).fit(segf_prof)
hmap = sn.clustermap(segf_prof.loc[:,sel.get_support()], metric='euclidean')

In [None]:
# Row labels in the order given by the heatmap's row dendrogram.
segf_prof.index[hmap.dendrogram_row.reordered_ind]

In [None]:
# The same row ordering, as integer positions.
np.array(hmap.dendrogram_row.reordered_ind)

In [None]:
from bioimg import threshold_img

In [None]:
# Disabled cell: the plotting code is wrapped in a string literal, so it is
# not executed.
'''plot_channels([threshold_img(1-imgs_norm[21][:,:,i], method='otsu') for i in range(imgs_norm[0].shape[-1])],
              scale_x=6, scale_y=6,
              nrow=1, ncol=3, cmap='gray')
plt.show()'''

In [None]:
# Disabled cell: the plotting code is wrapped in a string literal, so it is
# not executed.
'''plot_channels([1-imgs_norm[0][:,:,i] for i in range(imgs_norm[0].shape[-1])],
              scale_x=6, scale_y=6,
              nrow=1, ncol=3, cmap='gray')
plt.show()'''

## Colorectal Cancer Organoids in 3 Patients

In [None]:
# Rebind `path` to the raw colon organoid images.
path = '../data/BortenMA_OrganoSeg_FileS3/Colon Raw Images/'

In [None]:
# Every entry under `path` whose name does not start with '.' is treated as
# one patient.
patients = [f for f in os.listdir(path) if not f.startswith('.')]

In [None]:
# Load every image of every patient and collect the file names alongside.
# Nothing is sampled in this cell, so the seed below does not affect it.
random.seed(2207)
imgs_crc = []
titles = []
for pat in patients:
    imgpath = os.path.join(path, pat)
    # 'tif' matches any name containing 'tif', which includes 'tiff'
    fnames = [f for f in os.listdir(imgpath) if 'tif' in f]
    imgs = [read_image(os.path.join(imgpath, f), verbose=False) for f in fnames]
    # imgs_crc and titles grow in lockstep, so titles[k] names imgs_crc[k]
    imgs_crc += imgs
    titles += fnames

In [None]:
# only 3D data with 3 optical sections
titles = [t for t, img in zip(titles, imgs_crc) if img.shape[-1] == 3]
imgs_crc = [img for img in imgs_crc if img.shape[-1] == 3]

In [None]:
# Number of images left after the filter.
len(imgs_crc)

In [None]:
# Background-subtract each channel (last axis) of every colon image
# separately with sigma=50, then restack the channels along the last axis.
img_bgsub = [np.stack([subtract_background(img[:,:,i], sigma=50) for i in range(img.shape[-1])], axis=-1) for img in imgs_crc]

In [None]:
# Rescale the channels of every background-subtracted image; this rebinds
# `imgs_norm` to the colon data.
imgs_norm = [normalize_channels(img) for img in img_bgsub ]

In [None]:
# Profiler configured for 30x30 tiles, 50 block types, 30 super-block types
# and 50 components.
segf = SegfreeProfiler(tile_size=(30,30),
                       n_block_types=50,
                       n_supblock_types=30,
                       n_components=50)

In [None]:
# Tile the normalized colon images with the profiler.
# Presumably one entry per image; the next cell indexes tiles[0] — TODO confirm.
tiles = segf.tile_color_images(imgs_norm)

In [None]:
# Plot every item along the first axis of tiles[0] on a 21-row x 28-column grid.
# NOTE(review): the grid size is hard-coded; presumably it matches the tile
# count of this image — confirm.
plot_channels([tiles[0][i] for i in range(tiles[0].shape[0])], 
              ncol=28, nrow=21, wspace=0.05, hspace=0.05)
plt.show()

In [None]:
# Fit the 30x30 profiler on the normalized colon images and keep its output.
segf_prof30 = segf.fit_transform(imgs_norm)

In [None]:
# Cumulative explained-variance ratio of the profiler's PCA, with a dotted
# reference line at 1.
plt.plot(np.cumsum(segf.pca.explained_variance_ratio_), linewidth=3)
sn.despine()
plt.axhline(y=1, color='black', linestyle=':')
plt.xlabel('Number of principal components')
plt.ylabel('Cumulative explained variance')

In [None]:
# number of optical sections: 3
nstacks = 3
# Reshape the PCA components to (n_components, *tile_size, nstacks) so each
# component can be viewed as a tile.
eigentiles = segf.pca.components_.reshape((segf.n_components, *segf.tile_size, nstacks))

In [None]:
# Collapse each eigentile with a maximum over its last axis and show all
# n_components of them on a 5 x 10 grid.
plot_channels([np.max(eigentiles[i], axis=-1) for i in range(segf.n_components)],
              nrow=5, ncol=10, scale_x=2, scale_y=2)
plt.show()

In [None]:
# run at different scales and concatenate
# run at different scales and concatenate
# NOTE: this rebinds `segf`; from here on `segf.pca` belongs to the 60x60 model.
segf = SegfreeProfiler(tile_size=(60,60),
                       n_block_types=50,
                       n_supblock_types=30,
                       n_components=50)
segf_prof60 = segf.fit_transform(imgs_norm)

In [None]:
# Disabled cell: the 15x15 run is wrapped in a string literal, so it is not
# executed and `segf_prof15` is not recomputed for the colon data.
'''# run at different scales and concatenate
segf = SegfreeProfiler(tile_size=(15,15),
                       n_block_types=20,
                       n_supblock_types=20,
                       n_components=50)
segf_prof15 = segf.fit_transform(imgs_norm)'''

In [None]:
# Join the 30- and 60-pixel profiles column-wise into one table.
segf_prof = pd.concat([segf_prof30, segf_prof60], axis=1)

In [None]:
# Label the rows with the file names minus their extension. splitext copes
# with both '.tif' and '.tiff' (the 'tif' file filter admits either), whereas
# stripping the literal '.tif' would leave a stray 'f' on '.tiff' names.
segf_prof.index = [os.path.splitext(t)[0] for t in titles]

In [None]:
# Keep only the columns that pass the 1e-4 variance threshold, then draw a
# clustered heatmap of them using Euclidean distance.
sel = VarianceThreshold(threshold=1e-4).fit(segf_prof)
hmap = sn.clustermap(segf_prof.loc[:,sel.get_support()], metric='euclidean')

In [None]:
# Row labels in the order given by the heatmap's row dendrogram.
segf_prof.index[hmap.dendrogram_row.reordered_ind]

In [None]:
# The same row ordering, as integer positions.
np.array(hmap.dendrogram_row.reordered_ind)

In [None]:
# Show the channels of one selected image stacked vertically, each panel
# titled with the file name minus its extension.
ind = 23
# The channel count comes from the image being plotted, and splitext strips
# '.tif' and '.tiff' alike (the 'tif' file filter admits both).
plot_channels([imgs_norm[ind][:,:,i] for i in range(imgs_norm[ind].shape[-1])],
              titles=[os.path.splitext(titles[ind])[0]]*3,
              scale_x=8, scale_y=8,
              nrow=3, ncol=1, cmap='gray')
plt.show()