## Segmentation-free profiling of a compound screen in breast cancer organoids

In [None]:
import javabridge
import bioformats as bf
import skimage
import numpy as np
import matplotlib.pyplot as plt
import sys
import os
import re
import random
import seaborn as sn
import pandas as pd

# Start the Java VM with the bioformats JARs on its class path. Presumably this
# is required before any bioformats-backed image reading below — TODO confirm.
# NOTE(review): no matching javabridge.kill_vm() is visible in this section.
javabridge.start_vm(class_path=bf.JARS)

In [None]:
# Root folder that contains one sub-folder per CL34 acquisition.
datadir = '/Volumes/gitlab/microscopy-notebooks/data/organoids-sylwia/CL34/'

# 'data' sub-folder of each acquisition, in processing order:
# CL34A and CL34B at day0, then CL34A and CL34B at day3.
acquisition_subdirs = (
    'CL34A_4x_2p1mm_100umsteps_day0_001/data',
    'CL34B_4x_2p1mm_100umsteps_day0_001/data',
    'CL34A_4x_2p1mm_100umsteps_day3_001/data',
    'CL34B_4x_2p1mm_100umsteps_day3_001/data',
)
paths = [os.path.join(datadir, subdir) for subdir in acquisition_subdirs]

In [None]:
from bioimg import load_image_series, plot_channels

def get_all_wells(path):
    """List the image files in ``path`` and the well IDs found in their names.

    A file is kept when its name contains ``'tif'``. A well ID is the
    ``W<digits>`` token that is delimited by ``--`` on both sides and followed
    by at least one more character (e.g. ``...--W00012--P00001...``).

    Parameters
    ----------
    path : str
        Directory to scan (not recursive).

    Returns
    -------
    fnames : list of str
        Sorted names of all files in ``path`` whose name contains ``'tif'``.
    all_wells : list of str
        Unique well IDs in sorted order. Sorting makes the order independent
        of string-hash randomization, so seeded random sampling from this
        list is reproducible across interpreter sessions.
    """
    fnames = sorted(f for f in os.listdir(path) if 'tif' in f)
    # Files without a well token (e.g. an overview image) are skipped here
    # instead of failing with AttributeError on a None match.
    matches = (re.search(r'--(W[0-9]+)--(.+)', f) for f in fnames)
    all_wells = sorted({m.group(1) for m in matches if m is not None})
    return fnames, all_wells

In [None]:
# Fixed seed so that the same wells are drawn on every run.
random.seed(1108)

# Number of wells drawn at random from each acquisition folder.
n_wells_per_path = 4

imglist = []
titles = []
for path in paths:
    fnames, all_wells = get_all_wells(path=path)
    # Sample from a sorted copy: the seed only gives reproducible picks when
    # the population order is itself deterministic. The sample size is capped
    # at the number of wells available so a sparse folder does not raise
    # ValueError.
    candidates = sorted(all_wells)
    sel_wells = random.sample(candidates, k=min(n_wells_per_path, len(candidates)))
    for w in sel_wells:
        # Match the well ID as a '--'-delimited token (the form it is
        # extracted from in get_all_wells) rather than as a bare substring.
        well_token = '--' + w + '--'
        well_files = [f for f in fnames if well_token in f]
        # Only files tagged 'P00001' are loaded
        # (presumably the first imaging position — TODO confirm).
        first_pos_files = [f for f in well_files if 'P00001' in f]
        imgstack = load_image_series(path=path, imgfiles=first_pos_files)
        # Exchange the first and last axes; later cells index the last axis
        # as the optical-section axis.
        imgstack = imgstack.swapaxes(0, -1)
        imglist.append(imgstack)
        # Title is the part of the file name that precedes the well token.
        titles.append(re.search(r'(.+)(--W[0-9]+)', well_files[0]).group(1))

In [None]:
# Display the dimensions of the first loaded stack
# (its first and last axes were swapped when it was loaded).
imglist[0].shape

In [None]:
# Show every slice along the last axis of the first stack on a 3 x 7 grid.
first_stack = imglist[0]
n_sections = first_stack.shape[-1]
section_images = [first_stack[:, :, z] for z in range(n_sections)]
plot_channels(section_images, nrow=3, ncol=7, cmap='gray')
plt.show()

In [None]:
# Close-up of slice 11 (last axis) of the first stack, cropped to rows and
# columns from index 500 onwards, drawn without axis decorations.
crop = imglist[0][500:, 500:, 11]
fig, ax = plt.subplots(figsize=(10, 10))
ax.imshow(crop, cmap='gray')
ax.axis('off')

In [None]:
from bioimg import SegfreeProfiler
# Profiler configuration: 20 x 20 tiles and 50 for each of the block-type,
# super-block-type and component counts.
# NOTE(review): the exact meaning of each setting is defined by
# bioimg.SegfreeProfiler — confirm against its documentation.
profiler_settings = {
    'tile_size': (20, 20),
    'n_block_types': 50,
    'n_supblock_types': 50,
    'n_components': 50,
}
segf = SegfreeProfiler(**profiler_settings)

In [None]:
# Split the loaded stacks into tiles with the profiler; the result is indexed
# per input image below (tiles[0]).
# Presumably tiles follow segf.tile_size — TODO confirm.
tiles = segf.tile_color_images(imglist)

In [None]:
# Display the dimensions of the tile array built from the first stack.
tiles[0].shape

In [None]:
# NOTE(review): the block below is disabled code kept inside a string literal,
# so nothing in it is executed. It would plot a minimum projection (last axis)
# of every tile of the first stack on a grid sized from image shape / tile size.
'''ncol, nrow = tuple(int(x / y) for x,y in zip(imglist[0].shape, segf.tile_size))
plot_channels([np.min(tiles[0][i],axis=-1) for i in range(tiles[0].shape[0])], 
              ncol=ncol, nrow=nrow, wspace=0.05, hspace=0.05)
plt.show()'''

In [None]:
# Fit the profiler on all sampled stacks and compute their profiles.
# The result is used as a labelled table further down (its .index is assigned
# and it is sliced with .loc).
segf_prof = segf.fit_transform(imglist)

In [None]:
# Cumulative fraction of variance explained by the first k principal components.
cum_explained = np.cumsum(segf.pca.explained_variance_ratio_)
# Plot against k = 1..n so the x axis really is the number of components;
# plt.plot without explicit x values would start counting at 0.
n_pcs = np.arange(1, len(cum_explained) + 1)
plt.plot(n_pcs, cum_explained, linewidth=3)
sn.despine()
# Dotted reference line at 100 % explained variance.
plt.axhline(y=1, color='black', linestyle=':')
plt.xlabel('Number of principal components')
plt.ylabel('Cumulative explained variance')

In [None]:
# Number of optical sections = length of the last axis of an image stack.
nstacks = imglist[0].shape[-1]
# Fold each flattened PCA component back into tile form:
# (component, *tile_size, optical section).
eigentile_shape = (segf.n_components,) + tuple(segf.tile_size) + (nstacks,)
eigentiles = segf.pca.components_.reshape(eigentile_shape)

In [None]:
# Maximum projection of every eigentile along its last (optical-section) axis,
# laid out on a 5 x 10 grid.
eigentile_projections = [np.max(tile, axis=-1) for tile in eigentiles]
plot_channels(eigentile_projections, nrow=5, ncol=10, scale_x=2, scale_y=2)
plt.show()

In [None]:
from sklearn.feature_selection import VarianceThreshold
# Label each profile row with the title recorded when its stack was loaded.
segf_prof.index = titles
# Fit a variance filter (threshold 1e-4) on the profiles, then cluster only
# the feature columns that pass it.
sel = VarianceThreshold(threshold=1e-4)
sel.fit(segf_prof)
informative_cols = sel.get_support()
hmap = sn.clustermap(segf_prof.loc[:, informative_cols], metric='euclidean')