# Visualizing CellPainting Compound Phenotypes

In [None]:
# load third-party Python modules
import javabridge
import bioformats as bf
import skimage
import numpy as np
import matplotlib.pyplot as plt
import sys
import os
import pandas as pd

# start the Java virtual machine with the Bio-Formats JARs on its class path
# NOTE(review): presumably required before any bioformats-based image reading — confirm
javabridge.start_vm(class_path=bf.JARS)

In [None]:
# root directory of the image data set; load_drug_images() joins it with a
# plate ID to locate the directory that holds one plate's image files
path = '/Volumes/gitlab/microscopy-notebooks/data/cytodata/datasets/CDRPBIO-BBBC036-Bray/images/CDRPBIO-BBBC036-Bray'

In [None]:
# tab-separated plate annotation table (one row per annotated well)
annot_file = "../data/Bray-metadata/plate_annot.txt"
chemannot = pd.read_csv(annot_file, sep='\t')

Filter out the bad wells and plot all drugs in the heatmap side-by-side with replicates:

In [None]:
# keep only the rows of the noisy-well table whose 'count' exceeds 3
noisywells = pd.read_csv('../data/Bray-noisywells.csv', index_col=0)
flagged = noisywells['count'] > 3
noisywells = noisywells.loc[flagged].reset_index(drop=True)

# anti-join: tag every annotation row with its merge origin, then keep the
# rows that have no partner among the flagged (plateID, well_position) pairs
tagged = chemannot.merge(noisywells[['plateID', 'well_position']],
                         how='outer', indicator=True)
clean = tagged.loc[tagged['_merge'] == 'left_only']
chemannot = clean.drop(columns='_merge').reset_index(drop=True)

In [None]:
# compounds selected for their strong phenotypes
# (judged from the mean well profiles)
drugs = [
    'vinblastine sulfate', 'dorsomorphin', 'amthamine',
    'carboxin', 'resveratrol', 'KN-93',
]

In [None]:
def load_drug_images(path, chemannot, drug, which=0):
    '''Return the image directory and image file names for one compound
    well and one DMSO control well located on the same plate.

    Parameters
    ----------
    path : str
        Root directory that contains one sub-directory per plate,
        named after the plate ID.
    chemannot : pandas.DataFrame
        Annotation table with the columns 'CPD_NAME', 'broad_sample',
        'plateID' and 'well_position'.
    drug : str
        Compound name, matched exactly against the 'CPD_NAME' column.
    which : int, optional
        Zero-based position of the well to use among the rows annotated
        with `drug`. The same position is used to pick the control well
        among the DMSO rows of that plate.

    Returns
    -------
    imgpath : str
        Directory holding the images of the selected plate.
    cmpd_imgs : list of str
        File names in `imgpath` that contain "_<well>_" for the compound well.
    ctrl_imgs : list of str
        File names in `imgpath` that contain "_<well>_" for the control well.

    Raises
    ------
    KeyError
        If `which` does not address an existing compound row, or an
        existing DMSO row on the selected plate.
    '''
    cmpd_annot = chemannot[chemannot['CPD_NAME'] == drug].reset_index(drop=True)
    if not 0 <= which < len(cmpd_annot):
        raise KeyError("no well #{} for compound {!r} ({} annotated wells)"
                       .format(which, drug, len(cmpd_annot)))
    plate = cmpd_annot['plateID'].iloc[which]
    well = cmpd_annot['well_position'].iloc[which]

    imgpath = os.path.join(path, str(plate))
    # os.listdir() returns entries in arbitrary order; sort them so that the
    # order of the returned file names is reproducible across runs and
    # file systems (callers assign channels by list position)
    imgfiles = sorted(os.listdir(imgpath))
    cmpd_imgs = [f for f in imgfiles if "_" + well + "_" in f]

    # DMSO control wells that sit on the same plate as the compound well
    on_plate = (chemannot['broad_sample'] == 'DMSO') & (chemannot['plateID'] == plate)
    ctrl_annot = chemannot.loc[on_plate, :].reset_index(drop=True)
    if not 0 <= which < len(ctrl_annot):
        raise KeyError("no DMSO well #{} on plate {} ({} DMSO wells)"
                       .format(which, plate, len(ctrl_annot)))
    well = ctrl_annot['well_position'].iloc[which]
    ctrl_imgs = [f for f in imgfiles if "_" + well + "_" in f]

    return imgpath, cmpd_imgs, ctrl_imgs

## Getting Started
First load a couple of control and drug-treated (vinblastine sulfate) images:

In [None]:
# first annotated vinblastine sulfate well plus a DMSO well from its plate
imgpath, cmpd_imgs, ctrl_imgs = load_drug_images(
    path, chemannot, 'vinblastine sulfate', which=0)

In [None]:
from bioimg import read_image, load_image_series
from bioimg import plot_channels, combine_channels
# read the compound-well images whose file name contains 's1'
# (presumably the first field of view — confirm against the file naming)
s1_cmpd_files = [f for f in cmpd_imgs if 's1' in f]
imgs = [read_image(fname=os.path.join(imgpath, f)) for f in s1_cmpd_files]

In [None]:
# show every channel of the compound well as its own grayscale panel
plot_channels(
    imgs,
    titles=['Nuclei', 'ER', 'RNA', 'F-actin', 'Mito'],
    nrow=2, ncol=3,
    scale_x=4, scale_y=3.5,
    cmap='gray',
)

In [None]:
# Plotting settings shared by every CellPainting composite in this notebook;
# they are passed as keyword arguments to combine_channels().
#
# Previous settings, kept for reference:
#   colors: ['blue', 'yellow', 'green', 'orange', 'red']
#   blend:  [6, 1, 2, 2, 3]
#   gamma:  [0.5, 0.8, 0.6, 0.5, 0.7]
#
# Current settings (one entry per channel):
col_params = dict(
    colors=['blue', 'yellow', 'green', 'white', 'red'],
    blend=[2, 0.5, 1, 0.5, 1],
    gamma=[0.5, 0.8, 0.6, 0.5, 0.7],
)

In [None]:
# merge the compound-well channels into one composite and show it without axes
plt.figure(figsize=(10, 8))
cmpd_composite = combine_channels(imgs, **col_params)
plt.imshow(cmpd_composite)
plt.axis('off')

Load control wells:

In [None]:
# read the control-well images whose file name contains 's1'
s1_ctrl_files = [f for f in ctrl_imgs if 's1' in f]
dmso_imgs = [read_image(fname=os.path.join(imgpath, f)) for f in s1_ctrl_files]

In [None]:
# show every channel of the DMSO control well as its own grayscale panel
plot_channels(
    dmso_imgs,
    titles=['Nuclei', 'ER', 'RNA', 'F-actin', 'Mito'],
    nrow=2, ncol=3,
    scale_x=4, scale_y=3.5,
    cmap='gray',
)

In [None]:
# merge the control-well channels into one composite and show it without axes
plt.figure(figsize=(10, 8))
ctrl_composite = combine_channels(dmso_imgs, **col_params)
plt.imshow(ctrl_composite)
plt.axis('off')

## Plot drug-treated and control images side by side for a number of drugs:

We can loop over the drugs with the strongest effects:

In [None]:
# Collect composites in plotting order: for each drug, three wells, each
# compound composite immediately followed by a DMSO composite from its plate.
fview = 's3'  # only files whose name contains this tag are read
imglist, titles = [], []
for d in drugs:
    for i in range(3):
        imgpath, cmpd_imgs, ctrl_imgs = load_drug_images(
            path=path, chemannot=chemannot, drug=d, which=i)

        cmpd_view = [f for f in cmpd_imgs if fview in f]
        ctrl_view = [f for f in ctrl_imgs if fview in f]
        imgs = [read_image(fname=os.path.join(imgpath, f), verbose=False)
                for f in cmpd_view]
        dmso_imgs = [read_image(fname=os.path.join(imgpath, f), verbose=False)
                     for f in ctrl_view]

        drug_img = combine_channels(imgs, **col_params)
        ctrl_img = combine_channels(dmso_imgs, **col_params)
        imglist += [drug_img, ctrl_img]
        titles += [d, 'DMSO']

In [None]:
# Number of grid rows for a 6-column layout. `/` is true division in
# Python 3 and would yield a float (e.g. 6.0), whereas a row count has to be
# an integer; ceiling integer division also keeps a partially filled last row.
nrow = (len(imglist) + 5) // 6

In [None]:
# draw all composites on an nrow x 6 grid, each panel titled by compound or 'DMSO'
plot_channels(
    imglist,
    nrow=nrow, ncol=6,
    titles=titles,
    scale_y=4,
    top=0.65, hspace=0.1, wspace=0.05,
)

Visualize more drugs from the heatmap:

In [None]:
# second batch of compounds to visualize
drugs = [
    '5-iodotubercidin',
    'harmol',
    'scoulerine',
    'LY-294002',
    'VEGF receptor 2 kinase inhibitor IV',
    'A-23187',
]

In [None]:
# Collect composites in plotting order: for each drug, three wells, each
# compound composite immediately followed by a DMSO composite from its plate.
fview = 's3'  # only files whose name contains this tag are read
imglist, titles = [], []
for d in drugs:
    for i in range(3):
        imgpath, cmpd_imgs, ctrl_imgs = load_drug_images(
            path=path, chemannot=chemannot, drug=d, which=i)

        cmpd_view = [f for f in cmpd_imgs if fview in f]
        ctrl_view = [f for f in ctrl_imgs if fview in f]
        imgs = [read_image(fname=os.path.join(imgpath, f), verbose=False)
                for f in cmpd_view]
        dmso_imgs = [read_image(fname=os.path.join(imgpath, f), verbose=False)
                     for f in ctrl_view]

        drug_img = combine_channels(imgs, **col_params)
        ctrl_img = combine_channels(dmso_imgs, **col_params)
        imglist += [drug_img, ctrl_img]
        titles += [d, 'DMSO']

In [None]:
# Number of grid rows for a 6-column layout. `/` is true division in
# Python 3 and would yield a float (e.g. 6.0), whereas a row count has to be
# an integer; ceiling integer division also keeps a partially filled last row.
nrow = (len(imglist) + 5) // 6
# draw all composites on the grid, each panel titled by compound or 'DMSO'
plot_channels(imglist, titles=titles, nrow=nrow, ncol=6, scale_y=4,
              hspace=0.1, wspace=0.05, top=0.65)

Check more drugs:

In [None]:
# third batch of compounds to visualize
drugs = [
    'capsazepine',
    'crustecdysone',
    'butein',
    'SB 218078',
    'hinokitiol',
    'SRC kinase inhibitor II',
]

In [None]:
# Collect composites in plotting order: for each drug, three wells, each
# compound composite immediately followed by a DMSO composite from its plate.
fview = 's3'  # only files whose name contains this tag are read
imglist, titles = [], []
for d in drugs:
    for i in range(3):
        imgpath, cmpd_imgs, ctrl_imgs = load_drug_images(
            path=path, chemannot=chemannot, drug=d, which=i)

        cmpd_view = [f for f in cmpd_imgs if fview in f]
        ctrl_view = [f for f in ctrl_imgs if fview in f]
        imgs = [read_image(fname=os.path.join(imgpath, f), verbose=False)
                for f in cmpd_view]
        dmso_imgs = [read_image(fname=os.path.join(imgpath, f), verbose=False)
                     for f in ctrl_view]

        drug_img = combine_channels(imgs, **col_params)
        ctrl_img = combine_channels(dmso_imgs, **col_params)
        imglist += [drug_img, ctrl_img]
        titles += [d, 'DMSO']

In [None]:
# Number of grid rows for a 6-column layout. `/` is true division in
# Python 3 and would yield a float (e.g. 6.0), whereas a row count has to be
# an integer; ceiling integer division also keeps a partially filled last row.
nrow = (len(imglist) + 5) // 6
# draw all composites on the grid, each panel titled by compound or 'DMSO'
plot_channels(imglist, titles=titles, nrow=nrow, ncol=6, scale_y=4,
              hspace=0.1, wspace=0.05, top=0.65)