# Large-Scale Morphology Analysis

In [None]:
# load third-party Python modules
import javabridge
import bioformats as bf
import skimage
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sn
import pandas as pd
import re
import os
import sys
import h5py
from sklearn.manifold import TSNE
sys.path.append('..')

javabridge.start_vm(class_path=bf.JARS)

In [None]:
def scale_columns(df):
    """Standardize every column of a DataFrame to zero mean and unit std.

    Parameters
    ----------
    df : pandas.DataFrame
        Numeric table; each column is scaled independently.

    Returns
    -------
    pandas.DataFrame
        ``(df - column mean) / column std`` with the same shape and labels.
        Columns whose standard deviation is exactly zero (constant columns)
        come back as all zeros instead of NaN.
    """
    centered = df - df.mean()
    spread = df.std()
    # A constant column has std == 0; dividing 0 by 0 would fill the column
    # with NaN, which downstream estimators such as t-SNE reject. Dividing by
    # 1 instead leaves the (already all-zero) centered values untouched.
    spread = spread.replace(0, 1)
    return centered / spread

In [None]:
# load plate annotation: a tab-separated table; later cells read its
# 'Drug' and 'well' columns to pick out control and drug-treated wells
annot_df = pd.read_csv('data/AML_trainset/drugannot.txt',
                      sep='\t')

In [None]:
# plate folders of interest; the disabled line below would instead collect
# every '../imgdata' entry whose name contains 'Plate'
#plates = [f for f in os.listdir('../imgdata') if 'Plate' in f]
plates = ['180528_Plate3', '180528_Plate5',
          '180625_Plate3', '181109_Plate1',
          '181109_Plate3']

In [None]:
# directory of the first plate in `plates`; the per-well CSV files are read from here
platedir = os.path.join('../imgdata/', plates[0])

In [None]:
# annotation rows whose Drug is 'DMSO', and the distinct well ids among them
is_dmso = annot_df['Drug'] == 'DMSO'
dmso = annot_df.loc[is_dmso].reset_index(drop=True)
dmso_wells = dmso['well'].unique()

In [None]:
# read one CSV per DMSO well from the plate directory
imgdf = [pd.read_csv(os.path.join(platedir, well + '.csv'))
         for well in dmso_wells]

In [None]:
# stack the per-well tables into one frame with a fresh 0..n-1 row index
imgdf = pd.concat(imgdf).reset_index(drop=True)
# .copy() makes `labels` an independent frame, so the column assignments
# below do not write into a slice of `imgdf` (avoids SettingWithCopyWarning)
labels = imgdf[['class', 'file']].copy()
imgdf = imgdf.drop(['class', 'file'], axis=1)
# well id = file name with everything from the first 'f<digit>' onwards removed
labels['well'] = labels['file'].replace(regex=r'f[0-9].+', value='')
# class code 2 is relabelled 'Viable', every other code 'Apoptotic'
labels['class'] = labels['class'].apply(lambda x: 'Viable' if x == 2 else 'Apoptotic')
# attach the DMSO annotation columns to every row
# NOTE(review): later cells pair `labels` with `imgdf` by row position, which
# assumes this merge neither drops, duplicates nor reorders rows — confirm
# that 'well' is unique in `dmso`
labels = pd.merge(labels, dmso, on='well')
imgdf_scaled = scale_columns(imgdf)

In [None]:
# nine feature columns: area, eccentricity and mean intensity for each of the
# Calcein, Hoechst and Lysosomal 'ch-' prefixes (grouped by prefix)
feat_subset = ['ch-{}-{}'.format(channel, measure)
               for channel in ('Calcein', 'Hoechst', 'Lysosomal')
               for measure in ('area', 'eccentricity', 'mean_intensity')]

In [None]:
# standardized values of the selected feature columns only
Xfeat = imgdf_scaled[feat_subset]

In [None]:
# 2-D t-SNE embedding computed from all columns of imgdf_scaled (not only
# feat_subset); random_state pins the seed
X_tsne = TSNE(n_components=2, random_state=21, perplexity=50).fit_transform(imgdf_scaled)

In [None]:
# put the t-SNE coordinates side by side with the label columns; concat(axis=1)
# matches rows by index
# NOTE(review): assumes `labels` carries a 0..n-1 index in the same row order as the embedded data — confirm
X_df = pd.concat([pd.DataFrame(X_tsne, columns=['tsne1', 'tsne2']), labels], axis=1)

In [None]:
# append the feat_subset columns held in Xfeat to the coordinates and labels
X_df = pd.concat([X_df, Xfeat], axis=1)

In [None]:
fig, ax = plt.subplots(figsize=(9, 8))
sn.set(font_scale=1.4)
sn.set_style('white')
sn.despine()
# t-SNE map of the DMSO wells: colour encodes 'Culture', marker encodes 'class'
sn.scatterplot(data=X_df, x='tsne1', y='tsne2',
               hue='Culture', style='class',
               s=40, alpha=0.8, ax=ax)
# anchor the legend beyond the right edge of the axes
ax.legend(loc='lower right', bbox_to_anchor=(1.2, 0.05))
for side in ('right', 'top'):
    ax.spines[side].set_visible(False)
ax.set_xlabel('TSNE 1')
ax.set_ylabel('TSNE 2')
ax.set_title('DMSO control wells')

## Morphology analysis for viable cells
Subset only to viable cells:

In [None]:
# positions of the rows labelled 'Viable'
viable_rows = np.flatnonzero((labels['class'] == 'Viable').values)
# re-standardize the features within the viable subset and renumber the rows
img_viab = scale_columns(imgdf.iloc[viable_rows, :]).reset_index(drop=True)
# matching label rows, renumbered the same way
labels_viab = labels.loc[labels['class'] == 'Viable'].reset_index(drop=True)

In [None]:
# 2-D t-SNE embedding of the viable subset (all columns of img_viab), here with perplexity 30
X_tsne = TSNE(n_components=2, random_state=21, perplexity=30).fit_transform(img_viab)

In [None]:
# t-SNE coordinates next to the labels of the viable rows (rows matched by index)
X_viab = pd.concat([pd.DataFrame(X_tsne, columns=['tsne1', 'tsne2']), labels_viab], axis=1)

In [None]:
# append the feat_subset columns of the viable subset
viab_feats = img_viab[feat_subset]
X_viab = pd.concat([X_viab, viab_feats], axis=1)

In [None]:
fig, ax = plt.subplots(figsize=(8, 8))
sn.set(font_scale=1.3)
sn.set_style('white')
sn.despine()
# t-SNE map of the viable rows, coloured by the 'Culture' column
sn.scatterplot(data=X_viab, x='tsne1', y='tsne2',
               hue='Culture',
               #style = 'Culture',
               #palette = sn.cubehelix_palette(dark=.9, light=.1, as_cmap=True),
               s=60, alpha=0.5, ax=ax)
# anchor the legend beyond the right edge of the axes
ax.legend(loc='lower right', bbox_to_anchor=(1.5, 0.1))
for side in ('right', 'top'):
    ax.spines[side].set_visible(False)
ax.set_xlabel('TSNE 1')
ax.set_ylabel('TSNE 2')
ax.set_title('Viable cells')

In [None]:
from mpl_toolkits.axes_grid1.inset_locator import inset_axes
cmap = sn.diverging_palette(240, 15, as_cmap=True)
fig, ax = plt.subplots(nrows=3, ncols=3, figsize = (16,16))
sn.despine()
# one panel per entry of feat_subset: the t-SNE map in X_viab coloured by
# that feature's value
for r in range(3):
    for c in range(3):
        f = feat_subset[3*r+c]
        vals = X_viab[f].values
        ax[r,c].set_title(f.replace('ch-', ''))
        # Symmetric colour limits taken from the 10th and 90th percentiles, so
        # the centre of the diverging palette sits at 0 and the tails saturate.
        # np.percentile expects q on a 0-100 scale: the previous q=0.1 and
        # q=0.9 both sampled the extreme lower tail of the distribution.
        lo, hi = np.percentile(vals, [10, 90])
        maxval = max(abs(lo), abs(hi))
        sc = ax[r,c].scatter(x=X_viab['tsne1'].values,
                             y=X_viab['tsne2'].values,
                             c=vals,
                             cmap=cmap, alpha=0.5,
                             vmin=-maxval, vmax=maxval)
        # slim colourbar drawn inside the panel's upper-right corner
        cbaxes = inset_axes(ax[r,c], width="3%", height="45%",
                            loc='upper right')
        plt.colorbar(sc, ax=ax[r,c], cax = cbaxes)


# shared axis labels for the whole grid
fig.text(0.5, 0.04, 'TSNE 1', ha='center')
fig.text(0.04, 0.5, 'TSNE 2', va='center', rotation='vertical')

Save the viable cells of DMSO wells before loading drug-treated wells:

In [None]:
# keep the unscaled rows labelled 'Viable' as the control set; both names
# refer to the same frame
viable_rows = np.flatnonzero((labels['class'] == 'Viable').values)
img_viab = imgdf.iloc[viable_rows, :]
X_ctrl = img_viab

Now load some drug wells:

In [None]:
# drugs whose wells are loaded below (order matters: the list is later cut
# into consecutive groups)
drug_sel = [
    'Tofacitinib',
    'Midostaurin',
    'Ganetespib',
    'Lenalidomide',
    'Pyridone 6',
    'UMI-77',
    'Bafilomycin A1',
    'Quizartinib',
    'Hydroxychloroquine',
    'Fludarabine',
    'Vorinostat',
    'Thioguanine',
    'Nutlin 3a',
    'Palbociclib',
    'Carfilzomib',
    'JQ1',
    'Cytarabine',
    'BAY61-3606',
    'Venetoclax',
    'Ixazomib',
]

In [None]:
# annotation rows of the selected drugs, and the distinct well ids among them
is_selected = annot_df['Drug'].isin(drug_sel)
drugs = annot_df.loc[is_selected].reset_index(drop=True)
drug_wells = drugs['well'].unique()

In [None]:
# read one CSV per drug well and keep only the rows with class code 2
imgdf = [
    pd.read_csv(os.path.join(platedir, well + '.csv')).loc[lambda tbl: tbl['class'] == 2]
    for well in drug_wells
]

In [None]:
# stack the per-well tables into one frame with a fresh 0..n-1 row index
imgdf = pd.concat(imgdf).reset_index(drop=True)
# .copy() makes `labels` an independent frame, so the column assignments
# below do not write into a slice of `imgdf` (avoids SettingWithCopyWarning)
labels = imgdf[['class', 'file']].copy()
imgdf = imgdf.drop(['class', 'file'], axis=1)
# well id = file name with everything from the first 'f<digit>' onwards removed
labels['well'] = labels['file'].replace(regex=r'f[0-9].+', value='')
# class code 2 is relabelled 'Viable', every other code 'Apoptotic'
labels['class'] = labels['class'].apply(lambda x: 'Viable' if x == 2 else 'Apoptotic')
# attach the drug annotation columns to every row
# NOTE(review): later cells pair `labels` with `imgdf` by row position, which
# assumes this merge neither drops, duplicates nor reorders rows — confirm
# that 'well' is unique in `drugs`
labels = pd.merge(labels, drugs, on='well')

In [None]:
# standardize all feature columns of the drug-well rows, then pick out the
# feat_subset columns for plotting
X_drug = scale_columns(imgdf)
Xfeat = X_drug[feat_subset]

In [None]:
# 2-D t-SNE embedding computed from all columns of X_drug; random_state pins the seed
X_tsne = TSNE(n_components=2, random_state=21, perplexity=50).fit_transform(X_drug)

In [None]:
# t-SNE coordinates, then the label columns, then the feat_subset columns,
# joined side by side (rows matched by index)
tsne_coords = pd.DataFrame(X_tsne, columns=['tsne1', 'tsne2'])
X_df = pd.concat([pd.concat([tsne_coords, labels], axis=1), Xfeat], axis=1)

In [None]:
fig, ax = plt.subplots(figsize=(9, 8))
sn.set(font_scale=1.4)
sn.set_style('white')
sn.despine()
# t-SNE map of the drug-well rows, coloured by the 'Culture' column
sn.scatterplot(data=X_df, x='tsne1', y='tsne2',
               hue='Culture',
               s=40, alpha=0.8, ax=ax)
# anchor the legend beyond the right edge of the axes
ax.legend(loc='lower right', bbox_to_anchor=(1.2, 0.05))
for side in ('right', 'top'):
    ax.spines[side].set_visible(False)
ax.set_xlabel('TSNE 1')
ax.set_ylabel('TSNE 2')
ax.set_title('Drug-treated wells')

In [None]:
# cut drug_sel into consecutive groups of five names (the last group is
# shorter when len(drug_sel) is not a multiple of 5)
drug_chunks = [drug_sel[i:i + 5] for i in range(0, len(drug_sel), 5)]

In [None]:
fig, ax = plt.subplots(nrows=2, ncols=2,
                       figsize=(14, 12))
sn.set(font_scale=1.2)
sn.set_style('white')
sn.despine()
# ax.flat walks the 2x2 grid row by row, so panel k shows drug_chunks[k]
for k, panel in enumerate(ax.flat):
    in_chunk = np.isin(X_df['Drug'], drug_chunks[k])
    sn.scatterplot(data=X_df[in_chunk], x='tsne1', y='tsne2',
                   hue='Drug',
                   s=40, alpha=0.8, ax=panel)
    # legend sits to the right of each panel
    panel.legend(loc='lower right', bbox_to_anchor=(1.4, 0.7))
    panel.set_xlabel('TSNE 1')
    panel.set_ylabel('TSNE 2')
# widen the horizontal gap between panels
fig.subplots_adjust(wspace=0.5)

In [None]:
cmap = sn.diverging_palette(240, 15, as_cmap=True)
fig, ax = plt.subplots(nrows=3, ncols=3, figsize = (16,16))
sn.despine()
# one panel per entry of feat_subset: the t-SNE map in X_df coloured by that
# feature's value
for r in range(3):
    for c in range(3):
        f = feat_subset[3*r+c]
        vals = X_df[f].values
        ax[r,c].set_title(f.replace('ch-', ''))
        # Symmetric colour limits taken from the 10th and 90th percentiles, so
        # the centre of the diverging palette sits at 0 and the tails saturate.
        # np.percentile expects q on a 0-100 scale: the previous q=0.1 and
        # q=0.9 both sampled the extreme lower tail of the distribution.
        lo, hi = np.percentile(vals, [10, 90])
        maxval = max(abs(lo), abs(hi))
        sc = ax[r,c].scatter(x=X_df['tsne1'].values,
                             y=X_df['tsne2'].values,
                             c=vals,
                             cmap=cmap, alpha=0.5,
                             vmin=-maxval, vmax=maxval)
        # slim colourbar drawn inside the panel's upper-right corner
        cbaxes = inset_axes(ax[r,c], width="3%", height="45%",
                            loc='upper right')
        plt.colorbar(sc, ax=ax[r,c], cax = cbaxes)

fig.subplots_adjust(wspace=0.4)
# shared axis labels for the whole grid
fig.text(0.5, 0.04, 'TSNE 1', ha='center')
fig.text(0.04, 0.5, 'TSNE 2', va='center', rotation='vertical')

In [None]:
X_df.columns

In [None]:
# long format: one row per (original row, feature), keeping 'Culture' as identifier
colsub = ['Culture'] + feat_subset
X_long = X_df[colsub].melt(id_vars=['Culture'],
                           value_vars=feat_subset,
                           var_name='feature',
                           value_name='val')

In [None]:
# FacetGrid creates its own figure, so no separate plt.figure() call is made
# here: it would only leave an empty extra figure behind and its figsize
# would not apply to the grid.
# col_order pins the panel order to feat_subset, so the titles set below are
# guaranteed to match the data drawn in each panel.
g = sn.FacetGrid(X_long,
                 col="feature",
                 col_order=feat_subset,
                 hue="Culture",
                 col_wrap=3,
                 sharex=False)
# one density curve of 'val' per Culture level in every panel
g.map(sn.kdeplot, "val", lw=3).add_legend()
axes = g.axes.flatten()
for ax, feat in zip(axes, feat_subset):
    ax.set_title(feat.replace('ch-', ''))
    ax.set_xlabel('')

In [None]:
# long format again, this time keeping both 'Drug' and 'Culture' as identifiers
colsub = ['Drug', 'Culture'] + feat_subset
X_long = X_df[colsub].melt(id_vars=['Drug', 'Culture'],
                           value_vars=feat_subset,
                           var_name='feature',
                           value_name='val')

In [None]:
# catplot creates its own figure, so no separate plt.figure() call is made
# here (it would only leave an empty extra figure behind).
# col_order pins the panel order to feat_subset, so the titles set below are
# guaranteed to match the data drawn in each panel.
g = sn.catplot(x="Drug", y="val",
               col="feature", col_order=feat_subset,
               hue='Culture',
               kind="box", data=X_long,
               sharey=False,
               col_wrap=3)
axes = g.axes.flatten()
for i, ax in enumerate(axes):
    ax.set_title(feat_subset[i].replace('ch-', ''))
    ax.set_xlabel('')
    # label the y axis on the first panel of each row only
    if i % 3 == 0:
        ax.set_ylabel('Standardized value')
    # Rotate the drug names on every panel. This replaces plt.xticks(), which
    # only touched the current axes, and the hard-coded `i > 5` bottom-row
    # test, which broke as soon as the number of panels changed.
    ax.tick_params(axis='x', labelrotation=90)

In [None]:
# renumber the control rows 0..n-1 so they can be matched by index with labels_viab
X_ctrl = X_ctrl.reset_index(drop=True)

In [None]:
# make sure the viable-row labels carry a plain 0..n-1 index as well
labels_viab = labels_viab.reset_index(drop=True)

In [None]:
# 'Culture' column followed by all columns of X_ctrl, rows matched by index
Xfeat = pd.concat([labels_viab['Culture'], X_ctrl], axis=1)

Check correlation structure of the features:

In [None]:
from scipy.spatial.distance import pdist, squareform
def cordf(X):
    """Return the matrix of pairwise Pearson correlations between the rows of X.

    Parameters
    ----------
    X : array-like, shape (n_rows, n_values)
        Each row is one variable observed over ``n_values`` points.

    Returns
    -------
    numpy.ndarray, shape (n_rows, n_rows)
        Symmetric correlation matrix with ones on the diagonal.
    """
    # pdist's 'correlation' metric is 1 - r. Expand it to a square matrix
    # first and subtract afterwards: squareform() fills the diagonal with
    # zeros, so converting after the subtraction would report a
    # self-correlation of 0 instead of 1.
    return 1. - squareform(pdist(X, 'correlation'))

In [None]:
# feature-by-feature correlation table: cordf works on rows, so the control
# matrix is transposed to put one feature per row
feature_names = X_ctrl.columns
featcor = pd.DataFrame(cordf(X_ctrl.T),
                       index=feature_names,
                       columns=feature_names)

In [None]:
import matplotlib.colors as mcolors
def make_colormap(seq):
    """Build a LinearSegmentedColormap from anchors and colours.

    seq: a sequence mixing floats and RGB-tuples. The floats must be
    increasing and lie in the interval (0,1); each float is an anchor
    position, and the tuples directly before and after it give the colour
    on either side of that anchor.
    """
    # pad with the implicit anchors at 0.0 and 1.0; the outermost sides have
    # no colour, hence the (None, None, None) placeholders
    padded = [(None,) * 3, 0.0, *seq, 1.0, (None,) * 3]
    channels = ('red', 'green', 'blue')
    cdict = {name: [] for name in channels}
    for pos, entry in enumerate(padded):
        if not isinstance(entry, float):
            continue
        # explicit 3-way unpacking keeps the requirement that neighbours are RGB triples
        r_before, g_before, b_before = padded[pos - 1]
        r_after, g_after, b_after = padded[pos + 1]
        sides = zip(channels,
                    (r_before, g_before, b_before),
                    (r_after, g_after, b_after))
        for name, before, after in sides:
            cdict[name].append([entry, before, after])
    return mcolors.LinearSegmentedColormap('CustomMap', cdict)

def diverge_map(high=(0.565, 0.392, 0.173), low=(0.094, 0.310, 0.635)):
    '''
    Build a diverging colormap that runs from low through white to high.

    low and high are the colours at the two ends of the spectrum. Each may
    be a colour string, which is converted to RGB, or an rgb colour tuple,
    which is used as given.
    '''
    to_rgb = mcolors.ColorConverter().to_rgb
    white = to_rgb('white')
    low_rgb = to_rgb(low) if isinstance(low, str) else low
    high_rgb = to_rgb(high) if isinstance(high, str) else high
    # white on both sides of the 0.5 anchor makes the midpoint pure white
    return make_colormap([low_rgb, white, 0.5, white, high_rgb])


In [None]:
# alternative colormaps (disabled):
#cmap = sn.diverging_palette(220, 40, as_cmap=True)
#cmap = plt.get_cmap("coolwarm")
# custom map running teal -> white -> goldenrod, used for the heatmap below
cmap = diverge_map(low='teal', high='goldenrod')

In [None]:
# plot the heatmap and annotation on it
# colour range fixed to [-1, 1]; tick labels are switched off on both axes
# NOTE: `ax` holds the object returned by sn.clustermap, not a matplotlib Axes
ax = sn.clustermap(featcor,
           xticklabels=False,
           yticklabels=False,
                  cmap=cmap,
                 cbar_pos=(1, .45, .03, .3),
                  vmin=-1,
                  vmax=1,
                  figsize=(6.5,6))
# hide both dendrogram panels
ax.ax_row_dendrogram.set_visible(False)
ax.ax_col_dendrogram.set_visible(False)
#ax.savefig('featcor-AML.pdf',  bbox_inches='tight')

Run Mann-Whitney-Wilcoxon test:

In [None]:
from scipy.stats import mannwhitneyu

In [None]:
# histogram of the per-column Mann-Whitney p-values comparing the
# 'Mono-culture' rows with the 'Co-culture' rows
mono_rows = labels_viab.Culture == 'Mono-culture'
co_rows = labels_viab.Culture == 'Co-culture'
pvals = X_ctrl.apply(lambda col: mannwhitneyu(col[mono_rows], col[co_rows])[1])
plt.hist(pvals.values)

In [None]:
# one Mann-Whitney p-value per column of X_ctrl ('Mono-culture' rows vs
# 'Co-culture' rows), stored as a one-column table named 'pval'
mono_rows = labels_viab.Culture == 'Mono-culture'
co_rows = labels_viab.Culture == 'Co-culture'
pvaldf = X_ctrl.apply(
    lambda col: mannwhitneyu(col[mono_rows], col[co_rows])[1]
).to_frame('pval')

In [None]:
pvaldf.head()