# Large-Scale Morphology Analysis

In [None]:
# load third-party Python modules
import javabridge
import bioformats as bf
import skimage
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sn
import pandas as pd
import re
import os
import sys
import h5py
from sklearn.manifold import TSNE

# Start the Java VM with the Bio-Formats JARs on its class path.
# NOTE(review): no later cell visible here calls bioformats or shuts the VM
# down again (javabridge.kill_vm()) — confirm the VM is actually needed.
javabridge.start_vm(class_path=bf.JARS)

In [None]:
# read the tab-separated plate annotation table into a DataFrame
annot_path = '../data/AML_trainset/drugannot.txt'
annot_df = pd.read_csv(annot_path, sep='\t')

In [None]:
# Plates included in this analysis. To pick up every plate directory instead:
#   plates = [f for f in os.listdir('../imgdata') if 'Plate' in f]
plates = [
    '180528_Plate3',
    '180528_Plate5',
    '180625_Plate3',
    '181109_Plate1',
    '181109_Plate3',
]

In [None]:
# directory of the first plate in `plates`; only this plate is read below
platedir = os.path.join('../imgdata/', plates[0])

In [None]:
# annotation rows whose drug is DMSO, and the distinct well IDs among them
is_dmso = annot_df['Drug'] == 'DMSO'
dmso = annot_df.loc[is_dmso].reset_index(drop=True)
dmso_wells = dmso['well'].unique()

In [None]:
# one table per DMSO well, read from "<platedir>/<well>.csv"
imgdf = [pd.read_csv(os.path.join(platedir, well + '.csv'))
         for well in dmso_wells]

In [None]:
# stack the per-well tables into one frame with a fresh 0..n-1 index
imgdf = pd.concat(imgdf).reset_index(drop=True)
# Split the metadata columns from the feature columns. The explicit copy makes
# `labels` an independent frame, so the column assignments below no longer
# trigger pandas' SettingWithCopyWarning.
labels = imgdf[['class', 'file']].copy()
imgdf = imgdf.drop(['class', 'file'], axis=1)
# well ID = file name with everything from the first "f<digit>" onwards removed
# (presumably the field-of-view suffix — confirm against the file naming)
labels['well'] = labels['file'].replace(regex=r'f[0-9].+', value='')
# class code 2 is labelled 'Viable', every other code 'Apoptotic'
labels['class'] = labels['class'].apply(lambda x: 'Viable' if x == 2 else 'Apoptotic')
# attach the DMSO annotation columns to every row
# NOTE(review): later cells concatenate `labels` with `imgdf` by position, which
# assumes `dmso` holds exactly one row per well (otherwise the merge duplicates
# rows and the two frames no longer line up) — confirm.
labels = pd.merge(labels, dmso, on='well')

In [None]:
from sklearn.preprocessing import StandardScaler
# fit the per-column standardisation on the DMSO-well feature table
scaler = StandardScaler()
scaler.fit(imgdf)

In [None]:
from bioimg.singlecell import scale_data, check_data
# apply the fitted scaler through the project helper `scale_data`
# (its implementation is not visible here; later cells index the result by
# column name, so it is used as a DataFrame)
imgdf_scaled = scale_data(imgdf, scaler=scaler)

In [None]:
# run the project helper `check_data` on the scaled table
# (what exactly it verifies is not visible here)
check_data(imgdf_scaled)

In [None]:
# feature columns shown in the overlays: three properties for each of the
# three channels, named "ch-<channel>-<property>"
feat_subset = ['ch-' + channel + '-' + prop
               for channel in ('Calcein', 'Hoechst', 'Lysosomal')
               for prop in ('area', 'eccentricity', 'mean_intensity')]

In [None]:
# scaled values of the selected feature columns only (all rows)
Xfeat = imgdf_scaled.loc[:,feat_subset]

In [None]:
# 2-D t-SNE embedding computed on ALL scaled features (not just feat_subset);
# random_state is fixed so the embedding is repeatable
X_tsne = TSNE(n_components=2, random_state=21, perplexity=50).fit_transform(imgdf_scaled)

In [None]:
# put the embedding coordinates next to the labels; axis=1 concat aligns on the
# index, and both frames carry a default 0..n-1 index here
X_df = pd.concat([pd.DataFrame(X_tsne, columns=['tsne1', 'tsne2']), labels], axis=1)

In [None]:
# append the selected scaled feature columns to the embedding/label table
X_df = pd.concat([X_df, Xfeat], axis=1)

In [None]:
from bioimg.singlecell import plot_dimred
# project plotting helper (implementation not visible here); presumably draws
# the tsne1/tsne2 columns, with 'Culture' passed as hue and 'class' as style
plot_dimred(X_df, 
            hue='Culture',
            style='class',
            title='DMSO control wells')

## Morphology analysis for viable cells
Restrict the analysis to viable cells only:

In [None]:
# rows labelled 'Viable'
viable_mask = (labels['class'] == 'Viable').values
img_viab = imgdf.iloc[np.flatnonzero(viable_mask), :]
# refit the scaler on the viable rows only; this rebinds `scaler`, so cells
# that use `scaler` afterwards get this fit
scaler = StandardScaler().fit(img_viab)
img_viab = scale_data(img_viab, scaler=scaler).reset_index(drop=True)
# matching label rows, re-indexed 0..n-1 like img_viab
labels_viab = labels.loc[viable_mask].reset_index(drop=True)

In [None]:
# 2-D t-SNE embedding of the scaled viable-cell table
# (perplexity 30 here, whereas the other embeddings in this notebook use 50)
X_tsne = TSNE(n_components=2, random_state=21, perplexity=30).fit_transform(img_viab)

In [None]:
# embedding coordinates side by side with the viable-cell labels
X_viab = pd.concat([pd.DataFrame(X_tsne, columns=['tsne1', 'tsne2']), labels_viab], axis=1)

In [None]:
# append the selected scaled feature columns of the viable cells
X_viab = pd.concat([X_viab, img_viab.loc[:,feat_subset]], axis=1)

In [None]:
# embedding plot of the viable control cells with 'Culture' passed as hue
plot_dimred(X_viab, hue='Culture',
           title='Viable cells in control wells')
# anchor the legend at x=1.2 in axes coordinates, i.e. right of the axes
plt.legend(loc='lower right',
           bbox_to_anchor=(1.2,0.05))

In [None]:
from bioimg.singlecell import facet_dimred
# project helper (implementation not visible here); called with a 3x3 grid,
# which matches the 9 entries of feat_subset
facet_dimred(X_viab, feat_subset=feat_subset,
            nrows=3, ncols=3)

Save the viable cells of DMSO wells before loading drug-treated wells:

In [None]:
# NOTE: this rebinds `img_viab` to the UNSCALED viable rows of `imgdf`
# (the scaled version built earlier is no longer reachable under this name)
img_viab = imgdf.iloc[np.where(labels['class'] == 'Viable')[0],:]
# keep them as the control set; `imgdf` and `labels` are overwritten below
X_ctrl = img_viab

Now load some drug wells:

In [None]:
# drugs whose wells are loaded below; the order matters, because consecutive
# groups of five are plotted together in later cells
drug_sel = [
    'Tofacitinib',
    'Midostaurin',
    'Ganetespib',
    'Lenalidomide',
    'Pyridone 6',
    'UMI-77',
    'Bafilomycin A1',
    'Quizartinib',
    'Hydroxychloroquine',
    'Fludarabine',
    'Vorinostat',
    'Thioguanine',
    'Nutlin 3a',
    'Palbociclib',
    'Carfilzomib',
    'JQ1',
    'Cytarabine',
    'BAY61-3606',
    'Venetoclax',
    'Ixazomib',
]

In [None]:
# annotation rows of the selected drugs, and the distinct wells they occupy
in_selection = annot_df['Drug'].isin(drug_sel)
drugs = annot_df[in_selection].reset_index(drop=True)
drug_wells = drugs['well'].unique()

In [None]:
# read "<platedir>/<well>.csv" for every drug well, keeping only the rows with
# class code 2 (the code labelled 'Viable' elsewhere in this notebook)
well_tables = (pd.read_csv(os.path.join(platedir, well + '.csv'))
               for well in drug_wells)
imgdf = [table[table['class'] == 2] for table in well_tables]

In [None]:
# stack the per-well tables into one frame with a fresh 0..n-1 index
imgdf = pd.concat(imgdf).reset_index(drop=True)
# Split the metadata columns from the feature columns. The explicit copy makes
# `labels` an independent frame, so the column assignments below no longer
# trigger pandas' SettingWithCopyWarning.
labels = imgdf[['class', 'file']].copy()
imgdf = imgdf.drop(['class', 'file'], axis=1)
# well ID = file name with everything from the first "f<digit>" onwards removed
# (presumably the field-of-view suffix — confirm against the file naming)
labels['well'] = labels['file'].replace(regex=r'f[0-9].+', value='')
# the rows were already filtered to class code 2 when the wells were read, so
# every row ends up labelled 'Viable'
labels['class'] = labels['class'].apply(lambda x: 'Viable' if x == 2 else 'Apoptotic')
# attach the drug annotation columns to every row
# NOTE(review): later cells concatenate `labels` with the embedding by position,
# which assumes `drugs` holds exactly one row per well (otherwise the merge
# duplicates rows and the frames no longer line up) — confirm.
labels = pd.merge(labels, drugs, on='well')

In [None]:
# scale the drug-well cells with the current `scaler`; in cell order that is
# the one fitted on the viable DMSO cells, not on the drug wells themselves
X_drug = scale_data(imgdf, scaler=scaler)
# selected feature columns of the scaled drug-well table
Xfeat = X_drug.loc[:,feat_subset]

In [None]:
# 2-D t-SNE embedding of the scaled drug-well table (all features, fixed seed)
X_tsne = TSNE(n_components=2, random_state=21, perplexity=50).fit_transform(X_drug)

In [None]:
# embedding coordinates + labels/annotation, joined column-wise on the index
X_df = pd.concat([pd.DataFrame(X_tsne, columns=['tsne1', 'tsne2']), labels], axis=1)
# then the selected scaled feature columns
X_df = pd.concat([X_df, Xfeat], axis=1)

In [None]:
# embedding plot of the drug-well cells with 'Culture' passed as hue
plot_dimred(X_df, hue='Culture',
           title='Viable cells in drug-treated wells')

In [None]:
# split drug_sel into consecutive groups of 5 (the last group may be shorter);
# with the 20 drugs listed above this gives 4 groups
drug_chunks = [drug_sel[i:i + 5] for i in range(0, len(drug_sel), 5)]

In [None]:
# 2x2 figure: one t-SNE scatter per group of drugs, coloured by drug
fig, ax = plt.subplots(nrows=2, ncols=2, figsize=(14, 12))
sn.set(font_scale=1.2)
sn.set_style('white')
sn.despine()
# ax.flat walks the grid row by row, so panel k sits at row k // 2, column k % 2
for k, panel in enumerate(ax.flat):
    in_chunk = np.isin(X_df['Drug'], drug_chunks[k])
    sn.scatterplot(x='tsne1', y='tsne2', data=X_df[in_chunk],
                   hue='Drug', s=40, alpha=0.8, ax=panel)
    # anchor the legend at x=1.4 in axes coordinates, i.e. right of the panel
    panel.legend(loc='lower right', bbox_to_anchor=(1.4, 0.7))
    panel.set_xlabel('TSNE 1')
    panel.set_ylabel('TSNE 2')
# extra horizontal space between the columns, where the legends are anchored
fig.subplots_adjust(wspace=0.5)

In [None]:
# project helper called on the drug-well embedding with a 3x3 grid, which
# matches the 9 entries of feat_subset
facet_dimred(X_df, feat_subset=feat_subset,
            nrows=3, ncols=3)

In [None]:
# display the column names of the combined table
X_df.columns

In [None]:
# long format: one row per (table row, feature) with 'Culture' kept as id column
colsub = ['Culture'] + feat_subset
X_long = X_df[colsub].melt(id_vars=['Culture'],
                           value_vars=feat_subset,
                           var_name='feature',
                           value_name='val')

In [None]:
from bioimg.singlecell import facet_density
# project helper (implementation not visible here); called on the long table
# with 'feature' as the facet column and 'Culture' as hue
facet_density(X_long, feat_column='feature',
             ncols=3, hue='Culture')

In [None]:
# long format again, this time keeping both 'Drug' and 'Culture' as id columns
id_columns = ['Drug', 'Culture']
colsub = id_columns + feat_subset
X_long = X_df[colsub].melt(id_vars=id_columns,
                           value_vars=feat_subset,
                           var_name='feature',
                           value_name='val')

In [None]:
from bioimg.singlecell import facet_boxplot
# project helper (implementation not visible here); called with 'Drug' as x,
# 'val' as y, 'Culture' as hue and 'feature' as the facet column, on a 3x3 grid
facet_boxplot(X_long, x='Drug',
              y='val', feat_column='feature',
              ncols=3,
              nrows=3, hue='Culture')

In [None]:
# give the control features a 0..n-1 index so they line up with labels_viab
X_ctrl = X_ctrl.reset_index(drop=True)

In [None]:
# same re-indexing for the viable-cell labels
labels_viab = labels_viab.reset_index(drop=True)

In [None]:
# culture label next to the unscaled control features
# NOTE(review): `Xfeat` is not read again in the cells visible here
Xfeat = pd.concat([labels_viab['Culture'], X_ctrl], axis=1)

Check correlation structure of the features:

In [None]:
from scipy.spatial.distance import pdist, squareform


def cordf(X):
    """Return the matrix of pairwise Pearson correlations between the rows of X.

    Parameters
    ----------
    X : array-like of shape (n, m)
        One observation vector per row.

    Returns
    -------
    numpy.ndarray of shape (n, n)
        Entry (i, j) is the correlation between row i and row j. The diagonal
        is set to 1.
    """
    # pdist's 'correlation' metric is the distance 1 - r, so 1 - distance is r
    corr = squareform(1. - pdist(X, 'correlation'))
    # squareform fills the diagonal with zeros, which would report a
    # self-correlation of 0; a row's correlation with itself is 1
    np.fill_diagonal(corr, 1.)
    return corr

In [None]:
# feature correlation
featcor = pd.DataFrame(cordf(X_ctrl.T),
                       index=X_ctrl.columns,
                       columns=X_ctrl.columns )

In [None]:
from bioimg.singlecell import plot_heatmap
# draw the feature correlation matrix with the project heatmap helper
plot_heatmap(featcor)

In [None]:
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.feature_selection import RFE

In [None]:
# Encode the culture condition as integer class labels:
# 'Mono-culture' -> 0, 'Co-culture' -> 1.
# The mapping builds a new Series instead of assigning into
# `labels_viab['Culture'].values`: that array can be a view of the frame's
# data, so in-place writes would replace the 'Culture' strings that later
# cells still compare against (and they raise on the read-only arrays that
# pandas returns under copy-on-write).
y = labels_viab['Culture'].map({'Mono-culture': 0, 'Co-culture': 1})
# any other or missing label becomes NaN, so astype(int) raises instead of
# silently producing a wrong class
y = y.astype(int).values

In [None]:
# random forest used as the ranking estimator inside the feature elimination;
# random_state is fixed for repeatability, n_jobs=-1 uses all available cores
clf = RandomForestClassifier(n_estimators=500,
                             max_depth=7,
                             random_state=3,
                             n_jobs=-1)
# recursive feature elimination: drop 50 features per round until 500 remain
# NOTE(review): assumes X_ctrl has more than 500 columns; otherwise nothing is
# eliminated — confirm
rfe = RFE(estimator=clf, n_features_to_select=500, step=50)
# fitted on the unscaled control features against the 0/1 culture labels
rfe.fit(X_ctrl, y)

In [None]:
# names of the feature columns kept by the recursive feature elimination
sel_features = X_ctrl.columns[rfe.get_support()].values

Run a Mann–Whitney U (Wilcoxon rank-sum) test on every feature, comparing mono-culture with co-culture cells:

In [None]:
from scipy.stats import mannwhitneyu

In [None]:
# per-feature Mann-Whitney U test between mono- and co-culture control cells
# NOTE(review): the masks need the original string labels in
# labels_viab['Culture']; make sure no earlier cell overwrote them in place
is_mono = labels_viab.Culture == 'Mono-culture'
is_co = labels_viab.Culture == 'Co-culture'
pvals = X_ctrl.apply(lambda col: mannwhitneyu(col[is_mono], col[is_co])[1])
# histogram of the p-values over all features
plt.hist(pvals.values)

In [None]:
# same per-feature test, collected into a one-column table indexed by feature
is_mono = labels_viab.Culture == 'Mono-culture'
is_co = labels_viab.Culture == 'Co-culture'
pvaldf = pd.DataFrame(
    X_ctrl.apply(lambda col: mannwhitneyu(col[is_mono], col[is_co])[1]),
    columns=['pval'])

In [None]:
# show the first rows of the p-value table
pvaldf.head()