# Aggregated Well Profiles

In [None]:
# load standard-library and third-party Python modules
import javabridge
import bioformats as bf
import skimage
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sn
import pandas as pd
import re
import os
import sys
import h5py
import json
from sklearn.manifold import TSNE

# Start the Java VM with the bioformats JARs on its class path.
# NOTE(review): no later cell visible here calls javabridge/bioformats or
# shuts the VM down -- confirm whether this start-up is still required.
javabridge.start_vm(class_path=bf.JARS)

In [None]:
# Tab-separated patient annotation; later cells look up a plate's 'Diagnosis'
# by matching on its 'plate' column.
patannot = pd.read_csv(
    '../data/coculture_metafiles/patannot.txt',
    sep='\t',
)

In [None]:
# Tab-separated plate annotation, merged onto the profiles by 'well' for
# every plate whose diagnosis is not 'HCL'.
annot_df = pd.read_csv(
    '../data/AML_trainset/drugannot.txt',
    sep='\t',
)

In [None]:
# Tab-separated plate layout that is merged by 'well' onto plates whose
# diagnosis is 'HCL'.
hcl_annot = pd.read_csv(
    '../data/coculture_metafiles/HCL_plate_layout.txt',
    sep='\t',
)

In [None]:
# Preview the first five rows of the HCL layout table.
hcl_annot.head(5)

In [None]:
# Viable cell counts (tab-separated); filtered by 'plate' and merged by 'well'
# in the aggregation loops below.
viabcount = pd.read_csv('../data/viable-count.tsv', sep='\t')

In [None]:
# Preview the first five rows of the viable-count table.
viabcount.head(5)

In [None]:
# Read the feature-selection sets from JSON, then display the size of each set.
with open('featselect.json') as handle:
    featdict = json.load(handle)

{name: len(members) for name, members in featdict.items()}

In [None]:
# Columns pulled from each profile file: the well identifier followed by the
# 'residcor' feature set.
feat_sel = ['well', *featdict['residcor']]

In [None]:
# Directory whose entries are listed as plates and read as CSV profile files
# by the cells below.
prof_dir = '../data/coculture_profiles/'

In [None]:
# One profile CSV per plate (the loops below strip '.csv' to get the plate id).
# Keep only *.csv entries so stray files or sub-directories in prof_dir are not
# passed to pd.read_csv, and sort because os.listdir returns entries in
# arbitrary order.
plates = sorted(
    fname for fname in os.listdir(prof_dir) if fname.endswith('.csv')
)

In [None]:
# Build one drug-level table per plate: the mean of every selected feature and
# viable-count column per 'Drug', computed separately for mono- and co-culture
# wells and joined side by side (pandas adds its default '_x' / '_y' suffixes
# to the mono / co columns because their names collide).
data_all = []
for plate in plates:
    plate_id = plate.replace('.csv', '')

    # selected image features plus this plate's viable counts, matched by well
    features = pd.read_csv(os.path.join(prof_dir, plate))[feat_sel].copy()
    counts = viabcount[viabcount['plate'] == plate_id].drop(['plate'], axis=1)
    features = pd.merge(features, counts, on='well')

    # plates diagnosed as 'HCL' get the HCL layout, all others the default one
    diagnosis = patannot[patannot['plate'] == plate_id]['Diagnosis'].values[0]
    layout = hcl_annot if diagnosis == 'HCL' else annot_df
    annotated = pd.merge(features, layout, on='well')

    per_culture = []
    for culture in ('Mono-culture', 'Co-culture'):
        wells = annotated[annotated['Culture'] == culture]
        wells = wells.drop(['well', 'Culture', 'conc'], axis=1)
        per_culture.append(wells.groupby(['Drug'], as_index=False).agg('mean'))
    mono_means, co_means = per_culture

    # only drugs present in both cultures survive the inner join
    combined = pd.merge(mono_means, co_means, on='Drug', how='inner')
    combined = combined.set_index('Drug')
    combined['plate'] = plate_id
    data_all.append(combined)

In [None]:
# Stack the per-plate tables row-wise into one long table.
df_all = pd.concat(data_all, axis=0)

In [None]:
# Reshape to wide form: one row per index label, one column per
# (value column, plate) pair.
df_wide = df_all.pivot_table(index=df_all.index, columns='plate')

In [None]:
# Drop the row labelled "Vemurafenib" (if present) from the wide table.
keep_rows = df_wide.index != "Vemurafenib"
df_wide = df_wide.loc[keep_rows]

In [None]:
# Draw the wide table with the project's heatmap helper, labelling rows only.
from bioimg.singlecell import plot_heatmap

plot_heatmap(
    df_wide,
    size=(10, 14),
    xticklabels=False,
    yticklabels=True,
    vmin=-5,
    vmax=5,
)

Now include drug concentrations:

In [None]:
# Per-plate profiles at drug+concentration resolution: the mean of every
# selected feature and viable-count column per 'drugconc' label
# ("<Drug>_<conc>"), computed separately for mono- and co-culture wells and
# joined side by side (pandas adds its default '_x' / '_y' suffixes to the
# mono / co columns because their names collide).
data_all = []
for plate in plates:
    # plate id = file name without the '.csv' extension; computed once per plate
    plate_id = plate.replace('.csv', '')

    imgdf = pd.read_csv(os.path.join(prof_dir, plate))
    X_subset = imgdf[feat_sel].copy()

    count_df = viabcount[viabcount['plate'] == plate_id]
    count_df = count_df.drop(['plate'], axis=1)
    X_subset = pd.merge(X_subset, count_df, on='well')

    # Pick the layout by diagnosis, exactly as the other two aggregation loops
    # in this notebook do. This cell used annot_df for every plate, so HCL
    # plates were annotated with a layout that presumably does not match them.
    diagnosis = patannot[patannot['plate'] == plate_id]['Diagnosis'].values[0]
    layout = hcl_annot if diagnosis == 'HCL' else annot_df
    plate_df = pd.merge(X_subset, layout, on='well')

    plate_df['drugconc'] = plate_df['Drug'] + "_" + plate_df['conc'].astype(str)

    # 'drugconc' replaces 'Drug' and 'conc' as the grouping key
    drop_cols = ['well', 'Culture', 'Drug', 'conc']
    mono = plate_df[plate_df['Culture'] == 'Mono-culture'].drop(drop_cols, axis=1)
    co = plate_df[plate_df['Culture'] == 'Co-culture'].drop(drop_cols, axis=1)

    mono = mono.groupby(['drugconc'], as_index=False).agg('mean')
    co = co.groupby(['drugconc'], as_index=False).agg('mean')

    # only drug/concentration pairs present in both cultures are kept
    plate_df = pd.merge(mono, co, on='drugconc', how='inner')
    plate_df.index = plate_df['drugconc']
    plate_df = plate_df.drop(['drugconc'], axis=1)
    plate_df['plate'] = plate_id
    data_all.append(plate_df)

In [None]:
# Stack the per-plate drug/concentration tables row-wise.
df_all = pd.concat(data_all, axis=0)

In [None]:
#df_wide[df_wide.isna()] = 0

In [None]:
# Disabled heatmap call: wrapped in a bare string literal, so nothing is
# plotted when this cell runs.
'''plot_heatmap(df_wide, xticklabels=False, size=(10,24),
            yticklabels=True, vmin=-5, vmax=5)'''

In [None]:
# Per-plate profiles at drug+concentration resolution with explicit culture
# suffixes: mono-culture means carry '_M', co-culture means carry '_C', and
# the two are joined on the shared 'drugconc' label ("<Drug>_<conc>").
data_all = []
for plate in plates:
    plate_id = plate.replace('.csv', '')

    # selected image features plus this plate's viable counts, matched by well
    features = pd.read_csv(os.path.join(prof_dir, plate))[feat_sel].copy()
    counts = viabcount[viabcount['plate'] == plate_id].drop(['plate'], axis=1)
    features = pd.merge(features, counts, on='well')

    # plates diagnosed as 'HCL' get the HCL layout, all others the default one
    diagnosis = patannot[patannot['plate'] == plate_id]['Diagnosis'].values[0]
    layout = hcl_annot if diagnosis == 'HCL' else annot_df
    annotated = pd.merge(features, layout, on='well')
    annotated['drugconc'] = annotated['Drug'] + "_" + annotated['conc'].astype(str)

    # 'drugconc' replaces 'Drug' and 'conc' as the grouping key
    drop_cols = ['well', 'Culture', 'Drug', 'conc']
    mono_wells = annotated[annotated['Culture'] == 'Mono-culture'].drop(drop_cols, axis=1)
    co_wells = annotated[annotated['Culture'] == 'Co-culture'].drop(drop_cols, axis=1)

    mono_means = mono_wells.groupby(['drugconc'], as_index=False).agg('mean')
    co_means = co_wells.groupby(['drugconc'], as_index=False).agg('mean')

    # suffix every column, then restore the join key's plain name
    mono_means.columns = mono_means.columns + "_M"
    co_means.columns = co_means.columns + "_C"
    mono_means = mono_means.rename({"drugconc_M": "drugconc"}, axis=1)
    co_means = co_means.rename({"drugconc_C": "drugconc"}, axis=1)

    # only drug/concentration pairs present in both cultures are kept
    combined = pd.merge(mono_means, co_means, on='drugconc', how='inner')
    combined = combined.set_index('drugconc')
    combined['plate'] = plate_id
    data_all.append(combined)

In [None]:
# Stack the suffixed per-plate tables row-wise.
df_all = pd.concat(data_all, axis=0)

In [None]:
# Reshape to wide form, then flatten each (value column, plate) column pair
# into a single "column|plate" string label.
df_wide = df_all.pivot_table(index=df_all.index, columns='plate')
flat_labels = df_wide.columns.map('|'.join)
df_wide.columns = flat_labels

In [None]:
# Write the wide profile table, including its row index, to CSV.
df_wide.to_csv('../data/all_profiles_coculture.csv', index=True)