In [None]:
import pandas as pd
import numpy as np
import os
from scipy.stats import ttest_ind as ttest
from statsmodels.stats.multitest import fdrcorrection as fdr
from matplotlib import pyplot as plt
from umap import UMAP
from sklearn.cluster import DBSCAN
from sklearn.preprocessing import robust_scale
import hdbscan
import sys
import seaborn as sns
sys.path.insert(0, '../../')
sys.path.insert(0, '../../cycif/')
from get_data import file2frame
from cycif import *
from common_apis import *

In [None]:
# For each time point: embed the pooled data in 2-D with UMAP (re-using a
# cached embedding when one is on disk), save a scatter plot coloured by drug
# and sized by relative concentration, then save per-drug density plots.
os.chdir('d:/data')
metadata = pd.read_csv('MCF10A commons metadata.csv', index_col=0)
# A zero concentration would give 0/0 = NaN in the per-drug normalisation
# below for any drug whose concentrations are all zero, so map it to 1.
metadata.loc[metadata.Conc == 0, 'Conc'] = 1

for time in reversed(['24h', '48h', '72h']):
    fn = 'MCF10A commons pooled data ' + time + '.csv'
    umap_fn = 'Umap data ' + fn

    # Load and scale the raw data on every iteration so that `df_data` always
    # belongs to the current time point, whether or not a cache exists.
    df_data = pd.read_csv(fn, index_col=0)
    df_data = robust_scale(df_data, quantile_range=(1, 99))
    # NOTE(review): n_neighbors is computed but UMAP below uses a fixed 50.
    n_neighbors = int(df_data.shape[0] / 100)
    umap = UMAP(n_components=2, n_neighbors=50, min_dist=0.1)

    if os.path.exists(umap_fn):
        # Cached embedding available: read it instead of re-fitting.
        df_data_umap = pd.read_csv(umap_fn, index_col=0).values
    else:
        # No cache yet: fit the embedding on this time point and store it.
        df_data_umap = umap.fit_transform(df_data)
        pd.DataFrame(df_data_umap).to_csv(umap_fn)

    # Shrink the markers as the number of points grows. Clamp the divisor to 1
    # so that data sets with fewer than 50000 rows do not divide by zero.
    size_divisor = max(1, int(df_data_umap.shape[0] / 50000))
    min_size = 5 / size_divisor
    max_size = 20 / size_divisor

    # Metadata rows for this time point; assumed to be in the same row order
    # as the pooled data file -- TODO confirm.
    time_meta = metadata[metadata.time == time]
    drug_names = time_meta.DrugName.values
    # Concentration relative to the highest concentration of the same drug.
    # Passed as a plain array so it lines up positionally with x, y and hue.
    sizes = time_meta.groupby('DrugName').Conc.transform(
        lambda x: x / x.max()).values

    plt.figure(figsize=(32, 18))
    # x/y are passed by keyword: recent seaborn releases no longer accept
    # positional data vectors.
    scatter_ax = sns.scatterplot(
        x=df_data_umap[:, 0], y=df_data_umap[:, 1],
        hue=drug_names, size=sizes, sizes=(min_size, max_size),
        edgecolor='none', legend='brief')
    legend_handles, legend_labels = scatter_ax.get_legend_handles_labels()
    # Keep only the first nine legend entries.
    scatter_ax.legend(legend_handles[:9], legend_labels[:9],
                      markerscale=2, fontsize=20)
    plt.savefig(time + '.png')
    plt.close()

    # contour maps
    labels = time_meta.DrugName.reset_index()
    # Set up the figure: a 3x3 grid, i.e. room for DMSO plus eight drugs.
    _, axs = plt.subplots(3, 3, figsize=(30, 30))
    axs = axs.ravel()
    # After reset_index the row labels are positions 0..n-1, which are used
    # to select the matching rows of the embedding.
    dmso = labels[labels.iloc[:, 1] == 'DMSO'].index
    dmso = df_data_umap[dmso, :]
    # Draw the density plots: DMSO in blue first, then each drug in red.
    # NOTE(review): positional x/y and shade/shade_lowest are the pre-0.11
    # seaborn kdeplot API; newer seaborn needs x=, y=, fill= -- confirm the
    # installed version before changing.
    dmso_plot = sns.kdeplot(dmso[:, 0], dmso[:, 1], cmap="Blues", shade=True, shade_lowest=False, ax=axs[0])
    dmso_plot.set_title('DMSO')
    ax_i = 1
    for drug in sorted(labels.iloc[:, 1].unique()):
        if drug == 'DMSO':
            continue
        idx = labels[labels.iloc[:, 1] == drug].index
        df_drug = df_data_umap[idx, :]
        subplot = sns.kdeplot(df_drug[:, 0], df_drug[:, 1], cmap="Reds", shade=True, shade_lowest=False, ax=axs[ax_i])
        subplot.set_title(drug)
        ax_i += 1
    plt.savefig(time + ' Distribution contour plot.png')
    plt.close()