In [None]:
# Import packages
import sys
import numpy as np
from pathlib import Path
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

src_path = str(Path.cwd().parent)
if src_path not in sys.path:
    sys.path.append(src_path)
    
from src.d03_exclusion_analysis import plot_from_dataframe

%matplotlib notebook
%matplotlib inline

In [None]:
# Location of the per-frame region-area CSV (e.g. .../exclusion_analysis/region_areas_df.csv).
# Kept as a Path because later cells build output locations from `dataframe_path.parent`.
dataframe_path = Path(input('Please enter the full path for the dataframe:\n').strip()).expanduser()

In [None]:
# Load the CSV at `dataframe_path` into the working DataFrame used by every later cell.
df = pd.read_csv(dataframe_path)

In [None]:
# Directory that will receive the saved figures.
# Kept as a Path because the plotting code joins file names onto it with the `/` operator.
graphs_dir = Path(input('Please enter the full path for the graphs directory:\n').strip()).expanduser()

In [None]:
# Display the loaded table for a quick visual check before computing metrics.
df

In [None]:
# Frames with an index below this value are labelled 'pre'; all others 'post'.
first_tx_frame = 8

# Drop a previously inserted column first so this cell can be re-run:
# DataFrame.insert raises ValueError when the column name already exists.
if 'tx status' in df.columns:
    df = df.drop(columns='tx status')
df.insert(1, 'tx status', np.where(df['frame'] < first_tx_frame, 'pre', 'post'))

# Compute additional metrics.
# Every `diff` / `cumsum` below is taken within one UID, so the first row of
# each UID is NaN for the frame-to-frame change columns.
df['% exclusion'] = df['CAAX-excluded area'] / df['cell area'] * 100
df['change in % exclusion'] = df.groupby('UID', sort=False)['% exclusion'].diff(periods=1)
df['change in cell area'] = df.groupby('UID', sort=False)['cell area'].diff(periods=1)
df['net change in exclusion'] = df['new exclusion area'] - df['new reverted area']
df['CAAX+ cell area gained or lost'] = (
    df.groupby('UID', sort=False)['CAAX-positive area'].diff(periods=1) + df['net change in exclusion']
)
df['CAAX-excluded cell area gained or lost'] = (
    df.groupby('UID', sort=False)['CAAX-excluded area'].diff(periods=1) - df['net change in exclusion']
)
df['cumulative change in % exclusion'] = df.groupby('UID', sort=False)['change in % exclusion'].cumsum()

df

In [None]:
# Save the per-frame table, including the derived metrics, next to the input CSV.
# Wrapping in Path() keeps this working when `dataframe_path` is the plain
# string returned by input(), which has no `.parent` attribute.
new_dataframe_path = Path(dataframe_path).parent / 'exclusion_analysis.csv'
df.to_csv(new_dataframe_path, index=False)

In [None]:
def _largest_magnitude(values):
    """Return the signed entry of ``values`` with the largest absolute value, or NaN if all are missing."""
    magnitudes = values.abs()
    if not magnitudes.notna().any():
        # idxmax has no valid label for an empty / all-NaN group (e.g. a group
        # containing only the first frame of a cell, whose diff is NaN).
        return np.nan
    return values.loc[magnitudes.idxmax()]


def df_groupby_calculations(x):
    """Summarise one group of per-frame rows as a single row of statistics.

    Intended for ``DataFrame.groupby(...).apply``.

    Parameters
    ----------
    x : pandas.DataFrame
        Rows of one group. Must contain 'Image Name', 'DIV', 'Tx', 'cell area',
        'change in cell area', 'change in % exclusion', 'net change in exclusion',
        'CAAX+ cell area gained or lost' and 'CAAX-excluded cell area gained or lost'.

    Returns
    -------
    pandas.Series
        Identifying fields taken from the group's first row, followed by the
        mean / median / largest-magnitude statistics of the change columns.
    """
    d = {}
    d['Image Name'] = x['Image Name'].iloc[0]
    d['DIV'] = x['DIV'].iloc[0]
    d['Tx'] = x['Tx'].iloc[0]

    # Cell area
    d['initial cell area'] = x['cell area'].iloc[0]
    d['mean change in cell area / min'] = x['change in cell area'].mean()
    d['median change in cell area / min'] = x['change in cell area'].median()
    d['max change in cell area'] = _largest_magnitude(x['change in cell area'])

    # Changes in % exclusion
    d['mean change in % exclusion / min'] = x['change in % exclusion'].mean()
    # This key was previously a duplicate of the 'mean' key, so the median
    # silently overwrote the mean and no median column was produced.
    d['median change in % exclusion / min'] = x['change in % exclusion'].median()
    d['max change in % exclusion'] = _largest_magnitude(x['change in % exclusion'])

    # Changes in exclusion within existing cell regions
    d['mean change in exclusion / min'] = x['net change in exclusion'].mean()
    d['median change in exclusion / min'] = x['net change in exclusion'].median()
    d['max change in exclusion'] = _largest_magnitude(x['net change in exclusion'])

    # Characteristics of cell regions that are gained/lost
    d['mean CAAX+ cell area gained or lost'] = x['CAAX+ cell area gained or lost'].mean()
    d['mean CAAX-excluded cell area gained or lost'] = x['CAAX-excluded cell area gained or lost'].mean()

    return pd.Series(d)

# One summary row per (UID, tx status) pair, keeping groups in order of first appearance.
summary_df = (
    df.groupby(['UID', 'tx status'], sort=False)
    .apply(df_groupby_calculations)
    .reset_index()
)
summary_df

In [None]:
# Save the per-UID summary next to the input CSV.
# Wrapping in Path() keeps this working when `dataframe_path` is the plain
# string returned by input(), which has no `.parent` attribute.
summary_df_path = Path(dataframe_path).parent / 'exclusion_analysis_groupedbyUID.csv'
summary_df.to_csv(summary_df_path, index=False)

In [None]:
def setup_fig_subplots(ydata_labels, max_cols=2):
    """Create a figure holding one subplot per entry of ``ydata_labels``.

    Parameters
    ----------
    ydata_labels : sequence
        One item per panel; only its length is used here.
    max_cols : int, default 2
        Maximum number of subplot columns.

    Returns
    -------
    fig : matplotlib.figure.Figure
    axes : numpy.ndarray
        Array of Axes. A single panel is wrapped in a one-element array so
        callers can always call ``.ravel()`` on the result.

    Raises
    ------
    ValueError
        If ``ydata_labels`` is empty.
    """
    n_panels = len(ydata_labels)
    if n_panels == 0:
        raise ValueError('ydata_labels must contain at least one label')

    subplot_cols = min(n_panels, max_cols)
    subplot_rows = int(np.ceil(n_panels / subplot_cols))
    fig, axes = plt.subplots(subplot_rows, subplot_cols, figsize=(5 * subplot_cols, 4.5 * subplot_rows),
                             constrained_layout=True)
    # plt.subplots returns a bare Axes (not an array) for a 1x1 grid.
    return fig, np.atleast_1d(axes)


def plot_timepoints(df, graphs_dir, xdata_label, ydata_labels, splitby, tx_line=0, display=False):
    """Save one multi-panel line figure per unique value of ``splitby``.

    Parameters
    ----------
    df : pandas.DataFrame
        Per-frame data; must contain ``splitby``, ``xdata_label``, 'UID',
        'Image Name' and every column in ``ydata_labels``.
    graphs_dir : str or pathlib.Path
        Directory the PNG files are written to.
    xdata_label : str
        Column plotted on the x-axis.
    ydata_labels : sequence of str
        Columns plotted on the y-axis, one panel each.
    splitby : str
        Column whose unique values each get their own figure.
    tx_line : float or None, default 0
        x position of a dashed vertical marker; ``None`` disables it.
    display : bool, default False
        Show each figure in addition to saving it.
    """
    graphs_dir = Path(graphs_dir)

    # Loop through each unique value of `splitby` (np.unique returns them sorted)
    for val in np.unique(df[splitby]):
        fig, axs = setup_fig_subplots(ydata_labels)
        axs_r = np.atleast_1d(axs).ravel()
        rows_for_val = df[splitby] == val

        # Loop through data columns
        for i, ylabel in enumerate(ydata_labels):

            # Keep this group's rows whose value differs from 0 (intended to drop the
            # first timepoint of change-from-previous columns).
            df_selection = df.loc[rows_for_val & (df[ylabel] != 0)]

            sns.lineplot(data=df_selection, x=xdata_label, y=ylabel, hue='UID', ax=axs_r[i], legend=False)
            # Governed by this function's own argument rather than a notebook-level global.
            if tx_line is not None:
                axs_r[i].axvline(x=tx_line, color='k', linewidth=0.2, linestyle='dashed')

            # y-limits include 0 and 1.2x the column's min/max over ALL rows of df,
            # so panels for different groups share the same scale.
            ymin = np.minimum(0, df[ylabel].min() * 1.2)
            ymax = np.maximum(0, df[ylabel].max() * 1.2)
            axs_r[i].set(ylim=(ymin, ymax))
            axs_r[i].set_title(val)

        # NOTE(review): the file name is built from 'Image Name', not `val`; groups that
        # share an image name would overwrite each other's file -- confirm this is intended.
        imgname = df.loc[rows_for_val].iloc[0]['Image Name']
        fig.suptitle(f'{imgname} over time')
        fig.savefig(graphs_dir / f'{imgname}_tp.png')

        if display:
            plt.show()
        plt.close(fig)

# plot a graph comparing pre- and post- treatment
def plot_preposttx_metrics(df, graphs_dir, xdata_label, hue=None, display=True):
    """Plot every summary metric against ``xdata_label`` and save the figure.

    Parameters
    ----------
    df : pandas.DataFrame
        Summary table; every column from 'initial cell area' onward is plotted
        in its own panel.
    graphs_dir : str or pathlib.Path
        Directory the PNG is written to, as
        ``summary_metrics_by{xdata_label}_preposttx.png``.
    xdata_label : str
        Column plotted on the x-axis.
    hue : str or None, default None
        Column used to colour and separate the lines.
    display : bool, default True
        Show the figure in addition to saving it.
    """
    graphs_dir = Path(graphs_dir)

    datacol = df.columns.tolist()
    ydata_labels = datacol[datacol.index('initial cell area'):]

    fig, axs = setup_fig_subplots(ydata_labels)
    axs_r = np.atleast_1d(axs).ravel()

    for ax, ylabel in zip(axs_r, ydata_labels):
        # NOTE(review): lineplot sorts by x by default; with string categories such as
        # 'pre'/'post' confirm that the resulting left-to-right order is the intended one.
        sns.lineplot(data=df, x=xdata_label, y=ylabel, hue=hue, ax=ax, legend=True, marker='o', linewidth=0.5)
        ax.set_ylabel(ylabel)

        # y-limits always include 0, padded to 1.2x the column's extremes
        ymin = np.minimum(0, df[ylabel].min() * 1.2)
        ymax = np.maximum(0, df[ylabel].max() * 1.2)
        ax.set(ylim=(ymin, ymax))
        ax.margins(x=0.3)
        ax.axhline(0, color='k', linewidth=0.2)

        # Without a hue no legend is created, and move_legend raises on an axes that has none.
        if ax.get_legend() is not None:
            sns.move_legend(ax, "upper left", bbox_to_anchor=(1, 1))

    fig.savefig(graphs_dir / f'summary_metrics_by{xdata_label}_preposttx.png')
    if display:
        plt.show()
    plt.close(fig)
        
def plot_summary_metrics(df, graphs_dir, xdata_label, display):
    """Plot per-UID summary metrics against ``xdata_label`` and save the figure.

    The rows of ``df`` are grouped by 'UID' and summarised with
    ``df_groupby_calculations``; every resulting column from
    'initial cell area' onward gets a panel showing the median with a
    confidence interval (point plot) overlaid with the individual values
    (swarm plot).

    Parameters
    ----------
    df : pandas.DataFrame
        Per-frame data containing 'UID' and the columns required by
        ``df_groupby_calculations``.
    graphs_dir : str or pathlib.Path
        Directory the PNG is written to, as ``summary_metrics_by{xdata_label}.png``.
    xdata_label : str
        Column of the grouped table used on the x-axis.
    display : bool
        Show the figure in addition to saving it.
    """
    graphs_dir = Path(graphs_dir)

    grouped_df = df.groupby(['UID']).apply(df_groupby_calculations).reset_index()
    datacol = grouped_df.columns.tolist()
    ydata_labels = datacol[datacol.index('initial cell area'):]

    fig, axs = setup_fig_subplots(ydata_labels)
    axs_r = np.atleast_1d(axs).ravel()

    # Colour the swarm by this column only when the grouped table actually has it;
    # naming a missing column as `hue` makes seaborn raise.
    hue_col = 'average change in CAAX protein-negative cell area / hr'
    swarm_kwargs = {}
    if hue_col in grouped_df.columns:
        swarm_kwargs = dict(hue=hue_col, palette='vlag', legend=False)

    for ax, ylabel in zip(axs_r, ydata_labels):
        sns.pointplot(grouped_df, x=xdata_label, y=ylabel, estimator='median', errorbar='ci',
                      color='k', capsize=0.2, join=False, errwidth=0.6, ax=ax)
        sns.swarmplot(grouped_df, x=xdata_label, y=ylabel, ax=ax, **swarm_kwargs)
        ax.set_ylabel(ylabel)

    fig.savefig(graphs_dir / f'summary_metrics_by{xdata_label}.png')
    if display:
        plt.show()
    # A trailing `plt.ylim(0)` ran after the figure was saved and shown, so it never
    # affected the output; it has been dropped.
    plt.close(fig)


In [None]:
# Every column to the right of 'UID' is plotted against elapsed time, one panel each.
xdata_label = 'Elapsed time (min)'
datacol = list(df.columns)
ydata_labels = datacol[datacol.index('UID') + 1:]

# Per-UID time courses (shown and saved), then the pre/post summary (saved only).
plot_timepoints(
    df,
    graphs_dir,
    xdata_label,
    ydata_labels,
    splitby='UID',
    tx_line=0,
    display=True,
)
plot_preposttx_metrics(
    summary_df,
    graphs_dir,
    xdata_label='tx status',
    hue='UID',
    display=False,
)

In [None]:
# Additional graph: cumulative change in % exclusion for every UID on shared axes.
tx_line = 0
ax = sns.lineplot(data=df, x=xdata_label, y='cumulative change in % exclusion', hue='UID', legend=True, linewidth=0.5)
ax.axvline(x=tx_line, color='k', linewidth=0.2, linestyle='dashed')
# One tick every 2 x-units, starting at the smallest x value (the largest is excluded by arange).
plt.xticks(np.arange(min(df[xdata_label]), max(df[xdata_label]), 2))
sns.move_legend(ax, "upper left", bbox_to_anchor=(1, 1))
plt.tight_layout()
# Path() keeps the `/` join working when `graphs_dir` is a plain string; the extension is
# spelled out to match the other saved figures.
graph_path = Path(graphs_dir) / 'multiple_overlay_over_time.png'
plt.savefig(graph_path)
plt.show()



In [None]:
# Aggregated plot: mean across all rows at each x value, with a +/- 1 SD band.
tx_line = 0
ax = sns.lineplot(data=df, x=xdata_label, y='cumulative change in % exclusion', err_style='band', errorbar='sd', estimator=np.mean, legend=True, linewidth=0.5)
ax.axvline(x=tx_line, color='k', linewidth=0.2, linestyle='dashed')
# One tick every 2 x-units, starting at the smallest x value (the largest is excluded by arange).
plt.xticks(np.arange(min(df[xdata_label]), max(df[xdata_label]), 2))
# Path() keeps the `/` join working when `graphs_dir` is a plain string.
graph_path = Path(graphs_dir) / 'agg_mean_and_sd.png'
plt.savefig(graph_path)
plt.show()