In [1]:
import anndata
import os
import pandas as pd
import numpy as np
from delve import *
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
import phate

In [8]:
def standardize(x =  None):
    """Z-scores every feature column with sklearn's StandardScaler (centering and unit-variance scaling both enabled).

    Parameters
    ----------
    x: pd.DataFrame (default = None)
        data matrix (dimensions = cells x features)

    Returns
    ----------
    X: pd.DataFrame
        standardized data matrix (dimensions = cells x features), carrying the same index and columns as x
    """
    zscorer = StandardScaler(with_mean = True, with_std = True)
    scaled_values = zscorer.fit_transform(x)
    # Re-wrap the scaled array so row and column labels are preserved
    return pd.DataFrame(scaled_values, index = x.index, columns = x.columns)

def compute_phate(df = None,
                n_components = 2,
                knn = 100,
                random_state = 0,
                n_jobs = -1):
    """Embeds the data with PHATE (nonlinear dimensionality reduction): https://pubmed.ncbi.nlm.nih.gov/31796933/

    Parameters
    ----------
    df: pd.DataFrame (default = None)
        dataframe to perform nonlinear dimensionality reduction on
    n_components: int (default = 2)
        number of components for MDS
    knn: int (default = 100)
        number of nearest neighbors
    random_state: int (default = 0)
        random seed forwarded to phate.PHATE
    n_jobs: int (default = -1)
        number of tasks

    Returns
    ----------
    X_phate: np.ndarray
        PHATE embedding (dimensions = cells x n_components)
    """
    # Collect the operator settings first, then fit and embed in a single step
    phate_settings = dict(n_components = n_components,
                          knn = knn,
                          n_jobs = n_jobs,
                          random_state = random_state)
    embedder = phate.PHATE(**phate_settings)
    return embedder.fit_transform(df)

In [9]:
# Folder holding the input CSV files that are listed and concatenated in the next cell
directory = 'data'
# Folder the standardized feature table is written to and the R-pruned table is read from
save_data = 'data_standard'
# Folder the PHATE figures are saved to
save_directory = 'figures'

In [56]:

# Get a list of all files in the directory. os.listdir returns entries in
# arbitrary order, so sort them to make the positional slices below reproducible.
all_files = sorted(os.listdir(directory))

# Per-treatment groups of input files, selected by position in the sorted listing.
# NOTE(review): the slice bounds are carried over unchanged and assume a fixed
# directory layout; the combined CSV written below lands in this same directory
# and may shift positions on a re-run -- confirm the bounds against the actual listing.
EV = all_files[0:3]
EV200 = all_files[3:7]
KRAS = all_files[10:13]
KRAS200 = all_files[13:17]
treatment_files = {'EV': EV, 'EV200': EV200, 'KRAS': KRAS, 'KRAS200': KRAS200}

# Single switch for the treatment being processed: the files that are read and
# the output file name are both derived from it, so the data and its label
# cannot drift apart (previously the KRAS files were saved as "EV_combined.csv").
# NOTE(review): 'EV' is chosen to match the existing output name -- confirm.
treatment = 'EV'
current_treatment = f"{treatment}_combined.csv"

# Read and concatenate all CSV files of the selected treatment
combined_csv = pd.concat([pd.read_csv(os.path.join(directory, f)) for f in treatment_files[treatment]])

# Export the combined data to a single CSV file (row index not written)
combined_csv.to_csv(os.path.join(directory, current_treatment), index=False)

In [57]:
# Run combined well data
# Load the combined CSV named by current_treatment; index_col = 0 makes the first CSV column the row index
df = pd.read_csv(os.path.join(directory,current_treatment), index_col = 0)

In [58]:
# 'area' plus the per-marker *_nuc_median columns
nuc_feats = [
    'area',
    '00_DNA_nuc_median',
    '00_pRB_nuc_median',
    '00_Rb_nuc_median',
    '00_p21_nuc_median',
    '01_pp27_nuc_median',
    '01_pH2AX_nuc_median',
    '01_CDK2_nuc_median',
    '02_pp53_nuc_median',
    '02_pp21_nuc_median',
    '02_cycA2_nuc_median',
    '03_EdU_nuc_median',
]

# The per-marker *_ring_median columns
ring_feats = [
    '00_DNA_ring_median',
    '00_pRB_ring_median',
    '00_Rb_ring_median',
    '00_p21_ring_median',
    '01_pp27_ring_median',
    '01_pH2AX_ring_median',
    '01_CDK2_ring_median',
    '02_pp53_ring_median',
    '02_pp21_ring_median',
    '02_cycA2_ring_median',
    '03_EdU_ring_median',
]

# Derived columns created below, in the order used when the table is subset for standardization
integrated_nuc_feats = [
    'Integrated_DNA_nuc',
    'Integrated_Edu_nuc',
    'Integrated_CDK2_nuc',
    'Integrated_pRB_nuc',
    'Integrated_Rb_nuc',
    'Integrated_pRB/RB',
    'Integrated_cycA2_nuc',
    'Integrated_pp53_nuc',
    'Integrated_pH2AX_nuc',
    'Integrated_pp27_nuc',
    'Integrated_p21_nuc',
    'Integrated_pp21_nuc',
]

# Integrated value = area * median nuclear value, one derived column per marker.
# The mapping order is the order in which the columns are appended to df.
integrated_sources = {
    'Integrated_DNA_nuc': '00_DNA_nuc_median',
    'Integrated_pRB_nuc': '00_pRB_nuc_median',
    'Integrated_Rb_nuc': '00_Rb_nuc_median',
    'Integrated_p21_nuc': '00_p21_nuc_median',
    'Integrated_pp21_nuc': '02_pp21_nuc_median',
    'Integrated_pp27_nuc': '01_pp27_nuc_median',
    'Integrated_pp53_nuc': '02_pp53_nuc_median',
    'Integrated_pH2AX_nuc': '01_pH2AX_nuc_median',
    'Integrated_CDK2_nuc': '01_CDK2_nuc_median',
    'Integrated_cycA2_nuc': '02_cycA2_nuc_median',
    'Integrated_Edu_nuc': '03_EdU_nuc_median',
}
for integrated_col, median_col in integrated_sources.items():
    df[integrated_col] = df['area'] * df[median_col]

# Ratio of the two integrated columns computed above
df['Integrated_pRB/RB'] = df['Integrated_pRB_nuc'] / df['Integrated_Rb_nuc']

# Same ratio taken directly on the median columns
df['pRB/RB'] = df['00_pRB_nuc_median'] / df['00_Rb_nuc_median']

In [61]:

# Keep only the integrated features, then z-score each column
df = df.loc[:, integrated_nuc_feats]
df = standardize(df)

# Make sure the output folder exists; to_csv raises OSError when it is missing
os.makedirs(save_data, exist_ok=True)

# Export standardized integrated data to a single CSV file (row index not written)
df.to_csv(os.path.join(save_data,f'Intg_STD_{current_treatment}'), index=False)

**Standardized** normalized data was pruned in R, then run through PHATE here.


In [45]:
# R-pruned data
# File name of the R-pruned table, looked up inside the save_data folder
R_current_treatment = 'R_EV_filtered.csv'

# Run R-pruned combined well data
# Rebinds df to the R-pruned table; header = 0 takes the column names from the first row
df = pd.read_csv(os.path.join(save_data,R_current_treatment), header=0)

In [None]:
# Run PHATE with parameters
# Embeds the table currently bound to df in 2 components; knn = 200 overrides
# compute_phate's default of 100, and random_state = 0 is the seed passed to PHATE
X_phate = compute_phate(df = df,
                n_components = 2,
                knn = 200,
                random_state = 0,
                n_jobs = -1)

In [None]:

# Grid of PHATE scatter plots: one panel per column of df, coloured by that column.
# The grid is sized from the number of columns so no feature is silently dropped
# (a fixed 4 x 3 grid only fits 12), and panels without a feature are hidden.
n_feats = df.shape[1]
n_cols = 3
n_rows = max(1, (n_feats + n_cols - 1) // n_cols)  # ceiling division, at least one row

# 12 features -> 4 rows -> figsize (14, 12), i.e. the original layout.
# squeeze = False keeps axes two-dimensional even for a single row.
fig, axes = plt.subplots(n_rows, n_cols, figsize = (14, 3 * n_rows), squeeze = False,
                         gridspec_kw={'hspace': 0.3, 'wspace': 0.1, 'bottom':0.15})
panels = list(axes.flat)

for feat, ax in zip(df.columns, panels):
    # Same colour limits (-1 to 2) on every panel
    sc = ax.scatter(X_phate[:, 0], X_phate[:, 1], c=df[feat], vmin=-1, vmax=2, cmap='RdYlBu_r', s=3)
    ax.tick_params(labelsize=12)
    ax.set_xlabel('PHATE 1', fontsize = 11)
    ax.set_ylabel('PHATE 2', fontsize = 11)
    ax.set_title(feat, fontsize = 11)

    # Remove the tick marks from both axes
    ax.set_xticks([])
    ax.set_yticks([])
    cbar = fig.colorbar(sc, ax=ax)
    cbar.ax.tick_params(labelsize=11)

# Hide panels that have no feature to show
for ax in panels[n_feats:]:
    ax.set_visible(False)

# Make sure the figure folder exists; savefig fails when it is missing
os.makedirs(save_directory, exist_ok=True)
fig.savefig(os.path.join(save_directory, f'knn200_n2_{R_current_treatment}_integrated.png'), bbox_inches = 'tight')
plt.show()

In [49]:
# One PHATE scatter plot per column of df, each saved as its own PNG file.
# Make sure the figure folder exists; savefig fails when it is missing.
os.makedirs(save_directory, exist_ok=True)

for feat in df.columns:
    fig, ax = plt.subplots(figsize=(5, 3.75))
    # Same colour limits (-1 to 2) on every figure
    sc = ax.scatter(X_phate[:, 0], X_phate[:, 1], c=df[feat], vmin=-1, vmax=2, cmap='RdYlBu_r', s=3)
    ax.tick_params(labelsize=12)
    ax.set_xlabel('PHATE 1', fontsize=11)
    ax.set_ylabel('PHATE 2', fontsize=11)
    ax.set_title(feat, fontsize=11)

    # Remove the tick marks from both axes
    ax.set_xticks([])
    ax.set_yticks([])
    cbar = fig.colorbar(sc, ax=ax)
    cbar.ax.tick_params(labelsize=11)

    # A feature name containing a path separator (e.g. 'Integrated_pRB/RB') would be
    # read as a sub-directory by savefig; replace separators to get a valid file name.
    safe_feat = str(feat).replace('/', '_').replace('\\', '_')

    # Save each plot as a separate PNG file, then close it to release the figure's memory
    fig.savefig(os.path.join(save_directory, f'knn200_n2_{R_current_treatment}_{safe_feat}_integrated.png'), bbox_inches='tight')
    plt.close(fig)