In [None]:
import scanpy as sc
import numpy as np
import pandas as pd
import anndata as ad
import seaborn as sns
import matplotlib.pyplot as plt
from scipy.io import mmread
from sklearn.preprocessing import normalize
import anndata as ad
import matplotlib as mpl
from harmony import harmonize
# Global scanpy plotting defaults: 6x6 figures drawn without an axes frame.
sc.set_figure_params(figsize=(6, 6), frameon=False)
# Number of jobs scanpy is allowed to use.
sc.settings.n_jobs=8

In [None]:
def delete_empty_or_full_GEM(count_matrix, min_nonzero_mean=10, min_spot_fraction=0.05,
                             max_nonzero_mean=50, max_spot_fraction=0.95):
    """Remove GEM columns that are (almost) empty or expressed (almost) everywhere.

    A column is removed when either of the following holds:

    * "empty": its non-zero mean is below ``min_nonzero_mean`` OR it is
      non-zero in fewer than ``int(min_spot_fraction * n_rows)`` rows;
    * "global": its non-zero mean is above ``max_nonzero_mean`` AND it is
      non-zero in more than ``int(max_spot_fraction * n_rows)`` rows.

    The "non-zero mean" is ``column_sum / (number_of_non_zero_entries + 1)``;
    the ``+ 1`` keeps all-zero columns from dividing by zero.

    Parameters
    ----------
    count_matrix : 2-D array, rows are spots and columns are GEMs
    min_nonzero_mean, min_spot_fraction : thresholds of the "empty" rule
    max_nonzero_mean, max_spot_fraction : thresholds of the "global" rule
        The defaults reproduce the values that were previously hard-coded.

    Returns
    -------
    count_matrix_pruned : copy of ``count_matrix`` without the removed columns
    GEM_to_delete : list of int, removed column indices in ascending order
    """
    counts = np.asarray(count_matrix)
    n_spots = counts.shape[0]

    # Per-column statistics computed once for the whole matrix.
    nonzero_per_GEM = np.count_nonzero(counts, axis=0)
    nonzero_mean = counts.sum(axis=0) / (nonzero_per_GEM + 1)

    is_empty = ((nonzero_mean < min_nonzero_mean)
                | (nonzero_per_GEM < int(min_spot_fraction * n_spots)))
    is_global = ((nonzero_mean > max_nonzero_mean)
                 & (nonzero_per_GEM > int(max_spot_fraction * n_spots)))

    # Plain Python ints so callers can keep using `idx in GEM_to_delete`.
    GEM_to_delete = np.flatnonzero(is_empty | is_global).tolist()

    count_matrix_pruned = np.delete(count_matrix, GEM_to_delete, axis = 1)
    return count_matrix_pruned, GEM_to_delete
        

# Read in Patient 1 GEMs

In [None]:
# Patient 1: per-cell-type nHDP tables for slides 1T, 1L and 1N.
# Every CSV is read without a header row, transposed, and turned into a NumPy array.
(TandNK_ST_inference_1T, Stroma_ST_inference_1T, B_cell_ST_inference_1T,
 Epithelial_ST_inference_1T, Myeloid_ST_inference_1T) = [
    pd.read_csv(csv_path, header=None).transpose().values
    for csv_path in (
        './nHDP/binary_nHDP_count_TNK_1T_final.csv',
        './nHDP/binary_nHDP_count_Stromal_1T_final.csv',
        './nHDP/binary_nHDP_count_PlasmaB_1T_final.csv',
        './nHDP/binary_nHDP_count_Epithelial_1T_final.csv',
        './nHDP/binary_nHDP_count_Myeloid_1T_final.csv',
    )
]
(TandNK_ST_inference_1L, Stroma_ST_inference_1L, B_cell_ST_inference_1L,
 Epithelial_ST_inference_1L, Myeloid_ST_inference_1L) = [
    pd.read_csv(csv_path, header=None).transpose().values
    for csv_path in (
        './nHDP/binary_nHDP_count_TNK_1L_final.csv',
        './nHDP/binary_nHDP_count_Stromal_1L_final.csv',
        './nHDP/binary_nHDP_count_PlasmaB_1L_final.csv',
        './nHDP/binary_nHDP_count_Epithelial_1L_final.csv',
        './nHDP/binary_nHDP_count_Myeloid_1L_final.csv',
    )
]
(TandNK_ST_inference_1N, Stroma_ST_inference_1N, B_cell_ST_inference_1N,
 Epithelial_ST_inference_1N, Myeloid_ST_inference_1N) = [
    pd.read_csv(csv_path, header=None).transpose().values
    for csv_path in (
        './nHDP/binary_nHDP_count_TNK_1N_final.csv',
        './nHDP/binary_nHDP_count_Stromal_1N_final.csv',
        './nHDP/binary_nHDP_count_PlasmaB_1N_final.csv',
        './nHDP/binary_nHDP_count_Epithelial_1N_final.csv',
        './nHDP/binary_nHDP_count_Myeloid_1N_final.csv',
    )
]

In [None]:
# Join the five per-cell-type tables of each patient-1 slide side by side (axis=1),
# always in the order TandNK, Stroma, B cell, Epithelial, Myeloid.
GEM_combined_1T, GEM_combined_1L, GEM_combined_1N = (
    np.concatenate(cell_type_tables, axis=1)
    for cell_type_tables in (
        (TandNK_ST_inference_1T, Stroma_ST_inference_1T, B_cell_ST_inference_1T,
         Epithelial_ST_inference_1T, Myeloid_ST_inference_1T),
        (TandNK_ST_inference_1L, Stroma_ST_inference_1L, B_cell_ST_inference_1L,
         Epithelial_ST_inference_1L, Myeloid_ST_inference_1L),
        (TandNK_ST_inference_1N, Stroma_ST_inference_1N, B_cell_ST_inference_1N,
         Epithelial_ST_inference_1N, Myeloid_ST_inference_1N),
    )
)

# Read in Patient 2 GEMs

In [None]:
# Patient 2: per-cell-type nHDP tables for slides 2T, 2L and 2N.
# Every CSV is read without a header row, transposed, and turned into a NumPy array.
(TandNK_ST_inference_2T, Stroma_ST_inference_2T, B_cell_ST_inference_2T,
 Epithelial_ST_inference_2T, Myeloid_ST_inference_2T) = [
    pd.read_csv(csv_path, header=None).transpose().values
    for csv_path in (
        './nHDP/binary_nHDP_count_TNK_2T_final.csv',
        './nHDP/binary_nHDP_count_Stromal_2T_final.csv',
        './nHDP/binary_nHDP_count_PlasmaB_2T_final.csv',
        './nHDP/binary_nHDP_count_Epithelial_2T_final.csv',
        './nHDP/binary_nHDP_count_Myeloid_2T_final.csv',
    )
]
(TandNK_ST_inference_2L, Stroma_ST_inference_2L, B_cell_ST_inference_2L,
 Epithelial_ST_inference_2L, Myeloid_ST_inference_2L) = [
    pd.read_csv(csv_path, header=None).transpose().values
    for csv_path in (
        './nHDP/binary_nHDP_count_TNK_2L_final.csv',
        './nHDP/binary_nHDP_count_Stromal_2L_final.csv',
        './nHDP/binary_nHDP_count_PlasmaB_2L_final.csv',
        './nHDP/binary_nHDP_count_Epithelial_2L_final.csv',
        './nHDP/binary_nHDP_count_Myeloid_2L_final.csv',
    )
]
(TandNK_ST_inference_2N, Stroma_ST_inference_2N, B_cell_ST_inference_2N,
 Epithelial_ST_inference_2N, Myeloid_ST_inference_2N) = [
    pd.read_csv(csv_path, header=None).transpose().values
    for csv_path in (
        './nHDP/binary_nHDP_count_TNK_2N_final.csv',
        './nHDP/binary_nHDP_count_Stromal_2N_final.csv',
        './nHDP/binary_nHDP_count_PlasmaB_2N_final.csv',
        './nHDP/binary_nHDP_count_Epithelial_2N_final.csv',
        './nHDP/binary_nHDP_count_Myeloid_2N_final.csv',
    )
]

In [None]:
# Join the five per-cell-type tables of each patient-2 slide side by side (axis=1),
# always in the order TandNK, Stroma, B cell, Epithelial, Myeloid.
GEM_combined_2T, GEM_combined_2L, GEM_combined_2N = (
    np.concatenate(cell_type_tables, axis=1)
    for cell_type_tables in (
        (TandNK_ST_inference_2T, Stroma_ST_inference_2T, B_cell_ST_inference_2T,
         Epithelial_ST_inference_2T, Myeloid_ST_inference_2T),
        (TandNK_ST_inference_2L, Stroma_ST_inference_2L, B_cell_ST_inference_2L,
         Epithelial_ST_inference_2L, Myeloid_ST_inference_2L),
        (TandNK_ST_inference_2N, Stroma_ST_inference_2N, B_cell_ST_inference_2N,
         Epithelial_ST_inference_2N, Myeloid_ST_inference_2N),
    )
)

# Read in Patient 3 GEMs

In [None]:
# Patient 3: per-cell-type nHDP tables for slides 3T, 3L and 3N.
# Every CSV is read without a header row, transposed, and turned into a NumPy array.
(TandNK_ST_inference_3T, Stroma_ST_inference_3T, B_cell_ST_inference_3T,
 Epithelial_ST_inference_3T, Myeloid_ST_inference_3T) = [
    pd.read_csv(csv_path, header=None).transpose().values
    for csv_path in (
        './nHDP/binary_nHDP_count_TNK_3T_final.csv',
        './nHDP/binary_nHDP_count_Stromal_3T_final.csv',
        './nHDP/binary_nHDP_count_PlasmaB_3T_final.csv',
        './nHDP/binary_nHDP_count_Epithelial_3T_final.csv',
        './nHDP/binary_nHDP_count_Myeloid_3T_final.csv',
    )
]
(TandNK_ST_inference_3L, Stroma_ST_inference_3L, B_cell_ST_inference_3L,
 Epithelial_ST_inference_3L, Myeloid_ST_inference_3L) = [
    pd.read_csv(csv_path, header=None).transpose().values
    for csv_path in (
        './nHDP/binary_nHDP_count_TNK_3L_final.csv',
        './nHDP/binary_nHDP_count_Stromal_3L_final.csv',
        './nHDP/binary_nHDP_count_PlasmaB_3L_final.csv',
        './nHDP/binary_nHDP_count_Epithelial_3L_final.csv',
        './nHDP/binary_nHDP_count_Myeloid_3L_final.csv',
    )
]
(TandNK_ST_inference_3N, Stroma_ST_inference_3N, B_cell_ST_inference_3N,
 Epithelial_ST_inference_3N, Myeloid_ST_inference_3N) = [
    pd.read_csv(csv_path, header=None).transpose().values
    for csv_path in (
        './nHDP/binary_nHDP_count_TNK_3N_final.csv',
        './nHDP/binary_nHDP_count_Stromal_3N_final.csv',
        './nHDP/binary_nHDP_count_PlasmaB_3N_final.csv',
        './nHDP/binary_nHDP_count_Epithelial_3N_final.csv',
        './nHDP/binary_nHDP_count_Myeloid_3N_final.csv',
    )
]

In [None]:
# Join the five per-cell-type tables of each patient-3 slide side by side (axis=1),
# always in the order TandNK, Stroma, B cell, Epithelial, Myeloid.
GEM_combined_3T, GEM_combined_3L, GEM_combined_3N = (
    np.concatenate(cell_type_tables, axis=1)
    for cell_type_tables in (
        (TandNK_ST_inference_3T, Stroma_ST_inference_3T, B_cell_ST_inference_3T,
         Epithelial_ST_inference_3T, Myeloid_ST_inference_3T),
        (TandNK_ST_inference_3L, Stroma_ST_inference_3L, B_cell_ST_inference_3L,
         Epithelial_ST_inference_3L, Myeloid_ST_inference_3L),
        (TandNK_ST_inference_3N, Stroma_ST_inference_3N, B_cell_ST_inference_3N,
         Epithelial_ST_inference_3N, Myeloid_ST_inference_3N),
    )
)

# Read in Patient 4 GEMs

In [None]:
# Patient 4: per-cell-type nHDP tables for slides 4T, 4L and 4N.
# Every CSV is read without a header row, transposed, and turned into a NumPy array.
(TandNK_ST_inference_4T, Stroma_ST_inference_4T, B_cell_ST_inference_4T,
 Epithelial_ST_inference_4T, Myeloid_ST_inference_4T) = [
    pd.read_csv(csv_path, header=None).transpose().values
    for csv_path in (
        './nHDP/binary_nHDP_count_TNK_4T_final.csv',
        './nHDP/binary_nHDP_count_Stromal_4T_final.csv',
        './nHDP/binary_nHDP_count_PlasmaB_4T_final.csv',
        './nHDP/binary_nHDP_count_Epithelial_4T_final.csv',
        './nHDP/binary_nHDP_count_Myeloid_4T_final.csv',
    )
]
(TandNK_ST_inference_4L, Stroma_ST_inference_4L, B_cell_ST_inference_4L,
 Epithelial_ST_inference_4L, Myeloid_ST_inference_4L) = [
    pd.read_csv(csv_path, header=None).transpose().values
    for csv_path in (
        './nHDP/binary_nHDP_count_TNK_4L_final.csv',
        './nHDP/binary_nHDP_count_Stromal_4L_final.csv',
        './nHDP/binary_nHDP_count_PlasmaB_4L_final.csv',
        './nHDP/binary_nHDP_count_Epithelial_4L_final.csv',
        './nHDP/binary_nHDP_count_Myeloid_4L_final.csv',
    )
]
(TandNK_ST_inference_4N, Stroma_ST_inference_4N, B_cell_ST_inference_4N,
 Epithelial_ST_inference_4N, Myeloid_ST_inference_4N) = [
    pd.read_csv(csv_path, header=None).transpose().values
    for csv_path in (
        './nHDP/binary_nHDP_count_TNK_4N_final.csv',
        './nHDP/binary_nHDP_count_Stromal_4N_final.csv',
        './nHDP/binary_nHDP_count_PlasmaB_4N_final.csv',
        './nHDP/binary_nHDP_count_Epithelial_4N_final.csv',
        './nHDP/binary_nHDP_count_Myeloid_4N_final.csv',
    )
]

In [None]:
# Join the five per-cell-type tables of each patient-4 slide side by side (axis=1),
# always in the order TandNK, Stroma, B cell, Epithelial, Myeloid.
GEM_combined_4T, GEM_combined_4L, GEM_combined_4N = (
    np.concatenate(cell_type_tables, axis=1)
    for cell_type_tables in (
        (TandNK_ST_inference_4T, Stroma_ST_inference_4T, B_cell_ST_inference_4T,
         Epithelial_ST_inference_4T, Myeloid_ST_inference_4T),
        (TandNK_ST_inference_4L, Stroma_ST_inference_4L, B_cell_ST_inference_4L,
         Epithelial_ST_inference_4L, Myeloid_ST_inference_4L),
        (TandNK_ST_inference_4N, Stroma_ST_inference_4N, B_cell_ST_inference_4N,
         Epithelial_ST_inference_4N, Myeloid_ST_inference_4N),
    )
)

In [None]:
# Number of rows (spots) of every combined slide matrix, per patient in the order T, L, N.
spot_n_1T, spot_n_1L, spot_n_1N = (
    len(slide_mtx) for slide_mtx in (GEM_combined_1T, GEM_combined_1L, GEM_combined_1N))
spot_n_2T, spot_n_2L, spot_n_2N = (
    len(slide_mtx) for slide_mtx in (GEM_combined_2T, GEM_combined_2L, GEM_combined_2N))
spot_n_3T, spot_n_3L, spot_n_3N = (
    len(slide_mtx) for slide_mtx in (GEM_combined_3T, GEM_combined_3L, GEM_combined_3N))
spot_n_4T, spot_n_4L, spot_n_4N = (
    len(slide_mtx) for slide_mtx in (GEM_combined_4T, GEM_combined_4L, GEM_combined_4N))

In [None]:
# Column labels of the combined GEM matrices: 85 GEMs (numbered from 1) for each of the
# five cell types, in the same cell-type order used when the tables are concatenated.
(TandNK_GEM_names, Stroma_GEM_names, B_cell_GEM_names,
 Epithelial_GEM_names, Myeloid_GEM_names) = (
    [f'{cell_type} GEM {gem_no}' for gem_no in range(1, 86)]
    for cell_type in ('TandNK', 'Stroma', 'B cell', 'Epithelial', 'Myeloid')
)
GEM_names = [*TandNK_GEM_names, *Stroma_GEM_names, *B_cell_GEM_names,
             *Epithelial_GEM_names, *Myeloid_GEM_names]

In [None]:
# Total number of GEM labels (shown as the cell output).
len(GEM_names)

# Plot ClusterMap

Leading-edge slides are divided into a tumor side and a normal side. Here we plot the clustering results of the different tissue types based on their GEM expression.

In [None]:
# Cluster label tables of the four leading-edge slides, each flattened to a 1-D array.
P1_L_cluster, P2_L_cluster, P3_L_cluster, P4_L_cluster = (
    pd.read_csv(cluster_csv).values.flatten()
    for cluster_csv in (
        './ClusterMap/P1L.csv',
        './ClusterMap/P2L.csv',
        './ClusterMap/P3L.csv',
        './ClusterMap/P4L.csv',
    )
)

In [None]:
#refer to previous notebook to know cluster number
# Positions of the leading-edge spots whose cluster label is assigned to the
# tumor side (T) or to the normal side (N) of each patient's slide.
P1_L_T_side_idx = np.flatnonzero(P1_L_cluster == 1)
P1_L_N_side_idx = np.flatnonzero((P1_L_cluster == 0) | (P1_L_cluster == 3))

P2_L_T_side_idx = np.flatnonzero(P2_L_cluster == 4)
P2_L_N_side_idx = np.flatnonzero((P2_L_cluster == 1) | (P2_L_cluster == 2))

P3_L_T_side_idx = np.flatnonzero(P3_L_cluster == 0)
P3_L_N_side_idx = np.flatnonzero(P3_L_cluster == 1)

P4_L_T_side_idx = np.flatnonzero(P4_L_cluster == 0)
P4_L_N_side_idx = np.flatnonzero((P4_L_cluster == 1) | (P4_L_cluster == 2))

In [None]:
# Number of spots taken per tissue for the clustermap. The spots are simply the
# first `sample_n` rows of each slide (or of each leading-edge side), not a random draw.
sample_n = 5

P1_T, P1_N = GEM_combined_1T[:sample_n], GEM_combined_1N[:sample_n]
P1_L_T_side = GEM_combined_1L[P1_L_T_side_idx[:sample_n]]
P1_L_N_side = GEM_combined_1L[P1_L_N_side_idx[:sample_n]]

P2_T, P2_N = GEM_combined_2T[:sample_n], GEM_combined_2N[:sample_n]
P2_L_T_side = GEM_combined_2L[P2_L_T_side_idx[:sample_n]]
P2_L_N_side = GEM_combined_2L[P2_L_N_side_idx[:sample_n]]

P3_T, P3_N = GEM_combined_3T[:sample_n], GEM_combined_3N[:sample_n]
P3_L_T_side = GEM_combined_3L[P3_L_T_side_idx[:sample_n]]
P3_L_N_side = GEM_combined_3L[P3_L_N_side_idx[:sample_n]]

P4_T, P4_N = GEM_combined_4T[:sample_n], GEM_combined_4N[:sample_n]
P4_L_T_side = GEM_combined_4L[P4_L_T_side_idx[:sample_n]]
P4_L_N_side = GEM_combined_4L[P4_L_N_side_idx[:sample_n]]

In [None]:
# Row labels for the clustermap: four tissue groups per patient.
combined_GEM_idx = [
    'Patient 1 Primary Tumor',
    'Patient 1 Adjacent Area',
    'Patient 1 Leading Edge Tumor Side',
    'Patient 1 Leading Edge Normal Side',
    'Patient 2 Primary Tumor',
    'Patient 2 Adjacent Area',
    'Patient 2 Leading Edge Tumor Side',
    'Patient 2 Leading Edge Normal Side',
    'Patient 3 Primary Tumor',
    'Patient 3 Adjacent Area',
    'Patient 3 Leading Edge Tumor Side',
    'Patient 3 Leading Edge Normal Side',
    'Patient 4 Primary Tumor',
    'Patient 4 Adjacent Area',
    'Patient 4 Leading Edge Tumor Side',
    'Patient 4 Leading Edge Normal Side',
]

# Each label is repeated `sample_n` times in a row, one copy per sampled spot.
combined_GEM_idx_repeat = []
for tissue_label in combined_GEM_idx:
    combined_GEM_idx_repeat.extend([tissue_label] * sample_n)

In [None]:
# Stack the sampled spots of all tissue groups row-wise, in the same order as the
# labels in combined_GEM_idx (T, N, leading-edge tumor side, leading-edge normal side).
combine_GEM_for_map = np.concatenate(
    (
        P1_T, P1_N, P1_L_T_side, P1_L_N_side,
        P2_T, P2_N, P2_L_T_side, P2_L_N_side,
        P3_T, P3_N, P3_L_T_side, P3_L_N_side,
        P4_T, P4_N, P4_L_T_side, P4_L_N_side,
    ),
    axis=0,
)

# Rows are labelled by tissue group, columns by GEM name.
combine_GEM_for_map_df = pd.DataFrame(
    data=combine_GEM_for_map,
    index=combined_GEM_idx_repeat,
    columns=GEM_names,
)

In [None]:
# Keep only the GEM columns that exceed 200 in at least one sampled spot.
combine_GEM_for_map_df = combine_GEM_for_map_df.loc[
    :, combine_GEM_for_map_df.gt(200).any(axis=0)
]

In [None]:
import seaborn as sns

In [None]:
# Global seaborn setting: shrink all fonts so the many row/column labels fit.
# This stays in effect for later plots until sns.set is called again.
sns.set(font_scale=0.4)
# Cluster the rows only; col_cluster=False leaves the GEM columns in their given order.
g = sns.clustermap(combine_GEM_for_map_df, col_cluster = False)
g.fig.suptitle('Clustermap of GEMs') 
plt.show()

# Delete Empty or Global GEMs

In [None]:
# Stack all 12 slides row-wise, per patient in the order T, L, N.
complete_GEMs_mtx = np.concatenate(
    (
        GEM_combined_1T, GEM_combined_1L, GEM_combined_1N,
        GEM_combined_2T, GEM_combined_2L, GEM_combined_2N,
        GEM_combined_3T, GEM_combined_3L, GEM_combined_3N,
        GEM_combined_4T, GEM_combined_4L, GEM_combined_4N,
    ),
    axis=0,
)

In [None]:
# Shape of the stacked matrix: rows of all 12 slides, one column per GEM.
complete_GEMs_mtx.shape

In [None]:
# Drop near-empty and globally expressed GEM columns; deleted_GEMs holds the removed
# column indices, which refer to the columns of complete_GEMs_mtx.
complete_GEMs_mtx_pruned, deleted_GEMs = delete_empty_or_full_GEM(complete_GEMs_mtx)

In [None]:
# Shape after pruning (same rows, fewer columns).
complete_GEMs_mtx_pruned.shape

In [None]:
# One tissue label per row of the stacked GEM matrix, in the same slide order in which
# the slides are concatenated (per patient: T, L, N).
# NOTE: the spelling 'Ajacent' is kept on purpose. These labels are later compared for
# equality with the entries of tissue_names_unique, so both spellings must stay identical.
tissue_names = (
    ['Patient 1 Primary Tumor'] * spot_n_1T
    + ['Patient 1 Leading Edge'] * spot_n_1L
    + ['Patient 1 Ajacent Area'] * spot_n_1N
    + ['Patient 2 Primary Tumor'] * spot_n_2T
    + ['Patient 2 Leading Edge'] * spot_n_2L
    + ['Patient 2 Ajacent Area'] * spot_n_2N
    + ['Patient 3 Primary Tumor'] * spot_n_3T
    + ['Patient 3 Leading Edge'] * spot_n_3L
    + ['Patient 3 Ajacent Area'] * spot_n_3N
    + ['Patient 4 Primary Tumor'] * spot_n_4T
    + ['Patient 4 Leading Edge'] * spot_n_4L
    + ['Patient 4 Ajacent Area'] * spot_n_4N
)

# Names of the GEM columns that survived pruning. Iterating over GEM_names itself
# (instead of a hard-coded 425) keeps this correct if the number of GEMs changes,
# and the set makes each membership test O(1).
_deleted_GEM_set = set(deleted_GEMs)
GEM_names_pruned = [
    GEM_name for col, GEM_name in enumerate(GEM_names) if col not in _deleted_GEM_set
]

print(len(tissue_names))
print(len(GEM_names_pruned))

In [None]:
# Pruned GEM matrix as a DataFrame with one named column per retained GEM.
complete_GEMs_df = pd.DataFrame(complete_GEMs_mtx_pruned, columns = GEM_names_pruned)

In [None]:
# Add the per-row tissue label as an extra column (modifies complete_GEMs_df in place).
complete_GEMs_df['tissue'] = tissue_names

# Plot GEMs of All Patients

In [None]:
# Visium data folders of every slide: primary tumor (T), leading edge (L), adjacent (N).
HCC_1T, HCC_1L, HCC_1N = ('./raw_data/Primary_Tumor/HCC-1T',
                          './raw_data/Leading_Edge/HCC-1L',
                          './raw_data/Adjacent/HCC-1N')
HCC_2T, HCC_2L, HCC_2N = ('./raw_data/Primary_Tumor/HCC-2T',
                          './raw_data/Leading_Edge/HCC-2L',
                          './raw_data/Adjacent/HCC-2N')
HCC_3T, HCC_3L, HCC_3N = ('./raw_data/Primary_Tumor/HCC-3T',
                          './raw_data/Leading_Edge/HCC-3L',
                          './raw_data/Adjacent/HCC-3N')
HCC_4T, HCC_4L, HCC_4N = ('./raw_data/Primary_Tumor/HCC-4T',
                          './raw_data/Leading_Edge/HCC-4L',
                          './raw_data/Adjacent/HCC-4N')

# Slide order used everywhere below: per patient T, L, N.
visium_paths = [
    HCC_1T, HCC_1L, HCC_1N,
    HCC_2T, HCC_2L, HCC_2N,
    HCC_3T, HCC_3L, HCC_3N,
    HCC_4T, HCC_4L, HCC_4N,
]

# One label per slide, aligned index-by-index with visium_paths.
# NOTE: the spelling 'Ajacent' matches the labels stored in tissue_names; the two are
# compared for equality later, so they must stay identical.
tissue_names_unique = [
    'Patient 1 Primary Tumor', 'Patient 1 Leading Edge', 'Patient 1 Ajacent Area',
    'Patient 2 Primary Tumor', 'Patient 2 Leading Edge', 'Patient 2 Ajacent Area',
    'Patient 3 Primary Tumor', 'Patient 3 Leading Edge', 'Patient 3 Ajacent Area',
    'Patient 4 Primary Tumor', 'Patient 4 Leading Edge', 'Patient 4 Ajacent Area',
]

In [None]:
# Load every slide with scanpy, keeping the order of visium_paths.
adata_l = [sc.read_visium(slide_dir) for slide_dir in visium_paths]

In [None]:
# Spatial plot of CLDN5 on every slide, drawn over a faded tissue image.
for adata in adata_l:
    sc.pl.spatial(
        adata,
        color='CLDN5',
        img_key="hires",
        cmap='Purples',
        vmin=0,
        vmax=10,
        alpha_img=0.3,
        colorbar_loc='bottom',
    )

In [None]:
# Copy every retained GEM's per-spot values into the .obs table of the matching slide.
# Values are assigned positionally (.values).
# NOTE(review): this assumes the rows of a tissue in complete_GEMs_df are in the same
# spot order as that slide's adata.obs — confirm.
for adata, tissue in zip(adata_l, tissue_names_unique):
    # The tissue mask is identical for every GEM, so select the slide's rows once
    # instead of re-filtering the whole frame for each GEM column.
    tissue_GEMs = complete_GEMs_df.loc[complete_GEMs_df['tissue'] == tissue, GEM_names_pruned]
    for GEM_name in GEM_names_pruned:
        adata.obs[GEM_name] = tissue_GEMs[GEM_name].values

In [None]:
# Slides (by position in adata_l) that get a stronger tissue image and more transparent spots.
dark_image_idx = [3, 4, 9, 10, 11]
sc.set_figure_params(fontsize=50, dpi=60, dpi_save=60)

# One PDF per GEM: a 4 x 3 grid with one row per patient and one column per slide type.
for GEM_name in GEM_names_pruned:
    fig, ax = plt.subplots(4, 3, figsize=(30, 50))

    # ax.flat walks the grid row by row, so the flat position equals 3 * patient + slide type.
    for idx, panel in enumerate(ax.flat):
        is_dark = idx in dark_image_idx
        sc.pl.spatial(
            adata_l[idx],
            img_key="hires",
            color=GEM_name,
            cmap='Greens',
            vmin=0,
            vmax=500,
            alpha=0.7 if is_dark else 0.9,
            alpha_img=0.6 if is_dark else 0.3,
            colorbar_loc=None,
            ax=panel,
            title=tissue_names_unique[idx],
            show=False,
        )

    fig.suptitle(GEM_name, fontsize=80)
    plt.savefig('./GEM_vis/' + GEM_name + '.pdf')
    # Close the figure so that the loop does not accumulate open figures.
    plt.close(fig)

In [None]:
#output original HE staining image
sc.set_figure_params(fontsize=50)
fig, ax = plt.subplots(4, 3, figsize=(30, 50))

# Same 4 x 3 layout as the GEM figures: ax.flat visits the panels row by row,
# so the flat position equals 3 * patient + slide type.
for idx, panel in enumerate(ax.flat):
    sc.pl.spatial(
        adata_l[idx],
        img_key="hires",
        ax=panel,
        title=tissue_names_unique[idx],
        show=False,
    )

fig.suptitle("Original H&E Staining Images", fontsize=80)
plt.savefig('./GEM_vis/Original H&E Staining Images.pdf')
plt.close(fig)

# GEM Co-occurrence Analysis with All Patients' Slides Merged

In [None]:
from sklearn.metrics.pairwise import cosine_similarity
from scipy import sparse

In [None]:
# Sparse copy of the pruned matrix, transposed so that every row is one GEM.
complete_GEMs_mtx_sparse = sparse.csr_matrix(complete_GEMs_mtx_pruned).transpose()
complete_GEMs_mtx_sparse.shape

In [None]:
# Pairwise cosine similarity between the rows of the transposed matrix, i.e. between GEMs.
similarities = cosine_similarity(complete_GEMs_mtx_sparse)

In [None]:
# Shape of the GEM-by-GEM similarity matrix.
similarities.shape

In [None]:
# Heatmap mask: non-zero on and above the diagonal, so only the lower triangle is drawn.
mask = np.triu(np.ones_like(similarities))
# Diverging seaborn palette returned as a matplotlib colormap.
cmap = sns.diverging_palette(220, 10, as_cmap=True)

In [None]:
# The font scale has to be set BEFORE the figure and axes are created: seaborn writes it
# into matplotlib's rcParams, which only affect artists created afterwards. Setting it
# after sns.heatmap (as before) left this figure at whatever scale an earlier cell had
# set, so a fresh Run All and a re-run of this cell produced different figures.
sns.set(font_scale = 0.1)

plt.figure(figsize = (10, 8))
ax = sns.heatmap(similarities, 
                 xticklabels=GEM_names_pruned, 
                 yticklabels=GEM_names_pruned,
                 mask=mask, cmap=cmap)
# Explicit font size keeps the title readable despite the tiny global font scale.
plt.title("GEM Co-occurance Matrix - All Patients", fontsize = 10)
plt.show()

# Ligand-Receptor Analysis

In [None]:
# Table read from LR_product_record.csv; presumably one column per ligand-receptor pair
# and one row per spot — TODO confirm against the file that produced the CSV.
LR_product = pd.read_csv("./Ligand-Receptor/LR_product_record.csv", index_col = None)
# NOTE(review): .values may share memory with LR_product. Whether the in-place
# binarization of LR_product in the next cell also changes LR_product_mtx therefore
# depends on the pandas version / copy-on-write mode — confirm which one is intended.
LR_product_mtx = LR_product.values
# Column names are used as the LR labels.
LR_names = list(LR_product.columns)

In [None]:
# Binarize in place: every positive entry of LR_product becomes 1.
LR_product[LR_product > 0] = 1

In [None]:
# Shape of the pruned GEM matrix (displayed for inspection).
complete_GEMs_mtx_pruned.shape

In [None]:
# Shape of the LR table (displayed for inspection).
LR_product.shape

In [None]:
# Independent copy of the pruned GEM matrix in which every positive entry is set to 1;
# the original matrix is left untouched.
binary_complete_GEMs_mtx = np.array(complete_GEMs_mtx_pruned)
binary_complete_GEMs_mtx[complete_GEMs_mtx_pruned > 0] = 1

In [None]:
#for each LR, find the GEM that is most likely activated by it,
#by counting how many spots express both the LR and the GEM
LR_GEM_matching = []
for i in range(LR_product_mtx.shape[1]):
    # Binarize the LR column explicitly. LR_product_mtx is only 0/1 if the earlier
    # in-place edit of LR_product happened to propagate to it, which depends on the
    # pandas version; thresholding here makes the score a true spot count either way.
    LR_expressed = LR_product_mtx[:, i] > 0

    # Summing the binary GEM rows of the spots that express this LR gives, for every
    # GEM at once, the number of spots expressing both.
    scores = binary_complete_GEMs_mtx[LR_expressed].sum(axis=0)

    # argmax returns the first maximum, matching the original strict `>` update rule;
    # an LR with no co-expressed GEM keeps None.
    best_GEM = None
    if scores.size and scores.max() > 0:
        best_GEM = GEM_names_pruned[int(scores.argmax())]
    LR_GEM_matching.append((LR_names[i], best_GEM))

In [None]:
# Display the (LR name, best-matching GEM name or None) pairs.
LR_GEM_matching