In [None]:
import scanpy as sc
import os
import numpy as np
import pandas as pd
import anndata as ad
import seaborn as sns
import matplotlib.pyplot as plt
from scipy.io import mmread
from sklearn.preprocessing import normalize
import anndata as ad
import matplotlib as mpl
from harmony import harmonize
from numpy.linalg import norm
sc.settings.n_jobs=8

In [None]:
def prune_GEMs(GEM_n, count_matrix):
    """Drop uninformative GEM columns from one slide and return the surviving labels.

    Parameters
    ----------
    GEM_n : int
        Number of GEMs per cell type; forwarded to ``create_GEM_names`` to
        build the full, ordered label list.
    count_matrix : numpy.ndarray
        2-D matrix whose columns are GEMs, in the same order as the labels
        produced by ``create_GEM_names``.

    Returns
    -------
    tuple
        ``(count_matrix_pruned, pruned_names)``: the matrix with the filtered
        columns removed and the labels of the columns that were kept.

    Raises
    ------
    ValueError
        If the number of columns does not match the number of generated
        labels. Without this check the labels would silently be assigned to
        the wrong columns (e.g. when an already-pruned matrix is passed again).
    """
    GEM_names = create_GEM_names(GEM_n)
    if count_matrix.shape[1] != len(GEM_names):
        raise ValueError(
            "count_matrix has %d columns but %d GEM names were generated"
            % (count_matrix.shape[1], len(GEM_names))
        )
    count_matrix_pruned, GEM_to_delete_idx = delete_empty_or_full_GEM(count_matrix)
    # Set membership keeps the name filter linear in the number of GEMs.
    deleted = set(GEM_to_delete_idx)
    pruned_names = [name for i, name in enumerate(GEM_names) if i not in deleted]
    return count_matrix_pruned, pruned_names

In [None]:
def create_GEM_names(GEM_n):
    """Return the ordered GEM labels for all five cell types.

    Labels are grouped by cell type in the order TandNK, Stroma, B cell,
    Epithelial, Myeloid, and numbered 1..GEM_n inside each group, giving
    ``5 * GEM_n`` strings such as ``'TandNK GEM 1'``.
    """
    prefixes = ('TandNK GEM ', 'Stroma GEM ', 'B cell GEM ',
                'Epithelial GEM ', 'Myeloid GEM ')
    return [prefix + str(number)
            for prefix in prefixes
            for number in range(1, GEM_n + 1)]

In [None]:
def delete_empty_or_full_GEM(count_matrix, low_mean_thre=10, min_spot_frac=0.05,
                             high_mean_thre=20, max_spot_frac=0.70):
    """Remove GEM columns that are nearly empty or expressed almost everywhere.

    For every column the "non-zero mean" is ``sum / (number of non-zero
    entries + 1)``; the ``+ 1`` keeps the denominator non-zero for an
    all-zero column.

    A column is removed when either rule applies:

    * sparse / weak: the non-zero mean is below ``low_mean_thre`` OR fewer
      than ``int(min_spot_frac * n_rows)`` rows are non-zero;
    * global: the non-zero mean is above ``high_mean_thre`` AND more than
      ``int(max_spot_frac * n_rows)`` rows are non-zero.

    NOTE(review): earlier notes on this filter quoted mean cut-offs of 20 and
    50; the defaults here keep the values the code has actually applied
    (10 and 20) -- confirm which pair was intended.

    Parameters
    ----------
    count_matrix : numpy.ndarray
        2-D matrix; each column is one GEM.
    low_mean_thre, min_spot_frac, high_mean_thre, max_spot_frac : float
        Thresholds for the two rules above.

    Returns
    -------
    tuple
        ``(count_matrix_pruned, GEM_to_delete)``: the matrix without the
        removed columns, and the list of removed column indices (ascending).
    """
    n_rows = count_matrix.shape[0]
    min_spots = int(min_spot_frac * n_rows)
    max_spots = int(max_spot_frac * n_rows)

    GEM_to_delete = []
    for c in range(count_matrix.shape[1]):
        this_GEM = count_matrix[:, c]
        n_expressing = np.count_nonzero(this_GEM)
        nonzero_mean = np.sum(this_GEM) / (n_expressing + 1)

        too_sparse = nonzero_mean < low_mean_thre or n_expressing < min_spots
        too_global = nonzero_mean > high_mean_thre and n_expressing > max_spots
        if too_sparse or too_global:
            GEM_to_delete.append(c)

    count_matrix_pruned = np.delete(count_matrix, GEM_to_delete, axis = 1)
    return count_matrix_pruned, GEM_to_delete
        

# Read in Patient's Data

In [None]:
# Patient 1: load the per-cell-type nHDP count tables for the three slides
# (1T, 1L, 1N). Each CSV is read without a header row and transposed, and the
# raw array is kept.
# NOTE(review): presumably the transpose puts spots on rows and GEMs on columns,
# which is what the later axis=1 concatenation and column-wise pruning expect --
# confirm against the CSV layout.
TandNK_ST_inference_1T = pd.read_csv('./nHDP/binary_nHDP_count_TNK_1T_final.csv', header = None).transpose().values
Stroma_ST_inference_1T = pd.read_csv('./nHDP/binary_nHDP_count_Stromal_1T_final.csv', header = None).transpose().values
B_cell_ST_inference_1T = pd.read_csv('./nHDP/binary_nHDP_count_PlasmaB_1T_final.csv', header = None).transpose().values
Epithelial_ST_inference_1T = pd.read_csv('./nHDP/binary_nHDP_count_Epithelial_1T_final.csv', header = None).transpose().values
Myeloid_ST_inference_1T = pd.read_csv('./nHDP/binary_nHDP_count_Myeloid_1T_final.csv', header = None).transpose().values
TandNK_ST_inference_1L = pd.read_csv('./nHDP/binary_nHDP_count_TNK_1L_final.csv', header = None).transpose().values
Stroma_ST_inference_1L = pd.read_csv('./nHDP/binary_nHDP_count_Stromal_1L_final.csv', header = None).transpose().values
B_cell_ST_inference_1L = pd.read_csv('./nHDP/binary_nHDP_count_PlasmaB_1L_final.csv', header = None).transpose().values
Epithelial_ST_inference_1L = pd.read_csv('./nHDP/binary_nHDP_count_Epithelial_1L_final.csv', header = None).transpose().values
Myeloid_ST_inference_1L = pd.read_csv('./nHDP/binary_nHDP_count_Myeloid_1L_final.csv', header = None).transpose().values
TandNK_ST_inference_1N = pd.read_csv('./nHDP/binary_nHDP_count_TNK_1N_final.csv', header = None).transpose().values
Stroma_ST_inference_1N = pd.read_csv('./nHDP/binary_nHDP_count_Stromal_1N_final.csv', header = None).transpose().values
B_cell_ST_inference_1N = pd.read_csv('./nHDP/binary_nHDP_count_PlasmaB_1N_final.csv', header = None).transpose().values
Epithelial_ST_inference_1N = pd.read_csv('./nHDP/binary_nHDP_count_Epithelial_1N_final.csv', header = None).transpose().values
Myeloid_ST_inference_1N = pd.read_csv('./nHDP/binary_nHDP_count_Myeloid_1N_final.csv', header = None).transpose().values

In [None]:
# Patient 1: place the five cell-type blocks side by side (column-wise) for each slide.
# The block order -- TandNK, Stroma, B cell, Epithelial, Myeloid -- is the order in
# which create_GEM_names() emits its labels, so it must not be changed.
GEM_combined_1T = np.hstack((TandNK_ST_inference_1T, Stroma_ST_inference_1T,
                             B_cell_ST_inference_1T, Epithelial_ST_inference_1T,
                             Myeloid_ST_inference_1T))
GEM_combined_1L = np.hstack((TandNK_ST_inference_1L, Stroma_ST_inference_1L,
                             B_cell_ST_inference_1L, Epithelial_ST_inference_1L,
                             Myeloid_ST_inference_1L))
GEM_combined_1N = np.hstack((TandNK_ST_inference_1N, Stroma_ST_inference_1N,
                             B_cell_ST_inference_1N, Epithelial_ST_inference_1N,
                             Myeloid_ST_inference_1N))

In [None]:
# Patient 2: load the per-cell-type nHDP count tables for the three slides
# (2T, 2L, 2N). Each CSV is read without a header row and transposed, and the
# raw array is kept.
# NOTE(review): presumably the transpose puts spots on rows and GEMs on columns,
# which is what the later axis=1 concatenation and column-wise pruning expect --
# confirm against the CSV layout.
TandNK_ST_inference_2T = pd.read_csv('./nHDP/binary_nHDP_count_TNK_2T_final.csv', header = None).transpose().values
Stroma_ST_inference_2T = pd.read_csv('./nHDP/binary_nHDP_count_Stromal_2T_final.csv', header = None).transpose().values
B_cell_ST_inference_2T = pd.read_csv('./nHDP/binary_nHDP_count_PlasmaB_2T_final.csv', header = None).transpose().values
Epithelial_ST_inference_2T = pd.read_csv('./nHDP/binary_nHDP_count_Epithelial_2T_final.csv', header = None).transpose().values
Myeloid_ST_inference_2T = pd.read_csv('./nHDP/binary_nHDP_count_Myeloid_2T_final.csv', header = None).transpose().values
TandNK_ST_inference_2L = pd.read_csv('./nHDP/binary_nHDP_count_TNK_2L_final.csv', header = None).transpose().values
Stroma_ST_inference_2L = pd.read_csv('./nHDP/binary_nHDP_count_Stromal_2L_final.csv', header = None).transpose().values
B_cell_ST_inference_2L = pd.read_csv('./nHDP/binary_nHDP_count_PlasmaB_2L_final.csv', header = None).transpose().values
Epithelial_ST_inference_2L = pd.read_csv('./nHDP/binary_nHDP_count_Epithelial_2L_final.csv', header = None).transpose().values
Myeloid_ST_inference_2L = pd.read_csv('./nHDP/binary_nHDP_count_Myeloid_2L_final.csv', header = None).transpose().values
TandNK_ST_inference_2N = pd.read_csv('./nHDP/binary_nHDP_count_TNK_2N_final.csv', header = None).transpose().values
Stroma_ST_inference_2N = pd.read_csv('./nHDP/binary_nHDP_count_Stromal_2N_final.csv', header = None).transpose().values
B_cell_ST_inference_2N = pd.read_csv('./nHDP/binary_nHDP_count_PlasmaB_2N_final.csv', header = None).transpose().values
Epithelial_ST_inference_2N = pd.read_csv('./nHDP/binary_nHDP_count_Epithelial_2N_final.csv', header = None).transpose().values
Myeloid_ST_inference_2N = pd.read_csv('./nHDP/binary_nHDP_count_Myeloid_2N_final.csv', header = None).transpose().values

In [None]:
# Patient 2: place the five cell-type blocks side by side (column-wise) for each slide.
# The block order -- TandNK, Stroma, B cell, Epithelial, Myeloid -- is the order in
# which create_GEM_names() emits its labels, so it must not be changed.
GEM_combined_2T = np.hstack((TandNK_ST_inference_2T, Stroma_ST_inference_2T,
                             B_cell_ST_inference_2T, Epithelial_ST_inference_2T,
                             Myeloid_ST_inference_2T))
GEM_combined_2L = np.hstack((TandNK_ST_inference_2L, Stroma_ST_inference_2L,
                             B_cell_ST_inference_2L, Epithelial_ST_inference_2L,
                             Myeloid_ST_inference_2L))
GEM_combined_2N = np.hstack((TandNK_ST_inference_2N, Stroma_ST_inference_2N,
                             B_cell_ST_inference_2N, Epithelial_ST_inference_2N,
                             Myeloid_ST_inference_2N))

In [None]:
# Patient 3: load the per-cell-type nHDP count tables for the three slides
# (3T, 3L, 3N). Each CSV is read without a header row and transposed, and the
# raw array is kept.
# NOTE(review): presumably the transpose puts spots on rows and GEMs on columns,
# which is what the later axis=1 concatenation and column-wise pruning expect --
# confirm against the CSV layout.
TandNK_ST_inference_3T = pd.read_csv('./nHDP/binary_nHDP_count_TNK_3T_final.csv', header = None).transpose().values
Stroma_ST_inference_3T = pd.read_csv('./nHDP/binary_nHDP_count_Stromal_3T_final.csv', header = None).transpose().values
B_cell_ST_inference_3T = pd.read_csv('./nHDP/binary_nHDP_count_PlasmaB_3T_final.csv', header = None).transpose().values
Epithelial_ST_inference_3T = pd.read_csv('./nHDP/binary_nHDP_count_Epithelial_3T_final.csv', header = None).transpose().values
Myeloid_ST_inference_3T = pd.read_csv('./nHDP/binary_nHDP_count_Myeloid_3T_final.csv', header = None).transpose().values
TandNK_ST_inference_3L = pd.read_csv('./nHDP/binary_nHDP_count_TNK_3L_final.csv', header = None).transpose().values
Stroma_ST_inference_3L = pd.read_csv('./nHDP/binary_nHDP_count_Stromal_3L_final.csv', header = None).transpose().values
B_cell_ST_inference_3L = pd.read_csv('./nHDP/binary_nHDP_count_PlasmaB_3L_final.csv', header = None).transpose().values
Epithelial_ST_inference_3L = pd.read_csv('./nHDP/binary_nHDP_count_Epithelial_3L_final.csv', header = None).transpose().values
Myeloid_ST_inference_3L = pd.read_csv('./nHDP/binary_nHDP_count_Myeloid_3L_final.csv', header = None).transpose().values
TandNK_ST_inference_3N = pd.read_csv('./nHDP/binary_nHDP_count_TNK_3N_final.csv', header = None).transpose().values
Stroma_ST_inference_3N = pd.read_csv('./nHDP/binary_nHDP_count_Stromal_3N_final.csv', header = None).transpose().values
B_cell_ST_inference_3N = pd.read_csv('./nHDP/binary_nHDP_count_PlasmaB_3N_final.csv', header = None).transpose().values
Epithelial_ST_inference_3N = pd.read_csv('./nHDP/binary_nHDP_count_Epithelial_3N_final.csv', header = None).transpose().values
Myeloid_ST_inference_3N = pd.read_csv('./nHDP/binary_nHDP_count_Myeloid_3N_final.csv', header = None).transpose().values

In [None]:
# Patient 3: place the five cell-type blocks side by side (column-wise) for each slide.
# The block order -- TandNK, Stroma, B cell, Epithelial, Myeloid -- is the order in
# which create_GEM_names() emits its labels, so it must not be changed.
GEM_combined_3T = np.hstack((TandNK_ST_inference_3T, Stroma_ST_inference_3T,
                             B_cell_ST_inference_3T, Epithelial_ST_inference_3T,
                             Myeloid_ST_inference_3T))
GEM_combined_3L = np.hstack((TandNK_ST_inference_3L, Stroma_ST_inference_3L,
                             B_cell_ST_inference_3L, Epithelial_ST_inference_3L,
                             Myeloid_ST_inference_3L))
GEM_combined_3N = np.hstack((TandNK_ST_inference_3N, Stroma_ST_inference_3N,
                             B_cell_ST_inference_3N, Epithelial_ST_inference_3N,
                             Myeloid_ST_inference_3N))

In [None]:
# Patient 4: load the per-cell-type nHDP count tables for the three slides
# (4T, 4L, 4N). Each CSV is read without a header row and transposed, and the
# raw array is kept.
# NOTE(review): presumably the transpose puts spots on rows and GEMs on columns,
# which is what the later axis=1 concatenation and column-wise pruning expect --
# confirm against the CSV layout.
TandNK_ST_inference_4T = pd.read_csv('./nHDP/binary_nHDP_count_TNK_4T_final.csv', header = None).transpose().values
Stroma_ST_inference_4T = pd.read_csv('./nHDP/binary_nHDP_count_Stromal_4T_final.csv', header = None).transpose().values
B_cell_ST_inference_4T = pd.read_csv('./nHDP/binary_nHDP_count_PlasmaB_4T_final.csv', header = None).transpose().values
Epithelial_ST_inference_4T = pd.read_csv('./nHDP/binary_nHDP_count_Epithelial_4T_final.csv', header = None).transpose().values
Myeloid_ST_inference_4T = pd.read_csv('./nHDP/binary_nHDP_count_Myeloid_4T_final.csv', header = None).transpose().values
TandNK_ST_inference_4L = pd.read_csv('./nHDP/binary_nHDP_count_TNK_4L_final.csv', header = None).transpose().values
Stroma_ST_inference_4L = pd.read_csv('./nHDP/binary_nHDP_count_Stromal_4L_final.csv', header = None).transpose().values
B_cell_ST_inference_4L = pd.read_csv('./nHDP/binary_nHDP_count_PlasmaB_4L_final.csv', header = None).transpose().values
Epithelial_ST_inference_4L = pd.read_csv('./nHDP/binary_nHDP_count_Epithelial_4L_final.csv', header = None).transpose().values
Myeloid_ST_inference_4L = pd.read_csv('./nHDP/binary_nHDP_count_Myeloid_4L_final.csv', header = None).transpose().values
TandNK_ST_inference_4N = pd.read_csv('./nHDP/binary_nHDP_count_TNK_4N_final.csv', header = None).transpose().values
Stroma_ST_inference_4N = pd.read_csv('./nHDP/binary_nHDP_count_Stromal_4N_final.csv', header = None).transpose().values
B_cell_ST_inference_4N = pd.read_csv('./nHDP/binary_nHDP_count_PlasmaB_4N_final.csv', header = None).transpose().values
Epithelial_ST_inference_4N = pd.read_csv('./nHDP/binary_nHDP_count_Epithelial_4N_final.csv', header = None).transpose().values
Myeloid_ST_inference_4N = pd.read_csv('./nHDP/binary_nHDP_count_Myeloid_4N_final.csv', header = None).transpose().values

In [None]:
# Patient 4: place the five cell-type blocks side by side (column-wise) for each slide.
# The block order -- TandNK, Stroma, B cell, Epithelial, Myeloid -- is the order in
# which create_GEM_names() emits its labels, so it must not be changed.
GEM_combined_4T = np.hstack((TandNK_ST_inference_4T, Stroma_ST_inference_4T,
                             B_cell_ST_inference_4T, Epithelial_ST_inference_4T,
                             Myeloid_ST_inference_4T))
GEM_combined_4L = np.hstack((TandNK_ST_inference_4L, Stroma_ST_inference_4L,
                             B_cell_ST_inference_4L, Epithelial_ST_inference_4L,
                             Myeloid_ST_inference_4L))
GEM_combined_4N = np.hstack((TandNK_ST_inference_4N, Stroma_ST_inference_4N,
                             B_cell_ST_inference_4N, Epithelial_ST_inference_4N,
                             Myeloid_ST_inference_4N))

# Delete GEMs

In [None]:
# Number of GEMs per cell type; create_GEM_names() turns it into 5 * GEM_n labels,
# one per column of each combined slide matrix.
GEM_n = 85
# Prune every slide independently, so each slide ends up with its own column set
# and its own matching label list (pruned_names_*).
# WARNING: this cell rebinds GEM_combined_* to the pruned matrices, so it is not
# idempotent. Re-running it without first re-running the loading/concatenation
# cells would pair already-pruned columns with the full 5 * GEM_n label list.
GEM_combined_1T, pruned_names_1T = prune_GEMs(GEM_n, GEM_combined_1T)
GEM_combined_1L, pruned_names_1L = prune_GEMs(GEM_n, GEM_combined_1L)
GEM_combined_1N, pruned_names_1N = prune_GEMs(GEM_n, GEM_combined_1N)
GEM_combined_2T, pruned_names_2T = prune_GEMs(GEM_n, GEM_combined_2T)
GEM_combined_2L, pruned_names_2L = prune_GEMs(GEM_n, GEM_combined_2L)
GEM_combined_2N, pruned_names_2N = prune_GEMs(GEM_n, GEM_combined_2N)
GEM_combined_3T, pruned_names_3T = prune_GEMs(GEM_n, GEM_combined_3T)
GEM_combined_3L, pruned_names_3L = prune_GEMs(GEM_n, GEM_combined_3L)
GEM_combined_3N, pruned_names_3N = prune_GEMs(GEM_n, GEM_combined_3N)
GEM_combined_4T, pruned_names_4T = prune_GEMs(GEM_n, GEM_combined_4T)
GEM_combined_4L, pruned_names_4L = prune_GEMs(GEM_n, GEM_combined_4L)
GEM_combined_4N, pruned_names_4N = prune_GEMs(GEM_n, GEM_combined_4N)

In [None]:
def binarize_GEM_count(GEM_express_thre, GEM_matrix):
    """Return a 0/1 copy of ``GEM_matrix``: 1 where the value reaches the threshold.

    The input is left untouched. The previous implementation overwrote the
    caller's array in two passes, which (a) silently destroyed the raw counts
    unless the caller remembered to pass a copy and (b) for a threshold <= 0
    turned below-threshold values into 1, because the zeros written by the
    first pass satisfied the second comparison.

    Parameters
    ----------
    GEM_express_thre : number
        Entries ``>=`` this value become 1; all other entries become 0.
    GEM_matrix : array-like
        Count matrix of any shape.

    Returns
    -------
    numpy.ndarray
        New array with the same shape and dtype as the input, holding only
        0 and 1. NaN entries compare false and therefore map to 0.
    """
    GEM_matrix = np.asarray(GEM_matrix)
    return (GEM_matrix >= GEM_express_thre).astype(GEM_matrix.dtype)

In [None]:
# Counts at or above this value are treated as "expressed" (1); everything else is 0.
GEM_express_thre = 20

# Binarize a private copy of each pruned slide matrix so the raw counts in
# GEM_combined_* stay available for the similarity ranking and the plots.
bin_GEM_combined_1T = binarize_GEM_count(GEM_express_thre, GEM_combined_1T.copy())
bin_GEM_combined_1L = binarize_GEM_count(GEM_express_thre, GEM_combined_1L.copy())
bin_GEM_combined_1N = binarize_GEM_count(GEM_express_thre, GEM_combined_1N.copy())
bin_GEM_combined_2T = binarize_GEM_count(GEM_express_thre, GEM_combined_2T.copy())
bin_GEM_combined_2L = binarize_GEM_count(GEM_express_thre, GEM_combined_2L.copy())
bin_GEM_combined_2N = binarize_GEM_count(GEM_express_thre, GEM_combined_2N.copy())
bin_GEM_combined_3T = binarize_GEM_count(GEM_express_thre, GEM_combined_3T.copy())
bin_GEM_combined_3L = binarize_GEM_count(GEM_express_thre, GEM_combined_3L.copy())
bin_GEM_combined_3N = binarize_GEM_count(GEM_express_thre, GEM_combined_3N.copy())
bin_GEM_combined_4T = binarize_GEM_count(GEM_express_thre, GEM_combined_4T.copy())
bin_GEM_combined_4L = binarize_GEM_count(GEM_express_thre, GEM_combined_4L.copy())
bin_GEM_combined_4N = binarize_GEM_count(GEM_express_thre, GEM_combined_4N.copy())

# Construct DataFrame

For each slide, create its GEM dataframe

In [None]:
# Three parallel lists, all in the same slide order
# (patient 1 T/L/N, patient 2 T/L/N, patient 3 T/L/N, patient 4 T/L/N).
# Index i in any of them refers to the same slide: pruned raw counts,
# their binarized counterpart, and the labels of the surviving GEM columns.
raw_GEMs = [GEM_combined_1T, GEM_combined_1L, GEM_combined_1N,
           GEM_combined_2T, GEM_combined_2L, GEM_combined_2N,
           GEM_combined_3T, GEM_combined_3L, GEM_combined_3N,
           GEM_combined_4T, GEM_combined_4L, GEM_combined_4N]
bin_GEMs = [bin_GEM_combined_1T, bin_GEM_combined_1L, bin_GEM_combined_1N,
           bin_GEM_combined_2T, bin_GEM_combined_2L, bin_GEM_combined_2N,
           bin_GEM_combined_3T, bin_GEM_combined_3L, bin_GEM_combined_3N,
           bin_GEM_combined_4T, bin_GEM_combined_4L, bin_GEM_combined_4N]
pruned_names = [pruned_names_1T, pruned_names_1L, pruned_names_1N,
               pruned_names_2T, pruned_names_2L, pruned_names_2N,
               pruned_names_3T, pruned_names_3L, pruned_names_3N,
               pruned_names_4T, pruned_names_4L, pruned_names_4N,]

In [None]:
def construct_dataframe(GEM_combined, pruned_names):
    """Wrap one slide's GEM matrix in a DataFrame whose columns are the GEM labels."""
    GEM_df = pd.DataFrame(data=GEM_combined, columns=pruned_names)
    return GEM_df

In [None]:
# One labelled DataFrame per slide, in the same order as raw_GEMs / pruned_names.
raw_GEMs_df = [
    construct_dataframe(slide_matrix, slide_names)
    for slide_matrix, slide_names in zip(raw_GEMs, pruned_names)
]

In [None]:
# Quick check: one DataFrame was built per entry of raw_GEMs (12 slides are listed there).
len(raw_GEMs_df)

# Get GEM-GEM similarity

Here we use cosine similarity to rank GEM-GEM correlation on each slide (slides are treated independently because there is a batch effect between them). For each slide we keep the top 30 GEM pairs with the highest cosine similarity score. (We settled on cosine vector similarity and abandoned the experimental customized ("costimize") score; its code is kept below, commented out.)

In [None]:
# def GEM_simiarity_costimize(slide_binary, GEM_names, top_n):
#     scores = []
#     GEM_name_pairs = []
#     for i in range(slide_binary.shape[1] - 1):
#         GEM_i_name = GEM_names[i]
#         for j in range(i + 1, slide_binary.shape[1]):
#             GEM_j_name = GEM_names[j]
#             #skip celltype confounding (GEM name 1st letter matches, skip)
#             if GEM_i_name[0] == GEM_j_name[0]:
#                 continue
#             bin_GEM_i = slide_binary[:, i]
#             bin_GEM_j = slide_binary[:, j]
#             score = get_score(bin_GEM_i, bin_GEM_j)
#             scores.append(score)
#             GEM_name_pairs.append((GEM_i_name, GEM_j_name))
            
#     sorted_GEM_names_pairs = [x for _, x in sorted(zip(scores, GEM_name_pairs))]
#     sorted_GEM_names_pairs.reverse()
#     if top_n != None:
#         return sorted_GEM_names_pairs[:top_n]
#     else:
#         return sorted_GEM_names_pairs

In [None]:
# def get_score(GEM1, GEM2):
#     GEM_combined = GEM1 + GEM2
#     unique, counts = np.unique(GEM_combined, return_counts=True)
#     if max(unique) == 1:
#         return 0
#     else:
#         return counts[2] / (counts[1] + counts[2])

In [None]:
def GEM_simiarity_cosine(slide_raw, GEM_names, top_n):
    """Rank cross-cell-type GEM pairs of one slide by cosine similarity.

    Every unordered pair of columns ``(i, j)`` with ``i < j`` is scored with
    ``get_cosine_similarity``, except pairs whose labels start with the same
    character, which are treated as belonging to the same cell type and
    skipped.

    Parameters
    ----------
    slide_raw : numpy.ndarray
        2-D matrix; each column is one GEM.
    GEM_names : sequence of str
        Label of each column, in column order.
    top_n : int or None
        Number of best pairs to return; ``None`` returns every scored pair.

    Returns
    -------
    list of tuple
        ``(GEM_i_name, GEM_j_name)`` tuples sorted from the highest score to
        the lowest; equal scores are ordered by the name tuple, descending.
    """
    n_GEMs = slide_raw.shape[1]
    scored_pairs = []
    for i in range(n_GEMs - 1):
        GEM_i_name = GEM_names[i]
        for j in range(i + 1, n_GEMs):
            GEM_j_name = GEM_names[j]
            # Skip cell-type confounding: the first letter of the label
            # identifies the cell type, so equal first letters mean both
            # GEMs come from the same cell type.
            if GEM_i_name[0] == GEM_j_name[0]:
                continue
            score = get_cosine_similarity(slide_raw[:, i], slide_raw[:, j])
            scored_pairs.append((score, (GEM_i_name, GEM_j_name)))

    ranked_pairs = [pair for _, pair in sorted(scored_pairs, reverse=True)]
    # A slice bound of None keeps the whole list, so no special case is needed.
    return ranked_pairs[:top_n]

In [None]:
def get_cosine_similarity(A, B):
    """Cosine similarity of two 1-D vectors.

    Returns ``dot(A, B) / (||A|| * ||B||)``. When either vector has zero norm
    the ratio is undefined (0/0); 0.0 is returned in that case instead of NaN,
    so the score can still be sorted and compared.
    """
    denominator = norm(A) * norm(B)
    if denominator == 0:
        return 0.0
    return np.dot(A, B) / denominator

In [None]:
# costimize_GEM_similarities = []
# for i in range(12):
#     similarity = GEM_simiarity_costimize(bin_GEMs[i], pruned_names[i], 50)
#     costimize_GEM_similarities.append(similarity)

In [None]:
# Top 30 cross-cell-type GEM pairs per slide, ranked by cosine similarity of the raw counts.
cosine_GEM_similarities = [
    GEM_simiarity_cosine(raw_GEMs[i], pruned_names[i], 30)
    for i in range(12)
]

# Write each correlated GEM pair to txt

In [None]:
# for s in range(len(costimize_GEM_similarities)):
#     slide_pairs = costimize_GEM_similarities[s]
#     slide_GEM = raw_GEMs_df[s]
#     for i, pair in enumerate(slide_pairs):
#         GEM1_name = pair[0]
#         GEM2_name = pair[1]
#         GEM1_exp = slide_GEM.loc[:, GEM1_name].values
#         GEM2_exp = slide_GEM.loc[:, GEM2_name].values
#         np.savetxt("./Ligand-Receptor/slide_correlated_GEMs/costimize/" + "slide-" + str(s) + "-pair-" + str(i) + '-' + GEM1_name + ".txt", 
#                    GEM1_exp.astype(int), fmt = '%i')
#         np.savetxt("./Ligand-Receptor/slide_correlated_GEMs/costimize/" + "slide-" + str(s) + "-pair-" + str(i) + '-' + GEM2_name + ".txt", 
#                    GEM2_exp.astype(int), fmt = '%i')

In [None]:
# for s in range(len(cosine_GEM_similarities)):
#     slide_pairs = cosine_GEM_similarities[s]
#     slide_GEM = raw_GEMs_df[s]
#     for i, pair in enumerate(slide_pairs):
#         GEM1_name = pair[0]
#         GEM2_name = pair[1]
#         GEM1_exp = slide_GEM.loc[:, GEM1_name].values
#         GEM2_exp = slide_GEM.loc[:, GEM2_name].values
#         np.savetxt("./Ligand-Receptor/slide_correlated_GEMs/cosine/" + "slide-" + str(s) + "-pair-" + str(i) + '-' + GEM1_name + ".txt", 
#                    GEM1_exp.astype(int), fmt = '%i')
#         np.savetxt("./Ligand-Receptor/slide_correlated_GEMs/cosine/" + "slide-" + str(s) + "-pair-" + str(i) + '-' + GEM2_name + ".txt", 
#                    GEM2_exp.astype(int), fmt = '%i')

# Read in ST data

In [None]:
# Visium output folders, one per slide. For each patient (1-4) there are three
# slides: T = Primary_Tumor, L = Leading_Edge, N = Adjacent.
HCC_1T = './raw_data/Primary_Tumor/HCC-1T'
HCC_1L = './raw_data/Leading_Edge/HCC-1L'
HCC_1N = './raw_data/Adjacent/HCC-1N'

HCC_2T = './raw_data/Primary_Tumor/HCC-2T'
HCC_2L = './raw_data/Leading_Edge/HCC-2L'
HCC_2N = './raw_data/Adjacent/HCC-2N'

HCC_3T = './raw_data/Primary_Tumor/HCC-3T'
HCC_3L = './raw_data/Leading_Edge/HCC-3L'
HCC_3N = './raw_data/Adjacent/HCC-3N'

HCC_4T = './raw_data/Primary_Tumor/HCC-4T'
HCC_4L = './raw_data/Leading_Edge/HCC-4L'
HCC_4N = './raw_data/Adjacent/HCC-4N'

# Same slide order as raw_GEMs / pruned_names (1T, 1L, 1N, 2T, ...), so index i
# addresses the same slide everywhere.
visium_paths = [HCC_1T, HCC_1L, HCC_1N, HCC_2T, HCC_2L, HCC_2N,
                HCC_3T, HCC_3L, HCC_3N, HCC_4T, HCC_4L, HCC_4N]
# Human-readable slide labels in the same order.
# NOTE(review): 'Ajacent' looks like a misspelling of 'Adjacent'; it is part of the
# label strings, so check for downstream uses before changing it.
tissue_names_unique = ['Patient 1 Primary Tumor', 
                       'Patient 1 Leading Edge', 
                       'Patient 1 Ajacent Area',
                       'Patient 2 Primary Tumor', 
                       'Patient 2 Leading Edge', 
                       'Patient 2 Ajacent Area', 
                       'Patient 3 Primary Tumor', 
                       'Patient 3 Leading Edge', 
                       'Patient 3 Ajacent Area',
                       'Patient 4 Primary Tumor', 
                       'Patient 4 Leading Edge', 
                       'Patient 4 Ajacent Area']

In [None]:
# Load the 12 Visium slides, in the order given by visium_paths.
adata_l = [sc.read_visium(visium_paths[i]) for i in range(12)]

# Plug in GEMs and Plot Correlated GEMs (by cosine similarity)

In [None]:
from PIL import Image
from PIL import ImageEnhance
import sys
from matplotlib.colors import LinearSegmentedColormap
import matplotlib as mpl

In [None]:
# Attach each pruned GEM of a slide to that slide's AnnData as an obs column named
# after the GEM, so the plotting helpers can colour spots by GEM.
for i in range(12):
    adata = adata_l[i]
    # Iterating over the transpose yields the matrix column by column.
    for GEM_name, GEM_values in zip(pruned_names[i], raw_GEMs[i].T):
        adata.obs[GEM_name] = GEM_values

In [None]:
# Two-stop colormaps running from white to a saturated RGB colour.
PureRed = mpl.colors.LinearSegmentedColormap.from_list("", ["white", (1, 0, 0)])
PureGreen = mpl.colors.LinearSegmentedColormap.from_list("", ["white",(0, 1, 0)])
# NOTE: despite the name, the end colour (1, 1, 0) is yellow in RGB
# (full red + full green), not orange.
PureOrange = mpl.colors.LinearSegmentedColormap.from_list("", ["white",(1, 1, 0)])

In [None]:
def plot_globally(counter, adata, GEM1, GEM2, method, dpi = 50):
    """Render the three-panel comparison figure for one GEM pair on one slide.

    Panels: GEM1 alone (PureRed), GEM2 alone (PureGreen), and a composite made by
    blending the overlap layer (PureOrange) with the two "exclusive" layers.
    The colormaps are read from module level.

    ``counter`` names the output files and ``method`` selects the output
    sub-folder. The two exclusive layers and the two intermediate blends are
    deleted once the composite exists; the per-GEM panels, the overlap layer
    and the composite stay on disk.

    Returns the path of the concatenated final image.
    """
    # Single-GEM panels.
    gem1_panel = generate_titled_image(adata, 1, counter, GEM1, dpi, PureRed, method)
    gem2_panel = generate_titled_image(adata, 2, counter, GEM2, dpi, PureGreen, method)

    # Layers of the composite panel.
    shared_layer = generate_overlap_image(adata, 3, counter, GEM1, GEM2, dpi, PureOrange, method)
    gem1_only_layer, gem2_only_layer = generate_disjoint_image(
        adata, 4, counter, GEM1, GEM2, dpi, PureRed, PureGreen, method)

    # Blend shared + exclusive layers pairwise, then blend the two results.
    blend_with_gem1 = Overlap_two_pngs(shared_layer, gem1_only_layer, method, 6, counter, False)
    blend_with_gem2 = Overlap_two_pngs(shared_layer, gem2_only_layer, method, 7, counter, False)
    composite_panel = Overlap_two_pngs(blend_with_gem1, blend_with_gem2, method, 8, counter, True)

    final_path = concatenate_three_images(gem1_panel, gem2_panel, composite_panel, method, counter)

    # Only the scratch images are removed.
    for scratch_png in (gem1_only_layer, gem2_only_layer, blend_with_gem1, blend_with_gem2):
        os.remove(scratch_png)
    return final_path

In [None]:
def generate_disjoint_image(adata, order, counter, GEM1, GEM2, dpi, color_map1, color_map2, method):
    """Plot the spots where exactly one of the two GEMs is non-zero.

    Two obs columns are added to ``adata``: ``'<GEM1>-<GEM2>'`` holds GEM1's
    values with every spot where GEM2 is non-zero set to 0, and
    ``'<GEM2>-<GEM1>'`` holds the mirror image. Each column is drawn without
    the tissue image (``alpha_img = 0``), without title or colorbar, and saved
    as ``<counter>-<order>.png`` / ``<counter>-<order + 1>.png`` under the
    ``method`` folder. The folder is not created here.

    Returns the two file paths (GEM1-only first).
    """
    sc.set_figure_params(fontsize = 60, dpi=dpi, dpi_save=dpi)

    GEM1_exp = adata.obs[GEM1].values.copy()
    GEM2_exp = adata.obs[GEM2].values.copy()
    # Take both masks before zeroing anything so each refers to the original values.
    GEM1_present = GEM1_exp != 0
    GEM2_present = GEM2_exp != 0
    GEM1_exp[GEM2_present] = 0
    GEM2_exp[GEM1_present] = 0

    disjoin_name1 = GEM1 + '-' + GEM2
    disjoin_name2 = GEM2 + '-' + GEM1
    adata.obs[disjoin_name1] = GEM1_exp
    adata.obs[disjoin_name2] = GEM2_exp

    saved_paths = []
    panels = ((disjoin_name1, color_map1, order),
              (disjoin_name2, color_map2, order + 1))
    for obs_key, panel_cmap, file_index in panels:
        fig, ax = plt.subplots(1,1, figsize=(20, 20))
        sc.pl.spatial(adata, img_key="hires", color=obs_key, ax = ax, vmin = 0, vmax = 'p95',
                      alpha_img = 0, alpha = 1, color_map = panel_cmap, colorbar_loc = None,
                      title = '', show = False)

        out_path = './GEM_vis/GEM_corr_png/' + method + '/' + str(counter) + '-' + str(file_index) + '.png'

        ax.set_xlabel('')
        ax.set_ylabel('')
        fig.savefig(out_path)
        plt.close(fig)
        saved_paths.append(out_path)

    return saved_paths[0], saved_paths[1]

In [None]:
def generate_overlap_image(adata, order, counter, GEM1, GEM2, dpi, color_map, method):
    """Plot the spots where both GEMs are non-zero, coloured by their summed value.

    Adds the obs column ``'<GEM1>+<GEM2>'`` to ``adata``: GEM1 + GEM2 where both
    are non-zero, 0 elsewhere. The plot is drawn over a faint tissue image
    (``alpha_img = 0.2``) and saved as ``GEM_sum/<counter>.png`` under the
    ``method`` folder. The folder is not created here. ``order`` is accepted
    for signature parity with the other plotters but is not used.

    Returns the path of the saved image.
    """
    sc.set_figure_params(fontsize = 40, dpi=dpi, dpi_save=dpi)

    GEM1_exp = adata.obs[GEM1].values
    GEM2_exp = adata.obs[GEM2].values
    sum_exp = GEM1_exp + GEM2_exp
    # Keep the sum only where both GEMs are present.
    sum_exp[(GEM1_exp == 0) | (GEM2_exp == 0)] = 0

    sum_name = GEM1 + '+' + GEM2
    adata.obs[sum_name] = sum_exp

    fig, ax = plt.subplots(1,1, figsize=(20, 20))
    sc.pl.spatial(adata, img_key="hires", color=sum_name, ax = ax, vmin = 0, vmax = 'p90',
                  alpha_img = 0.2, alpha = 1, color_map = color_map, colorbar_loc = None,
                  title = '', show = False)
    ax.set_xlabel('')
    ax.set_ylabel('')

    file_name = './GEM_vis/GEM_corr_png/' + method + '/GEM_sum/' + str(counter) + '.png'
    fig.savefig(file_name)
    plt.close(fig)

    return file_name

In [None]:
def generate_titled_image(adata, order, counter, GEM_name, dpi, color_map, method, show_title = True):
    """Plot a single GEM on its slide and save the figure.

    Parameters
    ----------
    adata : AnnData
        Slide whose ``obs`` holds a column named ``GEM_name``.
    order : int
        Panel number; selects the output sub-folder ``GEM<order>``.
    counter : int or str
        Used as the output file name (``<counter>.png``).
    GEM_name : str
        obs column to colour the spots by.
    dpi : int
        Figure and save resolution, forwarded to ``sc.set_figure_params``.
    color_map : matplotlib colormap
        Colormap for the spots.
    method : str
        Output sub-folder under ``./GEM_vis/GEM_corr_png/``.
    show_title : bool, default True
        When True the GEM name is used as the plot title; when False the
        title is left empty. (This flag used to be ignored.)

    Returns
    -------
    str
        Path of the saved image. The target folder is not created here.
    """
    # Apply the figure parameters before the figure is created, as the other
    # plotting helpers do, so the axes are built with these settings.
    sc.set_figure_params(fontsize = 60, dpi=dpi, dpi_save=dpi)
    fig, ax = plt.subplots(1,1, figsize=(20, 20))

    title = GEM_name if show_title else ''
    sc.pl.spatial(adata, img_key="hires", color=GEM_name, ax = ax, vmin = 0, vmax = 'p95', alpha_img = 0.3,
                      alpha = 1, color_map = color_map, colorbar_loc = 'bottom', title = title, show = False)

    file_name = './GEM_vis/GEM_corr_png/' + method + '/GEM' + str(order)+ '/' + str(counter)  + '.png'

    ax.set_xlabel('')
    ax.set_ylabel('')
    fig.savefig(file_name)
    plt.close(fig)

    return file_name

In [None]:
def Overlap_two_pngs(file_name1, file_name2, method, order, counter, final_step):
    """Blend two PNG files into one and boost the contrast of the result.

    Both images are converted to RGBA, blended with ``Image.blend`` using an
    alpha of 0.40, and passed through a contrast enhancement of factor 1.6.

    Parameters
    ----------
    file_name1, file_name2 : str
        Paths of the images to blend.
    method : str
        Output sub-folder under ``./GEM_vis/GEM_corr_png/``.
    order : int
        Suffix of the intermediate file name; unused when ``final_step`` is True.
    counter : int or str
        Prefix (or, for the final step, the whole stem) of the output file name.
    final_step : bool
        True writes ``overlap/<counter>.png``; False writes the intermediate
        ``<counter>-<order>.png``.

    Returns
    -------
    str
        Path of the saved image. The target folder is not created here.
    """
    # Context managers make sure the source files are closed again; the
    # converted copies are independent of the open file handles.
    with Image.open(file_name1) as img1, Image.open(file_name2) as img2:
        new_img = Image.blend(img1.convert("RGBA"), img2.convert("RGBA"), 0.40)

    contrast = 1.6
    new_img = ImageEnhance.Contrast(new_img).enhance(contrast)

    if final_step:
        file_name = './GEM_vis/GEM_corr_png/' + method + '/overlap/' + str(counter) + '.png'
    else:
        file_name = './GEM_vis/GEM_corr_png/' + method + '/' + str(counter) + '-' + str(order) + '.png'
    new_img.save(file_name,"PNG")
    return file_name

In [None]:
def concatenate_three_images(file_name1, file_name3, file_name5, method, counter):
    """Paste three images side by side, left to right, and save the result.

    The canvas is an RGB image as wide as the three inputs together and as
    tall as the tallest input; every image is pasted at the top edge.

    Parameters
    ----------
    file_name1, file_name3, file_name5 : str
        Paths of the left, middle and right image.
    method : str
        Output sub-folder under ``./GEM_vis/GEM_corr_png/``.
    counter : int or str
        Stem of the output file name (``final/<counter>.png``).

    Returns
    -------
    str
        Path of the saved image. The target folder is not created here.
    """
    images = []
    for path in (file_name1, file_name3, file_name5):
        # Copy the pixel data out so the file handle can be closed immediately.
        with Image.open(path) as source:
            images.append(source.copy())

    widths, heights = zip(*(im.size for im in images))
    new_im = Image.new('RGB', (sum(widths), max(heights)))

    x_offset = 0
    for im in images:
        new_im.paste(im, (x_offset,0))
        x_offset += im.size[0]

    file_name = './GEM_vis/GEM_corr_png/' + method + '/final/' + str(counter) + '.png'
    new_im.save(file_name)
    return file_name

In [None]:
# counter = 1
# for i in range(12):
#     adata = adata_l[i]
#     for s in range(len(costimize_GEM_similarities[i])):
#         plot_globally(counter, adata, costimize_GEM_similarities[i][s][0], 
#                       costimize_GEM_similarities[i][s][1], 'costimize')
#         print("Progress: ", counter)
#         counter += 1       

In [None]:
# Render every top-ranked pair of every slide; `counter` numbers the output
# files consecutively across all slides.
counter = 1
for i in range(12):
    adata = adata_l[i]
    for GEM1_name, GEM2_name in cosine_GEM_similarities[i]:
        plot_globally(counter, adata, GEM1_name, GEM2_name, 'cosine')
        print("Progress: ", counter)
        counter += 1

# Discover GEM Common Pattern

A pattern is a GEM pair that appears among the top correlated GEM pairs of at least 2 slides.

In [None]:
import cv2
import json

In [None]:
# Map every top-ranked GEM pair to the indices of the slides it was found on.
common_corr = dict()
for s in range(12):
    for corr in cosine_GEM_similarities[s]:
        common_corr.setdefault(corr, []).append(s)

# A pair counts as a pattern when it shows up on at least two slides.
found_patterns = {
    pair: slides
    for pair, slides in common_corr.items()
    if len(slides) >= 2
}

In [None]:
# Save the patterns as the plain-text repr of the dict (str(dict)); this is not JSON.
# 'w+' truncates/creates the file; it is only written to here.
with open(r'found_patterns.txt','w+') as f:
    f.write(str(found_patterns))

In [None]:
# For every pattern, render the pair on each slide it occurs on and stack the
# resulting figures vertically into a single image named after the pair.
count = 1
for pair, slides in found_patterns.items():
    GEM1_name, GEM2_name = pair[0], pair[1]

    pair_paths = []
    for s in slides:
        # `count` keeps increasing across patterns, so every render has its own file name.
        counter = "Pattern" + str(count)
        pair_paths.append(plot_globally(counter, adata_l[s], GEM1_name, GEM2_name, 'cosine'))
        count += 1

    imgs = [cv2.imread(path) for path in pair_paths]
    im_v = cv2.vconcat(imgs)
    cv2.imwrite('./GEM_vis/GEM_corr_png/cosine/Patterns/' + pair[0] + '-' + pair[1] + '.png', im_v)