In [None]:
import numpy as np
import matplotlib.pyplot as plt
import os
from numpy.linalg import norm
import scanpy as sc
import pandas as pd
import anndata as ad
import cv2
import copy
import json
from scipy.optimize import curve_fit
from scipy.stats import linregress

In [None]:
# List the input files of every directory once.
# os.listdir() returns entries in arbitrary order; the names are sorted so that
# the dictionaries built from them (and any ranking that breaks ties by
# insertion order) come out the same on every run and machine.
all_ligand_filenames = sorted(os.listdir('./Ligand-Receptor/slide_lr_without_diffusion_without_normalization/ligands/'))
all_receptor_filenames = sorted(os.listdir('./Ligand-Receptor/slide_lr_without_diffusion_without_normalization/receptors/'))
all_lr_filenames = sorted(os.listdir('./Ligand-Receptor/slide_lr_without_diffusion_without_normalization/products/'))
all_GEM_filenames = sorted(os.listdir('./Ligand-Receptor/slide_correlated_GEMs/cosine/'))

In [None]:
def get_name(f, name = 'lr'):
    """Extract the entity name embedded in a per-slide file name.

    The leading dash-separated fields are skipped (four of them when
    ``name == 'GEM'``, two otherwise) and the last four characters of the
    file name are dropped.  If the file name holds fewer dashes than the
    number to skip, nothing is skipped and only the last four characters
    are removed.

    Parameters
    ----------
    f : str
        File name to parse.
    name : str
        Kind of file; only the value ``'GEM'`` changes the behaviour.

    Returns
    -------
    str
        The remaining part of the file name.
    """
    fields_to_skip = 4 if name == 'GEM' else 2
    pieces = f.split('-', fields_to_skip)
    if len(pieces) <= fields_to_skip:
        # Not enough dashes: keep the whole name, minus the 4-char suffix.
        return f[:-4]
    return pieces[fields_to_skip][:-4]

In [None]:
def construct_dictionary(path, file_names, task_type = 'lr', n_slides = 12):
    """Load per-slide vectors from text files into a nested dictionary.

    Parameters
    ----------
    path : str
        Directory that contains the files.
    file_names : iterable of str
        File names inside ``path``; only names starting with
        ``"slide-<i>-"`` for ``0 <= i < n_slides`` are loaded.
    task_type : str
        Forwarded to ``get_name`` to extract the entry name from a file name.
    n_slides : int
        Number of slides to create entries for (defaults to the original 12).

    Returns
    -------
    dict
        ``{"slide<i>": {name: array loaded with np.loadtxt}}`` with one
        (possibly empty) inner dictionary per slide.
    """
    slides_dict = {"slide" + str(i): dict() for i in range(n_slides)}

    for f in file_names:
        name = get_name(f, task_type)
        for i in range(n_slides):
            # The trailing dash keeps "slide-1-" from matching "slide-10-...".
            if f.startswith("slide-" + str(i) + '-'):
                slides_dict["slide" + str(i)][name] = np.loadtxt(os.path.join(path, f))
                # A file name can match only one slide prefix.
                break
    return slides_dict

In [None]:
# Input directories for the ligand, receptor and ligand-receptor product files.
ligands_path, receptors_path, lr_path = (
    './Ligand-Receptor/slide_lr_without_diffusion_without_normalization/ligands/',
    './Ligand-Receptor/slide_lr_without_diffusion_without_normalization/receptors/',
    './Ligand-Receptor/slide_lr_without_diffusion_without_normalization/products/',
)

In [None]:
# Load every ligand, receptor and ligand-receptor product file, grouped per slide.
slides_ligand = construct_dictionary(
    path=ligands_path, file_names=all_ligand_filenames, task_type='ligands'
)
slides_receptor = construct_dictionary(
    path=receptors_path, file_names=all_receptor_filenames, task_type='receptors'
)
slides_lr = construct_dictionary(
    path=lr_path, file_names=all_lr_filenames, task_type='lr'
)

In [None]:
path_GEM = './Ligand-Receptor/slide_correlated_GEMs/cosine/'

# One (initially empty) name -> vector mapping per slide, keyed "slide0" .. "slide11".
slides_GEM = {"slide" + str(i): dict() for i in range(12)}

for f in all_GEM_filenames:
    for i in range(12):
        for j in range(30):
            # Only files named "slide-<i>-pair-<j>-..." are loaded; the GEM name
            # stored as key is whatever get_name() extracts from the file name.
            if not f.startswith("slide-" + str(i) + "-pair" + '-' + str(j) + "-"):
                continue
            slides_GEM["slide" + str(i)][get_name(f, 'GEM')] = np.loadtxt(path_GEM + f)

# Alignment between GEM and LR

`found_patterns` holds the GEM pairs that show up among the top 30 correlated GEM pairs of more than 2 slides, together with the slides in which each pair shows up.

In [None]:
# NOTE(review): eval() executes whatever expression the file contains. Only run
# this on a found_patterns.txt you produced yourself; consider a safer format
# (e.g. ast.literal_eval or JSON) if the file can come from elsewhere.
with open(r'found_patterns.txt','r') as f:
    pattern_lines = f.readlines()

# Only the last line of the file is evaluated (an empty file evaluates '').
found_patterns = eval(pattern_lines[-1] if pattern_lines else '')

For every GEM pair in `found_patterns`, and for each slide in which the pair shows up, we represent the pair by the average of its 2 GEMs (2 vectors become 1 vector). We then use cosine similarity to match this averaged GEM vector against the LR products of that slide and keep the top 10 most correlated LRs for each GEM pair on each slide.

In [None]:
# pair -> {"slide<N>": []}: one empty list per slide in which the pair was found.
Top_10_LRs_per_slide_per_pair = {
    pair: {'slide' + str(slide): [] for slide in slides}
    for pair, slides in found_patterns.items()
}

In [None]:
def get_cosine_similarity(A, B):
    """Return the cosine similarity between two vectors.

    Parameters
    ----------
    A, B : array-like
        Vectors of equal length.

    Returns
    -------
    float
        ``dot(A, B) / (||A|| * ||B||)``, or ``0.0`` when either vector has
        zero norm.  Without this guard the result would be NaN, and NaN
        scores make the ordering produced by a later sort unreliable.
    """
    denominator = norm(A) * norm(B)
    if denominator == 0:
        return 0.0
    return np.dot(A, B) / denominator

In [None]:
# For every GEM pair and every slide it was found in, rank all LR products of
# that slide by cosine similarity to the averaged GEM vector and keep the names
# of the best 10.
for pair, slides in Top_10_LRs_per_slide_per_pair.items():
    GEM1 = pair[0]
    GEM2 = pair[1]
    for slide in list(slides.keys()):
        GEM_pair_average = (slides_GEM[slide][GEM1] + slides_GEM[slide][GEM2]) / 2
        cosine_score = [(LR_name, get_cosine_similarity(GEM_pair_average, val))
                        for LR_name, val in slides_lr[slide].items()]
        cosine_score = sorted(cosine_score, key=lambda tup: tup[1], reverse = True)
        # Assign instead of append so that re-running the cell does not keep
        # growing the list; slicing also copes with slides that have fewer
        # than 10 LR products.
        Top_10_LRs_per_slide_per_pair[pair][slide] = [name for name, _ in cosine_score[:10]]

Here we look for the LR pairs that appear in the top-10 list of every slide in which a pattern (GEM pair) shows up.

In [None]:
def common_member(p):
    """Return the elements shared by every collection in ``p``.

    Parameters
    ----------
    p : sequence of iterables
        Must contain at least one collection.

    Returns
    -------
    list
        The elements present in all collections, in no particular order.
    """
    shared = set(p[0])
    for idx in range(1, len(p)):
        shared.intersection_update(p[idx])
    return list(shared)

In [None]:
# For each GEM pair keep the LRs found in the top-10 list of all of its slides;
# pairs with fewer than 2 such LRs are left out.
common_LRs_per_pair = dict()
for pair, slides in Top_10_LRs_per_slide_per_pair.items():
    shared_LRs = common_member(list(slides.values()))
    if len(shared_LRs) >= 2:
        common_LRs_per_pair[pair] = shared_LRs

In [None]:
import pickle

# Save the pair -> shared LRs mapping to the file "common_LRs_per_pair"
# in the current working directory.
with open("common_LRs_per_pair", "wb") as pickle_out:
    pickle.dump(common_LRs_per_pair, pickle_out)

# Plot

In [None]:
# Raw data directories, one (tumor, leading edge, adjacent) triple per patient.
HCC_1T, HCC_1L, HCC_1N = ('./raw_data/Primary_Tumor/HCC-1T',
                          './raw_data/Leading_Edge/HCC-1L',
                          './raw_data/Adjacent/HCC-1N')
HCC_2T, HCC_2L, HCC_2N = ('./raw_data/Primary_Tumor/HCC-2T',
                          './raw_data/Leading_Edge/HCC-2L',
                          './raw_data/Adjacent/HCC-2N')
HCC_3T, HCC_3L, HCC_3N = ('./raw_data/Primary_Tumor/HCC-3T',
                          './raw_data/Leading_Edge/HCC-3L',
                          './raw_data/Adjacent/HCC-3N')
HCC_4T, HCC_4L, HCC_4N = ('./raw_data/Primary_Tumor/HCC-4T',
                          './raw_data/Leading_Edge/HCC-4L',
                          './raw_data/Adjacent/HCC-4N')

# Position in this list is the slide index used throughout the notebook.
visium_paths = [
    HCC_1T, HCC_1L, HCC_1N,
    HCC_2T, HCC_2L, HCC_2N,
    HCC_3T, HCC_3L, HCC_3N,
    HCC_4T, HCC_4L, HCC_4N,
]

# Display names, in the same order as visium_paths.
# NOTE(review): "Ajacent" looks like a misspelling of "Adjacent"; it is kept
# unchanged here because these strings end up in figure titles.
tissue_names_unique = [
    'Patient 1 Primary Tumor', 'Patient 1 Leading Edge', 'Patient 1 Ajacent Area',
    'Patient 2 Primary Tumor', 'Patient 2 Leading Edge', 'Patient 2 Ajacent Area',
    'Patient 3 Primary Tumor', 'Patient 3 Leading Edge', 'Patient 3 Ajacent Area',
    'Patient 4 Primary Tumor', 'Patient 4 Leading Edge', 'Patient 4 Ajacent Area',
]

In [None]:
# Read the first 12 entries of visium_paths with scanpy, in list order.
adata_l = [sc.read_visium(visium_paths[i]) for i in range(12)]

In [None]:
def clean_outliers(GEM_exp, LR_exp):
    """Drop the positions where exactly one of the two values is zero.

    Positions where both values are zero, or both are non-zero, are kept.
    The inputs are not modified.

    Parameters
    ----------
    GEM_exp, LR_exp : sequences of numbers
        Must have the same length.

    Returns
    -------
    (list, list)
        The filtered ``GEM_exp`` values and the filtered ``LR_exp`` values,
        in the original order.
    """
    assert(len(GEM_exp) == len(LR_exp))

    GEM_exp_c = []
    LR_exp_c = []
    # Single pass; the previous list-membership test per element was quadratic.
    for gem_val, lr_val in zip(GEM_exp, LR_exp):
        if (gem_val == 0) != (lr_val == 0):
            # Exactly one of the two is zero: treated as an outlier.
            continue
        GEM_exp_c.append(gem_val)
        LR_exp_c.append(lr_val)
    return GEM_exp_c, LR_exp_c

In [None]:
def sigmoid(x, L ,x0, k, b):
    """Evaluate the logistic curve ``L / (1 + exp(-k * (x - x0))) + b``.

    ``L`` scales the curve, ``x0`` is its midpoint, ``k`` its steepness and
    ``b`` a constant offset added to the result.
    """
    exponent = -k * (x - x0)
    return L / (1 + np.exp(exponent)) + b

In [None]:
def linear(x, p):
    """Return ``p * x``: a line through the origin with slope ``p``."""
    return p * x

In [None]:
def get_ligand_receptor_name(LR, known_ligands = None):
    """Split an ``"<ligand>-<receptor>"`` product name into its two names.

    Names with a single dash are split at that dash.  When there are more
    dashes the split point is ambiguous (either name may itself contain a
    dash), so the longest leading part that is a known ligand is taken as
    the ligand; if no such part is known, the text before the first dash is
    the ligand and everything after it the receptor.

    Parameters
    ----------
    LR : str
        Product name; must contain at least one dash.
    known_ligands : container of str, optional
        Ligand names used to resolve ambiguous splits.  When omitted, the
        ligands of the current slide are used, i.e. the module-level
        ``slides_ligand[slide]`` (this keeps the original call signature
        working but relies on the global ``slide`` being set).

    Returns
    -------
    (str, str)
        ``(ligand_name, receptor_name)``.
    """
    parts = LR.split('-')
    if len(parts) <= 2:
        return parts[0], parts[1]

    if known_ligands is None:
        known_ligands = slides_ligand[slide].keys()

    # Try the longest candidate ligand first: "a-b-c" -> "a-b" | "c".
    for cut in range(len(parts) - 1, 1, -1):
        candidate = '-'.join(parts[:cut])
        if candidate in known_ligands:
            return candidate, '-'.join(parts[cut:])
    return parts[0], '-'.join(parts[1:])

In [None]:
def fit_sigmoid(xdata, ydata):
    """Fit ``sigmoid`` to the data with ``scipy.optimize.curve_fit``.

    The starting point is ``L = max(ydata)``, ``x0 = median(xdata)``,
    ``k = 1`` and ``b = min(ydata)``.

    Returns
    -------
    array
        The fitted parameters, in the order ``sigmoid`` takes them
        (``L, x0, k, b``).
    """
    initial_guess = [max(ydata), np.median(xdata),1,min(ydata)]
    fitted_params, _covariance = curve_fit(
        sigmoid, xdata, ydata, initial_guess, method='dogbox', max_nfev=5000
    )
    return fitted_params

In [None]:
def try_fit_sigmoid(x, y):
    """Best-effort sigmoid fit of ``y`` against ``x``.

    Returns
    -------
    (bool, array or None, float)
        ``(True, fitted y values, squared correlation between y and the
        fitted values)`` when the fit succeeds, ``(False, None, 0)`` when
        fitting or evaluating the curve raises an error.
    """
    try:
        popt_sigmoid = fit_sigmoid(x, y)
        esti = sigmoid(x, *popt_sigmoid)
        sigmoid_correlation_coefficient = np.corrcoef(y, esti)[0, 1] ** 2
    except Exception:
        # A failed fit is expected for some inputs and must not stop the
        # notebook; KeyboardInterrupt/SystemExit are deliberately not caught.
        return False, None, 0

    return True, esti, sigmoid_correlation_coefficient

In [None]:
def try_fit_linear(x, y):
    """Best-effort least-squares line fit of ``y`` against ``x``.

    Returns
    -------
    (bool, array or None, float)
        ``(True, fitted y values, r squared)`` when ``linregress`` succeeds,
        ``(False, None, 0)`` when it raises an error.
    """
    try:
        slope, intercept, r_value, p_value, std_err = linregress(x, y)
        esti = slope * np.array(x) + intercept
        linear_correlation_coefficient = r_value**2
    except Exception:
        # A failed fit is expected for some inputs and must not stop the
        # notebook; KeyboardInterrupt/SystemExit are deliberately not caught.
        return False, None, 0

    return True, esti, linear_correlation_coefficient


In [None]:
# Filled by the plotting loop: pair -> slide -> relationship -> fit scores.
slides_GEM_LR_corrs = {}

In [None]:
    # Columns of each figure row (x vs y); plots 3-10 repeat for every LR:
    #plot1: GEM1 vs GEM2
    #plot2: GEM2 vs GEM1
    #####################
    #plot3: GEM1 vs Ligand
    #plot4: GEM2 vs Ligand
    #plot5: Receptor vs GEM1
    #plot6: Receptor vs GEM2  
    #plot7: GEM1 vs Products
    #plot8: Products vs GEM1 
    #plot9: GEM2 vs Products
    #plot10: Products vs GEM2  

In [None]:
# For every GEM pair in common_LRs_per_pair, draw one figure (one row per slide
# in which the pair was found) of scatter plots with sigmoid and linear fits,
# record the fit scores in slides_GEM_LR_corrs, and save the figure to disk.
# whole_counter is only used for the progress message.
whole_counter = 0
for pair, LRs in common_LRs_per_pair.items():
    whole_counter += 1
    slides_GEM_LR_corrs[pair] = dict()
    print("Progress: %d / %d" %(whole_counter, len(common_LRs_per_pair)))
    show_up_slides = found_patterns[pair]

    # Grid: one row per slide; 2 columns for GEM-vs-GEM plus 8 columns per LR.
    # NOTE(review): ax is indexed as ax[i, col] below, which needs a 2-D axes
    # array; with a single slide plt.subplots() would return a 1-D array by
    # default -- confirm every pair has at least 2 slides.
    fig, ax = plt.subplots(len(show_up_slides), 2 + len(LRs) * 8, figsize=((2 + len(LRs) * 8) * 8, len(show_up_slides) * 8))
    
    for i, s in enumerate(show_up_slides):
        # We first plot the GEM1-GEM2 relationship. Since we do not know which GEM emits the
        # ligand and which one is regulated through the receptor, we plot GEM1-GEM2 and
        # GEM2-GEM1 individually.
        # `slide` is also read as a global by get_ligand_receptor_name() further down.
        slide = 'slide' + str(s)
        GEM1_exp = slides_GEM[slide][pair[0]]
        GEM2_exp = slides_GEM[slide][pair[1]]
        # Drop the spots where exactly one of the two GEMs is zero.
        f1_GEM1, f1_GEM2 = clean_outliers(GEM1_exp, GEM2_exp)
        
        # Suffix 1: GEM1 on x / GEM2 on y; suffix 2: the swapped direction.
        plot_sigmoid1, GEM_esti_sigmoid1, sigmoid_correlation_coefficient1 = try_fit_sigmoid(f1_GEM1, f1_GEM2)
        plot_sigmoid2, GEM_esti_sigmoid2, sigmoid_correlation_coefficient2 = try_fit_sigmoid(f1_GEM2, f1_GEM1)
        plot_linear1, GEM_esti_linear1, linear_correlation_coefficient1 = try_fit_linear(f1_GEM1, f1_GEM2)
        plot_linear2, GEM_esti_linear2, linear_correlation_coefficient2 = try_fit_linear(f1_GEM2, f1_GEM1)
        
        # Column 0: GEM1 (x) vs GEM2 (y), with the fits that succeeded overlaid.
        ax[i,0].scatter(f1_GEM1, f1_GEM2, alpha = 1, s = 120)
        if plot_sigmoid1:
            ax[i,0].scatter(f1_GEM1, GEM_esti_sigmoid1, color='red', 
                            label = 'sigmoid fit: %.2f'%sigmoid_correlation_coefficient1, alpha = 1, s = 120)
        if plot_linear1:
            ax[i,0].scatter(f1_GEM1, GEM_esti_linear1, color='orange', 
                            label = 'linear fit: %.2f'%linear_correlation_coefficient1, alpha = 1, s = 120)
        if plot_sigmoid1 or plot_linear1:
            ax[i,0].legend(loc = 'lower right', fontsize = 20)
            
        title = "Slide: %s \n GEM1: %s \n GEM2: %s " %(tissue_names_unique[s], pair[0], pair[1])
        xlabel = "GEM: %s"%(pair[0])
        ylabel = "GEM: %s"%(pair[1])
        ax[i,0].set_title(title, fontsize = 25)
        ax[i,0].set_xlabel(xlabel, fontsize = 25)
        ax[i,0].set_ylabel(ylabel, fontsize = 25)
        
        
        
        # Column 1: GEM2 (x) vs GEM1 (y).
        ax[i,1].scatter(f1_GEM2, f1_GEM1, alpha = 1, s = 120)
        if plot_sigmoid2:
            ax[i,1].scatter(f1_GEM2, GEM_esti_sigmoid2, color='red', 
                            label = 'sigmoid fit: %.2f'%sigmoid_correlation_coefficient2, alpha = 1, s = 120)
        if plot_linear2:
            ax[i,1].scatter(f1_GEM2, GEM_esti_linear2, color='orange', 
                            label = 'linear fit: %.2f'%linear_correlation_coefficient2, alpha = 1, s = 120)
        if plot_sigmoid2 or plot_linear2:
            ax[i,1].legend(loc = 'lower right', fontsize = 20)
            
        title = "Slide: %s \n GEM1: %s \n GEM2: %s " %(tissue_names_unique[s], pair[1], pair[0])
        xlabel = "GEM: %s"%(pair[1])
        ylabel = "GEM: %s"%(pair[0])
        ax[i,1].set_title(title, fontsize = 25)
        ax[i,1].set_xlabel(xlabel, fontsize = 25)
        ax[i,1].set_ylabel(ylabel, fontsize = 25)
        
        
        # Result containers for this pair/slide. Every stored value is a list
        # [sigmoid score, linear score] (squared correlations; 0 if a fit failed).
        slides_GEM_LR_corrs[pair][slide] = dict()
        slides_GEM_LR_corrs[pair][slide]['GEM1-GEM2'] = dict()
        slides_GEM_LR_corrs[pair][slide]['GEM1-GEM2'][(pair[0], pair[1])] = [sigmoid_correlation_coefficient1, linear_correlation_coefficient1]
        slides_GEM_LR_corrs[pair][slide]['GEM1-GEM2'][(pair[1], pair[0])] = [sigmoid_correlation_coefficient2, linear_correlation_coefficient2]
        slides_GEM_LR_corrs[pair][slide]['GEM1-Ligand'] = dict()
        slides_GEM_LR_corrs[pair][slide]['GEM2-Ligand'] = dict()
        slides_GEM_LR_corrs[pair][slide]['Receptor-GEM1'] = dict()
        slides_GEM_LR_corrs[pair][slide]['Receptor-GEM2'] = dict()
        slides_GEM_LR_corrs[pair][slide]['Products-GEM1'] = dict()
        slides_GEM_LR_corrs[pair][slide]['GEM1-Products'] = dict()
        slides_GEM_LR_corrs[pair][slide]['Products-GEM2'] = dict()
        slides_GEM_LR_corrs[pair][slide]['GEM2-Products'] = dict()
        
        for j, LR in enumerate(LRs):
            # Relies on the global `slide` set at the top of this iteration.
            ligand_name, receptor_name = get_ligand_receptor_name(LR)

            
            ligand_exp = slides_ligand[slide][ligand_name]
            receptor_exp = slides_receptor[slide][receptor_name]
            LR_exp = slides_lr[slide][LR]
            

            # Each comparison gets its own outlier filtering, so the filtered
            # vectors of different comparisons can have different lengths.
            f3_GEM1, f3_ligand = clean_outliers(GEM1_exp, ligand_exp)
            f4_GEM2, f4_ligand = clean_outliers(GEM2_exp, ligand_exp)
            f5_GEM1, f5_receptor  = clean_outliers(GEM1_exp, receptor_exp)
            f6_GEM2, f6_receptor = clean_outliers(GEM2_exp, receptor_exp)
            f7_GEM1, f7_lr = clean_outliers(GEM1_exp, LR_exp)
            f8_GEM2, f8_lr = clean_outliers(GEM2_exp, LR_exp)
            
            # (x, y) data of the 8 plots drawn for this LR, in column order m = 0..7.
            corrected_data = [(f3_GEM1, f3_ligand), (f4_GEM2, f4_ligand), 
                              (f5_receptor, f5_GEM1), (f6_receptor, f6_GEM2), 
                              (f7_GEM1, f7_lr), (f7_lr, f7_GEM1), 
                              (f8_GEM2, f8_lr), (f8_lr, f8_GEM2)]
            
            for m in range(len(corrected_data)):
                X_c = corrected_data[m][0]#x
                Y_c = corrected_data[m][1]#y
                

                plot_sigmoid, Y_esti_sigmoid, sigmoid_correlation_coefficient = try_fit_sigmoid(X_c, Y_c)
                plot_linear, Y_esti_linear, linear_correlation_coefficient = try_fit_linear(X_c, Y_c)
                
                    
                    
                # Pick title/labels for plot m and store its scores.
                # NOTE(review): for m = 0..3 the scores are keyed by the GEM name,
                # which does not change with the LR, so with several LRs each LR
                # overwrites the previous one's entry -- confirm this is intended.
                # NOTE(review): for m = 4..7 the x-y order of the data looks swapped
                # relative to the key name (e.g. m == 4 has GEM1 on x and the LR
                # product on y but is stored under 'Products-GEM1', whereas
                # 'Receptor-GEM1' has the receptor on x) -- confirm.
                if m == 0:
                    title = "Slide: %s \n GEM: %s \n Ligand: %s " %(tissue_names_unique[s], pair[0], ligand_name)
                    xlabel = "GEM: %s"%(pair[0])
                    ylabel = "Ligand: %s"%(ligand_name)
                    slides_GEM_LR_corrs[pair][slide]['GEM1-Ligand'][pair[0]] = [sigmoid_correlation_coefficient, linear_correlation_coefficient]
                elif m == 1:
                    title = "Slide: %s \n GEM: %s \n Ligand: %s " %(tissue_names_unique[s], pair[1], ligand_name)
                    xlabel = "GEM: %s"%(pair[1])
                    ylabel = "Ligand: %s"%(ligand_name)
                    slides_GEM_LR_corrs[pair][slide]['GEM2-Ligand'][pair[1]] = [sigmoid_correlation_coefficient, linear_correlation_coefficient]
                elif m == 2:
                    title = "Slide: %s \n GEM: %s \n Receptor: %s " %(tissue_names_unique[s], pair[0], receptor_name)
                    xlabel = "Receptor: %s"%(receptor_name)
                    ylabel = "GEM: %s"%(pair[0])
                    slides_GEM_LR_corrs[pair][slide]['Receptor-GEM1'][pair[0]] = [sigmoid_correlation_coefficient, linear_correlation_coefficient]
                elif m == 3:
                    title = "Slide: %s \n GEM: %s \n Receptor: %s " %(tissue_names_unique[s], pair[1], receptor_name)
                    xlabel = "Receptor: %s"%(receptor_name)
                    ylabel = "GEM: %s"%(pair[1])
                    slides_GEM_LR_corrs[pair][slide]['Receptor-GEM2'][pair[1]] = [sigmoid_correlation_coefficient, linear_correlation_coefficient]
                elif m == 4:
                    title = "Slide: %s \n GEM: %s \n LR: %s " %(tissue_names_unique[s], pair[0], LR)
                    xlabel = "GEM: %s"%(pair[0])
                    ylabel = "LR product: %s"%(LR)
                    slides_GEM_LR_corrs[pair][slide]['Products-GEM1'][LR] = [sigmoid_correlation_coefficient, linear_correlation_coefficient]
                elif m == 5:
                    title = "Slide: %s \n GEM: %s \n LR: %s " %(tissue_names_unique[s], pair[0], LR)
                    xlabel = "LR product: %s"%(LR)
                    ylabel = "GEM: %s"%(pair[0])
                    slides_GEM_LR_corrs[pair][slide]['GEM1-Products'][LR] = [sigmoid_correlation_coefficient, linear_correlation_coefficient]
                elif m == 6:
                    title = "Slide: %s \n GEM: %s \n LR: %s " %(tissue_names_unique[s], pair[1], LR)
                    xlabel = "GEM: %s"%(pair[1])
                    ylabel = "LR product: %s"%(LR)
                    slides_GEM_LR_corrs[pair][slide]['Products-GEM2'][LR] = [sigmoid_correlation_coefficient, linear_correlation_coefficient]
                elif m == 7:
                    title = "Slide: %s \n GEM: %s \n LR: %s " %(tissue_names_unique[s], pair[1], LR)
                    xlabel = "LR product: %s"%(LR)
                    ylabel = "GEM: %s"%(pair[1])
                    slides_GEM_LR_corrs[pair][slide]['GEM2-Products'][LR] = [sigmoid_correlation_coefficient, linear_correlation_coefficient]

                # Column of this plot: 2 GEM-vs-GEM columns, then 8 columns per LR.
                ax[i, 8 * j + m + 2].scatter(X_c, Y_c, alpha = 1, s = 120)
                if plot_sigmoid:
                    ax[i, 8 * j + m + 2].scatter(X_c, Y_esti_sigmoid, color='red', 
                                                label = 'sigmoid fit: %.2f'%sigmoid_correlation_coefficient, alpha = 1, s = 120)
                if plot_linear:
                    ax[i, 8 * j + m + 2].scatter(X_c, Y_esti_linear, color='orange', 
                                                label = 'linear fit: %.2f'%linear_correlation_coefficient, alpha = 1, s = 120)
                if plot_sigmoid or plot_linear:
                    ax[i, 8 * j + m + 2].legend(loc = 'lower right', fontsize = 20)
                ax[i, 8 * j + m + 2].set_title(title, fontsize = 25)
                ax[i, 8 * j + m + 2].set_xlabel(xlabel, fontsize = 25)
                ax[i, 8 * j + m + 2].set_ylabel(ylabel, fontsize = 25)
                

    
    # Save one image per GEM pair, named "<GEM1>-<GEM2>" (no extension is given,
    # so the file format is left to matplotlib's default), then close the figure
    # to release its memory before the next pair.
    # NOTE(review): the output directory is assumed to exist already -- confirm.
    plt.tight_layout()
    path = "./GEM_vis/GEM_corr_png/cosine/Patterns_LRs/inconcat_GEM_LR_corr_without_diff_detailed/"
    file_name = "%s-%s" %(pair[0], pair[1])
    fig.savefig(path+file_name)
    plt.close(fig)   
        

In [None]:
# Dump the nested result dictionary as its str() representation.
# NOTE(review): the file is later read back with eval(), which only works if
# every stored value prints as a valid Python expression -- confirm.
with open(r'GEM_LR_corr.txt','w+') as corr_file:
    corr_text = str(slides_GEM_LR_corrs)
    corr_file.write(corr_text)

In [None]:
# Reload the results previously written to GEM_LR_corr.txt, replacing the
# in-memory dictionary.
# NOTE(review): eval() executes whatever expression the file contains. Only run
# this on a file you produced yourself.
with open(r'GEM_LR_corr.txt','r') as f:
    corr_lines = f.readlines()

# Only the last line of the file is evaluated (an empty file evaluates '').
slides_GEM_LR_corrs = eval(corr_lines[-1] if corr_lines else '')

# Choose Highly Correlated GEM-LR

In [None]:
#based on empirical observation
# GEM1: B Cell GEM 22 GEM2: Myeloid GEM 81 Ligand: FN1 Receptor: SDC1
# GEM1: B Cell GEM 22 GEM2: Myeloid GEM 81 Ligand: FN1 Receptor: SDC4
# GEM1: Epithelial GEM 15 GEM2: Myeloid GEM 85 Ligand: FN1 Receptor: SDC1

In [None]:
# The GEM pair and ligand-receptor pair examined in the rest of the notebook.
GEM1, GEM2 = 'B cell GEM 22', 'Myeloid GEM 81'
Ligand, Receptor = 'FN1', 'SDC1'

In [None]:
# Slides in which the chosen GEM pair was found.
example_pair = (GEM1, GEM2)
found_slides = found_patterns[example_pair]

In [None]:
# Dictionary keys ("slide<N>") of the slides in which the chosen pair was found.
slide_keys = ['slide' + str(s) for s in found_slides]

# For each quantity, join the per-slide vectors end to end, in slide order.
GEM1_exp_concat = np.concatenate([slides_GEM[key][GEM1] for key in slide_keys])
GEM2_exp_concat = np.concatenate([slides_GEM[key][GEM2] for key in slide_keys])
Ligand_exp_concat = np.concatenate([slides_ligand[key][Ligand] for key in slide_keys])
Receptor_exp_concat = np.concatenate([slides_receptor[key][Receptor] for key in slide_keys])
Product_exp_concat = np.concatenate([slides_lr[key][Ligand + '-' + Receptor] for key in slide_keys])

In [None]:
# Stack the five concatenated vectors as rows, then transpose so that each
# vector becomes one column.
stacked_rows = np.stack(
    [GEM1_exp_concat, GEM2_exp_concat, Ligand_exp_concat, Receptor_exp_concat, Product_exp_concat],
    axis = 0,
)
example_LR_GEM_profile = stacked_rows.transpose()

In [None]:
# Wrap the array in a DataFrame; column order matches the stacking order above.
profile_columns = [GEM1, GEM2, Ligand, Receptor, "Product"]
example_LR_GEM_profile = pd.DataFrame(example_LR_GEM_profile, columns = profile_columns)

In [None]:
# Export the profile table without the index column.
example_LR_GEM_profile.to_csv(
    "./TME_CD/example1_GEM_LR_CD.csv",
    index = False,
)

# Test Correlation of 2 GEMs Given the LR

In [None]:
import pingouin as pg

In [None]:
# Display the profile table (columns: GEM1, GEM2, ligand, receptor, product).
example_LR_GEM_profile

In [None]:
# Correlation between the two GEMs with the LR product passed as covariate.
pg.pairwise_corr(
    data=example_LR_GEM_profile,
    columns = [GEM1, GEM2],
    covar='Product',
).round(3)

In [None]:
# Correlation between the two GEMs without any covariate, for comparison.
pg.pairwise_corr(
    data=example_LR_GEM_profile,
    columns = [GEM1, GEM2],
).round(3)