In [None]:
import numpy as np
import matplotlib.pyplot as plt
import os
from numpy.linalg import norm
import scanpy as sc
import pandas as pd
import anndata as ad
import cv2
import copy

In [None]:
# List the input files once. os.listdir returns entries in arbitrary order, so
# the listings are sorted to make every downstream iteration reproducible.
# NOTE(review): the LR file names are listed from
# 'slide_lr_diffusion_without_normalization/', while the loading cell reads the
# files from './Ligand-Receptor/slide_lr_diffusion/' -- confirm that both
# directories contain the same file names.
all_lr_filenames = sorted(os.listdir('./Ligand-Receptor/slide_lr_diffusion_without_normalization/'))
all_GEM_filenames = sorted(os.listdir('./Ligand-Receptor/slide_correlated_GEMs/cosine/'))

In [None]:
def get_name(f, name = 'lr'):
    """Extract the trailing name from a file name.

    The returned text starts right after the 2nd '-' when ``name == 'lr'``,
    or right after the 4th '-' for any other ``name``, and always drops the
    last four characters of ``f``. When ``f`` holds fewer dashes than
    required, the text starts at the beginning of ``f``.
    """
    dashes_needed = 2 if name == 'lr' else 4
    dash_pos = -1
    for _ in range(dashes_needed):
        dash_pos = f.find('-', dash_pos + 1)
        if dash_pos == -1:
            # Not enough dashes: dash_pos + 1 == 0 keeps the whole prefix.
            break
    return f[dash_pos + 1:-4]

For each slide, we have the Top30 most correlated GEM pairs.
For each slide, we also have the LR pairs and their products.

In [None]:
# Load every ligand-receptor (LR) product vector into
# slides_lr["slide<i>"][<LR name>] for the 12 slides.
# NOTE(review): the file names in all_lr_filenames were listed from a different
# directory than path_lr -- confirm both directories hold the same files.
path_lr = './Ligand-Receptor/slide_lr_diffusion/'
slides_lr = {"slide" + str(idx): dict() for idx in range(12)}

for f in all_lr_filenames:
    lr_name = get_name(f, 'lr')
    for idx in range(12):
        # The trailing '-' keeps "slide-1-" from matching "slide-10-...".
        if not f.startswith("slide-" + str(idx) + '-'):
            continue
        slides_lr["slide" + str(idx)][lr_name] = np.loadtxt(path_lr + f)

In [None]:
# Load every GEM vector into slides_GEM["slide<i>"][<GEM name>]. Files are
# matched on the prefix "slide-<i>-pair-<j>-" for 12 slides and 30 pairs.
path_GEM = './Ligand-Receptor/slide_correlated_GEMs/cosine/'
slides_GEM = {"slide" + str(i): dict() for i in range(12)}

# Build each (slide index, file prefix) combination once, in the same
# slide-major order the nested loops would visit them.
gem_prefixes = [
    (i, "slide-" + str(i) + "-pair" + '-' + str(j) + "-")
    for i in range(12)
    for j in range(30)
]

for f in all_GEM_filenames:
    for i, prefix in gem_prefixes:
        if f.startswith(prefix):
            GEM_name = get_name(f, 'GEM')
            slides_GEM["slide" + str(i)][GEM_name] = np.loadtxt(path_GEM + f)

# Alignment between GEM and LR

`found_patterns` contains the GEM pairs that show up in the Top30 correlated GEM pairs of more than 2 slides.

In [None]:
# Read the serialized patterns from disk. If the file has several lines, only
# the last one is kept.
found_patterns = ''
with open(r'found_patterns.txt','r') as f:
    for line in f:
        found_patterns = line  # still a string at this point
# NOTE(review): eval() executes whatever expression the file contains. If the
# file only ever holds a plain dict literal, ast.literal_eval would be a safer
# drop-in -- confirm the file format before switching.
found_patterns = eval(found_patterns)  # string -> original dict object

For every GEM pair in found_patterns, and for each slide on which it shows up, use the average of the 2 GEMs to represent the pair (thus going from 2 vectors to 1 vector), then use cosine similarity to match this averaged GEM vector against the LR products on that slide. Find the top 10 most correlated LRs for each GEM pair on each slide.

In [None]:
# Result skeleton: one empty list per (GEM pair, slide) combination listed in
# found_patterns, keyed as Top_10_LRs_per_slide_per_pair[pair]['slide<k>'].
Top_10_LRs_per_slide_per_pair = {
    pair: {'slide' + str(slide): [] for slide in slides}
    for pair, slides in found_patterns.items()
}

In [None]:
def get_cosine_similarity(A, B):
    """Return the cosine similarity between vectors A and B.

    Computed as dot(A, B) / (||A|| * ||B||). When either vector has zero norm
    the ratio is undefined (0/0 would yield NaN, which breaks any later sort
    on the scores), so 0.0 is returned in that case.
    """
    denominator = norm(A) * norm(B)
    if denominator == 0:
        return 0.0
    return np.dot(A, B) / denominator

In [None]:
# For every GEM pair and every slide it appears on: average the two GEM
# vectors, score each LR product on that slide by cosine similarity against
# the average, and keep the names of the 10 best-scoring LRs.
for pair, slides in Top_10_LRs_per_slide_per_pair.items():
    GEM1 = pair[0]
    GEM2 = pair[1]
    for slide in slides.keys():
        GEM_pair_average = (slides_GEM[slide][GEM1] + slides_GEM[slide][GEM2]) / 2
        cosine_score = [
            (LR_name, get_cosine_similarity(GEM_pair_average, val))
            for LR_name, val in slides_lr[slide].items()
        ]
        cosine_score = sorted(cosine_score, key=lambda tup: tup[1], reverse = True)
        # Assign a fresh list instead of appending so that re-running this cell
        # does not accumulate duplicates, and slice so that a slide with fewer
        # than 10 LRs does not raise IndexError.
        Top_10_LRs_per_slide_per_pair[pair][slide] = [LR_name for LR_name, _ in cosine_score[:10]]

Here we try to find LR pairs that exist in all slides in a pattern.

In [None]:
def common_member(p):
    """Return the elements shared by every collection in ``p``.

    ``p`` is a sequence of iterables. The result is a list without duplicates,
    ordered by first appearance in ``p[0]`` so that the output is the same on
    every run. An empty ``p`` yields an empty list.
    """
    if not p:
        return []
    shared = set(p[0]).intersection(*p[1:])
    # dict.fromkeys removes duplicates while preserving the order of p[0].
    return list(dict.fromkeys(item for item in p[0] if item in shared))

In [None]:
# For each GEM pair, keep the LRs that appear in the top-10 list of every slide
# of the pattern; pairs with no shared LR are left out.
common_LRs_per_pair = dict()
for pair, slides in Top_10_LRs_per_slide_per_pair.items():
    shared_LRs = common_member(list(slides.values()))
    if len(shared_LRs) != 0:
        common_LRs_per_pair[pair] = shared_LRs

In [None]:
# Display the GEM pairs that kept at least one LR shared across their slides.
common_LRs_per_pair

# Plot

In [None]:
# Raw data directories, one per slide: 4 patients x 3 regions
# (T = Primary_Tumor, L = Leading_Edge, N = Adjacent).
HCC_1T = './raw_data/Primary_Tumor/HCC-1T'
HCC_1L = './raw_data/Leading_Edge/HCC-1L'
HCC_1N = './raw_data/Adjacent/HCC-1N'

HCC_2T = './raw_data/Primary_Tumor/HCC-2T'
HCC_2L = './raw_data/Leading_Edge/HCC-2L'
HCC_2N = './raw_data/Adjacent/HCC-2N'

HCC_3T = './raw_data/Primary_Tumor/HCC-3T'
HCC_3L = './raw_data/Leading_Edge/HCC-3L'
HCC_3N = './raw_data/Adjacent/HCC-3N'

HCC_4T = './raw_data/Primary_Tumor/HCC-4T'
HCC_4L = './raw_data/Leading_Edge/HCC-4L'
HCC_4N = './raw_data/Adjacent/HCC-4N'

# Position k in visium_paths and in tissue_names_unique describes the same slide.
visium_paths = [HCC_1T, HCC_1L, HCC_1N, HCC_2T, HCC_2L, HCC_2N,
                HCC_3T, HCC_3L, HCC_3N, HCC_4T, HCC_4L, HCC_4N]
# Human-readable slide names used in figure titles
# (spelling fixed: 'Ajacent' -> 'Adjacent').
tissue_names_unique = ['Patient 1 Primary Tumor',
                       'Patient 1 Leading Edge',
                       'Patient 1 Adjacent Area',
                       'Patient 2 Primary Tumor',
                       'Patient 2 Leading Edge',
                       'Patient 2 Adjacent Area',
                       'Patient 3 Primary Tumor',
                       'Patient 3 Leading Edge',
                       'Patient 3 Adjacent Area',
                       'Patient 4 Primary Tumor',
                       'Patient 4 Leading Edge',
                       'Patient 4 Adjacent Area']

In [None]:
# Read the 12 Visium datasets, in the order given by visium_paths.
adata_l = [sc.read_visium(visium_paths[i]) for i in range(12)]

In [None]:
# def concatenate_img(img_pathes, GEM_pair, method = 'v'):
#     imgs = []
#     for path in img_pathes:
#         img = cv2.imread(path)
#         imgs.append(img)
#     if method == 'v':
#         im_v = cv2.vconcat(imgs)
#     elif method == 'h':
#         im_v = cv2.hconcat(imgs)
#     file_path = './GEM_vis/GEM_corr_png/cosine/Patterns_LRs_general/' + GEM_pair[0] + '-' + GEM_pair[1] + '.png'
#     cv2.imwrite(file_path, im_v)
#     return file_path

In [None]:
# sc.set_figure_params(fontsize = 40, dpi=50, dpi_save=50)

# counter = 0
# for pair, slides in Top_10_LRs_per_slide_per_pair.items():
#     counter += 1
#     print("Progress: %d / %d" %(counter, len(Top_10_LRs_per_slide_per_pair)))
#     file_addresses = []
#     for slide in slides.keys():
#         fig, ax = plt.subplots(2,5, figsize=(55, 20))
#         for k, LR in enumerate(Top_10_LRs_per_slide_per_pair[pair][slide]):
#             adata_idx = int(slide[5:])
#             adata = adata_l[adata_idx]
#             adata.obs[LR] = slides_lr[slide][LR]
#             sc.pl.spatial(adata, img_key="hires", color=LR, ax = ax[k // 5, k % 5], vmin = 0, vmax = 'p95', alpha_img = 0.2,
#                       alpha = 1, color_map = 'Purples', colorbar_loc = 'bottom', title = 'Top' + str(k+1) + ' LR: ' + LR, show = False)
#             ax[k // 5, k % 5].set_xlabel('')
#             ax[k // 5, k % 5].set_ylabel('')
#         file_address = './GEM_vis/GEM_corr_png/cosine/Patterns_LRs/' + str(pair[0]) + \
#         '-' + str(pair[1]) + '-' + slide + '.png'
#         fig.savefig(file_address)
#         plt.close(fig)
#         file_addresses.append(file_address)
        
#     top10LR_path = concatenate_img(file_addresses, pair, 'v')
#     for path in file_addresses:
#         os.remove(path)
#     GEM_pair_img_path = './GEM_vis/GEM_corr_png/cosine/Patterns/' + str(pair[0]) + \
#         '-' + str(pair[1]) + '.png'
#     concatenate_img([GEM_pair_img_path, top10LR_path], pair, 'h')

Concatenated version: merge all slides in a pattern

In [None]:
# Merge the slides of each pattern: for every GEM pair that has shared LRs,
# chain the per-slide values of both GEMs and of every shared LR into flat
# lists, following the slide order stored in found_patterns[pair].
concat_GEM_pairs = dict()
concat_LRs = dict()
for pair, LRs in common_LRs_per_pair.items():
    GEM1 = pair[0]
    GEM2 = pair[1]

    show_up_slides = found_patterns[pair]
    slide_keys = ['slide' + str(s) for s in show_up_slides]

    concat_GEM_pairs[pair] = dict()
    concat_GEM_pairs[pair][GEM1] = [
        value for slide in slide_keys for value in slides_GEM[slide][GEM1]
    ]
    concat_GEM_pairs[pair][GEM2] = [
        value for slide in slide_keys for value in slides_GEM[slide][GEM2]
    ]

    concat_LRs[pair] = {
        LR: [value for slide in slide_keys for value in slides_lr[slide][LR]]
        for LR in LRs
    }

In [None]:
def clean_outliers(LR_exp, GEM1_exp, GEM2_exp):
    """Drop every position where the LR value or either GEM value is zero.

    The three inputs must have the same length (AssertionError otherwise) and
    are not modified. Returns three new lists ``(LR, GEM1, GEM2)`` holding
    only the positions where all three values are non-zero, in the original
    order.

    A single pass replaces the earlier list-membership filtering, which was
    quadratic in the number of positions.
    """
    assert(len(LR_exp) == len(GEM1_exp))
    assert(len(LR_exp) == len(GEM2_exp))

    LR_exp_c = []
    GEM1_exp_c = []
    GEM2_exp_c = []
    for lr_val, gem1_val, gem2_val in zip(LR_exp, GEM1_exp, GEM2_exp):
        # A position is discarded as soon as any of the three values is zero.
        if lr_val == 0 or gem1_val == 0 or gem2_val == 0:
            continue
        LR_exp_c.append(lr_val)
        GEM1_exp_c.append(gem1_val)
        GEM2_exp_c.append(gem2_val)
    return LR_exp_c, GEM1_exp_c, GEM2_exp_c

In [None]:
# Concatenated scatter plots: one figure per GEM pair with one panel per shared
# LR, showing both GEMs against the LR values over all slides of the pattern.
plt.rcParams['font.size'] = 50
path = "./GEM_vis/GEM_corr_png/cosine/Patterns_LRs/concat_GEM_LR_corr/"
os.makedirs(path, exist_ok=True)  # savefig fails if the folder is missing
for pair, LRs in concat_LRs.items():
    # squeeze=False always returns a 2-D axes array, so a single panel and
    # several panels are indexed the same way.
    fig, ax = plt.subplots(1,len(LRs), figsize=(len(LRs) * 20, 20), squeeze=False)

    # The GEM values do not depend on the LR, so look them up once per pair.
    GEM1_exp = concat_GEM_pairs[pair][pair[0]]
    GEM2_exp = concat_GEM_pairs[pair][pair[1]]

    for i, (LR, LR_exp) in enumerate(LRs.items()):
        LR_exp_c, GEM1_exp_c, GEM2_exp_c = clean_outliers(LR_exp, GEM1_exp, GEM2_exp)
        panel = ax[0, i]
        panel.scatter(LR_exp_c, GEM1_exp_c, label = pair[0], alpha = 0.5, s = 80)
        panel.scatter(LR_exp_c, GEM2_exp_c, label = pair[1], alpha = 0.5, s = 80)
        panel.legend(loc = 'upper right')
        panel.set_title("GEM Pair: %s-%s \n Ligand-Receptor: %s " %(pair[0], pair[1], LR))
        panel.set_xlabel("LR expression")
        panel.set_ylabel("GEM expression")
    fig.tight_layout()
    # The file name carries only the last LR of the figure (kept as-is so that
    # existing output names do not change).
    file_name = "%s-%s-%s" %(pair[0], pair[1], LR)
    fig.savefig(path+file_name)
    plt.close(fig)

Unconcatenated version: plot each slide in a pattern separately.

In [None]:
# Per-slide scatter plots: one figure per GEM pair, with one row per slide of
# the pattern and one column per shared LR.
plt.rcParams['font.size'] = 50
path = "./GEM_vis/GEM_corr_png/cosine/Patterns_LRs/inconcat_GEM_LR_corr/"
os.makedirs(path, exist_ok=True)  # savefig fails if the folder is missing
for pair, LRs in common_LRs_per_pair.items():
    show_up_slides = found_patterns[pair]
    # squeeze=False always returns a 2-D axes array. Without it, a pattern with
    # a single slide or a single LR yields a 1-D array (or a bare Axes) and the
    # [row, column] indexing below would fail.
    fig, ax = plt.subplots(len(show_up_slides),len(LRs),
                           figsize=(len(LRs) * 20, len(show_up_slides) * 20),
                           squeeze=False)

    for i, s in enumerate(show_up_slides):
        slide = 'slide' + str(s)
        GEM1_exp = slides_GEM[slide][pair[0]]
        GEM2_exp = slides_GEM[slide][pair[1]]
        for j, LR in enumerate(LRs):
            LR_exp = slides_lr[slide][LR]
            LR_exp_c, GEM1_exp_c, GEM2_exp_c = clean_outliers(LR_exp, GEM1_exp, GEM2_exp)
            panel = ax[i, j]
            panel.scatter(LR_exp_c, GEM1_exp_c, label = pair[0], alpha = 0.5, s = 80)
            panel.scatter(LR_exp_c, GEM2_exp_c, label = pair[1], alpha = 0.5, s = 80)
            panel.legend(loc = 'upper right')
            panel.set_title("Slide: %s \n GEM Pair: %s-%s \n Ligand-Receptor: %s " %(tissue_names_unique[s], pair[0], pair[1], LR))
            panel.set_xlabel("LR expression")
            panel.set_ylabel("GEM expression")

    fig.tight_layout()
    # The file name carries only the last LR of the figure (kept as-is so that
    # existing output names do not change).
    file_name = "%s-%s-%s" %(pair[0], pair[1], LR)
    fig.savefig(path+file_name)
    plt.close(fig)
        