In [None]:
import scanpy as sc
import numpy as np
import pandas as pd
import anndata as ad
import matplotlib.pyplot as plt
from scipy.io import mmread
# Global scanpy configuration used by every cell below.
sc.settings.n_jobs = 8
sc.set_figure_params(frameon=False, figsize=(6, 6))

In [None]:
# Input directories passed to sc.read_visium, one per slice.
# Samples 1-4 each have an Adjacent (N), Leading_Edge (L) and Primary_Tumor (T) slice.
HCC_1N = './raw_data/Adjacent/HCC-1N'
HCC_2N = './raw_data/Adjacent/HCC-2N'
HCC_3N = './raw_data/Adjacent/HCC-3N'
HCC_4N = './raw_data/Adjacent/HCC-4N'

HCC_1L = './raw_data/Leading_Edge/HCC-1L'
HCC_2L = './raw_data/Leading_Edge/HCC-2L'
HCC_3L = './raw_data/Leading_Edge/HCC-3L'
HCC_4L = './raw_data/Leading_Edge/HCC-4L'

HCC_1T = './raw_data/Primary_Tumor/HCC-1T'
HCC_2T = './raw_data/Primary_Tumor/HCC-2T'
HCC_3T = './raw_data/Primary_Tumor/HCC-3T'
HCC_4T = './raw_data/Primary_Tumor/HCC-4T'

# Order matters: visium_paths and visium_names are index-aligned (N slices, then L, then T).
visium_paths = [
    HCC_1N, HCC_2N, HCC_3N, HCC_4N,
    HCC_1L, HCC_2L, HCC_3L, HCC_4L,
    HCC_1T, HCC_2T, HCC_3T, HCC_4T,
]
visium_names = [
    'HCC-' + str(sample) + suffix
    for suffix in ('N-Adjacent', 'L-Leading_Edge', 'T-Primary_Tumor')
    for sample in range(1, 5)
]

In [None]:
# Load every Visium slice, in the same order as visium_paths / visium_names.
# Iterating the list itself (instead of a hard-coded range(12)) keeps this cell
# correct when slices are added to or removed from visium_paths.
adata_l = [sc.read_visium(path) for path in visium_paths]

In [None]:
# Use 100 dpi both for on-screen figures and for figures written to disk.
plt.rcParams.update({'figure.dpi': 100, 'savefig.dpi': 100})

In [None]:
# Select the slice to analyse: position 4 in visium_paths / visium_names is HCC-1L
# (the first Leading_Edge sample).
# NOTE: this binds a reference, not a copy, so later changes made through `adata`
# also change adata_l[slice_idx].
slice_idx = 4
adata = adata_l[slice_idx]

In [None]:
# Ensure var_names are unique before they are used as identifiers.
adata.var_names_make_unique()

# Mark genes whose name starts with "MT-" and pass that flag to the QC computation;
# inplace=True stores the resulting metrics on `adata` itself.
is_mito = adata.var_names.str.startswith("MT-")
adata.var["mt"] = is_mito
sc.pp.calculate_qc_metrics(adata, qc_vars=["mt"], inplace=True)

In [None]:
# Spatial plot of the selected slice on the "hires" image, with the image fully opaque.
sc.pl.spatial(
    adata,
    img_key="hires",
    alpha_img=1,
)

In [None]:
# Per-spot QC overview: one panel for total counts, one for the number of detected genes
# (both columns are produced by sc.pp.calculate_qc_metrics).
qc_columns = ["total_counts", "n_genes_by_counts"]
sc.pl.spatial(adata, img_key="hires", color=qc_columns)

In [None]:
def _read_gem_counts(csv_path):
    """Read a header-less nHDP count CSV and return its transpose as a NumPy array."""
    return pd.read_csv(csv_path, header=None).T.values


# One binary nHDP count matrix per cell compartment, all from the 1L result files.
# The files are transposed on load; downstream code treats columns as GEMs.
TandNK_ST_inference_bin = _read_gem_counts('./nHDP/binary_nHDP_count_TNK_1L_final.csv')
Stroma_ST_inference_bin = _read_gem_counts('./nHDP/binary_nHDP_count_Stromal_1L_final.csv')
B_cell_ST_inference_bin = _read_gem_counts('./nHDP/binary_nHDP_count_PlasmaB_1L_final.csv')
Epithelial_ST_inference_bin = _read_gem_counts('./nHDP/binary_nHDP_count_Epithelial_1L_final.csv')
Myeloid_ST_inference_bin = _read_gem_counts('./nHDP/binary_nHDP_count_Myeloid_1L_final.csv')

In [None]:
# Remove empty GEMs and globally expressed (non-unique) GEMs.
# Returns the pruned GEM count matrix and the indices of the deleted GEM columns.
def delete_empty_or_full_GEM(count_matrix, min_nonzero_mean=5, min_spot_fraction=0.05,
                             max_nonzero_mean=500, max_spot_fraction=0.99):
    """Drop GEM columns that are nearly empty or expressed almost everywhere.

    Parameters
    ----------
    count_matrix : 2-D numpy array
        Rows are spots, columns are GEMs.
    min_nonzero_mean : float, default 5
        A GEM is "empty" if its smoothed non-zero mean is below this value ...
    min_spot_fraction : float, default 0.05
        ... or if it is non-zero in fewer than ``int(min_spot_fraction * n_spots)`` spots.
    max_nonzero_mean : float, default 500
        A GEM is "global" if its smoothed non-zero mean exceeds this value ...
    max_spot_fraction : float, default 0.99
        ... and it is non-zero in more than ``int(max_spot_fraction * n_spots)`` spots.

    Returns
    -------
    count_matrix_pruned : numpy array
        Copy of ``count_matrix`` without the deleted columns.
    GEM_to_delete : list of int
        0-based indices (into the input matrix) of the deleted columns, ascending.

    Notes
    -----
    The "non-zero mean" is ``column_sum / (n_nonzero + 1)``. The ``+ 1`` keeps
    all-zero columns from dividing by zero, so the value is slightly below the
    true mean of the non-zero entries.
    """
    n_spots = count_matrix.shape[0]

    # Per-GEM statistics, computed for all columns at once.
    nonzero_per_gem = np.count_nonzero(count_matrix, axis=0)
    smoothed_mean = np.sum(count_matrix, axis=0) / (nonzero_per_gem + 1)

    # Weak signal, or present in too few spots.
    is_empty = ((smoothed_mean < min_nonzero_mean) |
                (nonzero_per_gem < int(min_spot_fraction * n_spots)))

    # Strong signal present in (almost) every spot.
    is_global = ((smoothed_mean > max_nonzero_mean) &
                 (nonzero_per_gem > int(max_spot_fraction * n_spots)))

    GEM_to_delete = np.flatnonzero(is_empty | is_global).tolist()

    count_matrix_pruned = np.delete(count_matrix, GEM_to_delete, axis=1)
    return count_matrix_pruned, GEM_to_delete
        

In [None]:
# Prune each compartment's GEM matrix and remember which column indices were removed.
# NOTE: every matrix is overwritten by its pruned version, so this cell is not idempotent.
# The *_drop_GEM_bin indices refer to columns of the matrix as it was BEFORE this call;
# re-running the cell recomputes them against the already-pruned matrix, which breaks the
# GEM numbering derived from them later. Reload the CSVs before re-running.
TandNK_ST_inference_bin, TandNK_drop_GEM_bin = delete_empty_or_full_GEM(TandNK_ST_inference_bin)
Stroma_ST_inference_bin, Stroma_drop_GEM_bin = delete_empty_or_full_GEM(Stroma_ST_inference_bin)
B_cell_ST_inference_bin, B_drop_GEM_bin = delete_empty_or_full_GEM(B_cell_ST_inference_bin)
Epithelial_ST_inference_bin, Epithelial_drop_GEM_bin = delete_empty_or_full_GEM(Epithelial_ST_inference_bin)
Myeloid_ST_inference_bin, Myeloid_drop_GEM_bin = delete_empty_or_full_GEM(Myeloid_ST_inference_bin)

In [None]:
def _surviving_gem_names(prefix, pruned_matrix, dropped_columns):
    """Return display names (1-based GEM numbers) for the columns that survived pruning.

    The original number of GEMs is recovered as "columns still present + columns dropped"
    instead of being hard-coded, so the names always line up with ``pruned_matrix``
    regardless of how many GEMs the input file contained.

    Assumes ``dropped_columns`` is the index list returned by the same
    delete_empty_or_full_GEM call that produced ``pruned_matrix``.
    """
    dropped = set(dropped_columns)
    n_original = pruned_matrix.shape[1] + len(dropped)
    return [prefix + str(c + 1) for c in range(n_original) if c not in dropped]


# Names keep the GEM's original (pre-pruning) number so plots stay traceable to the nHDP output.
TandNK_GEM_names_bin = _surviving_gem_names('1L TandNK GEM binary ', TandNK_ST_inference_bin, TandNK_drop_GEM_bin)
Stroma_GEM_names_bin = _surviving_gem_names('1L Stroma GEM binary ', Stroma_ST_inference_bin, Stroma_drop_GEM_bin)
B_cell_GEM_names_bin = _surviving_gem_names('1L B cell GEM binary ', B_cell_ST_inference_bin, B_drop_GEM_bin)
Epithelial_GEM_names_bin = _surviving_gem_names('1L Epithelial GEM binary ', Epithelial_ST_inference_bin, Epithelial_drop_GEM_bin)
Myeloid_GEM_names_bin = _surviving_gem_names('1L Myeloid GEM binary ', Myeloid_ST_inference_bin, Myeloid_drop_GEM_bin)

In [None]:
# Store each retained TandNK GEM as a per-spot column in adata.obs, then draw one panel per GEM.
for c, gem_counts in enumerate(TandNK_ST_inference_bin.T):
    adata.obs[TandNK_GEM_names_bin[c]] = gem_counts
sc.pl.spatial(
    adata,
    img_key="hires",
    color=TandNK_GEM_names_bin,
    cmap='Purples',
    vmin=0,
    vmax=300,
    alpha_img=0.3,
    colorbar_loc='bottom',
)

In [None]:
# Store each retained Stroma GEM as a per-spot column in adata.obs, then draw one panel per GEM.
for c, gem_counts in enumerate(Stroma_ST_inference_bin.T):
    adata.obs[Stroma_GEM_names_bin[c]] = gem_counts
sc.pl.spatial(
    adata,
    img_key="hires",
    color=Stroma_GEM_names_bin,
    cmap='Purples',
    vmin=0,
    vmax=300,
    alpha_img=0.3,
    colorbar_loc='bottom',
)

In [None]:
# Store each retained B cell GEM as a per-spot column in adata.obs, then draw one panel per GEM.
for c, gem_counts in enumerate(B_cell_ST_inference_bin.T):
    adata.obs[B_cell_GEM_names_bin[c]] = gem_counts
sc.pl.spatial(
    adata,
    img_key="hires",
    color=B_cell_GEM_names_bin,
    cmap='Purples',
    vmin=0,
    vmax=300,
    alpha_img=0.3,
    colorbar_loc='bottom',
)

In [None]:
# Store each retained Epithelial GEM as a per-spot column in adata.obs, then draw one panel per GEM.
for c, gem_counts in enumerate(Epithelial_ST_inference_bin.T):
    adata.obs[Epithelial_GEM_names_bin[c]] = gem_counts
sc.pl.spatial(
    adata,
    img_key="hires",
    color=Epithelial_GEM_names_bin,
    cmap='Purples',
    vmin=0,
    vmax=300,
    alpha_img=0.3,
    colorbar_loc='bottom',
)

In [None]:
# Store each retained Myeloid GEM as a per-spot column in adata.obs, then draw one panel per GEM.
for c, gem_counts in enumerate(Myeloid_ST_inference_bin.T):
    adata.obs[Myeloid_GEM_names_bin[c]] = gem_counts
sc.pl.spatial(
    adata,
    img_key="hires",
    color=Myeloid_GEM_names_bin,
    cmap='Purples',
    vmin=0,
    vmax=300,
    alpha_img=0.3,
    colorbar_loc='bottom',
)