In [None]:
import scSpace
import scanpy as sc
import anndata
import matplotlib.pyplot as plt
import matplotlib.colors as clr
import numpy as np
import pandas as pd
import scipy
from sklearn.metrics import adjusted_rand_score
import random
import torch
import warnings
warnings.filterwarnings("ignore")

In [None]:
def setup_seed(seed):
    """Seed every random number generator used in this notebook.

    Parameters
    ----------
    seed : int
        Value passed to Python's ``random``, NumPy and PyTorch (CPU and CUDA).

    Besides seeding, the cuDNN backend is disabled and its deterministic /
    benchmark flags are set to ``True`` / ``False`` respectively.
    """
    # Python and NumPy generators.
    random.seed(seed)
    np.random.seed(seed)

    # PyTorch generators (CPU, current CUDA device, all CUDA devices).
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)

    # cuDNN backend flags.
    cudnn = torch.backends.cudnn
    cudnn.enabled = False
    cudnn.benchmark = False
    cudnn.deterministic = True


setup_seed(100)

In [None]:
# Base directory and dataset identifier used to build the input paths below.
root = "./"
data_id = "embryo"

# Read the spatial (ST) and single-cell (SC) loom files of this dataset; variable and
# observation names are taken from the 'var_names' / 'obs_names' attributes.
our_spatial = anndata.read_loom(f"{root}/results/{data_id}/imputeFormer_ST_{data_id}.loom", var_names='var_names', obs_names="obs_names")
our_seq = anndata.read_loom(f"{root}/results/{data_id}/imputeFormer_SC_{data_id}.loom", var_names='var_names', obs_names="obs_names")

In [None]:
# Display the summary of the spatial AnnData object that was just loaded.
our_spatial

In [None]:
# Densify the single-cell expression matrix as a plain ndarray (``todense`` would
# return the legacy ``np.matrix`` type). The sparsity check makes the cell safe to
# re-run once X is already dense.
if scipy.sparse.issparse(our_seq.X):
    our_seq.X = our_seq.X.toarray()

In [None]:
# Densify the spatial expression matrix as a plain ndarray (``todense`` would
# return the legacy ``np.matrix`` type). The sparsity check makes the cell safe to
# re-run once X is already dense.
if scipy.sparse.issparse(our_spatial.X):
    our_spatial.X = our_spatial.X.toarray()

In [None]:
# Inspect the single-cell expression matrix after the densification step.
our_seq.X

In [None]:
#sc_obj, st_obj = scSpace.preporcess(sc_adata=our_seq, st_adata=our_spatial, st_type='image', n_features=2000, normalize=True)

In [None]:
# Library-size normalisation (target sum 1e4) followed by log1p, applied in place:
# first to the single-cell data, then to the spatial data.
for _adata in (our_seq, our_spatial):
    sc.pp.normalize_total(_adata, target_sum=1e4)
    sc.pp.log1p(_adata)

In [None]:
## NOTE: this call unpacks three return values, so it needs a version of
## `scSpace.construct_pseudo_space` that has been modified to additionally return
## the MLP location-prediction model (`mlp_pos`).

sc_obj, st_obj, mlp_pos = scSpace.construct_pseudo_space(
    sc_adata=our_seq,
    st_adata=our_spatial,
    batch_size=128,
    activation='sigmoid',
    lr=0.001,
    epoch_num=1000,
    log_epoch=1000)

In [None]:
# Display the single-cell object returned by construct_pseudo_space.
sc_obj

In [None]:
# Display the spatial object returned by construct_pseudo_space.
st_obj

In [None]:
# Put the position-prediction network into inference mode.
mlp_pos.eval()

# Feed the 'TCA' embedding of the ST observations through the network. The input is
# moved to whatever device the network's weights live on (instead of a hard-coded
# 'cuda'), and gradients are disabled because only the predictions are needed.
_mlp_device = next(mlp_pos.parameters()).device
with torch.no_grad():
    st_pred_pos = mlp_pos(torch.tensor(st_obj.obsm['TCA']).to(_mlp_device, torch.float32))
st_pred_pos = st_pred_pos.cpu().detach().numpy()

In [None]:
# Display the positions predicted by the network for the ST observations.
st_pred_pos

In [None]:
# Display the 'pseudo_space' coordinates stored on the single-cell object.
sc_obj.obsm['pseudo_space']

In [None]:
# Expose the pseudo-space coordinates under the 'spatial' key as well
# (sc.pl.spatial is called on sc_obj further down).
sc_obj.obsm['spatial'] = sc_obj.obsm['pseudo_space']

In [None]:
# Persist the predicted ST positions and the single-cell pseudo-space coordinates
# as .npy files in the current working directory.
np.save("scSpace_inferME_STpos.npy", st_pred_pos)
np.save("scSpace_inferME_SCpos.npy", sc_obj.obsm['pseudo_space'])

In [None]:
from matplotlib import cm, colors
import seaborn as sns
import matplotlib.pyplot as plt
import matplotlib as mpl
import matplotlib.font_manager # to solve: Font family ['sans-serif'] not found. Falling back to DejaVu Sans.
import json

# Conversion factor for specifying figure sizes in centimetres.
centimeter = 1/2.54  # centimeter in inches

# Qualitative 10-colour palette.
# https://www.geeksforgeeks.org/react-js-blueprint-colors-qualitative-color-schemes/
react_cols_10 = ['#147EB3','#29A634','#D1980B','#D33D17','#9D3F9D','#00A396','#DB2C6F','#8EB125','#946638','#7961DB']

# Qualitative palettes.
# http://tsitsul.in/blog/coloropt/
norm_7 = ['#4053d3','#ddb310','#b51d14','#00beff','#fb49b0','#00b25d','#cacaca']
# NOTE(review): despite its name this list holds 13 colours (the last one is a grey).
norm_12 = ['#ebac23','#b80058','#008cf9','#006e00','#00bbad','#d163e6','#b24502',
           '#ff9287','#5954d6','#00c6f8','#878500','#00a76c','#bdbdbd']

def config_rc(dpi=400, font_size=6, lw=1.):
    """Apply the notebook's plotting defaults to seaborn and matplotlib.

    Parameters
    ----------
    dpi : int
        Value for ``figure.dpi``.
    font_size : float
        Size used for the base font, axis labels, axis titles and tick labels.
    lw : float
        Value for ``axes.linewidth``.

    Returns
    -------
    None
        The function only mutates the global seaborn / matplotlib configuration.
    """
    # matplotlib.rcParams.keys()
    rc = {
        'font.size': font_size,
        'axes.labelsize': font_size,
        'axes.titlesize': font_size,
        'xtick.labelsize': font_size,
        'ytick.labelsize': font_size,
        'figure.dpi': dpi,
        'axes.linewidth': lw,
        # Listed once; the original dict literal repeated this key.
        'legend.markerscale': 0.8,
        'legend.loc': 'upper right',
        'legend.borderpad': 0.2,
        'legend.columnspacing': 0.5,
        'legend.labelspacing': 0.2,
        'legend.handletextpad': 0.1,
        'legend.borderaxespad': 0.1,
        'legend.handleheight': 1.0,
        'legend.handlelength': 1.0,
    } # 'figure.figsize':(11.7/1.5,8.27/1.5)

    sns.set(style='ticks', rc=rc)
    sns.set_context("paper")

    # Re-apply the values after the seaborn calls, presumably so they take
    # precedence over whatever the "paper" context sets.
    mpl.rcParams.update(rc)

    # Font type 42 for PDF / PS output.
    mpl.rcParams['pdf.fonttype'] = 42
    mpl.rcParams['ps.fonttype'] = 42

    #mpl.rcParams['font.sans-serif'] = "Arial"
    mpl.rcParams['font.family'] = "sans-serif"
    mpl.rcParams['axes.unicode_minus'] = False # negative minus sign

In [None]:
# Apply the plotting defaults used for the figures below.
config_rc(dpi=300, font_size=5)

# Collapse numbered sub-populations of the single-cell annotation into one label each.
sc_change = {'Blood progenitors 1': "Blood progenitors", 'Blood progenitors 2': "Blood progenitors", 
             'Erythroid1': 'Erythroid', 'Erythroid2':'Erythroid', 'Erythroid3': 'Erythroid' }
our_seq.obs["celltype"] = our_seq.obs["celltype"].replace(sc_change)

# Rename spatial annotation labels; the targets are names that appear as keys of `_palette`.
st_change = {'Presomitic mesoderm': "Somitic mesoderm", 'Splanchnic mesoderm': "Pharyngeal mesoderm", 'Definitive endoderm': 'Def. endoderm',
            'Gut tube': "Gut", 'Mixed mesenchymal mesoderm': "Mesenchyme"}
our_spatial.obs["celltype_mapped_refined"] = our_spatial.obs["celltype_mapped_refined"].replace(st_change)

# Fixed cell-type -> hex colour mapping shared by all plots below, so a given cell
# type is drawn in the same colour for the single-cell and the spatial data.
_palette = {'Allantois': '#1CE6FF',
 'Anterior somitic tissues': '#FF34FF',
 'Blood progenitors': '#FF4A46',
 'Cardiomyocytes': '#008941',
 'Caudal Mesoderm': '#006FA6',
 'Cranial mesoderm': '#A30059',
 'Def. endoderm': '#FFDBE5',
 'Dermomyotome': '#7A4900',
 'Endothelium': '#0000A6',
 'Erythroid': '#63FFAC',
 'ExE endoderm': '#B79762',
 'ExE mesoderm': '#004D43',
 'Forebrain/Midbrain/Hindbrain': '#8FB0FF',
 'Gut': '#997D87',
 'Haematoendothelial progenitors': '#5A0007',
 'Intermediate mesoderm': '#809693',
 'Lateral plate mesoderm': '#6A3A4C',
 'Low quality': '#1B4400',
 'Mesenchyme': '#4FC601',
 'NMP': '#3B5DFF',
 'Neural crest': '#4A3B53',
 'Notochord': '#FF2F80',
 'PGC': '#61615A',
 'Paraxial mesoderm': '#BA0900',
 'Pharyngeal mesoderm': '#6B7900',
 'Rostral neurectoderm': '#00C2A0',
 'Sclerotome': '#FFAA92',
 'Somitic mesoderm': '#FF90C9',
 'Spinal cord': '#B903AA',
 'Surface ectoderm': '#D16100',
 'Visceral endoderm': '#DDEFFF'}

### plot the pseudo-space for spatial transcriptomics

In [None]:
# Back up the coordinates loaded from file before replacing them with the predicted
# ones. The guard keeps the backup intact when this cell is re-run (without it, a
# second run would overwrite "raw_spatial" with the predicted positions).
if "raw_spatial" not in our_spatial.obsm:
    our_spatial.obsm["raw_spatial"] = our_spatial.obsm["spatial"]
our_spatial.obsm["spatial"] = st_pred_pos

# Plot the ST observations at their predicted positions, coloured by cell type.
sc.pl.spatial(our_spatial, color="celltype_mapped_refined", palette=_palette, spot_size=0.05, legend_loc=None)

In [None]:
# Keep the predicted ST positions under their own key; the cross-modality KNN
# analysis further down reads them from "infer_spatial".
our_spatial.obsm["infer_spatial"] = st_pred_pos

In [None]:
# Save the spatial object. `write_obsm_varm=True` is required for the coordinate
# arrays in `.obsm` to be written; with the default (False) they are left out of
# the loom file.
our_spatial.write_loom("scSpace_ME_ST.loom", write_obsm_varm=True)

In [None]:
# Save the single-cell object. `write_obsm_varm=True` is required for the
# coordinate arrays in `.obsm` to be written; with the default (False) they are
# left out of the loom file.
sc_obj.write_loom("scSpace_ME_SC.loom", write_obsm_varm=True)

### plot the pseudo-space for single cell data

In [None]:
# Plot the single cells at the coordinates stored in sc_obj.obsm['spatial'],
# coloured by cell type with the shared palette.
sc.pl.spatial(sc_obj, color="celltype", palette=_palette, spot_size=0.07, legend_loc=None)

### Calculate the fraction of k-nearest neighbours of the same cell type in pseudo-space

In [None]:
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import LabelEncoder
from sklearn.datasets import make_classification

# Coordinates of the single cells, read from our_seq.obsm["spatial"].
# NOTE(review): that key is only assigned on `sc_obj` above, so this presumably
# relies on `sc_obj` and `our_seq` being the same object — confirm.
X = our_seq.obsm["spatial"]

# Convert discrete labels to numerical labels
label_encoder = LabelEncoder()
y = label_encoder.fit_transform(our_seq.obs['celltype'])

# Number of neighbors
k = 10

# Initialize the KNeighborsClassifier
knn = KNeighborsClassifier(n_neighbors=k) # default: euclidean

# Fit on the cells themselves; the fitted index is queried below.
knn.fit(X, y)

# Find the k nearest neighbours of every cell. Calling kneighbors() WITHOUT a query
# matrix makes scikit-learn exclude each point from its own neighbour list; passing
# X again would return every cell as its own neighbour and inflate each fraction
# by at least 1/k.
distances, indices = knn.kneighbors()

# Fraction of the k neighbours that carry the same label as the cell itself
# (vectorised: row i of `indices` holds the neighbours of cell i).
proportions = (y[indices] == y[:, None]).mean(axis=1).tolist()

# Print the results
# for i, proportion in enumerate(proportions):
#     print(f"Sample {i} (Class {label_encoder.inverse_transform([y[i]])[0]}): {proportion * 100:.2f}% of neighbors belong to the same class")

In [None]:
# Attach the per-cell same-cell-type neighbour fraction (k = 10) to the observations.
our_seq.obs['KNN_10_fraction'] = proportions

In [None]:
# Average same-cell-type neighbour fraction over all single cells.
our_seq.obs['KNN_10_fraction'].mean()

In [None]:
# Display the (harmonised) cell-type annotation of the single cells.
our_seq.obs['celltype']

In [None]:
# Alternative colour list; defined here but the plot below uses `_palette`.
custom_palette = [
    "#E69F00", "#56B4E9", "#009E73", "#F0E442", 
    "#0072B2", "#D55E00", "#CC79A7", "#999999", 
    "#007500", "#1F77B4", "#FF7F0E", "#2CA02C", 
    "#D62728", "#9467BD", "#8C564B", "#E377C2"
]

# Per-cell table: cell type and same-cell-type neighbour fraction.
df = our_seq.obs[['celltype', 'KNN_10_fraction']]

# One box per cell type (outliers hidden), coloured with the shared palette.
fig, ax = plt.subplots(figsize=(10*centimeter, 3*centimeter))
sns.boxplot(
    data=df,
    x='celltype',
    y='KNN_10_fraction',
    palette=_palette,
    dodge=False,
    width=0.5,
    showfliers=False,
    ax=ax,
)

# Tilt the category labels so they stay readable.
ax.set_xticklabels(ax.get_xticklabels(), rotation=30, ha='right')

In [None]:
# Mean same-cell-type neighbour fraction per cell type, highest first.
df.groupby(["celltype"]).mean().sort_values(by='KNN_10_fraction', ascending=False)

### For every given cell, calculate the KNN spots that belong to the same cell-type as the cell

In [None]:
# our_spatial = anndata.read_loom("scSpace_ME_ST.loom", var_names='var_names', obs_names="obs_names")
# our_seq = anndata.read_loom("scSpace_ME_SC.loom", var_names='var_names', obs_names="obs_names")

In [None]:
# Display the spatial AnnData summary (now including the added .obsm entries).
our_spatial

In [None]:
# Display the coordinates currently stored under 'spatial' for the ST data.
our_spatial.obsm['spatial']

In [None]:
# here we only select the shared common celltype for both scRNA-seq and ST
common_celltype = set(our_spatial.obs["celltype_mapped_refined"]).intersection(set(our_seq.obs["celltype"]))

# Stack the coordinates of both modalities (single cells first, then ST) and use
# them as the data matrix of a helper AnnData object.
obsm_spatial = np.concatenate([our_seq.obsm["spatial"], our_spatial.obsm["infer_spatial"]])
adata_pos = anndata.AnnData(obsm_spatial)

# Cell-type label and modality of every row, in the same order as the coordinates.
adata_pos.obs["celltype"] = np.concatenate([our_seq.obs["celltype"], our_spatial.obs["celltype_mapped_refined"]])
adata_pos.obs['celltype'] = pd.Categorical(adata_pos.obs['celltype'])
adata_pos.obs["modal"] = (["seq"] * our_seq.shape[0]) + (["spatial"] * our_spatial.shape[0])

# Convert discrete labels to numerical labels (one encoding shared by both modalities)
label_encoder = LabelEncoder()
y = label_encoder.fit_transform(adata_pos.obs['celltype'])

adata_pos.obs["celltype_label"] = y

# Number of neighbors
k = 10

# Initialize the KNeighborsClassifier
knn = KNeighborsClassifier(n_neighbors=k) # default: euclidean

# Explicit copies instead of views: a column is added to adata_pos_sc.obs below, and
# assigning to a view would trigger an implicit copy (the warning for which is
# silenced by the notebook-wide warnings filter).
adata_pos_st = adata_pos[adata_pos.obs['modal']=='spatial',:].copy()
adata_pos_sc = adata_pos[adata_pos.obs['modal']=='seq',:].copy()

# Index the ST observations by their predicted positions.
knn.fit(adata_pos_st.X, adata_pos_st.obs['celltype_label'])

# Find the k nearest ST observations for every single cell in scRNA-seq
distances, indices = knn.kneighbors(adata_pos_sc.X)

# Calculate the proportion of neighbors in ST belonging to the same celltype for each cell
celltype_label_ST = adata_pos_st.obs["celltype_label"]
celltype_label_SC = adata_pos_sc.obs["celltype_label"]

# Vectorised: row i of `indices` holds positional indices into the ST subset.
_st_labels = celltype_label_ST.to_numpy()
_sc_labels = celltype_label_SC.to_numpy()
proportions = (_st_labels[indices] == _sc_labels[:, None]).mean(axis=1).tolist()

adata_pos_sc.obs['KNN_10_fraction_inST'] = proportions

In [None]:
# Per-cell table: cell type and fraction of same-type ST neighbours.
df = adata_pos_sc.obs[['celltype', 'KNN_10_fraction_inST']]

# Restrict to cell types present in both modalities and drop the other categories.
categories_to_remove = set(adata_pos_sc.obs["celltype"]) - common_celltype
subset_df = df[df['celltype'].isin(common_celltype)].reset_index(drop=True)
subset_df['celltype'] = subset_df['celltype'].cat.remove_categories(list(categories_to_remove))

# Share of each cell type among the ST observations, drawn below as red markers.
ground_truth = (adata_pos_st.obs['celltype'].value_counts()/len(adata_pos_st)).to_dict()
ordered_celltype = subset_df['celltype'].cat.categories.values

fig, ax = plt.subplots(figsize=(10*centimeter, 3*centimeter))

sns.boxplot(
    data=subset_df,
    x='celltype',
    y='KNN_10_fraction_inST',
    palette=_palette,
    dodge=False,
    width=0.5,
    showfliers=False,
    ax=ax,
)
ax.set_xticklabels(ax.get_xticklabels(), rotation=30, ha='right')

# One triangle per cell type at its ST share.
for celltype in ordered_celltype:
    ax.scatter([celltype], [ground_truth[celltype]], s=3, marker="^", color="r", zorder=3) #, edgecolors="black"

In [None]:
# Average fraction of same-cell-type ST neighbours over all single cells.
adata_pos_sc.obs['KNN_10_fraction_inST'].mean()

In [None]:
# Mean fraction per cell type over all single cells, highest first.
df.groupby(["celltype"]).mean().sort_values(by='KNN_10_fraction_inST', ascending=False)

In [None]:
# Same summary restricted to the cell types shared by both modalities.
subset_df.groupby(["celltype"]).mean().sort_values(by='KNN_10_fraction_inST', ascending=False)