# Cross-sample annotation
Use the annotated S1 slice to annotate the corresponding unlabeled S2 slice, based on the integration results.

In [None]:
import pandas as pd
import anndata as ad

from umap.umap_ import UMAP

import matplotlib as mpl
mpl.rcParams['pdf.fonttype'] = 42
mpl.rcParams['ps.fonttype'] = 42

import warnings
warnings.filterwarnings("ignore")

### Load the data
Load the two slices and the latent embeddings of the spots, and set the current label of each spot in S2 to 'Unidentified'.

In [None]:
# Run configuration: which integration model and which developmental stage to process.
model = 'INSTINCT'
mode_list = ['E11_0', 'E13_5', 'E15_5', 'E18_5']
mode_index = 3
mode = mode_list[mode_index]

data_dir = '../../data/spMOdata/EpiTran_MouseBrain_Jiang2023/preprocessed/'
save_dir = f'../../results/MouseBrain_Jiang2023/vertical/{mode}/'
slice_name_list = [f'{mode}-S1', f'{mode}-S2']

# Read both slices, then blank out the S2 labels so S2 is treated as unlabeled.
slice_paths = [save_dir + f'filtered_merged_{sample}_atac.h5ad' for sample in slice_name_list]
cas_list = [ad.read_h5ad(path) for path in slice_paths]
cas_list[1].obs['Annotation_for_Combined'] = 'Unidentified'
adata_concat = ad.concat(cas_list, label="slice_name", keys=slice_name_list)

# Cumulative spot counts: spots_count[j]:spots_count[j + 1] is the row range of slice j
# inside the concatenated object.
n = 0
spots_count = [n]
for sample in cas_list:
    n += sample.shape[0]
    spots_count.append(n)

# Attach the joint latent embedding and hand each slice its own rows.
latent_path = save_dir + f'{model}/{mode}_INSTINCT_embed.csv'
adata_concat.obsm['latent'] = pd.read_csv(latent_path, header=None).values
for sample, start, stop in zip(cas_list, spots_count[:-1], spots_count[1:]):
    sample.obsm['latent'] = adata_concat.obsm['latent'][start:stop].copy()

In [None]:
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import LabelEncoder


def StrLabel2Idx(string_labels):
    """Encode string labels as integer class indices.

    Parameters
    ----------
    string_labels : array-like
        Labels to encode; passed unchanged to ``LabelEncoder.fit_transform``.

    Returns
    -------
    numpy.ndarray
        Integer codes produced by a freshly fitted ``LabelEncoder``.
    """
    # Imported locally so the function does not depend on a later notebook cell
    # having already executed ``import numpy as np``.
    import numpy as np

    label_encoder = LabelEncoder()
    idx_labels = label_encoder.fit_transform(string_labels)

    return np.array(idx_labels)


def knn_label_translation(reference_X, reference_y, target_X, k=20):
    """Transfer labels from reference samples to target samples.

    The reference labels are integer-encoded, a ``KNeighborsClassifier`` with
    ``n_neighbors=k`` is fitted on the reference features, and its predictions
    for the target features are decoded back to the original label values.

    Parameters
    ----------
    reference_X : array-like
        Features of the labeled samples.
    reference_y : array-like
        Labels of the samples in ``reference_X``.
    target_X : array-like
        Features of the samples to be labeled.
    k : int, default 20
        Number of neighbours used by the classifier.

    Returns
    -------
    Predicted labels for ``target_X``, expressed in the label values of
    ``reference_y``.
    """
    encoder = LabelEncoder()
    encoded_reference = encoder.fit_transform(reference_y)

    classifier = KNeighborsClassifier(n_neighbors=k)
    classifier.fit(reference_X, encoded_reference)

    # Decode the predicted integer codes back to the original labels.
    return encoder.inverse_transform(classifier.predict(target_X))

In [None]:
import numpy as np
import matplotlib.pyplot as plt

from matplotlib.lines import Line2D

def _legend_handles(labels, color_lookup):
    """Build one round legend marker per label, coloured via ``color_lookup``."""
    return [
        Line2D([0], [0], marker='o', color='w', markersize=8, markerfacecolor=color_lookup[label], label=label)
        for label in labels
    ]


def _spatial_panel(ax, adata, label_key, cluster_to_color_map, title):
    """Scatter the spots of ``adata`` at their spatial coordinates, coloured by ``label_key``."""
    point_colors = list(adata.obs[label_key].astype('str').map(cluster_to_color_map))
    coords = adata.obsm['spatial']
    ax.scatter(coords[:, 0], coords[:, 1], linewidth=0.5, s=50,
               marker=".", color=point_colors, alpha=0.9)
    ax.set_title(title, size=12)


def _new_umap_figure():
    """Open a fresh 5x5 figure for a UMAP panel and set the axes line width."""
    plt.figure(figsize=(5, 5))
    plt.rc('axes', linewidth=1)


def _hide_ticks():
    """Hide all ticks and tick labels on the current axes."""
    plt.tick_params(axis='both', bottom=False, top=False, left=False, right=False,
                    labelleft=False, labelbottom=False, grid_alpha=0)


def _save_current_figure(save, save_root, filename):
    """Write the current figure to ``save_root + filename`` when ``save`` is true."""
    if save:
        plt.savefig(save_root + filename)


def plot_mousebrain_verti(cas_list, adata_concat, ground_truth_key, annotation_key, cluster_to_color_map,
                          slice_name_list, cls_list, sp_embedding, mode,
                          save_root=None, save=False, plot=False):
    """Plot the spatial and UMAP views of a cross-slice annotation result.

    Four figures are created:

    1. Side-by-side spatial maps: the first slice coloured by ``ground_truth_key``
       and the second slice coloured by ``annotation_key``
       (saved as ``annotation_results.pdf``).
    2. UMAP of both slices coloured by slice (``slices_umap.pdf``).
    3. UMAP with the second slice in gray and the first slice coloured by
       ``ground_truth_key`` (``true_labels_umap.pdf``).
    4. UMAP of the second slice only, coloured by ``annotation_key``
       (``annotation_umap.pdf``).

    Parameters
    ----------
    cas_list : sequence
        The two slices; each must expose ``.obs``, ``.obsm['spatial']`` and ``.shape``.
    adata_concat : object
        Concatenation of the slices; ``.obs['slice_name']`` and ``.shape[0]`` are read.
    ground_truth_key : str
        Column of ``cas_list[0].obs`` holding the reference labels.
    annotation_key : str
        Column of ``cas_list[1].obs`` holding the transferred labels.
    cluster_to_color_map : dict
        Maps a label to a matplotlib colour.
    slice_name_list : sequence of str
        Names of the slices, used in titles and in the slice legend.
    cls_list : sequence
        Labels shown in the spatial figure's legend, in display order.
    sp_embedding : array-like
        2-D embedding; rows are sliced as first-slice rows followed by
        second-slice rows, so it is assumed to follow the order of ``cas_list``.
    mode : str
        Stage name used in the titles; ``'E13_5'`` flips the x-axis of the second
        spatial panel instead of its y-axis (presumably to match the orientation
        of the first slice -- confirm against the data).
    save_root : str, optional
        Path prefix the PDF file names are appended to; required when ``save`` is true.
    save : bool, default False
        Write each figure to disk.
    plot : bool, default False
        Call ``plt.show()`` after all figures are built.

    Raises
    ------
    TypeError
        If ``save`` is true but ``save_root`` is ``None``.
    """
    # Fail before any figure is built instead of on the first string concatenation.
    if save and save_root is None:
        raise TypeError("save_root must be a path prefix string when save=True")

    reference, target = cas_list[0], cas_list[1]

    # ---- Figure 1: spatial organisation of true vs. transferred labels ----
    fig, axs = plt.subplots(1, 2, figsize=(10, 4))
    fig.suptitle(f'{mode} Annotation Results', fontsize=14)

    _spatial_panel(axs[0], reference, ground_truth_key, cluster_to_color_map,
                   f'{slice_name_list[0]} (True Labels)')
    axs[0].invert_yaxis()
    axs[0].axis('off')

    _spatial_panel(axs[1], target, annotation_key, cluster_to_color_map,
                   f'{slice_name_list[1]} (Annotation)')
    if mode == 'E13_5':
        axs[1].invert_xaxis()
    else:
        axs[1].invert_yaxis()
    axs[1].axis('off')

    axs[1].legend(
        handles=_legend_handles(cls_list, cluster_to_color_map),
        fontsize=8, title='Spot-types', title_fontsize=10, bbox_to_anchor=(1, 1))
    # Leave room on the right for the legend anchored outside the second panel.
    fig.subplots_adjust(left=0.05, top=0.8, bottom=0.1, right=0.75)
    _save_current_figure(save, save_root, 'annotation_results.pdf')

    # Row ranges of each slice inside the joint embedding.
    n_reference = reference.shape[0]
    ref_rows = slice(0, n_reference)
    tgt_rows = slice(n_reference, n_reference + target.shape[0])

    n_spots = adata_concat.shape[0]
    size = 10000 / n_spots  # marker area shrinks as the number of spots grows

    # ---- Figure 2: UMAP coloured by slice ----
    colors_for_slices = [[0.70567316, 0.01555616, 0.15023281],
                         [0.2298057, 0.70567316, 0.15023281]]
    slice_cmap = dict(zip(slice_name_list, colors_for_slices))
    slice_colors = list(adata_concat.obs['slice_name'].astype('str').map(slice_cmap))
    _new_umap_figure()
    # The second slice is drawn first so the first slice is rendered on top.
    plt.scatter(sp_embedding[tgt_rows, 0], sp_embedding[tgt_rows, 1],
                s=size, c=slice_colors[tgt_rows])
    plt.scatter(sp_embedding[ref_rows, 0], sp_embedding[ref_rows, 1],
                s=size, c=slice_colors[ref_rows])
    _hide_ticks()
    plt.legend(handles=_legend_handles(slice_name_list, slice_cmap), fontsize=8, title='Slices',
               title_fontsize=10, loc='upper left')
    plt.title(f'{mode} Slices', fontsize=14)
    _save_current_figure(save, save_root, 'slices_umap.pdf')

    # ---- Figure 3: UMAP with the first slice coloured by its true labels ----
    true_colors = list(reference.obs[ground_truth_key].astype('str').map(cluster_to_color_map))
    _new_umap_figure()
    plt.scatter(sp_embedding[tgt_rows, 0], sp_embedding[tgt_rows, 1],
                s=size, c='gray')
    plt.scatter(sp_embedding[ref_rows, 0], sp_embedding[ref_rows, 1],
                s=size, c=true_colors)
    _hide_ticks()
    plt.title(f'{mode} True Labels', fontsize=14)
    _save_current_figure(save, save_root, 'true_labels_umap.pdf')

    # ---- Figure 4: UMAP of the second slice coloured by transferred labels ----
    annotation_colors = list(target.obs[annotation_key].astype('str').map(cluster_to_color_map))
    _new_umap_figure()
    plt.scatter(sp_embedding[tgt_rows, 0], sp_embedding[tgt_rows, 1],
                s=size, c=annotation_colors)
    _hide_ticks()
    plt.title(f'{mode} Annotation', fontsize=14)
    _save_current_figure(save, save_root, 'annotation_umap.pdf')

    if plot:
        plt.show()

### Annotation

In [None]:
# Annotation: transfer the S1 labels to S2 with a 20-nearest-neighbour vote in latent space.
reference_slice, target_slice = cas_list[0], cas_list[1]
target_slice.obs['predicted_labels'] = knn_label_translation(
    reference_slice.obsm['latent'].copy(),
    reference_slice.obs['Annotation_for_Combined'].copy(),
    target_slice.obsm['latent'].copy(),
    k=20,
)

# Save the annotated S2 slice next to the model's other outputs.
annotated_path = save_dir + f'{model}/annotated_{slice_name_list[1]}_atac.h5ad'
target_slice.write(annotated_path)

### Plot spatial organization and UMAP visualization of annotation results

In [None]:
# Project the joint latent space to 2-D with a fixed seed.
umap_params = dict(
    n_neighbors=30, n_components=2, metric="correlation", n_epochs=None, learning_rate=1.0,
    min_dist=0.3, spread=1.0, set_op_mix_ratio=1.0, local_connectivity=1, repulsion_strength=1,
    negative_sample_rate=5, a=None, b=None, random_state=1234, metric_kwds=None,
    angular_rp_forest=False, verbose=False,
)
reducer = UMAP(**umap_params)

sp_embedding = reducer.fit_transform(adata_concat.obsm['latent'])

# Master list of spot types and the colour assigned to each, index-aligned.
cls_list_all = ['Primary_brain_1', 'Primary_brain_2', 'Midbrain', 'Diencephalon_and_hindbrain', 'Basal_plate_of_hindbrain',
                'Subpallium_1', 'Subpallium_2', 'Cartilage_1', 'Cartilage_2', 'Cartilage_3', 'Cartilage_4',
                'Mesenchyme', 'Muscle', 'Thalamus', 'DPallm', 'DPallv']

colors_for_all = ['red', 'tomato', 'chocolate', 'orange', 'goldenrod',
                  'b', 'royalblue', 'g', 'limegreen', 'lime', 'springgreen',
                  'deepskyblue', 'pink', 'fuchsia', 'yellowgreen', 'olivedrab']

# Keep only the spot types present in S1, preserving the master ordering.
cls_list = list(set(cas_list[0].obs['Annotation_for_Combined']))
cls_list_reordered = [cls for cls in cls_list_all if cls in cls_list]
colors_for_clusters = [color for cls, color in zip(cls_list_all, colors_for_all) if cls in cls_list]

cluster_to_color_map = dict(zip(cls_list_reordered, colors_for_clusters))
print(cluster_to_color_map)

plot_mousebrain_verti(
    cas_list, adata_concat, 'Annotation_for_Combined', 'predicted_labels', cluster_to_color_map,
    slice_name_list, cls_list_reordered, sp_embedding, mode,
    save_root=save_dir + f'{model}/', save=True, plot=True,
)