# Preparation

In [None]:
# Show the interpreter's module search path (the bare last expression is displayed as the cell output).
import sys
sys.path

In [None]:
import tensorflow as tf

In [None]:
# Shared imports and global switches used by the rest of the notebook.
import pandas as pd
import numpy as np
import scanpy as sc
import os
import sys
import matplotlib.pyplot as plt
import seaborn as sns
import gc
import tensorflow as tf
# Turn off TensorFlow eager execution for the whole session.
# NOTE(review): presumably required by STAGATE's TF1-style training code — confirm.
tf.compat.v1.disable_eager_execution()
import warnings
# Suppress every warning for the rest of the session.
warnings.filterwarnings("ignore")

In [None]:
import STAGATE

In [None]:
# Location of R (used for the mclust clustering).
# Raw strings keep every backslash literal: in a normal string literal the
# "\r" in "...\rpy2" is parsed as a carriage return and silently corrupts
# the R_USER path.
os.environ['R_HOME'] = r'D:\anaconda\envs\STAligner\Lib\R'
os.environ['R_USER'] = r'D:\anaconda\envs\STAligner\Lib\site-packages\rpy2'

In [None]:
# Label used as the prefix of every file this notebook writes.
experiment_name = 'MouseOlfactoryBulb25um'

# Folder holding one <sample>.h5ad per section, and folder receiving the results.
input_dir, output_dir = (
    'G:/dataset/06-Mouse olfactory bulb/input/25um/',
    'G:/dataset/06-Mouse olfactory bulb/output/25um/STAGATE/',
)

# Sections to integrate; each name doubles as the input file stem.
sample_names = ["10X", "slide", "stereo"]

# load data

In [None]:
import anndata as ad

# Read every section from its .h5ad file and keep a copy keyed by sample name.
adata_list = {}
for dataset in sample_names:
    adata = sc.read_h5ad(f"{input_dir}{dataset}.h5ad")

    # Expose the 'x'/'y' obs columns as float32 spatial coordinates.
    coords = adata.obs[['x', 'y']].astype('float32')
    adata.obsm['spatial'] = coords.values

    # Suffix each spot name with its sample so names stay unique across sections.
    adata.obs_names = [f"{spot}_{dataset}" for spot in adata.obs_names]

    adata_list[dataset] = adata.copy()

In [None]:
# Print the loaded per-section objects (keyed by sample name) as a quick sanity check.
print(adata_list)

In [None]:
# One panel per section, side by side, coloured by the 'x' obs column.
fig, axs = plt.subplots(1, len(sample_names), figsize=(12, 3))
last_panel = len(sample_names) - 1
for it, section_id in enumerate(sample_names):
    if it == last_panel:
        # Final panel: default legend placement, spot size 1.
        panel_opts = dict(spot_size=1)
    else:
        # Every other panel: legend suppressed, spot size 100.
        panel_opts = dict(legend_loc=None, spot_size=100)
    sc.pl.spatial(adata_list[section_id], img_key="hires", ax=axs[it],
                  color=["x"], title=section_id, show=False, **panel_opts)

In [None]:
# Build the spatial neighbour network of every section.
# The network must be computed on adata_list[section_id] itself: the merge step
# reads adata_list[x].uns['Spatial_Net'], and the leftover `adata` variable only
# refers to the last section that was loaded.
rad_cutoff_by_section = {'10X': 200, 'slide': 50}
default_rad_cutoff = 3  # radius for any section not listed above (here: 'stereo')

for section_id in sample_names:
    section_adata = adata_list[section_id]
    STAGATE.Cal_Spatial_Net(
        section_adata,
        rad_cutoff=rad_cutoff_by_section.get(section_id, default_rad_cutoff),
    )
    STAGATE.Stats_Spatial_Net(section_adata)

# Concatenate the scanpy objects and spatial networks

In [None]:
# Merge all sections into a single AnnData and stack their spatial networks.
# The section list is `sample_names`; the previously used name `datasets` is
# never defined in this notebook and raised a NameError on a fresh kernel.
# NOTE(review): later cells read adata.obs['batch']; with keys=None (and no
# label=) concat does not appear to add such a column, so it presumably comes
# from the input files — confirm.
del adata  # drop the per-section object left over from the loading loop
adata = sc.concat([adata_list[x] for x in sample_names], keys=None)
print(adata)
adata.uns['Spatial_Net'] = pd.concat([adata_list[x].uns['Spatial_Net'] for x in sample_names])
STAGATE.Stats_Spatial_Net(adata)

In [None]:
# Print the spatial coordinates stored on the merged object.
print(adata.obsm['spatial'])

# Normalization

In [None]:
# Flag the top 3000 highly variable genes, then normalise each spot to a total of 1e4 and log1p-transform.
# NOTE(review): the seurat_v3 selection runs before normalisation, i.e. on the matrix as loaded — presumably raw counts; confirm.
sc.pp.highly_variable_genes(adata, flavor="seurat_v3", n_top_genes=3000)
sc.pp.normalize_total(adata, target_sum=1e4)
sc.pp.log1p(adata)

# Running STAGATE

In [None]:
# Display the merged, normalised object before training.
adata

In [None]:
# Inspect the expression matrix restricted to the highly variable genes.
hvg_mask = adata.var['highly_variable']
adata_Vars = adata[:, hvg_mask]

# Densify the matrix into a spots x genes table labelled with obs/var names.
X = pd.DataFrame(
    adata_Vars.X.toarray(),
    index=adata_Vars.obs.index,
    columns=adata_Vars.var.index,
)
print(X)

# Spot identifiers in matrix row order.
cells = np.array(X.index)
print(cells)

In [None]:
%%time
# Train STAGATE on the merged data with alpha=0; later cells read the embedding from adata.obsm['STAGATE'].
adata = STAGATE.train_STAGATE(adata, alpha=0)

In [None]:
# Check the trained object and the container type of the STAGATE embedding.
print(adata)
print(type(adata.obsm['STAGATE']))

#### save embedding

In [None]:
# Save the STAGATE embedding held in adata.obsm as a comma-separated file.
# This is the first write into output_dir, so create the folder up front;
# otherwise a fresh run fails here when the directory does not exist yet.
os.makedirs(output_dir, exist_ok=True)
np.savetxt(output_dir + experiment_name + '_STAGATE.csv', adata.obsm['STAGATE'], delimiter=",")

In [None]:
import seaborn as sns

# Ground-truth annotation as a categorical series.
series = adata.obs['Ground Truth'].astype("category")

# Distinct annotated cell types (ordered by frequency) and how many there are.
celltypes = series.value_counts().index.tolist()
celltype_num = len(celltypes)

# One palette colour per cell type, as hex strings.
colors = sns.color_palette(n_colors=celltype_num).as_hex()

# Ask the clustering step for as many clusters as there are annotated types.
num_cluster = celltype_num
print(num_cluster)

In [None]:
# Build the neighbour graph on the STAGATE embedding, then compute a UMAP from it.
sc.pp.neighbors(adata, use_rep='STAGATE')
sc.tl.umap(adata)

# Leiden clustering alternative, disabled; the names it uses are not defined in the visible cells.
# sc.tl.leiden(adata_sedr, key_added="SEDR_leiden", resolution=eval_resolution)

# Cluster the STAGATE embedding with mclust, requesting num_cluster clusters; later cells read the labels from adata.obs['mclust'].
adata = STAGATE.mclust_R(adata, used_obsm='STAGATE', num_cluster=num_cluster)

In [None]:
# Save one UMAP image per annotation: section of origin, ground truth, mclust labels.
# show=False keeps the figure open until plt.savefig has written it; with the
# default show behaviour the plot is displayed (and released) first, so the
# subsequent savefig call stores an empty canvas. This matches the show=False
# usage of the combined-UMAP cell in the Visualization section.
plt.rcParams["figure.figsize"] = (3, 3)

umap_outputs = (
    ('batch', '_batch.png'),
    ('Ground Truth', '_truth.png'),
    ('mclust', '_mclust.png'),
)
for color_key, file_suffix in umap_outputs:
    sc.pl.umap(adata, color=color_key, show=False)
    plt.savefig(output_dir + experiment_name + file_suffix, dpi=300)
    plt.show()  # display the panel in the notebook after it has been saved

In [None]:
# Copy the joint mclust labels back onto every section and draw one spatial panel each.
# Iterates over `sample_names`; the previously used name `datasets` is never
# defined in this notebook and raised a NameError on a fresh kernel.
fig, axs = plt.subplots(1, len(sample_names), figsize=(12, 3))
last_panel = len(sample_names) - 1
for it, section_id in enumerate(sample_names):
    section_adata = adata_list[section_id]
    # Spot names were suffixed with the sample name at load time, so they can
    # be looked up directly in the merged obs table.
    section_adata.obs['STAGATE'] = adata.obs.loc[section_adata.obs_names, 'mclust']
    # Only the last panel keeps its legend; all panels share spot_size=1.
    legend_opts = {} if it == last_panel else {'legend_loc': None}
    sc.pl.spatial(section_adata, img_key=None, ax=axs[it],
                  color=["STAGATE"], title=section_id, show=False, spot_size=1,
                  **legend_opts)

# Calculate ARI

In [None]:
from sklearn.metrics.cluster import adjusted_rand_score

# Adjusted Rand index between mclust labels and the ground truth, per section.
# Iterates over `sample_names`; the previously used name `datasets` is never
# defined in this notebook and raised a NameError on a fresh kernel.
for section_id in sample_names:
    temp_adata = adata[adata.obs['batch'] == section_id]
    # Drop only spots missing one of the two compared labels; a blanket
    # dropna() would also discard spots with gaps in unrelated obs columns.
    temp_obs = temp_adata.obs.dropna(subset=['mclust', 'Ground Truth'])
    temp_ARI = adjusted_rand_score(temp_obs['mclust'], temp_obs['Ground Truth'])
    print('ARI of section ID %s: %.3f' % (section_id, temp_ARI))

# Visualization

In [None]:
# Recompute the neighbour graph and UMAP with a fixed seed so the figure is reproducible.
sc.pp.neighbors(adata, use_rep='STAGATE', random_state=666)
sc.tl.umap(adata, random_state=666)

# One colour per section. Uses `sample_names`; the previously used name
# `datasets` is never defined in this notebook.
section_color = sns.color_palette(n_colors=len(sample_names)).as_hex()
section_color_dict = dict(zip(sample_names, section_color))
# scanpy reads the palette of an obs column from uns['<column>_colors'].
# The plotted column is 'batch', so the palette goes under 'batch_colors';
# the former 'batch_name_colors' key matched no plotted column.
adata.uns['batch_colors'] = [section_color_dict[x] for x in adata.obs.batch.cat.categories]

import matplotlib.pyplot as plt
plt.rcParams['font.sans-serif'] = "Arial"
plt.rcParams["figure.figsize"] = (3, 3)
plt.rcParams['font.size'] = 12

# Three panels side by side; show=False keeps the figure open for savefig.
sc.pl.umap(adata, color=['batch', 'Ground Truth', 'mclust'], ncols=3,
           wspace=0.5, show=False)
plt.savefig(output_dir + experiment_name + '_umap.png', dpi=300)

In [None]:
import matplotlib.pyplot as plt
from sklearn.metrics import adjusted_rand_score as ari_score

spot_size = 1
title_size = 12

# Split the merged object back into one view per section.
# Uses `sample_names`; the previously used name `datasets` is never defined
# in this notebook and raised a NameError on a fresh kernel.
Batch_list = [adata[adata.obs['batch'] == section_id] for section_id in sample_names]

# Per-section ARI (rounded to 2 decimals), computed on spots that carry both
# labels so it agrees with the ARI cell above, which also drops missing values.
ARI_list = []
for batch_adata in Batch_list:
    labelled_obs = batch_adata.obs.dropna(subset=['Ground Truth', 'mclust'])
    ARI_list.append(round(ari_score(labelled_obs['Ground Truth'], labelled_obs['mclust']), 2))

# One spatial panel per section. The former hand-written version filled only
# the first two axes and left any further panel empty.
fig, ax = plt.subplots(1, len(sample_names), figsize=(10, 5), gridspec_kw={'wspace': 0.05, 'hspace': 0.1})
for panel_idx, batch_adata in enumerate(Batch_list):
    panel_axes = sc.pl.spatial(batch_adata, img_key=None, color=['mclust'], title=[''],
                               legend_loc=None, legend_fontsize=12, show=False, ax=ax[panel_idx], frameon=False,
                               spot_size=spot_size)
    panel_axes[0].set_title("ARI=" + str(ARI_list[panel_idx]), size=title_size)

plt.savefig(output_dir + experiment_name + '_ARI.png', dpi=300)
plt.show()

# save

In [None]:
# Final inspection of the merged object before it is written out.
print(type(adata))
print(adata.obsm)
print(adata)

# Cast every obs column to string.
# NOTE(review): presumably a workaround so mixed-type columns serialise to h5ad — confirm; numeric columns lose their dtype.
adata.obs = adata.obs.astype('str')

print(adata.isbacked)
# NOTE(review): assigning .filename appears to be what persists the object to
# <output_dir><experiment_name>.h5ad and switches it to backed mode (the two
# isbacked prints bracket the change) — confirm against the anndata docs.
adata.filename = output_dir + experiment_name + '.h5ad'
print(adata.isbacked)

In [1]:
try:   
    # Export this notebook as a plain Python script via nbconvert.
    !jupyter nbconvert --to python MouseOb_STAGATE.ipynb
    # `python` converts to .py; `script` converts to .html
    # (NOTE(review): translated from the original note; the claim about `script` looks off — confirm against the nbconvert docs)
    # file_name.ipynb is the file name of the current module (this notebook)
except:
    # Best-effort export: any error raised here is deliberately ignored.
    pass

[NbConvertApp] Converting notebook MouseOb_STAGATE.ipynb to python
[NbConvertApp] Writing 7773 bytes to MouseOb_STAGATE.py
