In [1]:
import warnings
warnings.filterwarnings('ignore')

import pandas as pd
import scanpy as sc
import numpy as np
from sklearn.metrics.cluster import adjusted_rand_score
import anndata as ad
import os
import numpy as np
import matplotlib.pyplot as plt

# Environment variables describing the local R installation and the rpy2 location.
# NOTE(review): presumably these have to be set before rpy2 is first imported
# (likely inside Model.utils.mclust_R) — confirm. Both paths are machine-specific.
os.environ['R_HOME'] = 'E:/R-4.3.1'
os.environ['R_USER'] = 'E:/anaconda/lib/site-packages/rpy2'
import sys
# Make the local project checkout importable so the `Model` package below resolves.
# The path is absolute and machine-specific; adjust it when running elsewhere.
sys.path.append(r'D:/study/learning\spatial_transcriptome/papers\spatial_multi_omics-main')
from Model.utils import mclust_R
from Model.model import DCCAE
from Model.preprocess import fix_seed
# Project helper called with a fixed value for reproducibility.
# NOTE(review): presumably seeds the Python/NumPy/torch RNGs — confirm in Model.preprocess.
fix_seed(2024)

# Which simulated scenario to load. `replicate` is also interpolated into the
# results path when the shared results file is opened later in the notebook.
replicate = 1

# Both modalities live in the same scenario folder, so build the prefix once.
scenario_dir = f'D:/study/learning/spatial_transcriptome/papers/spatial_multi_omics-main/data/Spatial_Scenario_{replicate}'
file_fold_1 = f'{scenario_dir}/simulation{replicate}_RNA'
file_fold_2 = f'{scenario_dir}/simulation{replicate}_Protein'

adata_omics_1 = sc.read_h5ad(file_fold_1 + '.h5ad')
adata_omics_2 = sc.read_h5ad(file_fold_2 + '.h5ad')

# Replace X with the matrix stored under uns['INR'] for both modalities.
adata_omics_1.X = adata_omics_1.uns['INR']
adata_omics_2.X = adata_omics_2.uns['INR']

# Restrict both modalities to a single batch.
batch = 2
# Boolean-mask indexing returns a *view* of the parent AnnData. sc.tl.pca writes
# into the object, which would force an implicit copy (and a warning that the
# global warnings filter hides). Take an explicit copy so each subset owns its data.
adata_RNA = adata_omics_1[adata_omics_1.obs['batch'] == batch].copy()
adata_ADT = adata_omics_2[adata_omics_2.obs['batch'] == batch].copy()

# Fail early with a readable message instead of an obscure error inside PCA / DCCA.
if adata_RNA.n_obs == 0 or adata_ADT.n_obs == 0:
    raise ValueError(f'No observations found for batch={batch!r} in at least one modality')
if adata_RNA.n_obs != adata_ADT.n_obs:
    # The two embeddings are later concatenated column-wise, so rows must pair up.
    raise ValueError(
        f'Modalities have different numbers of observations for batch={batch!r}: '
        f'{adata_RNA.n_obs} vs {adata_ADT.n_obs}'
    )

# PCA on all variables (no highly-variable-gene restriction); result lands in obsm['X_pca'].
# NOTE(review): newer scanpy releases deprecate `use_highly_variable` in favour of
# `mask_var` — confirm against the installed version before changing.
sc.tl.pca(adata_RNA, use_highly_variable=False)
sc.tl.pca(adata_ADT, use_highly_variable=False)

In [9]:
# Settings recorded for this run: bandwidth=0.02, n_DCCA = 10, epochs = 300
# (bandwidth is only noted here; it is not passed to anything in this cell).
n_DCCA = 10
epochs = 300
n = 4  # number of clusters requested from mclust

# PCA embeddings of the two modalities are the inputs of the two DCCAE branches.
X = adata_RNA.obsm['X_pca'].copy()
Y = adata_ADT.obsm['X_pca'].copy()
features1 = X.shape[1]  # input feature size of branch 1
features2 = Y.shape[1]  # input feature size of branch 2
layers1 = [256, 256, n_DCCA]  # nodes in each hidden layer and the output size
layers2 = [256, 256, n_DCCA]

dcca = DCCAE(input_size1=features1, input_size2=features2, n_components=n_DCCA, layer_sizes1=layers1, layer_sizes2=layers2, epoch_num=epochs, learning_rate=0.001)
dcca.fit([X, Y])
Xs_transformed = dcca.transform([X, Y])

# Store the per-modality projections and their column-wise concatenation.
adata_RNA.obsm["DCCA_X"] = Xs_transformed[0]
adata_ADT.obsm["DCCA_Y"] = Xs_transformed[1]
adata_RNA.obsm["DCCA"] = np.concatenate((adata_RNA.obsm["DCCA_X"], adata_ADT.obsm["DCCA_Y"]), axis=1)


def _mclust_ari(adata, rep, num_cluster):
    """Cluster `adata.obsm[rep]` with mclust and score it against the ground truth.

    Calls `mclust_R` (which is relied upon to write `adata.obs['clusters_mclust']`
    in place) and returns the adjusted Rand index between those labels and
    `adata.obs['Ground Truth']`.

    Only rows missing one of the two compared columns are excluded, so NaNs in
    unrelated obs columns no longer shrink the evaluation set.
    """
    mclust_R(adata, used_obsm=rep, num_cluster=num_cluster)
    labelled = adata.obs.dropna(subset=['clusters_mclust', 'Ground Truth'])
    return adjusted_rand_score(labelled['clusters_mclust'], labelled['Ground Truth'])


use_rep = ['DCCA_X', 'DCCA_Y', 'DCCA']

ARI_1 = _mclust_ari(adata_RNA, use_rep[0], n)
print(f'n={n}, DCCA_X, ARI = {ARI_1}')

ARI_2 = _mclust_ari(adata_ADT, use_rep[1], n)
print(f'n={n}, DCCA_Y, ARI = {ARI_2}')

# Run the joint embedding last: it overwrites adata_RNA.obs['clusters_mclust'],
# so the labels left on adata_RNA are the ones derived from 'DCCA'.
ARI_3 = _mclust_ari(adata_RNA, use_rep[2], n)
print(f'n={n}, DCCA, ARI = {ARI_3}')

Training Progress: 100%|██████████| 300/300 [00:08<00:00, 33.38it/s]

model training finished!
fitting ...
  |                                                                            




n=4, DCCA_X, ARI = 1.0
fitting ...
n=4, DCCA_Y, ARI = 0.9666599600683922
fitting ...
n=4, DCCA, ARI = 1.0


In [3]:
# Snapshot of the current adata_RNA: its mclust labels, batch column and joint
# DCCA embedding (stored under the key 'SpaKnit').
# NOTE(review): the same expression is used for adata_2 and adata_3, so the three
# objects differ only if adata_RNA was rebuilt between executions (presumably by
# re-running the upstream cells with another `batch`) — confirm the run order.
adata_1 = ad.AnnData(
    obs=adata_RNA.obs.loc[:, ['clusters_mclust', 'batch']],
    obsm=dict(SpaKnit=adata_RNA.obsm['DCCA']),
)

In [6]:
# Snapshot of the current adata_RNA: its mclust labels, batch column and joint
# DCCA embedding (stored under the key 'SpaKnit').
# NOTE(review): the same expression is used for adata_1 and adata_3, so the three
# objects differ only if adata_RNA was rebuilt between executions (presumably by
# re-running the upstream cells with another `batch`) — confirm the run order.
adata_2 = ad.AnnData(
    obs=adata_RNA.obs.loc[:, ['clusters_mclust', 'batch']],
    obsm=dict(SpaKnit=adata_RNA.obsm['DCCA']),
)

In [9]:
# Snapshot of the current adata_RNA: its mclust labels, batch column and joint
# DCCA embedding (stored under the key 'SpaKnit').
# NOTE(review): the same expression is used for adata_1 and adata_2, so the three
# objects differ only if adata_RNA was rebuilt between executions (presumably by
# re-running the upstream cells with another `batch`) — confirm the run order.
adata_3 = ad.AnnData(
    obs=adata_RNA.obs.loc[:, ['clusters_mclust', 'batch']],
    obsm=dict(SpaKnit=adata_RNA.obsm['DCCA']),
)

In [10]:
# Stack the three per-run snapshots along the observation axis.
# `AnnData.concatenate` is deprecated in favour of `anndata.concat`; the arguments
# below reproduce its defaults: inner join, a 'batch' column holding the position
# of each input ('0', '1', '2' — this overwrites the incoming 'batch' values), and
# obs names suffixed with '-<position>' so they stay unique.
adata_results = ad.concat(
    [adata_1, adata_2, adata_3],
    join='inner',
    label='batch',
    keys=['0', '1', '2'],
    index_unique='-',
)

In [11]:
# Rename the mclust label column to 'SpaKnit' on a renamed copy of obs,
# then assign the modified DataFrame back to the AnnData object's obs attribute.
obs_df = adata_results.obs.rename(columns={'clusters_mclust': 'SpaKnit'})
adata_results.obs = obs_df

In [12]:
# The shared results file for this scenario is read, extended and written back
# to the same location (the file is overwritten in place).
results_path = f'D:/study/learning/spatial_transcriptome/papers/spatial_multi_omics-main/Results/Spatial_Scenario_{replicate}.h5ad'
results = sc.read_h5ad(results_path)

# Labels are copied positionally (.values drops the index).
# NOTE(review): this assumes the rows of adata_results are in the same order as
# the rows of `results` — confirm, since obs names are not compared here.
spaknit_labels = adata_results.obs['SpaKnit'].values
spaknit_embedding = adata_results.obsm['SpaKnit']
results.obs['SpaKnit'] = spaknit_labels
results.obsm['SpaKnit'] = spaknit_embedding

results.write_h5ad(results_path)

In [14]:
# Show the AnnData summary so the obs / obsm keys of the results object can be inspected.
results

AnnData object with n_obs × n_vars = 1200 × 3031
    obs: 'Ground Truth', 'batch', 'SpaGCN', 'SpatialGlue', 'STAGATE', 'MultiMAP', 'MultiVI', 'Modality1', 'Modality2', 'SpaKnit'
    obsm: 'Modality1', 'Modality2', 'MultiMAP', 'MultiVI', 'STAGATE', 'SpatialGlue', 'spatial', 'SpaKnit'