# Single Modality Analysis of Simulated Dataset 2 (Different Noise Levels and Combinations)

In [1]:
# Notebook-level accumulators. The clustering cell appends to these lists, so
# they hold cluster labels and PCA embeddings across successive runs.
results_RNA, results_Protein = [], []
embedding_RNA, embedding_Protein = [], []

## Loading Packages

In [None]:
# Silence every Python warning for the rest of the session. Note that this also
# hides deprecation notices and anndata/scanpy warnings.
import warnings
warnings.filterwarnings('ignore')
import pandas as pd
import scanpy as sc
import numpy as np
from sklearn.metrics.cluster import adjusted_rand_score

import anndata as ad
import os
import numpy as np  # duplicate of the numpy import above; harmless

# Machine-specific locations of the R installation and of the rpy2 package.
# They are set before `Model.utils` is imported below; presumably `mclust_R`
# calls into R through rpy2 and needs them -- TODO confirm.
os.environ['R_HOME'] = 'E:/R-4.3.1'
os.environ['R_USER'] = 'E:/anaconda/lib/site-packages/rpy2'

# Put the local project checkout on the import path so that the `Model`
# package can be imported.
import sys
sys.path.append(r'D:/study/learning/spatial_transcriptome/papers/spatial_multi_omics-main')
from Model.utils import expand_anndata, mclust_R

## Loading Data

In [None]:
# --- Parameters of this run ---------------------------------------------------
# `replicate` selects the Noise_Combination_<replicate> dataset; `noise_level`
# selects which subset of observations is analysed.
# NOTE(review): the accumulator lists (results_RNA, embedding_RNA, ...) appear to
# be filled by re-running this cell and the clustering cell once per noise
# level -- confirm the intended order of runs.
replicate = 4
noise_level = 3

# Both modalities live in the same dataset directory.
data_dir = f'D:/study/learning/spatial_transcriptome/papers/spatial_multi_omics-main/data/Noise_Combination_{replicate}'
file_fold_1 = f'{data_dir}/Combination{replicate}_RNA'
file_fold_2 = f'{data_dir}/Combination{replicate}_Protein'

adata_omics_1 = sc.read_h5ad(file_fold_1 + '.h5ad')
adata_omics_2 = sc.read_h5ad(file_fold_2 + '.h5ad')

# Project helper; invoked with used_rep='obsm' for both modalities.
adata_omics_1 = expand_anndata(adata_omics_1, used_rep='obsm')
adata_omics_2 = expand_anndata(adata_omics_2, used_rep='obsm')

# Boolean indexing of an AnnData returns a *view*. Take an explicit copy so the
# PCA below writes into a real object instead of relying on an implicit
# view-to-copy conversion (whose warning is hidden by the global warning filter).
adata_omics_1 = adata_omics_1[adata_omics_1.obs['noise_level'] == noise_level].copy()
adata_omics_2 = adata_omics_2[adata_omics_2.obs['noise_level'] == noise_level].copy()

# Fail early with a readable message rather than deep inside PCA.
if adata_omics_1.n_obs == 0 or adata_omics_2.n_obs == 0:
    raise ValueError(
        f'No observations with noise_level == {noise_level!r} in replicate {replicate}'
    )

# Adds the PCA embedding to .obsm['X_pca'] of each object.
sc.tl.pca(adata_omics_1)
sc.tl.pca(adata_omics_2)

# Aliases used by the clustering cell.
adata_RNA = adata_omics_1
adata_Protein = adata_omics_2

## Single Modality Clustering

In [10]:
# Cluster each modality on its own PCA embedding (4 clusters), RNA first.
for modality_adata in (adata_RNA, adata_Protein):
    mclust_R(modality_adata, used_obsm='X_pca', num_cluster=4)

# Append this run's cluster labels and PCA coordinates to the notebook-level
# accumulators. Re-running this cell appends again.
for label_store, embedding_store, modality_adata in (
    (results_RNA, embedding_RNA, adata_RNA),
    (results_Protein, embedding_Protein, adata_Protein),
):
    label_store.extend(modality_adata.obs['clusters_mclust'])
    embedding_store.extend(modality_adata.obsm['X_pca'])

# Report the accumulated embedding shapes: (rows, components).
for embedding_store in (embedding_RNA, embedding_Protein):
    print(np.shape(embedding_store))

fitting ...
fitting ...
(4800, 50)
(4800, 30)


## Storing the Results

In [4]:
# Load the shared results file for this replicate and display its summary.
results_path = f'D:/study/learning/spatial_transcriptome/papers/spatial_multi_omics-main/Results/Noise_Combination_{replicate}.h5ad'
results_all = sc.read_h5ad(results_path)
results_all

AnnData object with n_obs × n_vars = 4800 × 0
    obs: 'Ground Truth', 'noise_level', 'SpaGCN', 'SpatialGlue', 'MultiMAP', 'STAGATE', 'Modality1', 'Modality2', 'SpaKnit', 'MultiVI'
    obsm: 'Modality1', 'Modality2', 'MultiMAP', 'MultiVI', 'STAGATE', 'SpaKnit', 'SpatialGlue', 'spatial'

In [11]:
# Accumulated single-modality outputs, keyed by the name they are stored under
# in the results object: Modality1 = RNA, Modality2 = Protein.
modality_labels = {'Modality1': results_RNA, 'Modality2': results_Protein}
modality_embeddings = {'Modality1': embedding_RNA, 'Modality2': embedding_Protein}

# Cluster labels become per-observation columns in .obs (assigned by position).
for key, labels in modality_labels.items():
    results_all.obs[key] = labels

# PCA coordinates are stacked into 2-D arrays and stored in .obsm.
for key, embedding in modality_embeddings.items():
    results_all.obsm[key] = np.array(embedding)

In [12]:
# Adjusted Rand Index (ARI) of the 'Modality2' clustering against the ground
# truth, computed separately for every noise level present in the results.
ari_scores = {}
obs_df = results_all.obs
# The loop variable is deliberately not named `noise_level`: that name is the
# run parameter set in the data-loading cell, and reusing it here would
# silently overwrite it in the notebook namespace.
for level in obs_df['noise_level'].unique():
    # Select the observations belonging to the current noise level.
    subset_df = obs_df[obs_df['noise_level'] == level]

    # Compute the ARI (symmetric in its two arguments; keywords used for clarity).
    ari = adjusted_rand_score(
        labels_true=subset_df['Ground Truth'],
        labels_pred=subset_df['Modality2'],
    )

    # Store the ARI score for this noise level.
    ari_scores[level] = ari
ari_scores

{0: 0.2277916601230629,
 1: 0.19186152688322164,
 2: 0.14692914897993037,
 3: 0.1220728669573271}

In [13]:
# Persist the updated results. This is the same file the results were read
# from, so it is overwritten in place.
output_path = f'D:/study/learning/spatial_transcriptome/papers/spatial_multi_omics-main/Results/Noise_Combination_{replicate}.h5ad'
results_all.write_h5ad(output_path)