## This code performs clustering based on the W matrix

In [None]:
import numpy as np
import pandas as pd
import scanpy as sc
import scvi
import cellcharter as cc
import warnings
warnings.filterwarnings("ignore")


In [None]:
# Load the pre-clustering AnnData object.
adata = sc.read_h5ad("./100samples_before_clustering.h5ad")
# Subset the variables in place with an empty selection; presumably this drops
# every variable so only per-observation data is carried forward. TODO confirm.
adata._inplace_subset_var([])

# Read the W matrix (first CSV column is the index) and reorder its rows to
# follow adata.obs_names; a name missing from the CSV raises KeyError.
W_value = pd.read_csv("./W_values.csv", index_col=0)
W_value_aligned = W_value.loc[adata.obs_names]

# Store the aligned values, rounded to 8 decimals and cast to float32, as the
# representation used for clustering.
adata.obsm["X_neighboring"] = (
    W_value_aligned.to_numpy().round(decimals=8).astype(np.float32)
)


In [None]:
# Configure CellCharter's automatic cluster-number selection over the
# n_clusters range, with a fixed random seed and a single GPU for training.
# NOTE(review): accelerator is hard-coded to 'gpu'; this cell presumably fails
# on a machine without one. Confirm the intended runtime environment.
autok = cc.tl.ClusterAutoK(
    n_clusters=(5, 16),
    max_runs=5,
    model_params={
        "random_state": 12345,
        "trainer_params": {"accelerator": "gpu", "devices": 1},
    },
)

# Fit on the W-matrix representation, then store the predicted cluster of each
# observation in adata.obs['cluster'].
autok.fit(adata, use_rep="X_neighboring")
adata.obs["cluster"] = autok.predict(adata, use_rep="X_neighboring")


In [None]:
# Mapping from numeric cluster id to cellular-compartment label. Several ids
# share one label (0 and 3, 2 and 10, 9 and 11).
# NOTE(review): the ids 0-13 are hard-coded while the number of clusters is
# chosen by ClusterAutoK; ids absent from this mapping are left unchanged by
# `replace`, so verify the mapping against the clustering actually obtained.
replace_dict = {
    0: 'Inflammatory stromal',
    1: 'NKT cell-infiltrated tumor',
    2: 'Low-UMI epi-region',
    3: 'Inflammatory stromal',
    4: 'Low-UMI fibro-region',
    5: 'Myeloid-infiltrated tumor',
    6: 'Immune cell-enriched',
    7: 'Endothelium-enriched stromal',
    8: 'Tumor boundary',
    9: 'Necrotic region',
    10: 'Low-UMI epi-region',
    11: 'Necrotic region',
    12: 'Tumor core',
    13: 'B cell-enriched stromal',
}

# Relabel a copy of the cluster column so adata.obs['cluster'] keeps its ids.
adata.obs['cellular_compartment'] = (
    adata.obs['cluster'].copy().replace(replace_dict)
)


In [None]:
# Export the per-observation table (adata.obs) to CSV.
adata.obs.to_csv("./100sample_clustered_obs.csv")


In [None]:
# Fraction of each cellular compartment within every sample; normalize='index'
# makes each row (one sample) sum to 1.
proportion_table = pd.crosstab(
    index=adata.obs['sample'],
    columns=adata.obs['cellular_compartment'],
    normalize='index',
)
proportion_table.to_csv("./compartment_composition_in_samples.csv")


In [None]:
def calculate_roe(mat: pd.DataFrame) -> pd.DataFrame:
    """Return the ratio of observed to expected counts (Ro/e) for a count table.

    The expected count of cell ``(i, j)`` is ``row_sum[i] * col_sum[j] / total``,
    i.e. the count expected if rows and columns were independent.

    Parameters
    ----------
    mat
        Contingency table of numeric counts. It is not modified.

    Returns
    -------
    pd.DataFrame
        Float table with the same index and columns as ``mat`` holding
        ``observed / expected``. Cells whose expected count is 0 are set to 0;
        this includes every cell when the whole table sums to 0.
    """
    observed = mat.to_numpy(dtype=float)
    row_sums = observed.sum(axis=1)
    col_sums = observed.sum(axis=0)
    total = observed.sum()

    roe = np.zeros_like(observed)
    # With total == 0 the expected counts would be 0/0 = NaN, and NaN != 0 is
    # True, so the mask below would not block the division. Skip it entirely
    # and keep the zero fallback instead.
    if total != 0:
        expected = np.outer(row_sums, col_sums) / total
        np.divide(observed, expected, out=roe, where=expected != 0)

    return pd.DataFrame(roe, index=mat.index, columns=mat.columns)


In [None]:
# Count observations per (cellular compartment, cell-type annotation) pair.
cross_tab = pd.crosstab(
    index=adata.obs['cellular_compartment'],
    columns=adata.obs['annotations'],
)

# Observed/expected ratio per pair, with cell-type columns in a fixed order;
# a listed cell type that is absent from the table raises KeyError.
roe_df = calculate_roe(cross_tab).loc[
    :,
    ['NK', 'T', 'B', 'DC', 'Mast', 'Neutrophil', 'Macrophage', 'Endothelial', 'Fibroblast', 'Epithelial', 'Others'],
]


In [None]:
# Export the compartment-by-cell-type Ro/e table to CSV.
roe_df.to_csv("./niche_celltype_roe.csv")
