## This notebook analyses differentially expressed genes (DEGs) between cells from different groups

In [1]:
import warnings
import pandas as pd
import scanpy as sc
import os
warnings.filterwarnings("ignore", category=FutureWarning)
warnings.filterwarnings("ignore", category=UserWarning)


In [2]:
# Load "HD-OV 100.csv" into a DataFrame.
# NOTE(review): `sample_list` is not referenced by any of the cells visible below.
sample_list = pd.read_csv("HD-OV 100.csv")


### DEGs among niches in HGSOC

In [None]:
# Directory that receives the per-group DEG tables and the averaged-expression table.
result_path = "../out/DEGs/all_niches"
# Number of highest-log-fold-change genes kept per group for the summary table.
top_n_genes = 10

In [None]:

# Load the clustered AnnData object and cast the spatial metadata to float.
adata = sc.read_h5ad("clustered_adata_8um.h5ad")
adata.obs['in_tissue'] = adata.obs['in_tissue'].astype(float)
adata.obs['array_row'] = adata.obs['array_row'].astype(float)
adata.obs['array_col'] = adata.obs['array_col'].astype(float)
adata.obsm['spatial'] = adata.obsm['spatial'].astype(float)
# Start from the matrix stored in the "counts" layer.
adata.X = adata.layers["counts"]

# Normalise each observation to a total of 1e4, log-transform, then rank
# genes for every `cluster_cellcharter` group with a t-test.
sc.pp.normalize_total(adata, target_sum=1e4)
sc.pp.log1p(adata)
sc.tl.rank_genes_groups(adata, 'cluster_cellcharter', method='t-test')
results = adata.uns['rank_genes_groups']
groups = results['names'].dtype.names

# `to_csv` does not create missing directories, so make sure the target exists.
os.makedirs(result_path, exist_ok=True)

stat_columns = ('names', 'scores', 'pvals', 'pvals_adj', 'logfoldchanges')
names_list = []
for group in groups:
    df = pd.DataFrame({column: results[column][group] for column in stat_columns})
    df.to_csv(f'{result_path}/group_{group}.csv', index=False)

    # Collect the top genes straight from the in-memory table instead of
    # re-reading every `group_*.csv` in the directory: that would also pick up
    # stale files from earlier runs and depends on the arbitrary listing order.
    top_names = df.sort_values(by='logfoldchanges', ascending=False).head(top_n_genes)['names'].tolist()
    names_list.extend(top_names)

# De-duplicate while keeping first-seen order.
genes_of_interest = list(dict.fromkeys(names_list))
adata.obs['cluster_sample'] = adata.obs['cluster_cellcharter'].astype(str) + '_' + adata.obs['sample'].astype(str)
adata_inte = adata[:, genes_of_interest].copy()

# Mean expression of the selected genes per cluster/sample combination,
# written with genes as rows and cluster_sample groups as columns.
# `.toarray()` assumes X is a sparse matrix.
avg_expression = pd.DataFrame(
    adata_inte.X.toarray(),
    index=adata_inte.obs['cluster_sample'],
    columns=genes_of_interest
).groupby('cluster_sample').mean()
avg_expression = avg_expression.T
avg_expression.to_csv(f"{result_path}/top_FC_average_expression_per_cluster_sample.csv")


### DEGs among malignant cells in HGSOC

In [None]:
# Output directory for the per-cluster DEG tables of the malignant-cell comparison.
result_path = "../out/DEGs/malignant_cells"

In [None]:

# Load the clustered AnnData object and cast the spatial metadata to float.
adata = sc.read_h5ad("clustered_adata_8um.h5ad")
adata.obs['in_tissue'] = adata.obs['in_tissue'].astype(float)
adata.obs['array_row'] = adata.obs['array_row'].astype(float)
adata.obs['array_col'] = adata.obs['array_col'].astype(float)
adata.obsm['spatial'] = adata.obsm['spatial'].astype(float)
# Start from the matrix stored in the "counts" layer.
adata.X = adata.layers["counts"]

# Keep observations annotated "Malignant" that belong to the listed clusters.
selected_clusters = [1, 3, 8, 9, 10, 11, 12, 16, 17]
keep = adata.obs["cluster_cellcharter"].isin(selected_clusters) & (adata.obs["annotations"] == "Malignant")
adata = adata[keep].copy()

# Normalise each observation to a total of 1e4, log-transform, then rank
# genes for every remaining `cluster_cellcharter` group with a t-test.
sc.pp.normalize_total(adata, target_sum=1e4)
sc.pp.log1p(adata)
sc.tl.rank_genes_groups(adata, 'cluster_cellcharter', method='t-test')
results = adata.uns['rank_genes_groups']
groups = results['names'].dtype.names

# `to_csv` does not create missing directories, so make sure the target exists.
os.makedirs(result_path, exist_ok=True)

stat_columns = ('names', 'scores', 'pvals', 'pvals_adj', 'logfoldchanges')
for group in groups:
    df = pd.DataFrame({column: results[column][group] for column in stat_columns})
    df.to_csv(f'{result_path}/group_{group}.csv', index=False)

### DEGs of malignant cells in CIN-high niche

#### Compared by treatment status in recurrence samples

In [None]:
# Output directory for the per-group DEG tables of the treatment-status comparison.
result_path = "../out/DEGs/malignant_in_recurrence"

In [None]:
# Load the clustered AnnData object and cast the spatial metadata to float.
adata = sc.read_h5ad("clustered_adata_8um.h5ad")
adata.obs['in_tissue'] = adata.obs['in_tissue'].astype(float)
adata.obs['array_row'] = adata.obs['array_row'].astype(float)
adata.obs['array_col'] = adata.obs['array_col'].astype(float)
adata.obsm['spatial'] = adata.obsm['spatial'].astype(float)
# Start from the matrix stored in the "counts" layer.
adata.X = adata.layers["counts"]

# Restrict to recurrence samples with a defined treatment status.
# NOTE(review): the section heading and output path refer to malignant cells in
# the CIN-high niche, but no annotation/cluster filter is applied here - confirm
# whether one is intended.
is_recurrence = adata.obs["relapse_status"] == "recurrence"
has_treatment = adata.obs["treatment_status"] != "undefined"
# `.copy()` turns the view into a standalone object before in-place preprocessing.
recurrence_adata = adata[is_recurrence & has_treatment].copy()

# Run the preprocessing and the test on the filtered subset. Previously these
# calls used the unfiltered `adata`, so the subset above was never analysed.
sc.pp.normalize_total(recurrence_adata, target_sum=1e4)
sc.pp.log1p(recurrence_adata)
sc.tl.rank_genes_groups(recurrence_adata, 'treatment_status', method='t-test')
results = recurrence_adata.uns['rank_genes_groups']
groups = results['names'].dtype.names

# `to_csv` does not create missing directories, so make sure the target exists.
os.makedirs(result_path, exist_ok=True)

stat_columns = ('names', 'scores', 'pvals', 'pvals_adj', 'logfoldchanges')
for group in groups:
    df = pd.DataFrame({column: results[column][group] for column in stat_columns})
    df.to_csv(f'{result_path}/group_{group}.csv', index=False)

#### Compared by recurrence status in pre-treatment samples

In [None]:
# Output directory for the per-group DEG tables of the relapse-status comparison.
result_path = "../out/DEGs/malignant_in_pretreatment"

In [None]:
# Load the clustered AnnData object and cast the spatial metadata to float.
adata = sc.read_h5ad("clustered_adata_8um.h5ad")
adata.obs['in_tissue'] = adata.obs['in_tissue'].astype(float)
adata.obs['array_row'] = adata.obs['array_row'].astype(float)
adata.obs['array_col'] = adata.obs['array_col'].astype(float)
adata.obsm['spatial'] = adata.obsm['spatial'].astype(float)
# Start from the matrix stored in the "counts" layer.
adata.X = adata.layers["counts"]

# Restrict to pre-treatment samples with a defined relapse status.
# The variable keeps its original name (`recurrence_adata`) even though it
# holds the pre-treatment subset in this cell.
# NOTE(review): the section heading and output path refer to malignant cells in
# the CIN-high niche, but no annotation/cluster filter is applied here - confirm
# whether one is intended.
is_pretreatment = adata.obs["treatment_status"] == "pretreatment"
has_relapse_status = adata.obs["relapse_status"] != "undefined"
# `.copy()` turns the view into a standalone object before in-place preprocessing.
recurrence_adata = adata[is_pretreatment & has_relapse_status].copy()

# Run the preprocessing and the test on the filtered subset. Previously these
# calls used the unfiltered `adata`, so the subset above was never analysed.
sc.pp.normalize_total(recurrence_adata, target_sum=1e4)
sc.pp.log1p(recurrence_adata)
sc.tl.rank_genes_groups(recurrence_adata, 'relapse_status', method='t-test')
results = recurrence_adata.uns['rank_genes_groups']
groups = results['names'].dtype.names

# `to_csv` does not create missing directories, so make sure the target exists.
os.makedirs(result_path, exist_ok=True)

stat_columns = ('names', 'scores', 'pvals', 'pvals_adj', 'logfoldchanges')
for group in groups:
    df = pd.DataFrame({column: results[column][group] for column in stat_columns})
    df.to_csv(f'{result_path}/group_{group}.csv', index=False)

### DEGs among fibroblasts in HGSOC

In [None]:
# Output directory for the per-group DEG tables of the fibroblast comparison.
result_path = "../out/DEGs/fibroblasts"

In [None]:
# Read the clustered AnnData object from disk.
adata = sc.read_h5ad("clustered_adata_8um.h5ad")

# Cast the spatial bookkeeping columns in `obs` to float, one column at a time.
for obs_column in ('in_tissue', 'array_row', 'array_col'):
    adata.obs[obs_column] = adata.obs[obs_column].astype(float)

# Same cast for the coordinate array stored under obsm['spatial'].
spatial_coords = adata.obsm['spatial']
adata.obsm['spatial'] = spatial_coords.astype(float)

# Use the matrix stored in the "counts" layer as the working matrix.
adata.X = adata.layers["counts"]

In [None]:
# Label every observation by whether its `cluster_cellcharter` value is 3 or 20.
adata.obs['fibro_niche'] = adata.obs['cluster_cellcharter'].apply(
    lambda x: 'niche_3_20' if x in [3,20] else 'no_niche_3_20'
)
# Keep only observations annotated as "Fibroblast".
adata = adata[adata.obs["annotations"]=="Fibroblast"].copy()

# Normalise each observation to a total of 1e4, log-transform, then rank
# genes between the two `fibro_niche` labels with a t-test.
sc.pp.normalize_total(adata, target_sum=1e4)
sc.pp.log1p(adata)
sc.tl.rank_genes_groups(adata, 'fibro_niche', method='t-test')
results = adata.uns['rank_genes_groups']
groups = results['names'].dtype.names

# `to_csv` does not create missing directories, so make sure the target exists.
os.makedirs(result_path, exist_ok=True)

stat_columns = ('names', 'scores', 'pvals', 'pvals_adj', 'logfoldchanges')
for group in groups:
    df = pd.DataFrame({column: results[column][group] for column in stat_columns})
    df.to_csv(f'{result_path}/group_{group}.csv', index=False)