In [None]:
import scanpy as sc
import pandas as pd
import anndata as ad
import numpy as np

In [None]:
# Read the input .h5ad file into an AnnData object.
raw_h5ad_path = "/data/wuqinhua/phase/AD/AD_NatureNeuro_raw.h5ad"
adata = sc.read_h5ad(raw_h5ad_path)
# Bare expression so the notebook renders the AnnData summary.
adata

In [None]:
# Make the var names unique. This acts on the index exactly as loaded from
# the file; the following cell derives a new index from the "feature_name"
# column, so this call does not cover those names.
adata.var_names_make_unique()

In [None]:
# Switch the var index from the on-disk identifiers to gene symbols.
#
# Symbols in "feature_name" can repeat, and the uniqueness pass in the
# previous cell ran on the old index, so repeated symbols have to be resolved
# here, before sc.pp.highly_variable_genes runs: with batch_key set, scanpy
# combines the per-batch statistics by gene name, so genes sharing a name
# would be merged into one entry.
gene_symbols = adata.var["feature_name"].astype(str)

# 0 for the first occurrence of a symbol, 1 for the second, 2 for the third...
# Grouping by a plain array avoids any index alignment.
occurrence = gene_symbols.groupby(gene_symbols.to_numpy()).cumcount()

# Same "<name>_<k>" suffix scheme as the manual de-duplication cell further
# down, so the final var names are identical and that cell simply finds
# nothing left to rename.
unique_symbols = gene_symbols.where(
    occurrence == 0,
    gene_symbols + "_" + occurrence.astype(str),
)

# Assign a plain list of strings so the index is an ordinary object index
# rather than one that inherits the column's dtype and name.
adata.var.index = unique_symbols.tolist()
adata.var.index.name = None

In [None]:
# Count the distinct donors within each ADNC group.
obs_by_adnc = adata.obs.groupby('ADNC')
sample_counts = obs_by_adnc['donor_id'].nunique()
sample_counts

In [None]:
# Fold the "Reference" label into "Not AD", then recount donors per group.
# NOTE(review): if ADNC is a categorical column, recent pandas versions
# deprecate replace() on categoricals — confirm this still behaves as
# intended on the pandas version in use.
adnc_labels = adata.obs["ADNC"]
adata.obs["ADNC"] = adnc_labels.replace("Reference", "Not AD")

obs_by_adnc = adata.obs.groupby('ADNC')
sample_counts = obs_by_adnc['donor_id'].nunique()
sample_counts

In [None]:
# Compute per-gene variability statistics, batched by 'Specimen ID'. The
# results land in adata.var (a later cell ranks genes by 'dispersions_norm').
#
# The min_mean / max_mean / min_disp arguments were removed: scanpy ignores
# every mean/dispersion cutoff when n_top_genes is given, and the values that
# were passed are scanpy's defaults, so they only suggested a filter that was
# never applied.
#
# NOTE(review): flavor="cell_ranger" is documented as expecting logarithmized
# data, while the input file is named "*_raw" — confirm what adata.X holds.
sc.pp.highly_variable_genes(
    adata,
    n_top_genes=5000,
    flavor="cell_ranger",
    batch_key='Specimen ID',
)


In [None]:
# Convert every categorical column of adata.var to plain strings, keeping a
# record of each column's original categories beforehand.
cat_cols = adata.var.select_dtypes(include=['category']).columns
cat_categories = {}
for col in cat_cols:
    cat_categories[col] = adata.var[col].cat.categories
adata.var[cat_cols] = adata.var[cat_cols].astype(str)

# Rename repeated var names: the first occurrence keeps its name, the k-th
# repeat (k = 1, 2, ...) becomes "<name>_<k>".
var_names = adata.var.index.tolist()
name_counts = {}
unique_names = []
for name in var_names:
    # -1 sentinel so a name seen for the first time is recorded with count 0.
    repeat = name_counts.get(name, -1) + 1
    name_counts[name] = repeat
    unique_names.append(name if repeat == 0 else f"{name}_{repeat}")
adata.var.index = unique_names


In [None]:
# Rank genes by normalized dispersion (highest first) and keep the first
# 5000 as a standalone copy.
var_ranked = adata.var.sort_values(by='dispersions_norm', ascending=False)
sorted_genes = var_ranked.index
top5000_genes = sorted_genes[:5000]
adata_s = adata[:, top5000_genes].copy()

In [None]:
# Rename the obs columns to the names used from here on.
obs_column_renames = {
    "Specimen ID": "sample_id",
    "ADNC": "phenotype",
    "Subclass": "celltype",
}
adata_s.obs = adata_s.obs.rename(columns=obs_column_renames)
adata_s.obs

In [None]:
# One row per sample_id (first occurrence wins), written out as a CSV
# without the index column.
sample_columns = ['sample_id', "phenotype", 'donor_id', "tissue"]
sample_obs = adata_s.obs[sample_columns]
new_df = sample_obs.drop_duplicates(subset='sample_id', keep='first')
new_df.to_csv('/data/wuqinhua/phase_1r/AD/sample_info.csv', index=False)

In [None]:
# Draw one UMAP per annotation: cell type first, then phenotype.
for color_key in ("celltype", "phenotype"):
    sc.pl.umap(adata_s, color=[color_key])

In [None]:
# Remove the .raw attribute from the subset before it is written to disk in
# the next cell.
del adata_s.raw

In [None]:
# Save the 5000-gene subset as a new .h5ad file.
subset_h5ad_path = "/data/wuqinhua/phase/AD/AD_NatureNeuro_ADNC.h5ad"
adata_s.write_h5ad(subset_h5ad_path)