In [1]:
import pandas as pd
import numpy as np
import scanpy as sc

# Read the data: the per-row measurement table and the signature table whose
# header (after its first column) names the columns of `data` to cluster on.
data = pd.read_csv("/Users/khoahuynh/Library/Mobile Documents/com~apple~CloudDocs/Documents/MERSCOPE/CompareTACIT/CRC_DataSet.csv")
signature = pd.read_csv("~/Downloads/test_crc_signature.csv")

# Process the data
# Drop rows that have no value in the CD31 column.
data = data.dropna(subset=['CD31 - vasculature:Cyc_19_ch_3'])
# Name the first signature column "cell_type" through the public `columns`
# setter. Writing into `signature.columns.values` mutates the Index's backing
# array behind pandas' back and is not a supported way to rename a column.
signature.columns = ["cell_type", *signature.columns[1:]]
signature = signature.fillna(0)

# Intensity data (A_ij): keep only the columns listed in the signature header.
data_anb = data[signature.columns[1:]]
orig_values = data_anb.values.T  # transposed matrix (one row per selected column)
# 1-based positional ID for each row of the filtered table (not the row label
# of the original CSV, which `dropna` may have left with gaps).
orig_values_metadata = pd.DataFrame({"CellID": range(1, data_anb.shape[0] + 1)})

# Create AnnData object (transpose back so rows of X line up with CellID).
adata = sc.AnnData(X=orig_values.T)
adata.obs = orig_values_metadata

# Normalization and scaling
# NOTE(review): scanpy documents `normalize_per_cell` as deprecated in favour
# of `normalize_total`; it is kept here so the published results do not change.
# It can also filter out low-count rows, so `adata` may end up with fewer rows
# than `data_anb` -- confirm before switching APIs.
sc.pp.normalize_per_cell(adata, counts_per_cell_after=1e4)
sc.pp.log1p(adata)
sc.pp.highly_variable_genes(adata, n_top_genes=500)
sc.pp.scale(adata, max_value=10)

# PCA
sc.tl.pca(adata, n_comps=30)

# Neighbor graph on the first 20 PCs, followed by the UMAP embedding.
sc.pp.neighbors(adata, n_pcs=20)
# NOTE(review): a 30-dimensional UMAP is unusual and nothing in this cell reads
# the embedding afterwards; presumably Leiden below works from the neighbor
# graph -- confirm whether `n_components=30` is intended.
sc.tl.umap(adata, n_components=30)

# Leiden clustering
sc.tl.leiden(adata, resolution=1.0)  # Adjust resolution as needed

# Print the results (one cluster label per retained row).
print(adata.obs['leiden'])

# Save results
#adata.write("/path/to/save/processed_data.h5ad")


OMP: Info #276: omp_set_nested routine deprecated, please use omp_set_max_active_levels instead.


0         21
1          9
2          7
3         11
4          9
          ..
235514     3
235515     0
235516     0
235517    11
235518    27
Name: leiden, Length: 235519, dtype: category
Categories (33, object): ['0', '1', '2', '3', ..., '29', '30', '31', '32']


In [2]:
# Write the Leiden label of every retained row to CSV; the obs index is kept
# as the first column so each label can be matched back to its row.
adata.obs.loc[:, ["leiden"]].to_csv(
    "leiden_clustering_results_CRC.csv",
    index=True,
)

In [1]:
import pandas as pd
import numpy as np
import scanpy as sc

# Read the data: the per-row measurement table and the signature table whose
# header (after its first column) names the columns of `data` to cluster on.
data = pd.read_csv("/Users/khoahuynh/Library/Mobile Documents/com~apple~CloudDocs/Documents/Compare_TACIT/MERFISH/Moffitt_and_Bambah-Mukku_et_al_merfish_all_cells.csv")
signature = pd.read_csv("/Users/khoahuynh/Library/Mobile Documents/com~apple~CloudDocs/Documents/Compare_TACIT/MERFISH/Signature_MERFISH_v3.csv")

# Process the data
# Discard rows whose Cell_class is 'Ambiguous'.
data = data[data['Cell_class'] != 'Ambiguous']
# Name the first signature column "cell_type" through the public `columns`
# setter. Writing into `signature.columns.values` mutates the Index's backing
# array behind pandas' back and is not a supported way to rename a column.
signature.columns = ["cell_type", *signature.columns[1:]]
signature = signature.fillna(0)

# Intensity data (A_ij): keep only the columns listed in the signature header.
data_anb = data[signature.columns[1:]]
orig_values = data_anb.values.T  # transposed matrix (one row per selected column)
# 1-based positional ID for each row of the filtered table (not the row label
# of the original CSV, which the 'Ambiguous' filter leaves with gaps).
orig_values_metadata = pd.DataFrame({"CellID": range(1, data_anb.shape[0] + 1)})

# Create AnnData object (transpose back so rows of X line up with CellID).
adata = sc.AnnData(X=orig_values.T)
adata.obs = orig_values_metadata

# Normalization and scaling
# NOTE(review): scanpy documents `normalize_per_cell` as deprecated in favour
# of `normalize_total`; it is kept here so the published results do not change.
# The printed result of this cell has fewer rows than its largest index label
# implies, so rows are being removed after CellID is assigned -- presumably by
# this call's low-count filter; confirm before switching APIs.
sc.pp.normalize_per_cell(adata, counts_per_cell_after=1e4)
sc.pp.log1p(adata)
sc.pp.highly_variable_genes(adata, n_top_genes=500)
sc.pp.scale(adata, max_value=10)

# PCA
sc.tl.pca(adata, n_comps=30)

# Neighbor graph on the first 20 PCs, followed by the UMAP embedding.
sc.pp.neighbors(adata, n_pcs=20)
# NOTE(review): a 30-dimensional UMAP is unusual and nothing in this cell reads
# the embedding afterwards; presumably Leiden below works from the neighbor
# graph -- confirm whether `n_components=30` is intended.
sc.tl.umap(adata, n_components=30)

# Leiden clustering
# Resolution is spelled out so the "adjust as needed" knob is visible;
# 1.0 is understood to be scanpy's default, so the clustering should be unchanged.
sc.tl.leiden(adata, resolution=1.0)  # Adjust resolution as needed

# Print the results (one cluster label per retained row).
print(adata.obs['leiden'])

# Save results
#adata.write("/path/to/save/processed_data.h5ad")

OMP: Info #276: omp_set_nested routine deprecated, please use omp_set_max_active_levels instead.


0         2
1         5
2         0
3         5
4         2
         ..
874763    9
874764    7
874765    1
874766    7
874767    1
Name: leiden, Length: 874598, dtype: category
Categories (19, object): ['0', '1', '2', '3', ..., '15', '16', '17', '18']


In [2]:
# Write the Leiden label of every retained row to CSV; the obs index is kept
# as the first column so each label can be matched back to its row.
adata.obs.loc[:, ["leiden"]].to_csv(
    "leiden_clustering_results_MERFISH.csv",
    index=True,
)