## This code computes the cell-type composition of the neighboring region around each cell

In [None]:
import numpy as np
import pandas as pd
import warnings
from tqdm import tqdm
warnings.filterwarnings("ignore")

In [None]:
def calculate_neighbor_composition(obs, ring, annotation_col='annotations'):
    """Compute the annotation composition of each cell's grid neighborhood.

    For every cell, the neighborhood is the square window of grid positions
    ``array_row - ring .. array_row + ring`` by
    ``array_col - ring .. array_col + ring`` (both inclusive, so the cell
    itself is part of its own neighborhood). Only cells from the same
    ``sample`` are considered. The output holds, for each cell, the fraction
    of annotated cells inside that window that carry each annotation.

    Parameters
    ----------
    obs : pandas.DataFrame
        One row per cell. Must contain the columns ``"sample"``,
        ``"array_row"``, ``"array_col"`` and ``annotation_col``.
    ring : int
        Half-width of the window; the window spans ``2 * ring + 1`` grid
        positions along each axis.
    annotation_col : str, default ``'annotations'``
        Column of ``obs`` holding the per-cell label.

    Returns
    -------
    pandas.DataFrame
        Float frame with the same index as ``obs`` and one column per unique
        value of ``obs[annotation_col]`` (in order of first appearance).
        Each row sums to 1, or is all zeros when the window contains no
        annotated cell.

    Notes
    -----
    Cells with a missing annotation are not counted as neighbors. When
    several cells of one sample share the same grid position, every one of
    them is counted. Results are filled by row position, so a non-unique
    ``obs.index`` is handled.
    """
    all_categories = obs[annotation_col].unique().tolist()

    # Output column position of every non-missing category. Missing labels get
    # no entry, so they never contribute to a proportion.
    column_of = {
        cat: j for j, cat in enumerate(all_categories) if not pd.isna(cat)
    }
    # Per cell: the output column of its label, or -1 when the label is missing.
    cell_columns = [
        -1 if pd.isna(label) else column_of[label]
        for label in obs[annotation_col].tolist()
    ]

    n_categories = len(all_categories)
    # Filled by row position rather than by index label, so duplicate index
    # labels in ``obs`` cannot overwrite each other.
    proportions = np.zeros((len(obs), n_categories), dtype=float)

    # Read coordinates straight from the columns (not via iterrows) so their
    # integer dtype is never upcast to float by mixed-dtype row conversion.
    rows = obs['array_row'].tolist()
    cols = obs['array_col'].tolist()
    window = range(-ring, ring + 1)

    sample_list = obs["sample"].unique().tolist()

    for sample in tqdm(sample_list, desc="Processing samples"):
        positions = np.flatnonzero((obs["sample"] == sample).to_numpy()).tolist()

        # Grid position -> output columns of the annotated cells located there.
        # A plain dict gives constant-time probes; a list per position keeps
        # every cell when coordinates are shared.
        occupancy = {}
        for pos in positions:
            col_idx = cell_columns[pos]
            if col_idx >= 0:
                occupancy.setdefault((rows[pos], cols[pos]), []).append(col_idx)

        for pos in tqdm(positions, total=len(positions), desc=f"Cells in {sample}"):
            r, c = rows[pos], cols[pos]

            neighbor_columns = []
            for dr in window:
                for dc in window:
                    neighbor_columns.extend(occupancy.get((r + dr, c + dc), ()))

            if neighbor_columns:
                counts = np.bincount(neighbor_columns, minlength=n_categories)
                proportions[pos] = counts / len(neighbor_columns)

    return pd.DataFrame(proportions, index=obs.index, columns=all_categories)

In [None]:
# Load the per-cell metadata table; the first CSV column becomes the index.
entire_obs = pd.read_csv("./100sample_obs.csv", index_col=0)

# The neighborhood search walks integer grid offsets, so both coordinate
# columns are cast to int up front.
entire_obs = entire_obs.astype({'array_row': int, 'array_col': int})

# ring=4 -> a 9 x 9 window of grid positions centred on each cell.
result_df = calculate_neighbor_composition(
    entire_obs,
    ring=4,
    annotation_col='annotations',
)

# Persist the per-cell neighborhood composition, keeping the cell index.
result_df.to_csv("./neighboring_celltype_composition.csv")
