In [None]:
from shapely.geometry import shape, GeometryCollection, Point, Polygon
import os
import json
import pickle
import pandas as pd
import openslide
import cv2
from pathpretrain import load_image
import geojson
import matplotlib.pyplot as plt
import numpy as np
import geopandas as gpd
import scanpy as sc
import torch
from torch_geometric.nn.pool import voxel_grid
import plotly.graph_objects as go
import anndata as ad
import plotly.io as pio
import plotly.express as px

In [3]:
def coarsen(full_xys, full_exp, size):
    """Coarsen a 2D point cloud (and its per-point features) onto a square voxel grid.

    Points are binned with ``voxel_grid`` using voxels of ``size`` x ``size``.
    For every occupied voxel the member point closest to the voxel's centroid is
    kept as the representative coordinate, and the feature vectors of all member
    points are averaged.

    Parameters
    ----------
    full_xys : array-like, shape (n_points, 2)
        Point coordinates.
    full_exp : array-like, shape (n_points, n_features)
        Per-point feature (expression) matrix. Objects exposing ``.toarray()``
        (e.g. scipy sparse matrices) are densified first.
    size : number
        Voxel edge length, in the same units as ``full_xys``.

    Returns
    -------
    coarse_xys : list of numpy.ndarray, each of shape (1, 2)
        Coordinates of the most central point of every voxel.
    coarse_exp : list of numpy.ndarray, each of shape (1, n_features)
        Mean feature vector of every voxel.
    central_point_indices : list of torch.Tensor, each of shape (1,)
        Row index (into the inputs) of the retained point of every voxel.
    """
    # Nothing to bin: return the same triple of (empty) lists.
    if len(full_xys) == 0:
        return [], [], []

    # torch.from_numpy only accepts dense ndarrays.
    if hasattr(full_exp, "toarray"):
        full_exp = full_exp.toarray()

    # Convert once instead of once per voxel.
    xy_all = torch.from_numpy(np.asarray(full_xys))
    exp_all = torch.from_numpy(np.asarray(full_exp))
    # Feature width is taken from the data instead of being fixed at 1000.
    exp_all = exp_all.reshape((xy_all.shape[0], -1))
    n_features = exp_all.shape[1]

    voxels = voxel_grid(xy_all, size=(size, size))

    coarse_xys = []
    coarse_exp = []
    central_point_indices = []
    for voxel_id in set(voxels.tolist()):
        # (k, 1) tensor holding the row indices of the points in this voxel.
        indices = torch.nonzero(voxels == voxel_id, as_tuple=False)
        members = indices.reshape(-1)

        xys = xy_all[members].reshape((-1, 2))
        centroid = xys.mean(dim=0)
        distances = torch.norm(xys - centroid, dim=1)
        most_central_point_index = torch.argmin(distances)
        most_central_point = xys[most_central_point_index]

        # The central point's xy is retained and paired with the voxel's mean
        # expression; keeping only that point's own expression is an alternative.
        mean_exp = exp_all[members].mean(dim=0)

        coarse_exp.append(mean_exp.numpy().reshape((1, n_features)))
        coarse_xys.append(most_central_point.numpy().reshape((1, 2)))
        # Shape-(1,) tensor, so callers can use ``.item()`` on it.
        central_point_indices.append(indices[most_central_point_index])
    return coarse_xys, coarse_exp, central_point_indices

In [4]:
def get_adatas_coords_2d(patient_id, voxel_size=1500):
    """Build one coarsened AnnData per section of a patient, plus flat x/y/z lists.

    For every slide mapped to ``patient_id`` (ordered by its ``layer`` value) the
    warped spot coordinates are coarsened with ``coarsen`` and a new AnnData is
    assembled from the spots that ``coarsen`` selected as voxel representatives.

    Parameters
    ----------
    patient_id
        Value matched against the ``deident`` column of the mapping table.
    voxel_size : number, optional
        Voxel edge length forwarded to ``coarsen``. The default of 1500 is the
        voxel size used by the raw-2D call elsewhere in this notebook.

    Returns
    -------
    all_x, all_y : list of float
        Coordinates of the retained spots, concatenated over all sections.
    all_z : list of int
        Section index (position in layer order) of every retained spot.
    adatas : list of anndata.AnnData
        One coarsened AnnData per section, in layer order.

    Notes
    -----
    Prints the sorted layer values as a side effect.
    """
    # NOTE: pickle.load can execute arbitrary code; only load trusted files.
    with open('../outs/coregistration/annots_warped_xys.pickle', 'rb') as handle:
        warped = pickle.load(handle)

    mappings_path = PATH_TO_SECTION2PATIENT_MAPPING_TXT
    adata_path = PATH_TO_INFERRED_ST_ANNDATA_SAVED
    metadata_path = PATH_TO_METADATA

    mappings = pd.read_csv(mappings_path)
    patient_rows = mappings[mappings['deident'] == patient_id]
    layers = list(patient_rows['layer'].values)
    slides = list(patient_rows['image_name'].values)
    # Order the slides by layer, then switch to the AnnData file names, which
    # are also the keys of ``warped``.
    slides = [name for _, name in sorted(zip(layers, slides))]
    slides = [name.replace('svs', 'h5ad') for name in slides]
    print(sorted(layers))

    all_x = []
    all_y = []
    all_z = []
    adatas = []
    for section_idx, slide in enumerate(slides):
        xys = warped[slide]
        adata = sc.read(adata_path + slide)

        # The per-voxel mean expression is returned but not used below: X is
        # taken from the representative spot of each voxel.
        coarse_xys, _coarse_exp, keep_indices = coarsen(xys, adata.X, voxel_size)
        coarse_xys = np.array(coarse_xys).reshape((-1, 2))
        zs = [section_idx] * len(coarse_xys)
        all_x += coarse_xys[:, 0].tolist()
        all_y += coarse_xys[:, 1].tolist()
        all_z += zs

        keep_indices = [t.item() for t in keep_indices]

        sample_id = adata.obs['sample'].iloc[0]
        # assumes the metadata rows are in the same order as adata.obs — TODO confirm
        metadata = pd.read_csv(metadata_path + sample_id + '_metadata.csv')
        array_row = np.array(list(metadata['array_row']))
        array_col = np.array(list(metadata['array_column']))

        new_adata = ad.AnnData(
            X=adata.X[keep_indices],
            obs={
                'array_col': array_col[keep_indices],
                'array_row': array_row[keep_indices],
                # Select by position and pass a plain array so the values are
                # matched to rows by position, not by obs label.
                'in_tissue': adata.obs['in_tissue'].iloc[keep_indices].to_numpy(),
            },
            obsm={
                'spatial': xys[keep_indices],
                'slice': np.array(zs).reshape((-1, 1)),
            },
            uns={'spatial': adata.uns['spatial']},
        )
        new_adata.var.index = adata.var.index
        adatas.append(new_adata)
    return all_x, all_y, all_z, adatas

In [None]:
#For 3D: cluster the spots of all sections together on the latent representation
saved_dir=SAVE_DIR_SELECTED_SAMPLE
items=os.listdir(saved_dir)
adata_files = sorted(item for item in items if 'adata' in item)
# os.path.join works whether or not saved_dir ends with a path separator.
adatas = [sc.read_h5ad(os.path.join(saved_dir, name)) for name in adata_files]
# Tag every section with its position in the sorted file list.
for section_id, section in enumerate(adatas):
    section.obs['section_id'] = section_id
adatas = ad.concat(adatas)
latent = pd.read_csv(os.path.join(saved_dir, 'representation.csv'), index_col=0)
# Copy the subset so that obsm is written to a real object rather than a view.
adata_all = adatas[latent.index].copy()
adata_all.obsm['latent'] = np.array(latent.values)
sc.pp.neighbors(adata_all, use_rep='latent', n_neighbors=30)
sc.tl.umap(adata_all)
sc.tl.leiden(adata_all, resolution=0.2)

In [None]:
# Rank genes per Leiden cluster (Wilcoxon), then show the gene names from the
# first 50 rows of cluster "0" -- use for Enrichr.
sc.tl.rank_genes_groups(adata_all, method="wilcoxon", groupby="leiden")
(
    sc.get.rank_genes_groups_df(adata_all, group="0")
    ['names']
    .head(50)
    .tolist()
)

In [None]:
#2.5D: latent representation loaded for all sections, clustering restricted to section 0
saved_dir=SAVE_DIR_SELECTED_SAMPLE
items=os.listdir(saved_dir)
adata_files = sorted(item for item in items if 'adata' in item)
# os.path.join works whether or not saved_dir ends with a path separator.
adatas = [sc.read_h5ad(os.path.join(saved_dir, name)) for name in adata_files]
# Tag every section with its position in the sorted file list.
for section_id, section in enumerate(adatas):
    section.obs['section_id'] = section_id
adatas = ad.concat(adatas)
latent = pd.read_csv(os.path.join(saved_dir, 'representation.csv'), index_col=0)
# Copy the subset so that obsm is written to a real object rather than a view.
adata_all = adatas[latent.index].copy()
adata_all.obsm['latent'] = np.array(latent.values)
# Keep only the first section; copy so the in-place scanpy calls below do not
# operate on a view.
adata_all = adata_all[adata_all.obs['section_id'] == 0].copy()
sc.pp.neighbors(adata_all, use_rep='latent', n_neighbors=30)
sc.tl.umap(adata_all)
sc.tl.leiden(adata_all, resolution=0.2)
sc.tl.rank_genes_groups(adata_all, groupby="leiden", method="wilcoxon")
list(sc.get.rank_genes_groups_df(adata_all, group="0").head(50)['names'])#use for Enrichr

In [None]:
#Raw 2D
def domains_top_slice_norm(patient, resolution, voxel_size):
    """Cluster the first section of a patient directly on its expression matrix.

    Parameters
    ----------
    patient
        Patient identifier forwarded to ``load``.
    resolution : float
        Resolution passed to ``sc.tl.leiden``.
    voxel_size
        Voxel size forwarded to ``load``.

    Returns
    -------
    The first element returned by ``load``, after neighbors (30 neighbours on
    ``X``), UMAP and Leiden have been computed on it in place.
    """
    # ``load`` is defined outside this cell; presumably it returns one AnnData
    # per section -- confirm against its definition.
    adata_st_list_raw = load(patient, voxel_size)
    raw_2d = adata_st_list_raw[0]
    sc.pp.neighbors(raw_2d, n_neighbors=30, use_rep='X')
    sc.tl.umap(raw_2d)
    sc.tl.leiden(raw_2d, resolution=resolution)
    return raw_2d
# Cluster the raw first section, rank genes per Leiden cluster (Wilcoxon) and
# show the gene names from the first 50 rows of cluster "0" -- use for Enrichr.
twod_96 = domains_top_slice_norm(PATIENT_ID, resolution=0.2, voxel_size=1500)
sc.tl.rank_genes_groups(twod_96, method="wilcoxon", groupby="leiden")
(
    sc.get.rank_genes_groups_df(twod_96, group="0")
    ['names']
    .head(50)
    .tolist()
)