In [1]:
#!/usr/bin/env python
# coding: utf-8

import os 

print(os.getcwd())#显示当前路径
os.chdir('D:/bio/DeepST/DeepST-main2/deepst')#更改路径，''里面为更改的路径
print(os.getcwd())#显示当前路径

from DeepST import run
import matplotlib.pyplot as plt
from pathlib import Path
import scanpy as sc
import pandas as pd
import numpy as np
from anndata import AnnData


data_name_list = ['A1', 'B1']
data_path = 'G:/dataset/04-PDAC/input/'
save_path = 'G:/dataset/04-PDAC/output/'


deepen = run(save_path = save_path, 
	task = "Integration",
	pre_epochs = 800, 
	epochs = 1000, 
	use_gpu = False,
	)

import torch
from torch.utils.data import DataLoader
from torch.utils.data import TensorDataset

from scipy.sparse import issparse,csr_matrix
from sklearn.preprocessing import maxabs_scale, MaxAbsScaler
from torch.utils.data import TensorDataset

import matplotlib.pyplot as plt

from pathlib import Path, PurePath
from typing import Optional, Union
from anndata import AnnData
import numpy as np
from PIL import Image
import pandas as pd
# import stlearn
from _compat import Literal
import scanpy
import scipy

from matplotlib.image import imread
import json

def create_image(path,
                is_sparse=True,
                library_id=None,
                scale=None,
                quality="hires",
                spot_diameter_fullres=1,
                background_color="white",
                ):
    
    adata = sc.read_h5ad(path)
    adata.obsm['spatial'] = adata.obs[['x', 'y']].values
    adata.obsm['spatial'] = adata.obsm['spatial'].astype(float)
    if scale == None:
        max_coor = np.max(adata.obsm["spatial"])
        scale = 20 / max_coor

    adata.obs["imagecol"] = adata.obsm["spatial"][:, 0] * scale
    adata.obs["imagerow"] = adata.obsm["spatial"][:, 1] * scale

    # Create image
    max_size = np.max([adata.obs["imagecol"].max(), adata.obs["imagerow"].max()])
    max_size = int(max_size + 0.1 * max_size)
    if background_color == "black":
        image = Image.new("RGB", (max_size, max_size), (0, 0, 0, 0))
    else:
        image = Image.new("RGB", (max_size, max_size), (255, 255, 255, 255))
    imgarr = np.array(image)

    if library_id is None:
        library_id = "MERFISH"

    adata.uns["spatial"] = {}
    adata.uns["spatial"][library_id] = {}
    adata.uns["spatial"][library_id]["images"] = {}
    adata.uns["spatial"][library_id]["images"][quality] = imgarr
    adata.uns["spatial"][library_id]["use_quality"] = quality
    adata.uns["spatial"][library_id]["scalefactors"] = {}
    adata.uns["spatial"][library_id]["scalefactors"]["tissue_" + quality + "_scalef"] = scale
    adata.uns["spatial"][library_id]["scalefactors"]["spot_diameter_fullres"] = spot_diameter_fullres

    return adata

def add_visium_image(
    adata,
    path,
    scale=None,
    library_id = None,
    load_images = True,
    source_image_path = None,
    quality ="fulres",
    spot_diameter_fullres=1
) -> AnnData:
    
    path = Path(path)

    from h5py import File
    if library_id is None:
        library_id = 'deepst'
        
    image_path = path / 'pic_low_quality.jpg'
    img = plt.imread(image_path)
    
    if scale == None:
        max_coor = np.max(adata.obsm["spatial"])
        scale = 20 / max_coor
    
    adata.obs["imagecol"] = adata.obsm["spatial"][:, 0] * scale
    adata.obs["imagerow"] = adata.obsm["spatial"][:, 1] * scale
    
    adata.uns["spatial"] = {}
    adata.uns["spatial"][library_id] = {}
    adata.uns["spatial"][library_id]["images"] = {}
    adata.uns["spatial"][library_id]["images"][quality] = img
    adata.uns["spatial"][library_id]["use_quality"] = quality
    adata.uns["spatial"][library_id]["scalefactors"] = {}
    adata.uns["spatial"][library_id]["scalefactors"]["tissue_" + quality + "_scalef"] = scale
    adata.uns["spatial"][library_id]["scalefactors"]["spot_diameter_fullres"] = spot_diameter_fullres
    return adata


###### Generate an augmented list of multiple datasets
augement_data_list = []
graph_list = []
from scipy.sparse import csr_matrix
for i in range(len(data_name_list)):
	print(data_name_list[i])
	adata = sc.read_h5ad(data_path + data_name_list[i] + '/' + data_name_list[i] +'.h5ad')
	adata.obsm['spatial'] = adata.obs[['x', 'y']].values
	adata.obsm['spatial'] = adata.obsm['spatial'].astype(float)
	adata = add_visium_image(adata, data_path + data_name_list[i])
	adata = deepen._get_image_crop(adata, data_name=data_name_list[i])
	adata = deepen._get_augment(adata, spatial_type="BallTree", use_morphological=False)
	graph_dict = deepen._get_graph(adata.obsm["spatial"], distType = "BallTree")
	augement_data_list.append(adata)
	graph_list.append(graph_dict)

print('single adata OK!')

######## Synthetic Datasets and Graphs
multiple_adata, multiple_graph = deepen._get_multiple_adata(adata_list = augement_data_list, data_name_list = data_name_list, graph_list = graph_list)

print('multiple adata OK!')
###### Enhanced data preprocessing
print('Enhanced data preprocessing')
data = deepen._data_process(multiple_adata, pca_n_comps = 200)


deepst_embed = deepen._fit(
		data = data,
		graph_dict = multiple_graph,
		domains = multiple_adata.obs["batch"].values,  ##### Input to Domain Adversarial Model
		n_domains = len(data_name_list))
np.savetxt(os.path.join(save_path, "deepst_embedding.csv"), deepst_embed, delimiter=",")
multiple_adata.obsm["DeepST_embed"] = deepst_embed
multiple_adata = deepen._get_cluster_data(multiple_adata, n_domains=n_domains, priori = True)


sc.pp.neighbors(multiple_adata, use_rep='DeepST_embed')
sc.tl.umap(multiple_adata)
sc.pl.umap(multiple_adata, color=["DeepST_refine_domain","batch_name"])
plt.savefig(os.path.join(save_path, f'{"_".join(data_name_list)}_umap.pdf'), bbox_inches='tight', dpi=300)


for data_name in data_name_list:
	adata = multiple_adata[multiple_adata.obs["batch_name"]==data_name]
	sc.pl.spatial(adata, color='DeepST_refine_domain', frameon = False, spot_size=150)
	plt.savefig(os.path.join(save_path, f'{data_name}_domains.pdf'), bbox_inches='tight', dpi=300)


print(multiple_adata.isbacked)
multiple_adata.filename = save_path + '/PDAC.h5ad'
print(multiple_adata.isbacked)




D:\bio\jupyter_code\PDAC
D:\bio\DeepST\DeepST-main2\deepst
A1


Tiling image: 100%|███████████████████████████████████████████████████████████████████████████████ [ time left: 00:00 ]
Extract image feature: 100%|██████████████████████████████████████████████████████████████████████ [ time left: 00:00 ]


Physical distance calculting Done!
The number of nearest tie neighbors in physical distance is: 30.0
Gene correlation calculting Done!
The weight result of image feature is added to adata.obsm['weights_matrix_all'] !


Find adjacent spots of each spot: 100%|███████████████████████████████████████████████████████████ [ time left: 00:00 ]


Step 1: Augment molecule expression is Done!
12.0000 neighbors per cell on average.
Step 2: Graph computing is Done!
B1


Tiling image: 100%|███████████████████████████████████████████████████████████████████████████████ [ time left: 00:00 ]
Extract image feature: 100%|██████████████████████████████████████████████████████████████████████ [ time left: 00:00 ]


Physical distance calculting Done!
The number of nearest tie neighbors in physical distance is: 30.0
Gene correlation calculting Done!
The weight result of image feature is added to adata.obsm['weights_matrix_all'] !


Find adjacent spots of each spot: 100%|███████████████████████████████████████████████████████████ [ time left: 00:00 ]

Step 1: Augment molecule expression is Done!
12.0000 neighbors per cell on average.
Step 2: Graph computing is Done!
single adata OK!



  [AnnData(sparse.csr_matrix(a.shape), obs=a.obs) for a in all_adatas],
  [AnnData(sparse.csr_matrix(a.shape), obs=a.obs) for a in all_adatas],


multiple adata OK!
Enhanced data preprocessing
Your task is in full swing, please wait


DeepST trains an initial model: 100%|█████████████████████████████████████████████████████████████ [ time left: 00:00 ]
  @numba.jit()
  @numba.jit()
  @numba.jit()
  @numba.jit()
DeepST trains a final model: |                                                                     [ time left: 00:00 ]


Step 3: DeepST training has been Done!
Current memory usage：3.5143 GB
Total time: 2.52 minutes
Your task has been completed, thank you
Of course, you can also perform downstream analysis on the processed data


NameError: name 'n_domains' is not defined