In [12]:
import scanpy as sc
import pandas as pd
import numpy as np

import matplotlib.pyplot as plt
import seaborn as sns

import torch
import os
from tqdm import tqdm

import warnings
warnings.filterwarnings('ignore')

from skimage.io import imread
from PIL import Image
Image.MAX_IMAGE_PIXELS = None

from pathlib import Path

In [13]:
# Directory holding the Visium sample that the loading cell reads; the path is
# relative, so it is resolved against the notebook's working directory.
data_root = Path('DLPFC/151673')

### data preprocessing

In [18]:
# Load the Visium dataset stored under `data_root` into an AnnData object.
adata = sc.read_visium(data_root)
# Make duplicated gene names unique so `var_names` can serve as unambiguous keys.
adata.var_names_make_unique()


In [19]:
# Preprocessing: keep raw counts, filter genes, normalise, select HVGs, scale.
#
# NOTE: this cell is not idempotent -- it rebinds `adata` and rewrites `adata.X`.
# Re-run the loading cell first if this cell has to be executed again.

# Keep a dense copy of the raw counts in a layer; the `seurat_v3` HVG flavour
# below is pointed at this layer rather than at the normalised `X`.
adata.layers['count'] = adata.X.toarray()

# Drop genes detected in fewer than 50 spots, then genes with fewer than 10 counts.
sc.pp.filter_genes(adata, min_cells=50)
sc.pp.filter_genes(adata, min_counts=10)

# Normalise every spot to a total of 1e6.
sc.pp.normalize_total(adata, target_sum=1e6)

# Flag the 2000 most variable genes, computed on the raw-count layer.
sc.pp.highly_variable_genes(adata, flavor="seurat_v3", layer='count', n_top_genes=2000)

# Subset to the flagged genes. `.copy()` materialises the subset so that the
# in-place scaling below works on a real AnnData instead of a view that scanpy
# would have to copy implicitly.
adata = adata[:, adata.var['highly_variable']].copy()
sc.pp.scale(adata)

In [20]:
# Display the AnnData summary after preprocessing.
adata

AnnData object with n_obs × n_vars = 3639 × 2000
    obs: 'in_tissue', 'array_row', 'array_col'
    var: 'gene_ids', 'feature_types', 'genome', 'n_cells', 'n_counts', 'highly_variable', 'highly_variable_rank', 'means', 'variances', 'variances_norm', 'mean', 'std'
    uns: 'spatial', 'hvg'
    obsm: 'spatial'
    layers: 'count'

### ground truth

In [46]:
import pandas as pd

# Location of the annotation file: one "<barcode>\t<label>" pair per line.
truth_path = '/stLearn/stlearn/data/151673/truth.txt'

# Parse the file into two parallel lists (barcodes and raw label strings).
truth_labels = []
Barcode = []
with open(truth_path, 'r') as f:
    for line in f:
        parts = line.strip().split('\t')
        if len(parts) != 2:
            # The line does not split into exactly two fields; append the
            # "NAN" placeholder so a barcode-only line still yields a label.
            parts.append("NAN")
        truth_labels.append(parts[1])  # label field
        Barcode.append(parts[0])       # barcode field

# Raw label -> numeric class id (kept as strings). "7" is the "no label" class.
# NOTE(review): the keys must match the spelling used in truth.txt exactly
# (e.g. "Layer 1" vs "Layer_1"); any other spelling silently becomes "7".
label_map = {
    "Layer 1": "0",
    "Layer 2": "1",
    "Layer 3": "2",
    "Layer 4": "3",
    "Layer 5": "4",
    "Layer 6": "5",
    "WM": "6",
    "NAN":"7"
}

# Map labels to class ids; anything not in the dictionary falls back to "7".
numeric_labels = [label_map.get(label, "7") for label in truth_labels]

label_categories = ['0', '1', '2', '3', '4', '5', '6', '7']

# Index the labels by barcode so they can be matched to `adata.obs_names`
# instead of relying on both sources listing the spots in the same order.
truth_by_barcode = pd.Series(numeric_labels, index=Barcode, dtype=object)
# `reindex` needs a unique index; keep the first label of a repeated barcode.
truth_by_barcode = truth_by_barcode[~truth_by_barcode.index.duplicated(keep='first')]

if adata.obs_names.isin(truth_by_barcode.index).any():
    # Preferred path: align by barcode; spots without an annotation get "7".
    aligned_labels = truth_by_barcode.reindex(adata.obs_names).fillna("7")
    adata.obs['true_label'] = pd.Categorical(
        aligned_labels.to_numpy(), categories=label_categories, ordered=False
    )
elif len(adata.obs) == len(numeric_labels):
    # Fallback (original behaviour): no barcode matches, e.g. a different
    # barcode format, so assume truth.txt lists spots in `adata.obs` order.
    adata.obs['true_label'] = pd.Categorical(
        numeric_labels, categories=label_categories, ordered=False
    )
else:
    print("数据框架的长度与标签列表的长度不匹配。")

In [47]:
# Inspect the coordinate array stored under `obsm['spatial']`.
adata.obsm['spatial']

array([[ 9791,  8468],
       [ 5769,  2807],
       [ 4068,  9505],
       ...,
       [ 4631,  7831],
       [ 5571, 11193],
       [ 6317,  3291]], dtype=int64)

In [48]:
# Save the annotated AnnData next to the raw data. The target is derived from
# `data_root` so the dataset directory is spelled out in one place only; with
# the current `data_root` this resolves to 'DLPFC/151673/new_data.h5ad'.
adata.write(data_root / 'new_data.h5ad')

### load model

In [3]:
import torch
# NOTE(review): the recorded output of this cell is
# `ModuleNotFoundError: No module named 'pretrain_dblp'`. The line after this
# one imports the model from `demo.demo_new.pretrain_dblp`, so presumably the
# bare top-level import only works when that package directory is on
# `sys.path` -- confirm the intended project layout before relying on this cell.
import pretrain_dblp
from demo.demo_new.pretrain_dblp.Model_new import Pre_model

ModuleNotFoundError: No module named 'pretrain_dblp'