In [None]:
from sclab.scanpy.readwrite import _download
from scipy.sparse import csr_matrix
from pathlib import Path

import pandas as pd
import scanpy as sc
import sclab
import sclab.preprocess

## Download resources

In [None]:
# Gene annotation table (CSV). The download is skipped when a file with this
# name is already present in the working directory.
filepath = Path("TgondiiME49v68_genes.csv")
url = "https://poisson.math.umb.edu/data/mpm2025/TgondiiME49v68_genes.csv"

if not filepath.exists():
    _download(url, filepath)

In [None]:
# 011_me49 dataset (.h5ad). The download is skipped when a file with this
# name is already present in the working directory.
filepath = Path("011_me49_allg.h5ad")
url = "https://poisson.math.umb.edu/data/mpm2025/011_me49_allg.h5ad"

if not filepath.exists():
    _download(url, filepath)

In [None]:
# 0701+0702 dataset (.h5ad). The download is skipped when a file with this
# name is already present in the working directory.
filepath = Path("0701+0702_adata_TGME49_genes.h5ad")
url = "https://poisson.math.umb.edu/data/mpm2025/0701+0702_adata_TGME49_genes.h5ad"

if not filepath.exists():
    _download(url, filepath)

## Prepare Genes Table

In [None]:
# NOTE: this whole cell is commented out and does not run.
# It builds a gene annotation table from a ToxoDB GFF3 file and writes it to
# "TgondiiME49v68_genes.csv" -- the same file name that is downloaded above and
# read in the next cell. Presumably this is how the hosted CSV was produced
# (TODO confirm). Re-enabling it requires "ToxoDB-68_TgondiiME49.gff" in the
# working directory and the `pyranges` package.

# import sys
# import urllib.parse
# from importlib.util import find_spec

# if find_spec("pyranges") is None:
#     !{sys.executable} -m pip install pyranges

# import pyranges

# genes_table = pyranges.read_gff3("ToxoDB-68_TgondiiME49.gff").df
# # keep only rows that carry an ebi_biotype, and only the four annotation columns
# genes_table = genes_table.loc[~genes_table["ebi_biotype"].isna(), ["ID", "description", "Name", "ebi_biotype"]]
# # column names become gene_id, gene_description, gene_name, gene_ebi_biotype
# genes_table.columns = "gene_" + genes_table.columns.str.lower()
# genes_table = genes_table.set_index("gene_id")
# # descriptions are percent-encoded in the GFF3 attributes; decode them
# genes_table["gene_description"] = genes_table["gene_description"].map(urllib.parse.unquote)
# genes_table = genes_table.fillna("")
# # boolean flags derived from a case-insensitive substring match on the description
# genes_table["cytochrome"] = genes_table.gene_description.str.lower().str.contains('cytochrome')
# genes_table["ribosomal_rna"] = genes_table.gene_description.str.lower().str.contains(' ribosomal rna')
# genes_table.to_csv("TgondiiME49v68_genes.csv")
# genes_table.head()

In [None]:
# Load the gene annotation table, using the first CSV column as the index.
genes_table = pd.read_csv("TgondiiME49v68_genes.csv", index_col=0)
# Replace missing values with empty strings.
genes_table = genes_table.fillna("")

# Preview the first rows.
genes_table.head()

## Prepare Datasets

In [None]:
# Read the 0701+0702 dataset from its .h5ad file.
adata_0701p0702 = sc.read_h5ad("0701+0702_adata_TGME49_genes.h5ad")

# (optional) restrict the variables to genes that appear in the annotation table
sclab.preprocess.subset_var(
    adata_0701p0702,
    adata_0701p0702.var_names.intersection(genes_table.index),
)

# Attach the annotation columns to the per-gene table, aligned on the gene index.
adata_0701p0702.var = adata_0701p0702.var.join(genes_table, how="left")

adata_0701p0702

In [None]:
# Read the 011_me49 dataset from its .h5ad file.
adata_011_me49 = sc.read_h5ad("011_me49_allg.h5ad")

# Use the "matrix" layer, converted to CSR sparse format, as X; then drop the layers.
adata_011_me49.X = csr_matrix(adata_011_me49.layers["matrix"])
del adata_011_me49.layers

# (optional) restrict the variables to genes that appear in the annotation table
sclab.preprocess.subset_var(
    adata_011_me49,
    adata_011_me49.var_names.intersection(genes_table.index),
)

# Attach the annotation columns to the per-gene table, aligned on the gene index.
adata_011_me49.var = adata_011_me49.var.join(genes_table, how="left")

adata_011_me49

### Merge the two datasets

In [None]:
# Concatenate the two datasets:
#   join="outer"     -> keep the union of the variables of both inputs
#   label="batch"    -> store each observation's dict key in obs["batch"]
#   index_unique="_" -> make obs names unique by appending "_" + the dict key
combined = sc.concat(
    {"0701p0702": adata_0701p0702, "011_me49": adata_011_me49},
    join="outer",
    label="batch",
    index_unique="_",
)

# (optional) restrict the variables to genes that appear in the annotation table
sclab.preprocess.subset_var(
    combined,
    combined.var_names.intersection(genes_table.index),
)

# Attach the annotation columns to the merged per-gene table, aligned on the gene index.
combined.var = combined.var.join(genes_table, how="left")

combined

## Display App

In [None]:
# Create the SCLab dashboard with its default arguments; no dataset is passed
# explicitly here.
db = sclab.SCLabDashboard()

In [None]:
# Bare last expression: the notebook renders the dashboard as this cell's output.
db