In [None]:
import sys,os
import pandas as pd
import subprocess
import numpy as np
from tqdm import tqdm
from IPython.display import HTML
import scanpy as sc

import anndata as ad
import networkx as nx
import scanpy as sc
#import scvelo as scv
import scglue
from matplotlib import rcParams
import dill  

# Shell escape: print the version of the bedtools binary visible to this kernel.
# NOTE(review): presumably a sanity check for the scglue genomics step further
# down, which is expected to need bedtools — confirm.
!bedtools --version

## scRNA-seq data

# Read the scRNA-seq AnnData object from disk and echo it.
rna = ad.read_h5ad("/media/AnalysisDisk2/scglue/rna.h5ad")
rna

# Annotate the genes in ``rna`` from the GENCODE GTF, matching on the GTF
# ``gene_name`` field, then preview the coordinate columns in ``rna.var``.
scglue.data.get_gene_annotation(
    rna,
    gtf_by="gene_name",
    gtf="/media/AnalysisDisk2/scglue/gencode.v46.chr_patch_hapl_scaff.annotation.gtf.gz",
)
rna.var[["chrom", "chromStart", "chromEnd"]].head()

# Keep only genes that have a ``chromStart`` value; rows where it is missing
# would make the later integer cast of the coordinate columns fail on NaN.
has_coords = rna.var["chromStart"].notnull()

# ``dell`` is a float marker column (1.0 = has coordinates, 0.0 = missing).
# It is kept so that ``rna.var`` carries the same columns as before.
rna.var["dell"] = has_coords.astype(float)

# Slicing an AnnData returns a view. ``.copy()`` materialises it here so that
# the later assignments to ``rna.var`` / ``rna.obsm`` / ``rna.varm`` act on a
# real object instead of triggering an implicit copy with a warning.
rna = rna[:, has_coords.to_numpy()].copy()

%%time
rna.var = rna.var.astype({"chromStart": int, "chromEnd": int})

rna.var.loc[:, ["chrom", "chromStart", "chromEnd"]].head()

## scATAC-seq data

# Read the ATAC AnnData object from disk and echo it.
atac = ad.read_h5ad("/media/AnalysisDisk2/scglue/atac.h5ad")
#atac.obs['domain']='scATAC-seq'
atac

# UMAP of the ATAC cells, coloured by the ``leiden`` annotation.
sc.pl.umap(
    atac,
    wspace=0.65,
    color="leiden",
)

## Guidance graph

%%time
guidance = scglue.genomics.rna_anchored_guidance_graph(rna, atac)
guidance

scglue.graph.check_graph(guidance, [rna, atac])

atac.var.head()

## Model training

# Configure the RNA dataset for training: "NB" model, highly variable features
# only, the ``counts`` layer as input, and ``X_pca`` as the cell representation.
# NOTE(review): "NB" is presumably the negative-binomial likelihood — confirm.
rna_config = {
    "use_highly_variable": True,
    "use_layer": "counts",
    "use_rep": "X_pca",
}
scglue.models.configure_dataset(rna, "NB", **rna_config)

# Configure the ATAC dataset the same way, with no ``use_layer`` argument and
# ``X_spectral`` as the cell representation.
atac_config = {
    "use_highly_variable": True,
    "use_rep": "X_spectral",
}
scglue.models.configure_dataset(atac, "NB", **atac_config)

from itertools import chain

# Restrict the guidance graph to the features flagged ``highly_variable`` in
# either dataset. ``.copy()`` detaches the result from the parent graph.
hv_rna_features = rna.var.query("highly_variable").index
hv_atac_features = atac.var.query("highly_variable").index
guidance_hvf = guidance.subgraph(
    chain(hv_rna_features, hv_atac_features)
).copy()

%%time
glue = scglue.models.fit_SCGLUE(
    {"rna": rna, "atac": atac}, guidance_hvf,init_kws={"h_dim":512,"random_seed":999},
     fit_kws={"directory": "glue-_X_spectral","data_batch_size":8192}
)

glue.save("/media/AnalysisDisk2/scglue/GLUE-preprocessed-data/glue-new-_X_spectral.dill")


# Compute the integration consistency table for the trained model and echo it.
dx = scglue.models.integration_consistency(
    glue,
    {"rna": rna, "atac": atac},
    guidance_hvf,
)
dx

import seaborn as sns

# Plot consistency against ``n_meta`` with a dashed reference line at 0.05.
# NOTE(review): 0.05 is presumably the reliability threshold — confirm.
ax = sns.lineplot(data=dx, x="n_meta", y="consistency")
_ = ax.axhline(y=0.05, c="darkred", ls="--")

## Applying the trained model

# Store the model's cell encodings for each modality under ``obsm["X_glue"]``.
for modality_key, modality in (("rna", rna), ("atac", atac)):
    modality.obsm["X_glue"] = glue.encode_data(modality_key, modality)

# Encode the guidance graph and index the result by the model's vertices;
# then preview the top-left corner of the table.
feature_embeddings = pd.DataFrame(
    glue.encode_graph(guidance_hvf),
    index=glue.vertices,
)
feature_embeddings.iloc[:5, :5]

# Align the feature embeddings to each dataset's variable order. ``reindex``
# yields NaN rows for variables that are absent from the embedding index.
for modality in (rna, atac):
    aligned = feature_embeddings.reindex(modality.var_names)
    modality.varm["X_glue"] = aligned.to_numpy()

# Transfer the ``celltype_L3`` labels from the RNA cells to the ATAC cells,
# using the shared ``X_glue`` representation. ``n_jobs=-1`` is forwarded as an
# extra keyword argument.
scglue.data.transfer_labels(
    rna,
    atac,
    "celltype_L3",
    use_rep="X_glue",
    n_jobs=-1,
)

# UMAP of the ATAC cells, coloured by the transferred labels.
sc.pl.umap(
    atac,
    wspace=0.65,
    color="celltype_L3",
)

# Write both AnnData objects, now carrying the embeddings, as gzip-compressed h5ad files.
for modality, out_path in (
    (rna, "/media/AnalysisDisk2/scglue/rna-emb.h5ad"),
    (atac, "/media/AnalysisDisk2/scglue/atac-emb.h5ad"),
):
    modality.write(out_path, compression="gzip")

# Save the highly-variable guidance graph in GraphML format.
nx.write_graphml(guidance_hvf, "/media/AnalysisDisk2/scglue/guidance-hvf.gz")