# Loading packages
- [tutorial](https://docs.scvi-tools.org/en/stable/tutorials/notebooks/multimodal/MultiVI_tutorial.html)

In [None]:
import os
import time

import anndata as ad
import numpy as np
import pandas as pd
import scanpy as sc
import scvi
from scipy.sparse import issparse
def _to_dense(matrix):
    """Return *matrix* as a dense ndarray without modifying the original."""
    return matrix.toarray() if issparse(matrix) else np.asarray(matrix)


def combine(adata_RNA, adata_ADT):
    """Stack RNA and ADT measurements column-wise into one AnnData object.

    The result has the gene columns of ``adata_RNA`` followed by the
    protein columns of ``adata_ADT``; ``var['modality']`` labels them
    ``'Gene Expression'`` and ``'ADT'`` respectively.  Observation names
    and ``obsm['spatial']`` are taken from ``adata_RNA``.

    Both inputs must have the same number of observations (``np.hstack``
    raises ``ValueError`` otherwise).  Rows are paired purely by position;
    matching ``obs_names`` between the two inputs is NOT verified here.

    Side effects: ``var['modality']`` is written on both inputs.  Their
    ``.X`` matrices are left as they are (sparse inputs stay sparse).

    Raises:
        KeyError: if ``adata_RNA.obsm`` has no ``'spatial'`` entry.
    """
    # Tag the source objects too; this is cheap metadata that callers may
    # already rely on, so it is kept.
    adata_RNA.var['modality'] = 'Gene Expression'
    adata_ADT.var['modality'] = 'ADT'

    # Densify into temporaries only: overwriting the inputs' .X would
    # permanently inflate the caller's objects.
    exp = np.hstack([_to_dense(adata_RNA.X), _to_dense(adata_ADT.X)])

    cell_name = list(adata_RNA.obs_names)
    gene_name = list(adata_RNA.var_names) + list(adata_ADT.var_names)
    modality = ['Gene Expression'] * adata_RNA.n_vars + ['ADT'] * adata_ADT.n_vars

    adata_RNA_ADT = ad.AnnData(
        X=exp,
        obs=pd.DataFrame(index=cell_name),
        var=pd.DataFrame(index=gene_name),
    )

    adata_RNA_ADT.var['modality'] = modality
    adata_RNA_ADT.obsm['spatial'] = adata_RNA.obsm['spatial']

    return adata_RNA_ADT
    

for path in ['../../datasets/Human_Lymph_Node_A1/','../../datasets/Human_Lymph_Node_D1/']:
    # Create the output directory first so that a bad path fails now,
    # not after the model has been trained.
    result_path=path.replace("datasets","results")
    os.makedirs(result_path, exist_ok=True)

    adata_RNA = sc.read_h5ad(os.path.join(path, 'adata_RNA.h5ad'))
    adata_ADT = sc.read_h5ad(os.path.join(path, 'adata_ADT.h5ad'))

    adata_RNA.var_names_make_unique()
    adata_ADT.var_names_make_unique()

    # Gene columns followed by ADT columns; the second make_unique call
    # disambiguates names that occur in both modalities.
    adata = combine(adata_RNA, adata_ADT)
    adata.var_names_make_unique()

    # Split the cells into three groups (first 30%, next 30%, remainder)
    # and hand them to scvi as "RNA-only", "paired" and "second-modality-only"
    # inputs.  Only cells are sliced here: every group still carries the
    # columns of both modalities.
    n = int(0.3*adata.n_obs)
    adata_rna = adata[:n].copy()
    adata_paired = adata[n:2*n].copy()
    adata_atac = adata[2*n:].copy()

    # Concatenate the groups with scvi's helper; batch_key="modality" below
    # refers to the obs column it adds.
    adata_mvi = scvi.data.organize_multiome_anndatas(adata_paired, adata_rna, adata_atac)

    # MULTIVI reads the first n_genes columns as expression and the next
    # n_regions columns as the second modality, so genes must come first.
    # An alphabetical sort would put "ADT" ahead of "Gene Expression";
    # use a stable sort on "is not a gene" instead.
    is_gene = (adata_mvi.var["modality"] == "Gene Expression").to_numpy()
    adata_mvi = adata_mvi[:, np.argsort(~is_gene, kind="stable")].copy()

    print(adata_mvi.shape)
    # Keep features detected in at least 1% of the cells.
    sc.pp.filter_genes(adata_mvi, min_cells=int(adata_mvi.shape[0] * 0.01))

    # Replace NaN entries with 0 before the data is registered with the
    # model.  Only NaN is touched (inf is left alone); X is expected to be
    # dense at this point.
    dense_x = np.asarray(adata_mvi.X)
    adata_mvi.X = np.where(np.isnan(dense_x), 0, dense_x)

    scvi.model.MULTIVI.setup_anndata(adata_mvi, batch_key="modality")

    # NOTE(review): ADT features are passed through the n_regions slot,
    # i.e. they are modelled the way the tutorial models accessibility
    # peaks - confirm this is intended.
    mvi = scvi.model.MULTIVI(
        adata_mvi,
        n_hidden=64,
        n_latent=64,
        n_genes=(adata_mvi.var["modality"] == "Gene Expression").sum(),
        n_regions=(adata_mvi.var["modality"] == "ADT").sum(),
    )
    mvi.view_anndata_setup()

    ## Training MultiVI model
    # To pin training to a specific GPU, call e.g. mvi.to_device("cuda:1") here.
    mvi.train()

    # obtain latent representation
    adata_mvi.obsm["X_MultiVI"] = mvi.get_latent_representation()

    # save result
    adata_mvi.write_h5ad(os.path.join(result_path, 'adata_MultiVI.h5ad'))

  return multi_anndata.concatenate(other, join="outer", batch_key=modality_key)
  return multi_anndata.concatenate(other, join="outer", batch_key=modality_key)


(19737, 23503)


An NVIDIA GPU may be present on this machine, but a CUDA-enabled jaxlib is not installed. Falling back to cpu.


Trainer will use only 1 of 4 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=4)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
You are using a CUDA device ('NVIDIA GeForce RTX 3090') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3]
/home/ws6tg/anaconda3/envs/scvi/lib/python3.9/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:441: The 'train_dataloader' does not have

Epoch 470/500:  94%|██████▌| 470/500 [52:38<03:21,  6.72s/it, v_num=1, train_loss_step=5.21e+3, train_loss_epoch=5.09e+3]
Monitored metric reconstruction_loss_validation did not improve in the last 50 records. Best score: 5085.342. Signaling Trainer to stop.
