# intNMF example

Example using a 10x multiome dataset with cell type annotations. First, download the data.

In [5]:
%%bash
# Download the cell type annotations and the 10x filtered feature-barcode matrix into ../data/.
# --no-clobber skips files that already exist, so re-running this cell neither fetches them
# again nor leaves numbered duplicates (e.g. "*.h5.1") next to the originals.
# NOTE(review): --no-check-certificate disables TLS verification for the annotation host;
# drop it once that server presents a valid certificate.
wget --no-check-certificate --no-clobber --no-verbose -P ../data/  https://costalab.ukaachen.de/open_data/MOJITOO/PBMC-Multiom_annotation.tsv
wget --no-clobber --no-verbose -P ../data/ https://cf.10xgenomics.com/samples/cell-arc/1.0.0/pbmc_granulocyte_sorted_10k/pbmc_granulocyte_sorted_10k_filtered_feature_bc_matrix.h5

pbmc_granulocyte_sorted_10k_filtered_feature_bc_matrix.h5
PBMC-Multiom_annotation.tsv


Import the packages required for loading the data, then load the data.

In [8]:
import muon as mu
import anndata as ad
import scanpy as sc
import os
import pandas as pd

In [13]:
def load_multiome(file, labels=None):
    '''Load multiome data from a .h5, .h5ad or .h5mu file.

    The reader is selected from the file extension:

    * ``.h5``   - read with ``mu.read_10x_h5``.
    * ``.h5ad`` - read with ``ad.read_h5ad`` and split into 'rna' and 'atac'
      modalities using ``var['feature_types']`` ('GEX' / 'ATAC'); only cells
      present in both modalities are kept.
    * ``.h5mu`` - read with ``mu.read``.

    Parameters
    ----------
    file : str
        Path to the dataset.
    labels : str or None, optional
        Path to a tab-separated annotation table with a header row and the
        cell names in the first column. When given, cells without an entry in
        the table are removed and the table replaces ``mu_data.obs``. When
        ``None``, the data is returned unfiltered.

    Returns
    -------
    The loaded MuData object.

    Raises
    ------
    ValueError
        If the file extension is not one of .h5, .h5ad or .h5mu.
    '''

    _, extension = os.path.splitext(file)
    if extension == '.h5':
        mu_data = mu.read_10x_h5(file)

    elif extension == '.h5ad':
        h5ad = ad.read_h5ad(file)
        rna = h5ad[:, h5ad.var['feature_types'] == 'GEX']
        atac = h5ad[:, h5ad.var['feature_types'] == 'ATAC']
        mu_data = mu.MuData({'rna': rna, 'atac': atac})
        mu_data.update()
        mu.pp.intersect_obs(mu_data)
    elif extension == '.h5mu':
        mu_data = mu.read(file)
    else:
        # Fail early with a clear message instead of an UnboundLocalError further down.
        raise ValueError(
            "Unsupported file extension '{}' for '{}'; expected .h5, .h5ad or .h5mu".format(
                extension, file
            )
        )

    # If there are labels for the dataset load the labels and remove cells without a label.
    if labels is None:
        print('no labels')
    else:
        meta = pd.read_csv(labels, sep="\t", header=0, index_col=0)
        mu.pp.filter_obs(mu_data, meta.index.values)
        # The annotation table may list cells in a different order than the data, or
        # contain cells that are not in the data at all. Reindex it to the filtered
        # cell names so every row of .obs describes the matching cell.
        mu_data.obs = meta.loc[mu_data.obs_names]

    return mu_data


In [14]:
# Cell type annotations (tab-separated) and the 10x matrix downloaded above.
labels = '../data/PBMC-Multiom_annotation.tsv'
file = '../data/pbmc_granulocyte_sorted_10k_filtered_feature_bc_matrix.h5'

# Read the matrix and keep only the cells that have an annotation.
mu_data = load_multiome(file=file, labels=labels)

Variable names are not unique. To make them unique, call `.var_names_make_unique`.


Added `interval` annotation for features from ../data/pbmc_granulocyte_sorted_10k_filtered_feature_bc_matrix.h5


Variable names are not unique. To make them unique, call `.var_names_make_unique`.


In [15]:
# Display a summary of the loaded object as the cell output.
mu_data

Import the intNMF package and then run the model.

In [17]:
import sys

# Location of the intNMF source directory, relative to the working directory.
path_to_nmf = '../../scnmf/'
# abspath resolves the relative path so the sys.path entry does not depend on
# later changes of the working directory.
module_path = os.path.abspath(path_to_nmf)

# Only add the directory once, so re-running this cell does not grow sys.path.
if module_path not in sys.path:
    sys.path.append(module_path)
from nmf_models_mod_updates import intNMF, log_tf_idf


In [21]:
# Apply log_tf_idf to the raw matrix of each modality (RNA first, then ATAC).
rna_tf_idf, atac_tf_idf = (
    log_tf_idf(mu_data[modality].X) for modality in ('rna', 'atac')
)

In [22]:
# k for the NMF model (presumably the number of factors; confirm against intNMF).
N_FACTORS = 10

nmf_model = intNMF(N_FACTORS)  # NMF model with k=10
# Fit jointly on the transformed RNA and ATAC matrices.
nmf_model.fit(rna_tf_idf, atac_tf_idf)


In [27]:
# Store the fitted theta matrix on the MuData object under the key 'intNMF'.
# NOTE(review): presumably theta holds one row per cell, as .obsm entries must; confirm against intNMF.
mu_data.obsm['intNMF'] = nmf_model.theta

In [30]:
# Store the per-modality phi matrices on the matching modality under the key 'intNMF'.
# They are transposed before storing; presumably phi is (factors x features) while
# .varm expects one row per feature. Confirm against intNMF.
mu_data['rna'].varm['intNMF'] = nmf_model.phi_rna.T
mu_data['atac'].varm['intNMF'] = nmf_model.phi_atac.T