# Scratch

In [39]:
import scanpy as sc
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

import sys
sys.path.append('/home/ubuntu/Github/memento/')
import memento
import memento.auxillary.simulate as simulate
import scipy.sparse as sparse

import sklearn.datasets as sklearn_datasets


# Directory holding the input datasets; get_simulation_parameters reads
# 'interferon_filtered.h5ad' from here.
DATA_PATH = '/home/ubuntu/Data/'
# Value of adata.obs.cell_type used to select the cells that are modelled.
CELL_TYPE = 'CD4 T cells - ctrl'

# NOTE(review): the four settings below are not referenced anywhere in the
# visible part of this notebook; they look like a sweep configuration
# (repetitions, estimator names, capture efficiencies, cell counts) -- confirm
# against the cells that consume them.
NUM_TRIALS = 20
METHODS = ['naive', 'poisson', 'hypergeometric']
CAPTURE_EFFICIENCIES = [0.01, 0.05, 0.1, 0.2, 0.3, 0.5, 0.8, 1]
NUMBER_OF_CELLS = [50, 100, 500]


def get_simulation_parameters(q=0.07):
    """Extract simulation parameters from the interferon dataset.

    Reads ``interferon_filtered.h5ad`` from ``DATA_PATH``, keeps only the
    cells whose ``obs.cell_type`` equals ``CELL_TYPE``, and hands the
    resulting ``adata.X`` matrix to ``simulate.extract_parameters`` with
    ``min_mean=0.01``.

    Args:
        q: Forwarded unchanged as the ``q`` keyword of
            ``simulate.extract_parameters`` (presumably the assumed capture
            efficiency -- confirm against the memento documentation).

    Returns:
        The tuple ``(x_param, z_param, Nc)``: the first three of the four
        values returned by ``simulate.extract_parameters``.
    """
    adata = sc.read(DATA_PATH + 'interferon_filtered.h5ad')
    adata = adata[adata.obs.cell_type == CELL_TYPE]

    # The fourth return value is not used by this function, so it is
    # discarded rather than bound to a name. The previous dense
    # "relative_data" computation was dead code and has been removed to avoid
    # materialising the full count matrix for nothing.
    x_param, z_param, Nc, _ = simulate.extract_parameters(adata.X, q=q, min_mean=0.01)

    return x_param, z_param, Nc


def simulate_data(n_cells, z_param, Nc, q=0.07):
    """Generate a simulated transcriptome matrix and its captured counterpart.

    Args:
        n_cells: Number of cells, forwarded to
            ``simulate.simulate_transcriptomes``.
        z_param: Indexable pair; ``z_param[0]`` is passed as ``means`` and
            ``z_param[1]`` as ``variances``.
        Nc: Forwarded unchanged as the ``Nc`` keyword.
        q: Forwarded as the second positional argument of
            ``simulate.capture_sampling``. It used to be read from an
            undefined name, which raised ``NameError`` unless a global ``q``
            happened to exist; the default matches the default of
            ``get_simulation_parameters`` in this file.

    Returns:
        ``(true_data, captured_data)``: the simulated matrix with negative
        entries set to zero, and the second value returned by
        ``simulate.capture_sampling`` converted to a
        ``scipy.sparse.csr_matrix``.
    """
    true_data = simulate.simulate_transcriptomes(
        n_cells=n_cells,
        means=z_param[0],
        variances=z_param[1],
        Nc=Nc,
        norm_cov='uncorrelated',
    )
    # Zero out any negative simulated values in place before sampling.
    true_data[true_data < 0] = 0

    # The first return value of capture_sampling is not needed here.
    _, captured_data = simulate.capture_sampling(true_data, q, q_sq=None)
    captured_data = sparse.csr_matrix(captured_data)

    return true_data, captured_data

In [40]:
# Fit the simulation parameters once, passing q=0.07 explicitly; the results
# are reused by the cells below.
x_param, z_param, Nc = get_simulation_parameters(q=0.07)




In [41]:
# Random symmetric positive-definite matrix of dimension 500, passed below as
# norm_cov. No random_state is given, so it differs on every execution.
# NOTE(review): the dimension presumably has to match the number of genes in
# z_param -- confirm.
cov_matrix = sklearn_datasets.make_spd_matrix(500)

In [42]:
# Small correlated simulation: same parameters as simulate_data, but with
# cov_matrix supplied as norm_cov instead of 'uncorrelated'.
n_cells = 3
true_data = simulate.simulate_transcriptomes(
    n_cells=n_cells,
    means=z_param[0],
    variances=z_param[1],
    Nc=Nc,
    norm_cov=cov_matrix,
)
# Zero out any negative simulated values in place.
true_data[true_data < 0] = 0
# Row sums of the simulated matrix (presumably one total per cell -- confirm
# that rows are cells).
size_factor = true_data.sum(axis=1)

In [43]:
import itertools

In [45]:
# Every unordered pair (i, j) with i < j from the indices 0..2, split into two
# parallel tuples: a holds the first members, b the second.
a, b = zip(*itertools.combinations(np.arange(3), 2))

In [46]:
# Covariance estimate for the (a, b) index pairs on the simulated data, which
# is converted to a float64 CSR matrix first.
# NOTE(review): the argument 1 is presumably the capture efficiency -- confirm.
memento.estimator.RNAHypergeometric(1).covariance(
    sparse.csr_matrix(true_data, dtype=np.float64),
    size_factor,
    idx1=a,
    idx2=b,
)

array([-3.41885938e-08,  0.00000000e+00,  0.00000000e+00])

In [None]:
# Repeat of the covariance estimate for the (a, b) index pairs on the
# float64 CSR form of the simulated data (this cell has not been executed).
memento.estimator.RNAHypergeometric(1).covariance(
    sparse.csr_matrix(true_data, dtype=np.float64),
    size_factor,
    idx1=a,
    idx2=b,
)

In [30]:
# Display the CSR form of true_data without a dtype override (the covariance
# calls above force float64). The recorded output below is from an earlier
# execution (In [30]) and shows 500 rows, so it predates the n_cells = 3 run.
sparse.csr_matrix(true_data)

<500x6263 sparse matrix of type '<class 'numpy.int64'>'
	with 1045780 stored elements in Compressed Sparse Row format>

In [2]:
%load_ext autoreload

In [3]:
%autoreload 2