In [38]:
import operator
import sys
import os
import numbers
# Pytest-based test suite for band_hic_matrix
import pytest
import numpy as np
from scipy.sparse import coo_matrix
import types
import copy
import importlib
import numpy.ma as ma
import bandhic as bh

In [39]:
# Re-import bandhic so that edits made to the package source on disk are
# picked up without restarting the kernel (development convenience).
importlib.reload(bh)

<module 'bandhic' from '/Users/wwb/Documents/workspace/BandHiC-Master/bandhic/__init__.py'>

In [40]:
# Load chromosome chr19 from the Micro-C .hic file into a bandhic matrix.
# NOTE(review): resolution=10000 is presumably the bin size in base pairs and
# diag_num=200 presumably the number of diagonals kept — confirm against the
# bh.straw_chr documentation.
mat = bh.straw_chr('../data/GSE130275_mESC_WT_combined_1.3B_microc.hic',chrom='chr19',resolution=10000,diag_num=200)



In [43]:
# Write the loaded matrix to ../data/sample.npz using bandhic's own saver.
bh.save_npz('../data/sample.npz', mat)

In [44]:
# Memory footprint reported by the banded object itself
# (presumably in bytes — confirm); compared with the dense array's nbytes later.
mat.memory_usage()

9814800

In [45]:
# Expand the banded matrix to a dense array; used as the reference
# representation for the comparisons in the following cells.
mat_dense = mat.todense()

In [46]:
# Bytes occupied by the dense array's elements, for comparison with
# mat.memory_usage() above.
mat_dense.nbytes

301007648

In [47]:
# One boolean per bin from reducing along axis 1.
# NOTE(review): presumably mirrors ndarray.any, i.e. True where the bin has at
# least one nonzero entry — confirm against bandhic.
mat.any(axis=1)

array([False, False, False, ...,  True,  True,  True])

In [48]:
# Mask every bin for which mat.any(axis=1) is False.
# The return value is discarded and mat.mask_row_col is read afterwards, so this
# appears to mutate `mat` in place; re-running the cell re-applies the mask to
# the already-masked object.
mat.add_mask_row_col(np.logical_not(mat.any(axis=1)))

In [49]:
# Inspect the row/column mask now stored on the matrix.
mat.mask_row_col

array([ True,  True,  True, ..., False, False, False])

In [50]:
# Fraction of bins that are masked: number of True entries divided by the
# total number of entries in the row/column mask.
np.sum(mat.mask_row_col)/mat.mask_row_col.size

0.05216824258232801

In [51]:
# Display the matrix's underlying `data` array for inspection.
# NOTE(review): its layout is internal to bandhic and not established here.
mat.data

array([[  0.,   0.,   0., ...,   0.,   0.,   0.],
       [  0.,   0.,   0., ...,   0.,   0.,   0.],
       [  0.,   0.,   0., ...,   0.,   0.,   0.],
       ...,
       [762., 229.,  10., ...,   0.,   0.,   0.],
       [435., 111.,   0., ...,   0.,   0.,   0.],
       [ 26.,   0.,   0., ...,   0.,   0.,   0.]])

In [52]:
# Rebuild the dense reference now that the row/column mask has been added;
# this overwrites the unmasked `mat_dense` created earlier.
mat_dense=mat.todense()

In [53]:
# Display the dense result (the recorded output is a numpy masked_array).
mat_dense

masked_array(
  data=[[--, --, --, ..., --, --, --],
        [--, --, --, ..., --, --, --],
        [--, --, --, ..., --, --, --],
        ...,
        [--, --, --, ..., 762.0, 229.0, 10.0],
        [--, --, --, ..., 229.0, 435.0, 111.0],
        [--, --, --, ..., 10.0, 111.0, 26.0]],
  mask=[[ True,  True,  True, ...,  True,  True,  True],
        [ True,  True,  True, ...,  True,  True,  True],
        [ True,  True,  True, ...,  True,  True,  True],
        ...,
        [ True,  True,  True, ..., False, False, False],
        [ True,  True,  True, ..., False, False, False],
        [ True,  True,  True, ..., False, False, False]],
  fill_value=0.0)

In [54]:
# Minimum along axis 0, computed directly on the banded matrix.
mat_min=mat.min(axis=0)

In [55]:
# Same reduction on the dense masked array; serves as the reference result.
mat_dense_min=mat_dense.min(axis=0)

In [56]:
# Display the banded result (recorded output: masked_array with fill_value=0.0).
mat_min

masked_array(data=[--, --, --, ..., 0.0, 0.0, 0.0],
             mask=[ True,  True,  True, ..., False, False, False],
       fill_value=0.0)

In [57]:
# Display the dense reference (recorded output: masked_array with
# fill_value=1e+20, which differs from the banded result's fill_value).
mat_dense_min

masked_array(data=[--, --, --, ..., 0.0, 0.0, 0.0],
             mask=[ True,  True,  True, ..., False, False, False],
       fill_value=1e+20)

In [58]:
# Values must agree. ma.allequal treats positions masked in either operand as
# equal by default (fill_value=True), so the masks are compared separately.
ma.allequal(mat_min,mat_dense_min)

True

In [59]:
# The masks of the banded and dense results must match element for element.
np.array_equal(mat_min.mask,mat_dense_min.mask)

True

In [60]:
# Symmetry check: the dense matrix should equal its transpose.
# NOTE(review): np.array_equal appears to compare the underlying data and not
# the mask of a masked array — confirm; if mask symmetry matters, compare
# mat_dense.mask with its transpose as well.
np.array_equal(mat_dense.T,mat_dense)

True

In [61]:
# Reference per-diagonal minima: for each offset k in range(mat.diag_num), take
# the minimum over the k-th diagonal (k=0 is the main diagonal) of the dense array.
diags=[mat_dense.diagonal(k).min() for k in range(mat.diag_num)]

In [62]:
# Convert the list of per-diagonal minima into an ndarray (rebinds `diags`).
diags=np.array(diags)

In [63]:
# The banded per-diagonal minimum must equal the dense reference, which is
# wrapped as a fully unmasked MaskedArray for the comparison.
ma.allequal(mat.min(axis='diag'),ma.MaskedArray(diags,mask=False))

True

In [64]:
# The global minimum of the banded matrix must equal that of the dense array.
ma.allequal(mat.min(),ma.MaskedArray(mat_dense).min())

True

In [66]:
# Parameters of the matrix under test. The path is relative to the notebook's
# working directory, like the earlier straw_chr cell that loads the same file,
# so the notebook is not tied to one user's home directory.
params = {
    "path": "../data/GSE130275_mESC_WT_combined_1.3B_microc.hic",
    "chrom": "chr19",
    "resolution": 10000,
    "diag_num": 200,
}

if "path" in params:
    # Load a fresh banded matrix (this rebinds `mat`, discarding any mask
    # added to the previous object).
    mat = bh.straw_chr(
        params["path"],
        params["chrom"],
        params["resolution"],
        diag_num=params["diag_num"],
    )
    mat_dense = mat.todense()

    # Bins whose sum along axis 0 is zero are masked on both representations.
    # The boolean mask is computed once so both sides receive the same array.
    row_sum = mat_dense.sum(axis=0)
    empty_bins = row_sum == 0
    mat.add_mask_row_col(empty_bins)

    # Build the dense reference independently of bandhic's masking: start from
    # a fully unmasked array, then mask the empty rows and columns by hand.
    mat_dense = np.ma.masked_array(mat_dense, mask=False, fill_value=0)
    mat_dense[empty_bins, :] = np.ma.masked
    mat_dense[:, empty_bins] = np.ma.masked



In [67]:
# After masking the same bins on both sides, the densified banded matrix must
# equal the hand-masked dense reference.
ma.allequal(mat.todense(),mat_dense)

True

In [68]:
def get_index(bin_num, diag_num):
    """
    Enumerate the (row, column) coordinates of an upper band.

    For every row ``i`` in ``range(bin_num)`` and every diagonal offset ``k``
    in ``range(diag_num)``, the pair ``(i, i + k)`` is produced as long as
    ``i + k`` is still a valid column index (``i + k < bin_num``). Pairs are
    ordered row by row, and by increasing offset within a row.

    Parameters
    ----------
    bin_num : int
        Number of rows/columns of the square matrix.
    diag_num : int
        Number of diagonals in the band, counted from the main diagonal.

    Returns
    -------
    tuple of ndarray
        ``(row_idx, col_idx)``: two 1-D integer arrays of equal length.

    Examples
    --------
    >>> rows, cols = get_index(4, 2)
    >>> rows
    array([0, 0, 1, 1, 2, 2, 3])
    >>> cols
    array([0, 1, 1, 2, 2, 3, 3])
    """
    # Column vector of row numbers, copied across one column per diagonal.
    row_ids = np.arange(0, bin_num)[:, np.newaxis]
    row_grid = np.repeat(row_ids, diag_num, axis=1)
    # Column index of entry (i, k) is i + k; broadcasting adds the offsets.
    col_grid = row_grid + np.arange(0, diag_num)
    # Drop positions where the band runs past the last column.
    in_bounds = col_grid < bin_num
    return row_grid[in_bounds], col_grid[in_bounds]

In [69]:
# Example: coordinates of a 5-diagonal upper band in a 20x20 matrix.
get_index(20,5)

(array([ 0,  0,  0,  0,  0,  1,  1,  1,  1,  1,  2,  2,  2,  2,  2,  3,  3,
         3,  3,  3,  4,  4,  4,  4,  4,  5,  5,  5,  5,  5,  6,  6,  6,  6,
         6,  7,  7,  7,  7,  7,  8,  8,  8,  8,  8,  9,  9,  9,  9,  9, 10,
        10, 10, 10, 10, 11, 11, 11, 11, 11, 12, 12, 12, 12, 12, 13, 13, 13,
        13, 13, 14, 14, 14, 14, 14, 15, 15, 15, 15, 15, 16, 16, 16, 16, 17,
        17, 17, 18, 18, 19]),
 array([ 0,  1,  2,  3,  4,  1,  2,  3,  4,  5,  2,  3,  4,  5,  6,  3,  4,
         5,  6,  7,  4,  5,  6,  7,  8,  5,  6,  7,  8,  9,  6,  7,  8,  9,
        10,  7,  8,  9, 10, 11,  8,  9, 10, 11, 12,  9, 10, 11, 12, 13, 10,
        11, 12, 13, 14, 11, 12, 13, 14, 15, 12, 13, 14, 15, 16, 13, 14, 15,
        16, 17, 14, 15, 16, 17, 18, 15, 16, 17, 18, 19, 16, 17, 18, 19, 17,
        18, 19, 18, 19, 19]))