Skip to content

Commit

Permalink
Speed gains for skipping in file
Browse files (browse the repository at this point in the history)
  • Loading branch information
kaukrise committed Jan 25, 2022
1 parent 4228cf7 commit 318933d
Showing 1 changed file with 27 additions and 47 deletions.
74 changes: 27 additions & 47 deletions fanc/compatibility/juicer.py
Original file line number Diff line number Diff line change
Expand Up @@ -546,19 +546,13 @@ def _skip_to_normalised_expected_values(req):
n_values = struct.unpack('<2i', req.read(8))[0]
else:
n_values = struct.unpack('<i', req.read(4))[0]
for i in range(n_values):
if version > 8:
req.read(4)
else:
req.read(8)
skip_size = 4 if version > 8 else 8
req.read(skip_size * n_values)

n_scaling_factors = struct.unpack('<i', req.read(4))[0]
for _ in range(n_scaling_factors):
if version > 8:
req.read(8)
else:
req.read(12)

skip_size = 8 if version > 8 else 12
req.read(skip_size * n_scaling_factors)

@staticmethod
def _skip_to_normalisation_vectors(req):
version = JuicerHic._version(req)
Expand All @@ -577,18 +571,15 @@ def _skip_to_normalisation_vectors(req):
else:
n_values = struct.unpack('<i', req.read(4))[0]

for _ in range(n_values):
if version > 8:
req.read(4)
else:
req.read(8)
skip_size = 4 if version > 8 else 8
#for _ in range(n_values):
req.read(skip_size * n_values)

n_scaling_factors = struct.unpack('<i', req.read(4))[0]
for _ in range(n_scaling_factors):
if version > 8:
req.read(8)
else:
req.read(12)
skip_size = 8 if version > 8 else 12
req.read(skip_size * n_scaling_factors)
# for _ in range(n_scaling_factors):
# req.read(skip_size)

def _matrix_positions(self):
"""
Expand Down Expand Up @@ -626,13 +617,12 @@ def _expected_value_vectors_from_pos(req, normalisation=None, unit='BP', version
n_values = struct.unpack('<2i', req.read(8))[0]
else:
n_values = struct.unpack('<i', req.read(4))[0]
ev = []
for _ in range(n_values):
if version > 8:
v = struct.unpack('<f',req.read(4))[0]
else:
v = struct.unpack('<d',req.read(8))[0]
ev.append(v)

value_size = 4 if version > 8 else 8
value_type = '<f' if version > 8 else '<d'
ev = np.zeros(n_values)
for i in range(n_values):
ev[i] = struct.unpack(value_type, req.read(value_size))[0]

if entry_unit == unit and (normalisation is None or entry_normalisation == normalisation):
expected_values[bin_size] = ev
Expand Down Expand Up @@ -762,18 +752,16 @@ def normalisation_vector(self, chromosome, normalisation=None, resolution=None,
entry_unit == unit):
req.seek(file_position)

vector = []
if version > 8:
n_values = struct.unpack('<2i', req.read(8))[0]
else:
n_values = struct.unpack('<i', req.read(4))[0]

for _ in range(n_values):
if version > 8:
v = struct.unpack('<f',req.read(4))[0]
else:
v = struct.unpack('<d',req.read(8))[0]
vector.append(v)
value_size = 4 if version > 8 else 8
value_type = '<f' if version > 8 else '<d'
vector = np.zeros(n_values)
for i in range(n_values):
vector[i] = struct.unpack(value_type,req.read(value_size))[0]

return vector

Expand Down Expand Up @@ -949,7 +937,7 @@ def _read_block(self, req, file_position, block_size_in_bytes):
use_short_bin_x = struct.unpack('<b', block[13:14])[0] == 0
use_short_bin_y = struct.unpack('<b', block[14:15])[0] == 0
ix_continue = 15

row_bytes = 2 if use_short_bin_x else 4
row_type = '<h' if use_short_bin_x else '<f'
col_bytes = 2 if use_short_bin_y else 4
Expand All @@ -973,17 +961,8 @@ def _read_block(self, req, file_position, block_size_in_bytes):
x_raw = struct.unpack(col_type, block[temp:(temp + col_bytes)])[0]
temp += col_bytes
x = x_offset + x_raw
# weight = struct.unpack('<h', block[temp:(temp + weight_bytes)])[0]
# temp += weight_bytes

weight = struct.unpack(weight_type, block[temp:(temp + weight_bytes)])[0]
temp += weight_bytes
# if not use_short:
# weight = struct.unpack('<h', block[temp:(temp + 2)])[0]
# temp += 2
# else:
# weight = struct.unpack('<f', block[temp:(temp + 4)])[0]
# temp += 4
yield x, y, weight

index += 1
Expand All @@ -1010,7 +989,7 @@ def _read_block(self, req, file_position, block_size_in_bytes):
if weight != 0x7fc00000:
yield x, y, weight
index = index + 1

def _read_matrix(self, region1, region2):
region1 = self._convert_region(region1)
region2 = self._convert_region(region2)
Expand Down Expand Up @@ -1191,4 +1170,5 @@ def mappable(self, region=None):
return np.array([r.valid for r in self.regions(region, lazy=True)])

def bias_vector(self):
    """
    Return the bias of every region as a NumPy array.

    Iterates over ``self.regions(lazy=True)`` and collects each region's
    ``bias`` attribute, in iteration order.

    :return: :class:`numpy.ndarray` with one entry per region
    """
    return np.array([r.bias for r in self.regions(lazy=True)])

0 comments on commit 318933d

Please sign in to comment.