Skip to content

Commit

Permalink
Fix multi-color walk bug
Browse files Browse the repository at this point in the history
  • Loading branch information
winni2k committed Feb 15, 2018
1 parent 4e86761 commit ad5f4e7
Show file tree
Hide file tree
Showing 13 changed files with 440 additions and 293 deletions.
2 changes: 1 addition & 1 deletion .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -46,5 +46,5 @@ jobs:
- export GDRIVE_ACCOUNT_CREDENTIALS_JSON='gdrive-credentials.json'
- echo "$GDRIVE_ACCOUNT_CREDENTIALS" > ~/.gdrive/$GDRIVE_ACCOUNT_CREDENTIALS_JSON
- make setup-benchmark
script: make benchmark
script: make test-fixtures && make benchmark
after_success: make -C cortex_tools_benchmark upload-results
3 changes: 3 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,9 @@ unit:
acceptance:
$(TEST_COMMAND) cortexpy/test/test_acceptance

fixtures:
$(MAKE) -C $(BENCHMARK_DIR) test-fixtures

test:
$(TEST_COMMAND) cortexpy

Expand Down
2 changes: 1 addition & 1 deletion cortexpy/__init__.py
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
__version__ = '0.5.1'
__version__ = '0.5.2'
VERSION_STRING = 'cortexpy version {}'.format(__version__)
8 changes: 7 additions & 1 deletion cortexpy/graph/parser/kmer.py
Original file line number Diff line number Diff line change
Expand Up @@ -221,6 +221,12 @@ def increment_color_coverage(self, color):
def __eq__(self, other):
    """Return the result of comparing this kmer with ``other`` via ``kmer_eq``."""
    are_equal = kmer_eq(self, other)
    return are_equal

def __str__(self):
    """Render this kmer as a single space-separated line.

    The fields appear in this order: the kmer string itself, ``str()`` of
    every entry in ``self.coverage``, then ``to_str()`` of every entry in
    ``self.edges``.
    """
    fields = [self.kmer]
    fields.extend(map(str, self.coverage))
    fields.extend(edge_set.to_str() for edge_set in self.edges)
    return ' '.join(fields)

@property
def colors(self):
    """Range over the color indices ``0 .. num_colors - 1`` of this kmer."""
    color_count = self.num_colors
    return range(color_count)
Expand All @@ -238,7 +244,7 @@ def has_outgoing_edge_to_kmer_in_color(self, other, color):
raise ValueError('Kmers are not neighbors')
edge_set = self.edges[color]
if edge_set.is_edge(other_kmer_letter) != other.edges[color].is_edge(this_kmer_letter):
raise ValueError('Kmers do not agree on connection')
raise ValueError('Kmers ({}) and ({}) do not agree on connection'.format(self, other))
return edge_set.is_edge(other_kmer_letter)

def has_incoming_edge_from_kmer_in_color(self, other, color):
Expand Down
45 changes: 31 additions & 14 deletions cortexpy/graph/traversal/branch.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,13 +15,17 @@ class Traverser(object):
ra_parser = attr.ib()
traversal_color = attr.ib(0)
graph = attr.ib(attr.Factory(SERIALIZER_GRAPH))
other_stopping_colors = attr.ib(attr.Factory(set))
kmer = attr.ib(init=False, default=None)
kmer_string = attr.ib(init=False)
prev_kmer = attr.ib(init=False)
prev_kmer_string = attr.ib(init=False)
orientation = attr.ib(init=False)
parent_graph = attr.ib(init=False)

def __attrs_post_init__(self):
    """Validate the color configuration once the attributes have been set.

    Raises:
        ValueError: if ``traversal_color`` is also a member of
            ``other_stopping_colors``.
    """
    # An explicit raise (rather than ``assert``) keeps this argument check
    # active when Python runs with -O, which strips assert statements.
    if self.traversal_color in self.other_stopping_colors:
        raise ValueError(
            'traversal_color ({}) must not be one of other_stopping_colors ({})'.format(
                self.traversal_color, self.other_stopping_colors))

def traverse_from(self, kmer_string, *,
orientation=EdgeTraversalOrientation.original,
parent_graph=None):
Expand All @@ -38,30 +42,43 @@ def traverse_from(self, kmer_string, *,
except KmerStringAlreadySeen:
return Traversed(self.graph, orientation=self.orientation)

while True:
oriented_edge_set = self.kmer.edges[self.traversal_color].oriented(self.orientation)
if (
self._get_num_neighbors(oriented_edge_set) != 1
or self._get_num_neighbors(oriented_edge_set.other_orientation()) > 1
):
break
try:
self._add_next_kmer_string_to_graph_and_get_next_kmer(oriented_edge_set)
self._add_edges()
except KmerStringAlreadySeen:
break
last_oriented_edge_set = self._traverse()

reverse_neighbor_kmer_strings = set(
self._get_neighbors(oriented_edge_set.other_orientation()))
self._get_neighbors(last_oriented_edge_set.other_orientation()))
if self.prev_kmer_string is not None:
reverse_neighbor_kmer_strings.remove(self.prev_kmer_string)
return Traversed(self.graph,
orientation=self.orientation,
first_kmer_string=first_kmer_string,
last_kmer_string=self.kmer_string,
neighbor_kmer_strings=self._get_neighbors(oriented_edge_set),
neighbor_kmer_strings=self._get_neighbors(last_oriented_edge_set),
reverse_neighbor_kmer_strings=list(reverse_neighbor_kmer_strings))

def _traverse(self):
    """Walk forward from the current kmer until a stopping condition is hit.

    Each iteration looks at ``self.kmer`` and stops when any of these holds:

    * in the traversal color, the kmer does not have exactly one neighbor
      in the traversal orientation, or has more than one neighbor in the
      opposite orientation;
    * in any of ``self.other_stopping_colors``, the kmer has at least one
      neighbor in either orientation;
    * adding the next kmer raises ``KmerStringAlreadySeen``.

    Returns:
        The traversal-color edge set, oriented by ``self.orientation``, that
        was computed at the start of the iteration in which the walk stopped.
    """
    while True:
        color_edges = self.kmer.edges[self.traversal_color]
        traversal_edge_set = color_edges.oriented(self.orientation)

        is_unbranched = (
            self._get_num_neighbors(traversal_edge_set) == 1
            and self._get_num_neighbors(traversal_edge_set.other_orientation()) <= 1
        )
        if not is_unbranched:
            return traversal_edge_set

        # Lazily built so edge sets are fetched and inspected one stop color
        # at a time, ending at the first color that has any edge.
        stop_color_edge_sets = (
            self.kmer.edges[stop_color].oriented(self.orientation)
            for stop_color in self.other_stopping_colors
        )
        if any(
            self._get_num_neighbors(edge_set) != 0
            or self._get_num_neighbors(edge_set.other_orientation()) != 0
            for edge_set in stop_color_edge_sets
        ):
            return traversal_edge_set

        try:
            # presumably advances self.kmer / self.kmer_string to the next
            # kmer -- the helper is not visible here, confirm against it
            self._add_next_kmer_string_to_graph_and_get_next_kmer(traversal_edge_set)
            self._add_edges()
        except KmerStringAlreadySeen:
            return traversal_edge_set

def _get_num_neighbors(self, oriented_edge_set):
    """Return ``oriented_edge_set.num_neighbor`` applied to the current kmer string."""
    count_neighbors = oriented_edge_set.num_neighbor
    return count_neighbors(self.kmer_string)

Expand Down
20 changes: 11 additions & 9 deletions cortexpy/graph/traversal/engine.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,8 +40,8 @@ def traverse_from_each_kmer_in(self, contig):
start_kmer = contig[start:(start + kmer_size)]
try:
self.graph = nx.compose(
self.graph,
self._traverse_from(start_kmer).graph
self.graph,
self._traverse_from(start_kmer).graph
)
except KeyError:
pass
Expand All @@ -60,7 +60,9 @@ def traverse_from(self, start_string):
def _traverse_from(self, start_string):
assert len(start_string) == self.ra_parser.header.kmer_size
self.branch_traverser = {
color: branch.Traverser(self.ra_parser, traversal_color=color)
color: branch.Traverser(self.ra_parser,
traversal_color=color,
other_stopping_colors=set(self.traversal_colors) - {color})
for color in self.traversal_colors
}
self.queuer = branch.Queuer(self.branch_queue,
Expand All @@ -72,8 +74,8 @@ def _traverse_from(self, start_string):
self._traverse_a_branch_from_queue()
if len(self.graph) > self.max_nodes:
logger.warning(
"Max nodes ({}) exceeded: {} nodes found".format(self.max_nodes,
len(self.graph)))
"Max nodes ({}) exceeded: {} nodes found".format(self.max_nodes,
len(self.graph)))
return self

def _post_process_graph(self):
Expand All @@ -97,7 +99,7 @@ def _process_initial_branch(self, start_string):
if self.orientation == EngineTraversalOrientation.both:
for color in self.traversal_colors:
oriented_edge_set = start_kmer.edges[color].oriented(
EdgeTraversalOrientation.reverse)
EdgeTraversalOrientation.reverse)
kmer_strings = oriented_edge_set.neighbor_kmer_strings(start_string)
if len(kmer_strings) == 1:
self.queuer.add_from(start_string=kmer_strings[0],
Expand Down Expand Up @@ -133,8 +135,8 @@ def _link_branch_and_queue_neighbor_traversals(self, branch):
orientations_and_kmer_strings = [(branch.orientation, branch.neighbor_kmer_strings)]
if self.orientation == EngineTraversalOrientation.both:
orientations_and_kmer_strings.append(
(EdgeTraversalOrientation.other(branch.orientation),
branch.reverse_neighbor_kmer_strings)
(EdgeTraversalOrientation.other(branch.orientation),
branch.reverse_neighbor_kmer_strings)
)
for orientation, kmer_strings in orientations_and_kmer_strings:
for neighbor_string in kmer_strings:
Expand All @@ -156,7 +158,7 @@ def _add_neighbors_from_other_colors_to_branch(self, branch):
oriented_edge_set = last_kmer.edges[traversal_color].oriented(branch.orientation)
neighbor_kmer_strings |= set(oriented_edge_set.neighbor_kmer_strings(last_kmer_string))
reverse_neighbor_kmer_strings |= set(
oriented_edge_set.other_orientation().neighbor_kmer_strings(last_kmer_string))
oriented_edge_set.other_orientation().neighbor_kmer_strings(last_kmer_string))

branch = copy.copy(branch)
branch.neighbor_kmer_strings = list(neighbor_kmer_strings)
Expand Down
12 changes: 7 additions & 5 deletions cortexpy/test/expectation/graph.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,17 +29,17 @@ def has_node(self, node):
return KmerNodeExpectation(self.graph.node[node])

def has_n_nodes(self, n):
assert len(self.graph) == n
assert n == len(self.graph)
return self

def has_n_edges(self, n):
assert len(self.graph.edges) == n
assert n == len(self.graph.edges)
return self

def has_nodes(self, *expected_nodes):
expected_nodes = set(expected_nodes)
assert set(self.graph.nodes) == expected_nodes
assert len(self.graph.nodes) == len(expected_nodes)
assert expected_nodes == set(self.graph.nodes)
assert len(expected_nodes) == len(self.graph.nodes)
return self

def has_edges(self, *edges):
Expand All @@ -50,7 +50,7 @@ def has_edges(self, *edges):
edge[2] = int(edge[2])
expected_edges.append(tuple(edge))

assert set(self.graph.edges) == set(expected_edges)
assert set(expected_edges) == set(self.graph.edges)
return self

def has_edge(self, source, target, color):
Expand All @@ -63,6 +63,8 @@ class KmerNodeExpectation(object):
kmer_node = attr.ib()

def has_coverages(self, *coverages):
    """Assert that the node's kmer has the expected per-color coverage.

    Args:
        *coverages: either one value per color, or a single string of
            whitespace-separated integers (e.g. ``'1 0 2'``).

    Returns:
        ``self``, so expectations can be chained.

    Raises:
        AssertionError: if the kmer's coverage does not match.
        ValueError: if a string expectation contains a non-integer token.
    """
    if len(coverages) == 1 and isinstance(coverages[0], str):
        # split() without an argument tolerates repeated, leading and
        # trailing whitespace; split(' ') would produce empty tokens there
        # and int('') raises ValueError.
        coverages = [int(c) for c in coverages[0].split()]
    # NOTE(review): ``==`` broadcasts, so a single expected value compares
    # equal against every color -- confirm that is intended.
    assert np.all(self.kmer_node['kmer'].coverage == np.array(coverages))
    return self

Expand Down
4 changes: 2 additions & 2 deletions cortexpy/test/runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ def view_contig(self, contig, graph, output_format=None, other_args=()):
if output_format is not None:
run_args.extend(['--output-format', output_format])
run_args += list(other_args)
return self.run(['view', 'contig', graph, contig] + run_args)
return self.run(['view', 'contig', str(graph), contig] + run_args)

def view_traversal(self, contig, graph, output_format='json', output_type='kmers',
orientation='both', color=0, max_nodes=None, colors=None):
Expand All @@ -68,7 +68,7 @@ def view_traversal(self, contig, graph, output_format='json', output_type='kmers
if isinstance(colors, int):
colors = [colors]
command = ['view', 'traversal',
graph, contig,
str(graph), contig,
'--output-format', output_format,
'--colors', ','.join(str(c) for c in colors),
'--output-type', output_type]
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
import os

import cortexpy.graph.parser as parser


Expand Down
Loading

0 comments on commit ad5f4e7

Please sign in to comment.