Skip to content

Commit

Permalink
Error when the VCF will have a zero position site
Browse files Browse the repository at this point in the history
  • Loading branch information
benjeffery authored and Ben Jeffery committed May 14, 2024
1 parent b1d7c4d commit 49c0fe5
Show file tree
Hide file tree
Showing 6 changed files with 199 additions and 43 deletions.
9 changes: 8 additions & 1 deletion python/CHANGELOG.rst
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,13 @@
[0.5.7] - 2023-XX-XX
--------------------

**Breaking Changes**

- The VCF writing methods (``ts.write_vcf``, ``ts.as_vcf``) now raise an error if a
site with position zero is encountered, because the VCF spec does not allow
zero position sites. Suppress this error with the ``allow_position_zero`` argument.
(:user:`benjeffery`, :pr:`2901`, :issue:`2838`)

**Features**

- Add ``TreeSequence.extend_edges`` method that extends ancestral haplotypes
Expand All @@ -17,7 +24,7 @@
``TreeSequence.allele_frequency_spectrum(mode="branch", polarised=False)``,
which was half as big as it should have been. (:user:`petrelharp`,
:user:`nspope`, :pr:`2933`)

--------------------
[0.5.6] - 2023-10-10
--------------------
Expand Down
42 changes: 38 additions & 4 deletions python/tests/test_cli.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# MIT License
#
# Copyright (c) 2018-2022 Tskit Developers
# Copyright (c) 2018-2024 Tskit Developers
# Copyright (c) 2017 University of Oxford
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
Expand Down Expand Up @@ -295,6 +295,22 @@ def test_vcf_contig_id(self, flags, expected):
assert args.tree_sequence == tree_sequence
assert args.contig_id == expected

@pytest.mark.parametrize(
    "flags,expected",
    [
        ([], False),
        (["-0"], True),
        (["--allow-position-zero"], True),
    ],
)
def test_vcf_allow_position_zero(self, flags, expected):
    """
    The ``vcf`` subcommand parses ``allow_position_zero`` as False when no
    flag is given and as True for both ``-0`` and ``--allow-position-zero``.
    """
    argv = ["vcf", "test.trees", *flags]
    parsed = cli.get_tskit_parser().parse_args(argv)
    # The positional tree sequence path must be unaffected by the flag.
    assert parsed.tree_sequence == "test.trees"
    assert parsed.allow_position_zero == expected

def test_upgrade_default_values(self):
parser = cli.get_tskit_parser()
cmd = "upgrade"
Expand Down Expand Up @@ -372,7 +388,7 @@ def test_trees_long_args(self):

class TestTskitConversionOutput(unittest.TestCase):
"""
Tests the output of msp to ensure it's correct.
Tests the output of tskit to ensure it's correct.
"""

@classmethod
Expand Down Expand Up @@ -544,14 +560,16 @@ def test_fasta(self):

def verify_vcf(self, output_vcf):
    """
    Assert that ``output_vcf`` is identical to the VCF text obtained by
    writing ``self._tree_sequence`` directly with ``write_vcf``.

    :param str output_vcf: VCF text to compare against the reference output.
    """
    with tempfile.TemporaryFile("w+") as f:
        # Write the reference VCF exactly once, with zero-position sites
        # explicitly allowed. A second call without the flag would duplicate
        # the output and, per the changelog, raise on a zero-position site.
        self._tree_sequence.write_vcf(f, allow_position_zero=True)
        # Rewind so the text just written can be read back.
        f.seek(0)
        vcf = f.read()
    assert output_vcf == vcf

def test_vcf(self):
    """
    Run the ``vcf`` subcommand with ``-0`` on the fixture tree sequence file
    and check that nothing is written to stderr and that stdout passes
    ``verify_vcf``.
    """
    cmd = "vcf"
    # Invoke the CLI once only, passing "-0" so that the output is comparable
    # with verify_vcf, which writes with allow_position_zero=True.
    stdout, stderr = capture_output(
        cli.tskit_main, [cmd, "-0", self._tree_sequence_file]
    )
    assert len(stderr) == 0
    self.verify_vcf(stdout)

Expand Down Expand Up @@ -582,6 +600,22 @@ def test_trees_draw(self):
assert len(stdout.splitlines()) > 3 * ts.num_trees


class TestVCFZeroPosition:
    """
    Checks how the ``vcf`` subcommand treats a simulated tree sequence that
    is expected to produce a position-zero VCF site: a ValueError without
    ``-0``, and nothing on stderr when ``-0`` is given.
    """

    def test_zero_position(self, tmp_path):
        trees_file = tmp_path / "test.trees"
        msprime.simulate(10, mutation_rate=1, random_seed=1).dump(trees_file)

        # Without the opt-in flag the conversion must fail.
        with pytest.raises(ValueError):
            capture_output(cli.tskit_main, ["vcf", str(trees_file)])

        # With "-0" the same conversion must run without writing to stderr.
        _, stderr = capture_output(cli.tskit_main, ["vcf", "-0", str(trees_file)])
        assert len(stderr) == 0


class TestBadFile:
"""
Tests that we deal with IO errors appropriately.
Expand Down

0 comments on commit 49c0fe5

Please sign in to comment.