Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Error when the VCF will have a zero position site #2901

Merged
merged 1 commit into from
May 14, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
9 changes: 8 additions & 1 deletion python/CHANGELOG.rst
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,13 @@
[0.5.7] - 2023-XX-XX
--------------------

**Breaking Changes**

- The VCF writing methods (``ts.write_vcf``, ``ts.as_vcf``) now raise an error if a
site at position zero is encountered, because the VCF spec does not allow sites at
position zero. Suppress this error with the ``allow_position_zero`` argument.
(:user:`benjeffery`, :pr:`2901`, :issue:`2838`)

**Features**

- Add ``TreeSequence.extend_edges`` method that extends ancestral haplotypes
Expand All @@ -17,7 +24,7 @@
`TreeSequence.allele_frequency_spectrum(mode="branch", polarised=False)`,
which was half as big as it should have been. (:user:`petrelharp`,
:user:`nspope`, :pr:`2933`)

--------------------
[0.5.6] - 2023-10-10
--------------------
Expand Down
42 changes: 38 additions & 4 deletions python/tests/test_cli.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# MIT License
#
# Copyright (c) 2018-2022 Tskit Developers
# Copyright (c) 2018-2024 Tskit Developers
# Copyright (c) 2017 University of Oxford
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
Expand Down Expand Up @@ -295,6 +295,22 @@ def test_vcf_contig_id(self, flags, expected):
assert args.tree_sequence == tree_sequence
assert args.contig_id == expected

@pytest.mark.parametrize(
    "flags,expected",
    [
        ([], False),
        (["-0"], True),
        (["--allow-position-zero"], True),
    ],
)
def test_vcf_allow_position_zero(self, flags, expected):
    """
    Check how the ``vcf`` subcommand parses the position-zero switch.

    With no flag ``args.allow_position_zero`` is False; both the short
    ``-0`` and the long ``--allow-position-zero`` spellings set it to True.
    """
    ts_path = "test.trees"
    # Subcommand and positional argument first, optional flags afterwards.
    argv = ["vcf", ts_path, *flags]
    args = cli.get_tskit_parser().parse_args(argv)
    assert args.tree_sequence == ts_path
    assert args.allow_position_zero == expected

def test_upgrade_default_values(self):
parser = cli.get_tskit_parser()
cmd = "upgrade"
Expand Down Expand Up @@ -372,7 +388,7 @@ def test_trees_long_args(self):

class TestTskitConversionOutput(unittest.TestCase):
"""
Tests the output of msp to ensure it's correct.
Tests the output of tskit to ensure it's correct.
"""

@classmethod
Expand Down Expand Up @@ -544,14 +560,16 @@ def test_fasta(self):

def verify_vcf(self, output_vcf):
with tempfile.TemporaryFile("w+") as f:
self._tree_sequence.write_vcf(f)
self._tree_sequence.write_vcf(f, allow_position_zero=True)
f.seek(0)
vcf = f.read()
assert output_vcf == vcf

def test_vcf(self):
cmd = "vcf"
stdout, stderr = capture_output(cli.tskit_main, [cmd, self._tree_sequence_file])
stdout, stderr = capture_output(
cli.tskit_main, [cmd, "-0", self._tree_sequence_file]
)
assert len(stderr) == 0
self.verify_vcf(stdout)

Expand Down Expand Up @@ -582,6 +600,22 @@ def test_trees_draw(self):
assert len(stdout.splitlines()) > 3 * ts.num_trees


class TestVCFZeroPosition:
    """
    Tests the ``vcf`` command's handling of a site at position 0: the
    command raises ValueError by default and succeeds when ``-0`` is given.
    """

    def test_zero_position(self, tmp_path):
        trees_path = tmp_path / "test.trees"
        # NOTE(review): the test relies on this simulation producing a site
        # that is written at VCF position 0 - confirm if the seed changes.
        msprime.simulate(10, mutation_rate=1, random_seed=1).dump(trees_path)

        # Default behaviour: conversion is refused with a ValueError.
        with pytest.raises(ValueError):
            capture_output(cli.tskit_main, ["vcf", str(trees_path)])

        # With -0 the same file converts and nothing is written to stderr.
        _, stderr = capture_output(cli.tskit_main, ["vcf", "-0", str(trees_path)])
        assert len(stderr) == 0


class TestBadFile:
"""
Tests that we deal with IO errors appropriately.
Expand Down