Skip to content

Commit

Permalink
Using pytest and runnning tests in parallel
Browse files Browse the repository at this point in the history
  • Loading branch information
ThomasHickman committed Aug 9, 2017
1 parent 5861fdc commit 6350059
Show file tree
Hide file tree
Showing 4 changed files with 61 additions and 64 deletions.
10 changes: 8 additions & 2 deletions README.md
Expand Up @@ -82,12 +82,18 @@ cwl-runner cwl_files_3.5/HaplotypeCaller.cwl examples/HaplotypeCaller_inputs.yml

## Tests

To run the tests, add example data to `cwl-example-data` such that `examples/HaplotypeCaller_inputs.yml` will run, then:
First install the test requirements
```
pip install -r test_requirements.txt
```
Then add example data to `cwl-example-data` such that `examples/HaplotypeCaller_inputs.yml` will run, then:

```bash
python gatkcwlgenerator/tests/test.py
py.test gatkcwlgenerator/tests/test.py
```

You can also run the tests in parallel with `-n` to improve performance

## Limitations:

- The parameter `annotation` (for example, in [HaplotypeCaller](https://software.broadinstitute.org/gatk/documentation/tooldocs/current/org_broadinstitute_gatk_tools_walkers_haplotypecaller_HaplotypeCaller.php#--annotation)) is specified to take in a string in the generated CWL file, not an enumeration of all the possible options
Expand Down
111 changes: 49 additions & 62 deletions gatkcwlgenerator/tests/test.py
Expand Up @@ -7,9 +7,9 @@
import subprocess
import os
from os import path
import unittest
from multiprocessing import Process
import tempfile
import pytest

import requests

Expand Down Expand Up @@ -62,112 +62,99 @@ def __init__(self, stdout, stderr, exitcode):
path: {0}/cwl-example-data/chr22_cwl_test.cram
out: out.gvcf.gz""".format(base_dir)

def run_haplotype_caller(extra_info="",interval=1, filetext=None, expect_failure=False):
return run_tool("HaplotypeCaller", extra_info, interval, filetext, expect_failure)
def run_haplotype_caller(extra_info, version, interval=1, filetext=None, expect_failure=False):
return run_tool("HaplotypeCaller", extra_info, version, interval, filetext, expect_failure)

"""
Runs the haplotype_caller tool with the specified data
"""
def run_tool(toolname, extra_info="",interval=1, filetext=None, expect_failure=False):
def run_tool(toolname, extra_info, version, interval=1, filetext=None, expect_failure=False):
if filetext is None:
extra_info += "\nintervals: [chr22:10591400-{}]".format(10591400 + interval)
filetext = "analysis_type: {}\n".format(toolname) + default_args + "\n" + extra_info

with tempfile.NamedTemporaryFile() as f:
f.write(filetext)
f.flush()
return run_command("cwl-runner cwl_files_3.5/cwl/{}.cwl {}".format(
return run_command("cwl-runner cwl_files_{}/cwl/{}.cwl {}".format(
version,
toolname,
f.name
), expect_failure=expect_failure)

# Unit tests

class TestGenerateCWL(unittest.TestCase):
supported_versions = ["3.5", "current"]
def test_get_json_links(self):
for version in self.supported_versions:
json_links = cwl_gen.get_json_links(version)
for link_type, links in json_links.__dict__.items():
self.assertTrue(links,
"There are no links of type '{}' in gatk version {}".format(
link_type,
version
))

def test_no_arguments_in_annotator(self):
# If arguments are in annotator modules, we probably need to add them to the CWL file
for version in self.supported_versions:
for url in cwl_gen.get_json_links(version).annotator_urls:
ann_json = requests.get(url).json()
self.assertFalse(ann_json["arguments"])
supported_versions = ["3.5", "3.6", "3.7", "3.8"]

# Integration tests
@pytest.mark.parametrize("version", supported_versions)
class TestGenerateCWL:
def test_get_json_links(self, version):
json_links = cwl_gen.get_json_links(version)
for link_type, links in json_links.__dict__.items():
assert links, "There are no links of type '{}' in gatk version {}".format(
link_type,
version
)

class TestRunsCorrectly(unittest.TestCase):
supported_versions = ["3.5", "current", "4.beta-latest"]
def test_runs(self):
for version in self.supported_versions:
run_command("python2 gatkcwlgenerator/main.py -v {} --dev".format(version))
def test_no_arguments_in_annotator(self, version):
# If arguments are in annotator modules, we probably need to add them to the CWL file
for url in cwl_gen.get_json_links(version).annotator_urls:
ann_json = requests.get(url).json()
assert not ann_json["arguments"]

class TestGeneratedCWLFiles(unittest.TestCase):
base_cwl_path = path.join(base_dir, "cwl_files_3.5/cwl")
# Integration tests

def is_cwlfile_valid(self, cwl_file):
run_command("cwl-runner --validate " + path.join(self.base_cwl_path, cwl_file))
@pytest.mark.parametrize("version", supported_versions + ["4.beta-latest"])
def test_runs(version):
run_command("python2 gatkcwlgenerator/main.py -v {} --dev".format(version))

@unittest.skip("")
def test_are_cwl_files_valid(self):
@pytest.mark.parametrize("version", supported_versions)
class TestGeneratedCWLFiles:
def get_base_cwl_path(self, version):
return path.join(base_dir, "cwl_files_{}/cwl".format(version))

def test_are_cwl_files_valid(self, version):
exceptions = []
for cwl_file in os.listdir(self.base_cwl_path):
for cwl_file in os.listdir(self.get_base_cwl_path(version)):
try:
print("Validated " + cwl_file)
run_command("cwl-runner --validate " + path.join(self.base_cwl_path, cwl_file))
run_command("cwl-runner --validate " + path.join(self.get_base_cwl_path(version), cwl_file))
except AssertionError as e:
print(e)
exceptions.append(e)

if exceptions:
raise AssertionError("Not all cwl files are valid:\n" + "\n\n".join(exceptions))

def test_haplotype_caller(self):
def test_haplotype_caller(self, version):
run_command("cwl-runner cwl_files_3.5/cwl/HaplotypeCaller.cwl examples/HaplotypeCaller_inputs.yml")

# Test if the haplotype caller accepts all the correct types

def test_boolean_type(self):
self.assertIn("ThreadEfficiencyMonitor", run_haplotype_caller("monitorThreadEfficiency: True").stderr)
def test_boolean_type(self, version):
assert "ThreadEfficiencyMonitor" in run_haplotype_caller("monitorThreadEfficiency: True", version).stderr

def test_integers_type(self):
self.assertIn("42 data thread", run_haplotype_caller("num_threads: 42", expect_failure=True).stderr)
def test_integers_type(self, version):
assert "42 data thread" in run_haplotype_caller("num_threads: 42", version, expect_failure=True).stderr

def test_string_type(self):
self.assertIn("Specified name does not exist in input bam files",
run_haplotype_caller("sample_name: invalid_sample_name", expect_failure=True).stderr)
def test_string_type(self, version):
assert "Specified name does not exist in input bam files" in \
run_haplotype_caller("sample_name: invalid_sample_name", version, expect_failure=True).stderr

def test_file_type(self):
def test_file_type(self, version):
BQSR_arg = """
BQSR:
class: File
path: {0}/cwl-example-data/chr22_cwl_test.fa
""".format(base_dir)
self.assertIn("Bad input: The GATK report has an unknown/unsupported version in the header",
run_haplotype_caller(BQSR_arg, expect_failure=True).stderr)
assert "Bad input: The GATK report has an unknown/unsupported version in the header" in \
run_haplotype_caller(BQSR_arg, version, expect_failure=True).stderr

def test_enum_type(self):
self.assertIn("Strictness is LENIENT", run_haplotype_caller("validation_strictness: LENIENT").stderr)
def test_enum_type(self, version):
assert "Strictness is LENIENT" in run_haplotype_caller("validation_strictness: LENIENT", version).stderr

def test_list_type(self):
run_with_larger_intervals = run_haplotype_caller(
def test_list_type(self, version):
run_with_larger_intervals = run_haplotype_caller(extra_info="", version=version,
filetext="analysis_type: HaplotypeCaller\n" + default_args + "\nintervals: [chr22:10591400-10591500, chr22:10591500-10591645]")

self.assertIn("Processing 246 bp from intervals", run_with_larger_intervals.stderr)

"""
The entry point for testing
"""
def test():
unittest.main()

if __name__ == "__main__":
test()
assert "Processing 246 bp from intervals" in run_with_larger_intervals.stderr
1 change: 1 addition & 0 deletions setup.py
Expand Up @@ -13,6 +13,7 @@ def read_markdown(file):
version="1.0.0",
packages=find_packages(exclude=["tests"]),
install_requires=open("requirements.txt", "r").readlines(),
tests_require=open("test_requirements.txt", "r").readlines(),
url="https://github.com/wtsi-hgi/gatk-cwl-generator",
license="MIT",
description="Generates CWL files from the GATK documentation Edit",
Expand Down
3 changes: 3 additions & 0 deletions test_requirements.txt
@@ -0,0 +1,3 @@
pytest
pytest-xdist
requests_cache

0 comments on commit 6350059

Please sign in to comment.