From 4632591ee928d784f8452e30f6ad58b8ff04ccfb Mon Sep 17 00:00:00 2001 From: cgates Date: Sat, 23 May 2015 16:30:12 -0400 Subject: [PATCH] ex-264 (cgates): Adjusted to work in py3; minor PEP8 cleanup; removed some deprecated methods --- jacquard/expand.py | 4 +++- jacquard/translate.py | 4 ++-- jacquard/utils/summarize_rollup_transform.py | 1 + jacquard/utils/summarize_zscore_transform.py | 2 +- jacquard/utils/utils.py | 2 ++ .../variant_caller_transforms/common_tags.py | 14 +++++------- jacquard/variant_caller_transforms/mutect.py | 13 ++++------- jacquard/variant_caller_transforms/strelka.py | 7 ++---- .../variant_caller_factory.py | 1 + jacquard/variant_caller_transforms/varscan.py | 16 ++------------ test/utils/test_case.py | 22 +++++++++---------- .../common_tags_test.py | 16 +++++++------- test/variant_caller_transforms/mutect_test.py | 8 ------- 13 files changed, 42 insertions(+), 68 deletions(-) diff --git a/jacquard/expand.py b/jacquard/expand.py index 6221644..b5a6cbe 100644 --- a/jacquard/expand.py +++ b/jacquard/expand.py @@ -192,7 +192,9 @@ def execute(args, dummy_execution_context): #for the moment, there is no good place to put the execution context input_file = os.path.abspath(args.input) output_file = os.path.abspath(args.output) - col_spec = args.selected_columns_file if args.selected_columns_file else None + col_spec = None + if args.selected_columns_file: + col_spec = args.selected_columns_file logger.debug("Expanding [{}] to [{}]", input_file, diff --git a/jacquard/translate.py b/jacquard/translate.py index 379de48..4679ab1 100644 --- a/jacquard/translate.py +++ b/jacquard/translate.py @@ -250,8 +250,8 @@ def add_subparser(subparser): parser.add_argument("-v", "--verbose", action='store_true') #TODO (cgates): This module is both a command and also manipulates VcfRecords -# like a caller. This is the only body of code that does both these things. -# Does this bother anyone else? +# like a caller transform. 
This is the only body of code that does both these +# things. Does this bother anyone else? def execute(args, execution_context): validate_args(args) diff --git a/jacquard/utils/summarize_rollup_transform.py b/jacquard/utils/summarize_rollup_transform.py index 0f90a93..3e4f358 100644 --- a/jacquard/utils/summarize_rollup_transform.py +++ b/jacquard/utils/summarize_rollup_transform.py @@ -228,6 +228,7 @@ def add_tag_values(vcf_record): _add_sample_count_values(vcf_record, JQ_PASSED, JQ_SAMPLES_PASSED) class _HCGenotypeTag(object): + #pylint: disable=too-few-public-methods _TAG_ID = "{}HC_GT".format(JQ_SUMMARY_TAG) _PATTERN = re.compile("^JQ_.*_GT$") diff --git a/jacquard/utils/summarize_zscore_transform.py b/jacquard/utils/summarize_zscore_transform.py index ae5af91..9af0ee8 100644 --- a/jacquard/utils/summarize_zscore_transform.py +++ b/jacquard/utils/summarize_zscore_transform.py @@ -163,7 +163,7 @@ def _get_dependent_value(tag_values, dependent_tag_id): return None def _init_population_stats(self, vcf_reader, dependent_tag_id): - '''Derive mean and stdev. + '''Derive mean and stdev. 
Adapted from online variance algorithm from Knuth, The Art of Computer Programming, volume 2 diff --git a/jacquard/utils/utils.py b/jacquard/utils/utils.py index 9653c3b..c528110 100644 --- a/jacquard/utils/utils.py +++ b/jacquard/utils/utils.py @@ -40,6 +40,7 @@ def sort_metaheaders(metaheaders): .split("=")[0]]) class _JacquardHelpFormatter(argparse.RawTextHelpFormatter): + #pylint: disable=too-few-public-methods def _format_usage(self, default_values): prog = '%(prog)s' % dict(prog=self._prog) usage = 'usage: {} {}'.format(prog, default_values) @@ -48,6 +49,7 @@ def _format_usage(self, default_values): def add_usage(self, default_values, actions=None, groups=None, prefix=None): self._add_item(self._format_usage, default_values) + class JQException(Exception): """Base class for all run-time exceptions in this module.""" def __init__(self, msg, *args): diff --git a/jacquard/variant_caller_transforms/common_tags.py b/jacquard/variant_caller_transforms/common_tags.py index 4c07a24..3d26653 100644 --- a/jacquard/variant_caller_transforms/common_tags.py +++ b/jacquard/variant_caller_transforms/common_tags.py @@ -1,7 +1,6 @@ """Common tags used by several callers.""" from __future__ import print_function, absolute_import, division import abc -from abc import abstractmethod import jacquard.utils.utils as utils CALLER_REPORTED_TAG = "CALLER_REPORTED" @@ -24,24 +23,21 @@ def __init__(self, abbreviation, vcf_type, vcf_number): GENOTYPE_TAG = _TagType("GT", "String", "1") ALLELE_FREQ_TAG = _TagType("AF", "Float", "A") SOMATIC_TAG = _TagType("HC_SOM", "Integer", "1") - + def __init__(self, variant_caller_abbrev, tag_type, description): if '"' in description: raise utils.JQException(("Metaheader descriptions cannot contain " "double quotes: [{}]"), description) - self.tag_id = "JQ_{}_{}".format(variant_caller_abbrev,tag_type.abbreviation) - + self.tag_id = "JQ_{}_{}".format(variant_caller_abbrev, + tag_type.abbreviation) self.metaheader = 
JacquardTag.FORMAT.format(self.tag_id, tag_type.vcf_number, tag_type.vcf_type, description) - - - @abstractmethod - def add_tag_values(self): - pass + def add_tag_values(self, vcf_record): + raise NotImplementedError() class ReportedTag(object): diff --git a/jacquard/variant_caller_transforms/mutect.py b/jacquard/variant_caller_transforms/mutect.py index ec6bb46..241d3f9 100644 --- a/jacquard/variant_caller_transforms/mutect.py +++ b/jacquard/variant_caller_transforms/mutect.py @@ -76,12 +76,15 @@ def add_tag_values(self, vcf_record): vcf_record.add_sample_tag_value(self.tag_id, sample_values) class _SomaticTag(common_tags.JacquardTag): + _DESCRIPTION = (\ +'''Jacquard somatic status for MuTect: 0=non-somatic,1=somatic (based on SS +FORMAT tag)''').replace("\n","") #pylint: disable=too-few-public-methods def __init__(self): super(self.__class__, self).__init__(MUTECT_ABBREVIATION, common_tags.JacquardTag.SOMATIC_TAG, - 'Jacquard somatic status for MuTect: 0=non-somatic,1=somatic (based on SS FORMAT tag)') + self._DESCRIPTION) def add_tag_values(self, vcf_record): sample_values = {} @@ -117,14 +120,6 @@ def __init__(self): self.name = "MuTect" self.abbr = "MT" - ##TODO (cgates): deprecate; remove - @staticmethod - def validate_input_file(meta_headers, dummy_column_header): - for line in meta_headers: - if line.startswith(Mutect._MUTECT_METAHEADER_PREFIX): - return True - return False - @staticmethod def _is_mutect_vcf(file_reader): if file_reader.file_name.lower().endswith(".vcf"): diff --git a/jacquard/variant_caller_transforms/strelka.py b/jacquard/variant_caller_transforms/strelka.py index b8ddfea..565e357 100644 --- a/jacquard/variant_caller_transforms/strelka.py +++ b/jacquard/variant_caller_transforms/strelka.py @@ -26,6 +26,8 @@ VERSION = "v2.0.15" class _GenotypeTag(common_tags.JacquardTag): + #pylint: disable=too-few-public-methods + _INDEL_VALUES = ["ref", "hom", "het"] def __init__(self): @@ -231,11 +233,6 @@ def __init__(self): self.abbr = "SK" 
self.meta_header = "##jacquard.normalize_strelka.sources={0},{1}\n" - ##TODO (cgates): deprecated; remove - @staticmethod - def validate_input_file(meta_headers, dummy_column_header): - return "##source=strelka" in meta_headers - @staticmethod def _is_strelka_vcf(file_reader): if file_reader.file_name.endswith(".vcf"): diff --git a/jacquard/variant_caller_transforms/variant_caller_factory.py b/jacquard/variant_caller_transforms/variant_caller_factory.py index eaaeeaf..ad9d9d8 100644 --- a/jacquard/variant_caller_transforms/variant_caller_factory.py +++ b/jacquard/variant_caller_transforms/variant_caller_factory.py @@ -16,6 +16,7 @@ "Strelka": strelka.VERSION} class VariantCallerFactory(object): + #pylint: disable=too-few-public-methods def __init__(self, args=None): self._callers = [varscan.Varscan(args), strelka.Strelka(), diff --git a/jacquard/variant_caller_transforms/varscan.py b/jacquard/variant_caller_transforms/varscan.py index 8cdf332..024f53a 100644 --- a/jacquard/variant_caller_transforms/varscan.py +++ b/jacquard/variant_caller_transforms/varscan.py @@ -196,18 +196,6 @@ def _get_hc_file_pattern(args): return compiled_regex - ##TODO (cgates): deprecated; remove - @staticmethod - def validate_input_file(meta_headers, column_header): - if "##source=VarScan2" not in meta_headers: - return 0 - - if _VARSCAN_SOMATIC_HEADER == column_header: - return 1 - else: - raise utils.JQException("Unexpected VarScan VCF structure - " - "missing NORMAL and TUMOR headers.") - @staticmethod def _validate_filter_file(file_reader): column_header = 0 @@ -394,7 +382,7 @@ def _create_vcf_readers(pair_tuples): #pylint: disable=too-many-locals def claim(self, file_readers): - """Recognizes and claims MuTect VCFs form the set of all input VCFs. + """Recognizes and claims VarScan VCFs from the set of all input VCFs. Each defined caller has a chance to evaluate and claim all the incoming files as something that it can process. 
Since VarScan can claim @@ -405,7 +393,7 @@ def claim(self, file_readers): file_readers: the collection of currently unclaimed files Returns: - A tuple of unclaimed readers and MuTectVcfReaders. + A tuple of unclaimed readers and VarScanVcfReaders. """ (prefix_to_readers, diff --git a/test/utils/test_case.py b/test/utils/test_case.py index bc7ada6..82dd88d 100644 --- a/test/utils/test_case.py +++ b/test/utils/test_case.py @@ -74,17 +74,17 @@ def assertCommand(self, command, expected_dir): output = command[2] - #try: - if os.path.isfile(output): - output_file = os.path.basename(output) - self._compare_files(output, output_file, expected_dir) - elif os.path.isdir(output): - for output_file in os.listdir(output): - new_output = os.path.join(output, output_file) - self._compare_files(new_output, output_file, expected_dir) - #except self.failureException as e: - # msg = "discrepancy in command [{}]: {}".format(" ".join(command), e) - # raise self.failureException(msg) + try: + if os.path.isfile(output): + output_file = os.path.basename(output) + self._compare_files(output, output_file, expected_dir) + elif os.path.isdir(output): + for output_file in os.listdir(output): + new_output = os.path.join(output, output_file) + self._compare_files(new_output, output_file, expected_dir) + except self.failureException as e: + msg = "discrepancy in command [{}]: {}".format(" ".join(command), e) + raise self.failureException(msg) def entab(self, string, old="|"): return string.replace(old, "\t") diff --git a/test/variant_caller_transforms/common_tags_test.py b/test/variant_caller_transforms/common_tags_test.py index c867b43..1265902 100644 --- a/test/variant_caller_transforms/common_tags_test.py +++ b/test/variant_caller_transforms/common_tags_test.py @@ -32,14 +32,14 @@ def test_somatic_tag(self): self.assertEquals("Integer", tag.vcf_type) self.assertEquals("1", tag.vcf_number) - def test_add_tag_values_is_abstract(self): - try: - class FakeTag(common_tags.JacquardTag): - def 
__init__(self): pass - FakeTag() - self.fail(("Should not be able to instantiate JacquardTag without " - "overrideing abstract methods.")) - except TypeError: pass + def test_add_tag_values_raisesNotImplementedError(self): + class FakeTag(common_tags.JacquardTag): + def __init__(self): pass + tag = FakeTag() + self.assertRaises(NotImplementedError, + tag.add_tag_values, + VcfRecord("1", "42", "A", "C") + ) def test_metaheader(self): tag_type = common_tags.JacquardTag.GENOTYPE_TAG diff --git a/test/variant_caller_transforms/mutect_test.py b/test/variant_caller_transforms/mutect_test.py index f82fd67..d8210c7 100644 --- a/test/variant_caller_transforms/mutect_test.py +++ b/test/variant_caller_transforms/mutect_test.py @@ -129,14 +129,6 @@ def setUp(self): super(MutectTestCase, self).setUp() self.caller = mutect.Mutect() - def test_validateInputFile_isValid(self): - metaheaders = ["##MuTect=blah"] - self.assertTrue(self.caller.validate_input_file(metaheaders, "#column_header")) - - def test_validateInputFile_isNotValid(self): - metaheaders = ["Foo"] - self.assertFalse(self.caller.validate_input_file(metaheaders, "#column_header")) - def test_claim(self): record1 = "chr1\t.\t.\t.\t.\t.\t.\t.\t." content1 = ["##foo", "##source=strelka", "#chrom", record1]