Skip to content

Commit

Permalink
JQ-319 (cgates): Adjusted desc of JQ_SUMMARY_DP_AVERAGE; removed obso…
Browse files Browse the repository at this point in the history
…lete badge from README; updated supported VC version
  • Loading branch information
cgates committed Jun 18, 2018
1 parent ec7812e commit 1826572
Show file tree
Hide file tree
Showing 11 changed files with 20 additions and 26 deletions.
2 changes: 2 additions & 0 deletions CHANGELOG.rst
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@ Changelog
1.1.0 (6/18/2018)
-----------------
- Adjusted *translate* to correctly parse newer versions of Mutect
- Updated supported versions for Mutect, Strelka, VarScan
- Fixed error in JQ_SUMMARY_DP_AVERAGE tag description

1.0.0 (6/5/2018)
-----------------
Expand Down
6 changes: 1 addition & 5 deletions README.rst
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
========
Jacquard
========
Suite of command-line tools to expedite analysis of exome variant data from
Suite of command-line tools to expedite analysis of exome variant data from
multiple patients and multiple variant callers.

.. image:: https://travis-ci.org/umich-brcf-bioinf/Jacquard.svg?branch=develop
Expand All @@ -20,9 +20,6 @@ multiple patients and multiple variant callers.
:target: https://pypi.python.org/pypi/jacquard/
:alt: Latest PyPI version

.. image:: https://img.shields.io/pypi/dm/Jacquard.svg
:target: https://pypi.python.org/pypi/jacquard/
:alt: Downloads Counter

The official repository is at:

Expand Down Expand Up @@ -61,4 +58,3 @@ See `ReadTheDocs <http://jacquard.readthedocs.org/>`_ for full documentation.
Email bfx-jacquard@umich.edu for support and questions.

UM BRCF Bioinformatics Core

Binary file modified examples/examples.zip
Binary file not shown.
10 changes: 5 additions & 5 deletions jacquard/utils/summarize_rollup_transform.py
Original file line number Diff line number Diff line change
Expand Up @@ -297,7 +297,7 @@ def __init__(self):
SUMMARY_ALLELE_FREQ_AVG,
('Average allele frequency across recognized '
'variant callers that reported frequency for '
'this position [average(JQ_*_AF)].'))
'this sample-locus [average(JQ_*_AF)].'))

def add_tag_values(self, record):
new_sample_tag_values = {}
Expand Down Expand Up @@ -364,9 +364,9 @@ def __init__(self):
super(self.__class__,
self).__init__(SUMMARY_TAG,
SUMMARY_DEPTH_AVG,
('Average allele frequency across recognized '
'variant callers that reported frequency for '
'this position; rounded to integer '
('Average depth across recognized '
'variant callers that reported depth for '
'this sample-locus; rounded to integer '
'[round(average(JQ_*_DP))].'))

def add_tag_values(self, record):
Expand Down Expand Up @@ -402,7 +402,7 @@ def __init__(self):
SUMMARY_SOMATIC_COUNT,
('Count of recognized variant callers that '
'reported confident somatic call for this '
'sample-position.'))
'sample-locus.'))

def add_tag_values(self, record):
new_sample_tag_values = {}
Expand Down
2 changes: 1 addition & 1 deletion jacquard/variant_caller_transforms/mutect.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@

JQ_MUTECT_TAG = "JQ_MT_"
MUTECT_ABBREVIATION = "MT"
VERSION = "v1.1.4"
VERSION = "v1.1-4.0"

class _GenotypeTag(common_tags.AbstractJacquardTag):
#pylint: disable=too-few-public-methods
Expand Down
3 changes: 1 addition & 2 deletions jacquard/variant_caller_transforms/strelka.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@

JQ_STRELKA_TAG = "JQ_SK_"
STRELKA_ABBREVIATION = "SK"
VERSION = "v1.0.14"
VERSION = "v1.0-2.9"

class _GenotypeTag(common_tags.AbstractJacquardTag):
#pylint: disable=too-few-public-methods
Expand Down Expand Up @@ -379,4 +379,3 @@ def _add_tags(self, vcf_record):
for tag in self.tags:
tag.add_tag_values(vcf_record)
return vcf_record

2 changes: 1 addition & 1 deletion jacquard/variant_caller_transforms/varscan.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@
"NORMAL|TUMOR").replace("|", "\t")
JQ_VARSCAN_TAG = "JQ_VS_"
VARSCAN_ABBREVIATION = "VS"
VERSION = "v2.3"
VERSION = "v2.3-2.4"

def _varscan_hc_fileheader(line):
return line.startswith("chrom\tposition")
Expand Down
6 changes: 3 additions & 3 deletions test/functional_tests/03_summarize/benchmark/summarized.vcf
Original file line number Diff line number Diff line change
Expand Up @@ -12,18 +12,18 @@
##FORMAT=<ID=JQ_SK_AF,Number=A,Type=Float,Description="Jacquard allele frequency for Strelka: Decimal allele frequency rounded to 4 digits (based on alt_depth/total_depth. Uses TAR if available, otherwise uses uses DP2 if available, otherwise uses ACGT tier2 depth)">
##FORMAT=<ID=JQ_SK_DP,Number=1,Type=Integer,Description="Jacquard depth for Strelka (uses DP2 if available, otherwise uses ACGT tier2 depth)">
##FORMAT=<ID=JQ_SK_HC_SOM,Number=1,Type=Integer,Description="Jacquard somatic status for Strelka: 0=non-somatic,1=somatic (based on PASS in FILTER column)">
##FORMAT=<ID=JQ_SUMMARY_AF_AVERAGE,Number=1,Type=Float,Description="Average allele frequency across recognized variant callers that reported frequency for this position [average(JQ_*_AF)].">
##FORMAT=<ID=JQ_SUMMARY_AF_AVERAGE,Number=1,Type=Float,Description="Average allele frequency across recognized variant callers that reported frequency for this sample-locus [average(JQ_*_AF)].">
##FORMAT=<ID=JQ_SUMMARY_AF_RANGE,Number=1,Type=Float,Description="Max(allele frequency) - min (allele frequency) across recognized callers.">
##FORMAT=<ID=JQ_SUMMARY_AF_ZSCORE,Number=1,Type=Float,Description="Concordance of reported allele frequencies across callers: [(this AF range - mean AF range)/standard dev(all AF ranges)]. Values with null or missing AF range will be assigned zscore of '.'; for multi-valued ranges, zscore is of largest range.">
##FORMAT=<ID=JQ_SUMMARY_CALLERS_PASSED_COUNT,Number=1,Type=Integer,Description="Count of variant callers where FILTER = PASS for this variant in the Jacquard tagged VCF">
##FORMAT=<ID=JQ_SUMMARY_CALLERS_PASSED_LIST,Number=.,Type=String,Description="Comma-separated list of variant caller short-names where FILTER = PASS for this variant in the Jacquard tagged VCF">
##FORMAT=<ID=JQ_SUMMARY_CALLERS_REPORTED_COUNT,Number=1,Type=Integer,Description="Count of variant callers which listed this variant in the Jacquard tagged VCF">
##FORMAT=<ID=JQ_SUMMARY_CALLERS_REPORTED_LIST,Number=.,Type=String,Description="Comma-separated list variant callers which listed this variant in the Jacquard tagged VCF">
##FORMAT=<ID=JQ_SUMMARY_DP_AVERAGE,Number=1,Type=Float,Description="Average allele frequency across recognized variant callers that reported frequency for this position; rounded to integer [round(average(JQ_*_DP))].">
##FORMAT=<ID=JQ_SUMMARY_DP_AVERAGE,Number=1,Type=Float,Description="Average depth across recognized variant callers that reported depth for this sample-locus; rounded to integer [round(average(JQ_*_DP))].">
##FORMAT=<ID=JQ_SUMMARY_DP_RANGE,Number=1,Type=Float,Description="Max(depth) - min (depth) across recognized callers.">
##FORMAT=<ID=JQ_SUMMARY_DP_ZSCORE,Number=1,Type=Float,Description="Concordance of reported depth across callers: [(this DP range - mean DP range)/standard dev(all DP ranges)]. Values with null or missing DP range will be assigned zscore of '.'.">
##FORMAT=<ID=JQ_SUMMARY_HC_GT,Number=1,Type=String,Description="High confidence consensus genotype (inferred from JQ_*_GT and JQ_*_CALLER_PASSED). Majority rules; ties go to the least unusual variant (0/1>0/2>1/1). Variants which failed their filter are ignored.">
##FORMAT=<ID=JQ_SUMMARY_SOM_COUNT,Number=1,Type=Integer,Description="Count of recognized variant callers that reported confident somatic call for this sample-position.">
##FORMAT=<ID=JQ_SUMMARY_SOM_COUNT,Number=1,Type=Integer,Description="Count of recognized variant callers that reported confident somatic call for this sample-locus.">
#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT tiny_strelka|NORMAL tiny_strelka|TUMOR
chr1 1147545 . A G . . JQ_SUMMARY_SAMPLES_REPORTED_COUNT=2;JQ_SUMMARY_SAMPLES_PASSED_COUNT=0 JQ_SK_AF:JQ_SK_DP:JQ_SK_HC_SOM:JQ_SK_CALLER_REPORTED:JQ_SK_CALLER_PASSED:JQ_SUMMARY_CALLERS_REPORTED_COUNT:JQ_SUMMARY_CALLERS_PASSED_COUNT:JQ_SUMMARY_CALLERS_REPORTED_LIST:JQ_SUMMARY_CALLERS_PASSED_LIST:JQ_SUMMARY_AF_AVERAGE:JQ_SUMMARY_AF_RANGE:JQ_SUMMARY_DP_AVERAGE:JQ_SUMMARY_DP_RANGE:JQ_SUMMARY_SOM_COUNT:JQ_SUMMARY_HC_GT 0.0:27:1:1:0:1:0:SK:.:0.0:.:27:.:1:. 0.31:35:1:1:0:1:0:SK:.:0.31:.:35:.:1:.
chr1 1169795 . C T . JQ_SUMMARY_SAMPLES_REPORTED_COUNT=2;JQ_SUMMARY_SAMPLES_PASSED_COUNT=2 JQ_SK_AF:JQ_SK_DP:JQ_SK_HC_SOM:JQ_SK_CALLER_REPORTED:JQ_SK_CALLER_PASSED:JQ_SUMMARY_CALLERS_REPORTED_COUNT:JQ_SUMMARY_CALLERS_PASSED_COUNT:JQ_SUMMARY_CALLERS_REPORTED_LIST:JQ_SUMMARY_CALLERS_PASSED_LIST:JQ_SUMMARY_AF_AVERAGE:JQ_SUMMARY_AF_RANGE:JQ_SUMMARY_DP_AVERAGE:JQ_SUMMARY_DP_RANGE:JQ_SUMMARY_SOM_COUNT:JQ_SUMMARY_HC_GT 0.0:37:0:1:1:1:1:SK:SK:0.0:.:37:.:0:. 0.22:46:1:1:1:1:1:SK:SK:0.22:.:46:.:1:.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,18 +23,18 @@
##FORMAT=<ID=JQ_SK_DP,Number=1,Type=Integer,Description="Jacquard depth for Strelka (uses DP2 if available, otherwise uses ACGT tier2 depth)">
##FORMAT=<ID=JQ_SK_GT,Number=1,Type=String,Description="Jacquard genotype (based on SGT). Example for snv: REF=A, ALT=C, INFO:SGT=AA->AC is translated as normal=0/0, tumor=0/1. Example for indel: INFO:SGT=ref->het is translated as normal=0/0, tumor=0/1.">
##FORMAT=<ID=JQ_SK_HC_SOM,Number=1,Type=Integer,Description="Jacquard somatic status for Strelka: 0=non-somatic,1=somatic (based on PASS in FILTER column)">
##FORMAT=<ID=JQ_SUMMARY_AF_AVERAGE,Number=1,Type=Float,Description="Average allele frequency across recognized variant callers that reported frequency for this position [average(JQ_*_AF)].">
##FORMAT=<ID=JQ_SUMMARY_AF_AVERAGE,Number=1,Type=Float,Description="Average allele frequency across recognized variant callers that reported frequency for this sample-locus [average(JQ_*_AF)].">
##FORMAT=<ID=JQ_SUMMARY_AF_RANGE,Number=1,Type=Float,Description="Max(allele frequency) - min (allele frequency) across recognized callers.">
##FORMAT=<ID=JQ_SUMMARY_AF_ZSCORE,Number=1,Type=Float,Description="Concordance of reported allele frequencies across callers: [(this AF range - mean AF range)/standard dev(all AF ranges)]. Values with null or missing AF range will be assigned zscore of '.'; for multi-valued ranges, zscore is of largest range.">
##FORMAT=<ID=JQ_SUMMARY_CALLERS_PASSED_COUNT,Number=1,Type=Integer,Description="Count of variant callers where FILTER = PASS for this variant in the Jacquard tagged VCF">
##FORMAT=<ID=JQ_SUMMARY_CALLERS_PASSED_LIST,Number=.,Type=String,Description="Comma-separated list of variant caller short-names where FILTER = PASS for this variant in the Jacquard tagged VCF">
##FORMAT=<ID=JQ_SUMMARY_CALLERS_REPORTED_COUNT,Number=1,Type=Integer,Description="Count of variant callers which listed this variant in the Jacquard tagged VCF">
##FORMAT=<ID=JQ_SUMMARY_CALLERS_REPORTED_LIST,Number=.,Type=String,Description="Comma-separated list variant callers which listed this variant in the Jacquard tagged VCF">
##FORMAT=<ID=JQ_SUMMARY_DP_AVERAGE,Number=1,Type=Float,Description="Average allele frequency across recognized variant callers that reported frequency for this position; rounded to integer [round(average(JQ_*_DP))].">
##FORMAT=<ID=JQ_SUMMARY_DP_AVERAGE,Number=1,Type=Float,Description="Average depth across recognized variant callers that reported depth for this sample-locus; rounded to integer [round(average(JQ_*_DP))].">
##FORMAT=<ID=JQ_SUMMARY_DP_RANGE,Number=1,Type=Float,Description="Max(depth) - min (depth) across recognized callers.">
##FORMAT=<ID=JQ_SUMMARY_DP_ZSCORE,Number=1,Type=Float,Description="Concordance of reported depth across callers: [(this DP range - mean DP range)/standard dev(all DP ranges)]. Values with null or missing DP range will be assigned zscore of '.'.">
##FORMAT=<ID=JQ_SUMMARY_HC_GT,Number=1,Type=String,Description="High confidence consensus genotype (inferred from JQ_*_GT and JQ_*_CALLER_PASSED). Majority rules; ties go to the least unusual variant (0/1>0/2>1/1). Variants which failed their filter are ignored.">
##FORMAT=<ID=JQ_SUMMARY_SOM_COUNT,Number=1,Type=Integer,Description="Count of recognized variant callers that reported confident somatic call for this sample-position.">
##FORMAT=<ID=JQ_SUMMARY_SOM_COUNT,Number=1,Type=Integer,Description="Count of recognized variant callers that reported confident somatic call for this sample-locus.">
##FORMAT=<ID=JQ_VS_AF,Number=A,Type=Float,Description="Jacquard allele frequency for VarScan: Decimal allele frequency rounded to 4 digits (based on FREQ)">
##FORMAT=<ID=JQ_VS_CALLER_PASSED,Number=1,Type=Integer,Description="1 = variant FILTER is PASS in original VCF">
##FORMAT=<ID=JQ_VS_CALLER_REPORTED,Number=1,Type=Integer,Description="1 = variant present in original VCF">
Expand Down
2 changes: 0 additions & 2 deletions test/summarize_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@
import test.utils.test_case as test_case
from test.utils.vcf_test import MockFileWriter, MockVcfReader


#TODO (cgates): The module summarize is not adequately unit-tested
class MockSummarizeCaller(object):
def __init__(self, metaheaders_list=None):
Expand Down Expand Up @@ -85,4 +84,3 @@ def test_summarize(self):
"summarized.vcf")

self.assertCommand(command, expected_file)

7 changes: 3 additions & 4 deletions test/utils/summarize_rollup_transform_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -326,7 +326,7 @@ class AlleleFreqAverageTagTestCase(test_case.JacquardBaseTestCase):

def test_metaheader(self):
split_meta_header = summarize_caller._AlleleFreqAverageTag().metaheader.split("\n")
self.assertEqual('##FORMAT=<ID={0}AF_AVERAGE,Number=1,Type=Float,Description="Average allele frequency across recognized variant callers that reported frequency for this position [average(JQ_*_AF)].">'.format(summarize_caller.JQ_SUMMARY_TAG),
self.assertEqual('##FORMAT=<ID={0}AF_AVERAGE,Number=1,Type=Float,Description="Average allele frequency across recognized variant callers that reported frequency for this sample-locus [average(JQ_*_AF)].">'.format(summarize_caller.JQ_SUMMARY_TAG),
split_meta_header[0])

def test_add_tag_values(self):
Expand Down Expand Up @@ -501,7 +501,7 @@ class DepthAverageTagTestCase(test_case.JacquardBaseTestCase):

def test_metaheader(self):
split_meta_header = summarize_caller._DepthAverageTag().metaheader.split("\n")
self.assertEquals('##FORMAT=<ID={}DP_AVERAGE,Number=1,Type=Float,Description="Average allele frequency across recognized variant callers that reported frequency for this position; rounded to integer [round(average(JQ_*_DP))].">'.format(summarize_caller.JQ_SUMMARY_TAG),
self.assertEquals('##FORMAT=<ID={}DP_AVERAGE,Number=1,Type=Float,Description="Average depth across recognized variant callers that reported depth for this sample-locus; rounded to integer [round(average(JQ_*_DP))].">'.format(summarize_caller.JQ_SUMMARY_TAG),
split_meta_header[0])

def test_add_tag_values(self):
Expand Down Expand Up @@ -595,7 +595,7 @@ def test_metaheader(self):
self.assertEqual('##FORMAT=<ID={0}SOM_COUNT,Number=1,Type=Integer,' \
'Description="Count of recognized variant callers ' \
'that reported confident somatic call for this '\
'sample-position.">'\
'sample-locus.">'\
.format(summarize_caller.JQ_SUMMARY_TAG), split_meta_header[0])

def test_add_tag_values(self):
Expand Down Expand Up @@ -786,4 +786,3 @@ def test_get_new_metaheaders(self):
self.assertEqual(expected, first_meta_header)
self.assertEqual(12, len(actual))
self.assertEqual(1, len(split_actual))

0 comments on commit 1826572

Please sign in to comment.