Skip to content

Commit

Permalink
Merge branch 'jq-320_fix_expand_handling_of_name_collisions_between_f…
Browse files Browse the repository at this point in the history
…ixed_and_info' into develop
  • Loading branch information
cgates committed Jun 6, 2018
2 parents c9cbd39 + 7fc0020 commit 5adfd5c
Show file tree
Hide file tree
Showing 3 changed files with 41 additions and 13 deletions.
18 changes: 9 additions & 9 deletions CHANGELOG.rst
Original file line number Diff line number Diff line change
@@ -1,9 +1,11 @@
Changelog
=========

0.43 (XX/XX/XXXX)
0.43 (6/5/2018)
-----------------
- Removed obsolete spikes directory
- Fixed bug in *expand* which could overwrite fixed VCF fields (e.g. REF, ALT,
etc.) if INFO contained identically named fields.

0.42 (9/22/2015)
----------------
Expand All @@ -22,15 +24,15 @@ Changelog
- Improved checks for consistent VCF file sets
- Fixed bug in *merge* that caused error if any VCFs were unsorted
- Fixed bug in *summarize* that caused error if variant was called by subset
of callers
of callers

0.31 (3/17/2015)
----------------
- Downgraded VCF format from 4.2 to 4.1
- Fixed a bug that omitted CALLERS_REPORTED_LIST summary tag
- Simplified summary tags; removed dependency on numpy
- Adjusted VarScan translation to accept a file pattern to identify
high-confidence files
high-confidence files


0.3 (3/9/2015)
Expand All @@ -39,7 +41,7 @@ Changelog
on incoming data.
- Renamed *consensus* to *summarize*
- More consistent behavior in *expand*
- Significantly improved *merge* performance
- Significantly improved *merge* performance
- Added new summary tags:
- CALLERS_REPORTED_COUNT
- CALLERS_REPORTED_LIST
Expand All @@ -49,12 +51,10 @@ Changelog
- SAMPLES_PASSED_COUNT
- Fixed bug in how Strelka calculated AF on indels
- Improved command validation and error handling
- Added project/code documentation
- Added project/code documentation
- Removed dependencies on pandas


0.21 (10/2014)
--------------
- Initial public release


8 changes: 5 additions & 3 deletions jacquard/expand.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,10 +60,12 @@ def _create_row_dict(column_list, vcf_record):
for format_key, format_value in format_key_values.items():
row_dict[format_key + "|" + sample_name] = format_value

new_dict = row_dict.copy()
new_dict.update(vcf_record.info_dict)
for (name, value) in vcf_record.info_dict.items():
if name in row_dict:
name = "INFO_" + name
row_dict[name] = value

return new_dict
return row_dict

def _filter_column_list(column_spec_list,
potential_col_list,
Expand Down
28 changes: 27 additions & 1 deletion test/expand_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,33 @@ def test_create_row_dict(self):
"AF|SAMPLE_A|TUMOR": "0.3"}
self.assertEquals(expected_dict, actual_dict)

def test_create_row_dict_fieldNamesMangledToAvoidCollision(self):
    # INFO keys that collide with a fixed VCF column name (here REF and ALT)
    # must be emitted under an "INFO_" prefix instead of overwriting the
    # fixed column; a non-colliding key (SNP) keeps its own name.
    column_list = ["CHROM", "POS", "ID", "REF", "ALT", "QUAL", "FILTER",
                   "INFO"]
    vcf_record = vcf.VcfRecord("1", "42", "A", "AT",
                               vcf_id="rs32", qual="30", vcf_filter="PASS",
                               info="SNP;REF;ALT=Yep")

    actual_dict = expand._create_row_dict(column_list, vcf_record)

    # Flag-style INFO entries (no "=value") are expected to map to their
    # own name, hence "SNP": "SNP" and "INFO_REF": "REF".
    expected_dict = {"CHROM": "1",
                     "POS": "42",
                     "ID": "rs32",
                     "REF": "A",
                     "ALT": "AT",
                     "QUAL": "30",
                     "FILTER": "PASS",
                     "SNP": "SNP",
                     "INFO_REF": "REF",
                     "INFO_ALT": "Yep"}
    # assertEqual rather than the deprecated assertEquals alias, which was
    # removed from unittest in Python 3.12.
    self.assertEqual(expected_dict, actual_dict)

def test_filter_column_list(self):
potential_col_list = OrderedDict([("CHROM", None),
("POS", None),
Expand Down Expand Up @@ -397,4 +424,3 @@ def test_expand_colSpec(self):
"expanded.txt")

self.assertCommand(command, expected_file)

0 comments on commit 5adfd5c

Please sign in to comment.