Skip to content

Commit

Permalink
configurable minimum base depth list for ampliconstats,
Browse files Browse the repository at this point in the history
make the ampliconstats qc check save coverage data
  • Loading branch information
mgcam committed Sep 1, 2020
1 parent 367ee6b commit 5ff03ad
Show file tree
Hide file tree
Showing 4 changed files with 65 additions and 7 deletions.
1 change: 1 addition & 0 deletions MANIFEST
Expand Up @@ -1055,6 +1055,7 @@ t/data/portable_pipelines/ncov2019-artic-nf/cf01166c42a/product_release.yml
t/data/portable_pipelines/ncov2019-artic-nf/cf01166c42a/product_release_no_pp.yml
t/data/portable_pipelines/ncov2019-artic-nf/cf01166c42a/product_release_no_study.yml
t/data/portable_pipelines/ncov2019-artic-nf/cf01166c42a/product_release_unknown_pp.yml
t/data/portable_pipelines/ncov2019-artic-nf/cf01166c42a/product_release_explicit_astats_depth.yml
t/data/portable_pipelines/ncov2019-artic-nf/v.3/product_release_no_staging_root.yml
t/data/portable_pipelines/ncov2019-artic-nf/v.3/product_release_two_pps.yml
t/data/portable_pipelines/ncov2019-artic-nf/v.3/product_release_two_studies.yml
Expand Down
12 changes: 9 additions & 3 deletions lib/npg_pipeline/function/stage2pp.pm
Expand Up @@ -33,8 +33,8 @@ Readonly::Hash my %PER_PP_REQS => (
ncov2019_artic_nf => {memory_mb => 5000, num_cpus => 4},
ncov2019_artic_nf_ampliconstats => {memory_mb => 1000, num_cpus => 2},
);

Readonly::Scalar my $AMPLICONSTATS_OPTIONS => q[-t 50 -d 1,10,20,100];
Readonly::Array my @DEFAULT_AMPLICONSTATS_DEPTH => qw(1 10 20 100);
Readonly::Scalar my $AMPLICONSTATS_OPTIONS => q[-t 50];

our $VERSION = '0';

Expand Down Expand Up @@ -311,6 +311,9 @@ sub _ncov2019_artic_nf_ampliconstats_create {
return;
}

my $depth_array = $pp->{'ampliconstats_min_base_depth'}
|| \@DEFAULT_AMPLICONSTATS_DEPTH;

my $lane_product = ($product->lanes_as_products)[0];
my $lane_pp_path = $self->pp_archive4product(
$lane_product, $pp, $self->pp_archive_path());
Expand Down Expand Up @@ -344,20 +347,23 @@ sub _ncov2019_artic_nf_ampliconstats_create {
'ampliconstats',
$sta_cpus_option,
$AMPLICONSTATS_OPTIONS,
q[-d ] . join(q[,], @{$depth_array}),
$self->_primer_bed_file($product),
$input_files_glob;
$sta_command = join q[ > ], $sta_command, $sta_file;
# Invoke a lane-level qc check on the ampliconstats file produced
# in the previous step with an option to fan out qc check outputs
# to individual per-sample directories.
my @sections = map {q[FPCOV-] . $_} @{$depth_array};
unshift @sections, q[FREADS];
my $qca_command = join q[ ], $self->qc_cmd,
'--check generic',
'--spec ampliconstats',
'--rpt_list ' . $lane_product->composition->freeze2rpt,
'--input_files ' . $sta_file,
'--pp_name ncov2019_artic_nf_ampliconstats',
'--pp_version ' . $self->pp_version($pp),
'--ampstats_section FREADS',
(map {'--ampstats_section ' . $_} @sections),
'--qc_out ' . $lane_qc_dir,
'--sample_qc_out ' . q['] . $lane_archive . q[/plex*/qc'];

Expand Down
33 changes: 29 additions & 4 deletions t/20-function-stage2pp.t
Expand Up @@ -274,7 +274,7 @@ subtest 'skip unknown pipeline' => sub {
};

subtest q(definition generation, 'ncov2019_artic_nf ampliconstats' pp) => sub {
plan tests => 28;
plan tests => 30;

local $ENV{NPG_CACHED_SAMPLESHEET_FILE} = q[t/data/samplesheet_33990.csv];

Expand All @@ -296,15 +296,25 @@ subtest q(definition generation, 'ncov2019_artic_nf ampliconstats' pp) => sub {

my @commands = ();
my @replacement_files = ();
for my $p ((1, 2)) {
my @astats_sections = qw(FREADS FPCOV-1 FPCOV-10 FPCOV-20 FPCOV-100);

my $count = 0;
for my $p ((1, 2, 1)) {

$count++;
my @s = @astats_sections;
$count == 3 and pop @s;
my $sections = join q[ ], map { q[--ampstats_section ] . $_ } @s;

my $pp_path = qq(${pp_archive_path}/lane${p}) .
qq(/ncov2019_artic_nf_ampliconstats/0.1/);
my $astats_file = $pp_path . qq(26291_${p}.astats);
my $replacement_map_file = $pp_path . q(replacement_map.txt);
push @replacement_files, $replacement_map_file;
push @commands,
'(' .
$dir . q(/samtools ampliconstats -@1 -t 50 -d 1,10,20,100 ) .
$dir . q(/samtools ampliconstats -@1 -t 50 -d 1,10,20) .
($count == 3 ? q( ) : q(,100 )) .
$dir . q(/primer_panel/nCoV-2019/default/SARS-CoV-2/MN908947.3/nCoV-2019.bed ) .
$pp_archive_path . qq(/lane${p}) .
q(/plex*/ncov2019_artic_nf/cf01166c42a) .
Expand All @@ -320,7 +330,7 @@ subtest q(definition generation, 'ncov2019_artic_nf ampliconstats' pp) => sub {
$dir . q(/qc --check generic --spec ampliconstats ) .
qq(--rpt_list 26291:${p} --input_files $astats_file ) .
q(--pp_name ncov2019_artic_nf_ampliconstats --pp_version 0.1 ) .
q(--ampstats_section FREADS ) .
qq($sections ) .
q(--qc_out ) . $archive_path . qq(/lane${p}/qc ) .
q(--sample_qc_out ') . $archive_path . qq(/lane${p}/plex*/qc') .
')';
Expand Down Expand Up @@ -379,6 +389,21 @@ subtest q(definition generation, 'ncov2019_artic_nf ampliconstats' pp) => sub {
is ($d->job_name, 'stage2App_ncov20.1_26291', 'job name');
is ($d->memory, 1000, 'memory');
is_deeply ($d->num_cpus, [2], 'number of CPUs');

$ppd = npg_pipeline::function::stage2pp->new(
product_conf_file_path => qq[$repo_dir/product_release_explicit_astats_depth.yml],
pipeline_type => 'stage2App',
archive_path => $archive_path,
runfolder_path => $runfolder_path,
id_run => 26291,
merge_lanes => 0,
timestamp => $timestamp,
repository => $dir);

$ds = $ppd->create;
is (@{$ds}, 2, 'two definitions are returned');
$d = $ds->[0];
is_deeply ([(split q[ ], $d->command)], $commands[2], 'correct command for lane 1');
};

1;
@@ -0,0 +1,26 @@
---
default:
s3:
enable: false
url: null
notify: false
irods:
enable: true
notify: false

study:
- study_id: "3073"
portable_pipelines:
- pp_name: "ncov2019-artic-nf"
pp_version: "cf01166c42a"
pp_type: "stage2pp"
pp_root: "t/data/portable_pipelines"
- pp_name: "ncov2019-artic-nf ampliconstats"
pp_version: "0.1"
pp_type: "stage2App"
pp_input_glob: "plex*/ncov2019_artic_nf/cf01166c42a/ncovIlluminaCram_ncovIllumina_sequenceAnalysis_trimPrimerSequences/*primertrimmed.sorted.bam"
ampliconstats_min_base_depth:
- 1
- 10
- 20
pp_archival_flag: false

0 comments on commit 5ff03ad

Please sign in to comment.