In [None]:
%%writefile main.nf
#!/usr/bin/env nextflow

/*
 * Pipeline input parameters -- update these to point at your data.
 * Every path is resolved relative to the directory that contains main.nf.
 */
gdc_token = "${projectDir}/gdc_user_token.txt"      // token file handed to download_bam
file_guids = "${projectDir}/file_guids.txt"         // text file, split into one entry per line by the workflow
// No trailing whitespace inside the string: a stray space becomes part of the
// file name and the reference would never be found.
reference_file = "${projectDir}/GRCh38.d1.vd1.fa"
project_dir = projectDir

/*
 * Runs /utils/get_bam.py with the GDC token file and one file identifier.
 *
 * Inputs:
 *   gdc_token    - token file, staged into the task work directory
 *   gdc_file_ids - identifier string handed to the script as-is
 * Outputs:
 *   gdc_api_log - everything the script printed on stdout
 *   bam_files   - every *.bam present in the task directory once the script ends
 *
 * NOTE(review): /utils/get_bam.py is presumably shipped inside the container
 * selected for the 'gdcapi' label -- confirm against the image.
 */
process download_bam {

    label 'gdcapi'

    input:
    path gdc_token
    // The identifiers are plain strings read from a text file, not files on
    // disk. Declaring them as `path` makes Nextflow reject them (a string
    // path input must be absolute), so they are taken as a value instead.
    val gdc_file_ids

    output:
    stdout emit: gdc_api_log
    path('*.bam'), emit: bam_files

    script:
    """
    python3 /utils/get_bam.py $gdc_token $gdc_file_ids
    """
}


/*
 * Calls variants with GATK Mutect2 and then filters them with
 * FilterMutectCalls.
 *
 * Inputs:
 *   bam_files - one or more BAM files staged into the task directory
 *   ref_file  - reference FASTA
 * Outputs:
 *   extract_metadata_log - stdout of both GATK commands
 *   csv_files            - every *.vcf in the task directory; despite the emit
 *                          name these are VCF files (name kept for callers)
 *
 * NOTE(review): the '*.vcf' glob matches both unfiltered.vcf and filtered.vcf --
 * confirm downstream steps really want the unfiltered calls as well.
 * NOTE(review): only the FASTA and BAM files are staged; if GATK needs the
 * companion index files (.fai/.dict/.bai) they must be provided too -- confirm.
 */
process variant_calling {

    label 'generate_vcf'

    input:
    path bam_files
    path ref_file

    output:
    stdout emit: extract_metadata_log
    path('*.vcf'), emit: csv_files

    script:
    // GATK expects one -I flag per input file. A single staged file arrives as
    // one path and several arrive as a list, so normalise to a list first.
    def bam_list = bam_files instanceof List ? bam_files : [bam_files]
    def bam_args = bam_list.collect { "-I ${it}" }.join(' ')
    """
    gatk Mutect2 -R $ref_file ${bam_args} -O unfiltered.vcf
    gatk FilterMutectCalls -R $ref_file -V unfiltered.vcf -O filtered.vcf
    """
}


/*
 * Runs /utils/analyze_vcf.py on the staged VCF file(s).
 *
 * Input:
 *   vcf_files - VCF file(s) staged into the task directory
 * Outputs:
 *   gdc_api_log - stdout of the analysis script
 *   csv_files   - every *.csv found in the task directory afterwards
 */
process vcf_extraction {

    label 'vcf_analysis'

    input:
    path vcf_files

    output:
    stdout emit: gdc_api_log
    path '*.csv', emit: csv_files

    script:
    """
    python3 /utils/analyze_vcf.py ${vcf_files}
    """
}

// Entry workflow (the one Nextflow runs by default):
//   1. download a BAM for every identifier listed in `file_guids`
//   2. call variants on the downloaded BAMs
//   3. run the VCF analysis on those calls plus any extra VCFs supplied
//      on the command line with --bdcat_vcfs '<glob>'
workflow {

    gdc_creds = file(gdc_token)

    // One channel item per non-empty line of the identifier list.
    gdc_file_ids = Channel.fromPath( file_guids )
        .splitText()
        .map { it.strip() }
        .filter { it }

    download_bam(gdc_creds, gdc_file_ids)

    ref_file = file(reference_file)

    // Each process emits two channels (stdout + files). Select the file
    // channel by its emit name; passing the whole result would supply more
    // channels than the next process declares inputs for.
    variant_calling(download_bam.out.bam_files, ref_file)

    // Optional extra VCFs. `bdcat_vcfs` was never defined in this script, so
    // it is read from the command-line parameters and skipped when absent.
    bdcat_vcf_files = params.bdcat_vcfs
        ? Channel.fromPath( params.bdcat_vcfs )
        : Channel.empty()

    // A process may be invoked only once per workflow scope, so both VCF
    // sources are merged into a single channel before the one call.
    vcf_extraction(variant_calling.out.csv_files.mix(bdcat_vcf_files))
}

In [None]:
%%writefile nextflow.config

// Execution settings per process label. Each label used in main.nf gets its
// own selector so queue and container can be set independently; the
// 'placeholder' values must be replaced before running.
process {

    withLabel: gdcapi {
        executor  = 'awsbatch'
        queue     = 'placeholder'
        container = 'placeholder'
    }

    withLabel: generate_vcf {
        executor  = 'awsbatch'
        queue     = 'placeholder'
        container = 'placeholder'
    }

    withLabel: vcf_analysis {
        executor  = 'awsbatch'
        queue     = 'placeholder'
        container = 'placeholder'
    }
}

// AWS settings: region, plus the AWS CLI location and job role for Batch.
aws.region = 'us-east-1'
aws.batch.cliPath = '/home/ec2-user/miniconda/bin/aws'
aws.batch.jobRole = 'placeholder'

// Pipeline work directory -- replace the placeholder before running.
workDir = 'placeholder'

// Enable Docker for container execution.
docker {
    enabled = true
}

In [None]:
# Launch main.nf with Nextflow from the notebook's working directory;
# the -dsl2 flag asks Nextflow to run the script with DSL2 syntax.
!nextflow run main.nf -dsl2