In [52]:
%%writefile main.nf
#!/usr/bin/env nextflow

/* Opt in to DSL2 from the script itself so it does not depend on the
   `-dsl2` command-line flag being passed at launch. */
nextflow.enable.dsl = 2

/* pipeline input parameters
   The glob can be overridden per run with `--dicom_data '<glob>'`;
   the default is the sample series shipped under sdk_data. */
params.dicom_data = "$baseDir/sdk_data/03/9409a5116a054f405ba06d10bebeb6/*.dcm"
dicom_data = params.dicom_data


// Convert one staged DICOM file into an 8-bit PNG written next to it.
//   input : dicom_files      - path of the DICOM file staged into the task dir
//   output: dicom_to_png_log - whatever the script prints on stdout
//           png_files        - the generated *.png
process dicom_to_png {

    label 'dcm2png'

    input:
    path dicom_files

    output:
    stdout emit: dicom_to_png_log
    path('*.png'), emit: png_files

    script:
    """
    #!/usr/bin/env python3

    import os

    import numpy as np
    import pydicom
    from PIL import Image

    dicom_input = '$dicom_files'

    def main(dicom_input):
        # Replace only the trailing extension, so a '.dcm' occurring earlier
        # in the file name does not truncate the output name.
        png_out = os.path.splitext(dicom_input)[0] + '.png'
        dicom_dataset = pydicom.dcmread(dicom_input)

        # Clamp negative samples to zero before rescaling to 0..255.
        pixels = np.maximum(dicom_dataset.pixel_array.astype(float), 0)
        peak = pixels.max()
        if peak > 0:
            scaled_image = np.uint8((pixels / peak) * 255.0)
        else:
            # Blank (all-zero or all-negative) image: dividing by the peak
            # would produce NaN, so emit an all-black image instead.
            scaled_image = np.zeros(pixels.shape, dtype=np.uint8)

        Image.fromarray(scaled_image).save(png_out)

    if __name__ == '__main__':
        main(dicom_input)
    """
}

// Collect DICOM header metadata from the task directory into a CSV.
//   input : dicom_files          - path of the DICOM file staged into the task dir
//   output: extract_metadata_log - whatever the script prints on stdout
//           csv_files            - the generated *.csv (dicom-metadata.csv)
process extract_metadata {

    label 'ext_metadata'

    input:
    path dicom_files

    output:
    stdout emit: extract_metadata_log
    path('*.csv'), emit: csv_files

    script:
    """
    #!/usr/bin/env python3

    from dicom_csv import join_tree

    dicom_input = '$dicom_files'
    metadata_csv = 'dicom-metadata.csv'

    def main(dicom_input, metadata_csv):
        # join_tree scans the current (task) directory; dicom_input is kept
        # in the signature but is not needed to locate the staged file.
        metadata_df = join_tree('.', verbose=2)

        # Keep rows that carry a PixelRepresentation value, then de-duplicate.
        # drop_duplicates() returns a new frame, so the filtered slice is
        # never mutated in place (avoids pandas' SettingWithCopyWarning).
        has_pixel_info = metadata_df['PixelRepresentation'].notnull()
        dicom_metadata_df = metadata_df.loc[has_pixel_info].drop_duplicates()
        return dicom_metadata_df.to_csv(metadata_csv)

    if __name__ == '__main__':
        main(dicom_input, metadata_csv)
    """

}


// Define the entry workflow (initial workflow for Nextflow to run)
workflow {
    // One channel item per file matching the input glob. Abort with a clear
    // message when nothing matches, instead of finishing with zero tasks.
    dicom_files = Channel
        .fromPath(dicom_data)
        .ifEmpty { error "No DICOM files found matching: ${dicom_data}" }

    // Both processes consume the same channel, one task per DICOM file.
    dicom_to_png(dicom_files)
    extract_metadata(dicom_files)
}

Overwriting main.nf


In [53]:
%%writefile nextflow.config

process {
    // Both pipeline steps use the same executor, queue and container, so a
    // single selector matching either label configures them together.
    withLabel: 'dcm2png|ext_metadata' {
        executor = 'awsbatch'
        queue = 'qa-brh-planx-pla-net-nf-job-queue-aartiv-40uchicago-2eedu'
        container = 'public.ecr.aws/l5b8a5z6/nextflow-approved:batch_poc2'
    }
}

// AWS settings: region, plus the AWS CLI location and IAM job role used for Batch jobs.
aws.region = 'us-east-1'
aws.batch.cliPath = '/home/ec2-user/miniconda/bin/aws'
aws.batch.jobRole = 'arn:aws:iam::366143200747:role/qa-brh-planx-pla-net-hatchery-nf-aartiv-40uchicago-2eedu/qa-brh-planx-pla-net-nf-jobs-aartiv-40uchicago-2eedu'
// Task work directories are kept under this S3 prefix.
workDir = 's3://qa-brh-planx-pla-net-nf-366143200747/aartiv-40uchicago-2eedu'

// Run each task inside its configured container.
docker {
    enabled = true
}

Overwriting nextflow.config


In [54]:
# Launch the pipeline written to main.nf above; -dsl2 enables the DSL2 syntax
# that the script's `workflow` block and named `emit:` outputs rely on.
!nextflow run main.nf -dsl2

N E X T F L O W  ~  version 21.10.6
Launching `main.nf` [condescending_babbage] - revision: 8a5860a0d6
Uploading local `bin` scripts folder to s3://qa-brh-planx-pla-net-nf-366143200747/aartiv-40uchicago-2eedu/tmp/26/93c2093cd80a922410fb9bcff24a04/bin
[-        ] process > dicom_to_png     -[K
[-        ] process > extract_metadata -[K
[3A
[-        ] process > dicom_to_png     [  0%] 0 of 1[K
[-        ] process > extract_metadata [  0%] 0 of 2[K
[3A
[-        ] process > dicom_to_png     [  0%] 0 of 2[K
[-        ] process > extract_metadata [  0%] 0 of 4[K
[3A
executor >  awsbatch (5)[K
[fc/c02f00] process > dicom_to_png (3)     [  0%] 0 of 4[K
[7a/1778e5] process > extract_metadata (2) [  0%] 0 of 6[K
[4A
executor >  awsbatch (10)[K
[5b/c00960] process > dicom_to_png (4)     [  0%] 0 of 7[K
[ad/3a1082] process > extract_metadata (4) [  0%] 0 of 8[K
[4A
executor >  awsbatch (16)[K
[64/6c413b] process > dicom_to_png (6)     [  0%] 0 of 9[K
[1d/770920] process > extr