# Dual CRISPR Screen Analysis
# Construct Scaffold Trimming
Amanda Birmingham, CCBB, UCSD (abirmingham@ucsd.edu)

## Instructions

To run this notebook reproducibly, follow these steps:
1. Click **Kernel** > **Restart & Clear Output**
2. When prompted, click the red **Restart & clear all outputs** button
3. In the [Input Parameters](#input-parameters) section, set each variable to the value for your analysis
4. Click **Cell** > **Run All**

<a name = "input-parameters"></a>

## Input Parameters

In [1]:
# Number of processors handed to parallel_process_paired_reads in the Scaffold Trim Execution cell
g_num_processors = 4
# Directory holding the raw fastq files; it is searched (including subdirectories) for gzipped fastqs,
# which are then gunzipped and moved to its top level by later cells
g_fastqs_dir = '/data/raw/20160706_D00611_0304_BHVVJ3BCXX'
# Directory that receives the scaffold-trimmed fastq files
g_trimmed_fastqs_dir = '/data/interim/20160706_D00611_0304_BHVVJ3BCXX'
# Construct scaffold sequences flanking the region to keep: 5' and 3' sequences for the
# forward (r1) and reverse (r2) reads; each 5'/3' pair is trimmed in one linked cutadapt run
g_full_5p_r1 = 'TATATATCTTGTGGAAAGGACGAAACACCG'
g_full_5p_r2 = 'CCTTATTTTAACTTGCTATTTCTAGCTCTAAAAC'
g_full_3p_r1 = 'GTTTCAGAGCTATGCTGGAAACTGCATAGCAAGTTGAAATAAGGCTAGTCCGTTATCAACTTGAAAAAGTGGCACCGAGTCGGTGCTTTTTTGTACTGAG'
g_full_3p_r2 = 'CAAACAAGGCTTTTCTCCAAGGGATATTTATAGTCTCAAAACACACAATTACTTTACAGTTAGGGTGAGTTTCCTTTTGTGCTGTTTTTTAAAATA'
# Directory appended to sys.path so that the ccbbucsd package imports in later cells resolve
g_code_location = '/home/ec2-user/jupyter-genomics/src/crispr'

## CCBB Library Imports

In [2]:
import sys
# Make the code under g_code_location importable for the ccbbucsd imports used in later cells
sys.path.append(g_code_location)

## Automated Set-Up

In [3]:
# %load -s describe_var_list /Users/Birmingham/Repositories/ccbb_tickets/20160210_mali_crispr/src/python/ccbbucsd/utilities/analysis_run_prefixes.py
def describe_var_list(input_var_name_list):
    """Build a "name: value" report with one line per entry of input_var_name_list.

    Each entry is passed to eval() to obtain its value, so entries must be names (or
    expressions) that resolve where this function is defined.  Only pass trusted,
    hard-coded strings: eval() executes whatever text it is given.

    Returns all report lines joined into one string, each line ending in a newline.
    """
    # The generator keeps eval() inside its own small scope, as the original comprehension did
    return "".join("{0}: {1}\n".format(name, eval(name)) for name in input_var_name_list)


In [4]:
from ccbbucsd.utilities.analysis_run_prefixes import check_or_set, get_run_prefix, get_timestamp
# NOTE(review): check_or_set is defined outside this notebook; presumably it keeps
# g_trimmed_fastqs_dir when a value was supplied and otherwise falls back to g_fastqs_dir -- confirm
g_trimmed_fastqs_dir = check_or_set(g_trimmed_fastqs_dir, g_fastqs_dir)
# Echo the resolved value so it is recorded in the notebook output
print(describe_var_list(['g_trimmed_fastqs_dir']))

g_trimmed_fastqs_dir: /data/interim/20160706_D00611_0304_BHVVJ3BCXX



In [5]:
from ccbbucsd.utilities.files_and_paths import verify_or_make_dir
# NOTE(review): presumably creates the trimmed-output directory if it does not exist yet -- confirm
verify_or_make_dir(g_trimmed_fastqs_dir)

## Info Logging Pass-Through

In [6]:
from ccbbucsd.utilities.notebook_logging import set_stdout_info_logger
# NOTE(review): presumably routes INFO-level log messages (e.g. cutadapt's) to the cell output -- confirm
set_stdout_info_logger()

## Scaffold Trimming Functions

In [7]:
# %load /Users/Birmingham/Repositories/ccbb_tickets/20160210_mali_crispr/src/python/ccbbucsd/malicrispr/scaffold_trim.py
# standard libraries
import enum

# third-party libraries
import cutadapt.scripts.cutadapt

# ccbb libraries
from ccbbucsd.utilities.files_and_paths import get_file_name_pieces, make_file_path

__author__ = 'Amanda Birmingham'
__maintainer__ = "Amanda Birmingham"
__email__ = "abirmingham@ucsd.edu"
__status__ = "prototype"


class TrimType(enum.Enum):
    """Which end(s) of a read a trimming run removes; the value is embedded in output file names."""
    # Trim at the 5' end only (used with cutadapt's -g switch in _run_cutadapt_global)
    FIVE = "5"
    # Trim at the 3' end only (used with cutadapt's -a switch in _run_cutadapt_global)
    THREE = "3"
    # Trim both ends in one run (used by trim_linked_scaffold)
    FIVE_THREE = "53"


def get_trimmed_suffix(trimtype):
    """Return the file-name suffix given to fastq files trimmed in the way named by trimtype.

    trimtype: a TrimType member; its value is inserted between "_trimmed" and ".fastq".
    """
    trim_code = trimtype.value
    return "_trimmed{0}.fastq".format(trim_code)


def trim_linked_scaffold(output_dir, fastq_fp, scaffold_seq_5p, scaffold_seq_3p, quiet=True):
    """Trim the 5' and 3' scaffold sequences from fastq_fp in a single cutadapt run.

    The two sequences are joined with "..." and passed to cutadapt's -a switch
    (cutadapt's linked-adapter notation).

    output_dir: directory in which the trimmed fastq is written
    fastq_fp: path of the fastq file to trim
    scaffold_seq_5p, scaffold_seq_3p: scaffold sequences expected at the 5' and 3' ends
    quiet: when true, "--quiet" is added to the cutadapt arguments

    Returns the path of the trimmed fastq file.
    """
    linked_adapter = "{0}...{1}".format(scaffold_seq_5p, scaffold_seq_3p)
    cutadapt_args = ["-a", linked_adapter]
    return _run_cutadapt(output_dir, fastq_fp, TrimType.FIVE_THREE, cutadapt_args, quiet)


def trim_global_scaffold(output_dir, fastq_fp, scaffold_seq_5p=None, scaffold_seq_3p=None, quiet=True):
    """Trim the 5' and/or 3' scaffold sequence from fastq_fp, one cutadapt run per end.

    The 5' trim (if requested) runs first and its output file is the input of the
    3' trim (if requested).  A sequence left as None skips that end entirely.

    output_dir: directory in which trimmed fastq files are written
    fastq_fp: path of the fastq file to trim
    scaffold_seq_5p, scaffold_seq_3p: sequences to trim, or None to skip that end
    quiet: when true, "--quiet" is added to the cutadapt arguments

    Returns the path of the last file produced, or fastq_fp itself if nothing was trimmed.
    """
    latest_fp = fastq_fp
    # Order matters: 5' first, then 3' on the result of the 5' trim
    trim_steps = ((scaffold_seq_5p, True), (scaffold_seq_3p, False))
    for scaffold_seq, is_5p in trim_steps:
        if scaffold_seq is None:
            continue
        latest_fp = _run_cutadapt_global(output_dir, latest_fp, scaffold_seq, is_5p, quiet)
    return latest_fp


def _run_cutadapt_global(output_dir, input_fastq_fp, seq_to_trim, is_5p, quiet):
    """Run cutadapt to trim seq_to_trim from one end of the reads in input_fastq_fp.

    is_5p: true selects cutadapt's -g switch and TrimType.FIVE naming;
           false selects the -a switch and TrimType.THREE naming.

    Returns the path of the trimmed fastq file.
    """
    if is_5p:
        end_switch, end_name = "-g", TrimType.FIVE
    else:
        end_switch, end_name = "-a", TrimType.THREE

    return _run_cutadapt(output_dir, input_fastq_fp, end_name, [end_switch, seq_to_trim], quiet)


def _run_cutadapt(output_dir, input_fastq_fp, trim_name, partial_args, quiet):
    """Invoke cutadapt in-process on input_fastq_fp and return the output file path.

    output_dir: directory in which the trimmed fastq is written
    input_fastq_fp: path of the fastq file to trim
    trim_name: TrimType member used to build the output file's suffix
    partial_args: cutadapt arguments selecting what to trim (e.g. ["-a", sequence]);
                  the caller's list is copied, not modified
    quiet: when true, "--quiet" is added to the cutadapt arguments
    """
    # Only the middle piece (the base name) of the three returned pieces is needed
    _, base_name, _ = get_file_name_pieces(input_fastq_fp)
    trimmed_fastq_fp = make_file_path(output_dir, base_name, get_trimmed_suffix(trim_name))

    quiet_args = ["--quiet"] if quiet else []
    io_args = ["-o", trimmed_fastq_fp, input_fastq_fp]
    cutadapt_args = list(partial_args) + quiet_args + io_args

    cutadapt.scripts.cutadapt.main(cutadapt_args)
    return trimmed_fastq_fp


In [8]:
def trim_fw_and_rv_reads(output_dir, full_5p_r1, full_3p_r1, full_5p_r2, full_3p_r2, fw_fastq_fp, rv_fastq_fp):
    """Trim the linked scaffold from a forward/reverse pair of fastq files.

    The forward file is trimmed with the r1 scaffold sequences and the reverse file
    with the r2 scaffold sequences; both trimmed files are written to output_dir.
    Nothing is returned.
    """
    per_read_inputs = (
        (fw_fastq_fp, full_5p_r1, full_3p_r1),
        (rv_fastq_fp, full_5p_r2, full_3p_r2),
    )
    for fastq_fp, scaffold_5p, scaffold_3p in per_read_inputs:
        trim_linked_scaffold(output_dir, fastq_fp, scaffold_5p, scaffold_3p)

## Gzipped FASTQ Filenames

In [9]:
# Extension of uncompressed sequence files
g_seq_file_ext_name = ".fastq"
# Extension appended to g_seq_file_ext_name for gzipped sequence files (".fastq.gz")
g_gzip_ext_name = ".gz"

In [10]:
from ccbbucsd.utilities.files_and_paths import summarize_filenames_for_prefix_and_suffix
# List the gzipped fastq files (any prefix, ".fastq.gz" suffix) found under the raw directory,
# including its subdirectories, so the inputs are recorded before they are unzipped
print(summarize_filenames_for_prefix_and_suffix(g_fastqs_dir, "", 
                                                "{0}{1}".format(g_seq_file_ext_name, g_gzip_ext_name), 
                                                all_subdirs=True))

A549_MV4_d14_1_S3_L001_R1_001.fastq.gz
A549_MV4_d14_1_S3_L001_R2_001.fastq.gz
A549_MV4_d14_1_S3_L002_R1_001.fastq.gz
A549_MV4_d14_1_S3_L002_R2_001.fastq.gz
A549_MV4_d14_2_S4_L001_R1_001.fastq.gz
A549_MV4_d14_2_S4_L001_R2_001.fastq.gz
A549_MV4_d14_2_S4_L002_R1_001.fastq.gz
A549_MV4_d14_2_S4_L002_R2_001.fastq.gz
A549_MV4_d20_1_S5_L001_R1_001.fastq.gz
A549_MV4_d20_1_S5_L001_R2_001.fastq.gz
A549_MV4_d20_1_S5_L002_R1_001.fastq.gz
A549_MV4_d20_1_S5_L002_R2_001.fastq.gz
A549_MV4_d20_2_S6_L001_R1_001.fastq.gz
A549_MV4_d20_2_S6_L001_R2_001.fastq.gz
A549_MV4_d20_2_S6_L002_R1_001.fastq.gz
A549_MV4_d20_2_S6_L002_R2_001.fastq.gz
A549_MV4_d28_1_S7_L001_R1_001.fastq.gz
A549_MV4_d28_1_S7_L001_R2_001.fastq.gz
A549_MV4_d28_1_S7_L002_R1_001.fastq.gz
A549_MV4_d28_1_S7_L002_R2_001.fastq.gz
A549_MV4_d28_2_S8_L001_R1_001.fastq.gz
A549_MV4_d28_2_S8_L001_R2_001.fastq.gz
A549_MV4_d28_2_S8_L002_R1_001.fastq.gz
A549_MV4_d28_2_S8_L002_R2_001.fastq.gz
A549_MV4_d3_1_S1_L001_R1_001.fastq.gz
A549_MV4_d3_1_S1_L001_R2_0

## FASTQ Gunzip Execution

In [11]:
from ccbbucsd.utilities.files_and_paths import gunzip_wildpath, move_to_dir_and_flatten

def unzip_and_flatten_seq_files(top_fastqs_dir, ext_name, gzip_ext_name, keep_gzs):
    """Gunzip every sequence file under top_fastqs_dir, then gather them at its top level.

    top_fastqs_dir: directory searched (recursively) for gzipped sequence files
    ext_name: extension of the uncompressed sequence files
    gzip_ext_name: extension appended to ext_name on the gzipped files
    keep_gzs: passed through to gunzip_wildpath to say whether the gzipped files are kept
    """
    # Step 1: expand all gzipped sequence files anywhere below the input directory
    gzipped_suffix = ext_name + gzip_ext_name
    search_recursively = True
    gunzip_wildpath(top_fastqs_dir, gzipped_suffix, keep_gzs, search_recursively)

    # Step 2: move the expanded files up to the top-level directory, so that
    # later steps do not need to search recursively
    move_to_dir_and_flatten(top_fastqs_dir, top_fastqs_dir, ext_name)

In [12]:
# Final argument is keep_gzs: False = expand the gzs without keeping them,
# True = keep the gzs as well as expanding them (True only works for gzip 1.6+).
# Note that this cell changes the raw directory in place: files are expanded and moved to its top level.
unzip_and_flatten_seq_files(g_fastqs_dir, g_seq_file_ext_name, g_gzip_ext_name, False)  

## FASTQ Filenames

In [13]:
# List the uncompressed fastq files now present in the raw directory (any prefix, ".fastq" suffix)
print(summarize_filenames_for_prefix_and_suffix(g_fastqs_dir, "", g_seq_file_ext_name))

A549_MV4_d14_1_S3_L001_R1_001.fastq
A549_MV4_d14_1_S3_L001_R2_001.fastq
A549_MV4_d14_1_S3_L002_R1_001.fastq
A549_MV4_d14_1_S3_L002_R2_001.fastq
A549_MV4_d14_2_S4_L001_R1_001.fastq
A549_MV4_d14_2_S4_L001_R2_001.fastq
A549_MV4_d14_2_S4_L002_R1_001.fastq
A549_MV4_d14_2_S4_L002_R2_001.fastq
A549_MV4_d20_1_S5_L001_R1_001.fastq
A549_MV4_d20_1_S5_L001_R2_001.fastq
A549_MV4_d20_1_S5_L002_R1_001.fastq
A549_MV4_d20_1_S5_L002_R2_001.fastq
A549_MV4_d20_2_S6_L001_R1_001.fastq
A549_MV4_d20_2_S6_L001_R2_001.fastq
A549_MV4_d20_2_S6_L002_R1_001.fastq
A549_MV4_d20_2_S6_L002_R2_001.fastq
A549_MV4_d28_1_S7_L001_R1_001.fastq
A549_MV4_d28_1_S7_L001_R2_001.fastq
A549_MV4_d28_1_S7_L002_R1_001.fastq
A549_MV4_d28_1_S7_L002_R2_001.fastq
A549_MV4_d28_2_S8_L001_R1_001.fastq
A549_MV4_d28_2_S8_L001_R2_001.fastq
A549_MV4_d28_2_S8_L002_R1_001.fastq
A549_MV4_d28_2_S8_L002_R2_001.fastq
A549_MV4_d3_1_S1_L001_R1_001.fastq
A549_MV4_d3_1_S1_L001_R2_001.fastq
A549_MV4_d3_1_S1_L002_R1_001.fastq
A549_MV4_d3_1_S1_L002_R2_001.fa

## Scaffold Trim Execution

In [14]:
from ccbbucsd.utilities.parallel_process_fastqs import parallel_process_paired_reads, concatenate_parallel_results
# Run trim_fw_and_rv_reads over the fastq files in g_fastqs_dir using g_num_processors processors.
# The list supplies that function's first five arguments (output_dir and the four scaffold sequences);
# NOTE(review): presumably parallel_process_paired_reads appends each sample's forward and reverse
# fastq paths as the last two arguments -- confirm against its source
g_parallel_results = parallel_process_paired_reads(g_fastqs_dir, g_seq_file_ext_name, g_num_processors, 
                                                   trim_fw_and_rv_reads, [g_trimmed_fastqs_dir, g_full_5p_r1, 
                                                                          g_full_3p_r1, g_full_5p_r2, g_full_3p_r2])

Starting parallel processing at 2016-07-14 00:08:53.018504


Starting A549_MV4_d14_2_S4_L001_001 at 2016-07-14 00:08:53.035748


Starting A549_MV4_d20_2_S6_L001_001 at 2016-07-14 00:08:53.035927


Starting A549_MV4_d14_1_S3_L001_001 at 2016-07-14 00:08:53.035676


Starting A549_MV4_d20_1_S5_L001_001 at 2016-07-14 00:08:53.035851


This is cutadapt 1.10 with Python 3.4.5


This is cutadapt 1.10 with Python 3.4.5


This is cutadapt 1.10 with Python 3.4.5


This is cutadapt 1.10 with Python 3.4.5


Command line parameters: -a TATATATCTTGTGGAAAGGACGAAACACCG...GTTTCAGAGCTATGCTGGAAACTGCATAGCAAGTTGAAATAAGGCTAGTCCGTTATCAACTTGAAAAAGTGGCACCGAGTCGGTGCTTTTTTGTACTGAG --quiet -o /data/interim/20160706_D00611_0304_BHVVJ3BCXX/A549_MV4_d14_2_S4_L001_R1_001_trimmed53.fastq /data/raw/20160706_D00611_0304_BHVVJ3BCXX/A549_MV4_d14_2_S4_L001_R1_001.fastq


Command line parameters: -a TATATATCTTGTGGAAAGGACGAAACACCG...GTTTCAGAGCTATGCTGGAAACTGCATAGCAAGTTGAAATAAGGCTAGTCCGTTATCAACTTGAAAAAGTGGCACCGAGTCGGTGCTTTTTTGTACTGAG --quiet -o /data/interim/20160706_D00611_0304_BHVVJ3BCXX/A549_MV4_d20_2_S6_L001_R1_001_trimmed53.fastq /data/raw/20160706_D00611_0304_BHVVJ3BCXX/A549_MV4_d20_2_S6_L001_R1_001.fastq


Command line parameters: -a TATATATCTTGTGGAAAGGACGAAACACCG...GTTTCAGAGCTATGCTGGAAACTGCATAGCAAGTTGAAATAAGGCTAGTCCGTTATCAACTTGAAAAAGTGGCACCGAGTCGGTGCTTTTTTGTACTGAG --quiet -o /data/interim/20160706_D00611_0304_BHVVJ3BCXX/A549_MV4_d14_1_S3_L001_R1_001_trimmed53.fastq /data/raw/20160706_D00611_0304_BHVVJ3BCXX/A549_MV4_d14_1_S3_L001_R1_001.fastq


Trimming 1 adapter with at most 10.0% errors in single-end mode ...


Command line parameters: -a TATATATCTTGTGGAAAGGACGAAACACCG...GTTTCAGAGCTATGCTGGAAACTGCATAGCAAGTTGAAATAAGGCTAGTCCGTTATCAACTTGAAAAAGTGGCACCGAGTCGGTGCTTTTTTGTACTGAG --quiet -o /data/interim/20160706_D00611_0304_BHVVJ3BCXX/A549_MV4_d20_1_S5_L001_R1_001_trimmed53.fastq /data/raw/20160706_D00611_0304_BHVVJ3BCXX/A549_MV4_d20_1_S5_L001_R1_001.fastq


Trimming 1 adapter with at most 10.0% errors in single-end mode ...


Trimming 1 adapter with at most 10.0% errors in single-end mode ...


Trimming 1 adapter with at most 10.0% errors in single-end mode ...


This is cutadapt 1.10 with Python 3.4.5


Command line parameters: -a CCTTATTTTAACTTGCTATTTCTAGCTCTAAAAC...CAAACAAGGCTTTTCTCCAAGGGATATTTATAGTCTCAAAACACACAATTACTTTACAGTTAGGGTGAGTTTCCTTTTGTGCTGTTTTTTAAAATA --quiet -o /data/interim/20160706_D00611_0304_BHVVJ3BCXX/A549_MV4_d20_1_S5_L001_R2_001_trimmed53.fastq /data/raw/20160706_D00611_0304_BHVVJ3BCXX/A549_MV4_d20_1_S5_L001_R2_001.fastq


Trimming 1 adapter with at most 10.0% errors in single-end mode ...


This is cutadapt 1.10 with Python 3.4.5


Command line parameters: -a CCTTATTTTAACTTGCTATTTCTAGCTCTAAAAC...CAAACAAGGCTTTTCTCCAAGGGATATTTATAGTCTCAAAACACACAATTACTTTACAGTTAGGGTGAGTTTCCTTTTGTGCTGTTTTTTAAAATA --quiet -o /data/interim/20160706_D00611_0304_BHVVJ3BCXX/A549_MV4_d14_1_S3_L001_R2_001_trimmed53.fastq /data/raw/20160706_D00611_0304_BHVVJ3BCXX/A549_MV4_d14_1_S3_L001_R2_001.fastq


Trimming 1 adapter with at most 10.0% errors in single-end mode ...


This is cutadapt 1.10 with Python 3.4.5


Command line parameters: -a CCTTATTTTAACTTGCTATTTCTAGCTCTAAAAC...CAAACAAGGCTTTTCTCCAAGGGATATTTATAGTCTCAAAACACACAATTACTTTACAGTTAGGGTGAGTTTCCTTTTGTGCTGTTTTTTAAAATA --quiet -o /data/interim/20160706_D00611_0304_BHVVJ3BCXX/A549_MV4_d14_2_S4_L001_R2_001_trimmed53.fastq /data/raw/20160706_D00611_0304_BHVVJ3BCXX/A549_MV4_d14_2_S4_L001_R2_001.fastq


Trimming 1 adapter with at most 10.0% errors in single-end mode ...


This is cutadapt 1.10 with Python 3.4.5


Command line parameters: -a CCTTATTTTAACTTGCTATTTCTAGCTCTAAAAC...CAAACAAGGCTTTTCTCCAAGGGATATTTATAGTCTCAAAACACACAATTACTTTACAGTTAGGGTGAGTTTCCTTTTGTGCTGTTTTTTAAAATA --quiet -o /data/interim/20160706_D00611_0304_BHVVJ3BCXX/A549_MV4_d20_2_S6_L001_R2_001_trimmed53.fastq /data/raw/20160706_D00611_0304_BHVVJ3BCXX/A549_MV4_d20_2_S6_L001_R2_001.fastq


Trimming 1 adapter with at most 10.0% errors in single-end mode ...


A549_MV4_d20_1_S5_L001_001 elapsed time: 0:06:55


Starting A549_MV4_d20_1_S5_L002_001 at 2016-07-14 00:15:48.584083


This is cutadapt 1.10 with Python 3.4.5


Command line parameters: -a TATATATCTTGTGGAAAGGACGAAACACCG...GTTTCAGAGCTATGCTGGAAACTGCATAGCAAGTTGAAATAAGGCTAGTCCGTTATCAACTTGAAAAAGTGGCACCGAGTCGGTGCTTTTTTGTACTGAG --quiet -o /data/interim/20160706_D00611_0304_BHVVJ3BCXX/A549_MV4_d20_1_S5_L002_R1_001_trimmed53.fastq /data/raw/20160706_D00611_0304_BHVVJ3BCXX/A549_MV4_d20_1_S5_L002_R1_001.fastq


Trimming 1 adapter with at most 10.0% errors in single-end mode ...


A549_MV4_d14_1_S3_L001_001 elapsed time: 0:07:22


Starting A549_MV4_d14_1_S3_L002_001 at 2016-07-14 00:16:15.743650


This is cutadapt 1.10 with Python 3.4.5


Command line parameters: -a TATATATCTTGTGGAAAGGACGAAACACCG...GTTTCAGAGCTATGCTGGAAACTGCATAGCAAGTTGAAATAAGGCTAGTCCGTTATCAACTTGAAAAAGTGGCACCGAGTCGGTGCTTTTTTGTACTGAG --quiet -o /data/interim/20160706_D00611_0304_BHVVJ3BCXX/A549_MV4_d14_1_S3_L002_R1_001_trimmed53.fastq /data/raw/20160706_D00611_0304_BHVVJ3BCXX/A549_MV4_d14_1_S3_L002_R1_001.fastq


Trimming 1 adapter with at most 10.0% errors in single-end mode ...


A549_MV4_d14_2_S4_L001_001 elapsed time: 0:07:58


Starting A549_MV4_d14_2_S4_L002_001 at 2016-07-14 00:16:51.828649


This is cutadapt 1.10 with Python 3.4.5


Command line parameters: -a TATATATCTTGTGGAAAGGACGAAACACCG...GTTTCAGAGCTATGCTGGAAACTGCATAGCAAGTTGAAATAAGGCTAGTCCGTTATCAACTTGAAAAAGTGGCACCGAGTCGGTGCTTTTTTGTACTGAG --quiet -o /data/interim/20160706_D00611_0304_BHVVJ3BCXX/A549_MV4_d14_2_S4_L002_R1_001_trimmed53.fastq /data/raw/20160706_D00611_0304_BHVVJ3BCXX/A549_MV4_d14_2_S4_L002_R1_001.fastq


Trimming 1 adapter with at most 10.0% errors in single-end mode ...


A549_MV4_d20_2_S6_L001_001 elapsed time: 0:10:15


Starting A549_MV4_d20_2_S6_L002_001 at 2016-07-14 00:19:08.585885


This is cutadapt 1.10 with Python 3.4.5


Command line parameters: -a TATATATCTTGTGGAAAGGACGAAACACCG...GTTTCAGAGCTATGCTGGAAACTGCATAGCAAGTTGAAATAAGGCTAGTCCGTTATCAACTTGAAAAAGTGGCACCGAGTCGGTGCTTTTTTGTACTGAG --quiet -o /data/interim/20160706_D00611_0304_BHVVJ3BCXX/A549_MV4_d20_2_S6_L002_R1_001_trimmed53.fastq /data/raw/20160706_D00611_0304_BHVVJ3BCXX/A549_MV4_d20_2_S6_L002_R1_001.fastq


Trimming 1 adapter with at most 10.0% errors in single-end mode ...


This is cutadapt 1.10 with Python 3.4.5


Command line parameters: -a CCTTATTTTAACTTGCTATTTCTAGCTCTAAAAC...CAAACAAGGCTTTTCTCCAAGGGATATTTATAGTCTCAAAACACACAATTACTTTACAGTTAGGGTGAGTTTCCTTTTGTGCTGTTTTTTAAAATA --quiet -o /data/interim/20160706_D00611_0304_BHVVJ3BCXX/A549_MV4_d20_1_S5_L002_R2_001_trimmed53.fastq /data/raw/20160706_D00611_0304_BHVVJ3BCXX/A549_MV4_d20_1_S5_L002_R2_001.fastq


Trimming 1 adapter with at most 10.0% errors in single-end mode ...


This is cutadapt 1.10 with Python 3.4.5


Command line parameters: -a CCTTATTTTAACTTGCTATTTCTAGCTCTAAAAC...CAAACAAGGCTTTTCTCCAAGGGATATTTATAGTCTCAAAACACACAATTACTTTACAGTTAGGGTGAGTTTCCTTTTGTGCTGTTTTTTAAAATA --quiet -o /data/interim/20160706_D00611_0304_BHVVJ3BCXX/A549_MV4_d14_1_S3_L002_R2_001_trimmed53.fastq /data/raw/20160706_D00611_0304_BHVVJ3BCXX/A549_MV4_d14_1_S3_L002_R2_001.fastq


Trimming 1 adapter with at most 10.0% errors in single-end mode ...


This is cutadapt 1.10 with Python 3.4.5


Command line parameters: -a CCTTATTTTAACTTGCTATTTCTAGCTCTAAAAC...CAAACAAGGCTTTTCTCCAAGGGATATTTATAGTCTCAAAACACACAATTACTTTACAGTTAGGGTGAGTTTCCTTTTGTGCTGTTTTTTAAAATA --quiet -o /data/interim/20160706_D00611_0304_BHVVJ3BCXX/A549_MV4_d14_2_S4_L002_R2_001_trimmed53.fastq /data/raw/20160706_D00611_0304_BHVVJ3BCXX/A549_MV4_d14_2_S4_L002_R2_001.fastq


Trimming 1 adapter with at most 10.0% errors in single-end mode ...


A549_MV4_d20_1_S5_L002_001 elapsed time: 0:07:12


Starting A549_MV4_d28_1_S7_L001_001 at 2016-07-14 00:23:01.246675


This is cutadapt 1.10 with Python 3.4.5


Command line parameters: -a TATATATCTTGTGGAAAGGACGAAACACCG...GTTTCAGAGCTATGCTGGAAACTGCATAGCAAGTTGAAATAAGGCTAGTCCGTTATCAACTTGAAAAAGTGGCACCGAGTCGGTGCTTTTTTGTACTGAG --quiet -o /data/interim/20160706_D00611_0304_BHVVJ3BCXX/A549_MV4_d28_1_S7_L001_R1_001_trimmed53.fastq /data/raw/20160706_D00611_0304_BHVVJ3BCXX/A549_MV4_d28_1_S7_L001_R1_001.fastq


Trimming 1 adapter with at most 10.0% errors in single-end mode ...


A549_MV4_d14_1_S3_L002_001 elapsed time: 0:07:19


Starting A549_MV4_d28_2_S8_L001_001 at 2016-07-14 00:23:35.187163


This is cutadapt 1.10 with Python 3.4.5


Command line parameters: -a TATATATCTTGTGGAAAGGACGAAACACCG...GTTTCAGAGCTATGCTGGAAACTGCATAGCAAGTTGAAATAAGGCTAGTCCGTTATCAACTTGAAAAAGTGGCACCGAGTCGGTGCTTTTTTGTACTGAG --quiet -o /data/interim/20160706_D00611_0304_BHVVJ3BCXX/A549_MV4_d28_2_S8_L001_R1_001_trimmed53.fastq /data/raw/20160706_D00611_0304_BHVVJ3BCXX/A549_MV4_d28_2_S8_L001_R1_001.fastq


Trimming 1 adapter with at most 10.0% errors in single-end mode ...


This is cutadapt 1.10 with Python 3.4.5


Command line parameters: -a CCTTATTTTAACTTGCTATTTCTAGCTCTAAAAC...CAAACAAGGCTTTTCTCCAAGGGATATTTATAGTCTCAAAACACACAATTACTTTACAGTTAGGGTGAGTTTCCTTTTGTGCTGTTTTTTAAAATA --quiet -o /data/interim/20160706_D00611_0304_BHVVJ3BCXX/A549_MV4_d20_2_S6_L002_R2_001_trimmed53.fastq /data/raw/20160706_D00611_0304_BHVVJ3BCXX/A549_MV4_d20_2_S6_L002_R2_001.fastq


Trimming 1 adapter with at most 10.0% errors in single-end mode ...


A549_MV4_d14_2_S4_L002_001 elapsed time: 0:07:49


Starting A549_MV4_d3_1_S1_L001_001 at 2016-07-14 00:24:41.394230


This is cutadapt 1.10 with Python 3.4.5


Command line parameters: -a TATATATCTTGTGGAAAGGACGAAACACCG...GTTTCAGAGCTATGCTGGAAACTGCATAGCAAGTTGAAATAAGGCTAGTCCGTTATCAACTTGAAAAAGTGGCACCGAGTCGGTGCTTTTTTGTACTGAG --quiet -o /data/interim/20160706_D00611_0304_BHVVJ3BCXX/A549_MV4_d3_1_S1_L001_R1_001_trimmed53.fastq /data/raw/20160706_D00611_0304_BHVVJ3BCXX/A549_MV4_d3_1_S1_L001_R1_001.fastq


Trimming 1 adapter with at most 10.0% errors in single-end mode ...


This is cutadapt 1.10 with Python 3.4.5


Command line parameters: -a CCTTATTTTAACTTGCTATTTCTAGCTCTAAAAC...CAAACAAGGCTTTTCTCCAAGGGATATTTATAGTCTCAAAACACACAATTACTTTACAGTTAGGGTGAGTTTCCTTTTGTGCTGTTTTTTAAAATA --quiet -o /data/interim/20160706_D00611_0304_BHVVJ3BCXX/A549_MV4_d28_1_S7_L001_R2_001_trimmed53.fastq /data/raw/20160706_D00611_0304_BHVVJ3BCXX/A549_MV4_d28_1_S7_L001_R2_001.fastq


Trimming 1 adapter with at most 10.0% errors in single-end mode ...


This is cutadapt 1.10 with Python 3.4.5


Command line parameters: -a CCTTATTTTAACTTGCTATTTCTAGCTCTAAAAC...CAAACAAGGCTTTTCTCCAAGGGATATTTATAGTCTCAAAACACACAATTACTTTACAGTTAGGGTGAGTTTCCTTTTGTGCTGTTTTTTAAAATA --quiet -o /data/interim/20160706_D00611_0304_BHVVJ3BCXX/A549_MV4_d28_2_S8_L001_R2_001_trimmed53.fastq /data/raw/20160706_D00611_0304_BHVVJ3BCXX/A549_MV4_d28_2_S8_L001_R2_001.fastq


Trimming 1 adapter with at most 10.0% errors in single-end mode ...


This is cutadapt 1.10 with Python 3.4.5


Command line parameters: -a CCTTATTTTAACTTGCTATTTCTAGCTCTAAAAC...CAAACAAGGCTTTTCTCCAAGGGATATTTATAGTCTCAAAACACACAATTACTTTACAGTTAGGGTGAGTTTCCTTTTGTGCTGTTTTTTAAAATA --quiet -o /data/interim/20160706_D00611_0304_BHVVJ3BCXX/A549_MV4_d3_1_S1_L001_R2_001_trimmed53.fastq /data/raw/20160706_D00611_0304_BHVVJ3BCXX/A549_MV4_d3_1_S1_L001_R2_001.fastq


Trimming 1 adapter with at most 10.0% errors in single-end mode ...


A549_MV4_d20_2_S6_L002_001 elapsed time: 0:10:10


Starting A549_MV4_d3_2_S2_L001_001 at 2016-07-14 00:29:18.755651


This is cutadapt 1.10 with Python 3.4.5


Command line parameters: -a TATATATCTTGTGGAAAGGACGAAACACCG...GTTTCAGAGCTATGCTGGAAACTGCATAGCAAGTTGAAATAAGGCTAGTCCGTTATCAACTTGAAAAAGTGGCACCGAGTCGGTGCTTTTTTGTACTGAG --quiet -o /data/interim/20160706_D00611_0304_BHVVJ3BCXX/A549_MV4_d3_2_S2_L001_R1_001_trimmed53.fastq /data/raw/20160706_D00611_0304_BHVVJ3BCXX/A549_MV4_d3_2_S2_L001_R1_001.fastq


Trimming 1 adapter with at most 10.0% errors in single-end mode ...


A549_MV4_d28_1_S7_L001_001 elapsed time: 0:07:29


Starting A549_MV4_d28_1_S7_L002_001 at 2016-07-14 00:30:30.635393


This is cutadapt 1.10 with Python 3.4.5


Command line parameters: -a TATATATCTTGTGGAAAGGACGAAACACCG...GTTTCAGAGCTATGCTGGAAACTGCATAGCAAGTTGAAATAAGGCTAGTCCGTTATCAACTTGAAAAAGTGGCACCGAGTCGGTGCTTTTTTGTACTGAG --quiet -o /data/interim/20160706_D00611_0304_BHVVJ3BCXX/A549_MV4_d28_1_S7_L002_R1_001_trimmed53.fastq /data/raw/20160706_D00611_0304_BHVVJ3BCXX/A549_MV4_d28_1_S7_L002_R1_001.fastq


Trimming 1 adapter with at most 10.0% errors in single-end mode ...


A549_MV4_d28_2_S8_L001_001 elapsed time: 0:07:57


Starting A549_MV4_d28_2_S8_L002_001 at 2016-07-14 00:31:32.456968


This is cutadapt 1.10 with Python 3.4.5


Command line parameters: -a TATATATCTTGTGGAAAGGACGAAACACCG...GTTTCAGAGCTATGCTGGAAACTGCATAGCAAGTTGAAATAAGGCTAGTCCGTTATCAACTTGAAAAAGTGGCACCGAGTCGGTGCTTTTTTGTACTGAG --quiet -o /data/interim/20160706_D00611_0304_BHVVJ3BCXX/A549_MV4_d28_2_S8_L002_R1_001_trimmed53.fastq /data/raw/20160706_D00611_0304_BHVVJ3BCXX/A549_MV4_d28_2_S8_L002_R1_001.fastq


Trimming 1 adapter with at most 10.0% errors in single-end mode ...


A549_MV4_d3_1_S1_L001_001 elapsed time: 0:07:13


Starting A549_MV4_d3_1_S1_L002_001 at 2016-07-14 00:31:54.754034


This is cutadapt 1.10 with Python 3.4.5


Command line parameters: -a TATATATCTTGTGGAAAGGACGAAACACCG...GTTTCAGAGCTATGCTGGAAACTGCATAGCAAGTTGAAATAAGGCTAGTCCGTTATCAACTTGAAAAAGTGGCACCGAGTCGGTGCTTTTTTGTACTGAG --quiet -o /data/interim/20160706_D00611_0304_BHVVJ3BCXX/A549_MV4_d3_1_S1_L002_R1_001_trimmed53.fastq /data/raw/20160706_D00611_0304_BHVVJ3BCXX/A549_MV4_d3_1_S1_L002_R1_001.fastq


Trimming 1 adapter with at most 10.0% errors in single-end mode ...


This is cutadapt 1.10 with Python 3.4.5


Command line parameters: -a CCTTATTTTAACTTGCTATTTCTAGCTCTAAAAC...CAAACAAGGCTTTTCTCCAAGGGATATTTATAGTCTCAAAACACACAATTACTTTACAGTTAGGGTGAGTTTCCTTTTGTGCTGTTTTTTAAAATA --quiet -o /data/interim/20160706_D00611_0304_BHVVJ3BCXX/A549_MV4_d3_2_S2_L001_R2_001_trimmed53.fastq /data/raw/20160706_D00611_0304_BHVVJ3BCXX/A549_MV4_d3_2_S2_L001_R2_001.fastq


Trimming 1 adapter with at most 10.0% errors in single-end mode ...


This is cutadapt 1.10 with Python 3.4.5


Command line parameters: -a CCTTATTTTAACTTGCTATTTCTAGCTCTAAAAC...CAAACAAGGCTTTTCTCCAAGGGATATTTATAGTCTCAAAACACACAATTACTTTACAGTTAGGGTGAGTTTCCTTTTGTGCTGTTTTTTAAAATA --quiet -o /data/interim/20160706_D00611_0304_BHVVJ3BCXX/A549_MV4_d28_1_S7_L002_R2_001_trimmed53.fastq /data/raw/20160706_D00611_0304_BHVVJ3BCXX/A549_MV4_d28_1_S7_L002_R2_001.fastq


Trimming 1 adapter with at most 10.0% errors in single-end mode ...


This is cutadapt 1.10 with Python 3.4.5


Command line parameters: -a CCTTATTTTAACTTGCTATTTCTAGCTCTAAAAC...CAAACAAGGCTTTTCTCCAAGGGATATTTATAGTCTCAAAACACACAATTACTTTACAGTTAGGGTGAGTTTCCTTTTGTGCTGTTTTTTAAAATA --quiet -o /data/interim/20160706_D00611_0304_BHVVJ3BCXX/A549_MV4_d3_1_S1_L002_R2_001_trimmed53.fastq /data/raw/20160706_D00611_0304_BHVVJ3BCXX/A549_MV4_d3_1_S1_L002_R2_001.fastq


Trimming 1 adapter with at most 10.0% errors in single-end mode ...


This is cutadapt 1.10 with Python 3.4.5


Command line parameters: -a CCTTATTTTAACTTGCTATTTCTAGCTCTAAAAC...CAAACAAGGCTTTTCTCCAAGGGATATTTATAGTCTCAAAACACACAATTACTTTACAGTTAGGGTGAGTTTCCTTTTGTGCTGTTTTTTAAAATA --quiet -o /data/interim/20160706_D00611_0304_BHVVJ3BCXX/A549_MV4_d28_2_S8_L002_R2_001_trimmed53.fastq /data/raw/20160706_D00611_0304_BHVVJ3BCXX/A549_MV4_d28_2_S8_L002_R2_001.fastq


Trimming 1 adapter with at most 10.0% errors in single-end mode ...


A549_MV4_d3_2_S2_L001_001 elapsed time: 0:07:44


Starting A549_MV4_d3_2_S2_L002_001 at 2016-07-14 00:37:03.441826


This is cutadapt 1.10 with Python 3.4.5


Command line parameters: -a TATATATCTTGTGGAAAGGACGAAACACCG...GTTTCAGAGCTATGCTGGAAACTGCATAGCAAGTTGAAATAAGGCTAGTCCGTTATCAACTTGAAAAAGTGGCACCGAGTCGGTGCTTTTTTGTACTGAG --quiet -o /data/interim/20160706_D00611_0304_BHVVJ3BCXX/A549_MV4_d3_2_S2_L002_R1_001_trimmed53.fastq /data/raw/20160706_D00611_0304_BHVVJ3BCXX/A549_MV4_d3_2_S2_L002_R1_001.fastq


Trimming 1 adapter with at most 10.0% errors in single-end mode ...


A549_MV4_d28_1_S7_L002_001 elapsed time: 0:07:36


Starting Hela_MV4_d20_1_S9_L001_001 at 2016-07-14 00:38:07.108974


This is cutadapt 1.10 with Python 3.4.5


Command line parameters: -a TATATATCTTGTGGAAAGGACGAAACACCG...GTTTCAGAGCTATGCTGGAAACTGCATAGCAAGTTGAAATAAGGCTAGTCCGTTATCAACTTGAAAAAGTGGCACCGAGTCGGTGCTTTTTTGTACTGAG --quiet -o /data/interim/20160706_D00611_0304_BHVVJ3BCXX/Hela_MV4_d20_1_S9_L001_R1_001_trimmed53.fastq /data/raw/20160706_D00611_0304_BHVVJ3BCXX/Hela_MV4_d20_1_S9_L001_R1_001.fastq


Trimming 1 adapter with at most 10.0% errors in single-end mode ...


A549_MV4_d3_1_S1_L002_001 elapsed time: 0:07:10


Starting Hela_MV4_d20_2_S10_L001_001 at 2016-07-14 00:39:04.963445


This is cutadapt 1.10 with Python 3.4.5


Command line parameters: -a TATATATCTTGTGGAAAGGACGAAACACCG...GTTTCAGAGCTATGCTGGAAACTGCATAGCAAGTTGAAATAAGGCTAGTCCGTTATCAACTTGAAAAAGTGGCACCGAGTCGGTGCTTTTTTGTACTGAG --quiet -o /data/interim/20160706_D00611_0304_BHVVJ3BCXX/Hela_MV4_d20_2_S10_L001_R1_001_trimmed53.fastq /data/raw/20160706_D00611_0304_BHVVJ3BCXX/Hela_MV4_d20_2_S10_L001_R1_001.fastq


Trimming 1 adapter with at most 10.0% errors in single-end mode ...


A549_MV4_d28_2_S8_L002_001 elapsed time: 0:07:52


This is cutadapt 1.10 with Python 3.4.5


Command line parameters: -a CCTTATTTTAACTTGCTATTTCTAGCTCTAAAAC...CAAACAAGGCTTTTCTCCAAGGGATATTTATAGTCTCAAAACACACAATTACTTTACAGTTAGGGTGAGTTTCCTTTTGTGCTGTTTTTTAAAATA --quiet -o /data/interim/20160706_D00611_0304_BHVVJ3BCXX/A549_MV4_d3_2_S2_L002_R2_001_trimmed53.fastq /data/raw/20160706_D00611_0304_BHVVJ3BCXX/A549_MV4_d3_2_S2_L002_R2_001.fastq


Trimming 1 adapter with at most 10.0% errors in single-end mode ...


This is cutadapt 1.10 with Python 3.4.5


Command line parameters: -a CCTTATTTTAACTTGCTATTTCTAGCTCTAAAAC...CAAACAAGGCTTTTCTCCAAGGGATATTTATAGTCTCAAAACACACAATTACTTTACAGTTAGGGTGAGTTTCCTTTTGTGCTGTTTTTTAAAATA --quiet -o /data/interim/20160706_D00611_0304_BHVVJ3BCXX/Hela_MV4_d20_1_S9_L001_R2_001_trimmed53.fastq /data/raw/20160706_D00611_0304_BHVVJ3BCXX/Hela_MV4_d20_1_S9_L001_R2_001.fastq


Trimming 1 adapter with at most 10.0% errors in single-end mode ...


This is cutadapt 1.10 with Python 3.4.5


Command line parameters: -a CCTTATTTTAACTTGCTATTTCTAGCTCTAAAAC...CAAACAAGGCTTTTCTCCAAGGGATATTTATAGTCTCAAAACACACAATTACTTTACAGTTAGGGTGAGTTTCCTTTTGTGCTGTTTTTTAAAATA --quiet -o /data/interim/20160706_D00611_0304_BHVVJ3BCXX/Hela_MV4_d20_2_S10_L001_R2_001_trimmed53.fastq /data/raw/20160706_D00611_0304_BHVVJ3BCXX/Hela_MV4_d20_2_S10_L001_R2_001.fastq


Trimming 1 adapter with at most 10.0% errors in single-end mode ...


A549_MV4_d3_2_S2_L002_001 elapsed time: 0:07:41


Hela_MV4_d20_1_S9_L001_001 elapsed time: 0:06:38


Starting Hela_MV4_d20_1_S9_L002_001 at 2016-07-14 00:44:45.432116


This is cutadapt 1.10 with Python 3.4.5


Command line parameters: -a TATATATCTTGTGGAAAGGACGAAACACCG...GTTTCAGAGCTATGCTGGAAACTGCATAGCAAGTTGAAATAAGGCTAGTCCGTTATCAACTTGAAAAAGTGGCACCGAGTCGGTGCTTTTTTGTACTGAG --quiet -o /data/interim/20160706_D00611_0304_BHVVJ3BCXX/Hela_MV4_d20_1_S9_L002_R1_001_trimmed53.fastq /data/raw/20160706_D00611_0304_BHVVJ3BCXX/Hela_MV4_d20_1_S9_L002_R1_001.fastq


Trimming 1 adapter with at most 10.0% errors in single-end mode ...


Hela_MV4_d20_2_S10_L001_001 elapsed time: 0:07:47


Starting Hela_MV4_d20_2_S10_L002_001 at 2016-07-14 00:46:52.931454


This is cutadapt 1.10 with Python 3.4.5


Command line parameters: -a TATATATCTTGTGGAAAGGACGAAACACCG...GTTTCAGAGCTATGCTGGAAACTGCATAGCAAGTTGAAATAAGGCTAGTCCGTTATCAACTTGAAAAAGTGGCACCGAGTCGGTGCTTTTTTGTACTGAG --quiet -o /data/interim/20160706_D00611_0304_BHVVJ3BCXX/Hela_MV4_d20_2_S10_L002_R1_001_trimmed53.fastq /data/raw/20160706_D00611_0304_BHVVJ3BCXX/Hela_MV4_d20_2_S10_L002_R1_001.fastq


Trimming 1 adapter with at most 10.0% errors in single-end mode ...


This is cutadapt 1.10 with Python 3.4.5


Command line parameters: -a CCTTATTTTAACTTGCTATTTCTAGCTCTAAAAC...CAAACAAGGCTTTTCTCCAAGGGATATTTATAGTCTCAAAACACACAATTACTTTACAGTTAGGGTGAGTTTCCTTTTGTGCTGTTTTTTAAAATA --quiet -o /data/interim/20160706_D00611_0304_BHVVJ3BCXX/Hela_MV4_d20_1_S9_L002_R2_001_trimmed53.fastq /data/raw/20160706_D00611_0304_BHVVJ3BCXX/Hela_MV4_d20_1_S9_L002_R2_001.fastq


Trimming 1 adapter with at most 10.0% errors in single-end mode ...


This is cutadapt 1.10 with Python 3.4.5


Command line parameters: -a CCTTATTTTAACTTGCTATTTCTAGCTCTAAAAC...CAAACAAGGCTTTTCTCCAAGGGATATTTATAGTCTCAAAACACACAATTACTTTACAGTTAGGGTGAGTTTCCTTTTGTGCTGTTTTTTAAAATA --quiet -o /data/interim/20160706_D00611_0304_BHVVJ3BCXX/Hela_MV4_d20_2_S10_L002_R2_001_trimmed53.fastq /data/raw/20160706_D00611_0304_BHVVJ3BCXX/Hela_MV4_d20_2_S10_L002_R2_001.fastq


Trimming 1 adapter with at most 10.0% errors in single-end mode ...


Hela_MV4_d20_1_S9_L002_001 elapsed time: 0:06:32


Hela_MV4_d20_2_S10_L002_001 elapsed time: 0:07:40


parallel processing elapsed time: 0:45:40


In [15]:
# Print the combined results collected from the parallel trimming run above
print(concatenate_parallel_results(g_parallel_results))

A549_MV4_d14_1_S3_L001_001: finished
A549_MV4_d14_1_S3_L002_001: finished
A549_MV4_d14_2_S4_L001_001: finished
A549_MV4_d14_2_S4_L002_001: finished
A549_MV4_d20_1_S5_L001_001: finished
A549_MV4_d20_1_S5_L002_001: finished
A549_MV4_d20_2_S6_L001_001: finished
A549_MV4_d20_2_S6_L002_001: finished
A549_MV4_d28_1_S7_L001_001: finished
A549_MV4_d28_1_S7_L002_001: finished
A549_MV4_d28_2_S8_L001_001: finished
A549_MV4_d28_2_S8_L002_001: finished
A549_MV4_d3_1_S1_L001_001: finished
A549_MV4_d3_1_S1_L002_001: finished
A549_MV4_d3_2_S2_L001_001: finished
A549_MV4_d3_2_S2_L002_001: finished
Hela_MV4_d20_1_S9_L001_001: finished
Hela_MV4_d20_1_S9_L002_001: finished
Hela_MV4_d20_2_S10_L001_001: finished
Hela_MV4_d20_2_S10_L002_001: finished



## Trimmed FASTQ Filenames

In [16]:
# List the files in the trimmed-output directory that carry the linked-trim suffix ("_trimmed53.fastq")
print(summarize_filenames_for_prefix_and_suffix(g_trimmed_fastqs_dir, "", get_trimmed_suffix(TrimType.FIVE_THREE)))

A549_MV4_d14_1_S3_L001_R1_001_trimmed53.fastq
A549_MV4_d14_1_S3_L001_R2_001_trimmed53.fastq
A549_MV4_d14_1_S3_L002_R1_001_trimmed53.fastq
A549_MV4_d14_1_S3_L002_R2_001_trimmed53.fastq
A549_MV4_d14_2_S4_L001_R1_001_trimmed53.fastq
A549_MV4_d14_2_S4_L001_R2_001_trimmed53.fastq
A549_MV4_d14_2_S4_L002_R1_001_trimmed53.fastq
A549_MV4_d14_2_S4_L002_R2_001_trimmed53.fastq
A549_MV4_d20_1_S5_L001_R1_001_trimmed53.fastq
A549_MV4_d20_1_S5_L001_R2_001_trimmed53.fastq
A549_MV4_d20_1_S5_L002_R1_001_trimmed53.fastq
A549_MV4_d20_1_S5_L002_R2_001_trimmed53.fastq
A549_MV4_d20_2_S6_L001_R1_001_trimmed53.fastq
A549_MV4_d20_2_S6_L001_R2_001_trimmed53.fastq
A549_MV4_d20_2_S6_L002_R1_001_trimmed53.fastq
A549_MV4_d20_2_S6_L002_R2_001_trimmed53.fastq
A549_MV4_d28_1_S7_L001_R1_001_trimmed53.fastq
A549_MV4_d28_1_S7_L001_R2_001_trimmed53.fastq
A549_MV4_d28_1_S7_L002_R1_001_trimmed53.fastq
A549_MV4_d28_1_S7_L002_R2_001_trimmed53.fastq
A549_MV4_d28_2_S8_L001_R1_001_trimmed53.fastq
A549_MV4_d28_2_S8_L001_R2_001_trim