# Dual CRISPR Screen Analysis
# Step 2: Construct Filter
Amanda Birmingham, CCBB, UCSD (abirmingham@ucsd.edu)

## Instructions

To run this notebook reproducibly, follow these steps:
1. Click **Kernel** > **Restart & Clear Output**
2. When prompted, click the red **Restart & clear all outputs** button
3. In the [Input Parameters](#Input-Parameters) section, set each variable to the value appropriate for your analysis
4. Click **Cell** > **Run All**

## Input Parameters

In [None]:
# NOTE: the paths below are machine-specific absolute paths; replace them with
# your own locations before running the notebook.

# Directory appended to sys.path during set-up so the ccbbucsd package imports resolve.
g_code_location = (
    '/Users/Birmingham/Work/Repositories/ccbb_tickets_2017/'
    'mali-dual-crispr-pipeline/src/python/'
)

# Input directory handed to the parallel processing step (trimmed fastq files).
g_trimmed_fastqs_dir = (
    '/Users/Birmingham/Work/Repositories/ccbb_tickets_2017/'
    'mali-dual-crispr-pipeline/src/python/test_files/known_goods/'
    'trimmed_fastq_20160706_HeLa_A549_CV4'
)

# Requested output directory; it is passed through ns_runs.check_or_set and
# ns_files.verify_or_make_dir before being given to the filter.
g_filtered_fastqs_dir = (
    '/Users/Birmingham/Work/Repositories/ccbb_tickets_2017/'
    'mali-dual-crispr-pipeline/src/python/test_files/test_outputs/'
    'notebook2_20160706_HeLa_A549_CV4'
)

# Processor count passed to ns_parallel.parallel_process_paired_reads.
g_num_processors = 3

# Arguments forwarded (in this order) to fltr.filter_pair_by_len.
# Presumably the accepted gRNA length range and the number of leading bases
# kept for matching -- TODO confirm against count_filterer.
g_min_trimmed_grna_len = 19
g_max_trimmed_grna_len = 21
g_len_of_seq_to_match = 19

## Automated Set-Up

In [None]:
import inspect
import sys
sys.path.append(g_code_location)

import ccbbucsd.utilities.analysis_run_prefixes as ns_runs
import ccbbucsd.utilities.files_and_paths as ns_files
import ccbbucsd.utilities.notebook_logging as ns_logs


def describe_var_list(input_var_name_list):
    """Return a "text: value" report with one line per entry of the input.

    Args:
        input_var_name_list: iterable of strings; each string is evaluated
            with eval() and printed next to its own text.

    Returns:
        A single string made of one newline-terminated "{text}: {value}" line
        per entry, in input order (empty string for an empty input).

    Raises:
        Whatever eval() raises for an entry (e.g. NameError for an undefined
        name).
    """
    # NOTE(review): eval() runs arbitrary expressions -- pass only trusted,
    # hard-coded variable names.
    return "".join(
        "{0}: {1}\n".format(name, eval(name)) for name in input_var_name_list
    )


# Configure logging for this notebook via the project helper (presumably sends
# INFO-level messages to stdout -- confirm in ccbbucsd.utilities.notebook_logging).
ns_logs.set_stdout_info_logger()

In [None]:
# Resolve the output directory: check_or_set receives the requested output
# directory and the trimmed-input directory (presumably the latter is used to
# derive a default when the former is unset -- TODO confirm in analysis_run_prefixes).
g_filtered_fastqs_dir = ns_runs.check_or_set(g_filtered_fastqs_dir, g_trimmed_fastqs_dir)
# Echo the resolved value into the notebook output.
print(describe_var_list(['g_filtered_fastqs_dir']))
# Presumably creates the directory if it does not exist yet -- TODO confirm in files_and_paths.
ns_files.verify_or_make_dir(g_filtered_fastqs_dir)

## Construct Filtering Functions

In [None]:
import ccbbucsd.malicrispr.scaffold_trim as trim
# Print the full source of the trimming module into the notebook output
# (presumably so the exact code used for this run is recorded with the results).
print(inspect.getsource(trim))

In [None]:
import ccbbucsd.malicrispr.count_filterer as fltr
# Print the full source of the filtering module into the notebook output
# (presumably so the exact code used for this run is recorded with the results).
print(inspect.getsource(fltr))

In [None]:
import ccbbucsd.utilities.parallel_process_fastqs as ns_parallel

# Hand the filtering function to the project's parallel driver. Arguments, in order:
#   1. directory holding the input fastq files
#   2. file suffix reported by the trim module for TrimType.FIVE_THREE
#   3. processor count
#   4. the function to apply (fltr.filter_pair_by_len)
#   5. list of extra arguments supplied alongside that function
# NOTE(review): how the driver pairs files and forwards the extra arguments is
# defined in parallel_process_fastqs, not here -- confirm there.
g_parallel_results = ns_parallel.parallel_process_paired_reads(
    g_trimmed_fastqs_dir,
    trim.get_trimmed_suffix(trim.TrimType.FIVE_THREE),
    g_num_processors,
    fltr.filter_pair_by_len,
    [
        g_min_trimmed_grna_len,
        g_max_trimmed_grna_len,
        g_len_of_seq_to_match,
        g_filtered_fastqs_dir,
    ],
)

In [None]:
# Combine the results returned by the parallel run into one value and print it
# (presumably a text summary of each worker's output -- TODO confirm).
print(ns_parallel.concatenate_parallel_results(g_parallel_results))