# Dual CRISPR Screen Analysis
# Step 6: Scoring Preparation
Amanda Birmingham, CCBB, UCSD (abirmingham@ucsd.edu)


## Instructions

To run this notebook reproducibly, follow these steps:
1. Click **Kernel** > **Restart & Clear Output**
2. When prompted, click the red **Restart & clear all outputs** button
3. In the [Input Parameters](#Input-Parameters) section, set each variable to the value appropriate for your analysis
4. Click **Cell** > **Run All**

## Input Parameters

In [None]:
# Name of the dataset; used below to generate a default run prefix and passed to the count-merging step.
g_dataset_name = "smallNotebookTest"
# Path to the library file; passed to the merging step as its constructs file path.
g_library_fp = ('/Users/Birmingham/Work/Repositories/ccbb_tickets_2017/mali-dual-crispr-pipeline/src/python/'
    'test_files/small_notebook_test/CV4_2spacers_w_probe_names_wo_duplicate.txt')
# Comma-separated string of count file paths and/or directories (a single entry is fine);
# directories are expanded to the combined-counts files found in them.
g_count_fps_or_dirs = ('/Users/Birmingham/Work/Repositories/ccbb_tickets_2017/mali-dual-crispr-pipeline/src/python/'
    'test_files/small_notebook_test/notebook4_small_notebook_test')
# Run prefix used when building the name of the prepped output file.
# NOTE(review): passed through ns_runs.check_or_set with a generated prefix as the alternative --
# presumably an empty value here selects the generated one; confirm in analysis_run_prefixes.
g_prepped_counts_run_prefix = "test_small_notebook"
# Directory in which the prepped (scoring-ready) output file is written.
g_prepped_counts_dir = ('/Users/Birmingham/Work/Repositories/ccbb_tickets_2017/mali-dual-crispr-pipeline/src/python/'
    'test_files/small_notebook_test/notebook6_small_notebook_test')
# Delimited string of timepoint prefixes; split into a list before being passed to the merging step.
g_time_prefixes = "T,D"
# Directory appended to sys.path so that the ccbbucsd package can be imported.
g_code_location = '/Users/Birmingham/Work/Repositories/ccbb_tickets_2017/mali-dual-crispr-pipeline/src/python/'

## Automated Set-Up

In [None]:
import inspect
import sys
sys.path.append(g_code_location)

import ccbbucsd.utilities.analysis_run_prefixes as ns_runs
import ccbbucsd.utilities.files_and_paths as ns_files
import ccbbucsd.utilities.notebook_logging as ns_logs


def describe_var_list(input_var_name_list):
    """Build a printable "name: value" listing for the given variable names.

    Each entry of input_var_name_list is a string that is handed to eval() to obtain the value to
    display; in this notebook the entries are names of notebook-level variables.  One
    "<entry>: <value>" line, terminated by a newline, is produced per entry, in input order, and the
    lines are returned joined into a single string (empty string for an empty list).

    NOTE: because eval() is used, entries must only ever come from trusted, hard-coded lists.
    """
    return "".join([
        "{0}: {1}\n".format(name, eval(name))
        for name in input_var_name_list
    ])


# Configure logging for this notebook through the project's helper.
# NOTE(review): judging by its name this sends INFO-level log messages to stdout -- confirm in notebook_logging.
ns_logs.set_stdout_info_logger()

In [None]:
import ccbbucsd.malicrispr.count_combination as ns_combine
# Show the source of the suffix helper used below when expanding count directories,
# so the rendered notebook displays the code that was actually run.
print(inspect.getsource(ns_combine.get_combined_counts_file_suffix))

In [None]:
import ccbbucsd.utilities.string_utils as ns_string
# Show the source of the string-splitting helper used below to parse g_time_prefixes.
print(inspect.getsource(ns_string.split_delimited_string_to_list))

In [None]:
import os 

def get_count_file_fps(comma_sep_fps_or_dirs_str):
    """Expand a comma-separated string of file paths and/or directories into a flat list of file paths.

    Each comma-separated entry is stripped of surrounding whitespace.  An entry that is an existing
    directory is replaced by the file paths that ns_files.get_filepaths_from_wildcard returns for that
    directory and the combined-counts file suffix.  Any other entry is passed through unchanged; its
    existence is NOT checked here.  Blank entries (for example from a trailing or doubled comma, or an
    empty input string) are ignored rather than being returned as empty file paths.

    Args:
        comma_sep_fps_or_dirs_str: string of file paths and/or directory paths separated by commas.

    Returns:
        List of file path strings, in the order the entries were given.
    """
    result = []

    for raw_entry in comma_sep_fps_or_dirs_str.split(","):
        entry = raw_entry.strip()
        if not entry:
            # A stray comma yields an empty entry; "" is never a usable file path, so skip it.
            continue

        if os.path.isdir(entry):
            result.extend(ns_files.get_filepaths_from_wildcard(
                entry, ns_combine.get_combined_counts_file_suffix()))
        else:
            result.append(entry)

    return result
        

# Expand the user-supplied files/directories string into the list of count file paths.
g_count_file_fps = get_count_file_fps(g_count_fps_or_dirs)
# NOTE(review): presumably keeps the user-supplied run prefix when one was given and otherwise uses
# the prefix generated from the dataset name -- confirm against ns_runs.check_or_set.
g_prepped_counts_run_prefix = ns_runs.check_or_set(g_prepped_counts_run_prefix,
                                                   ns_runs.generate_run_prefix(g_dataset_name))
# Turn the delimited timepoint-prefix string into a list.
g_time_prefixes_list = ns_string.split_delimited_string_to_list(g_time_prefixes)
# Echo the derived settings so they are recorded in the notebook output.
print(describe_var_list(['g_count_file_fps', 'g_prepped_counts_run_prefix', 'g_time_prefixes_list']))
# NOTE(review): judging by its name, creates the output directory if it does not already exist -- confirm.
ns_files.verify_or_make_dir(g_prepped_counts_dir)

## Scoring-Ready File Preparation

In [None]:
import ccbbucsd.malicrispr.scoring_prep as ns_prep
# Show the full source of the scoring_prep module whose functions are called below.
print(inspect.getsource(ns_prep))

In [None]:
def merge_and_write_timepoint_counts(count_file_fps, constructs_fp, run_prefix, dataset_name, time_prefixes_list,
                                     output_dir, disregard_order=True):
    """Merge the count files into one annotated table and write it out as a tab-separated file.

    Merging and annotation are delegated to ns_prep.merge_and_annotate_counts.  The output file path is
    built by ns_files.build_multipart_fp from output_dir, run_prefix, and the suffix returned by
    ns_prep.get_prepped_file_suffix().

    Args:
        count_file_fps: file paths of the count files to merge.
        constructs_fp: file path of the constructs (library) file.
        run_prefix: prefix used in the name of the output file.
        dataset_name: dataset name, forwarded to the merge step.
        time_prefixes_list: list of timepoint prefixes, forwarded to the merge step.
        output_dir: directory in which the output file is written.
        disregard_order: forwarded to ns_prep.merge_and_annotate_counts; defaults to True.

    Returns:
        None; the merged table is written to disk.
    """
    # Forward the caller's disregard_order instead of a hard-coded True so the parameter takes effect.
    joined_df = ns_prep.merge_and_annotate_counts(count_file_fps, constructs_fp, dataset_name,
                                                  time_prefixes_list, disregard_order=disregard_order)
    prepped_file_suffix = ns_prep.get_prepped_file_suffix()
    output_fp = ns_files.build_multipart_fp(output_dir, [run_prefix, prepped_file_suffix])
    # Tab-separated output, without the index column.
    joined_df.to_csv(output_fp, index=False, sep='\t')

In [None]:
# Merge the count files and write the scoring-ready file into g_prepped_counts_dir;
# the final positional argument (True) is the disregard_order setting.
merge_and_write_timepoint_counts(g_count_file_fps, g_library_fp, g_prepped_counts_run_prefix, g_dataset_name,
                                 g_time_prefixes_list, g_prepped_counts_dir, True)

In [None]:
# Print the project helper's summary of files in the output directory for this run prefix and the
# prepped-file suffix, as a check on what was written.
print(ns_files.summarize_filenames_for_prefix_and_suffix(g_prepped_counts_dir, g_prepped_counts_run_prefix, 
                                                         ns_prep.get_prepped_file_suffix()))