# Dual CRISPR Screen Analysis
# Scoring Preparation
Amanda Birmingham, CCBB, UCSD (abirmingham@ucsd.edu)

## Instructions

To run this notebook reproducibly, follow these steps:
1. Click **Kernel** > **Restart & Clear Output**
2. When prompted, click the red **Restart & clear all outputs** button
3. Fill in the values for your analysis for each of the variables in the [Input Parameters](#input-parameters) section
4. Click **Cell** > **Run All**

## Input Parameters

In [1]:
# Timestamp identifying this run; left blank here and passed through
# check_or_set(g_timestamp, get_timestamp()) in the set-up cell below
# (presumably filled with a fresh timestamp when blank--confirm in check_or_set).
g_timestamp = ""
g_dataset_name = "test"

# TODO: put back to plain assignments before using with nbparameterise
# Directory added to sys.path so that the ccbbucsd package can be imported
g_code_location = ('/Users/Birmingham/Work/Repositories/ccbb_tickets_2017/ideker-dual-crispr-software'
    '/src/python')
g_base_dir = "{0}{1}".format(g_code_location, "/test_files")
# File of construct definitions that is read in as the annotation table
g_constructs_fp = "{0}{1}".format(g_base_dir, '/known_inputs/Metabolism_dual_spacers_w_probe_names.txt')
# Comma-separated indices of the seven constructs-file columns to use, in the order:
# construct id, target A id, probe A id, probe A seq, target B id, probe B id, probe B seq
g_col_indices_str = "1,3,6,7,8,11,12"
# Comma-separated list of count files and/or directories holding combined counts files
g_count_fps_or_dirs = "{0}{1}".format(g_base_dir, '/known_inputs/20160706_HeLa_A549_MV4_19mer_1mm_py_20160713235254')
# Directory into which the merged, scoring-ready timepoint counts file is written
g_scoring_ready_counts_dir = "{0}{1}".format(g_base_dir, '/test_outputs')


# TODO: decide which (if any) of these I need
# NB: none of the three values below is referenced by the code in this notebook
g_scoring_run_prefix = ""
g_project_name = "test_proj"
g_day_timepoints_str = "3,14,20,28"

## CCBB Library Imports

In [2]:
import sys
# make the ccbbucsd package under g_code_location importable by the cells below
sys.path.append(g_code_location)

## Automated Set-Up

In [3]:
# %load -s describe_var_list /Users/Birmingham/Repositories/ccbb_tickets/20160210_mali_crispr/src/python/ccbbucsd/utilities/analysis_run_prefixes.py
def describe_var_list(input_var_name_list):
    """Return a "name: value" line for each variable name in the input list.

    Each entry of input_var_name_list is evaluated with eval() to obtain the
    value that is printed next to it, so entries must be names (or expressions)
    that are resolvable from this function's scope.  The lines are returned
    concatenated into a single string, each terminated by a newline.

    NB: because eval() is used, only pass trusted, hard-coded names.
    """
    return "".join(["{0}: {1}\n".format(name, eval(name))
                    for name in input_var_name_list])


In [4]:
# %load -s get_combined_counts_file_suffix /Users/Birmingham/Repositories/ccbb_tickets/20160210_mali_crispr/src/python/ccbbucsd/malicrispr/count_combination.py
def get_combined_counts_file_suffix():
    """Return the file name suffix that marks a combined counts file."""
    combined_counts_suffix = "counts_combined.txt"
    return combined_counts_suffix


In [5]:
import os 
from ccbbucsd.utilities.analysis_run_prefixes import check_or_set, get_run_prefix, get_timestamp, get_ints_from_comma_sep_string
from ccbbucsd.utilities.files_and_paths import get_filepaths_from_wildcard

def get_count_file_fps(comma_sep_fps_or_dirs_str):
    """Expand a comma-separated string of files/directories into a list of count file paths.

    Each comma-delimited entry is stripped of surrounding whitespace.  An entry
    that is an existing directory is replaced by whatever
    get_filepaths_from_wildcard returns for that directory and the combined
    counts file suffix; any other entry is passed through as a file path.

    Blank entries (from an empty input string, or a doubled or trailing comma)
    are skipped rather than being returned as empty-string "paths".

    Args:
        comma_sep_fps_or_dirs_str: string of file and/or directory paths,
            separated by commas.

    Returns:
        List of file path strings, in the order the entries were given.
    """
    result = []

    for curr_fp_or_dir in comma_sep_fps_or_dirs_str.split(","):
        trimmed_curr = curr_fp_or_dir.strip()
        if not trimmed_curr:
            # nothing between two commas (or nothing at all): not a path
            continue

        if os.path.isdir(trimmed_curr):
            combined_counts_fps = get_filepaths_from_wildcard(trimmed_curr,
                get_combined_counts_file_suffix())
            result.extend(combined_counts_fps)
        else:
            result.append(trimmed_curr)

    return result
        

# Derive the working values from the raw input parameters:
# g_timestamp is run through check_or_set with a newly generated timestamp
# (presumably keeping a user-supplied value if there is one--confirm in check_or_set)
g_timestamp = check_or_set(g_timestamp, get_timestamp())
# expand the files/directories string into an explicit list of count file paths
g_count_file_fps = get_count_file_fps(g_count_fps_or_dirs)
# convert the comma-separated column indices string into a list of ints
g_col_indices = get_ints_from_comma_sep_string(g_col_indices_str)
# echo the derived values so that they are recorded in the notebook output
print(describe_var_list(['g_timestamp','g_count_file_fps', 'g_col_indices']))

g_timestamp: 20170307094751
g_count_file_fps: ['/Users/Birmingham/Work/Repositories/ccbb_tickets_2017/ideker-dual-crispr-software/src/python/test_files/known_inputs/20160706_HeLa_A549_MV4_19mer_1mm_py_20160713235254/20160706_HeLa_A549_MV4_19mer_1mm_py_20160713235254_mod_counts_combined.txt']
g_col_indices: [1, 3, 6, 7, 8, 11, 12]



In [6]:
from ccbbucsd.utilities.files_and_paths import verify_or_make_dir
# ensure the output directory is in place before anything is written to it
# (presumably created if missing, per the helper's name--confirm in files_and_paths)
verify_or_make_dir(g_scoring_ready_counts_dir)

## Count File Merging Functions

In [7]:
# %load /Users/Birmingham/Work/Repositories/ccbb_tickets/20161100_mali_crispr_software/src/python/ccbbucsd/malicrispr/construct_file_extracter.py
# third-party libraries
import pandas

# ccbb libraries
from ccbbucsd.utilities.bio_seq_utilities import trim_seq

# module metadata
__author__ = "Amanda Birmingham"
__maintainer__ = "Amanda Birmingham"
__email__ = "abirmingham@ucsd.edu"
__status__ = "prototype"

# Standardized column names that _rename_columns assigns to the selected
# columns of the construct table
_CONSTRUCT_ID = "construct_id"
_PROBE_A_SEQ = "probe_a_seq"
_PROBE_B_SEQ = "probe_b_seq"
_PROBE_A_NAME = "probe_a_id"
_PROBE_B_NAME = "probe_b_id"
_TARGET_A_NAME = "target_a_id"
_TARGET_B_NAME = "target_b_id"


def get_construct_header():
    """Return the standardized name of the construct id column."""
    return _CONSTRUCT_ID


def get_probe_id_header(probe_letter):
    """Return the standardized name of the probe id column for probe "A" or "B".

    probe_letter is matched case-insensitively; anything other than A or B
    causes _is_letter_a to raise a ValueError.
    """
    if _is_letter_a(probe_letter):
        return _PROBE_A_NAME
    return _PROBE_B_NAME


def get_probe_seq_header(probe_letter):
    """Return the standardized name of the probe sequence column for probe "A" or "B".

    probe_letter is matched case-insensitively; anything other than A or B
    causes _is_letter_a to raise a ValueError.
    """
    if _is_letter_a(probe_letter):
        return _PROBE_A_SEQ
    return _PROBE_B_SEQ


def get_target_id_header(target_letter):
    """Return the standardized name of the target id column for target "A" or "B".

    target_letter is matched case-insensitively; anything other than A or B
    causes _is_letter_a to raise a ValueError.
    """
    if _is_letter_a(target_letter):
        return _TARGET_A_NAME
    return _TARGET_B_NAME


def extract_construct_and_grna_info(constructs_fp, column_indices):
    """Read the constructs file and return its construct names and its probes.

    Args:
        constructs_fp: path to the tab-delimited constructs file.
        column_indices: indices of the seven columns to use (see
            _rename_columns for the required order).

    Returns:
        Tuple of (list of unique construct ids, list of (probe name,
        upper-cased probe sequence) tuples covering both the A and B probes).

    Raises:
        ValueError: if a probe name is paired with more than one sequence or
            a sequence with more than one name (from
            _validate_and_format_probe_seq_pairs), or if column_indices has
            the wrong length (from _rename_columns).
    """
    construct_table = _read_in_construct_table(constructs_fp, column_indices, rows_to_skip=1)
    # NB: each set must hold the sequence first and the name second, because
    # _validate_and_format_probe_seq_pairs reads element 0 as the sequence and
    # element 1 as the name; passing them name-first yields (seq, NAME) output.
    seq_name_sets = _extract_unique_sets_across_a_and_b(construct_table,
        [_PROBE_A_SEQ, _PROBE_A_NAME], [_PROBE_B_SEQ, _PROBE_B_NAME])
    probe_name_seq_pairs = _validate_and_format_probe_seq_pairs(seq_name_sets)
    construct_names = construct_table[_CONSTRUCT_ID].unique().tolist()
    return construct_names, probe_name_seq_pairs


def trim_probes(probes_name_and_seq_list, retain_len):
    """Return (probe name, trimmed sequence) tuples for the input (name, sequence) pairs.

    Each sequence is passed to trim_seq with retain_len and False, where
    False = do not retain from 5p end but from 3p end.
    """
    return [(name_and_seq[0], trim_seq(name_and_seq[1], retain_len, False))
            for name_and_seq in probes_name_and_seq_list]


def _is_letter_a(letter):
    """Return True for "A"/"a" and False for "B"/"b"; raise ValueError for anything else."""
    normalized_letter = letter.upper()
    if normalized_letter not in ("A", "B"):
        raise ValueError("Input '{0}' is not recognized as A or B.".format(letter))

    return normalized_letter == "A"


def _read_in_construct_table(constructs_fp, column_indices, rows_to_skip=1):
    """Load the constructs file into a dataframe with standardized column names.

    The file is read with pandas.read_table without a header row, skipping the
    first rows_to_skip rows, and the columns at column_indices are then renamed
    by _rename_columns.
    """
    raw_table = pandas.read_table(constructs_fp, skiprows=rows_to_skip, header=None)
    return _rename_columns(raw_table, column_indices)


def _rename_columns(construct_table, column_indices):
    """Return a copy of construct_table with the chosen columns given standard names.

    column_indices must hold exactly seven positions into the table's columns,
    giving, in order: construct id, target A id, probe A id, probe A sequence,
    target B id, probe B id, probe B sequence.  Columns that are not listed
    keep their existing names.

    Raises:
        ValueError: if column_indices does not hold exactly seven entries.
    """
    standard_names = [_CONSTRUCT_ID, _TARGET_A_NAME, _PROBE_A_NAME, _PROBE_A_SEQ,
                      _TARGET_B_NAME, _PROBE_B_NAME, _PROBE_B_SEQ]

    if len(column_indices) != len(standard_names):
        raise ValueError("Expected indices for {0} columns but received indices for {1}.".format(
            len(standard_names), len(column_indices)))

    current_names = list(construct_table.columns.values)
    # map the current name found at each requested position to its standard name
    standard_name_by_current = {current_names[col_position]: standard_name
                                for col_position, standard_name
                                in zip(column_indices, standard_names)}

    return construct_table.rename(columns=standard_name_by_current)


def _extract_unique_sets_across_a_and_b(construct_table, a_col_headers_list, b_col_headers_list):
    """Return the sorted unique value combinations found in the A columns or the B columns.

    The A columns and the B columns are each pulled out under the same set of
    generic temporary headers so that they can be stacked into one dataframe;
    the unique row combinations of the stacked dataframe are then collected.

    Raises:
        ValueError: if the A and B header lists differ in length.
    """
    num_headers = len(a_col_headers_list)
    if num_headers != len(b_col_headers_list):
        raise ValueError("A and B column header lists are not equal in length.")

    generic_headers = ["temp_header_{0}".format(i) for i in range(0, num_headers)]

    # A rows first, then B rows, all under the shared generic headers
    stacked_sets = pandas.concat([
        _extract_renamed_subset_df(construct_table, specific_headers, generic_headers)
        for specific_headers in (a_col_headers_list, b_col_headers_list)])

    # the keys of the groupby's groups are the unique combinations
    unique_sets = list(stacked_sets.groupby(generic_headers).groups)
    unique_sets.sort()  # NB sort so that output order is predictable
    return unique_sets


def _extract_renamed_subset_df(construct_table, col_headers_list, new_headers_list):
    """Return a new dataframe holding only the requested columns, renamed.

    Args:
        construct_table: dataframe to take the columns from; it is not modified.
        col_headers_list: names of the columns to extract.
        new_headers_list: new names for those columns, in the same order.

    Returns:
        Dataframe with the columns of col_headers_list, named per new_headers_list.
    """
    rename_dictionary = dict(zip(col_headers_list, new_headers_list))
    # rename returns a new dataframe here; renaming the column selection in
    # place can set off pandas' chained-assignment (SettingWithCopy) warnings
    return construct_table[col_headers_list].rename(columns=rename_dictionary)


def _validate_and_format_probe_seq_pairs(probes_seq_and_name_list):
    """Check that probe names and sequences pair up one-to-one and reformat the pairs.

    Args:
        probes_seq_and_name_list: iterable of two-item (sequence, name) sets.

    Returns:
        List of (name, upper-cased sequence) tuples, in input order.

    Raises:
        ValueError: if a set does not have exactly two pieces, if a sequence
            appears more than once, or if a name appears more than once.
    """
    num_pieces_required = 2
    name_for_seq = {}
    seq_for_name = {}
    formatted_pairs = []

    for seq_and_name in probes_seq_and_name_list:
        num_pieces_found = len(seq_and_name)
        if num_pieces_found != num_pieces_required:
            raise ValueError(
                "input '{0}' has {1} pieces instead of the expected {2}".format(
                    seq_and_name, num_pieces_found, num_pieces_required))

        probe_seq, probe_name = seq_and_name[0], seq_and_name[1]

        # a sequence may not be seen a second time
        if probe_seq in name_for_seq:
            raise ValueError(
                "sequence '{0}' associated with name '{1}' but was already associated with name '{2}'".format(
                    probe_seq, probe_name, name_for_seq[probe_seq]))

        # nor may a name
        if probe_name in seq_for_name:
            raise ValueError(
                "name '{0}' associated with sequence '{1}' but was already associated with sequence '{2}'".format(
                    probe_name, probe_seq, seq_for_name[probe_name]))

        name_for_seq[probe_seq] = probe_name
        seq_for_name[probe_name] = probe_seq

        # upper-case all probe seqs
        formatted_pairs.append((probe_name, probe_seq.upper()))

    return formatted_pairs


def _alphabetize_two_fields_in_row(input_df, header_of_col_to_be_first, header_of_col_to_be_second):
    """Swap two columns' values in every row where the first value sorts after the second.

    After the call, in each row the value under header_of_col_to_be_first is
    less than or equal to the value under header_of_col_to_be_second.

    NB: input_df is modified in place, and is also returned.
    """
    first_col = header_of_col_to_be_first
    second_col = header_of_col_to_be_second

    # boolean array: True for rows whose two values are out of order
    needs_swap = input_df[first_col] > input_df[second_col]

    # hold on to the first column's values before they are overwritten
    saved_first_col_vals = input_df.loc[needs_swap, first_col]

    # first column takes the second column's values ...
    input_df.loc[needs_swap, first_col] = input_df.loc[needs_swap, second_col]
    # ... and second column takes the first column's original values
    input_df.loc[needs_swap, second_col] = saved_first_col_vals
    return input_df


In [8]:
# %load /Users/Birmingham/Work/Repositories/ccbb_tickets/20161100_mali_crispr_software/src/python/ccbbucsd/malicrispr/scoring_prep.py
# standard libraries
import re

# ccbb libraries
from ccbbucsd.utilities.pandas_utils import add_series_to_dataframe

# project-specific libraries
from ccbbucsd.malicrispr.construct_file_extracter import get_target_id_header, \
    get_probe_id_header, get_construct_header, get_target_pair_id_header, \
    get_probe_pair_id_header

# module metadata
__author__ = "Amanda Birmingham"
__maintainer__ = "Amanda Birmingham"
__email__ = "abirmingham@ucsd.edu"
__status__ = "development"


# divider between the pieces of a count column header (experiment_timept_rep)
_HEADER_DIVIDER = "_"
# prefix that precedes the timepoint number in a count column header
_TIME_PREFIX = "T"
# number of pieces that a count column header must split into
_NUM_HEADER_PIECES = 3


def get_header_divider():
    """Return the string that divides the pieces of a count column header."""
    return _HEADER_DIVIDER


def get_time_prefix():
    """Return the prefix that precedes the timepoint number in a count column header."""
    return _TIME_PREFIX


def _get_num_header_pieces():
    """Return the number of pieces a count column header is required to have."""
    return _NUM_HEADER_PIECES


def _clip_count_header_suffix(count_header):
    """Return count_header without the count pipeline's trailing suffix, if it has one.

    Headers produced by Amanda's count pipeline end in
    "_S#+_trimmed53_len_filtered_counts" (where #+ is one or more digits);
    that ending is removed.  Headers without this ending are returned as-is.
    """
    # Regex key:
    # r'' is a raw string, so backslashes are not treated as escapes
    # [0-9]+ matches one or more digits
    # $ anchors the match to the end of the string
    pipeline_suffix_pattern = re.compile(r'_S[0-9]+_trimmed53_len_filtered_counts$')
    return pipeline_suffix_pattern.sub("", count_header)


def _validate_and_decompose_count_header(count_header):
    """Split a count column header into validated (experiment, timepoint, replicate) pieces.

    Required count header format: experiment_timept_rep, optionally followed
    by the count pipeline's suffix, which is clipped off before splitting.

    Returns:
        Tuple of (experiment id, timepoint as int, replicate), as produced by
        _validate_and_standardize_count_header_pieces.

    Raises:
        ValueError: if the header does not have the required pieces or its
            timepoint is not valid.
    """
    trimmed_count_header = _clip_count_header_suffix(count_header)
    count_header_pieces = trimmed_count_header.split(get_header_divider())
    # the number of pieces is checked by the function below, not here
    return _validate_and_standardize_count_header_pieces(count_header_pieces)


def _validate_and_standardize_count_header_pieces(count_header_pieces):
    """Validate the pieces of a count column header and return them standardized.

    Required count header format: experiment_timept_rep

    Args:
        count_header_pieces: sequence of exactly three items: experiment id,
            timepoint, and replicate.

    Returns:
        Tuple of (experiment id, timepoint as int, replicate), where the
        replicate is an int if it is made up only of digits.

    Raises:
        ValueError: if there are not exactly the expected number of pieces,
            or if the timepoint is not valid.
    """
    num_expected_pieces = _get_num_header_pieces()
    if len(count_header_pieces) != num_expected_pieces:
        # NB: the template must be .format()-ed; handing the values to
        # ValueError as extra arguments leaves the placeholders unfilled
        raise ValueError("Count header has {0} pieces instead of the expected {1}: '{2}'.".format(
            len(count_header_pieces), num_expected_pieces, count_header_pieces))

    some_id = count_header_pieces[0]
    timept = _validate_and_standardize_timepoint(count_header_pieces[1])
    replicate = _validate_and_standardize_replicate(count_header_pieces[2])

    return (some_id, timept, replicate)


def _validate_and_standardize_timepoint(timept):
    """Validate a timepoint and return it as an int.

    Args:
        timept: either a string made up of the time prefix (in either case)
            followed by a non-negative integer, e.g. "T14" or "t3", or a
            non-string value (e.g. an int) whose string form is made up
            only of digits.

    Returns:
        The timepoint number as an int.

    Raises:
        ValueError: if a string timepoint does not start with the time prefix,
            or if the timepoint number is not made up only of digits.
    """
    if isinstance(timept, str):
        # ensure timepoint is "t" or "T" plus a non-negative integer number
        expected_timepoint_prefix = get_time_prefix()
        timepoint_prefix = timept[:1]
        if timepoint_prefix.upper() != expected_timepoint_prefix.upper():
            # NB: the template must be .format()-ed; handing the values to
            # ValueError as extra arguments leaves the placeholders unfilled
            raise ValueError("Time point '{0}' does not start with '{1}' or '{2}'.".format(
                timept, expected_timepoint_prefix.lower(), expected_timepoint_prefix.upper()))

        timept = timept[1:]
    else:
        timept = str(timept)

    # isdigit is False for the empty string and for anything with a sign or decimal point
    if not timept.isdigit():
        raise ValueError("Time point value '{0}' is not recognizable as a non-negative integer.".format(
            timept))

    return int(timept)


def _validate_and_standardize_replicate(rep):
    """Return the replicate as an int when it is all digits, and unchanged otherwise.

    An input that is already an int is returned as-is; a string such as "2"
    becomes 2, while a string such as "a" is left as a string.
    """
    if isinstance(rep, int):
        return rep

    if rep.isdigit():
        return int(rep)
    return rep


def _validate_expt_structure(expt_structure_by_id):
    """Raise a ValueError unless the structure describes one consistent experiment.

    Args:
        expt_structure_by_id: dictionary with the format
            {some_id: {timept: {set of replicates}}}

    The requirements checked are:
      * exactly one experiment is represented
      * that experiment has at least one timepoint
      * every timepoint has at least one replicate
      * all timepoints have the exact same set of replicates:
        e.g., can't have sample1_T1_1; sample1_T2_1, sample1_T2_2

    Returns:
        None
    """
    num_expts = len(expt_structure_by_id)
    if num_expts != 1:
        raise ValueError(("Count headers must describe one and only one experiment, "
            "but {0} were detected: '{1}'.").format(num_expts,
            sorted(list(expt_structure_by_id.keys()))))

    for expt_id, reps_by_timept in expt_structure_by_id.items():
        if len(reps_by_timept) == 0:
            raise ValueError("Count headers must describe at least one timepoint for experiment, "
                "but 0 were detected.")

        # the first timepoint's replicates are the standard all others must match
        expected_reps = None
        for timept_position, (timept, reps) in enumerate(reps_by_timept.items()):
            if len(reps) == 0:
                raise ValueError(("Count headers must describe at least one replicate for each timepoint, "
                                  "but 0 were detected for timepoint '{0}'.").format(timept))

            if timept_position == 0:
                expected_reps = reps
                continue

            if reps != expected_reps:
                raise ValueError("For sample '{0}', timepoint {1} has "
                    "replicates '{2}' instead of the expected '{3}'".format(
                    expt_id, timept, sorted(reps), sorted(expected_reps)))


def validate_and_parse_data_column_headers(count_headers):
    """Parse count column headers into pieces and validate them as one experiment.

    Ensures that there is only one experiment represented in the headers and
    that every timepoint in the experiment has the exact same set of replicates.

    Args:
        count_headers: iterable of count column headers, each in the required
            format experiment_timept_rep.

    Returns:
        List of (experiment id, timepoint, replicate) tuples, one per header,
        in input order.

    Raises:
        ValueError: if any header is malformed or the headers taken together
            do not describe a single consistent experiment.
    """
    parsed_headers = []
    # {some_id: {timept: {set of replicates}}}, filled in as the headers are
    # parsed and validated as a whole once all of them have been seen
    reps_by_timept_by_expt = {}

    for header in count_headers:
        header_pieces = _validate_and_decompose_count_header(header)
        parsed_headers.append(header_pieces)

        expt_id, timept, replicate = header_pieces[0], header_pieces[1], header_pieces[2]
        reps_by_timept = reps_by_timept_by_expt.setdefault(expt_id, {})
        reps_by_timept.setdefault(timept, set()).add(replicate)

    _validate_expt_structure(reps_by_timept_by_expt)
    return parsed_headers


# So ... I wrote and tested _validate_expt_structure anticipating there could be
# MORE than ONE experiment in the input file.  Now I think that can't be the
# case.  However, it was a pain to write so in case I'm wrong, I'm leaving the
# version that handles that complexity here for now.
#
# def _validate_expt_structure(expt_structure_by_id):
#     # expt_structure_by_id should have format {some_id: {timept: {set of replicates}}}
#
#     # Every uid must have the exact same set of timepoints
#     # e.g., can't have sample1_T1_1, sample1_T2_1; sample2_T1_1, sample2_T2_1, sample2_T3_1
#     # Put all timepoints+replicates for a given sample in a set:
#     # set {T1_1, T2_1} doesn't match set {T1_1, T2_1, T3_1}
#
#     # All timepoints (across all expts) must have the exact same set of replicates.
#     # If timepts in *different* expts have different replicates, that would be
#     # caught by the above comparison of timept+replicate sets across samples.
#     # However, if timepts in the *same* sample have different numbers of replicates,
#     # that wouldn't be caught unless we keep sets by timept rather than timept+replicate.
#     # e.g., can't have sample1_T1_1; sample1_T2_1, sample1_T2_2
#
#     is_first_expt = True
#     reference_timepts_plus_reps_set = None
#
#     for curr_expt_id, curr_expt_structure in expt_structure_by_id.items():
#         # ensure all timepoints for current sample have the same number of replicates
#         is_first_timept = True
#         reference_reps_set = None
#
#         for curr_timept, curr_rep_set in curr_expt_structure.items():
#             if is_first_timept:
#                 reference_reps_set = curr_rep_set
#                 is_first_timept = False
#             else:
#                 if curr_rep_set != reference_reps_set:
#                     raise ValueError("For sample '{0}', timepoint {1} has "
#                                      "replicates '{2}' instead of the expected '{3}'".format(
#                         curr_expt_id, curr_timept, sorted(curr_rep_set),
#                         sorted(reference_reps_set)))
#
#             # make a new list of timept+rep for this timept
#
#
#         # TODO: handle case where no timepts exist, or no reps exist for timept
#         curr_timepts_plus_reps_set = {"{0}_{1}".format(timept, rep)
#                                  for timept in curr_expt_structure
#                                  for rep in reference_reps_set}
#
#         if is_first_expt:
#             reference_timepts_plus_reps_set = curr_timepts_plus_reps_set
#             is_first_expt = False
#         else:
#             if curr_timepts_plus_reps_set != reference_timepts_plus_reps_set:
#                 raise ValueError("Sample {0} has timepoints+replicates "
#                                  "'{1}' instead of the expected '{2}'".format(
#                     curr_expt_id, sorted(curr_timepts_plus_reps_set),
#                     sorted(reference_timepts_plus_reps_set)))
#
#
# def validate_data_column_headers(count_headers):
#     # ensure that every timepoint in an experiment has the exact same set of replicates
#     # ensure that every experiment has the exact same set of timepoints+replicates
#
#     expt_structure_by_id = {}
#     for curr_count_header in count_headers:
#         # Required count header format: experiment_timept_rep
#         count_header_pieces = _validate_and_decompose_count_header(curr_count_header)
#         some_id = count_header_pieces[0]
#         timept = _validate_and_standardize_timepoint(count_header_pieces[1])
#         replicate = count_header_pieces[2]
#
#         # fill out structure {some_id: {timept: {set of replicates}}} for use in
#         # validation after all columns are examined
#         if some_id not in expt_structure_by_id: expt_structure_by_id[some_id] = {}
#         curr_expt_structure = expt_structure_by_id[some_id]
#         if timept not in curr_expt_structure: curr_expt_structure[timept] = set()
#         curr_timept_replicates = curr_expt_structure[timept]
#         curr_timept_replicates.add(replicate)
#
#     _validate_expt_structure(expt_structure_by_id)


def _generate_scoring_friendly_annotation(annotation_df):
    """Return a pared-down annotation dataframe with target pair and probe pair id columns.

    Args:
        annotation_df: dataframe holding (at least) the standardized construct
            id, target A/B id, and probe A/B id columns; it is not modified.

    Returns:
        New dataframe with the construct id, target A id, probe A id,
        target B id, and probe B id columns, to which a target pair id column
        and a probe pair id column have been added by add_series_to_dataframe.
    """
    divider = get_header_divider()
    target_a_header = get_target_id_header("a")
    target_b_header = get_target_id_header("b")
    probe_a_header = get_probe_id_header("a")
    probe_b_header = get_probe_id_header("b")

    # select the columns with a list (a tuple can be read as a single
    # multi-level key) and take an explicit copy so that the columns added
    # below never touch, or warn about touching, annotation_df
    result = annotation_df.loc[:, [get_construct_header(), target_a_header,
        probe_a_header, target_b_header, probe_b_header]].copy()

    target_pairs = result[target_a_header] + divider + result[target_b_header]
    # NB: the two probe ids are joined by a doubled divider
    probe_pairs = result[probe_a_header] + divider + divider + result[probe_b_header]

    add_series_to_dataframe(result, target_pairs, get_target_pair_id_header())
    add_series_to_dataframe(result, probe_pairs, get_probe_pair_id_header())
    return result


def validate_and_recompose_count_header(expt_timeptnum_rep_tuple):
    """Build a standardized count column header from its three pieces.

    Args:
        expt_timeptnum_rep_tuple: (experiment id, timepoint, replicate); the
            pieces are validated and standardized before being joined.

    Returns:
        Header string in the format experiment_Ttimept_rep, i.e. the pieces
        joined by the header divider with the time prefix in front of the
        timepoint number.

    Raises:
        ValueError: if the pieces are not valid.
    """
    expt_id, timept_num, replicate = _validate_and_standardize_count_header_pieces(
        expt_timeptnum_rep_tuple)

    divider = get_header_divider()
    timept_label = "{}{}".format(get_time_prefix(), timept_num)
    return "{}{}{}{}{}".format(expt_id, divider, timept_label, divider, replicate)


In [9]:
from ccbbucsd.utilities.pandas_utils import add_series_to_dataframe, merge_files_by_shared_header

def load_annotation_df(constructs_fp, column_indices, disregard_order):
    """Load the construct annotation table, optionally putting A/B ids in sorted order.

    Args:
        constructs_fp: path to the constructs file.
        column_indices: indices of the constructs file columns to use.
        disregard_order: if truthy, then within each row the two target ids
            are swapped where needed so that the A id sorts before the B id,
            and likewise (independently) for the two probe ids.

    Returns:
        Dataframe of construct annotations with standardized column names.
    """
    construct_df = _read_in_construct_table(constructs_fp, column_indices, rows_to_skip=1)
    if not disregard_order:
        return construct_df

    # targets first, then probes
    for get_id_header in (get_target_id_header, get_probe_id_header):
        construct_df = _alphabetize_two_fields_in_row(construct_df,
            get_id_header("a"), get_id_header("b"))
    return construct_df


def _recompose_headers_from_tuples(header_pieces_tuples_list, sort=False):
    """Return the standardized header string for each tuple of header pieces.

    If sort is truthy, the tuples are sorted before the headers are built;
    otherwise the headers come back in input order.  The input list itself is
    never reordered.
    """
    ordered_tuples = header_pieces_tuples_list
    if sort:
        ordered_tuples = sorted(header_pieces_tuples_list)

    recomposed_headers = []
    for header_pieces in ordered_tuples:
        recomposed_headers.append(validate_and_recompose_count_header(header_pieces))
    return recomposed_headers


def _validate_and_rename_counts_columns(combined_counts_df):
    """Validate the data column headers of the counts dataframe and standardize them.

    Every column other than the construct id column is treated as a data
    column.  NB: combined_counts_df is modified in place--its data columns are
    renamed to their standardized headers.

    Returns:
        List of (experiment id, timepoint, replicate) tuples, one per data
        column, in column order.

    Raises:
        ValueError: if the data column headers are not valid, or if the
            dataframe has no construct id column.
    """
    original_headers = list(combined_counts_df.columns.values)
    original_headers.remove(get_construct_header())

    parsed_headers = validate_and_parse_data_column_headers(original_headers)

    # the headers are rebuilt from their parsed pieces rather than reused,
    # because standardization may have changed the pieces
    standardized_headers = _recompose_headers_from_tuples(parsed_headers)
    combined_counts_df.rename(
        columns={original: standardized for original, standardized
                 in zip(original_headers, standardized_headers)},
        inplace=True)

    return parsed_headers


def _get_sorted_joined_df_column_headers(minimal_annotation_df, header_pieces_tuples_list):
    """Return the column order for the joined dataframe.

    The annotation dataframe's columns come first, in their existing order,
    followed by the standardized data column headers built from the sorted
    header piece tuples.
    """
    annotation_headers = list(minimal_annotation_df.columns.values)
    return annotation_headers + _recompose_headers_from_tuples(header_pieces_tuples_list, True)


def merge_and_annotate_counts(count_file_fps, constructs_fp, column_indices, disregard_order=True):
    """Merge the count files, join them to the construct annotation, and order the columns.

    Args:
        count_file_fps: list of paths to the count files to merge.
        constructs_fp: path to the constructs (annotation) file.
        column_indices: indices of the constructs file columns to use.
        disregard_order: passed to load_annotation_df; if truthy, A/B target
            ids and A/B probe ids are put in sorted order within each row.

    Returns:
        Tuple of (experiment name taken from the first data column header,
        dataframe of annotation columns followed by sorted count columns).
    """
    id_header = get_construct_header()

    # counts: merge the file(s), then validate/standardize the data column headers
    counts_df = merge_files_by_shared_header(count_file_fps, id_header)
    parsed_headers = _validate_and_rename_counts_columns(counts_df)
    expt_name = parsed_headers[0][0]

    # annotation: load the construct definitions and pare them down for scoring
    scoring_annotation_df = _generate_scoring_friendly_annotation(
        load_annotation_df(constructs_fp, column_indices, disregard_order))

    # join on construct id, then put the columns into the required order
    merged_df = scoring_annotation_df.merge(counts_df, on=id_header)
    ordered_headers = _get_sorted_joined_df_column_headers(scoring_annotation_df,
        parsed_headers)
    return expt_name, merged_df.loc[:, ordered_headers]


def merge_and_write_timepoint_counts(count_file_fps, constructs_fp, column_indices, disregard_order=True):
    """Merge and annotate the counts, then write them to a csv file in the output directory.

    The file is written to the directory named by the notebook-level
    g_scoring_ready_counts_dir variable, as <experiment name>_timepoint_counts.csv,
    without the dataframe index.

    Args:
        count_file_fps: list of paths to the count files to merge.
        constructs_fp: path to the constructs (annotation) file.
        column_indices: indices of the constructs file columns to use.
        disregard_order: passed through to merge_and_annotate_counts.

    Returns:
        None
    """
    # NB: pass the caller's disregard_order through rather than a hard-coded
    # True, so that disregard_order=False is actually honored
    expt_name, joined_df = merge_and_annotate_counts(count_file_fps, constructs_fp,
        column_indices, disregard_order=disregard_order)
    output_fp = os.path.join(g_scoring_ready_counts_dir,
        "{0}_timepoint_counts.csv".format(expt_name))
    joined_df.to_csv(output_fp, index=False)

In [10]:
# write the merged, annotated timepoint counts for the input count files,
# disregarding the A/B order of the targets and probes in each construct
merge_and_write_timepoint_counts(g_count_file_fps, g_constructs_fp, g_col_indices, True)