# Introduction

The goal of this analysis is to select and predict CRISPR perturbations of desired motifs at a specific locus.

# Computational setup

In [1]:
import warnings
warnings.filterwarnings("ignore")
from tensorflow.python.util import deprecation
deprecation._PRINT_DEPRECATION_WARNINGS = False

#Packages
import os
import sys
import pandas as pd
import numpy as np
from tqdm import tqdm
from pybedtools import BedTool
import keras.backend as K
from keras.models import load_model

## Working options
# Every relative path used later in this notebook is resolved against this
# working directory, so it must be set before any file access or local import.
os.chdir(f'/n/projects/mw2098/publications/2024_weilert_acc/code/2_analysis/')
pd.set_option('display.max_columns', 100)
# NOTE(review): absolute, user-specific paths; adjust when running elsewhere.
bpreveal_path = '/n/projects/mw2098/publications/2024_weilert_acc/public/software/bpreveal_404/'
python_path = '/home/mw2098/anaconda3/envs/bpreveal_404/bin/python'

# The search path must be extended *before* `import losses`; presumably the
# module lives under `<bpreveal_path>/src` -- TODO confirm.
sys.path.insert(0, f'{bpreveal_path}/src')
import losses

## Custom functions
# Project helpers are imported from `scripts/py/functions`, relative to the
# working directory set above.
sys.path.insert(0, f'scripts/py/functions')
from functional import shuffle_seqs, one_hot_encode_sequence, one_hot_encode_sequences, \
    one_hot_decode_sequence, insert_motif, logitsToProfile
from motifs import extract_seqs_from_df, resize_coordinates


#Pre-existing variables

# Motif name -> task name.
motif_to_task_dict = {
    'Oct4-Sox2': 'oct4',
    'Sox2': 'sox2',
    'Klf4': 'klf4',
    'Zic3': 'zic3',
    'Nanog': 'nanog',
}

# Task name -> narrowPeak file holding that task's regions.
region_dict = {
    'oct4': 'narrowpeak/mesc_oct4_nexus_peaks.narrowPeak',
    'sox2': 'narrowpeak/mesc_sox2_nexus_peaks.narrowPeak',
    'klf4': 'narrowpeak/mesc_klf4_nexus_peaks.narrowPeak',
    'nanog': 'narrowpeak/mesc_nanog_nexus_peaks.narrowPeak',
    'zic3': 'narrowpeak/mesc_zic3_nexus_peaks.narrowPeak',
}

# ATAC time-course timepoints: 0, 3, ..., 15.
concentration_atac_timepoints = [hour for hour in range(0, 16, 3)]

# One entry per model: its tasks, coverage bigwig(s), saved-model directory
# and number of output channels.
modeling_design_dict = {
    'bpnet_osknz': {
        'tasks': list(region_dict),
        # Stranded coverage: one positive and one negative bigwig per task.
        'cov': {
            task: {
                'pos': f'bw/mesc_{task}_nexus_combined_positive.bw',
                'neg': f'bw/mesc_{task}_nexus_combined_negative.bw',
            }
            for task in region_dict
        },
        'model_dir': 'models/bpnet_osknz_fold1.model',
        'num-channels': 2,
    },
    # The three wild-type ATAC folds differ only in the model directory.
    **{
        f'atac_wt_fold{fold}': {
            'tasks': ['atac'],
            'cov': 'bw/mesc_native_atac_cutsites_combined.bw',
            'model_dir': f'models/atac_wt_fold{fold}_residual.model/',
            'num-channels': 1,
        }
        for fold in (1, 2, 3)
    },
}

# One single-channel ATAC model per time-course timepoint.
for timepoint in concentration_atac_timepoints:
    modeling_design_dict[f'atac_{timepoint}h'] = dict([
        ('tasks', ['atac']),
        ('cov', f'bw/GSE174774_mesc_atac_{timepoint}h_combined.bw'),
        ('model_dir', f'models/atac_{timepoint}h_fold1_residual.model/'),
        ('num-channels', 1),
    ])

# Model geometry: the flank is the extra input on each side of the output window.
input_length, output_length = 2032, 1000
flank_length = (input_length - output_length)//2
trials = 256
seed = 2356

# Input/output locations, relative to the working directory.
genome = '../0_setup/fa/mm10.fa'
figure_path = 'figures/13_perturb_crispr'
regions_path = 'bed/mapped_motifs/all_islands_curated_0based_sized_to_input.bed'
motifs_path = 'tsv/mapped_motifs/all_instances_curated_0based_w_perturb.tsv.gz'

# Create the output directories with the standard library rather than the
# IPython-only `!mkdir -p`, so the cell also runs as a plain Python script and
# on systems without a POSIX shell. `exist_ok=True` mirrors `mkdir -p`.
os.makedirs(figure_path, exist_ok=True)
os.makedirs('tsv/genomic/crispr', exist_ok=True)

2024-03-06 08:09:49.582397: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: SSE4.1 SSE4.2 AVX AVX2 AVX512F FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


Import regions of interest.

In [2]:
# Column names are supplied explicitly via `names`, so the file is read
# without a header row.
regions_df = pd.read_csv(
    regions_path,
    sep = '\t',
    names = ['chrom', 'start', 'end', 'region_id', 'score', 'strand'],
)

# Perturb Avsec et al published site

This is the site CRISPRed in Figure 6 of Avsec et al. (2021). What was not previously noted is that a low-affinity Oct4-Sox2 motif lies between the Sox2 and Nanog motifs and contributes strongly to accessibility and binding. This makes it an ideal candidate for exploring three questions: (1) can low-affinity motifs be cooperative, (2) can we increase cooperativity by increasing affinity, and (3) how does the distance between the two motifs influence the designated cooperativity?

For this case, motifA will be Sox2, and motifB will be Oct4-Sox2.

In [3]:
# Identifiers of the two motif instances and of the region that contains them.
sox2_motif_id = 191220
oct4sox2_motif_id = 139897
region_id = 86159

#Isolate motifs
motifs_df = pd.read_csv(motifs_path,  sep = '\t')
# Shift pattern centers by the model flank.
# NOTE(review): presumably this moves them from output-window to input-window
# coordinates so they line up with `motif_window_start/end` -- confirm.
motifs_df['pattern_center'] = motifs_df['pattern_center'] + flank_length

# `.copy()` gives each selection its own data, so adding `wt_seq` below is a
# well-defined write to an independent frame rather than a chained assignment
# onto a slice of `motifs_df`. The resulting SettingWithCopyWarning was being
# hidden by the global warning filter.
sox2_motif_df = motifs_df[motifs_df.motif_id==sox2_motif_id].copy()
sox2_motif_df['wt_seq'] = extract_seqs_from_df(sox2_motif_df, genome)
oct4sox2_motif_df = motifs_df[motifs_df.motif_id==oct4sox2_motif_id].copy()
oct4sox2_motif_df['wt_seq'] = extract_seqs_from_df(oct4sox2_motif_df, genome)

#Keep metadata of use
columns_of_interest = ['motif', 'wt_seq', 'motif_window_start', 'motif_window_end', 'pattern_center', 'seq_match_quantile', 'contrib_magnitude_quantile']
sox2_df = sox2_motif_df[columns_of_interest]
oct4sox2_df = oct4sox2_motif_df[columns_of_interest]
print(sox2_df)
print(oct4sox2_df)

       motif     wt_seq  motif_window_start  motif_window_end  pattern_center  \
211837  Sox2  GCCTTTGTT                 770               779             774   

        seq_match_quantile  contrib_magnitude_quantile  
211837                 1.0                    0.984379  
            motif           wt_seq  motif_window_start  motif_window_end  \
209052  Oct4-Sox2  AATTATAATGATAAT                 804               819   

        pattern_center  seq_match_quantile  contrib_magnitude_quantile  
209052             811            0.184686                    0.755007  


In [4]:
# region_start = sox2_motif_df.region_start.values[0]
# region_start
# #original: 85539378

Print region start coordinate.

In [5]:
# `region_start` of the (first) Sox2 motif row; later cells add it to
# window-relative coordinates to obtain genomic coordinates.
region_start = sox2_motif_df['region_start'].to_numpy()[0]
region_start

85538862

Overwrite distance injection for `dist_coop` scenario.

In [6]:
# Element-wise difference of the two pattern centers (Oct4-Sox2 minus Sox2).
print('Current center-to-center distance: ',
      np.subtract(oct4sox2_df['pattern_center'].values, sox2_df['pattern_center'].values))

Current center-to-center distance:  [37]


In [7]:
# Target center-to-center spacing for the `dist_coop` scenario.
distal_center_to_center_distance = 140
# Extra shift to apply to Oct4-Sox2 so its center sits at the target spacing
# from the Sox2 center.
distal_to_add = distal_center_to_center_distance - (
    oct4sox2_df['pattern_center'].values[0] - sox2_df['pattern_center'].values[0]
)
distal_to_add

103

## Define Sox2

Define all CRISPR scenarios for Sox2. Sox2 will not change in position or be enhanced by affinity.

In [8]:
# One row per state. States 'B' and 'null' carry the altered Sox2 sequence
# (GCCTAGGTT); 'AB' and 'A' keep the wild-type sequence (GCCTTTGTT).
sox2_states_df = pd.DataFrame({
    'state': ['AB', 'A', 'B', 'null'],
    'forward_seq': ['GCCTTTGTT', 'GCCTTTGTT', 'GCCTAGGTT', 'GCCTAGGTT'],
})
sox2_states_df['motif'] = 'Sox2'
# Attach the motif coordinates; `motif` is the only column the frames share.
sox2_states_df = sox2_states_df.merge(
    sox2_df.drop(columns = ['seq_match_quantile', 'contrib_magnitude_quantile']),
    on = 'motif',
)
sox2_states_df

Unnamed: 0,state,forward_seq,motif,wt_seq,motif_window_start,motif_window_end,pattern_center
0,AB,GCCTTTGTT,Sox2,GCCTTTGTT,770,779,774
1,A,GCCTTTGTT,Sox2,GCCTTTGTT,770,779,774
2,B,GCCTAGGTT,Sox2,GCCTTTGTT,770,779,774
3,,GCCTAGGTT,Sox2,GCCTTTGTT,770,779,774


## Define Oct4-Sox2

First, define default state where the low-affinity WT Oct4-Sox2 is mutated

In [9]:
# Each scenario is described once: (name, intact Oct4-Sox2 sequence, shift in bp).
# The four states per scenario are generated from this table instead of being
# typed out as parallel 12-element lists, which are easy to get out of sync.
oct4sox2_mutated_seq = 'AATCATAAGGATAAT'
oct4sox2_scenario_defs = [
    ('WT_coop', 'AATTATAATGATAAT', 0),
    ('enh_coop', 'AATTGTAATGCTAAT', 0),
    ('dist_coop', 'AATTATAATGATAAT', int(distal_to_add)),
]

# States containing 'B' ('AB', 'B') keep the scenario's intact Oct4-Sox2
# sequence; 'A' and 'null' receive the mutated sequence.
# Building from records (rather than transposing a mixed str/int list of
# lists) keeps `distance_to_add` an integer column instead of object dtype,
# so the coordinate arithmetic that follows is done on integers.
oct4sox2_states_df = pd.DataFrame(
    [
        {
            'scenario': scenario_name,
            'state': state_name,
            'forward_seq': intact_seq if 'B' in state_name else oct4sox2_mutated_seq,
            'distance_to_add': shift,
        }
        for scenario_name, intact_seq, shift in oct4sox2_scenario_defs
        for state_name in ['AB', 'A', 'B', 'null']
    ],
    columns = ['scenario', 'state', 'forward_seq', 'distance_to_add'],
)
oct4sox2_states_df['motif'] = 'Oct4-Sox2'
# Attach the motif coordinates; `motif` is the only column the frames share.
oct4sox2_states_df = oct4sox2_states_df.merge(
    oct4sox2_df.drop(['seq_match_quantile', 'contrib_magnitude_quantile'], axis = 1),
    on = 'motif',
)

In [10]:
# Shift the Oct4-Sox2 coordinates by each scenario's `distance_to_add`.
# The shift is applied to the unshifted coordinates in `oct4sox2_df`, not to
# the columns being overwritten, so re-running this cell cannot shift twice.
# NOTE(review): relies on `oct4sox2_df` holding a single motif row, as in the
# printed output -- confirm if the motif selection changes.
oct4sox2_states_df['motif_window_start'] = oct4sox2_df['motif_window_start'].values[0] + oct4sox2_states_df['distance_to_add']
oct4sox2_states_df['motif_window_end'] = oct4sox2_df['motif_window_end'].values[0] + oct4sox2_states_df['distance_to_add']
oct4sox2_states_df['pattern_center'] = oct4sox2_df['pattern_center'].values[0] + oct4sox2_states_df['distance_to_add']

In [11]:
# Display the Oct4-Sox2 scenario table after the coordinate shift.
oct4sox2_states_df

Unnamed: 0,scenario,state,forward_seq,distance_to_add,motif,wt_seq,motif_window_start,motif_window_end,pattern_center
0,WT_coop,AB,AATTATAATGATAAT,0,Oct4-Sox2,AATTATAATGATAAT,804,819,811
1,WT_coop,A,AATCATAAGGATAAT,0,Oct4-Sox2,AATTATAATGATAAT,804,819,811
2,WT_coop,B,AATTATAATGATAAT,0,Oct4-Sox2,AATTATAATGATAAT,804,819,811
3,WT_coop,,AATCATAAGGATAAT,0,Oct4-Sox2,AATTATAATGATAAT,804,819,811
4,enh_coop,AB,AATTGTAATGCTAAT,0,Oct4-Sox2,AATTATAATGATAAT,804,819,811
5,enh_coop,A,AATCATAAGGATAAT,0,Oct4-Sox2,AATTATAATGATAAT,804,819,811
6,enh_coop,B,AATTGTAATGCTAAT,0,Oct4-Sox2,AATTATAATGATAAT,804,819,811
7,enh_coop,,AATCATAAGGATAAT,0,Oct4-Sox2,AATTATAATGATAAT,804,819,811
8,dist_coop,AB,AATTATAATGATAAT,103,Oct4-Sox2,AATTATAATGATAAT,907,922,914
9,dist_coop,A,AATCATAAGGATAAT,103,Oct4-Sox2,AATTATAATGATAAT,907,922,914


## Merge coordinates together

In [12]:
# Pair every Oct4-Sox2 scenario row with the Sox2 row of the same state.
# Columns shared by both frames get the `_OS` (Oct4-Sox2) / `_S` (Sox2) suffix.
crispr_scenarios_df = oct4sox2_states_df.merge(
    sox2_states_df, on = 'state', how = 'left', suffixes = ('_OS', '_S')
)
# Window-relative coordinates -> genomic coordinates by adding `region_start`.
crispr_scenarios_df = crispr_scenarios_df.assign(
    genomic_start_OS = crispr_scenarios_df['motif_window_start_OS'] + region_start,
    genomic_end_OS = crispr_scenarios_df['motif_window_end_OS'] + region_start,
    genomic_center_OS = crispr_scenarios_df['pattern_center_OS'] + region_start,
    genomic_start_S = crispr_scenarios_df['motif_window_start_S'] + region_start,
    genomic_end_S = crispr_scenarios_df['motif_window_end_S'] + region_start,
    genomic_center_S = crispr_scenarios_df['pattern_center_S'] + region_start,
)

In [13]:
# Display the merged scenario table with window-relative and genomic coordinates.
crispr_scenarios_df

Unnamed: 0,scenario,state,forward_seq_OS,distance_to_add,motif_OS,wt_seq_OS,motif_window_start_OS,motif_window_end_OS,pattern_center_OS,forward_seq_S,motif_S,wt_seq_S,motif_window_start_S,motif_window_end_S,pattern_center_S,genomic_start_OS,genomic_end_OS,genomic_center_OS,genomic_start_S,genomic_end_S,genomic_center_S
0,WT_coop,AB,AATTATAATGATAAT,0,Oct4-Sox2,AATTATAATGATAAT,804,819,811,GCCTTTGTT,Sox2,GCCTTTGTT,770,779,774,85539666,85539681,85539673,85539632,85539641,85539636
1,WT_coop,A,AATCATAAGGATAAT,0,Oct4-Sox2,AATTATAATGATAAT,804,819,811,GCCTTTGTT,Sox2,GCCTTTGTT,770,779,774,85539666,85539681,85539673,85539632,85539641,85539636
2,WT_coop,B,AATTATAATGATAAT,0,Oct4-Sox2,AATTATAATGATAAT,804,819,811,GCCTAGGTT,Sox2,GCCTTTGTT,770,779,774,85539666,85539681,85539673,85539632,85539641,85539636
3,WT_coop,,AATCATAAGGATAAT,0,Oct4-Sox2,AATTATAATGATAAT,804,819,811,GCCTAGGTT,Sox2,GCCTTTGTT,770,779,774,85539666,85539681,85539673,85539632,85539641,85539636
4,enh_coop,AB,AATTGTAATGCTAAT,0,Oct4-Sox2,AATTATAATGATAAT,804,819,811,GCCTTTGTT,Sox2,GCCTTTGTT,770,779,774,85539666,85539681,85539673,85539632,85539641,85539636
5,enh_coop,A,AATCATAAGGATAAT,0,Oct4-Sox2,AATTATAATGATAAT,804,819,811,GCCTTTGTT,Sox2,GCCTTTGTT,770,779,774,85539666,85539681,85539673,85539632,85539641,85539636
6,enh_coop,B,AATTGTAATGCTAAT,0,Oct4-Sox2,AATTATAATGATAAT,804,819,811,GCCTAGGTT,Sox2,GCCTTTGTT,770,779,774,85539666,85539681,85539673,85539632,85539641,85539636
7,enh_coop,,AATCATAAGGATAAT,0,Oct4-Sox2,AATTATAATGATAAT,804,819,811,GCCTAGGTT,Sox2,GCCTTTGTT,770,779,774,85539666,85539681,85539673,85539632,85539641,85539636
8,dist_coop,AB,AATTATAATGATAAT,103,Oct4-Sox2,AATTATAATGATAAT,907,922,914,GCCTTTGTT,Sox2,GCCTTTGTT,770,779,774,85539769,85539784,85539776,85539632,85539641,85539636
9,dist_coop,A,AATCATAAGGATAAT,103,Oct4-Sox2,AATTATAATGATAAT,907,922,914,GCCTTTGTT,Sox2,GCCTTTGTT,770,779,774,85539769,85539784,85539776,85539632,85539641,85539636


## Generate predictions based on the different scenarios

First, prepare WT genomic sequences according to input accessibility features.

In [14]:
# Select the region of interest. `.copy()` makes the selection an independent
# frame, so adding `wt_seq` is a well-defined write rather than a chained
# assignment onto a slice of `regions_df` (the resulting SettingWithCopyWarning
# was being hidden by the global warning filter).
crispr_region_df = regions_df[regions_df.region_id==region_id].copy()
crispr_region_df['wt_seq'] = extract_seqs_from_df(crispr_region_df, genome)

In [15]:
# Show the selected region, then the first and last 10 bases of its sequence.
print(crispr_region_df)
print(crispr_region_df['wt_seq'].values[0][0:10])
print(crispr_region_df['wt_seq'].values[0][-10:])

       chrom     start       end  region_id  score strand  \
86159  chr10  85538862  85540894      86159      0      .   

                                                  wt_seq  
86159  CAAATATCAAGGCTTTCCAGTACTTTGCTACCATATCCTAAGAACC...  
CAAATATCAA
AGCCCAGGTT


In [16]:
# Write the selected region as a headerless, index-free, tab-separated file.
# NOTE(review): at this point the frame also carries the `wt_seq` column, so
# the sequence is written as an extra trailing column -- confirm downstream
# consumers of this .bed expect that.
crispr_region_df.to_csv(
    'tsv/genomic/crispr/crispr_coop_coord.bed',
    sep = '\t',
    header = False,
    index = False,
)

Next, import model.

In [17]:
# Load the fold-1 wild-type ATAC model. The custom loss names stored with the
# model are resolved via `custom_objects`; 'reweightableMse' is mapped to
# `losses.dummyMse`.
acc_model = load_model(
    modeling_design_dict['atac_wt_fold1']['model_dir'],
    custom_objects = {
        'multinomialNll' : losses.multinomialNll,
        'reweightableMse': losses.dummyMse,
    },
)

Next, inject motifs.

In [18]:
wt_seq = crispr_region_df.wt_seq.values[0]


def _overwrite_window(seq, start, end, replacement):
    """Return `seq` with the half-open window [start, end) replaced by `replacement`."""
    return seq[:start] + replacement + seq[end:]


# Window of the Oct4-Sox2 motif at its native (unshifted) position, read from
# the first scenario row (WT_coop, distance_to_add == 0). Defined once, before
# the loop: it is loop-invariant, and a later cell reads these names, so they
# must exist whether or not a `dist_coop` row is encountered.
original_os_start = crispr_scenarios_df.motif_window_start_OS.values[0]
original_os_end = crispr_scenarios_df.motif_window_end_OS.values[0]

crispr_seqs = []
for i,row in crispr_scenarios_df.iterrows():
    #Overwrite Sox2 motif (identical step for every scenario)
    s_seq = _overwrite_window(wt_seq, row.motif_window_start_S, row.motif_window_end_S, row.forward_seq_S)

    if row.scenario=='dist_coop':
        new_os_start = row.motif_window_start_OS
        new_os_end = row.motif_window_end_OS

        #Relocate the Oct4-Sox2 motif by swapping two windows: the wild-type
        #bases found at the new position are moved into the native motif window,
        #and the scenario's Oct4-Sox2 sequence is written at the new position.
        #The sequence length is unchanged.
        final_seq = s_seq[:(original_os_start)] + \
        wt_seq[(new_os_start):(new_os_end)] + \
        s_seq[(original_os_end):(new_os_start)] + \
        row.forward_seq_OS + \
        s_seq[(new_os_end):]
    else:
        #Overwrite Oct4-Sox2 motif in place
        final_seq = _overwrite_window(s_seq, row.motif_window_start_OS, row.motif_window_end_OS, row.forward_seq_OS)

    #Every edit must preserve the model input length
    assert len(final_seq)==input_length
    crispr_seqs.append(final_seq)

crispr_scenarios_df['inj_seq'] = crispr_seqs
crispr_scenarios_df['scenario_index'] = list(range(crispr_scenarios_df.shape[0]))

In [19]:
# Extract the 600 bp of each injected sequence starting at genomic position
# 85539400, converted to an offset within the region.
window_offset = 85539400 - crispr_region_df.start.values[0]
crispr_scenarios_df['inj_seq_85539400_to_85540000'] = [
    injected[window_offset:(window_offset + 600)]
    for injected in crispr_scenarios_df['inj_seq'].values
]
crispr_scenarios_df

Unnamed: 0,scenario,state,forward_seq_OS,distance_to_add,motif_OS,wt_seq_OS,motif_window_start_OS,motif_window_end_OS,pattern_center_OS,forward_seq_S,motif_S,wt_seq_S,motif_window_start_S,motif_window_end_S,pattern_center_S,genomic_start_OS,genomic_end_OS,genomic_center_OS,genomic_start_S,genomic_end_S,genomic_center_S,inj_seq,scenario_index,inj_seq_85539400_to_85540000
0,WT_coop,AB,AATTATAATGATAAT,0,Oct4-Sox2,AATTATAATGATAAT,804,819,811,GCCTTTGTT,Sox2,GCCTTTGTT,770,779,774,85539666,85539681,85539673,85539632,85539641,85539636,CAAATATCAAGGCTTTCCAGTACTTTGCTACCATATCCTAAGAACC...,0,TTTGTTAGACCAGTGGAAGTGGGGATAGAGGTGGGAAGAGAGGATG...
1,WT_coop,A,AATCATAAGGATAAT,0,Oct4-Sox2,AATTATAATGATAAT,804,819,811,GCCTTTGTT,Sox2,GCCTTTGTT,770,779,774,85539666,85539681,85539673,85539632,85539641,85539636,CAAATATCAAGGCTTTCCAGTACTTTGCTACCATATCCTAAGAACC...,1,TTTGTTAGACCAGTGGAAGTGGGGATAGAGGTGGGAAGAGAGGATG...
2,WT_coop,B,AATTATAATGATAAT,0,Oct4-Sox2,AATTATAATGATAAT,804,819,811,GCCTAGGTT,Sox2,GCCTTTGTT,770,779,774,85539666,85539681,85539673,85539632,85539641,85539636,CAAATATCAAGGCTTTCCAGTACTTTGCTACCATATCCTAAGAACC...,2,TTTGTTAGACCAGTGGAAGTGGGGATAGAGGTGGGAAGAGAGGATG...
3,WT_coop,,AATCATAAGGATAAT,0,Oct4-Sox2,AATTATAATGATAAT,804,819,811,GCCTAGGTT,Sox2,GCCTTTGTT,770,779,774,85539666,85539681,85539673,85539632,85539641,85539636,CAAATATCAAGGCTTTCCAGTACTTTGCTACCATATCCTAAGAACC...,3,TTTGTTAGACCAGTGGAAGTGGGGATAGAGGTGGGAAGAGAGGATG...
4,enh_coop,AB,AATTGTAATGCTAAT,0,Oct4-Sox2,AATTATAATGATAAT,804,819,811,GCCTTTGTT,Sox2,GCCTTTGTT,770,779,774,85539666,85539681,85539673,85539632,85539641,85539636,CAAATATCAAGGCTTTCCAGTACTTTGCTACCATATCCTAAGAACC...,4,TTTGTTAGACCAGTGGAAGTGGGGATAGAGGTGGGAAGAGAGGATG...
5,enh_coop,A,AATCATAAGGATAAT,0,Oct4-Sox2,AATTATAATGATAAT,804,819,811,GCCTTTGTT,Sox2,GCCTTTGTT,770,779,774,85539666,85539681,85539673,85539632,85539641,85539636,CAAATATCAAGGCTTTCCAGTACTTTGCTACCATATCCTAAGAACC...,5,TTTGTTAGACCAGTGGAAGTGGGGATAGAGGTGGGAAGAGAGGATG...
6,enh_coop,B,AATTGTAATGCTAAT,0,Oct4-Sox2,AATTATAATGATAAT,804,819,811,GCCTAGGTT,Sox2,GCCTTTGTT,770,779,774,85539666,85539681,85539673,85539632,85539641,85539636,CAAATATCAAGGCTTTCCAGTACTTTGCTACCATATCCTAAGAACC...,6,TTTGTTAGACCAGTGGAAGTGGGGATAGAGGTGGGAAGAGAGGATG...
7,enh_coop,,AATCATAAGGATAAT,0,Oct4-Sox2,AATTATAATGATAAT,804,819,811,GCCTAGGTT,Sox2,GCCTTTGTT,770,779,774,85539666,85539681,85539673,85539632,85539641,85539636,CAAATATCAAGGCTTTCCAGTACTTTGCTACCATATCCTAAGAACC...,7,TTTGTTAGACCAGTGGAAGTGGGGATAGAGGTGGGAAGAGAGGATG...
8,dist_coop,AB,AATTATAATGATAAT,103,Oct4-Sox2,AATTATAATGATAAT,907,922,914,GCCTTTGTT,Sox2,GCCTTTGTT,770,779,774,85539769,85539784,85539776,85539632,85539641,85539636,CAAATATCAAGGCTTTCCAGTACTTTGCTACCATATCCTAAGAACC...,8,TTTGTTAGACCAGTGGAAGTGGGGATAGAGGTGGGAAGAGAGGATG...
9,dist_coop,A,AATCATAAGGATAAT,103,Oct4-Sox2,AATTATAATGATAAT,907,922,914,GCCTTTGTT,Sox2,GCCTTTGTT,770,779,774,85539769,85539784,85539776,85539632,85539641,85539636,CAAATATCAAGGCTTTCCAGTACTTTGCTACCATATCCTAAGAACC...,9,TTTGTTAGACCAGTGGAAGTGGGGATAGAGGTGGGAAGAGAGGATG...


In [20]:
# Persist the scenario table (with injected sequences) as a tab-separated file.
# NOTE(review): the `state` value 'null' is one of pandas.read_csv's default NA
# strings, so it reads back as NaN unless the reader passes
# `keep_default_na=False` -- confirm how downstream code loads this file.
crispr_scenarios_df.to_csv(
    'tsv/genomic/crispr/crispr_coop_scenarios.tsv.gz',
    index = False,
    sep = '\t',
)

In [21]:
# Check: bases found in the native Oct4-Sox2 window of every injected sequence.
native_os_window = slice(original_os_start, original_os_end)
[injected[native_os_window] for injected in crispr_scenarios_df['inj_seq'].values]

['AATTATAATGATAAT',
 'AATCATAAGGATAAT',
 'AATTATAATGATAAT',
 'AATCATAAGGATAAT',
 'AATTGTAATGCTAAT',
 'AATCATAAGGATAAT',
 'AATTGTAATGCTAAT',
 'AATCATAAGGATAAT',
 'TGGAAGTTCTCCATT',
 'TGGAAGTTCTCCATT',
 'TGGAAGTTCTCCATT',
 'TGGAAGTTCTCCATT']

Predict injected sequences.

In [22]:
#Predict every injected sequence with each wild-type ATAC fold and convert the
#predicted logits and log-counts into per-position profiles.

#The one-hot encoding does not depend on the model, so it is computed once.
crispr_onehot = one_hot_encode_sequences(crispr_scenarios_df['inj_seq'].values)

#Collect the per-(model, scenario) frames and concatenate once at the end;
#growing a DataFrame with pd.concat inside the loop re-copies all previous rows
#on every iteration and starts from an empty, dtype-less frame.
profile_dfs = []
for model_name in ['atac_wt_fold1', 'atac_wt_fold2', 'atac_wt_fold3']:
    acc_model = load_model(modeling_design_dict[model_name]['model_dir'],
                           custom_objects = {'multinomialNll' : losses.multinomialNll, 'reweightableMse': losses.dummyMse})
    #crispr_preds[0] is passed on as logits, crispr_preds[1] as log-counts
    crispr_preds = acc_model.predict(crispr_onehot)

    for i in crispr_scenarios_df['scenario_index'].values:
        profile = logitsToProfile(logitsAcrossSingleRegion = crispr_preds[0][i],
                                  logCountsAcrossSingleRegion = crispr_preds[1][i])
        #Convert to tidy pd.df
        df = pd.DataFrame(profile, columns = ['pred'])
        df['position'] = list(range(df.shape[0]))
        #Profile positions start `flank_length` bases into the input region
        df['genomic_position'] = df['position'] + region_start + flank_length
        df['task'] = 'atac'
        df['model_name'] = model_name
        df['scenario_index'] = i
        profile_dfs.append(df)

crispr_df = pd.concat(profile_dfs)
#Attach scenario metadata; `scenario_index` is the only shared column, named
#explicitly so the join key does not depend on which columns happen to overlap.
crispr_df = crispr_df.merge(crispr_scenarios_df[['scenario_index', 'scenario', 'state', 'genomic_center_OS', 'genomic_center_S']],
                            how = 'left', on = 'scenario_index')
crispr_df.head(n=10)



Unnamed: 0,pred,position,genomic_position,task,model_name,scenario_index,scenario,state,genomic_center_OS,genomic_center_S
0,4.678259,0,85539378,atac,atac_wt_fold1,0,WT_coop,AB,85539673,85539636
1,5.226127,1,85539379,atac,atac_wt_fold1,0,WT_coop,AB,85539673,85539636
2,3.08679,2,85539380,atac,atac_wt_fold1,0,WT_coop,AB,85539673,85539636
3,2.628764,3,85539381,atac,atac_wt_fold1,0,WT_coop,AB,85539673,85539636
4,3.779432,4,85539382,atac,atac_wt_fold1,0,WT_coop,AB,85539673,85539636
5,4.455588,5,85539383,atac,atac_wt_fold1,0,WT_coop,AB,85539673,85539636
6,1.991279,6,85539384,atac,atac_wt_fold1,0,WT_coop,AB,85539673,85539636
7,3.31211,7,85539385,atac,atac_wt_fold1,0,WT_coop,AB,85539673,85539636
8,4.876622,8,85539386,atac,atac_wt_fold1,0,WT_coop,AB,85539673,85539636
9,4.706685,9,85539387,atac,atac_wt_fold1,0,WT_coop,AB,85539673,85539636


In [23]:
# Persist the tidy per-position predictions as a tab-separated file.
crispr_df.to_csv(
    'tsv/genomic/crispr/crispr_coop_predictions.tsv.gz',
    index = False,
    sep = '\t',
)