# Design CTP-14 human genome DNA-MERFISH library 

by Pu Zheng

2022.10.14


# Table of contents


> 0. [Minimum required packages and settings](#0)
>>
>> 0.1: [import required packages](#0.1)
>
> 1. [Extract region sequences](#1)

<a id='0'></a>
# 0 Minimum required packages and settings

<a id='0.1'></a>
## 0.1 load required packages

In [3]:
%run "..\..\Startup_py3.py"
sys.path.append(r"..\..\..\..\Documents")

import ImageAnalysis3 as ia
%matplotlib notebook

from ImageAnalysis3 import *
print(os.getpid())

# library design specific tools
from ImageAnalysis3.library_tools import LibraryDesigner as ld
from ImageAnalysis3.library_tools import LibraryTools as lt
# biopython imports
from Bio import SeqIO
from Bio.Seq import Seq
from Bio.SeqRecord import SeqRecord
from Bio.Blast.Applications import NcbiblastnCommandline
from Bio.Blast import NCBIXML

18804


<a id='1'></a>
# 1 Extract region sequences

In [4]:
## Some folders
# human genome
reference_folder = r'\\10.245.74.212\Chromatin_NAS_2\Chromatin_Libraries\Genomes\human\hg38'
genome_folder = os.path.join(reference_folder, 'Genome')
# Library directories
pool_folder = r'\\10.245.74.212\Chromatin_NAS_2\Chromatin_Libraries\CTP-14_human_brain'
if not os.path.exists(pool_folder):
    print(f"Create pool_folder: {pool_folder}")
    os.makedirs(pool_folder)
else:
    print(f"Use pool_folder: {pool_folder}")

Use pool_folder: \\10.245.74.212\Chromatin_NAS_2\Chromatin_Libraries\CTP-14_human_brain


In [5]:
resolution = 0
flanking = 10000
# folder for sub-pool
library_folder = os.path.join(pool_folder, f'human_brain_promoter_enhancer')
if not os.path.exists(library_folder):
    print(f"create library folder: {library_folder}")
    os.makedirs(library_folder)
# folder for fasta sequences
sequence_folder = os.path.join(library_folder, 'sequences_enhancers')
if not os.path.exists(sequence_folder):
    print(f"create sequence folder: {sequence_folder}")
    os.makedirs(sequence_folder)
# folder to save result probes
report_folder = os.path.join(library_folder, 'reports')
if not os.path.exists(report_folder):
    print(f"create report folder: {report_folder}")
    os.makedirs(report_folder)
    
print(f"-- library_folder: {library_folder}")
print(f"-- sequence_folder: {sequence_folder}")
print(f"-- report_folder: {report_folder}")

-- library_folder: \\10.245.74.212\Chromatin_NAS_2\Chromatin_Libraries\CTP-14_human_brain\human_brain_promoter_enhancer
-- sequence_folder: \\10.245.74.212\Chromatin_NAS_2\Chromatin_Libraries\CTP-14_human_brain\human_brain_promoter_enhancer\sequences_enhancers
-- report_folder: \\10.245.74.212\Chromatin_NAS_2\Chromatin_Libraries\CTP-14_human_brain\human_brain_promoter_enhancer\reports


<a id='1.1'></a>
## 1.1 select regions

### Load selected SEs

In [6]:
import os
import pandas as pd
se_df_filename = os.path.join(library_folder, 'hM1_selected_final_cell_type_SE.csv')

se_df = pd.read_csv(se_df_filename).rename(columns={'Unnamed: 0':'region_name'})

In [7]:
se_df

Unnamed: 0,region_name,Imaged_mSE,liftover_th,covering_hSE_100kb_center_to_liftover,human_cell_type,mouse_cell_type,shared_cell_type,chr,merged_START,merged_STOP,merged_initial_index,merged_enhancerRank,merged_enhancerNUMBER,score,ave_score,merged_SIZE,adjacent_mSE_10kb_center_to_liftover,liftover_loci_name,merged_mSE
0,chr1_1430239_1442070,,0.7,,Endo; VLMC,Astro; L5_IT; Oligo,,1,1430239,1442070,"[445, 7840]","[61, 154]",2.0,[1. 0.92258],0.961290,11831,chr4_155769707_155791654; chr4_155769885_15579...,chr4_155768303_155781615; chr4_155768889_15578...,chr4_155769707_155792098
1,chr1_6634488_6683694,,0.1,,Vip; Pvalb,Sncg; Sst; Vip,Vip,1,6634488,6683694,"[7535, 6686]","[234, 480]",2.0,[1. 0.78053],0.890265,49206,chr4_151945203_151981828; chr4_151948574_15198...,chr4_151948895_151981697; chr4_151959619_15198...,chr4_151945203_151981909
2,chr1_8620665_8687442,,0.3,,Pvalb,Sncg; Vip,,1,8620665,8687442,[6593],[387],1.0,[1.],1.000000,66777,chr4_150371536_150427020; chr4_150396749_15047...,chr4_150385599_150434896,chr4_150371536_150473957
3,chr1_9345782_9411449,,0.3,,Oligo,MicroPVM; Oligo,Oligo,1,9345782,9411449,[5612],[44],1.0,[1.],1.000000,65667,chr4_149838364_149945586; chr4_149838647_14992...,chr4_149872176_149914498,chr4_149838364_149945586
4,chr1_9593339_9630197,,0.3,,VLMC,L6_CT,,1,9593339,9630197,[7833],[147],1.0,[1.],1.000000,36858,chr4_149710643_149734099,chr4_149713698_149734072,chr4_149710643_149734099
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
563,chr22_41957901_41979895,,0.4,,Astro,L5_6_NP; L6b,,22,41957901,41979895,[308],[309],1.0,[1.],1.000000,21994,chr15_82266345_82293772; chr15_82266528_82293896,chr15_82259794_82277947,chr15_82266345_82293896
564,chr22_44316829_44352293,,0.5,,Pvalb; Sst,L5_IT; L6_IT; Vip,,22,44316829,44352293,"[6466, 7263]","[260, 286]",2.0,[1. 0.67962],0.839810,35464,chr15_84399265_84451297; chr15_84407942_844511...,chr15_84433762_84462296; chr15_84433796_84453370,chr15_84399265_84451297
565,chr22_44571858_44599820,,0.1,,Pvalb,Sncg; Sst,,22,44571858,44599820,[6662],[456],1.0,[1.],1.000000,27962,chr15_84588255_84640251; chr15_84610040_84631963,chr15_84600041_84614158,chr15_84588255_84640251
566,chr22_44928982_44981587,,0.1,,Lamp5,Astro,,22,44928982,44981587,[5019],[264],1.0,[1.],1.000000,52605,chr15_84800208_84854291,chr15_84812906_84832338,chr15_84800208_84854291


In [30]:
len([_r for _r in se_df['Imaged_mSE'] if isinstance(_r, str)])

86

In [22]:
np.nan

nan

In [11]:
np.isnan(se_df['Imaged_mSE'].values.astype(np.float64))

ValueError: could not convert string to float: 'chr4_131791450_131800142'

### Convert into region_dicts

In [58]:
import re

region_dicts = []

for _i, _row in se_df.iterrows():
    
#for _i, _n in se_df[''].items():
    _match = re.match(r'chr(?P<chr>[0-9XY]+)_(?P<start>[0-9]+)_(?P<end>[0-9]+)', _row['region_name'])
    # assemble dict
    if _match:
        _dict = {
            'Chr': _match.groupdict()['chr'],
            'Start': int(_match.groupdict()['start']),
            'End': int(_match.groupdict()['end']),
            'Name': f"hSE-{_i}",
            'Gene': f'hSE-{_i}',
            'Region': f"{_match.groupdict()['chr']}:{int(_match.groupdict()['start'])}-{int(_match.groupdict()['end'])}",
            'Strand': '+',
        }
        if np.isfinite(_row['merged_enhancerNUMBER']):
            _dict['Name'] = _dict['Name'] + f"_mergeNum_{int(_row['merged_enhancerNUMBER'])}" 
        if np.isfinite(_row['merged_SIZE']):
            _dict['Name'] = _dict['Name'] + f"_mergeSize_{int(_row['merged_SIZE'])}" 
        
        # append
        region_dicts.append(_dict)

In [59]:
print(len(region_dicts))
print(region_dicts[0])

568
{'Chr': '1', 'Start': 1430239, 'End': 1442070, 'Name': 'hSE-0_mergeNum_2_mergeSize_11831', 'Gene': 'hSE-0', 'Region': '1:1430239-1442070', 'Strand': '+'}


In [62]:
# save region dict
se_dicts_filename = os.path.join(library_folder, 'enhancer_region_dicts.pkl')
overwrite = False

if not os.path.exists(se_dicts_filename) or overwrite:
    print(f"Saving to file: {se_dicts_filename}")
    pickle.dump(region_dicts, open(se_dicts_filename, 'wb'))
else:
    print(f"File: {se_dicts_filename} already exists, skip.")

File: \\10.245.74.212\Chromatin_NAS_2\Chromatin_Libraries\CTP-14_human_brain\human_brain_promoter_enhancer\enhancer_region_dicts.pkl already exists, skip.


## Load specific sequences

In [None]:
reload(library_tools.references)
reload(library_tools.sequences)

# prepare sequence reader
reload(ia.library_tools.sequences)
seq_rd = ia.library_tools.sequences.sequence_reader(genome_folder)
seq_rd.load_ref_sequences() # load genomic sequences



In [63]:
# load tss sequences
for _tss_d in region_dicts:
    seq_rd.find_sequence_for_region(_tss_d)
print(len(seq_rd.seq_dict))

seq_rd.save_sequences(sequence_folder)

-- searching among 1 references
-- a match found in record: 1.
-- searching among 1 references
-- a match found in record: 1.
-- searching among 1 references
-- a match found in record: 1.
-- searching among 1 references
-- a match found in record: 1.
-- searching among 1 references
-- a match found in record: 1.
-- searching among 1 references
-- a match found in record: 1.
-- searching among 1 references
-- a match found in record: 1.
-- searching among 1 references
-- a match found in record: 1.
-- searching among 1 references
-- a match found in record: 1.
-- searching among 1 references
-- a match found in record: 1.
-- searching among 1 references
-- a match found in record: 1.
-- searching among 1 references
-- a match found in record: 1.
-- searching among 1 references
-- a match found in record: 1.
-- searching among 1 references
-- a match found in record: 1.
-- searching among 1 references
-- a match found in record: 1.
-- searching among 1 references
-- a match found in rec

-- save to file: \\10.245.74.212\Chromatin_NAS_2\Chromatin_Libraries\CTP-14_human_brain\human_brain_promoter_enhancer\sequences\hSE-69_reg_0.fasta
-- save to file: \\10.245.74.212\Chromatin_NAS_2\Chromatin_Libraries\CTP-14_human_brain\human_brain_promoter_enhancer\sequences\hSE-70_reg_0.fasta
-- save to file: \\10.245.74.212\Chromatin_NAS_2\Chromatin_Libraries\CTP-14_human_brain\human_brain_promoter_enhancer\sequences\hSE-71_reg_0.fasta
-- save to file: \\10.245.74.212\Chromatin_NAS_2\Chromatin_Libraries\CTP-14_human_brain\human_brain_promoter_enhancer\sequences\hSE-72_reg_0.fasta
-- save to file: \\10.245.74.212\Chromatin_NAS_2\Chromatin_Libraries\CTP-14_human_brain\human_brain_promoter_enhancer\sequences\hSE-73_reg_0.fasta
-- save to file: \\10.245.74.212\Chromatin_NAS_2\Chromatin_Libraries\CTP-14_human_brain\human_brain_promoter_enhancer\sequences\hSE-74_reg_0.fasta
-- save to file: \\10.245.74.212\Chromatin_NAS_2\Chromatin_Libraries\CTP-14_human_brain\human_brain_promoter_enhancer\

-- save to file: \\10.245.74.212\Chromatin_NAS_2\Chromatin_Libraries\CTP-14_human_brain\human_brain_promoter_enhancer\sequences\hSE-158_reg_0.fasta
-- save to file: \\10.245.74.212\Chromatin_NAS_2\Chromatin_Libraries\CTP-14_human_brain\human_brain_promoter_enhancer\sequences\hSE-159_reg_0.fasta
-- save to file: \\10.245.74.212\Chromatin_NAS_2\Chromatin_Libraries\CTP-14_human_brain\human_brain_promoter_enhancer\sequences\hSE-160_reg_0.fasta
-- save to file: \\10.245.74.212\Chromatin_NAS_2\Chromatin_Libraries\CTP-14_human_brain\human_brain_promoter_enhancer\sequences\hSE-161_reg_0.fasta
-- save to file: \\10.245.74.212\Chromatin_NAS_2\Chromatin_Libraries\CTP-14_human_brain\human_brain_promoter_enhancer\sequences\hSE-162_reg_0.fasta
-- save to file: \\10.245.74.212\Chromatin_NAS_2\Chromatin_Libraries\CTP-14_human_brain\human_brain_promoter_enhancer\sequences\hSE-163_reg_0.fasta
-- save to file: \\10.245.74.212\Chromatin_NAS_2\Chromatin_Libraries\CTP-14_human_brain\human_brain_promoter_enh

-- save to file: \\10.245.74.212\Chromatin_NAS_2\Chromatin_Libraries\CTP-14_human_brain\human_brain_promoter_enhancer\sequences\hSE-244_reg_0.fasta
-- save to file: \\10.245.74.212\Chromatin_NAS_2\Chromatin_Libraries\CTP-14_human_brain\human_brain_promoter_enhancer\sequences\hSE-245_reg_0.fasta
-- save to file: \\10.245.74.212\Chromatin_NAS_2\Chromatin_Libraries\CTP-14_human_brain\human_brain_promoter_enhancer\sequences\hSE-246_reg_0.fasta
-- save to file: \\10.245.74.212\Chromatin_NAS_2\Chromatin_Libraries\CTP-14_human_brain\human_brain_promoter_enhancer\sequences\hSE-247_reg_0.fasta
-- save to file: \\10.245.74.212\Chromatin_NAS_2\Chromatin_Libraries\CTP-14_human_brain\human_brain_promoter_enhancer\sequences\hSE-248_reg_0.fasta
-- save to file: \\10.245.74.212\Chromatin_NAS_2\Chromatin_Libraries\CTP-14_human_brain\human_brain_promoter_enhancer\sequences\hSE-249_reg_0.fasta
-- save to file: \\10.245.74.212\Chromatin_NAS_2\Chromatin_Libraries\CTP-14_human_brain\human_brain_promoter_enh

-- save to file: \\10.245.74.212\Chromatin_NAS_2\Chromatin_Libraries\CTP-14_human_brain\human_brain_promoter_enhancer\sequences\hSE-330_reg_0.fasta
-- save to file: \\10.245.74.212\Chromatin_NAS_2\Chromatin_Libraries\CTP-14_human_brain\human_brain_promoter_enhancer\sequences\hSE-331_reg_0.fasta
-- save to file: \\10.245.74.212\Chromatin_NAS_2\Chromatin_Libraries\CTP-14_human_brain\human_brain_promoter_enhancer\sequences\hSE-332_reg_0.fasta
-- save to file: \\10.245.74.212\Chromatin_NAS_2\Chromatin_Libraries\CTP-14_human_brain\human_brain_promoter_enhancer\sequences\hSE-333_reg_0.fasta
-- save to file: \\10.245.74.212\Chromatin_NAS_2\Chromatin_Libraries\CTP-14_human_brain\human_brain_promoter_enhancer\sequences\hSE-334_reg_0.fasta
-- save to file: \\10.245.74.212\Chromatin_NAS_2\Chromatin_Libraries\CTP-14_human_brain\human_brain_promoter_enhancer\sequences\hSE-335_reg_0.fasta
-- save to file: \\10.245.74.212\Chromatin_NAS_2\Chromatin_Libraries\CTP-14_human_brain\human_brain_promoter_enh

-- save to file: \\10.245.74.212\Chromatin_NAS_2\Chromatin_Libraries\CTP-14_human_brain\human_brain_promoter_enhancer\sequences\hSE-421_reg_0.fasta
-- save to file: \\10.245.74.212\Chromatin_NAS_2\Chromatin_Libraries\CTP-14_human_brain\human_brain_promoter_enhancer\sequences\hSE-422_reg_0.fasta
-- save to file: \\10.245.74.212\Chromatin_NAS_2\Chromatin_Libraries\CTP-14_human_brain\human_brain_promoter_enhancer\sequences\hSE-423_reg_0.fasta
-- save to file: \\10.245.74.212\Chromatin_NAS_2\Chromatin_Libraries\CTP-14_human_brain\human_brain_promoter_enhancer\sequences\hSE-424_reg_0.fasta
-- save to file: \\10.245.74.212\Chromatin_NAS_2\Chromatin_Libraries\CTP-14_human_brain\human_brain_promoter_enhancer\sequences\hSE-425_reg_0.fasta
-- save to file: \\10.245.74.212\Chromatin_NAS_2\Chromatin_Libraries\CTP-14_human_brain\human_brain_promoter_enhancer\sequences\hSE-426_reg_0.fasta
-- save to file: \\10.245.74.212\Chromatin_NAS_2\Chromatin_Libraries\CTP-14_human_brain\human_brain_promoter_enh

-- save to file: \\10.245.74.212\Chromatin_NAS_2\Chromatin_Libraries\CTP-14_human_brain\human_brain_promoter_enhancer\sequences\hSE-522_reg_0.fasta
-- save to file: \\10.245.74.212\Chromatin_NAS_2\Chromatin_Libraries\CTP-14_human_brain\human_brain_promoter_enhancer\sequences\hSE-523_reg_0.fasta
-- save to file: \\10.245.74.212\Chromatin_NAS_2\Chromatin_Libraries\CTP-14_human_brain\human_brain_promoter_enhancer\sequences\hSE-524_reg_0.fasta
-- save to file: \\10.245.74.212\Chromatin_NAS_2\Chromatin_Libraries\CTP-14_human_brain\human_brain_promoter_enhancer\sequences\hSE-525_reg_0.fasta
-- save to file: \\10.245.74.212\Chromatin_NAS_2\Chromatin_Libraries\CTP-14_human_brain\human_brain_promoter_enhancer\sequences\hSE-526_reg_0.fasta
-- save to file: \\10.245.74.212\Chromatin_NAS_2\Chromatin_Libraries\CTP-14_human_brain\human_brain_promoter_enhancer\sequences\hSE-527_reg_0.fasta
-- save to file: \\10.245.74.212\Chromatin_NAS_2\Chromatin_Libraries\CTP-14_human_brain\human_brain_promoter_enh

<a id='2'></a>
# 2. Design probe targeting sequences by probe_designer

run probe_designer, remeber to clear the memory usage because each table should take ~32GB

<a id='2.1'></a>
## 2.1 Construct count table with all the 17-mers in the genome

Only do this if you don't have pre-built 17-mer

However you can do almost the same thing for your own library during quality check

This library requires hg38 genome

In [64]:
overwrite_table = False

### construct map for whole genome

In [65]:
from ImageAnalysis3 import library_tools

In [66]:
reload(library_tools.design)
 
genome_table_file = os.path.join(reference_folder, 'hg38_genome_17w.npy')

if not os.path.exists(genome_table_file) or overwrite_table:
    # genome
    _genome_filenames = [os.path.join(genome_folder, _fl) 
         for _fl in os.listdir(genome_folder) 
         if _fl.split(os.extsep)[-1]=='fasta' or _fl.split(os.extsep)[-1]=='fa']
    print(len(_genome_filenames))

    ct = library_tools.design.countTable(word=17,save_file=genome_table_file, 
                       sparse=False)
    ct.verbose=True

    ct.read(_genome_filenames) # read sequences from fasta files

    ct.consume_loaded(num_threads=24) # convert sequences into integers

    ct.complete(verbose=True)

    ct.save()

    # clear RAM if contructed countable 
    del(ct)
else:
    print(f"reference table: {genome_table_file} already exist, skip.")

reference table: \\10.245.74.212\Chromatin_NAS_2\Chromatin_Libraries\Genomes\human\hg38\hg38_genome_17w.npy already exist, skip.


### construct map for transcriptome

In [67]:
from tqdm import tqdm
# transcriptome
transcriptome_folder = os.path.join(reference_folder, 'Transcriptome')

transcriptome_table_file = os.path.join(reference_folder, 'hg38_transcriptome_17w.npy')

if not os.path.exists(transcriptome_table_file) or overwrite_table:
    # transcriptome
    _transcriptome_filenames = [os.path.join(transcriptome_folder, _fl) 
         for _fl in os.listdir(transcriptome_folder) 
         if _fl.split(os.extsep)[-1]=='fasta' or _fl.split(os.extsep)[-1]=='fa']
    print(len(_transcriptome_filenames))
    
    ct = library_tools.design.countTable(word=17,save_file=transcriptome_table_file, 
                       sparse=False)
    ct.verbose=True

    ct.read(_transcriptome_filenames) # read sequences from fasta files

    ct.consume_loaded(num_threads=24) # convert sequences into integers

    ct.complete(verbose=True)

    ct.save()
    
    # clear RAM if contructed countable 
    del(ct)
else:
    print(f"reference table: {transcriptome_table_file} already exist, skip.")

reference table: \\10.245.74.212\Chromatin_NAS_2\Chromatin_Libraries\Genomes\human\hg38\hg38_transcriptome_17w.npy already exist, skip.


### construct map for repeats from RepBase

In [68]:
from tqdm import tqdm
# repeat
repeat_folder = r'\\10.245.74.212\Chromatin_NAS_2\Chromatin_Libraries\Genomes\Repbase'

repeat_table_file = os.path.join(reference_folder, 'Repbase_v2603_repeat_17w.npy')

if not os.path.exists(repeat_table_file) or overwrite_table:
    # repeat
    _repeat_filenames = [os.path.join(repeat_folder, _fl) 
         for _fl in os.listdir(repeat_folder) 
         if _fl.split(os.extsep)[-1]=='fasta' or _fl.split(os.extsep)[-1]=='fa']
    print(len(_repeat_filenames))
    
    ct = library_tools.design.countTable(word=17,save_file=repeat_table_file, 
                       sparse=False)
    ct.verbose=True

    ct.read(_repeat_filenames) # read sequences from fasta files

    ct.consume_loaded(num_threads=24) # convert sequences into integers

    ct.complete(verbose=True)

    ct.save()
    
    # clear RAM if contructed countable 
    del(ct)
else:
    print(f"reference table: {repeat_table_file} already exist, skip.")

reference table: \\10.245.74.212\Chromatin_NAS_2\Chromatin_Libraries\Genomes\human\hg38\Repbase_v2603_repeat_17w.npy already exist, skip.


<a id='2.2'></a>
## 2.2 Design probes

In [82]:
from ImageAnalysis3 import library_tools
# requires pre_defined genome_folder and library_folder
# Indices
genome_index = os.path.join(reference_folder, 'hg38_genome_17w.npy')
transcriptome_index = os.path.join(reference_folder, 'hg38_transcriptome_17w.npy') 
repeat_index = os.path.join(reference_folder, 'Repbase_v2603_repeat_17w.npy')
#ref_merfish_index = os.path.join(reference_folder, 'M1_meng_MERFISH_17w.npy') # merfish designed by Meng
# get input files 
input_files = glob.glob(os.path.join(sequence_folder, '*.fasta'))
input_files = sorted(input_files, key=lambda f:int(os.path.basename(f).split('_reg')[0].split('-')[1]))
print(f"{len(input_files)} regions loaded to design probes.")

if not os.path.exists(report_folder):
    os.makedirs(report_folder)
    
# filename to save probe reports
probe_savefile = os.path.join(report_folder, f'hSE_probes.pbr')
print(probe_savefile)

568 regions loaded to design probes.
\\10.245.74.212\Chromatin_NAS_2\Chromatin_Libraries\CTP-14_human_brain\human_brain_promoter_enhancer\reports\hSE_probes.pbr


In [83]:
input_files

['\\\\10.245.74.212\\Chromatin_NAS_2\\Chromatin_Libraries\\CTP-14_human_brain\\human_brain_promoter_enhancer\\sequences_enhancers\\hSE-0_reg_0.fasta',
 '\\\\10.245.74.212\\Chromatin_NAS_2\\Chromatin_Libraries\\CTP-14_human_brain\\human_brain_promoter_enhancer\\sequences_enhancers\\hSE-1_reg_0.fasta',
 '\\\\10.245.74.212\\Chromatin_NAS_2\\Chromatin_Libraries\\CTP-14_human_brain\\human_brain_promoter_enhancer\\sequences_enhancers\\hSE-2_reg_0.fasta',
 '\\\\10.245.74.212\\Chromatin_NAS_2\\Chromatin_Libraries\\CTP-14_human_brain\\human_brain_promoter_enhancer\\sequences_enhancers\\hSE-3_reg_0.fasta',
 '\\\\10.245.74.212\\Chromatin_NAS_2\\Chromatin_Libraries\\CTP-14_human_brain\\human_brain_promoter_enhancer\\sequences_enhancers\\hSE-4_reg_0.fasta',
 '\\\\10.245.74.212\\Chromatin_NAS_2\\Chromatin_Libraries\\CTP-14_human_brain\\human_brain_promoter_enhancer\\sequences_enhancers\\hSE-5_reg_0.fasta',
 '\\\\10.245.74.212\\Chromatin_NAS_2\\Chromatin_Libraries\\CTP-14_human_brain\\human_brain_pro

### create pb_designer class

In [85]:
reload(library_tools)
reload(library_tools.design)

pb_designer = library_tools.design.pb_reports_class(
    sequence_dic={'file':input_files,
                  'rev_com':True, # design two strands
                  'two_stranded':True},
    map_dic={'genome':{'file':genome_index,'rev_com':False,'two_stranded':True},
             'transcriptome':{'file':transcriptome_index,'rev_com':True,'two_stranded':False},
             'rep_genome':{'file':repeat_index,'rev_com':False,'two_stranded':True},
             'self_sequences':{'file':input_files,'force_list':True,'rev_com':False,'two_stranded':True},
             #'ref_merfish':{'file':ref_merfish_index,'rev_com':False,'two_stranded':True},
             },
        save_file=probe_savefile,
    params_dic={'word_size':17,'pb_len':42,'buffer_len':2,'max_count':2**16-1,
                'check_on_go': False, # whether automatically check probes
                'auto': False, # whether automatically convert reference maps
               },
    check_dic={('genome','self_sequences'): 25,
               'rep_genome': 0,
               'transcriptome': 25, # 14
               #'ref_merfish': 14,
               'gc':[0.25,0.75],'tm': 37+0.62*50+5,
               }
    )
print(pb_designer)
pb_designer.load_from_file(load_probes_only=True)


Probe designer derived from Bogdan Bintu:
https://github.com/BogdanBintu/ChromatinImaging/blob/master/LibraryDesign/LibraryDesigner.py
by Pu Zheng, 2020.11

Major changes:
    1. allow design of two strands
    2. separate reverse_complement (rev_com) and from two strands (two_stranded) as 
    two different inputs for map_dic and sequence_dic
    3. replace 'local_genome' with 'self_sequences' to be more explicit, and only 
    exclude the counts for the corresponding self_sequence within each input. 

Key information:
    - number of input_sequence(s): 568
    - save_file location: \\10.245.74.212\Chromatin_NAS_2\Chromatin_Libraries\CTP-14_human_brain\human_brain_promoter_enhancer\reports\hSE_probes.pbr

- Fail to load from savefile: \\10.245.74.212\Chromatin_NAS_2\Chromatin_Libraries\CTP-14_human_brain\human_brain_promoter_enhancer\reports\hSE_probes.pbr, file doesn't exist.


False

### calculate probe reports

In [86]:
%%time
pb_designer.computeOTmaps() # load the tables 
pb_designer.compute_pb_report() # design candidate probes

-- setting attribute: map_genome
--- finish map_genome in 119.473s.
-- setting attribute: map_transcriptome
--- finish map_transcriptome in 144.201s.
-- setting attribute: map_rep_genome
--- finish map_rep_genome in 143.362s.
Time(s): 407.0357446670532
- Designing targeting sequence for 568 regions
-- designing region: 1:1430239-1442070_strand_+_gene_hSE-0 -- region: 0, input file: \\10.245.74.212\Chromatin_NAS_2\Chromatin_Libraries\CTP-14_human_brain\human_brain_promoter_enhancer\sequences_enhancers\hSE-0_reg_0.fasta
-- setting attribute: map_self_sequences
- Mapping no. of seqs: 1
--- finish map_self_sequences in 0.279s.
- Designed 23582 candidate probes in 8.738s.
-- designing region: 1:6634488-6683694_strand_+_gene_hSE-1 -- region: 1, input file: \\10.245.74.212\Chromatin_NAS_2\Chromatin_Libraries\CTP-14_human_brain\human_brain_promoter_enhancer\sequences_enhancers\hSE-1_reg_0.fasta
-- setting attribute: map_self_sequences
- Mapping no. of seqs: 1
--- finish map_self_sequences in 0

--- finish map_self_sequences in 0.302s.
- Designed 79566 candidate probes in 21.148s.
-- designing region: 1:40383030-40405268_strand_+_gene_hSE-21 -- region: 21, input file: \\10.245.74.212\Chromatin_NAS_2\Chromatin_Libraries\CTP-14_human_brain\human_brain_promoter_enhancer\sequences_enhancers\hSE-21_reg_0.fasta
-- setting attribute: map_self_sequences
- Mapping no. of seqs: 1
--- finish map_self_sequences in 0.260s.
- Designed 44396 candidate probes in 12.122s.
-- designing region: 1:42924178-42932394_strand_+_gene_hSE-22 -- region: 22, input file: \\10.245.74.212\Chromatin_NAS_2\Chromatin_Libraries\CTP-14_human_brain\human_brain_promoter_enhancer\sequences_enhancers\hSE-22_reg_0.fasta
-- setting attribute: map_self_sequences
- Mapping no. of seqs: 1
--- finish map_self_sequences in 0.215s.
- Designed 16352 candidate probes in 4.867s.
-- designing region: 1:43271997-43295887_strand_+_gene_hSE-23 -- region: 23, input file: \\10.245.74.212\Chromatin_NAS_2\Chromatin_Libraries\CTP-14_hu

--- finish map_self_sequences in 0.407s.
- Designed 145380 candidate probes in 38.253s.
-- designing region: 1:166057386-166096188_strand_+_gene_hSE-43 -- region: 43, input file: \\10.245.74.212\Chromatin_NAS_2\Chromatin_Libraries\CTP-14_human_brain\human_brain_promoter_enhancer\sequences_enhancers\hSE-43_reg_0.fasta
-- setting attribute: map_self_sequences
- Mapping no. of seqs: 1
--- finish map_self_sequences in 0.320s.
- Designed 77524 candidate probes in 20.764s.
-- designing region: 1:167600121-167663464_strand_+_gene_hSE-44 -- region: 44, input file: \\10.245.74.212\Chromatin_NAS_2\Chromatin_Libraries\CTP-14_human_brain\human_brain_promoter_enhancer\sequences_enhancers\hSE-44_reg_0.fasta
-- setting attribute: map_self_sequences
- Mapping no. of seqs: 1
--- finish map_self_sequences in 0.375s.
- Designed 126606 candidate probes in 33.484s.
-- designing region: 1:168360565-168403520_strand_+_gene_hSE-45 -- region: 45, input file: \\10.245.74.212\Chromatin_NAS_2\Chromatin_Libraries\

--- finish map_self_sequences in 0.332s.
- Designed 99442 candidate probes in 26.399s.
-- designing region: 1:248850425-248874090_strand_+_gene_hSE-65 -- region: 65, input file: \\10.245.74.212\Chromatin_NAS_2\Chromatin_Libraries\CTP-14_human_brain\human_brain_promoter_enhancer\sequences_enhancers\hSE-65_reg_0.fasta
-- setting attribute: map_self_sequences
- Mapping no. of seqs: 1
--- finish map_self_sequences in 0.254s.
- Designed 47250 candidate probes in 12.920s.
-- designing region: 2:2285196-2326618_strand_+_gene_hSE-66 -- region: 66, input file: \\10.245.74.212\Chromatin_NAS_2\Chromatin_Libraries\CTP-14_human_brain\human_brain_promoter_enhancer\sequences_enhancers\hSE-66_reg_0.fasta
-- setting attribute: map_self_sequences
- Mapping no. of seqs: 1
--- finish map_self_sequences in 0.314s.
- Designed 82764 candidate probes in 22.041s.
-- designing region: 2:17892542-17940538_strand_+_gene_hSE-67 -- region: 67, input file: \\10.245.74.212\Chromatin_NAS_2\Chromatin_Libraries\CTP-14_h

--- finish map_self_sequences in 0.312s.
- Designed 76688 candidate probes in 20.449s.
-- designing region: 2:158619217-158676750_strand_+_gene_hSE-87 -- region: 87, input file: \\10.245.74.212\Chromatin_NAS_2\Chromatin_Libraries\CTP-14_human_brain\human_brain_promoter_enhancer\sequences_enhancers\hSE-87_reg_0.fasta
-- setting attribute: map_self_sequences
- Mapping no. of seqs: 1
--- finish map_self_sequences in 0.354s.
- Designed 114986 candidate probes in 30.446s.
-- designing region: 2:161217490-161250781_strand_+_gene_hSE-88 -- region: 88, input file: \\10.245.74.212\Chromatin_NAS_2\Chromatin_Libraries\CTP-14_human_brain\human_brain_promoter_enhancer\sequences_enhancers\hSE-88_reg_0.fasta
-- setting attribute: map_self_sequences
- Mapping no. of seqs: 1
--- finish map_self_sequences in 0.300s.
- Designed 66502 candidate probes in 18.277s.
-- designing region: 2:162146510-162203239_strand_+_gene_hSE-89 -- region: 89, input file: \\10.245.74.212\Chromatin_NAS_2\Chromatin_Libraries\C

--- finish map_self_sequences in 0.370s.
- Designed 119718 candidate probes in 31.755s.
-- designing region: 3:42419688-42459881_strand_+_gene_hSE-109 -- region: 109, input file: \\10.245.74.212\Chromatin_NAS_2\Chromatin_Libraries\CTP-14_human_brain\human_brain_promoter_enhancer\sequences_enhancers\hSE-109_reg_0.fasta
-- setting attribute: map_self_sequences
- Mapping no. of seqs: 1
--- finish map_self_sequences in 0.309s.
- Designed 80306 candidate probes in 21.545s.
-- designing region: 3:42712818-42763438_strand_+_gene_hSE-110 -- region: 110, input file: \\10.245.74.212\Chromatin_NAS_2\Chromatin_Libraries\CTP-14_human_brain\human_brain_promoter_enhancer\sequences_enhancers\hSE-110_reg_0.fasta
-- setting attribute: map_self_sequences
- Mapping no. of seqs: 1
--- finish map_self_sequences in 0.336s.
- Designed 101160 candidate probes in 26.956s.
-- designing region: 3:50243183-50260429_strand_+_gene_hSE-111 -- region: 111, input file: \\10.245.74.212\Chromatin_NAS_2\Chromatin_Librarie

--- finish map_self_sequences in 0.328s.
- Designed 110806 candidate probes in 29.250s.
-- designing region: 3:133362559-133423952_strand_+_gene_hSE-131 -- region: 131, input file: \\10.245.74.212\Chromatin_NAS_2\Chromatin_Libraries\CTP-14_human_brain\human_brain_promoter_enhancer\sequences_enhancers\hSE-131_reg_0.fasta
-- setting attribute: map_self_sequences
- Mapping no. of seqs: 1
--- finish map_self_sequences in 0.363s.
- Designed 122706 candidate probes in 32.424s.
-- designing region: 3:151346283-151399373_strand_+_gene_hSE-132 -- region: 132, input file: \\10.245.74.212\Chromatin_NAS_2\Chromatin_Libraries\CTP-14_human_brain\human_brain_promoter_enhancer\sequences_enhancers\hSE-132_reg_0.fasta
-- setting attribute: map_self_sequences
- Mapping no. of seqs: 1
--- finish map_self_sequences in 0.339s.
- Designed 106100 candidate probes in 28.100s.
-- designing region: 3:156381375-156417831_strand_+_gene_hSE-133 -- region: 133, input file: \\10.245.74.212\Chromatin_NAS_2\Chromatin_L

--- finish map_self_sequences in 0.459s.
- Designed 190722 candidate probes in 50.008s.
-- designing region: 4:86279377-86344472_strand_+_gene_hSE-153 -- region: 153, input file: \\10.245.74.212\Chromatin_NAS_2\Chromatin_Libraries\CTP-14_human_brain\human_brain_promoter_enhancer\sequences_enhancers\hSE-153_reg_0.fasta
-- setting attribute: map_self_sequences
- Mapping no. of seqs: 1
--- finish map_self_sequences in 0.384s.
- Designed 130110 candidate probes in 34.374s.
-- designing region: 4:87030818-87079602_strand_+_gene_hSE-154 -- region: 154, input file: \\10.245.74.212\Chromatin_NAS_2\Chromatin_Libraries\CTP-14_human_brain\human_brain_promoter_enhancer\sequences_enhancers\hSE-154_reg_0.fasta
-- setting attribute: map_self_sequences
- Mapping no. of seqs: 1
--- finish map_self_sequences in 0.331s.
- Designed 97488 candidate probes in 25.880s.
-- designing region: 4:87514660-87559831_strand_+_gene_hSE-155 -- region: 155, input file: \\10.245.74.212\Chromatin_NAS_2\Chromatin_Librarie

--- finish map_self_sequences in 0.354s.
- Designed 105354 candidate probes in 28.052s.
-- designing region: 5:61603463-61635762_strand_+_gene_hSE-175 -- region: 175, input file: \\10.245.74.212\Chromatin_NAS_2\Chromatin_Libraries\CTP-14_human_brain\human_brain_promoter_enhancer\sequences_enhancers\hSE-175_reg_0.fasta
-- setting attribute: map_self_sequences
- Mapping no. of seqs: 1
--- finish map_self_sequences in 0.292s.
- Designed 64518 candidate probes in 17.451s.
-- designing region: 5:65791562-65832174_strand_+_gene_hSE-176 -- region: 176, input file: \\10.245.74.212\Chromatin_NAS_2\Chromatin_Libraries\CTP-14_human_brain\human_brain_promoter_enhancer\sequences_enhancers\hSE-176_reg_0.fasta
-- setting attribute: map_self_sequences
- Mapping no. of seqs: 1
--- finish map_self_sequences in 0.501s.
- Designed 81144 candidate probes in 21.965s.
-- designing region: 5:66608766-66669231_strand_+_gene_hSE-177 -- region: 177, input file: \\10.245.74.212\Chromatin_NAS_2\Chromatin_Libraries

--- finish map_self_sequences in 0.325s.
- Designed 94356 candidate probes in 25.156s.
-- designing region: 5:140341565-140373327_strand_+_gene_hSE-197 -- region: 197, input file: \\10.245.74.212\Chromatin_NAS_2\Chromatin_Libraries\CTP-14_human_brain\human_brain_promoter_enhancer\sequences_enhancers\hSE-197_reg_0.fasta
-- setting attribute: map_self_sequences
- Mapping no. of seqs: 1
--- finish map_self_sequences in 0.282s.
- Designed 63444 candidate probes in 17.205s.
-- designing region: 5:140569758-140612578_strand_+_gene_hSE-198 -- region: 198, input file: \\10.245.74.212\Chromatin_NAS_2\Chromatin_Libraries\CTP-14_human_brain\human_brain_promoter_enhancer\sequences_enhancers\hSE-198_reg_0.fasta
-- setting attribute: map_self_sequences
- Mapping no. of seqs: 1
--- finish map_self_sequences in 0.318s.
- Designed 85560 candidate probes in 22.901s.
-- designing region: 5:140977023-141017732_strand_+_gene_hSE-199 -- region: 199, input file: \\10.245.74.212\Chromatin_NAS_2\Chromatin_Libr

--- finish map_self_sequences in 0.335s.
- Designed 97686 candidate probes in 25.989s.
-- designing region: 6:41676420-41723865_strand_+_gene_hSE-219 -- region: 219, input file: \\10.245.74.212\Chromatin_NAS_2\Chromatin_Libraries\CTP-14_human_brain\human_brain_promoter_enhancer\sequences_enhancers\hSE-219_reg_0.fasta
-- setting attribute: map_self_sequences
- Mapping no. of seqs: 1
--- finish map_self_sequences in 0.338s.
- Designed 94810 candidate probes in 25.246s.
-- designing region: 6:46199402-46252074_strand_+_gene_hSE-220 -- region: 220, input file: \\10.245.74.212\Chromatin_NAS_2\Chromatin_Libraries\CTP-14_human_brain\human_brain_promoter_enhancer\sequences_enhancers\hSE-220_reg_0.fasta
-- setting attribute: map_self_sequences
- Mapping no. of seqs: 1
--- finish map_self_sequences in 0.337s.
- Designed 105264 candidate probes in 27.988s.
-- designing region: 6:47236110-47294034_strand_+_gene_hSE-221 -- region: 221, input file: \\10.245.74.212\Chromatin_NAS_2\Chromatin_Libraries

--- finish map_self_sequences in 0.254s.
- Designed 49584 candidate probes in 13.465s.
-- designing region: 7:6393259-6442405_strand_+_gene_hSE-241 -- region: 241, input file: \\10.245.74.212\Chromatin_NAS_2\Chromatin_Libraries\CTP-14_human_brain\human_brain_promoter_enhancer\sequences_enhancers\hSE-241_reg_0.fasta
-- setting attribute: map_self_sequences
- Mapping no. of seqs: 1
--- finish map_self_sequences in 0.318s.
- Designed 98212 candidate probes in 26.075s.
-- designing region: 7:16766600-16801496_strand_+_gene_hSE-242 -- region: 242, input file: \\10.245.74.212\Chromatin_NAS_2\Chromatin_Libraries\CTP-14_human_brain\human_brain_promoter_enhancer\sequences_enhancers\hSE-242_reg_0.fasta
-- setting attribute: map_self_sequences
- Mapping no. of seqs: 1
--- finish map_self_sequences in 0.298s.
- Designed 69712 candidate probes in 18.764s.
-- designing region: 7:27983649-28037674_strand_+_gene_hSE-243 -- region: 243, input file: \\10.245.74.212\Chromatin_NAS_2\Chromatin_Libraries\CT

--- finish map_self_sequences in 0.289s.
- Designed 71622 candidate probes in 19.676s.
-- designing region: 7:150962876-150989026_strand_+_gene_hSE-263 -- region: 263, input file: \\10.245.74.212\Chromatin_NAS_2\Chromatin_Libraries\CTP-14_human_brain\human_brain_promoter_enhancer\sequences_enhancers\hSE-263_reg_0.fasta
-- setting attribute: map_self_sequences
- Mapping no. of seqs: 1
--- finish map_self_sequences in 0.270s.
- Designed 52220 candidate probes in 19.121s.
-- designing region: 8:756590-806322_strand_+_gene_hSE-264 -- region: 264, input file: \\10.245.74.212\Chromatin_NAS_2\Chromatin_Libraries\CTP-14_human_brain\human_brain_promoter_enhancer\sequences_enhancers\hSE-264_reg_0.fasta
-- setting attribute: map_self_sequences
- Mapping no. of seqs: 1
--- finish map_self_sequences in 0.718s.
- Designed 99384 candidate probes in 31.120s.
-- designing region: 8:1062006-1128511_strand_+_gene_hSE-265 -- region: 265, input file: \\10.245.74.212\Chromatin_NAS_2\Chromatin_Libraries\CTP-

--- finish map_self_sequences in 0.352s.
- Designed 113402 candidate probes in 30.033s.
-- designing region: 8:133518552-133555672_strand_+_gene_hSE-285 -- region: 285, input file: \\10.245.74.212\Chromatin_NAS_2\Chromatin_Libraries\CTP-14_human_brain\human_brain_promoter_enhancer\sequences_enhancers\hSE-285_reg_0.fasta
-- setting attribute: map_self_sequences
- Mapping no. of seqs: 1
--- finish map_self_sequences in 0.285s.
- Designed 74160 candidate probes in 19.852s.
-- designing region: 8:134531678-134582535_strand_+_gene_hSE-286 -- region: 286, input file: \\10.245.74.212\Chromatin_NAS_2\Chromatin_Libraries\CTP-14_human_brain\human_brain_promoter_enhancer\sequences_enhancers\hSE-286_reg_0.fasta
-- setting attribute: map_self_sequences
- Mapping no. of seqs: 1
--- finish map_self_sequences in 0.343s.
- Designed 101634 candidate probes in 27.006s.
-- designing region: 8:135169030-135251947_strand_+_gene_hSE-287 -- region: 287, input file: \\10.245.74.212\Chromatin_NAS_2\Chromatin_Li

--- finish map_self_sequences in 0.330s.
- Designed 92478 candidate probes in 24.768s.
-- designing region: 9:133543811-133583883_strand_+_gene_hSE-307 -- region: 307, input file: \\10.245.74.212\Chromatin_NAS_2\Chromatin_Libraries\CTP-14_human_brain\human_brain_promoter_enhancer\sequences_enhancers\hSE-307_reg_0.fasta
-- setting attribute: map_self_sequences
- Mapping no. of seqs: 1
--- finish map_self_sequences in 0.302s.
- Designed 80064 candidate probes in 21.482s.
-- designing region: 9:135707212-135737686_strand_+_gene_hSE-308 -- region: 308, input file: \\10.245.74.212\Chromatin_NAS_2\Chromatin_Libraries\CTP-14_human_brain\human_brain_promoter_enhancer\sequences_enhancers\hSE-308_reg_0.fasta
-- setting attribute: map_self_sequences
- Mapping no. of seqs: 1
--- finish map_self_sequences in 0.275s.
- Designed 60868 candidate probes in 16.489s.
-- designing region: 9:137286852-137321080_strand_+_gene_hSE-309 -- region: 309, input file: \\10.245.74.212\Chromatin_NAS_2\Chromatin_Libr

--- finish map_self_sequences in 0.344s.
- Designed 101786 candidate probes in 26.997s.
-- designing region: 10:100994187-101019427_strand_+_gene_hSE-329 -- region: 329, input file: \\10.245.74.212\Chromatin_NAS_2\Chromatin_Libraries\CTP-14_human_brain\human_brain_promoter_enhancer\sequences_enhancers\hSE-329_reg_0.fasta
-- setting attribute: map_self_sequences
- Mapping no. of seqs: 1
--- finish map_self_sequences in 0.256s.
- Designed 50400 candidate probes in 13.705s.
-- designing region: 10:101272051-101332356_strand_+_gene_hSE-330 -- region: 330, input file: \\10.245.74.212\Chromatin_NAS_2\Chromatin_Libraries\CTP-14_human_brain\human_brain_promoter_enhancer\sequences_enhancers\hSE-330_reg_0.fasta
-- setting attribute: map_self_sequences
- Mapping no. of seqs: 1
--- finish map_self_sequences in 0.367s.
- Designed 120530 candidate probes in 31.806s.
-- designing region: 10:114343621-114401569_strand_+_gene_hSE-331 -- region: 331, input file: \\10.245.74.212\Chromatin_NAS_2\Chromatin

--- finish map_self_sequences in 0.351s.
- Designed 100444 candidate probes in 26.702s.
-- designing region: 11:66408429-66430438_strand_+_gene_hSE-351 -- region: 351, input file: \\10.245.74.212\Chromatin_NAS_2\Chromatin_Libraries\CTP-14_human_brain\human_brain_promoter_enhancer\sequences_enhancers\hSE-351_reg_0.fasta
-- setting attribute: map_self_sequences
- Mapping no. of seqs: 1
--- finish map_self_sequences in 0.255s.
- Designed 43938 candidate probes in 12.079s.
-- designing region: 11:67922419-67956070_strand_+_gene_hSE-352 -- region: 352, input file: \\10.245.74.212\Chromatin_NAS_2\Chromatin_Libraries\CTP-14_human_brain\human_brain_promoter_enhancer\sequences_enhancers\hSE-352_reg_0.fasta
-- setting attribute: map_self_sequences
- Mapping no. of seqs: 1
--- finish map_self_sequences in 0.290s.
- Designed 67222 candidate probes in 18.096s.
-- designing region: 11:71639338-71673806_strand_+_gene_hSE-353 -- region: 353, input file: \\10.245.74.212\Chromatin_NAS_2\Chromatin_Librar

--- finish map_self_sequences in 0.292s.
- Designed 68276 candidate probes in 18.341s.
-- designing region: 12:54191464-54240162_strand_+_gene_hSE-373 -- region: 373, input file: \\10.245.74.212\Chromatin_NAS_2\Chromatin_Libraries\CTP-14_human_brain\human_brain_promoter_enhancer\sequences_enhancers\hSE-373_reg_0.fasta
-- setting attribute: map_self_sequences
- Mapping no. of seqs: 1
--- finish map_self_sequences in 0.319s.
- Designed 97316 candidate probes in 26.195s.
-- designing region: 12:54358060-54380083_strand_+_gene_hSE-374 -- region: 374, input file: \\10.245.74.212\Chromatin_NAS_2\Chromatin_Libraries\CTP-14_human_brain\human_brain_promoter_enhancer\sequences_enhancers\hSE-374_reg_0.fasta
-- setting attribute: map_self_sequences
- Mapping no. of seqs: 1
--- finish map_self_sequences in 0.257s.
- Designed 43966 candidate probes in 12.972s.
-- designing region: 12:54564716-54603615_strand_+_gene_hSE-375 -- region: 375, input file: \\10.245.74.212\Chromatin_NAS_2\Chromatin_Librari

--- finish map_self_sequences in 0.346s.
- Designed 99112 candidate probes in 26.493s.
-- designing region: 13:48144569-48194812_strand_+_gene_hSE-395 -- region: 395, input file: \\10.245.74.212\Chromatin_NAS_2\Chromatin_Libraries\CTP-14_human_brain\human_brain_promoter_enhancer\sequences_enhancers\hSE-395_reg_0.fasta
-- setting attribute: map_self_sequences
- Mapping no. of seqs: 1
--- finish map_self_sequences in 0.336s.
- Designed 100406 candidate probes in 26.825s.
-- designing region: 13:48542660-48596074_strand_+_gene_hSE-396 -- region: 396, input file: \\10.245.74.212\Chromatin_NAS_2\Chromatin_Libraries\CTP-14_human_brain\human_brain_promoter_enhancer\sequences_enhancers\hSE-396_reg_0.fasta
-- setting attribute: map_self_sequences
- Mapping no. of seqs: 1
--- finish map_self_sequences in 0.338s.
- Designed 106748 candidate probes in 28.385s.
-- designing region: 13:80132673-80181401_strand_+_gene_hSE-397 -- region: 397, input file: \\10.245.74.212\Chromatin_NAS_2\Chromatin_Libra

--- finish map_self_sequences in 0.322s.
- Designed 87386 candidate probes in 23.490s.
-- designing region: 14:78553499-78623809_strand_+_gene_hSE-417 -- region: 417, input file: \\10.245.74.212\Chromatin_NAS_2\Chromatin_Libraries\CTP-14_human_brain\human_brain_promoter_enhancer\sequences_enhancers\hSE-417_reg_0.fasta
-- setting attribute: map_self_sequences
- Mapping no. of seqs: 1
--- finish map_self_sequences in 0.426s.
- Designed 140540 candidate probes in 37.383s.
-- designing region: 14:81303275-81350740_strand_+_gene_hSE-418 -- region: 418, input file: \\10.245.74.212\Chromatin_NAS_2\Chromatin_Libraries\CTP-14_human_brain\human_brain_promoter_enhancer\sequences_enhancers\hSE-418_reg_0.fasta
-- setting attribute: map_self_sequences
- Mapping no. of seqs: 1
--- finish map_self_sequences in 0.333s.
- Designed 94850 candidate probes in 25.381s.
-- designing region: 14:89336606-89409807_strand_+_gene_hSE-419 -- region: 419, input file: \\10.245.74.212\Chromatin_NAS_2\Chromatin_Librar

--- finish map_self_sequences in 0.366s.
- Designed 120310 candidate probes in 31.889s.
-- designing region: 15:66126160-66191768_strand_+_gene_hSE-439 -- region: 439, input file: \\10.245.74.212\Chromatin_NAS_2\Chromatin_Libraries\CTP-14_human_brain\human_brain_promoter_enhancer\sequences_enhancers\hSE-439_reg_0.fasta
-- setting attribute: map_self_sequences
- Mapping no. of seqs: 1
--- finish map_self_sequences in 0.375s.
- Designed 131136 candidate probes in 34.762s.
-- designing region: 15:74793364-74831296_strand_+_gene_hSE-440 -- region: 440, input file: \\10.245.74.212\Chromatin_NAS_2\Chromatin_Libraries\CTP-14_human_brain\human_brain_promoter_enhancer\sequences_enhancers\hSE-440_reg_0.fasta
-- setting attribute: map_self_sequences
- Mapping no. of seqs: 1
--- finish map_self_sequences in 0.285s.
- Designed 75784 candidate probes in 20.296s.
-- designing region: 15:78034435-78088454_strand_+_gene_hSE-441 -- region: 441, input file: \\10.245.74.212\Chromatin_NAS_2\Chromatin_Libra

--- finish map_self_sequences in 0.308s.
- Designed 73010 candidate probes in 19.672s.
-- designing region: 16:51538954-51592359_strand_+_gene_hSE-461 -- region: 461, input file: \\10.245.74.212\Chromatin_NAS_2\Chromatin_Libraries\CTP-14_human_brain\human_brain_promoter_enhancer\sequences_enhancers\hSE-461_reg_0.fasta
-- setting attribute: map_self_sequences
- Mapping no. of seqs: 1
--- finish map_self_sequences in 0.351s.
- Designed 106730 candidate probes in 28.392s.
-- designing region: 16:53065257-53110096_strand_+_gene_hSE-462 -- region: 462, input file: \\10.245.74.212\Chromatin_NAS_2\Chromatin_Libraries\CTP-14_human_brain\human_brain_promoter_enhancer\sequences_enhancers\hSE-462_reg_0.fasta
-- setting attribute: map_self_sequences
- Mapping no. of seqs: 1
--- finish map_self_sequences in 0.327s.
- Designed 89598 candidate probes in 23.957s.
-- designing region: 16:56539594-56558762_strand_+_gene_hSE-463 -- region: 463, input file: \\10.245.74.212\Chromatin_NAS_2\Chromatin_Librar

--- finish map_self_sequences in 0.334s.
- Designed 102944 candidate probes in 27.198s.
-- designing region: 17:30933793-30968911_strand_+_gene_hSE-483 -- region: 483, input file: \\10.245.74.212\Chromatin_NAS_2\Chromatin_Libraries\CTP-14_human_brain\human_brain_promoter_enhancer\sequences_enhancers\hSE-483_reg_0.fasta
-- setting attribute: map_self_sequences
- Mapping no. of seqs: 1
--- finish map_self_sequences in 0.284s.
- Designed 70156 candidate probes in 18.788s.
-- designing region: 17:33079134-33128151_strand_+_gene_hSE-484 -- region: 484, input file: \\10.245.74.212\Chromatin_NAS_2\Chromatin_Libraries\CTP-14_human_brain\human_brain_promoter_enhancer\sequences_enhancers\hSE-484_reg_0.fasta
-- setting attribute: map_self_sequences
- Mapping no. of seqs: 1
--- finish map_self_sequences in 0.342s.
- Designed 97954 candidate probes in 28.944s.
-- designing region: 17:40515665-40557340_strand_+_gene_hSE-485 -- region: 485, input file: \\10.245.74.212\Chromatin_NAS_2\Chromatin_Librar

--- finish map_self_sequences in 0.495s.
- Designed 179312 candidate probes in 47.258s.
-- designing region: 18:50429142-50478724_strand_+_gene_hSE-505 -- region: 505, input file: \\10.245.74.212\Chromatin_NAS_2\Chromatin_Libraries\CTP-14_human_brain\human_brain_promoter_enhancer\sequences_enhancers\hSE-505_reg_0.fasta
-- setting attribute: map_self_sequences
- Mapping no. of seqs: 1
--- finish map_self_sequences in 0.333s.
- Designed 99084 candidate probes in 26.438s.
-- designing region: 18:55371554-55415715_strand_+_gene_hSE-506 -- region: 506, input file: \\10.245.74.212\Chromatin_NAS_2\Chromatin_Libraries\CTP-14_human_brain\human_brain_promoter_enhancer\sequences_enhancers\hSE-506_reg_0.fasta
-- setting attribute: map_self_sequences
- Mapping no. of seqs: 1
--- finish map_self_sequences in 0.326s.
- Designed 88242 candidate probes in 23.561s.
-- designing region: 18:75220811-75258016_strand_+_gene_hSE-507 -- region: 507, input file: \\10.245.74.212\Chromatin_NAS_2\Chromatin_Librar

--- finish map_self_sequences in 0.469s.
- Designed 84790 candidate probes in 25.589s.
-- designing region: 20:9682911-9730866_strand_+_gene_hSE-527 -- region: 527, input file: \\10.245.74.212\Chromatin_NAS_2\Chromatin_Libraries\CTP-14_human_brain\human_brain_promoter_enhancer\sequences_enhancers\hSE-527_reg_0.fasta
-- setting attribute: map_self_sequences
- Mapping no. of seqs: 1
--- finish map_self_sequences in 0.337s.
- Designed 95830 candidate probes in 25.432s.
-- designing region: 20:10311323-10352964_strand_+_gene_hSE-528 -- region: 528, input file: \\10.245.74.212\Chromatin_NAS_2\Chromatin_Libraries\CTP-14_human_brain\human_brain_promoter_enhancer\sequences_enhancers\hSE-528_reg_0.fasta
-- setting attribute: map_self_sequences
- Mapping no. of seqs: 1
--- finish map_self_sequences in 0.318s.
- Designed 83202 candidate probes in 25.131s.
-- designing region: 20:16241799-16297292_strand_+_gene_hSE-529 -- region: 529, input file: \\10.245.74.212\Chromatin_NAS_2\Chromatin_Libraries

--- finish map_self_sequences in 0.364s.
- Designed 112230 candidate probes in 29.805s.
-- designing region: 21:33359387-33394726_strand_+_gene_hSE-549 -- region: 549, input file: \\10.245.74.212\Chromatin_NAS_2\Chromatin_Libraries\CTP-14_human_brain\human_brain_promoter_enhancer\sequences_enhancers\hSE-549_reg_0.fasta
-- setting attribute: map_self_sequences
- Mapping no. of seqs: 1
--- finish map_self_sequences in 0.287s.
- Designed 70598 candidate probes in 18.967s.
-- designing region: 21:45104328-45161413_strand_+_gene_hSE-550 -- region: 550, input file: \\10.245.74.212\Chromatin_NAS_2\Chromatin_Libraries\CTP-14_human_brain\human_brain_promoter_enhancer\sequences_enhancers\hSE-550_reg_0.fasta
-- setting attribute: map_self_sequences
- Mapping no. of seqs: 1
--- finish map_self_sequences in 0.370s.
- Designed 114090 candidate probes in 30.377s.
-- designing region: 22:18810242-18833000_strand_+_gene_hSE-551 -- region: 551, input file: \\10.245.74.212\Chromatin_NAS_2\Chromatin_Libra

### check probes

In [87]:
pbs, pb_scores = pb_designer.check_probes()

-- check region:0 1:1430239-1442070_strand_+_gene_hSE-0, 23582 candidate probes
--- 8209 probes passed check_dic selection.
finish in 1.090s, 162 probes kept.
-- check region:1 1:6634488-6683694_strand_+_gene_hSE-1, 94514 candidate probes
--- 29539 probes passed check_dic selection.
finish in 4.170s, 464 probes kept.
-- check region:2 1:8620665-8687442_strand_+_gene_hSE-2, 130980 candidate probes
--- 28534 probes passed check_dic selection.
finish in 4.682s, 538 probes kept.
-- check region:3 1:9345782-9411449_strand_+_gene_hSE-3, 127244 candidate probes
--- 49545 probes passed check_dic selection.
finish in 4.533s, 716 probes kept.
-- check region:4 1:9593339-9630197_strand_+_gene_hSE-4, 70700 candidate probes
--- 25324 probes passed check_dic selection.
finish in 2.254s, 411 probes kept.
-- check region:5 1:11784919-11803898_strand_+_gene_hSE-5, 37194 candidate probes
--- 12984 probes passed check_dic selection.
finish in 1.196s, 211 probes kept.
-- check region:6 1:13712764-13767099

-- check region:50 1:209850354-209886516_strand_+_gene_hSE-50, 71486 candidate probes
--- 21225 probes passed check_dic selection.
finish in 2.063s, 384 probes kept.
-- check region:51 1:211731778-211757085_strand_+_gene_hSE-51, 49820 candidate probes
--- 13514 probes passed check_dic selection.
finish in 1.403s, 262 probes kept.
-- check region:52 1:212450290-212515259_strand_+_gene_hSE-52, 127216 candidate probes
--- 47372 probes passed check_dic selection.
finish in 4.105s, 724 probes kept.
-- check region:53 1:213996599-214037834_strand_+_gene_hSE-53, 82036 candidate probes
--- 28832 probes passed check_dic selection.
finish in 2.540s, 492 probes kept.
-- check region:54 1:214303536-214372510_strand_+_gene_hSE-54, 135640 candidate probes
--- 44917 probes passed check_dic selection.
finish in 4.116s, 768 probes kept.
-- check region:55 1:223749018-223812070_strand_+_gene_hSE-55, 121690 candidate probes
--- 32310 probes passed check_dic selection.
finish in 3.355s, 603 probes kept.
-

--- 28974 probes passed check_dic selection.
finish in 2.559s, 453 probes kept.
-- check region:100 2:232303321-232335665_strand_+_gene_hSE-100, 64270 candidate probes
--- 21483 probes passed check_dic selection.
finish in 1.973s, 348 probes kept.
-- check region:101 3:229810-285643_strand_+_gene_hSE-101, 109460 candidate probes
--- 27651 probes passed check_dic selection.
finish in 2.927s, 495 probes kept.
-- check region:102 3:11183206-11212608_strand_+_gene_hSE-102, 57668 candidate probes
--- 18935 probes passed check_dic selection.
finish in 1.752s, 303 probes kept.
-- check region:103 3:16439998-16496611_strand_+_gene_hSE-103, 112668 candidate probes
--- 35828 probes passed check_dic selection.
finish in 3.346s, 609 probes kept.
-- check region:104 3:18369517-18408885_strand_+_gene_hSE-104, 78260 candidate probes
--- 19638 probes passed check_dic selection.
finish in 2.082s, 395 probes kept.
-- check region:105 3:30629143-30668635_strand_+_gene_hSE-105, 78202 candidate probes
--- 

-- check region:149 4:24084159-24122236_strand_+_gene_hSE-149, 75654 candidate probes
--- 26315 probes passed check_dic selection.
finish in 2.340s, 425 probes kept.
-- check region:150 4:24360696-24405801_strand_+_gene_hSE-150, 87844 candidate probes
--- 23893 probes passed check_dic selection.
finish in 2.433s, 403 probes kept.
-- check region:151 4:41619002-41661905_strand_+_gene_hSE-151, 85338 candidate probes
--- 26072 probes passed check_dic selection.
finish in 2.492s, 476 probes kept.
-- check region:152 4:82698339-82793740_strand_+_gene_hSE-152, 186138 candidate probes
--- 56894 probes passed check_dic selection.
finish in 5.466s, 937 probes kept.
-- check region:153 4:86279377-86344472_strand_+_gene_hSE-153, 127308 candidate probes
--- 26167 probes passed check_dic selection.
finish in 3.172s, 533 probes kept.
-- check region:154 4:87030818-87079602_strand_+_gene_hSE-154, 95982 candidate probes
--- 28132 probes passed check_dic selection.
finish in 2.746s, 482 probes kept.
--

--- 13747 probes passed check_dic selection.
finish in 1.946s, 276 probes kept.
-- check region:199 5:140977023-141017732_strand_+_gene_hSE-199, 79236 candidate probes
--- 23249 probes passed check_dic selection.
finish in 2.274s, 410 probes kept.
-- check region:200 5:149777851-149840046_strand_+_gene_hSE-200, 121350 candidate probes
--- 51067 probes passed check_dic selection.
finish in 4.150s, 751 probes kept.
-- check region:201 5:159504835-159577432_strand_+_gene_hSE-201, 143325 candidate probes
--- 48533 probes passed check_dic selection.
finish in 4.383s, 824 probes kept.
-- check region:202 5:159928360-159972308_strand_+_gene_hSE-202, 86562 candidate probes
--- 24651 probes passed check_dic selection.
finish in 2.460s, 430 probes kept.
-- check region:203 5:160479986-160537501_strand_+_gene_hSE-203, 112998 candidate probes
--- 32252 probes passed check_dic selection.
finish in 3.212s, 580 probes kept.
-- check region:204 5:161366128-161423146_strand_+_gene_hSE-204, 113666 candi

finish in 2.349s, 410 probes kept.
-- check region:248 7:73699309-73726146_strand_+_gene_hSE-248, 51830 candidate probes
--- 15863 probes passed check_dic selection.
finish in 1.545s, 270 probes kept.
-- check region:249 7:75925652-75989623_strand_+_gene_hSE-249, 122434 candidate probes
--- 37250 probes passed check_dic selection.
finish in 3.588s, 567 probes kept.
-- check region:250 7:78124134-78176575_strand_+_gene_hSE-250, 103872 candidate probes
--- 34604 probes passed check_dic selection.
finish in 3.141s, 578 probes kept.
-- check region:251 7:86747661-86798481_strand_+_gene_hSE-251, 100884 candidate probes
--- 28018 probes passed check_dic selection.
finish in 2.806s, 517 probes kept.
-- check region:252 7:90994075-91042574_strand_+_gene_hSE-252, 95728 candidate probes
--- 26360 probes passed check_dic selection.
finish in 2.636s, 472 probes kept.
-- check region:253 7:99092884-99137847_strand_+_gene_hSE-253, 88726 candidate probes
--- 20105 probes passed check_dic selection.
f

-- check region:297 9:94139725-94166856_strand_+_gene_hSE-297, 53132 candidate probes
--- 19436 probes passed check_dic selection.
finish in 1.674s, 287 probes kept.
-- check region:298 9:101638276-101692498_strand_+_gene_hSE-298, 106382 candidate probes
--- 23480 probes passed check_dic selection.
finish in 2.688s, 471 probes kept.
-- check region:299 9:114619141-114675546_strand_+_gene_hSE-299, 110278 candidate probes
--- 28788 probes passed check_dic selection.
finish in 3.054s, 507 probes kept.
-- check region:300 9:116800922-116861025_strand_+_gene_hSE-300, 118138 candidate probes
--- 29401 probes passed check_dic selection.
finish in 3.168s, 525 probes kept.
-- check region:301 9:119272760-119360861_strand_+_gene_hSE-301, 174004 candidate probes
--- 53471 probes passed check_dic selection.
finish in 5.078s, 921 probes kept.
-- check region:302 9:123992616-124019374_strand_+_gene_hSE-302, 53224 candidate probes
--- 21270 probes passed check_dic selection.
finish in 1.771s, 332 pro

-- check region:346 11:46628846-46697812_strand_+_gene_hSE-346, 133184 candidate probes
--- 32336 probes passed check_dic selection.
finish in 3.598s, 587 probes kept.
-- check region:347 11:47329633-47374307_strand_+_gene_hSE-347, 86524 candidate probes
--- 26682 probes passed check_dic selection.
finish in 2.592s, 462 probes kept.
-- check region:348 11:47506124-47540732_strand_+_gene_hSE-348, 66926 candidate probes
--- 11304 probes passed check_dic selection.
finish in 1.592s, 224 probes kept.
-- check region:349 11:60848132-60890857_strand_+_gene_hSE-349, 82740 candidate probes
--- 26275 probes passed check_dic selection.
finish in 2.502s, 442 probes kept.
-- check region:350 11:64028381-64078643_strand_+_gene_hSE-350, 99738 candidate probes
--- 43182 probes passed check_dic selection.
finish in 3.449s, 627 probes kept.
-- check region:351 11:66408429-66430438_strand_+_gene_hSE-351, 43768 candidate probes
--- 17958 probes passed check_dic selection.
finish in 1.488s, 281 probes kep

finish in 2.809s, 506 probes kept.
-- check region:395 13:48144569-48194812_strand_+_gene_hSE-395, 97452 candidate probes
--- 21484 probes passed check_dic selection.
finish in 2.498s, 394 probes kept.
-- check region:396 13:48542660-48596074_strand_+_gene_hSE-396, 105602 candidate probes
--- 25168 probes passed check_dic selection.
finish in 2.803s, 478 probes kept.
-- check region:397 13:80132673-80181401_strand_+_gene_hSE-397, 93502 candidate probes
--- 19331 probes passed check_dic selection.
finish in 2.331s, 337 probes kept.
-- check region:398 13:97108422-97142031_strand_+_gene_hSE-398, 66926 candidate probes
--- 26199 probes passed check_dic selection.
finish in 2.204s, 394 probes kept.
-- check region:399 13:98246675-98285807_strand_+_gene_hSE-399, 76234 candidate probes
--- 23465 probes passed check_dic selection.
finish in 2.229s, 392 probes kept.
-- check region:400 13:98801974-98897947_strand_+_gene_hSE-400, 190470 candidate probes
--- 62362 probes passed check_dic selecti

finish in 2.924s, 498 probes kept.
-- check region:444 15:84181639-84215873_strand_+_gene_hSE-444, 67704 candidate probes
--- 0 probes passed check_dic selection.
finish in 1.187s, 0 probes kept.
-- check region:445 15:93018435-93066120_strand_+_gene_hSE-445, 94630 candidate probes
--- 34417 probes passed check_dic selection.
finish in 3.048s, 592 probes kept.
-- check region:446 15:96345500-96366808_strand_+_gene_hSE-446, 42506 candidate probes
--- 21329 probes passed check_dic selection.
finish in 1.599s, 299 probes kept.
-- check region:447 15:100996071-101030602_strand_+_gene_hSE-447, 68382 candidate probes
--- 28391 probes passed check_dic selection.
finish in 2.333s, 425 probes kept.
-- check region:448 16:2692139-2716023_strand_+_gene_hSE-448, 47496 candidate probes
--- 18048 probes passed check_dic selection.
finish in 1.578s, 294 probes kept.
-- check region:449 16:4970404-5043076_strand_+_gene_hSE-449, 141586 candidate probes
--- 38860 probes passed check_dic selection.
finis

--- 34506 probes passed check_dic selection.
finish in 3.218s, 529 probes kept.
-- check region:494 17:66938552-66999946_strand_+_gene_hSE-494, 120907 candidate probes
--- 49740 probes passed check_dic selection.
finish in 4.107s, 746 probes kept.
-- check region:495 17:72501923-72540722_strand_+_gene_hSE-495, 76084 candidate probes
--- 28812 probes passed check_dic selection.
finish in 2.449s, 424 probes kept.
-- check region:496 17:75468170-75507384_strand_+_gene_hSE-496, 76856 candidate probes
--- 28684 probes passed check_dic selection.
finish in 2.511s, 476 probes kept.
-- check region:497 17:75674907-75705147_strand_+_gene_hSE-497, 59206 candidate probes
--- 20467 probes passed check_dic selection.
finish in 1.838s, 344 probes kept.
-- check region:498 17:76138008-76197157_strand_+_gene_hSE-498, 116248 candidate probes
--- 36425 probes passed check_dic selection.
finish in 3.477s, 610 probes kept.
-- check region:499 17:79286361-79323144_strand_+_gene_hSE-499, 72900 candidate pro

-- check region:543 20:57648277-57684109_strand_+_gene_hSE-543, 71400 candidate probes
--- 37877 probes passed check_dic selection.
finish in 2.839s, 534 probes kept.
-- check region:544 20:59560512-59645271_strand_+_gene_hSE-544, 169072 candidate probes
--- 68197 probes passed check_dic selection.
finish in 5.620s, 1035 probes kept.
-- check region:545 20:61619691-61664938_strand_+_gene_hSE-545, 89010 candidate probes
--- 44220 probes passed check_dic selection.
finish in 3.330s, 603 probes kept.
-- check region:546 20:62210388-62235513_strand_+_gene_hSE-546, 49464 candidate probes
--- 20137 probes passed check_dic selection.
finish in 1.684s, 327 probes kept.
-- check region:547 21:26127925-26173760_strand_+_gene_hSE-547, 90956 candidate probes
--- 21104 probes passed check_dic selection.
finish in 2.360s, 391 probes kept.
-- check region:548 21:33008810-33064965_strand_+_gene_hSE-548, 111538 candidate probes
--- 48041 probes passed check_dic selection.
finish in 3.875s, 712 probes k

### save probes

In [88]:
overwrite_savefile = True 
if not os.path.exists(pb_designer.save_file) or overwrite_savefile:
    #pb_designer.plots()
    #pb_designer.save_csv()
    pb_designer.save_to_file()
print(f"-- number of probes kept: {len(pb_designer.kept_probes)}")

- Save reports into file: \\10.245.74.212\Chromatin_NAS_2\Chromatin_Libraries\CTP-14_human_brain\human_brain_promoter_enhancer\reports\hSE_probes.pbr
-- number of probes kept: 265626


In [77]:
del(pb_designer)