# Library design for CTP-05, Intronic RNA


by Pu Zheng

This library design is based on target regions designed by Stephen.

## 0. Imports

In [4]:
%run "E:\Users\puzheng\Documents\Startup_py3.py"
sys.path.append(r"E:\Users\puzheng\Documents")

import ImageAnalysis3
from ImageAnalysis3 import get_img_info, visual_tools, corrections, library_tools

from ImageAnalysis3.library_tools import LibraryDesigner as ld
from ImageAnalysis3.library_tools import LibraryTools as lt

%matplotlib notebook
print(os.getpid())

95264


## 1. Load probes and convert to dictionary

In [13]:
# Path configuration for this library. All locations are absolute Windows paths,
# so they must be adjusted when the notebook is run on another machine.
# NOTE(review): `os` is not imported in any visible cell; presumably it is provided
# by the Startup_py3.py script run in the first cell — confirm.

# folder for genomic info (hg38 genome build)
genome_folder = r'E:\Genomes\hg38'

# Library directories: root folder of the whole oligo pool
pool_folder = r'X:\Libraries\CTP-07'
# folder for this sub-pool; every output written by the visible cells of this notebook goes below it
library_folder = os.path.join(pool_folder, 'chr21_intronic')
# folder for fasta sequences
sequence_folder = os.path.join(library_folder, 'sequences')

# folder to save result probes
save_folder = os.path.join(library_folder, 'reports')

In [50]:
import csv

source_folder = r'E:\Users\puzheng\Documents\Libraries\CTP-05\Candidate_Probes'
probe_file = 'Chr21genes_96Intron.csv'

# --- 1) Read candidate probes: one dict per CSV row ---------------------------
# Columns are matched by substring on the lower-cased header name and the first
# matching column wins, so the exact header spelling does not matter.
probes = []
# newline='' is the mode the csv module documents for files passed to csv.reader.
with open(os.path.join(source_folder, probe_file), 'r', newline='') as csvfile:
    reader = csv.reader(csvfile, delimiter=',', quotechar='|')
    header = next(reader, None)
    for row in reader:
        if not row:
            # blank line: would otherwise produce an empty probe and a KeyError below
            continue
        _source_probe = {h.lower(): info for h, info in zip(header, row)}
        _probe = {}
        for _key, _value in _source_probe.items():
            if 'position' in _key and 'position' not in _probe:
                _probe['position'] = int(_value)
            if 'gene' in _key and 'gene' not in _probe:
                # keep only the text before the first '_';
                # split() returns the whole value when there is no '_'
                _probe['gene'] = _value.split('_')[0]
            if 'sequence' in _key and 'sequence' not in _probe:
                _probe['sequence'] = _value
        probes.append(_probe)

# --- 2) Group probes by gene --------------------------------------------------
probe_dic = {}
for _probe in probes:
    probe_dic.setdefault(_probe['gene'], []).append(_probe)

# --- 3) Read the start coordinate of every gene (column 0: gene, column 1: start)
position_file = "chr21_intron_positions.csv"
position_dic = {}
with open(os.path.join(source_folder, position_file), 'r', newline='') as csvfile:
    reader = csv.reader(csvfile, delimiter=',', quotechar='|')
    for row in reader:
        if not row:
            continue
        position_dic[row[0]] = int(row[1])

# --- 4) Sort each gene's probes by position and annotate them ------------------
# 'id' is the rank of the probe within its gene; 'region' spans from the gene
# start to the end of the last probe (probe positions are offsets added to the
# gene start).
for _gene, _pb_list in list(probe_dic.items()):
    _sorted_pb_list = sorted(_pb_list, key=lambda v: int(v['position']))
    # extract position
    _gene_start = position_dic[_gene]
    _gene_end = _sorted_pb_list[-1]['position'] + len(_sorted_pb_list[-1]['sequence']) + _gene_start
    _region = f'chr21:{_gene_start}-{_gene_end}'
    for _i, _probe in enumerate(_sorted_pb_list):
        _probe['id'] = _i
        _probe['region'] = _region
    # store the sorted list so that list index and 'id' always agree
    probe_dic[_gene] = _sorted_pb_list

print(len(probe_dic))

96


In [52]:
# Example of the record-name format that assembled probes carry in the outputs further down:
#>chr21:26842909-26847909_gene_ADAMTS1_pb_0_pos_0_readouts_[NDB_1147_u,NDB_1147_u,NDB_1147_u]

# Spot-check the parsed probes of one gene; each entry holds gene, position, sequence, id and region.
probe_dic['APP']

[{'gene': 'APP',
  'position': 21,
  'sequence': 'GCTGGTGTTGATTGGGGGCCTGGTCTTGAG',
  'id': 0,
  'region': 'chr21:26171128-26324515'},
 {'gene': 'APP',
  'position': 51,
  'sequence': 'GGAAGAAAAAGAGGATGCTCCTGTTAGGTC',
  'id': 1,
  'region': 'chr21:26171128-26324515'},
 {'gene': 'APP',
  'position': 81,
  'sequence': 'ACATACACAGACTTGTTCTTCAGCACATTG',
  'id': 2,
  'region': 'chr21:26171128-26324515'},
 {'gene': 'APP',
  'position': 111,
  'sequence': 'CCACTCTGTGTTGTACTGTGTTTTGGACTC',
  'id': 3,
  'region': 'chr21:26171128-26324515'},
 {'gene': 'APP',
  'position': 141,
  'sequence': 'TTGCAGTTACATTCTGTGCACTGACCCTAT',
  'id': 4,
  'region': 'chr21:26171128-26324515'},
 {'gene': 'APP',
  'position': 336,
  'sequence': 'AGGGGCCTTGTGTGTTTCTTCACATAGACT',
  'id': 5,
  'region': 'chr21:26171128-26324515'},
 {'gene': 'APP',
  'position': 370,
  'sequence': 'AGAAGAAAGAATCTAATGTGAAGCTGCAGC',
  'id': 6,
  'region': 'chr21:26171128-26324515'},
 {'gene': 'APP',
  'position': 469,
  'sequence': 'TTTGGTG

## 3. Screening genes and probes

In [81]:
# Gene screening: drop genes with too few candidate probes, then keep only the
# most 5' probes (smallest position) of each remaining gene, up to max_num_probe.
min_num_probe = 12  # lower bound; from MERFISH experience as well as Long Cai 2018 paper
max_num_probe = 80  # upper bound; just an arbitrary number larger than 2x min_num_probe

kept_probe_dic = {}
# visit genes in order of their start coordinate so the result is position-ordered
for _gene in sorted(probe_dic, key=position_dic.__getitem__):
    _pb_list = probe_dic[_gene]
    if len(_pb_list) < min_num_probe:
        print(f"Gene: {_gene} has probes less than {min_num_probe}, skip")
        continue
    _by_position = sorted(_pb_list, key=lambda _pb: int(_pb["position"]))
    # a slice already stops at the end of the list, no explicit clamp needed
    kept_probe_dic[_gene] = _by_position[:max_num_probe]
print("Number of genes kept:", len(kept_probe_dic))

Number of genes kept: 96


In [82]:
# Persist the screened probe dictionary to disk.
# The context manager guarantees the file is flushed and closed, even if pickling fails;
# the previous bare open() left the handle to be closed by the garbage collector.
_filtered_dict_file = os.path.join(library_folder, 'filtered_candidate_dict.pkl')
with open(_filtered_dict_file, 'wb') as _pkl_handle:
    pickle.dump(kept_probe_dic, _pkl_handle)

### save to fasta file

In [83]:
# biopython for SeqRecord
from Bio import SeqIO
from Bio.Seq import Seq
from Bio.SeqRecord import SeqRecord
from Bio.Alphabet import IUPAC

# blast
from Bio.Blast.Applications import NcbiblastnCommandline
from Bio.Blast import NCBIXML 

## 4. Append barcode info

In [85]:
# Readout assignment: the i-th gene (in kept_probe_dic order) gets the label 'u<i>',
# listed three times.
gene_readout_dict = {
    _gene: [f"u{_idx}" for _ in range(3)]
    for _idx, _gene in enumerate(kept_probe_dic)
}

In [86]:
# load primers and readouts
# Reload the probes module first so the calls below use its current on-disk version
# (presumably `reload` is importlib.reload, provided by the startup script — confirm).
reload(library_tools.probes)
# Primer pair [4,3]: the log printed by this cell reports W1A05_primer_4 and W1A04_primer_3.
primers = library_tools.probes.load_primers([4,3])
# Request as many readouts as there are genes in gene_readout_dict.
# NOTE(review): the meaning of 'NDB_new', _num_colors=3 and _start_id=29 is defined inside
# load_readouts and is not visible here — confirm before changing them.
unique_readouts = library_tools.probes.load_readouts(len(gene_readout_dict), 'NDB_new', _num_colors=3, _start_id=29)
# 'u' is the readout-type key; it matches the 'u' prefix of the labels in gene_readout_dict.
readout_dict = {'u':unique_readouts}

- Picked primer: ID: W1A05_primer_4
Name: W1A05_primer_4
Description: W1A05_primer_4
Number of features: 0
Seq('CATTCAGCATTGCGCAACGG', SingleLetterAlphabet())
- Picked primer: ID: W1A04_primer_3
Name: W1A04_primer_3
Description: W1A04_primer_3
Number of features: 0
Seq('TAATACGACTCACTATAGGGCGTTGTATGCCCTCCACGC', SingleLetterAlphabet())


In [87]:
# Spot-check one gene after screening: the entries should still carry gene, position, sequence, id and region.
kept_probe_dic['APP']

[{'gene': 'APP',
  'position': 21,
  'sequence': 'GCTGGTGTTGATTGGGGGCCTGGTCTTGAG',
  'id': 0,
  'region': 'chr21:26171128-26324515'},
 {'gene': 'APP',
  'position': 51,
  'sequence': 'GGAAGAAAAAGAGGATGCTCCTGTTAGGTC',
  'id': 1,
  'region': 'chr21:26171128-26324515'},
 {'gene': 'APP',
  'position': 81,
  'sequence': 'ACATACACAGACTTGTTCTTCAGCACATTG',
  'id': 2,
  'region': 'chr21:26171128-26324515'},
 {'gene': 'APP',
  'position': 111,
  'sequence': 'CCACTCTGTGTTGTACTGTGTTTTGGACTC',
  'id': 3,
  'region': 'chr21:26171128-26324515'},
 {'gene': 'APP',
  'position': 141,
  'sequence': 'TTGCAGTTACATTCTGTGCACTGACCCTAT',
  'id': 4,
  'region': 'chr21:26171128-26324515'},
 {'gene': 'APP',
  'position': 336,
  'sequence': 'AGGGGCCTTGTGTGTTTCTTCACATAGACT',
  'id': 5,
  'region': 'chr21:26171128-26324515'},
 {'gene': 'APP',
  'position': 370,
  'sequence': 'AGAAGAAAGAATCTAATGTGAAGCTGCAGC',
  'id': 6,
  'region': 'chr21:26171128-26324515'},
 {'gene': 'APP',
  'position': 469,
  'sequence': 'TTTGGTG

In [88]:
# Assemble probes
# Build the full probe set from the screened targets (kept_probe_dic), the per-gene readout
# labels (gene_readout_dict), the readout sequences (readout_dict) and the primer pair.
# The module is reloaded first so the call uses its current on-disk version.
# NOTE(review): rc_targets=True presumably reverse-complements every target sequence, and
# save=True presumably writes the assembled probes into library_folder — confirm in Assemble_probes.
reload(library_tools.probes)
cand_probes, readout_summary = library_tools.probes.Assemble_probes(library_folder, kept_probe_dic, gene_readout_dict,
                                                                    readout_dict, primers, rc_targets=True, save=True)


- Assemble probes by given target sequences, readouts and primers.
-- included readout types: ['u']
--- assemblying 80 probes in region: U2AF1L5
--- assemblying 80 probes in region: KCNE1B
--- assemblying 76 probes in region: CYP4F29P
--- assemblying 80 probes in region: ANKRD20A11P
--- assemblying 64 probes in region: HSPA13
--- assemblying 80 probes in region: NRIP1
--- assemblying 80 probes in region: USP25
--- assemblying 80 probes in region: CXADR
--- assemblying 80 probes in region: BTG3
--- assemblying 80 probes in region: C21orf91
--- assemblying 80 probes in region: NCAM2
--- assemblying 80 probes in region: MRPL39
--- assemblying 80 probes in region: JAM2
--- assemblying 80 probes in region: GABPA
--- assemblying 51 probes in region: ATP5PF
--- assemblying 80 probes in region: APP
--- assemblying 73 probes in region: ADAMTS1
--- assemblying 80 probes in region: ADAMTS5
--- assemblying 80 probes in region: N6AMT1
--- assemblying 80 probes in region: LTN1
--- assemblying 80 pro

## 5. Check quality

In [89]:
# biopython for SeqRecord
from Bio import SeqIO
from Bio.Seq import Seq
from Bio.SeqRecord import SeqRecord
from Bio.Alphabet import IUPAC

# blast
from Bio.Blast.Applications import NcbiblastnCommandline
from Bio.Blast import NCBIXML 

In [90]:
# Read every full-length candidate probe back from the FASTA file in library_folder.
candidate_full_name = 'candidate_probes.fasta'
with open(os.path.join(library_folder, candidate_full_name), 'r') as handle:
    # materialize the parser output while the file is still open
    full_records = list(SeqIO.parse(handle, "fasta"))

print(f"Total probe loaded: {len(full_records)}")

Total probe loaded: 7430


In [91]:
# Quality check of the assembled candidate probes (full_records).
# Each qc helper returns a pass/fail flag that is printed; several also return a lookup
# table consumed by a later step, so the call order below matters.
import ImageAnalysis3.library_tools.quality_check as qc
reload(qc)

# 1) the two primers in `primers` are checked against every probe
primer_check = qc._check_primer_usage(full_records, primers[0], primers[1])
print(f"primer_check:{primer_check}")
# 2) probe count per region, checked against min_size=15
#    (presumably the source of the "gene: <name> -> <count>" lines in the log — confirm)
reg_size_dic, len_check = qc._check_region_size(full_records, min_size=15)
print(f"len_check:{len_check}")
# 3) region -> readouts consistency
reg_readout_dic, reg2readout_check = qc._check_region_to_readouts(full_records, readout_dict)
print(f"reg2readout_check:{reg2readout_check}")
# 4) readout -> region consistency; target_len=30 matches the 30-nt candidate sequences shown above
readout_reg_dic, readout2reg_check = qc._check_readout_to_region(reg_readout_dic, full_records, readout_dict, target_len=30)
print(f"readout2reg_check:{readout2reg_check}")
# 5) internal k-mer map of the library; the log shows it is saved as probe_table_17.npz in library_folder
int_map = qc._construct_internal_map(full_records, library_folder)
# 6) occurrences of readouts within the probes, using the tables built in steps 2, 4 and 5
readout_count_dic, readout_count_check = qc._check_readout_in_probes(readout_reg_dic, reg_size_dic, int_map, readout_dict)
print(f"readout_count_check:{readout_count_check}")
# 7) screen probes against each other; the log lists the probes that were "dumped" for too many hits
kept_records, removed_count = qc._check_between_probes(full_records, int_map)
# save kept records
with open(os.path.join(library_folder, 'filtered_full_probes.fasta'), 'w') as output_handle:
    SeqIO.write(kept_records, output_handle, "fasta")

-- Checking primer usage, total probes: 7430
primer_check:True
gene: U2AF1L5 -> 80
gene: KCNE1B -> 80
gene: CYP4F29P -> 76
gene: ANKRD20A11P -> 80
gene: HSPA13 -> 64
gene: NRIP1 -> 80
gene: USP25 -> 80
gene: CXADR -> 80
gene: BTG3 -> 80
gene: C21orf91 -> 80
gene: NCAM2 -> 80
gene: MRPL39 -> 80
gene: JAM2 -> 80
gene: GABPA -> 80
gene: ATP5PF -> 51
gene: APP -> 80
gene: ADAMTS1 -> 73
gene: ADAMTS5 -> 80
gene: N6AMT1 -> 80
gene: LTN1 -> 80
gene: USP16 -> 80
gene: CCT8 -> 77
gene: BACH1 -> 80
gene: TIAM1 -> 80
gene: SOD1 -> 80
gene: SCAF4 -> 80
gene: HUNK -> 80
gene: MIS18A -> 80
gene: URB1 -> 80
gene: EVA1C -> 80
gene: SYNJ1 -> 80
gene: PAXBP1 -> 80
gene: IFNAR2 -> 80
gene: IL10RB -> 80
gene: IFNAR1 -> 80
gene: IFNGR2 -> 80
gene: SON -> 80
gene: DONSON -> 75
gene: ITSN1 -> 80
gene: CRYZL1 -> 80
gene: MRPS6 -> 80
gene: RCAN1 -> 50
gene: RUNX1 -> 80
gene: SETD4 -> 54
gene: CBR3 -> 76
gene: MORC3 -> 80
gene: CHAF1B -> 80
gene: SIM2 -> 80
gene: HLCS -> 80
gene: TTC3 -> 80
gene: DSCR9 -> 74
ge

-- saving internal 17-mer map to file:X:\Libraries\CTP-07\chr21_intronic\probe_table_17.npz
readout_count_check:True
--- Sequence:chr21:45643694-45659543_gene_PCBP3_pb_3_pos_129_readouts_[NDB_1300_u,NDB_1300_u,NDB_1300_u] got hits:59, dumped
--- Sequence:chr21:45643694-45659543_gene_PCBP3_pb_4_pos_175_readouts_[NDB_1300_u,NDB_1300_u,NDB_1300_u] got hits:58, dumped
--- Sequence:chr21:45643694-45659543_gene_PCBP3_pb_5_pos_239_readouts_[NDB_1300_u,NDB_1300_u,NDB_1300_u] got hits:72, dumped
--- Sequence:chr21:45643694-45659543_gene_PCBP3_pb_8_pos_362_readouts_[NDB_1300_u,NDB_1300_u,NDB_1300_u] got hits:70, dumped
--- Sequence:chr21:45643694-45659543_gene_PCBP3_pb_10_pos_422_readouts_[NDB_1300_u,NDB_1300_u,NDB_1300_u] got hits:57, dumped
--- Sequence:chr21:45643694-45659543_gene_PCBP3_pb_14_pos_588_readouts_[NDB_1300_u,NDB_1300_u,NDB_1300_u] got hits:58, dumped
--- Sequence:chr21:45643694-45659543_gene_PCBP3_pb_16_pos_648_readouts_[NDB_1300_u,NDB_1300_u,NDB_1300_u] got hits:62, dumped
-- to

## blast

In [93]:
# Blast the internally screened probes region by region. Per the log, the probes of each region
# are written to a FASTA file in the `blast` subfolder of library_folder and blasted against hg38.
# NOTE: kept_records is reassigned by the final quality-check cell further down, so this cell
# must run before that one to blast the intended probe set.
qc.Blast_probes(kept_records, library_folder)

- Blast probe against \\SMIRNOV\Chromatin_NAS_3\Pu\Genomes\hg38\hg38
-- number of probes loaded: 7423
--- processing region: ADAMTS1 with 73 probes
--- *writing file: X:\Libraries\CTP-07\chr21_intronic\blast\probe_gene_ADAMTS1.fasta
-- *blasting region: ADAMTS1
--- total time for blast ADAMTS1: 8.774511814117432
--- processing region: ADAMTS5 with 80 probes
--- *writing file: X:\Libraries\CTP-07\chr21_intronic\blast\probe_gene_ADAMTS5.fasta
-- *blasting region: ADAMTS5
--- total time for blast ADAMTS5: 9.524093389511108
--- processing region: ADARB1 with 80 probes
--- *writing file: X:\Libraries\CTP-07\chr21_intronic\blast\probe_gene_ADARB1.fasta
-- *blasting region: ADARB1
--- total time for blast ADARB1: 9.511083841323853
--- processing region: AGPAT3 with 80 probes
--- *writing file: X:\Libraries\CTP-07\chr21_intronic\blast\probe_gene_AGPAT3.fasta
-- *blasting region: AGPAT3
--- total time for blast AGPAT3: 9.448047161102295
--- processing region: ANKRD20A11P with 80 probes
--- *wri

--- total time for blast IFNGR2: 7.535588264465332
--- processing region: IL10RB with 80 probes
--- *writing file: X:\Libraries\CTP-07\chr21_intronic\blast\probe_gene_IL10RB.fasta
-- *blasting region: IL10RB
--- total time for blast IL10RB: 5.798319339752197
--- processing region: ITGB2 with 80 probes
--- *writing file: X:\Libraries\CTP-07\chr21_intronic\blast\probe_gene_ITGB2.fasta
-- *blasting region: ITGB2
--- total time for blast ITGB2: 10.561866521835327
--- processing region: ITSN1 with 80 probes
--- *writing file: X:\Libraries\CTP-07\chr21_intronic\blast\probe_gene_ITSN1.fasta
-- *blasting region: ITSN1
--- total time for blast ITSN1: 9.592145681381226
--- processing region: JAM2 with 80 probes
--- *writing file: X:\Libraries\CTP-07\chr21_intronic\blast\probe_gene_JAM2.fasta
-- *blasting region: JAM2
--- total time for blast JAM2: 9.298924207687378
--- processing region: KCNE1B with 80 probes
--- *writing file: X:\Libraries\CTP-07\chr21_intronic\blast\probe_gene_KCNE1B.fasta
-- 

--- total time for blast SLC19A1: 9.672214031219482
--- processing region: SLC37A1 with 80 probes
--- *writing file: X:\Libraries\CTP-07\chr21_intronic\blast\probe_gene_SLC37A1.fasta
-- *blasting region: SLC37A1
--- total time for blast SLC37A1: 11.468530893325806
--- processing region: SOD1 with 80 probes
--- *writing file: X:\Libraries\CTP-07\chr21_intronic\blast\probe_gene_SOD1.fasta
-- *blasting region: SOD1
--- total time for blast SOD1: 7.625679969787598
--- processing region: SON with 80 probes
--- *writing file: X:\Libraries\CTP-07\chr21_intronic\blast\probe_gene_SON.fasta
-- *blasting region: SON
--- total time for blast SON: 9.011712312698364
--- processing region: SUMO3 with 80 probes
--- *writing file: X:\Libraries\CTP-07\chr21_intronic\blast\probe_gene_SUMO3.fasta
-- *blasting region: SUMO3
--- total time for blast SUMO3: 16.342171907424927
--- processing region: SYNJ1 with 80 probes
--- *writing file: X:\Libraries\CTP-07\chr21_intronic\blast\probe_gene_SYNJ1.fasta
-- *bla

In [98]:
# Screen probes by their blast results for the library in library_folder.
# The log reports "number of probes kept for this region: 60" for regions with enough
# surviving probes, so the positional argument 60 acts as the per-region cap.
# NOTE(review): hard_thres=29 feeds the "removed by hard count" filter seen in the log; the exact
# definition of a hard hit lives in Screening_Probes_by_Blast — confirm there.
kept_pbs, blast_keep_dic, hard_count_list, soft_count_list = qc.Screening_Probes_by_Blast(library_folder, 60,
                                                                                          hard_thres=29)

- Number of probes loaded: 7423
- Number of regions in this library: 96
-- checking probes in region: U2AF1L5
--- number of probes: 80 , kept by blast: 80 , if remove dups: 4
-- number of probes kept for this region: 60
-- checking probes in region: KCNE1B
--- number of probes: 80 , kept by blast: 80 , if remove dups: 0
-- number of probes kept for this region: 60
-- checking probes in region: CYP4F29P
--- gene=CYP4F29P, id=1 removed by hard count = 6
--- gene=CYP4F29P, id=2 removed by hard count = 3
--- gene=CYP4F29P, id=5 removed by hard count = 4
--- gene=CYP4F29P, id=9 removed by hard count = 4
--- gene=CYP4F29P, id=10 removed by hard count = 4
--- gene=CYP4F29P, id=11 removed by hard count = 4
--- gene=CYP4F29P, id=12 removed by hard count = 5
--- gene=CYP4F29P, id=15 removed by hard count = 4
--- gene=CYP4F29P, id=19 removed by hard count = 3
--- gene=CYP4F29P, id=20 removed by hard count = 7
--- gene=CYP4F29P, id=23 removed by hard count = 14
--- gene=CYP4F29P, id=24 removed by 

--- gene=LTN1, id=63 removed by soft count = 141
--- number of probes: 80 , kept by blast: 79 , if remove dups: 79
--- remove duplicated probes
-- number of probes kept for this region: 60
-- checking probes in region: USP16
--- number of probes: 80 , kept by blast: 80 , if remove dups: 80
--- remove duplicated probes
-- number of probes kept for this region: 60
-- checking probes in region: CCT8
--- number of probes: 77 , kept by blast: 77 , if remove dups: 77
--- remove duplicated probes
-- number of probes kept for this region: 60
-- checking probes in region: BACH1
--- number of probes: 80 , kept by blast: 80 , if remove dups: 80
--- remove duplicated probes
-- number of probes kept for this region: 60
-- checking probes in region: TIAM1
--- gene=TIAM1, id=76 removed by soft count = 148
--- number of probes: 80 , kept by blast: 79 , if remove dups: 79
--- remove duplicated probes
-- number of probes kept for this region: 60
-- checking probes in region: SOD1
--- gene=SOD1, id=26 re

--- number of probes: 80 , kept by blast: 80 , if remove dups: 80
--- remove duplicated probes
-- number of probes kept for this region: 60
-- checking probes in region: MX2
--- number of probes: 80 , kept by blast: 80 , if remove dups: 80
--- remove duplicated probes
-- number of probes kept for this region: 60
-- checking probes in region: MX1
--- number of probes: 80 , kept by blast: 80 , if remove dups: 80
--- remove duplicated probes
-- number of probes kept for this region: 60
-- checking probes in region: PRDM15
--- gene=PRDM15, id=6 removed by soft count = 126
--- number of probes: 80 , kept by blast: 79 , if remove dups: 79
--- remove duplicated probes
-- number of probes kept for this region: 60
-- checking probes in region: C2CD2
--- gene=C2CD2, id=55 removed by soft count = 56
--- gene=C2CD2, id=57 removed by soft count = 474
--- number of probes: 80 , kept by blast: 78 , if remove dups: 78
--- remove duplicated probes
-- number of probes kept for this region: 60
-- checkin

In [99]:
# Preview only the first few screened probes: displaying the whole list renders
# thousands of SeqRecord reprs and bloats the saved notebook.
kept_pbs[:3]

[SeqRecord(seq=Seq('CATTCAGCATTGCGCAACGGTGTGGGATGCGCATCTCACGTTCATTCTCCCCGC...CGC', SingleLetterAlphabet()), id='chr21:6484623-6489894_gene_U2AF1L5_pb_0_pos_189_readouts_[NDB_1213_u,NDB_1213_u,NDB_1213_u]', name='chr21:6484623-6489894_gene_U2AF1L5_pb_0_pos_189_readouts_[NDB_1213_u,NDB_1213_u,NDB_1213_u]', description='chr21:6484623-6489894_gene_U2AF1L5_pb_0_pos_189_readouts_[NDB_1213_u,NDB_1213_u,NDB_1213_u]', dbxrefs=[]),
 SeqRecord(seq=Seq('CATTCAGCATTGCGCAACGGTGTGGGATGCGCATCTCACGAAAGGCCCGGATGC...CGC', SingleLetterAlphabet()), id='chr21:6484623-6489894_gene_U2AF1L5_pb_1_pos_338_readouts_[NDB_1213_u,NDB_1213_u,NDB_1213_u]', name='chr21:6484623-6489894_gene_U2AF1L5_pb_1_pos_338_readouts_[NDB_1213_u,NDB_1213_u,NDB_1213_u]', description='chr21:6484623-6489894_gene_U2AF1L5_pb_1_pos_338_readouts_[NDB_1213_u,NDB_1213_u,NDB_1213_u]', dbxrefs=[]),
 SeqRecord(seq=Seq('CATTCAGCATTGCGCAACGGTGTGGGATGCGCATCTCACGCGAAAGTTGGAACA...CGC', SingleLetterAlphabet()), id='chr21:6484623-6489894_gene_U2AF1L5_p

In [100]:
# Folder for the final probe set. exist_ok=True makes the cell safe to re-run and removes
# the check-then-create race of a separate os.path.exists() test.
final_probe_folder = os.path.join(library_folder, 'final_probes')
os.makedirs(final_probe_folder, exist_ok=True)
# Second quality-check pass, this time on the blast-screened probes (kept_pbs).
# Same sequence of checks as the first pass on the candidate probes; the call order matters
# because later checks consume the tables returned by earlier ones.
reload(qc)

# primers must still be present in every remaining probe
primer_check = qc._check_primer_usage(kept_pbs, primers[0], primers[1])
print(f"primer_check:{primer_check}")
# probe count per region after blast screening, checked against min_size=15
reg_size_dic, len_check = qc._check_region_size(kept_pbs, min_size=15)
print(f"len_check:{len_check}")
# region -> readouts consistency
reg_readout_dic, reg2readout_check = qc._check_region_to_readouts(kept_pbs, readout_dict)
print(f"reg2readout_check:{reg2readout_check}")
# readout -> region consistency
readout_reg_dic, readout2reg_check = qc._check_readout_to_region(reg_readout_dic, kept_pbs, readout_dict, target_len=30)
print(f"readout2reg_check:{readout2reg_check}")
# NOTE: the map is saved into library_folder (not final_probe_folder); the log shows the same
# probe_table_17.npz path as in the first pass, so that file is overwritten here.
int_map = qc._construct_internal_map(kept_pbs, library_folder)
readout_count_dic, readout_count_check = qc._check_readout_in_probes(readout_reg_dic, reg_size_dic, int_map, readout_dict)
print(f"readout_count_check:{readout_count_check}")
# NOTE: this rebinds kept_records, replacing the list produced by the first pass.
kept_records, removed_count = qc._check_between_probes(kept_pbs, int_map)
# save kept records
with open(os.path.join(final_probe_folder, 'extra_filtered_full_probes.fasta'), 'w') as output_handle:
    SeqIO.write(kept_records, output_handle, "fasta")

-- Checking primer usage, total probes: 5637
primer_check:True
gene: U2AF1L5 -> 60
gene: KCNE1B -> 60
gene: CYP4F29P -> 34
gene: ANKRD20A11P -> 32
gene: HSPA13 -> 60
gene: NRIP1 -> 60
gene: USP25 -> 60
gene: CXADR -> 60
gene: BTG3 -> 60
gene: C21orf91 -> 60
gene: NCAM2 -> 60
gene: MRPL39 -> 60
gene: JAM2 -> 60
gene: GABPA -> 60
gene: ATP5PF -> 51
gene: APP -> 60
gene: ADAMTS1 -> 60
gene: ADAMTS5 -> 60
gene: N6AMT1 -> 60
gene: LTN1 -> 60
gene: USP16 -> 60
gene: CCT8 -> 60
gene: BACH1 -> 60
gene: TIAM1 -> 60
gene: SOD1 -> 60
gene: SCAF4 -> 60
gene: HUNK -> 60
gene: MIS18A -> 60
gene: URB1 -> 60
gene: EVA1C -> 60
gene: SYNJ1 -> 60
gene: PAXBP1 -> 60
gene: IFNAR2 -> 60
gene: IL10RB -> 60
gene: IFNAR1 -> 60
gene: IFNGR2 -> 60
gene: SON -> 60
gene: DONSON -> 60
gene: ITSN1 -> 60
gene: CRYZL1 -> 60
gene: MRPS6 -> 60
gene: RCAN1 -> 49
gene: RUNX1 -> 60
gene: SETD4 -> 43
gene: CBR3 -> 60
gene: MORC3 -> 60
gene: CHAF1B -> 60
gene: SIM2 -> 60
gene: HLCS -> 60
gene: TTC3 -> 60
gene: DSCR9 -> 60
ge

-- saving internal 17-mer map to file:X:\Libraries\CTP-07\chr21_intronic\probe_table_17.npz
readout_count_check:True
-- total probes removed by internal screening: 0


In [101]:
# Number of probes in the final set written to extra_filtered_full_probes.fasta.
len(kept_records)

5637

In [105]:
# Inspect bases 40-69 (30 nt) of the first final probe.
# NOTE(review): the displayed records start with the 20-nt primer CATTCAGCATTGCGCAACGG; presumably
# a 20-nt readout follows it and this slice is the 30-nt target sequence — confirm.
str(kept_records[0].seq)[40:70]

'TTCATTCTCCCCGCTCCATTTTTCCGCCGC'