# SI-14 New-Dna-Barcode Test

by Pu Zheng

2018.3.29

Testing barcodes on SI7+SI13-extension regions

In [1]:
#minimum imports:
import time,os,sys,glob
import cPickle as pickle
import numpy as np
import khmer
sys.path.append(r'/n/home13/pzheng/Documents/python-functions/python-functions-library')

from LibraryConstruction import fastaread,fastawrite,fastacombine
import LibraryDesigner as ld
import LibraryConstruction as lc

from Bio import SeqIO
from Bio.Seq import Seq
from Bio.Alphabet import IUPAC
from Bio.SeqRecord import SeqRecord 

## 1. Load previously designed probes, generate pb_records and pb_lists

In [2]:
# dir
master_dir = r'/n/boslfs/LABS/zhuang_lab/User/pzheng/Libraries/SI-14/NDB_test';
# input filename
pb_filename = 'NDB_test.txt';

## Create pb_record and pb_list by pb_file
pb_records, reg_pb_dic = [],{}; # initialize
# loop through all designed probes
with open(master_dir+os.sep+pb_filename, 'rU') as handle:
    lines = handle.readlines()
    titles = lines[0].split("\n")[0].split("\t")
    for line in lines[1:]:
        seq, name = line.split("\n")[0].split("  ");
        pb_records.append(SeqRecord(Seq(seq.upper(),alphabet=IUPAC.unambiguous_dna),id=name, name=name,description=''))
        reg_id = int(name.split('reg_')[1].split("_")[0])
        pb_info = {'reg_index':reg_id, 'total_seq':seq, 'total_name':name};
        if reg_id not in reg_pb_dic.keys():
            reg_pb_dic[reg_id] = [pb_info]
        else:
            reg_pb_dic[reg_id].append(pb_info)
pb_lists = reg_pb_dic.values()
print "- Total candidate sequences:", len(pb_records)
# save
save_dir = 'final_probes'
if not os.path.exists(master_dir+os.sep+save_dir):
    os.makedirs(master_dir+os.sep+save_dir)
print "-- Save pb_lists"
pickle.dump(pb_lists, open(master_dir+os.sep+save_dir+os.sep+'list.pkl', 'w'));
print "-- Save pb_records"
with open(master_dir+os.sep+save_dir+os.sep+'candidate_probes.fasta', "w") as output_handle:
    SeqIO.write(pb_records, output_handle, 'fasta');

- Total candidate sequences: 61693
-- Save pb_lists
-- Save pb_records


## 2. Generate barcode_scheme
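Not necessary for now: the scheme is passed to `Check_Probes` as `reg_encodings`, but none of the checks in that function read it.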

In [3]:
# Assign barcode indices per region: three consecutive indices (bc_1..bc_3)
# plus a fourth index (bc_4) offset by three times the number of regions.
n_regions = len(reg_pb_dic)
barcode_scheme = {}
for reg_id in sorted(reg_pb_dic):
    first_bc = 3 * (reg_id - 1)
    barcode_scheme[reg_id] = {
        'id': reg_id,
        'bc_1': first_bc,
        'bc_2': first_bc + 1,
        'bc_3': first_bc + 2,
        'bc_4': 3 * n_regions + reg_id - 1,
    }

## 3. Imports

### 3.1 barcodes

In [4]:
## Read all barcodes
barcode_dir = r'/n/boslfs/LABS/zhuang_lab/User/pzheng/Barcodes';

# read all Stv barcodes
#stv_adaptor = [1,2,17,77,78,79,80,81,82,83,84] # barcodes saved for adaptors
#stv_bad = [34,38,41] # barcodes performed badly
#stv_mask = stv_adaptor + stv_bad 
stv_mask =[]
with open(barcode_dir+os.sep+'top_Stvs.fasta', "rU") as handle:
    stv_barcodes = [];
    for record in SeqIO.parse(handle, "fasta"):
        if int(record.id.split('_')[1]) not in stv_mask:
            stv_barcodes.append(record);
            
# read all NDB barcodes
ndb_mask = [];

with open(barcode_dir+os.sep+'NDBs.fasta', "rU") as handle:
    ndb_barcodes = [];
    for record in SeqIO.parse(handle, "fasta"):
        if int(record.id.split('_')[1]) not in ndb_mask:
            ndb_barcodes.append(record);
print "Barcodes loaded: Stv: "+str(len(stv_barcodes))+", NDB: "+str(len(ndb_barcodes));

Barcodes loaded: Stv: 75, NDB: 1052


### 3.2 Primers

In [5]:
## Read all primers
primer_dir = r'/n/boslfs/LABS/zhuang_lab/User/pzheng/Primers';
fwd_primer_filename = 'forward_primers_keep.fasta';
rev_primer_filename = 'reverse_primers_keep.fasta';

# read all forward primers
with open(primer_dir+os.sep+fwd_primer_filename, "rU") as handle:
    fwd_primers = [];
    for record in SeqIO.parse(handle, "fasta"):
        fwd_primers.append(record);
# read all forward primers
with open(primer_dir+os.sep+rev_primer_filename, "rU") as handle:
    rev_primers = [];
    for record in SeqIO.parse(handle, "fasta"):
        rev_primers.append(record);
print "Primers loaded: forward: "+str(len(fwd_primers))+", reverse: "+str(len(rev_primers));        

Primers loaded: forward: 11, reverse: 6


## 4. Check probes

In [6]:
## Parameters used for patch barcodes & primers
# dir
master_dir = r'/n/boslfs/LABS/zhuang_lab/User/pzheng/Libraries/SI-14/NDB_test';
# barcodes
barcode_source = {'bc_unique':'ndb'};
# primers
fprimer = fwd_primers[1];
print '- forward primer:', fprimer
rprimer = rev_primers[2];
print '- reverse primer:', rprimer
# dic for region -> tad
if not 'barcode_scheme' in vars():
    print 'loading barcode scheme'
    barcode_scheme = pickle.load(open(master_dir+os.sep+'scheme.pkl','r'))
if not 'pb_records' in vars():
    pb_dir = r'final_probes'
    print 'loading all probes'
    with open(master_dir+os.sep+pb_dir+os.sep+'candidate_probes.fasta', "rU") as handle:
        pb_records = [];
        for record in SeqIO.parse(handle, "fasta"):
            pb_records.append(record);
if not 'pb_lists' in vars():
    print '- loading pb_lists'
    pb_lists = pickle.load(open(master_dir+os.sep+pb_dir+os.sep+'list.pkl', "rU"))

- forward primer: ID: W1A03_primer_2
Name: W1A03_primer_2
Description: W1A03_primer_2
Number of features: 0
Seq('CCCGCAATGGCTGACAACCG', SingleLetterAlphabet())
- reverse primer: ID: W1A10_primer_9
Name: W1A10_primer_9
Description: W1A10_primer_9
Number of features: 0
Seq('TAATACGACTCACTATAGGGATTGCCGCATGGTTTCCG', SingleLetterAlphabet())


In [9]:
def Check_Probes(pb_records, pb_lists, reg_encodings, master_dir, 
                 fwd_primer,rev_primer,
                 barcode_source, stv_barcodes, ndb_barcodes, barcode_starts={'stv':1,'ndb':1},
                 report_folder=r'reports/centered_merged',save_folder=r'final_probes',
                 add_rand_gap=0, total_bc=4, barcode_len=20, target_len=42,  
                 word_size=17, max_internal_hits=5, max_genome_hits=200,
                 index_dir=r'/n/boslfs/LABS/zhuang_lab/User/pzheng/Indeces/human/hg38',
                 save=True, verbose=True):
    # imports
    import os,glob,sys
    sys.path.append(r'/n/home13/pzheng/Documents/python-functions/python-functions-library')
    from LibraryConstruction import fastaread,fastawrite,fastacombine
    import LibraryDesigner as ld
    import numpy as np
    
    def _check_primer_usage(pb_records=pb_records, fwd_primer=fwd_primer, rev_primer=rev_primer,
                            _verbose=verbose):
        '''Check whether forward or reverse primer are used in all probes'''
        if _verbose:
            print "-- Checking primer usage, total probes:", len(pb_records)
        fwd_len = len(fwd_primer.seq);
        rev_len = len(rev_primer.seq[-20:].reverse_complement());
        
        for record in pb_records:
            if record.seq[:fwd_len] != fwd_primer.seq:
                if _verbose:
                    print "--- Forward primer incorrect!"
                return False
            if record.seq[-rev_len:] != rev_primer.seq[-20:].reverse_complement():
                if _verbose:
                    print "--- Forward primer incorrect!"
                return False
        return True # if no error applies
    
    def _check_region_size(pb_records=pb_records, pb_lists=pb_lists):
        '''Generate a dirctionary '''
        # get original region size
        _reg_size_dic = {}
        for lst in pb_lists:
            _reg_size_dic[lst[0]['reg_index']] = len(lst);
        # get region size from probe names
        _size_from_rec = {}
        for record in pb_records:
            reg_id = int(record.id.split('_reg_')[1].split('_')[0]);
            if reg_id not in _size_from_rec.keys():
                _size_from_rec[reg_id] = 1; # if not in key, create
            else:
                _size_from_rec[reg_id] += 1; # otherwise, add count
        # compare
        _match = True;
        for k,v in sorted(_size_from_rec.items()):
            if k not in _reg_size_dic.keys():
                print "region list and region id in probes not match for", k
                _match = False
                break
            else:
                if v != _reg_size_dic[k]:
                    print "region size doesn't match for:", k
                    _match = False
                    break
        
        return _reg_size_dic, _match;
    
    
    def _check_region_to_barcode(pb_records=pb_records, stv_barcodes=stv_barcodes, ndb_barcodes=ndb_barcodes,
                                 total_bc=total_bc):
        '''Generate map from region id to barcodes used in this region'''
        import re
        _reg_to_barcode = {}
        for record in pb_records:
            # region id
            reg_id = int(record.id.split('_reg_')[1].split('_')[0]);
            if reg_id not in _reg_to_barcode.keys():
                # barcode ids
                stv_matches = re.findall('\'Stv_(.+?)\'', record.id, re.DOTALL)
                ndb_matches = re.findall('\'NDB_(.+?)\'', record.id, re.DOTALL)
                stv_names = ['Stv_'+str(stv_id) for stv_id in stv_matches]
                ndb_names = ['NDB_'+str(ndb_id) for ndb_id in ndb_matches]
                _reg_to_barcode[reg_id] = stv_names+ndb_names
        
        ## barcode check
        _barcode_check = True;
        # barcode names
        bc_names = [stv.id for stv in stv_barcodes] + [ndb.id for ndb in ndb_barcodes]
        # search through previous dictionary
        for reg,bcs in sorted(_reg_to_barcode.items()):
            for bc in bcs:
                if len(bcs) != total_bc:
                    print "-- Error in barcode number for region:", reg
                    _barcode_check = False
                    break
                if bc not in bc_names:
                    print "-- Wrong barcode name for barcode: "+str(bc)+", region: "+str(reg)
                    _barcode_check = False
                    break
        
        return _reg_to_barcode, _barcode_check;
        
    def _parsing_probe_sequence(record, fwd_primer=fwd_primer, rev_primer=rev_primer,
                                add_rand_gap=add_rand_gap, barcode_len=barcode_len, target_len=target_len):
        '''parse a probe sequence to acquire all barcode binding sites'''
        # take in a seq record, parse the sequence and return a list of all included barcodes (20mer,RC)
        barcode_list = [];
        _main_seq = record.seq[len(fwd_primer.seq):-20];
        
        
        # trim last 2 barcodes
        for i in range(2):
            barcode_list.append(_main_seq[-barcode_len:]);
            _main_seq = _main_seq[:-(barcode_len+add_rand_gap)];
        # trim all barcodes from the beginning
        while len(_main_seq) > target_len:
            barcode_list.append(_main_seq[:barcode_len]);
            _main_seq = _main_seq[(barcode_len+add_rand_gap):];
        
        return barcode_list;
    
    def _finding_barcode_name(barcode_list, stv_barcodes=stv_barcodes, ndb_barcodes=ndb_barcodes, 
                              barcode_len=barcode_len, total_bc=total_bc):
        '''Given barcode list generated by parsing probe, return a list of barcode names'''
        _name_list = [];
        for bc_site in barcode_list:
            for bc in stv_barcodes+ndb_barcodes:
                if bc.seq[-barcode_len:] == bc_site.reverse_complement():
                    _name_list.append(bc.id);
                    break;
        
        if len(_name_list) < total_bc:
            print "-- Failed in finding some barcodes."
            return False
        return _name_list;
    
    def _check_barcode_to_region(reg_to_barcode, 
                                 pb_records=pb_records, stv_barcodes=stv_barcodes, ndb_barcodes=ndb_barcodes):
        '''Generate map from barcode id to region id'''
        _barcode_to_reg = {}
        _reg_id_exists = []
        for record in pb_records:
            reg_id = int(record.id.split('_reg_')[1].split('_')[0]);
            if reg_id in _reg_id_exists:
                continue;
            else:
                _barcode_list = _parsing_probe_sequence(record)
                _name_list = _finding_barcode_name(_barcode_list)
                for _n in _name_list:
                    if _n not in _barcode_to_reg.keys(): # create if not in dic
                        _barcode_to_reg[_n] = [reg_id]
                    else: # otherwise, append
                        _barcode_to_reg[_n].append(reg_id)
            _reg_id_exists.append(reg_id)
        ## check region distribution
        # invert dic from reg_to_barcode
        _inv_dic = {}
        for reg,bcs in sorted(reg_to_barcode.items()):
            for bc in bcs:
                if bc not in _inv_dic.keys():
                    _inv_dic[bc] = [reg];
                else:
                    _inv_dic[bc].append(reg);
        # compare
        _region_check=True
        for bc, regs in sorted(_inv_dic.items()):
            if bc not in _barcode_to_reg.keys():
                print "-- "+str(bc)+" not in barcode_to_region dic!"
                _region_check = False
                break
            else:
                if sorted(regs) != sorted(_barcode_to_reg[bc]):
                    print "-- "+str(bc)+" and region"+str(regs)+" not compatible with barcode_to_region dic!"
                    _region_check = False
                    break
                    
        return _barcode_to_reg, _region_check
    
    def _check_barcode_to_color(pb_records=pb_records, stv_barcodes=stv_barcodes, ndb_barcodes=ndb_barcodes, 
                                stv_color=True, ndb_color=False,
                                _save=save, master_dir=master_dir, save_folder=save_folder):
        '''If multi_color is applied, generate a barcode_to_color dic for adaptor design'''
        if 'color' not in str(pb_records[0].id):
            print "-- color check not applied";
            return False
        elif not stv_color and not ndb_color:
            print "-- color check turned off in both stv and ndb";
            return False
        else:
            # get barcodes
            _barcode_names = []
            if stv_color: # if stv has multi-color
                _barcode_names += [bc.id for bc in stv_barcodes];
            if ndb_color: # if ndb has multi-color
                _barcode_names += [bc.id for bc in ndb_barcodes];
            # initialize color dic
            _barcode_to_color = {};
            _exist_regs = [];
            # search through all probes
            for record in pb_records:
                _reg_id = int(record.id.split('_reg_')[1].split('_')[0]); 
                if _reg_id in _exist_regs:
                    continue
                else: 
                    _exist_regs.append(_reg_id);
                _color = int(str(record.id).split('color_')[1])
                _barcode_list = _parsing_probe_sequence(record)
                _name_list = _finding_barcode_name(_barcode_list)
                
                for _name in _name_list:
                    if _name in _barcode_names:
                        if _name not in _barcode_to_color.keys():
                            _barcode_to_color[_name] = [_color]
                        else:
                            _barcode_to_color[_name].append(_color);
            # keep the unique colors
            _barcode_to_unique_color = {}
            for k,v in sorted(_barcode_to_color.items()):
                _barcode_to_unique_color[k] = np.unique(v)
            if _save:
                import csv
                with open(master_dir+os.sep+save_folder+os.sep+'color-usage.csv','w') as output_handle:
                    fieldnames = ['barcode', 'color']
                    writer = csv.DictWriter(output_handle, fieldnames=fieldnames)
                    writer.writeheader()
                    for _barcode, _color in sorted(_barcode_to_unique_color.items(), key=lambda (k,v):int(k.split('_')[1])):
                        writer.writerow({'barcode': _barcode, 'color': _color})
                
        return _barcode_to_unique_color
                            
    
    def _construct_internal_map(master_dir=master_dir, word_size=word_size):
        '''Using functions in LibraryDesign, compute an internal khmer map'''
        _int_map = khmer.Countgraph(word_size, 1e9, 2) 
        _int_map.set_use_bigcount(True)
        _nms,_seqs = fastaread(master_dir+os.sep+'final_probes'+os.sep+'candidate_probes.fasta')
        for _seq in _seqs:
            _int_map.consume(_seq.upper())
        return _int_map
    
    def _check_barcode_in_probes(barcode_to_reg, reg_size_dic, int_map, 
                                 stv_barcodes=stv_barcodes, ndb_barcodes=ndb_barcodes,
                                 barcode_len=barcode_len, max_internal_hits=max_internal_hits):
        '''Check barcode appearance in probes, whether that match barcode_to_region scheme'''
        _barcode_in_probes = {}
        for bc_name, regs in sorted(barcode_to_reg.items()):
            bc = None
            for _bc in stv_barcodes+ndb_barcodes:
                if bc_name == _bc.id:
                    bc = _bc
                    break
            bc_hits = int_map.get_kmer_counts( str(bc.seq[-barcode_len:].reverse_complement()).upper());
            if max(bc_hits) - min(bc_hits) > max_internal_hits:
                print "-- Barcode: "+str(bc)+" has more off-target in different part of itself!"
                return False
            else:
                regs,reg_cts = np.unique(regs, return_counts=True);
                bc_in_probe = 0;
                for reg,ct in zip(regs,reg_cts):
                    bc_in_probe += reg_size_dic[reg] * ct;
                if max(bc_hits) - bc_in_probe > max_internal_hits:
                    print "-- Barcode: "+str(bc)+" has more off-target than threshold!"
                    return False
            _barcode_in_probes[bc_name] = bc_in_probe;
        return _barcode_in_probes, True
    
    def _check_between_probes(int_map, pb_lists=pb_lists, pb_records=pb_records):
        pass 
    
    def _check_against_genome(pb_records=pb_records, max_genome_hits=max_genome_hits, index_dir=index_dir):
        '''Use Khmer to compare probe against genome'''
        hg38 = khmer.load_countgraph(index_dir+os.sep+'full_word17_.kmer')
        _failed_num = 0;
        _keep_pb_records = [];
        for record in pb_records:
            _kmer_hits = hg38.get_kmer_counts(str(record.seq).upper());
            if sum(_kmer_hits) > max_genome_hits:
                print '-- Max_genome_hits is: '+str(max_genome_hits)+", this seq got hits: "+ str(sum(_kmer_hits))
                _failed_num += 1;
            else:
                _keep_pb_records.append(record);
                
        return _keep_pb_records, _failed_num # if nothing goes wrong
    
    def _plot_info():
        pass
            
    ## check primers
    primer_usage = _check_primer_usage()
    if verbose:
        print "\n- 1.Passing primer usage check? -", primer_usage
    
    ## check region size
    reg_size_dic, size_match = _check_region_size()
    if verbose:
        print "\n- 2.Passing region size check? -", size_match    
        for k,v in sorted(reg_size_dic.items()):
            print k,':',v
        
    ## check region to barcode
    reg_to_barcode, reg2bc = _check_region_to_barcode()
    if verbose:
        print "\n- 3.Passing region to barcode mapping check? -", reg2bc    
        for k,v in sorted(reg_to_barcode.items(), key=lambda (k,v):k):
            print k,':',v
        
    ## check barcode to region (this step must be run after step 3) 
    barcode_to_reg, bc2reg = _check_barcode_to_region(reg_to_barcode)
    if verbose:
        print "\n- 4.Passing barcode to region mapping check? -", bc2reg    
        for k,v in sorted(barcode_to_reg.items(), key=lambda (k,v):[k[0],int(k.split('_')[1])]):
            print k,':',v
    
    ## check barcode to region (this step must be run after step 3) 
    barcode_to_color = _check_barcode_to_color()
    if verbose and barcode_to_color:
        print "\n- 5.Calculating barcode to color dictionary."
        for k,v in sorted(barcode_to_color.items(), key=lambda (k,v):[k[0],int(k.split('_')[1])]):
            print k,':',v    
    
    
    ## Construct an internal map
    int_map = _construct_internal_map();
    if verbose:
        print "\n- 6.Constructing internal khmer map";
    
    ## Check barcodes total counts in probes
    barcode_in_probes, _bc_counting = _check_barcode_in_probes(barcode_to_reg, reg_size_dic, int_map)
    if verbose:
        print "\n- 7.Passing if counting barcode appearance times in probes", _bc_counting;    

    ## Check against each other    
    
    ## Check against genome
    kept_records, failed_num = _check_against_genome();
    if verbose:
        print "\n- 8.Probes not passing through genome filter:", failed_num;  
    
    # check region size for kept probes
    _size_from_rec = {}
    for record in pb_records:
        reg_id = int(record.id.split('_reg_')[1].split('_')[0]);
        if reg_id not in _size_from_rec.keys():
            _size_from_rec[reg_id] = 1; # if not in key, create
        else:
            _size_from_rec[reg_id] += 1; # otherwise, add count
    if verbose:
        print "--  re-check region size:"
        for k,v in sorted(_size_from_rec.items()):
            print k,':',v
    
    if save:
        pb_savefile = master_dir + os.sep + save_folder + os.sep + 'filtered_probes.fasta';
        if verbose:
            print "\n- 9.Saving probes to:", pb_savefile
        with open(pb_savefile, 'w') as output_handle:
            SeqIO.write(kept_records, output_handle, 'fasta');  
        
        
    return kept_records, _size_from_rec

In [10]:
# Run all probe checks; returns the probes kept by the genome filter and a
# region -> probe-count dictionary.
kept_records, kept_size_dic = Check_Probes(
    pb_records=pb_records,
    pb_lists=pb_lists,
    reg_encodings=barcode_scheme,
    master_dir=master_dir,
    fwd_primer=fprimer,
    rev_primer=rprimer,
    barcode_source=barcode_source,
    stv_barcodes=stv_barcodes,
    ndb_barcodes=ndb_barcodes)

-- Checking primer usage, total probes: 61693

- 1.Passing primer usage check? - True

- 2.Passing region size check? - True
1 : 414
2 : 364
3 : 363
4 : 430
5 : 419
6 : 344
7 : 451
8 : 454
9 : 440
10 : 443
11 : 464
12 : 435
13 : 471
14 : 442
15 : 385
16 : 356
17 : 373
18 : 430
19 : 303
20 : 359
21 : 409
22 : 445
23 : 454
24 : 473
25 : 484
26 : 452
27 : 430
28 : 412
29 : 426
30 : 353
31 : 342
32 : 448
33 : 381
34 : 437
35 : 427
36 : 467
37 : 449
38 : 397
39 : 435
40 : 481
41 : 531
42 : 430
43 : 421
44 : 336
45 : 433
46 : 459
47 : 454
48 : 361
49 : 423
50 : 460
51 : 487
52 : 426
53 : 322
54 : 430
55 : 452
56 : 434
57 : 400
58 : 450
59 : 447
60 : 356
61 : 402
62 : 323
63 : 402
64 : 399
65 : 436
66 : 339
67 : 397
68 : 353
69 : 408
70 : 312
71 : 229
72 : 215
73 : 271
74 : 340
75 : 305
76 : 371
77 : 286
78 : 359
79 : 387
80 : 419
81 : 355
82 : 306
83 : 426
84 : 423
85 : 432
86 : 347
87 : 460
88 : 351
89 : 443
90 : 486
91 : 467
92 : 440
93 : 441
94 : 298
95 : 418
96 : 341
97 : 504
98 : 342
99


- 4.Passing barcode to region mapping check? - True
NDB_1 : [1]
NDB_2 : [1]
NDB_3 : [1]
NDB_4 : [2]
NDB_5 : [2]
NDB_6 : [2]
NDB_7 : [3]
NDB_8 : [3]
NDB_9 : [3]
NDB_10 : [4]
NDB_11 : [4]
NDB_12 : [4]
NDB_13 : [5]
NDB_14 : [5]
NDB_15 : [5]
NDB_16 : [6]
NDB_17 : [6]
NDB_18 : [6]
NDB_19 : [7]
NDB_20 : [7]
NDB_21 : [7]
NDB_22 : [8]
NDB_23 : [8]
NDB_24 : [8]
NDB_25 : [9]
NDB_26 : [9]
NDB_27 : [9]
NDB_28 : [10]
NDB_29 : [10]
NDB_30 : [10]
NDB_31 : [11]
NDB_32 : [11]
NDB_33 : [11]
NDB_34 : [12]
NDB_35 : [12]
NDB_36 : [12]
NDB_37 : [13]
NDB_38 : [13]
NDB_39 : [13]
NDB_40 : [14]
NDB_41 : [14]
NDB_42 : [14]
NDB_43 : [15]
NDB_44 : [15]
NDB_45 : [15]
NDB_46 : [16]
NDB_47 : [16]
NDB_48 : [16]
NDB_49 : [17]
NDB_50 : [17]
NDB_51 : [17]
NDB_52 : [18]
NDB_53 : [18]
NDB_54 : [18]
NDB_55 : [19]
NDB_56 : [19]
NDB_57 : [19]
NDB_58 : [20]
NDB_59 : [20]
NDB_60 : [20]
NDB_61 : [21]
NDB_62 : [21]
NDB_63 : [21]
NDB_64 : [22]
NDB_65 : [22]
NDB_66 : [22]
NDB_67 : [23]
NDB_68 : [23]
NDB_69 : [23]
NDB_70 : [24]
NDB

NDB_561 : [111]
NDB_562 : [112]
NDB_563 : [113]
NDB_564 : [114]
NDB_565 : [115]
NDB_566 : [116]
NDB_567 : [117]
NDB_568 : [118]
NDB_569 : [119]
NDB_570 : [120]
NDB_571 : [121]
NDB_572 : [122]
NDB_573 : [123]
NDB_574 : [124]
NDB_575 : [125]
NDB_576 : [126]
NDB_577 : [127]
NDB_578 : [128]
NDB_579 : [129]
NDB_580 : [130]
NDB_581 : [131]
NDB_582 : [132]
NDB_583 : [133]
NDB_584 : [134]
NDB_585 : [135]
NDB_586 : [136]
NDB_587 : [137]
NDB_588 : [138]
NDB_589 : [139]
NDB_590 : [140]
NDB_591 : [141]
NDB_592 : [142]
NDB_593 : [143]
NDB_594 : [144]
NDB_595 : [145]
NDB_596 : [146]
NDB_597 : [147]
NDB_598 : [148]
NDB_599 : [149]
NDB_600 : [150]
-- color check not applied

- 6.Constructing internal khmer map

- 7.Passing if counting barcode appearance times in probes True
-- Max_genome_hits is: 200, this seq got hits: 374
-- Max_genome_hits is: 200, this seq got hits: 245
-- Max_genome_hits is: 200, this seq got hits: 207
-- Max_genome_hits is: 200, this seq got hits: 309
-- Max_genome_hits is: 200,

-- Max_genome_hits is: 200, this seq got hits: 247
-- Max_genome_hits is: 200, this seq got hits: 536
-- Max_genome_hits is: 200, this seq got hits: 387
-- Max_genome_hits is: 200, this seq got hits: 1516
-- Max_genome_hits is: 200, this seq got hits: 209
-- Max_genome_hits is: 200, this seq got hits: 743
-- Max_genome_hits is: 200, this seq got hits: 325
-- Max_genome_hits is: 200, this seq got hits: 577
-- Max_genome_hits is: 200, this seq got hits: 266
-- Max_genome_hits is: 200, this seq got hits: 206
-- Max_genome_hits is: 200, this seq got hits: 223
-- Max_genome_hits is: 200, this seq got hits: 266
-- Max_genome_hits is: 200, this seq got hits: 214
-- Max_genome_hits is: 200, this seq got hits: 278
-- Max_genome_hits is: 200, this seq got hits: 287
-- Max_genome_hits is: 200, this seq got hits: 210
-- Max_genome_hits is: 200, this seq got hits: 331
-- Max_genome_hits is: 200, this seq got hits: 214
-- Max_genome_hits is: 200, this seq got hits: 820
-- Max_genome_hits is: 200, th

-- Max_genome_hits is: 200, this seq got hits: 295
-- Max_genome_hits is: 200, this seq got hits: 326
-- Max_genome_hits is: 200, this seq got hits: 227
-- Max_genome_hits is: 200, this seq got hits: 246
-- Max_genome_hits is: 200, this seq got hits: 261
-- Max_genome_hits is: 200, this seq got hits: 260
-- Max_genome_hits is: 200, this seq got hits: 347
-- Max_genome_hits is: 200, this seq got hits: 250
-- Max_genome_hits is: 200, this seq got hits: 643
-- Max_genome_hits is: 200, this seq got hits: 2486
-- Max_genome_hits is: 200, this seq got hits: 209
-- Max_genome_hits is: 200, this seq got hits: 513
-- Max_genome_hits is: 200, this seq got hits: 269
-- Max_genome_hits is: 200, this seq got hits: 272
-- Max_genome_hits is: 200, this seq got hits: 214
-- Max_genome_hits is: 200, this seq got hits: 214
-- Max_genome_hits is: 200, this seq got hits: 227
-- Max_genome_hits is: 200, this seq got hits: 270
-- Max_genome_hits is: 200, this seq got hits: 324
-- Max_genome_hits is: 200, th

-- Max_genome_hits is: 200, this seq got hits: 337
-- Max_genome_hits is: 200, this seq got hits: 363
-- Max_genome_hits is: 200, this seq got hits: 287
-- Max_genome_hits is: 200, this seq got hits: 206
-- Max_genome_hits is: 200, this seq got hits: 390
-- Max_genome_hits is: 200, this seq got hits: 595
-- Max_genome_hits is: 200, this seq got hits: 484
-- Max_genome_hits is: 200, this seq got hits: 228
-- Max_genome_hits is: 200, this seq got hits: 340
-- Max_genome_hits is: 200, this seq got hits: 282
-- Max_genome_hits is: 200, this seq got hits: 312
-- Max_genome_hits is: 200, this seq got hits: 1160
-- Max_genome_hits is: 200, this seq got hits: 1962
-- Max_genome_hits is: 200, this seq got hits: 261
-- Max_genome_hits is: 200, this seq got hits: 441
-- Max_genome_hits is: 200, this seq got hits: 355
-- Max_genome_hits is: 200, this seq got hits: 222
-- Max_genome_hits is: 200, this seq got hits: 293
-- Max_genome_hits is: 200, this seq got hits: 281
-- Max_genome_hits is: 200, t

-- Max_genome_hits is: 200, this seq got hits: 952
-- Max_genome_hits is: 200, this seq got hits: 208
-- Max_genome_hits is: 200, this seq got hits: 222
-- Max_genome_hits is: 200, this seq got hits: 362
-- Max_genome_hits is: 200, this seq got hits: 622
-- Max_genome_hits is: 200, this seq got hits: 250
-- Max_genome_hits is: 200, this seq got hits: 246
-- Max_genome_hits is: 200, this seq got hits: 243
-- Max_genome_hits is: 200, this seq got hits: 761
-- Max_genome_hits is: 200, this seq got hits: 272
-- Max_genome_hits is: 200, this seq got hits: 524
-- Max_genome_hits is: 200, this seq got hits: 274
-- Max_genome_hits is: 200, this seq got hits: 309
-- Max_genome_hits is: 200, this seq got hits: 402
-- Max_genome_hits is: 200, this seq got hits: 238
-- Max_genome_hits is: 200, this seq got hits: 232
-- Max_genome_hits is: 200, this seq got hits: 475
-- Max_genome_hits is: 200, this seq got hits: 204
-- Max_genome_hits is: 200, this seq got hits: 204
-- Max_genome_hits is: 200, thi

-- Max_genome_hits is: 200, this seq got hits: 269
-- Max_genome_hits is: 200, this seq got hits: 244
-- Max_genome_hits is: 200, this seq got hits: 533
-- Max_genome_hits is: 200, this seq got hits: 957
-- Max_genome_hits is: 200, this seq got hits: 1891
-- Max_genome_hits is: 200, this seq got hits: 237
-- Max_genome_hits is: 200, this seq got hits: 214
-- Max_genome_hits is: 200, this seq got hits: 261
-- Max_genome_hits is: 200, this seq got hits: 206
-- Max_genome_hits is: 200, this seq got hits: 1537
-- Max_genome_hits is: 200, this seq got hits: 276
-- Max_genome_hits is: 200, this seq got hits: 500
-- Max_genome_hits is: 200, this seq got hits: 318
-- Max_genome_hits is: 200, this seq got hits: 215
-- Max_genome_hits is: 200, this seq got hits: 694
-- Max_genome_hits is: 200, this seq got hits: 549
-- Max_genome_hits is: 200, this seq got hits: 661
-- Max_genome_hits is: 200, this seq got hits: 379
-- Max_genome_hits is: 200, this seq got hits: 331
-- Max_genome_hits is: 200, t

-- Max_genome_hits is: 200, this seq got hits: 262
-- Max_genome_hits is: 200, this seq got hits: 231
-- Max_genome_hits is: 200, this seq got hits: 615
-- Max_genome_hits is: 200, this seq got hits: 221
-- Max_genome_hits is: 200, this seq got hits: 296
-- Max_genome_hits is: 200, this seq got hits: 572
-- Max_genome_hits is: 200, this seq got hits: 240
-- Max_genome_hits is: 200, this seq got hits: 2527
-- Max_genome_hits is: 200, this seq got hits: 522
-- Max_genome_hits is: 200, this seq got hits: 615
-- Max_genome_hits is: 200, this seq got hits: 224
-- Max_genome_hits is: 200, this seq got hits: 216
-- Max_genome_hits is: 200, this seq got hits: 233
-- Max_genome_hits is: 200, this seq got hits: 498
-- Max_genome_hits is: 200, this seq got hits: 222
-- Max_genome_hits is: 200, this seq got hits: 224
-- Max_genome_hits is: 200, this seq got hits: 444
-- Max_genome_hits is: 200, this seq got hits: 213
-- Max_genome_hits is: 200, this seq got hits: 658
-- Max_genome_hits is: 200, th

In [11]:
# number of probes that passed the genome filter in Check_Probes
len(kept_records)

60178

In [41]:
# 60178 is len(kept_records) from this notebook; 72225 and 71941 are presumably
# kept-probe counts from other libraries -- TODO confirm and record their source
60178+72225+71941

204344