# Library design for CTP-05, Exonic RNA


by Pu Zheng

This library design is based on the target regions designed by Stephen.

## 0. Imports

In [9]:
%run "E:\Users\puzheng\Documents\Startup_py3.py"
sys.path.append(r"E:\Users\puzheng\Documents")

import ImageAnalysis3
from ImageAnalysis3 import get_img_info, visual_tools, corrections
from LibraryDesigner3 import check
import LibraryDesigner3.LibraryDesigner as ld
%matplotlib notebook

In [3]:
import csv

## 1. Load probes

In [3]:
# Location and file name of the candidate-probe table (one probe per CSV row).
source_folder = r'E:\Users\puzheng\Documents\Libraries\CTP-05\Candidate_Probes'
probe_file = 'Chr21genes_71Exon.csv'

# Read the table: the first row is taken as the header and every following
# row becomes a {column name: value} dict.
with open(os.path.join(source_folder, probe_file), 'r') as csvfile:
    reader = csv.reader(csvfile, delimiter=',', quotechar='|')
    header = next(reader, None)
    print('header', header)
    probes = [dict(zip(header, row)) for row in reader]

# Group the probe dicts by their 'Gene name' column, keeping file order
# within each gene.
probe_dic = {}
for _probe in probes:
    probe_dic.setdefault(_probe['Gene name'], []).append(_probe)

print(len(probe_dic))

header ['Gene name', "Oligo Sequence (Don't Need to Reverse Complement)"]
71


## 2. Sort probes by coordinates

In [35]:
position_file = "chr21_exon_positions.csv"
with open(os.path.join(source_folder, position_file), 'r') as csvfile:
    reader = csv.reader(csvfile, delimiter=',', quotechar='|')
    # column 0 is used as the key (looked up by gene name later in this
    # notebook), column 1 is parsed as the integer position used for sorting
    position_dic = {row[0]: int(row[1]) for row in reader}

## 3. Screening genes and probes

In [5]:
# Thresholds on the number of probes per gene.
min_num_probe = 24 # this number is from MERFISH experience as well as Long Cai 2018 paper
max_num_probe = 120 # this is just an arbitrary number larger than 2x min_num_probe

# Visit genes in order of their position; skip genes with too few probes and
# cap the others at their first max_num_probe probes (the most 5' ones,
# according to the original note for this cell).
kept_probe_dic = {}
for _gene in sorted(probe_dic, key=lambda g: position_dic[g]):
    _pb_list = probe_dic[_gene]
    if len(_pb_list) < min_num_probe:
        print(f"Gene: {_gene} has probes less than {min_num_probe}, skip")
        continue
    kept_probe_dic[_gene] = [
        _p["Oligo Sequence (Don't Need to Reverse Complement)"]
        for _p in _pb_list[:max_num_probe]
    ]
print("Number of genes kept:", len(kept_probe_dic))

Number of genes kept: 71


### Save to FASTA file

In [6]:
# biopython for SeqRecord
from Bio import SeqIO
from Bio.Seq import Seq
from Bio.SeqRecord import SeqRecord
from Bio.Alphabet import IUPAC

# blast
from Bio.Blast.Applications import NcbiblastnCommandline
from Bio.Blast import NCBIXML 

In [7]:
# Build one SeqRecord per kept probe, genes ordered by position and probes
# numbered by their index within the gene.
pb_records = []
for _gene in sorted(kept_probe_dic, key=lambda g: position_dic[g]):
    for _i, _seq in enumerate(kept_probe_dic[_gene]):
        _pb_id = 'gene_' + _gene + '_ind_' + str(_i)
        # sequences are used as given: no need to RC for exon ones
        pb_records.append(
            SeqRecord(Seq(_seq), id=_pb_id, description='', name=_pb_id)
        )

# Write all candidate probes next to the source table.
cand_probe_filename = 'CTP-05_chr21_exonic_RNA_candidates.fasta'
_cand_probe_path = os.path.join(source_folder, cand_probe_filename)
with open(_cand_probe_path, 'w') as output_handle:
    SeqIO.write(pb_records, output_handle, 'fasta')

## 4. Append barcode info

In [4]:
from Bio import SeqIO
from Bio.Seq import Seq
from Bio.SeqRecord import SeqRecord


from LibraryDesigner3 import designer
reload(designer)

# folder for source files
source_folder = r'E:\Users\puzheng\Documents\Libraries\CTP-05\Candidate_Probes'
cand_probe_filename = 'CTP-05_chr21_exonic_RNA_candidates.fasta'

# folder for this library
library_folder = r'E:\Users\puzheng\Documents\Libraries\CTP-05\Exonic_RNA'
# load all barcodes
hyb_matrix = np.load(r'E:\Users\puzheng\Documents\Libraries\CTP-05\Ref_Codebook\hyb_matrix.npy')
# generate gene-to-barcode map (run once; the result is cached in gene_barcode_dic.pkl)
#gene_barcode_dic = designer.Assign_Merfish_Barcodes(pb_records, hyb_matrix, repeat_times=20000, save_filename=os.path.join(library_folder, 'gene_barcode_dic.pkl'))
# load gene_barcode_dic; the context manager closes the file handle after loading.
# NOTE: pickle can execute arbitrary code -- only load a file produced by this notebook.
with open(os.path.join(library_folder, 'gene_barcode_dic.pkl'), 'rb') as _barcode_handle:
    gene_barcode_dic = pickle.load(_barcode_handle)

# extract primers and readouts
primers = designer.load_primers([2,9])
# one combinatorial readout per column of hyb_matrix, one unique readout per gene
# NOTE(review): meaning of _num_colors / _start_id is defined in LibraryDesigner3.designer -- confirm there
combo_readouts = designer.load_readouts(hyb_matrix.shape[1], 'combo', _num_colors=2, _start_id=8)
unique_readouts = designer.load_readouts(len(gene_barcode_dic), 'unique', _num_colors=3, _start_id=32)

- Picked primer: ID: W1A03_primer_2
Name: W1A03_primer_2
Description: W1A03_primer_2
Number of features: 0
Seq('CCCGCAATGGCTGACAACCG', SingleLetterAlphabet())
- Picked primer: ID: W1A10_primer_9
Name: W1A10_primer_9
Description: W1A10_primer_9
Number of features: 0
Seq('TAATACGACTCACTATAGGGATTGCCGCATGGTTTCCG', SingleLetterAlphabet())


In [1]:
import seqint

In [2]:
# display the path of the seqint module file that was actually imported
seqint.__file__

'E:\\Users\\puzheng\\AppData\\anaconda3\\lib\\site-packages\\seqint.cp36-win_amd64.pyd'

In [10]:
# generate probes
candidate_full_name = 'candidate_full_probes.fasta'
candidate_full_probes = designer.Patch_Barcodes(gene_barcode_dic, os.path.join(source_folder,cand_probe_filename), combo_readouts, unique_readouts, primers,
                                                save_filename=os.path.join(library_folder, candidate_full_name))

- Load candidate-probes from file:E:\Users\puzheng\Documents\Libraries\CTP-05\Candidate_Probes\CTP-05_chr21_exonic_RNA_candidates.fasta
- writing patched probes into file:E:\Users\puzheng\Documents\Libraries\CTP-05\Exonic_RNA\candidate_full_probes.fasta


## 5. Check quality

In [5]:
# biopython for SeqRecord
from Bio import SeqIO
from Bio.Seq import Seq
from Bio.SeqRecord import SeqRecord
from Bio.Alphabet import IUPAC

# blast
from Bio.Blast.Applications import NcbiblastnCommandline
from Bio.Blast import NCBIXML 

In [7]:
# Where the full-length candidate probes of this library were saved.
library_folder = r'E:\Users\puzheng\Documents\Libraries\CTP-05\Exonic_RNA'
candidate_full_name = 'candidate_full_probes.fasta'

# Read every FASTA record into memory.
with open(os.path.join(library_folder, candidate_full_name), 'r') as handle:
    full_records = list(SeqIO.parse(handle, "fasta"))

print(f"Total probe loaded: {len(full_records)}")

Total probe loaded: 5911


In [10]:
# Pick up any edits made to the check module since it was imported.
reload(check)

# The helpers below are run in order because later ones consume the
# dictionaries returned by earlier ones. What each one verifies is defined in
# LibraryDesigner3.check; the names suggest primer usage, probes per region,
# and the region <-> readout mapping in both directions.
primer_check = check._check_primer_usage(
    full_records, primers[0], primers[1])

reg_size_dic, len_check = check._check_region_size(
    full_records)

reg_readout_dic, reg2readout_check = check._check_region_to_readouts(
    full_records, combo_readouts, unique_readouts)

readout_reg_dic, readout2reg_check = check._check_readout_to_region(
    reg_readout_dic, full_records, combo_readouts, unique_readouts)

-- Checking primer usage, total probes: 5911
gene: HSPA13 -> 100
gene: USP25 -> 100
gene: CXADR -> 100
gene: BTG3 -> 37
gene: C21orf91 -> 100
gene: GABPA -> 95
gene: ADAMTS1 -> 100
gene: RWDD2B -> 86
gene: USP16 -> 88
gene: CCT8 -> 52
gene: BACH1 -> 100
gene: TIAM1 -> 100
gene: SOD1 -> 28
gene: SCAF4 -> 100
gene: HUNK -> 100
gene: MIS18A -> 43
gene: URB1 -> 100
gene: TCP10L -> 47
gene: PAXBP1 -> 100
gene: IFNAR2 -> 86
gene: IL10RB -> 55
gene: IFNAR1 -> 100
gene: IFNGR2 -> 56
gene: SON -> 100
gene: ITSN1 -> 100
gene: CRYZL1 -> 48
gene: MRPS6 -> 30
gene: SLC5A3 -> 100
gene: RCAN1 -> 74
gene: CBR1 -> 36
gene: SETD4 -> 97
gene: CBR3 -> 31
gene: MORC3 -> 100
gene: CHAF1B -> 100
gene: SIM2 -> 100
gene: HLCS -> 100
gene: PIGP -> 100
gene: TTC3 -> 100
gene: DYRK1A -> 100
gene: PSMG1 -> 55
gene: BRWD1 -> 100
gene: HMGN1 -> 28
gene: WRB -> 44
gene: BACE2 -> 80
gene: MX2 -> 96
gene: C2CD2 -> 100
gene: ZBTB21 -> 100
gene: SLC37A1 -> 100
gene: WDR4 -> 69
gene: NDUFV3 -> 92
gene: PKNOX1 -> 100
gene:

In [14]:
# Re-import the helper modules so that recent edits take effect.
reload(check)
reload(ld)

# Build the internal map of the library (the captured output reports a
# 17-mer table saved into library_folder).
int_map = check._construct_internal_map(full_records, library_folder)

# Count readout occurrences within probes, using the dictionaries produced by
# the earlier quality checks.
readout_count_dic, readout_count_check = check._check_readout_in_probes(
    readout_reg_dic, reg_size_dic, int_map, combo_readouts, unique_readouts)

# Screen probes against each other and save the survivors.
kept_records, removed_count = check._check_between_probes(full_records, int_map)
_filtered_probe_path = os.path.join(library_folder, 'filtered_full_probes.fasta')
with open(_filtered_probe_path, 'w') as output_handle:
    SeqIO.write(kept_records, output_handle, "fasta")

-- saving internal 17-mer map to file:E:\Users\puzheng\Documents\Libraries\CTP-05\Exonic_RNA\probe_table_17.npz
-- total probes removed by internal screening: 0


## BLAST

In [15]:
# parse
pb_dic = check.split_probe_by_gene(kept_records)
# Folders
blast_subfolder = 'blast'
blast_folder = library_folder + os.sep + blast_subfolder
if not os.path.exists(blast_folder):
    os.makedirs(blast_folder);
    
# parameters
force=True
verbose=True
verbose_parse=False

hard_counts, soft_counts = [],[]

for _gene, _pbs in sorted(pb_dic.items()):
    
    if verbose:
        print ("- region", _gene)
        _start = time.time()
    if force or not os.path.exists(blast_folder+os.sep+'blast_gene_'+str(_gene)+'.xml'):  
        if verbose:
            print ("-- writing file:", blast_folder+os.sep+'probe_gene_'+str(_gene)+'.fasta')
        # save these number of probes into temp.fasta
        with open(blast_folder+os.sep+'probe_gene_'+str(_gene)+'.fasta', "w") as output_handle:
            SeqIO.write(_pbs, output_handle, "fasta")
        
        if verbose:
            print ("-- blasting region:", _gene)
        # Run BLAST and parse the output as XML
        output = NcbiblastnCommandline(query=blast_folder+os.sep+'probe_gene_'+str(_gene)+'.fasta',
                                       num_threads=16,
                                       db=r'E:\Users\puzheng\Documents\blast-2.7.1+\db\hg38',
                                       evalue=500,
                                       word_size=10,
                                       out=blast_folder+os.sep+'blast_gene_'+str(_gene)+'.xml',
                                       outfmt=5)()[0]
        
        if verbose:
            _after_blast = time.time()
            print("--- Total time for blast:", _after_blast-_start)
    else:
        _after_blast = time.time()
        
    # parsing output: 
    if verbose:
        print ("-- parsing blast result for region:", _gene)
    blast_records = NCBIXML.parse(open(blast_folder+os.sep+'blast_gene_'+str(_gene)+'.xml'))
    # save a vector to store keep or not
    hcs,scs=[],[]
    for blast_record in blast_records:
        #print blast_record.query_id, len(blast_record.alignments)
        hc, sc = check.acquire_blast_counts(blast_record, hard_thres=42, soft_thres=20, verbose=verbose_parse);
        hcs.append(hc)
        scs.append(sc)
    if verbose:
        _after_parse = time.time()
        print("--- Total time for parsing:", _after_parse - _after_blast)
    
    # save hard counts and soft counts stats
    hard_counts.append(hcs)
    soft_counts.append(scs)
    

- region ADAMTS1
-- writing file: E:\Users\puzheng\Documents\Libraries\CTP-05\Exonic_RNA\blast\probe_gene_ADAMTS1.fasta
-- blasting region: ADAMTS1
--- Total time for blast: 16.20005750656128
-- parsing blast result for region: ADAMTS1
--- Total time for parsing: 4.714510440826416
- region AIRE
-- writing file: E:\Users\puzheng\Documents\Libraries\CTP-05\Exonic_RNA\blast\probe_gene_AIRE.fasta
-- blasting region: AIRE
--- Total time for blast: 87.31599044799805
-- parsing blast result for region: AIRE
--- Total time for parsing: 91.64221143722534
- region BACE2
-- writing file: E:\Users\puzheng\Documents\Libraries\CTP-05\Exonic_RNA\blast\probe_gene_BACE2.fasta
-- blasting region: BACE2
--- Total time for blast: 6.365737438201904
-- parsing blast result for region: BACE2
--- Total time for parsing: 2.718022584915161
- region BACH1
-- writing file: E:\Users\puzheng\Documents\Libraries\CTP-05\Exonic_RNA\blast\probe_gene_BACH1.fasta
-- blasting region: BACH1
--- Total time for blast: 12.076

--- Total time for parsing: 46.78751993179321
- region IL10RB
-- writing file: E:\Users\puzheng\Documents\Libraries\CTP-05\Exonic_RNA\blast\probe_gene_IL10RB.fasta
-- blasting region: IL10RB
--- Total time for blast: 189.21884036064148
-- parsing blast result for region: IL10RB
--- Total time for parsing: 143.8840959072113
- region ITSN1
-- writing file: E:\Users\puzheng\Documents\Libraries\CTP-05\Exonic_RNA\blast\probe_gene_ITSN1.fasta
-- blasting region: ITSN1
--- Total time for blast: 13.524082899093628
-- parsing blast result for region: ITSN1
--- Total time for parsing: 3.9989593029022217
- region LSS
-- writing file: E:\Users\puzheng\Documents\Libraries\CTP-05\Exonic_RNA\blast\probe_gene_LSS.fasta
-- blasting region: LSS
--- Total time for blast: 7.070262908935547
-- parsing blast result for region: LSS
--- Total time for parsing: 1.2139043807983398
- region MCM3AP
-- writing file: E:\Users\puzheng\Documents\Libraries\CTP-05\Exonic_RNA\blast\probe_gene_MCM3AP.fasta
-- blasting re

--- Total time for parsing: 1.452108383178711
- region SUMO3
-- writing file: E:\Users\puzheng\Documents\Libraries\CTP-05\Exonic_RNA\blast\probe_gene_SUMO3.fasta
-- blasting region: SUMO3
--- Total time for blast: 5.541124105453491
-- parsing blast result for region: SUMO3
--- Total time for parsing: 0.9296915531158447
- region TCP10L
-- writing file: E:\Users\puzheng\Documents\Libraries\CTP-05\Exonic_RNA\blast\probe_gene_TCP10L.fasta
-- blasting region: TCP10L
--- Total time for blast: 5.33597207069397
-- parsing blast result for region: TCP10L
--- Total time for parsing: 0.9567124843597412
- region TIAM1
-- writing file: E:\Users\puzheng\Documents\Libraries\CTP-05\Exonic_RNA\blast\probe_gene_TIAM1.fasta
-- blasting region: TIAM1
--- Total time for blast: 12.857569694519043
-- parsing blast result for region: TIAM1
--- Total time for parsing: 2.9151699542999268
- region TRAPPC10
-- writing file: E:\Users\puzheng\Documents\Libraries\CTP-05\Exonic_RNA\blast\probe_gene_TRAPPC10.fasta
-- 

In [3]:
def Screening_Probes_by_Blast(library_folder, probe_per_region, keep_mode='front', blast_subfolder='blast', 
                              probe_subfolder='.', probe_filename='filtered_full_probes.fasta',
                              soft_count_th=30,
                              smallest_region_ratio=0.75,
                              save=True, save_filename='blast_full_probes.fasta',
                              verbose=True, verbose_parse=False):
    '''Screen probes by pre-computed BLAST results and keep a limited number per region.

    Probes are read from probe_filename, split by region with
    check.split_probe_by_gene, and matched by position to the BLAST records
    in <blast_subfolder>/blast_gene_<region>.xml.

    Inputs:
        library_folder: root folder of this library, str
        probe_per_region: maximum number of probes kept per region, int
        keep_mode: which probes to keep when a region has too many,
            'front' (first ones in file order) or 'center' (closest to the
            middle of the region), str
        blast_subfolder: sub-folder of library_folder holding the BLAST xml files, str
        probe_subfolder: sub-folder of library_folder holding the probe file, str
        probe_filename: fasta file of probes to be screened, str
        soft_count_th: probes with a soft count above this are removed, int
        smallest_region_ratio: duplicated probes (hard count 2) are dropped
            only if at least this fraction of the kept probes is unique AND
            at least this fraction of probe_per_region is kept, float
        save: whether to write the kept probes to save_filename, bool
        save_filename: fasta file name for the kept probes, str
        verbose: print progress messages, bool
        verbose_parse: forwarded to check.acquire_blast_counts, bool
            (was previously read from a notebook-level global)
    Outputs:
        _kept_probe_list: all kept probe records, list
        _keep_dic: region -> boolean array marking probes that passed BLAST, dict
        _hard_count_list: per-region arrays of hard counts, list
        _soft_count_list: per-region arrays of soft counts, list
    Raises:
        ValueError: if a region needs trimming and keep_mode is unknown.
    '''
    def _probe_index(_pb):
        # probe ids contain '..._ind_<number>...'; return <number> as int
        return int(_pb.id.split('ind_')[1].split('_')[0])

    # folders
    _blast_folder = library_folder + os.sep + blast_subfolder
    _probe_folder = library_folder + os.sep + probe_subfolder
    # load probes
    with open(_probe_folder+os.sep+probe_filename, 'r') as _handle:
        _probes = list(SeqIO.parse(_handle, "fasta"))
    if verbose:
        print("- Number of probes loaded:", len(_probes))
    # parse loaded probes by region
    _pb_dic = check.split_probe_by_gene(_probes)
    if verbose:
        print("- Number of regions in this library:", len(_pb_dic))
    # dictionary to store whether keep this probe
    _keep_dic = {}  # whether keep because of blast only
    _kept_pb_dic = {}
    _hard_count_list = []
    _soft_count_list = []
    # loop through all regions
    for _reg, _pbs in _pb_dic.items():
        if verbose:
            print("-- checking probes in region:", _reg)
        # plain bool: the np.bool alias no longer exists in current NumPy
        _keep_dic[_reg] = np.ones(len(_pbs), dtype=bool)  # initialize with True
        _hard_cts, _soft_cts = [], []
        # parse blast result of this region; the parser is lazy, so consume it
        # inside the with-block and let the handle close afterwards
        with open(_blast_folder+os.sep+'blast_gene_'+str(_reg)+'.xml', 'r') as _xml_handle:
            # loop through each probe in this region
            for _pbid, blast_record in enumerate(NCBIXML.parse(_xml_handle)):
                _hc, _sc = check.acquire_blast_counts(blast_record, hard_thres=30, soft_thres=20, verbose=verbose_parse)
                _hard_cts.append(_hc)
                _soft_cts.append(_sc)
                if _hc > 2 or _hc < 1:  # if this probe has no hit, or more than 2 hits, remove
                    _keep_dic[_reg][_pbid] = False
                    print(f"--- gene={_reg}, id={_pbid} removed by hard count = {_hc}")
                    continue
                # if this probe has too many soft counts (20mer hits)
                if _sc > soft_count_th:
                    _keep_dic[_reg][_pbid] = False
                    print(f"--- gene={_reg}, id={_pbid} removed by soft count = {_sc}")
                    continue
        # after looped through this region, check the hard counts
        _hard_cts = np.array(_hard_cts)
        _soft_cts = np.array(_soft_cts)
        _hard_count_list.append(_hard_cts)
        _soft_count_list.append(_soft_cts)
        # probes that passed blast AND have exactly one hard hit
        _unique_mask = _keep_dic[_reg] & (_hard_cts == 1)
        _num_kept = int(np.sum(_keep_dic[_reg]))
        _num_unique = int(np.sum(_unique_mask))
        if verbose:
            print("--- number of probes:", len(_pbs), ", kept by blast:", _num_kept, ", if remove dups:", _num_unique)
        # check duplicated probes; the _num_kept > 0 guard avoids dividing by
        # zero when every probe of the region was removed
        if _num_kept > 0 \
                and _num_unique / float(_num_kept) >= smallest_region_ratio \
                and _num_kept >= smallest_region_ratio*probe_per_region:
            print('--- remove duplicated probes')
            _keep_dic[_reg] = _unique_mask

        # generate list of kept probes
        _kept_pbs = [_pb for _pb, _k in zip(_pbs, _keep_dic[_reg]) if _k]

        # too many probes left: trim according to keep_mode
        if len(_kept_pbs) > probe_per_region:
            if keep_mode == 'center':
                if verbose:
                    print("--- keep probes from center")
                # region span is encoded in the probe id as ':<start>-<end>_'
                _start, _end = _pbs[0].id.split(':')[1].split('_')[0].split('-')
                _start, _end = int(_start), int(_end)
                _reg_len = np.abs(_end - _start)
                # take the probes whose index is closest to the region middle,
                # then restore index order
                _kept_center_pbs = sorted(
                    _kept_pbs, key=lambda p: np.abs(_probe_index(p)-_reg_len/2))[:probe_per_region]
                _kept_pb_dic[_reg] = sorted(_kept_center_pbs, key=_probe_index)
            elif keep_mode == 'front':
                if verbose:
                    print("--- keep probes from beginning")
                _kept_pb_dic[_reg] = _kept_pbs[:probe_per_region]
            else:
                # previously the region was silently left unset here
                raise ValueError(f"Unknown keep_mode: {keep_mode!r}, should be 'front' or 'center'")
        else:
            _kept_pb_dic[_reg] = sorted(_kept_pbs, key=_probe_index)
        if verbose:
            print('-- number of probes kept for this region:', len(_kept_pb_dic[_reg]))

    # SUMMARIZE
    _kept_probe_list = []
    if verbose:
        print("- summarize")
    for _reg, _pbs in _kept_pb_dic.items():
        if verbose:
            print("-- region:", _reg, ", number of probes:", len(_pbs))
        _kept_probe_list += _pbs

    print("- Number of probes kept:", len(_kept_probe_list))

    if save:
        if verbose:
            print("- Saving to file:", _probe_folder + os.sep + save_filename)
        with open(_probe_folder + os.sep + save_filename, 'w') as _output_handle:
            SeqIO.write(_kept_probe_list, _output_handle, 'fasta')

    return _kept_probe_list, _keep_dic, _hard_count_list, _soft_count_list

In [4]:
# Keep at most 80 probes per gene after BLAST screening.
kept_pb_list, keep_dic, hard_count_list, soft_count_list = Screening_Probes_by_Blast(library_folder, 80)
# the same results are also bound under these alternative names
kept_pbs, blast_keep_dic = kept_pb_list, keep_dic

NameError: name 'library_folder' is not defined

## extra selection

In [31]:
# Re-run the library checks on the BLAST-screened probes (kept_pb_list).
primer_check = check._check_primer_usage(kept_pb_list, primers[0], primers[1])
print(primer_check)
reg_size_dic, len_check = check._check_region_size(kept_pb_list,min_size=24)
print(len_check)
reg_readout_dic, reg2readout_check = check._check_region_to_readouts(kept_pb_list, combo_readouts, unique_readouts)
print(reg2readout_check)

readout_reg_dic, readout2reg_check = check._check_readout_to_region(reg_readout_dic, kept_pb_list, combo_readouts, unique_readouts)
print(readout2reg_check)

# rebuild the internal map from the screened probes only
int_map = check._construct_internal_map(kept_pb_list, library_folder)

readout_count_dic, readout_count_check = check._check_readout_in_probes(readout_reg_dic, reg_size_dic, int_map, combo_readouts, unique_readouts)
print(readout_count_check)

kept_records, removed_count = check._check_between_probes(kept_pb_list, int_map)
# save kept records; create the output folder first so the write cannot fail
# just because 'final_probes' has not been made yet
final_probe_folder = os.path.join(library_folder, 'final_probes')
os.makedirs(final_probe_folder, exist_ok=True)
with open(os.path.join(final_probe_folder, 'extra_filtered_full_probes.fasta'), 'w') as output_handle:
    SeqIO.write(kept_records, output_handle, "fasta")

-- Checking primer usage, total probes: 4797
True
gene: HSPA13 -> 80
gene: USP25 -> 80
gene: CXADR -> 74
gene: BTG3 -> 33
gene: C21orf91 -> 80
gene: GABPA -> 80
gene: ADAMTS1 -> 80
gene: RWDD2B -> 77
gene: USP16 -> 74
gene: CCT8 -> 41
gene: BACH1 -> 80
gene: TIAM1 -> 80
gene: SOD1 -> 24
gene: SCAF4 -> 80
gene: HUNK -> 80
gene: MIS18A -> 34
gene: URB1 -> 80
gene: TCP10L -> 41
gene: PAXBP1 -> 75
gene: IFNAR2 -> 74
gene: IL10RB -> 46
gene: IFNAR1 -> 80
gene: IFNGR2 -> 49
gene: SON -> 80
gene: ITSN1 -> 80
gene: CRYZL1 -> 36
gene: MRPS6 -> 28
gene: SLC5A3 -> 80
gene: RCAN1 -> 70
gene: CBR1 -> 34
gene: SETD4 -> 80
gene: CBR3 -> 30
gene: MORC3 -> 80
gene: CHAF1B -> 74
gene: SIM2 -> 80
gene: HLCS -> 80
gene: PIGP -> 80
gene: TTC3 -> 80
gene: DYRK1A -> 80
gene: PSMG1 -> 45
gene: BRWD1 -> 80
gene: HMGN1 -> 13
gene: WRB -> 40
gene: BACE2 -> 73
gene: MX2 -> 80
gene: C2CD2 -> 80
gene: ZBTB21 -> 80
gene: SLC37A1 -> 80
gene: WDR4 -> 59
gene: NDUFV3 -> 58
gene: PKNOX1 -> 80
gene: HSF2BP -> 45
gene: RR

-- saving internal 17-mer map to file:E:\Users\puzheng\Documents\Libraries\CTP-05\Exonic_RNA\probe_table_17.npz
True
-- total probes removed by internal screening: 0


In [34]:
# number of probe records left after the between-probe screening
len(kept_records)

4797

In [23]:
# show the full sequence of the first kept probe as a plain string
str(kept_records[0].seq)

'CCCGCAATGGCTGACAACCGAAATTGCGTGACGGACCTGGGCCCGCAGTAACGCTCCGAGATTTCTCTGTTTTAGATCAAATTCTGCTGGGTATACTCATCTCGGACAATGTATCCGTTCCCGACAAGCGCGGAAACCATGCGGCAATCC'

In [28]:
# reverse complement of the last 20 bases of the second primer
primers[1][-20:].seq.reverse_complement()

Seq('CGGAAACCATGCGGCAATCC', SingleLetterAlphabet())

In [36]:
# Print the barcode assigned to every gene listed in position_dic.
for _gene in position_dic:
    print(_gene, gene_barcode_dic[_gene])

CBR1 [0 0 0 1 0 1 0 0 0 0 1 0 0 0 1 0]
CSTB [1 0 0 1 1 0 0 1 0 0 0 0 0 0 0 0]
AIRE [1 0 0 0 0 1 0 1 0 0 0 0 0 1 0 0]
TCP10L [0 1 0 1 0 0 0 0 1 0 0 0 1 0 0 0]
ZBTB21 [0 0 0 0 0 0 0 1 0 0 1 0 0 1 0 1]
NDUFV3 [0 0 0 0 1 0 0 0 0 1 1 1 0 0 0 0]
POFUT2 [1 0 1 1 0 0 1 0 0 0 0 0 0 0 0 0]
PIGP [1 1 0 1 0 0 0 0 0 1 0 0 0 0 0 0]
SLC5A3 [1 0 1 0 0 0 0 1 0 0 0 0 1 0 0 0]
ICOSLG [0 1 1 1 0 0 0 0 0 0 0 1 0 0 0 0]
ERVH48-1 [0 0 0 0 1 0 1 0 1 1 0 0 0 0 0 0]
RWDD2B [0 0 0 1 1 0 0 0 0 1 0 0 0 0 0 1]
HUNK [0 0 0 0 1 1 1 0 0 0 0 0 0 0 1 0]
IFNAR1 [0 0 1 0 1 1 0 0 0 0 0 0 0 0 0 1]
SOD1 [0 1 1 0 0 0 0 0 0 0 1 0 0 0 0 1]
CXADR [0 0 0 1 0 0 1 0 1 0 0 0 0 0 0 1]
C21orf91 [0 1 0 0 0 0 1 0 0 0 0 0 1 0 0 1]
ADAMTS1 [0 0 1 0 0 1 0 0 1 0 1 0 0 0 0 0]
HSPA13 [1 1 0 0 1 0 0 0 0 0 0 0 0 0 0 1]
USP25 [0 0 0 0 0 0 0 1 0 1 0 1 1 0 0 0]
CCT8 [0 0 0 0 0 0 1 0 0 0 0 1 1 1 0 0]
TIAM1 [1 0 0 0 0 1 0 0 0 0 0 0 1 0 1 0]
SCAF4 [1 0 0 0 0 0 0 0 1 1 0 0 1 0 0 0]
MIS18A [0 0 0 1 0 0 0 0 1 1 1 0 0 0 0 0]
IL10RB [1 0 0 0 0 0 0 0 0 0 1