# Design adaptors to convert CTP-08 Cy3 into Cy7 and Cy5

In [1]:
%run "E:\Users\puzheng\Documents\Startup_py3.py"
sys.path.append(r"E:\Users\puzheng\Documents")

import ImageAnalysis3 as ia
%matplotlib notebook

from ImageAnalysis3 import *
print(os.getpid())

18848


In [3]:
# biopython imports
from Bio import SeqIO
from Bio.Seq import Seq
from Bio.Alphabet import IUPAC
from Bio.SeqRecord import SeqRecord
from Bio.Blast.Applications import NcbiblastnCommandline
from Bio.Blast import NCBIXML

# Load library

In [2]:
# Location of the final probe library (FASTA) on the lab NAS.
probe_filename = r'\\10.245.74.69\Chromatin_NAS_2\Libraries\CTP-08_IgH\5kb\final_probes\batch_1_final_probes.fasta'
# Fail early with a specific exception type; FileNotFoundError is a subclass of
# OSError (== IOError in Python 3), so existing `except IOError` handlers still work.
if not os.path.isfile(probe_filename):
    raise FileNotFoundError(f"input probe file: {probe_filename} doesn't exist.")

In [4]:
# Read every FASTA entry of the probe library into a list of records,
# keeping the order in which they appear in the file.
with open(probe_filename, 'r') as handle:
    pb_records = list(SeqIO.parse(handle, "fasta"))

## get readout names in the library

In [5]:
# Build {region id: [readout names]} from the probe ids.
# The region id is the integer that follows "gene_" in the probe id, and the
# readout names are the comma-separated entries between "[" and "]".
# Only the FIRST probe seen for each region contributes its readout names;
# later probes of the same region are parsed but otherwise ignored.
readout_names = {}
for _r in pb_records:
    _rid = int(_r.id.split('gene_')[1].split('_')[0])
    _rd_names = _r.id.split('[')[1].split(']')[0].split(',')
    if _rid in readout_names:
        continue
    readout_names[_rid] = []
    for _rd in _rd_names:
        # Strip everything from "_u" onwards BEFORE the duplicate check, so the
        # check compares like with like (the list only stores stripped names).
        _rd = _rd.split('_u')[0]
        if _rd not in readout_names[_rid]:
            readout_names[_rid].append(_rd)
                    

In [6]:
# Collect genomic coordinates per region id, parsed from the probe ids.
# The text before "_gene" is expected to look like "...chr<name>:<start>-<end>".
region_info = {}
for _r in pb_records:
    _rid = int(_r.id.split('gene_')[1].split('_')[0])

    # coordinates are taken from the first probe of each region only
    if _rid in region_info:
        continue

    # extract region info
    _reg_info = _r.id.split('_gene')[0].split('chr')[1]
    _chrom, _span = _reg_info.split(':')[0], _reg_info.split(':')[1]
    chr_name = 'chr' + _chrom
    start = int(_span.split('-')[0])
    end = int(_span.split('-')[1])
    mid = int((start + end) / 2)
    region_info[_rid] = dict(chr=chr_name, start=start, end=end, mid=mid)

# Load readouts

In [16]:
# Reference readout libraries: one "designed_readouts_<channel>.fasta" per channel.
readout_folder = r'\\10.245.74.69\Chromatin_NAS_2\Libraries\Readouts'
ref_files = [_fl for _fl in os.listdir(readout_folder) if 'designed_readouts' in _fl]

# channel (int parsed from the file name) -> readout ids / readout records
ref_readout_dict = {}
ref_readout_record_dict = {}
for _fl in ref_files:
    _channel = int(_fl.split('designed_readouts_')[1].split('.fasta')[0])
    with open(os.path.join(readout_folder, _fl), 'r') as _rd_handle:
        _ref_readout_records = list(SeqIO.parse(_rd_handle, "fasta"))
    # ids are kept in the same order as the records they come from
    _ref_readout_names = [_readout.id for _readout in _ref_readout_records]
    ref_readout_dict[_channel] = _ref_readout_names
    ref_readout_record_dict[_channel] = _ref_readout_records

In [69]:
# Sort regions by the channel of their readout:
# readout_by_channel[channel][region id] = readout name.
# Each region is represented by the alphabetically first of its readout names
# and is assigned to at most ONE channel (the first channel whose reference
# list contains that name).
readout_by_channel = {_c: {} for _c in ref_readout_dict}
for _reg, _names in readout_names.items():
    # loop-invariant: pick the representative readout once per region
    _rd = np.unique(_names)[0]
    for _c, _known_names in ref_readout_dict.items():
        if _rd in _known_names:
            readout_by_channel[_c][_reg] = _rd
            # The previous guard `_reg not in readout_by_channel.values()`
            # compared an int against dicts and was therefore always True;
            # stopping at the first match enforces the intended uniqueness.
            break

## Here, the Cy3 (561) readouts are the ones we want to design adaptors for

In [17]:
# Load the readout-site sequences that the adaptors will carry.
adaptor_folder = r'\\10.245.74.69\Chromatin_NAS_2\Libraries\Adaptors'
readout_site_file = os.path.join(adaptor_folder, 'Readout_sites.fasta')
with open(readout_site_file, 'r') as _rd_handle:
    # records stay in file order; later cells slice this list by position
    readout_sites = list(SeqIO.parse(_rd_handle, "fasta"))
print(readout_sites)

[SeqRecord(seq=Seq('TTTGCACTGCCGTCCTTGAC', SingleLetterAlphabet()), id='Stv_82', name='Stv_82', description='Stv_82 cy7 rev-com_last20', dbxrefs=[]), SeqRecord(seq=Seq('GATCCGATTGGAACCGTCCC', SingleLetterAlphabet()), id='Stv_1', name='Stv_1', description='Stv_1 cy5 rev-com_last20', dbxrefs=[]), SeqRecord(seq=Seq('TGCGAACTGTCCGGCTTTCA', SingleLetterAlphabet()), id='Stv_79', name='Stv_79', description='Stv_79 cy3 rev-com_last20', dbxrefs=[])]


144

In [26]:
# Work out how many cy3 regions must be converted to cy7 (750) and cy5 (647).
# Target split: half of all regions (rounded down) in 750, the rest in 647.
# `np.int` was only an alias of the builtin int and has been removed from
# NumPy (deprecated 1.20, removed 1.24); floor division gives the same value.
num_cy7 = len(readout_names) // 2
num_cy5 = len(readout_names) - num_cy7
# deficit of each channel = target count minus regions already in that channel
num_cy3_to_7 = num_cy7 - len(readout_by_channel[750])
num_cy3_to_5 = num_cy5 - len(readout_by_channel[647])


In [51]:
# Display the 750-channel deficit: target count minus regions already in 750.
num_cy3_to_7

24

In [34]:
# Pick the reference readout record for every region currently in the 561 channel.
# Records are collected in the iteration order of readout_by_channel[561].
cy3_records = []
for _reg, _name in readout_by_channel[561].items():
    # first reference record whose id equals the readout name, if any
    _match = next(
        (_rec for _rec in ref_readout_record_dict[561] if _rec.id == _name),
        None,
    )
    if _match is not None:
        cy3_records.append(_match)
print(len(cy3_records))

46


In [52]:
# Generate adaptors for the cy3 readouts.
reload(library_tools.readouts)
from ImageAnalysis3.library_tools.readouts import Generate_adaptors

# First part: records shared between the two target sites. readout_sites[:2]
# are the first two entries of Readout_sites.fasta (printed above as
# Stv_82 / cy7 and Stv_1 / cy5).
# NOTE(review): judging from the adaptor ids printed further down,
# Generate_adaptors cycles through the given sites record by record -- confirm.
_num_paired = 2 * min(num_cy3_to_5, num_cy3_to_7)
cy3_adaptors = Generate_adaptors(cy3_records[:_num_paired], readout_sites[:2])

# Remaining records all go to whichever channel still has the larger deficit.
# Previously they were always sent to readout_sites[:1] (cy7), which is wrong
# whenever num_cy3_to_5 > num_cy3_to_7.
if num_cy3_to_7 >= num_cy3_to_5:
    _leftover_sites = readout_sites[:1]
else:
    _leftover_sites = readout_sites[1:2]
cy3_adaptors += Generate_adaptors(cy3_records[_num_paired:], _leftover_sites)

In [54]:
# Generate the csv file used to order the adaptors from IDT.
import csv

# newline='' is required by the csv module; without it every row is followed
# by an empty line on Windows. The other csv writers in this notebook already
# pass it.
with open(os.path.join(adaptor_folder, 'CTP-08_swap_cy3_adaptor.csv'), 'w', newline='') as csvfile:
    csvwriter = csv.writer(csvfile, delimiter=',', 
                       quotechar='|', quoting=csv.QUOTE_MINIMAL)
    # write header
    _header = ['Name', 'Sequence', 'Scale', 'Purification']
    csvwriter.writerow(_header)
    # write one row per adaptor: id, sequence, synthesis scale, purification
    for _adaptor in cy3_adaptors:
        _info = [_adaptor.id, str(_adaptor.seq), '25nm', 'STD']
        csvwriter.writerow(_info)

In [70]:
# Build the updated channel assignment: existing 750 / 647 regions plus the
# cy3 regions, each moved to the channel of the site its adaptor carries.
# Copy the per-channel dicts so that readout_by_channel itself is NOT mutated;
# otherwise re-running earlier cells (e.g. the deficit calculation) after this
# one would see the already-merged dicts.
new_readout_by_channel = {
    750: dict(readout_by_channel[750]),
    647: dict(readout_by_channel[647]),
}

# cy3_adaptors is paired with readout_by_channel[561] purely by position, so a
# length mismatch would silently drop or mis-assign regions in zip().
_cy3_items = list(readout_by_channel[561].items())
if len(_cy3_items) != len(cy3_adaptors):
    raise ValueError(
        f"{len(_cy3_items)} cy3 regions but {len(cy3_adaptors)} adaptors; "
        "cannot pair them by position."
    )

for (_reg, _name), _adaptor in zip(_cy3_items, cy3_adaptors):
    print(_reg, _adaptor.id)
    # Match the site at the END of the adaptor id (ids look like
    # "<readout>_2x<site>" in the printed output). A plain substring test for
    # 'Stv_1' would also match readout names such as 'Stv_119'.
    if _adaptor.id.endswith('Stv_82'):
        new_readout_by_channel[750][_reg] = _name
    elif _adaptor.id.endswith('Stv_1'):
        new_readout_by_channel[647][_reg] = _name
    else:
        raise ValueError(f"adaptor {_adaptor.id} does not end with a known readout site.")

43 Stv_119_2xStv_82
46 Stv_120_2xStv_1
49 Stv_121_2xStv_82
52 Stv_125_2xStv_1
55 Stv_127_2xStv_82
59 Stv_129_2xStv_1
62 Stv_130_2xStv_82
65 Stv_131_2xStv_1
68 Stv_133_2xStv_82
71 Stv_136_2xStv_1
74 Stv_145_2xStv_82
77 Stv_182_2xStv_1
80 NDB_3_2xStv_82
83 NDB_6_2xStv_1
86 NDB_9_2xStv_82
89 NDB_12_2xStv_1
92 NDB_15_2xStv_82
95 NDB_18_2xStv_1
98 NDB_21_2xStv_82
101 NDB_24_2xStv_1
104 NDB_27_2xStv_82
107 NDB_30_2xStv_1
110 NDB_33_2xStv_82
113 NDB_36_2xStv_1
325 NDB_243_2xStv_82
328 NDB_246_2xStv_1
331 NDB_249_2xStv_82
334 NDB_252_2xStv_1
341 NDB_258_2xStv_82
344 NDB_261_2xStv_1
347 NDB_264_2xStv_82
350 NDB_267_2xStv_1
353 NDB_270_2xStv_82
356 NDB_273_2xStv_1
359 NDB_276_2xStv_82
362 NDB_279_2xStv_1
365 NDB_282_2xStv_82
368 NDB_285_2xStv_1
371 NDB_288_2xStv_82
374 NDB_291_2xStv_1
377 NDB_294_2xStv_82
381 NDB_297_2xStv_1
384 NDB_300_2xStv_82
387 NDB_303_2xStv_1
390 NDB_306_2xStv_82
393 NDB_309_2xStv_82


In [71]:
# Inspect the merged assignment: {channel: {region id: readout name}}.
new_readout_by_channel

{750: {41: 'Stv_19',
  44: 'Stv_20',
  47: 'Stv_21',
  50: 'Stv_22',
  53: 'Stv_23',
  56: 'Stv_25',
  60: 'Stv_26',
  63: 'Stv_27',
  66: 'Stv_28',
  69: 'Stv_29',
  72: 'Stv_30',
  75: 'Stv_31',
  78: 'NDB_1',
  81: 'NDB_4',
  84: 'NDB_7',
  87: 'NDB_10',
  90: 'NDB_13',
  93: 'NDB_16',
  96: 'NDB_19',
  99: 'NDB_22',
  102: 'NDB_25',
  105: 'NDB_28',
  108: 'NDB_31',
  111: 'NDB_34',
  114: 'NDB_37',
  323: 'NDB_241',
  326: 'NDB_244',
  329: 'NDB_247',
  332: 'NDB_250',
  335: 'NDB_253',
  339: 'NDB_256',
  342: 'NDB_259',
  345: 'NDB_262',
  348: 'NDB_265',
  351: 'NDB_268',
  354: 'NDB_271',
  357: 'NDB_274',
  360: 'NDB_277',
  363: 'NDB_280',
  366: 'NDB_283',
  369: 'NDB_286',
  372: 'NDB_289',
  375: 'NDB_292',
  379: 'NDB_295',
  382: 'NDB_298',
  388: 'NDB_304',
  391: 'NDB_307',
  394: 'NDB_310',
  43: 'Stv_119',
  49: 'Stv_121',
  55: 'Stv_127',
  62: 'Stv_130',
  68: 'Stv_133',
  74: 'Stv_145',
  80: 'NDB_3',
  86: 'NDB_9',
  92: 'NDB_15',
  98: 'NDB_21',
  104: 'NDB_27'

In [72]:
import csv

# Write one row per index i, listing for every channel the i-th region and its
# readout name, together with a group number and a position within the group.
save_folder = r'\\10.245.74.158\Chromatin_NAS_6\20200920-B_DMSO_CTP-08_IgH\Analysis'
with open(os.path.join(save_folder, 'adaptor_sequences.csv'), 'w', newline='') as csvfile:
    csvwriter = csv.writer(csvfile, delimiter=',', 
                           quotechar='|', quoting=csv.QUOTE_MINIMAL)
    
    # header: group, hyb, then "<channel>_region" / "<channel>_readout" per channel
    _header = ['group', 'hyb', ]
    for _ch in new_readout_by_channel:
        _header += [f"{_ch}_region", f"{_ch}_readout"]
    dict_sizes = [len(_v) for _k,_v in new_readout_by_channel.items()]
    print(dict_sizes)
    csvwriter.writerow(_header)
    
    # Order regions by region id, computed once per channel. This matches the
    # ordering used when writing Color_Usage.csv (sorted region ids), so row i
    # here refers to the same regions as hyb i there. Plain dict order would
    # list the pre-existing regions first and the converted cy3 regions last.
    _sorted_regs = {_ch: sorted(_dict) for _ch, _dict in new_readout_by_channel.items()}
    _group_size = 32  # rows per group; the 'hyb' column restarts at 1 in each group
    
    for _i in range(max(dict_sizes)):
        _row = [_i // _group_size + 1, _i % _group_size + 1]
        for _ch, _dict in new_readout_by_channel.items():
            _regs = _sorted_regs[_ch]
            if _i >= len(_regs):
                # this channel has fewer regions than the longest one: pad
                _row += ['', '']
            else:
                _row += [_regs[_i], _dict[_regs[_i]]]
        csvwriter.writerow(_row)

[72, 72]


# save color_usage

In [73]:
import csv

# Write Color_Usage.csv: one row per hybridization round, one column per channel.
drift_channel = '488'
dapi_channel = '405'
chrom_labels = {'750': 'forward',
                '647': 'reverse',}

with open(os.path.join(save_folder, 'Color_Usage.csv'), 'w', newline='') as csvfile:
    csvwriter = csv.writer(csvfile, delimiter=',', 
                           quotechar='|', quoting=csv.QUOTE_MINIMAL)
    
    # write header: 'Hyb', readout channels, drift channel, DAPI channel
    _header = ['Hyb']
    for _ch in new_readout_by_channel:
        _header.append(str(_ch))
    _header.append(drift_channel)
    _header.append(dapi_channel)
    print(_header)
    csvwriter.writerow(_header)
    
    # write reference frame (H0R0): chromosome label per channel, beads, DAPI
    _ref_row = ['H0R0']
    for _ch in new_readout_by_channel:
        if str(_ch) in chrom_labels:
            _ref_row.append(chrom_labels[str(_ch)]+'_chrom')
        else:
            _ref_row.append("")
    _ref_row.append('beads')
    _ref_row.append('DAPI')
    print(_ref_row)
    csvwriter.writerow(_ref_row)
    
    # Sort the region ids once per channel instead of once per row and channel,
    # and derive the number of hybs here rather than relying on `dict_sizes`
    # left over from the cell that writes adaptor_sequences.csv.
    _sorted_regs = {_ch: sorted(_dict) for _ch, _dict in new_readout_by_channel.items()}
    _num_hybs = max(len(_regs) for _regs in _sorted_regs.values())
    
    for _i in range(_num_hybs):
        _row = [f"H{_i+1}R{_i+1}",] 
        for _ch in new_readout_by_channel:
            _regs = _sorted_regs[_ch]
            if _i >= len(_regs):
                # channel has no region left for this hyb
                _row += ['']
            else:
                _row += [f"u{_regs[_i]}"]
        # NOTE: hyb rows end with the drift-channel entry; no DAPI column is written
        _row.append("beads")
        print(_row)
        csvwriter.writerow(_row)


['Hyb', '750', '647', '488', '405']
['H0R0', 'forward_chrom', 'reverse_chrom', 'beads', 'DAPI']
['H1R1', 'u41', 'u42', 'beads']
['H2R2', 'u43', 'u45', 'beads']
['H3R3', 'u44', 'u46', 'beads']
['H4R4', 'u47', 'u48', 'beads']
['H5R5', 'u49', 'u51', 'beads']
['H6R6', 'u50', 'u52', 'beads']
['H7R7', 'u53', 'u54', 'beads']
['H8R8', 'u55', 'u57', 'beads']
['H9R9', 'u56', 'u59', 'beads']
['H10R10', 'u60', 'u61', 'beads']
['H11R11', 'u62', 'u64', 'beads']
['H12R12', 'u63', 'u65', 'beads']
['H13R13', 'u66', 'u67', 'beads']
['H14R14', 'u68', 'u70', 'beads']
['H15R15', 'u69', 'u71', 'beads']
['H16R16', 'u72', 'u73', 'beads']
['H17R17', 'u74', 'u76', 'beads']
['H18R18', 'u75', 'u77', 'beads']
['H19R19', 'u78', 'u79', 'beads']
['H20R20', 'u80', 'u82', 'beads']
['H21R21', 'u81', 'u83', 'beads']
['H22R22', 'u84', 'u85', 'beads']
['H23R23', 'u86', 'u88', 'beads']
['H24R24', 'u87', 'u89', 'beads']
['H25R25', 'u90', 'u91', 'beads']
['H26R26', 'u92', 'u94', 'beads']
['H27R27', 'u93', 'u95', 'beads']
['H2

## save region_positions for analysis

In [74]:
import csv

# Dump the genomic coordinates of every region to Region_Positions.csv.
_region_csv_path = os.path.join(save_folder, 'Region_Positions.csv')
with open(_region_csv_path, 'w', newline='') as csvfile:
    csvwriter = csv.writer(csvfile, delimiter=',', 
                           quotechar='|', quoting=csv.QUOTE_MINIMAL)
    # write header
    csvwriter.writerow(['region', 'chr', 'start', 'end', 'midpoint',])
    # one row per region, in the iteration order of region_info
    csvwriter.writerows(
        [_i, _info['chr'], _info['start'], _info['end'], _info['mid']]
        for _i, _info in region_info.items()
    )