# Generate adaptors 

1. for DNA-MERFISH bit66-99

2. for RNA bits

In [2]:
# Execute the personal startup script inside this kernel's namespace.
# NOTE(review): `sys`, `os` and `pickle` are used in this notebook without a visible
# import -- presumably they are provided by this script or by the star import below; confirm.
%run "E:\Users\puzheng\Documents\Startup_py3.py"
# Add the Documents folder to the module search path
# (presumably where ImageAnalysis3 lives -- confirm).
sys.path.append(r"E:\Users\puzheng\Documents")

import ImageAnalysis3 as ia
# Select the interactive "notebook" matplotlib backend.
%matplotlib notebook

# NOTE(review): the star import hides where names come from; prefer explicit imports.
from ImageAnalysis3 import *
# Print the process ID of the running kernel.
print(os.getpid())

40100


In [3]:
# biopython imports
from Bio import SeqIO
from Bio.Seq import Seq
from Bio.SeqRecord import SeqRecord
from Bio.Blast.Applications import NcbiblastnCommandline

## Load readouts

In [4]:
# Root of the shared chromatin-library folder; the readout and adaptor folders
# used by later cells are joined onto this path.
library_folder = r'\\10.245.74.212\Chromatin_NAS_2\Chromatin_Libraries'

In [5]:
# Collect the reference readout FASTA files (any file whose name contains
# 'designed_readouts') from the Readouts folder of the library.
readout_folder = os.path.join(library_folder, r'Readouts')
ref_files = [_fl for _fl in os.listdir(readout_folder) if 'designed_readouts' in _fl]

# Both dicts are keyed by the integer parsed from the file name
# ("designed_readouts_<channel>.fasta"): one stores the record ids,
# the other stores the SeqRecord objects themselves.
ref_readout_dict = {}
ref_readout_record_dict = {}
for _fl in ref_files:
    _suffix = _fl.split('designed_readouts_')[1]
    _channel = int(_suffix.split('.fasta')[0])
    with open(os.path.join(readout_folder, _fl), 'r') as _rd_handle:
        # materialize the records before the handle is closed
        _ref_readout_records = list(SeqIO.parse(_rd_handle, "fasta"))
    _ref_readout_names = [_record.id for _record in _ref_readout_records]
    ref_readout_dict[_channel] = _ref_readout_names
    ref_readout_record_dict[_channel] = _ref_readout_records

In [6]:
# Display the channel -> list-of-SeqRecord mapping for a quick visual check.
ref_readout_record_dict

{561: [SeqRecord(seq=Seq('TTCCCGCCAATGACGTCGGTTTGGACGAGA'), id='Stv_91', name='Stv_91', description='Stv_91 old_barcode', dbxrefs=[]),
  SeqRecord(seq=Seq('GCGTTGATGTCCCTTGTGAGCGCCCGACAT'), id='Stv_92', name='Stv_92', description='Stv_92 old_barcode', dbxrefs=[]),
  SeqRecord(seq=Seq('CGCTTATCGATGTCAGGTCCGCATGGGTCG'), id='Stv_94', name='Stv_94', description='Stv_94 old_barcode', dbxrefs=[]),
  SeqRecord(seq=Seq('AACGTCATCGGTGGATCCCAGAGTGCCAAA'), id='Stv_95', name='Stv_95', description='Stv_95 old_barcode', dbxrefs=[]),
  SeqRecord(seq=Seq('GTCCGATGAAACGTCCCGTGTGCTGTCGCG'), id='Stv_99', name='Stv_99', description='Stv_99 old_barcode', dbxrefs=[]),
  SeqRecord(seq=Seq('GACACGACGTCCAAAGTTGGTCTCGCGCAA'), id='Stv_100', name='Stv_100', description='Stv_100 old_barcode', dbxrefs=[]),
  SeqRecord(seq=Seq('ATGGCCACCCGACTGCGACGAACTCGATGC'), id='Stv_101', name='Stv_101', description='Stv_101 old_barcode', dbxrefs=[]),
  SeqRecord(seq=Seq('CTGGCGTTGCGCGATTGCCGCTTCAATCAA'), id='Stv_104', name='Stv_

## load readout sites

In [7]:
# Load the readout-site sequences used to build adaptors.
# The i-th record of Readout_sites.fasta is assigned to the i-th entry of
# `readout_channels`, so the record order in the file must match this list.
adaptor_folder = os.path.join(library_folder, 'Adaptors')
readout_site_file = os.path.join(adaptor_folder, 'Readout_sites.fasta')
readout_channels = ['750', '647', '561']
readout_sites = []
readout_site_dict = {}

with open(readout_site_file, 'r') as _site_handle:
    readout_sites.extend(SeqIO.parse(_site_handle, "fasta"))

# positional mapping: channel name -> readout-site record
for _site_index, _site in enumerate(readout_sites):
    readout_site_dict[readout_channels[_site_index]] = _site

print(readout_site_dict)

{'750': SeqRecord(seq=Seq('TTTGCACTGCCGTCCTTGAC'), id='Stv_82rc', name='Stv_82rc', description='Stv_82rc cy7 rev-com_last20', dbxrefs=[]), '647': SeqRecord(seq=Seq('GATCCGATTGGAACCGTCCC'), id='Stv_1rc', name='Stv_1rc', description='Stv_1rc cy5 rev-com_last20', dbxrefs=[]), '561': SeqRecord(seq=Seq('TGCGAACTGTCCGGCTTTCA'), id='Stv_79rc', name='Stv_79rc', description='Stv_79rc cy3 rev-com_last20', dbxrefs=[])}


# 1. Generate adaptors for DNA-bit 66-99

In [40]:
# load readout usage for ctp-11
ctp11_usage_fl = r'\\10.245.74.212\Chromatin_NAS_2\Chromatin_Libraries\CTP-11_brain\mouse_genome_1000\readout_usage.pkl'
# NOTE(review): unpickling can execute arbitrary code -- only load this file from a trusted share.
# Use a context manager so the file handle is closed as soon as loading finishes
# (the previous `pickle.load(open(...))` form never closed it explicitly).
with open(ctp11_usage_fl, 'rb') as _usage_handle:
    ctp11_usage = pickle.load(_usage_handle)
# Keep the entries of the 'c' readout list from index 66 onward
# (presumably DNA-MERFISH bits 66-99 -- confirm the list is 0-indexed by bit).
sel_ctp11_readouts = ctp11_usage['c'][66:]

In [41]:
# Helper that builds adaptor records from readout records and readout-site records.
# NOTE(review): it is defined in ImageAnalysis3, outside this notebook; its exact
# output format is not visible here -- confirm before relying on it.
from ImageAnalysis3.library_tools.readouts import Generate_adaptors

# Build adaptors for the selected CTP-11 readouts using only the '561' readout site.
sel_ctp11_adaptors = Generate_adaptors(sel_ctp11_readouts, [readout_site_dict['561']])


# 2. Generate adaptors for RNA

In [11]:
import pandas as pd

In [12]:
# Read the MERFISH readout table; na_filter=False turns off NaN detection.
rna_readouts_df = pd.read_excel(
    r'\\10.245.74.212\Chromatin_NAS_2\Chromatin_Libraries\Readouts\MERFISH_readouts\readout_table_v2_corrected.xls',
    na_filter=False,
)

# One SeqRecord per table row, with id "Bit-<Bit number>-<Probe name>"
# and empty name/description fields.
rna_records = [
    SeqRecord(
        Seq(_row['Sequence']),
        id=f"Bit-{_row['Bit number']}-{_row['Probe name']}",
        name='',
        description='',
    )
    for _, _row in rna_readouts_df.iterrows()
]

In [44]:
# save records for now
# Write all RNA readout records to a FASTA file inside the readout folder.
_merfish_fasta_path = os.path.join(readout_folder, 'MERFISH_readouts.fasta')
with open(_merfish_fasta_path, 'w') as _output_handle:
    SeqIO.write(rna_records, _output_handle, "fasta")

In [48]:
# Generate adaptors for the first 38 RNA readout records, passing the
# '750' and '647' readout sites (in that order) to Generate_adaptors.
from ImageAnalysis3.library_tools.readouts import Generate_adaptors
sel_rna_records = rna_records[:38]
_rna_readout_sites = [readout_site_dict[_channel_name] for _channel_name in ('750', '647')]
sel_rna_adaptors = Generate_adaptors(sel_rna_records, _rna_readout_sites)


## Generate for bit 48

In [9]:
from ImageAnalysis3.library_tools.readouts import Generate_adaptors

In [52]:
# generate adaptor for cy3 bit 48
# rna_records[47] is the 48th record of the readout table (0-based index);
# the '561' readout site is the one labelled cy3 in the Readout_sites printout.
_cy3b48_adaptor = Generate_adaptors([rna_records[47]], 
                                    [readout_site_dict['561']])

In [13]:
# generate adaptors for cy7 ('750' site) and cy5 ('647' site) bit 48
# rna_records[47] is the 48th record of the readout table (0-based index).
_bit48_record = rna_records[47]
_cy7b48_adaptor = Generate_adaptors([_bit48_record], [readout_site_dict['750']])
_cy5b48_adaptor = Generate_adaptors([_bit48_record], [readout_site_dict['647']])

In [53]:
# summarize
# First order: CTP-11 cy3 adaptors + RNA adaptors for the first 38 records + cy3 bit-48 adaptor.
# NOTE(review): the same variable name is reassigned in the In [14] cell below, so in a
# top-to-bottom run this list is overwritten before the order table is built.
all_order_adaptors = sel_ctp11_adaptors + sel_rna_adaptors + _cy3b48_adaptor

In [14]:
# Second order: only the cy7 and cy5 bit-48 adaptors.
# NOTE(review): this replaces the list assigned in the In [53] cell above; consider a
# distinct variable name per order so both survive Restart & Run All.
all_order_adaptors = _cy7b48_adaptor + _cy5b48_adaptor

# Save csv

In [15]:
# Build the order table in a single DataFrame construction: one row per adaptor,
# with its id and sequence plus fixed 'Scale' / 'Purification' values.
# Row-by-row `DataFrame.append` was removed in pandas 2.0 and copied the whole
# frame on every call, so the rows are collected first and converted once.
_order_columns = ['Name', 'Sequence', 'Scale', 'Purification']
_order_rows = [
    {'Name': _adt.id,
     'Sequence': str(_adt.seq),
     'Scale': '25nm',
     'Purification': 'STD'}
    for _adt in all_order_adaptors
]
# Passing `columns` keeps the column order and still yields the four columns
# when `all_order_adaptors` is empty.
save_df = pd.DataFrame(_order_rows, columns=_order_columns)

In [72]:
# Display the order table.
# NOTE(review): the captured output below (rows 0-70) does not match what a top-to-bottom
# run would show, because `all_order_adaptors` is reassigned before `save_df` is built.
save_df

Unnamed: 0,Name,Sequence,Scale,Purification
0,NDB_853_2xStv_79rc,ATATCGGTCTAGTGGATACCTGCGAACTGTCCGGCTTTCATGCGAA...,25nm,STD
1,NDB_941_2xStv_79rc,GTGAAAGGTGCCGCTATTGCTGCGAACTGTCCGGCTTTCATGCGAA...,25nm,STD
2,NDB_838_2xStv_79rc,GTATTAGGCACTCATCCGACTGCGAACTGTCCGGCTTTCATGCGAA...,25nm,STD
3,NDB_647_2xStv_79rc,CTTGCCGCACGGGTTATTTGTGCGAACTGTCCGGCTTTCATGCGAA...,25nm,STD
4,NDB_994_2xStv_79rc,AAGATTTAGCGACGCTCTAATGCGAACTGTCCGGCTTTCATGCGAA...,25nm,STD
...,...,...,...,...
67,Bit-35-RS1312_2xStv_82rc,TCCCAACTAACCTAACATTCTTTGCACTGCCGTCCTTGACTTTGCA...,25nm,STD
68,Bit-36-RS1316_2xStv_1rc,ACATCCTAACTACAACCTTCGATCCGATTGGAACCGTCCCGATCCG...,25nm,STD
69,Bit-37-RS1326_2xStv_82rc,ATCCTCACTACATCATCCACTTTGCACTGCCGTCCTTGACTTTGCA...,25nm,STD
70,Bit-38-RS1334_2xStv_1rc,TCTCACACCACTTTCCTCATGATCCGATTGGAACCGTCCCGATCCG...,25nm,STD


In [73]:
# Write the order table to the first order file (no index column is written).
# NOTE(review): in file order `save_df` is built from the reassigned `all_order_adaptors`
# (cy7/cy5 bit 48 only), so re-running the notebook top to bottom would overwrite this
# file with the second order's content.
save_df.to_csv(os.path.join(adaptor_folder, 'Order_20220227_ndbcy3-merfish-b48cy3.csv'),
               index=None)

In [16]:
# Write the order table to the second order file (no index column is written).
# NOTE(review): this writes the same `save_df` object as the first-order cell above;
# which adaptors it contains depends on the cell execution order.
save_df.to_csv(os.path.join(adaptor_folder, 'Order2_20220306_b48.csv'),
               index=None)

In [17]:
# Print the full path of the second order file.
print(os.path.join(adaptor_folder, 'Order2_20220306_b48.csv'))

\\10.245.74.212\Chromatin_NAS_2\Chromatin_Libraries\Adaptors\Order2_20220306_b48.csv
