In [1]:
from Bio.Seq import Seq
import pandas as pd
import os

In [2]:
# Name of the gRNA input file; the loading cell below looks for it in ~/Desktop
# and reads it with pandas without a header row.
gRNA_FILENAME = 'gRNA_2023_08_02_clean.txt'

In [3]:
# Resolve the current user's Desktop folder and build the input path from it.
desktop = os.path.expanduser("~/Desktop")
gRNA_file_path = '/'.join((desktop, gRNA_FILENAME))

# The file has no header row, so pandas labels its columns 0, 1, ...
gRNA_df = pd.read_csv(filepath_or_buffer=gRNA_file_path, header=None)

In [4]:
def convert_to_biopython_sequences(dataframe, sequence_column=0):
    """
    Build Biopython Seq objects from the gRNA strings held in one dataframe column.
    Space characters inside each entry are removed; everything else is kept as-is.
    :param dataframe: table holding the gRNA sequences, indexable by column label
    :param sequence_column: label of the gRNA column, default=0
    :return: a list of Seqs, one per entry of the column, in column order
    """
    return [
        Seq(raw_guide).replace(" ", "")
        for raw_guide in dataframe[sequence_column]
    ]

In [5]:
# Turn column 0 of the loaded table into a list of Seq objects (spaces removed).
gRNA = convert_to_biopython_sequences(gRNA_df)

In [6]:

# Show the parsed guides so they can be checked by eye before designing primers.
gRNA

[Seq('AGAAAGTGAGTCATTTTGGG'),
 Seq('ACCATGCTGTGGCCAAGCAG'),
 Seq('AAGGCTGAGCATCTGCGCAG'),
 Seq('TATTCTGatagataACTcta'),
 Seq('TATTCTGatagataACTcta'),
 Seq('GAAAAATTACTAGCCATAAT'),
 Seq('CCGGTAGTAAAAATtCATTT'),
 Seq('GCCACCACTAACAGCCACAG'),
 Seq('TGGCTTCCTTCTAGAAAGGC'),
 Seq('TACACAGAGGAAAGAGGAAG'),
 Seq('GACATGGCTGCACATTTTCT'),
 Seq('TGCAACCTGCAACTGATCTG'),
 Seq('CGGAAGGGGAAGAAGGGGCC'),
 Seq('tgcctcagagaatttctcta'),
 Seq('AGCAGAGGAGAGCACGCGTG'),
 Seq('atccctttgacagcaaaaCc'),
 Seq('CGTGCTGCGGCAAACTGGCA'),
 Seq('ggaggttaTgaCTGATCATT'),
 Seq('CTCTGTAAAGGCACAGAGAG')]

In [7]:

def _pick_overhang_start(seq, used_overhangs, preferred_start=8, overhang_length=4):
    """
    Choose the position at which a middle gRNA is split between two primers.
    Candidate start positions are tried outward from preferred_start
    (preferred, +1, -1, +2, -2, ...) and limited to positions where a full
    overhang fits inside seq. A candidate is rejected when its overhang equals
    its own reverse complement or is already listed in used_overhangs.
    All comparisons ignore letter case.
    :param seq: gRNA sequence supporting len(), slicing, str() and reverse_complement()
    :param used_overhangs: collection of upper-case overhang strings already taken
    :param preferred_start: start position tried first, default=8
    :param overhang_length: number of nt in the overhang, default=4
    :return: start index of the first acceptable overhang
    :raises ValueError: if no position in seq gives an acceptable overhang
    """
    last_start = len(seq) - overhang_length
    max_shift = max(preferred_start, last_start - preferred_start)
    for shift in range(max_shift + 1):
        if shift == 0:
            candidates = (preferred_start,)
        else:
            candidates = (preferred_start + shift, preferred_start - shift)
        for start in candidates:
            if not 0 <= start <= last_start:
                continue
            overhang = seq[start:start + overhang_length]
            overhang_key = str(overhang).upper()
            # A self-complementary overhang is rejected, as is one already in use.
            is_palindrome = str(overhang.reverse_complement()).upper() == overhang_key
            if not is_palindrome and overhang_key not in used_overhangs:
                return start
    raise ValueError(
        'No usable %d-nt overhang found in gRNA %s' % (overhang_length, str(seq))
    )


def creepy_primer(gRNA_list):
    """
    Design CREEPY primers based on a list of gRNAs
    For one guide RNA, return two oligos that generate sticky ends after annealing.
    For two guide RNAs, return two primers, with each primer incorporating one gRNA.
    For three and more guide RNAs, return a set of primers:
        The first and last primers introduce the first and last gRNAs, respectively.
        The rest of the primers introduce the middle gRNAs that are each split into two halves,
        with the first half in the reverse primer and the second half in the fwd primer.
        The split position of each middle gRNA is chosen so that its 4-nt overhang is not
        its own reverse complement and differs (ignoring case) from GACT, AAAC and from
        the overhangs already chosen for earlier middle gRNAs.
    :param gRNA_list: list of gRNA sequences; each must support slicing, concatenation with
        str and reverse_complement() (e.g. Bio.Seq.Seq)
    :return: list of primers; for three and more gRNAs the order is
        [first, rev_1, fwd_1, rev_2, fwd_2, ..., last]
    :raises ValueError: if gRNA_list is empty or a middle gRNA has no usable overhang
    """
    sticky_end_left = 'GACT'    #left sticky end of pXW467/468/472
    sticky_end_right = 'AAAC'   #right sticky end of pXW467/468/472
    template_fwd = 'GTTTTAGAGCTAGAAATAGCAAGTTA' #Fwd primer to amplify scaffold RNA
    template_rev = 'TGCGCAAGCCCGGAATCGAACCGGG'  #Rev primer to amplify tRNA_Gly
    adaptor_left = 'aCGTCTCagacttt' #left adaptor for Esp3I to generate a GACT sticky end followed by tt
    adaptor_right = 'aCGTCTCcaaac'  #Right adaptor for Esp3I to generate a AAAC sticky end
    adaptor3 = 'aCGTCTCc'   #universal adaptor for Esp3I, will generate a sticky end that matches the 4 following nt
    overhang_length = 4     #length of the overhang at each junction
    preferred_start = 8     #split position tried first for every middle gRNA

    if len(gRNA_list) == 0:
        raise ValueError('gRNA_list must contain at least one gRNA')

    if len(gRNA_list) == 1:
        # For one gRNA only: two oligos that are annealed, no PCR template involved
        primer1 = sticky_end_left + 'tt' + gRNA_list[0]
        primer2 = sticky_end_right + gRNA_list[0].reverse_complement() + 'aa'
        return [primer1, primer2]

    # For two and more gRNAs the first and last gRNA are each carried whole by one primer
    primer_first = adaptor_left + gRNA_list[0] + template_fwd
    primer_last = adaptor_right + gRNA_list[-1].reverse_complement() + template_rev

    # Overhangs already taken, stored upper-case so that 'gact' and 'GACT' collide.
    # NOTE(review): reverse complements of used overhangs are not excluded here,
    # matching the original check - confirm whether they should be.
    used_overhangs = {sticky_end_left.upper(), sticky_end_right.upper()}

    primer_set = [primer_first]
    # Middle gRNAs (empty for exactly two gRNAs) are split into two primers and joined by golden gate
    for seq in gRNA_list[1:-1]:
        overhang_start = _pick_overhang_start(
            seq, used_overhangs, preferred_start, overhang_length
        )
        overhang_end = overhang_start + overhang_length
        used_overhangs.add(str(seq[overhang_start:overhang_end]).upper())
        # Both primers contain the overhang, so the two halves overlap by overhang_length nt
        primer_rev = adaptor3 + seq[:overhang_end].reverse_complement() + template_rev
        primer_fwd = adaptor3 + seq[overhang_start:] + template_fwd
        primer_set.extend([primer_rev, primer_fwd])

    primer_set.append(primer_last)
    return primer_set

In [8]:
# Design the primers for every guide loaded above.
test_list = creepy_primer(gRNA)

# Derive the output name from the input *file name* only, so that a 'gRNA'
# substring in a parent directory is left untouched.
input_dir, input_name = os.path.split(gRNA_file_path)
output_name = input_name.replace('gRNA', 'creepy_primers')
if output_name == input_name:
    # Without this fallback the primers would be written over the input file.
    output_name = 'creepy_primers_' + input_name
creepy_primer_file = os.path.join(input_dir, output_name)

# One primer per line, in the order returned by creepy_primer.
output_list = [str(primer) for primer in test_list]

with open(creepy_primer_file, 'w') as fp:
    fp.write('\n'.join(output_list))