In [326]:
from Bio.Seq import Seq
from Bio import SeqIO
from Bio.Restriction import *
import pandas as pd
import os
from Bio.SeqRecord import SeqRecord
import datetime

In [327]:
# Input file names; each one is turned into a full path under ~/Desktop below
template = "pXW466.fa"
vector = "pXW467.fa"
primer = "primers.txt"
primer2 = "pXW470_creepy_primer_2023-10-30_18-21-24.txt"

# Prefix of the output file name (the rest of the name is added when the file is written)
gga_product_file = "GGA_product_"

# Resolve every input against the current user's Desktop folder.
# NOTE: `template` and `vector` are rebound from bare file names to full paths here.
desktop = os.path.expanduser("~/Desktop")
template = f"{desktop}/{template}"
vector = f"{desktop}/{vector}"
primer_path = f"{desktop}/{primer}"
primer_path_2 = f"{desktop}/{primer2}"

In [328]:
def load_fasta_sequence(file_path):
    """Return the upper-cased sequence of the first record in a FASTA file.

    Only the first record is read; any further records are ignored.

    Args:
        file_path: Path of the FASTA file to open.

    Returns:
        The result of ``record.seq.upper()`` for the first record, or ``None``
        when the file contains no records.
    """
    with open(file_path, "r") as handle:
        first_record = next(SeqIO.parse(handle, "fasta"), None)
        if first_record is None:
            # No record in the file: mirror the implicit None of an empty loop
            return None
        return first_record.seq.upper()

In [329]:
def load_primer_pairs(file_path):
    """Load forward/reverse primer pairs from a tab-separated text file.

    The first line is skipped. After it, lines are consumed two at a time:
    the first of each pair is the forward primer, the second the reverse
    primer. The sequence is taken from the second tab-separated field of
    each line and upper-cased.

    Args:
        file_path: Path of the primer file.

    Returns:
        pandas.DataFrame with columns ``'Fwd'`` and ``'Rev'`` holding Seq objects.

    Raises:
        IndexError: if a line has no second tab-separated field, or if the
            last forward-primer line has no following line.
    """
    with open(file_path, 'r') as handle:
        rows = handle.read().splitlines()

    pairs = []
    # Index 0 is skipped; forward primers then sit on every second row
    for fwd_idx in range(1, len(rows), 2):
        fwd_text, rev_text = (rows[j].split('\t')[1] for j in (fwd_idx, fwd_idx + 1))
        pairs.append((Seq(fwd_text.upper()), Seq(rev_text.upper())))

    return pd.DataFrame(pairs, columns=['Fwd', 'Rev'])

In [330]:
def load_primer_pairs_2(file_path):
    """Parse primer names and sequences out of a text report.

    Scanning rules, applied line by line:
      * a line containing "primer" (case-insensitive) switches recording on
        and is itself skipped;
      * a blank (whitespace-only) line switches recording off;
      * while recording, a line that splits into exactly two tab-separated
        fields contributes one primer (name, sequence); other lines are ignored.

    Args:
        file_path: Path of the text file to parse.

    Returns:
        pandas.DataFrame with columns ``'Primer'`` (name as read from the file)
        and ``'Sequence'`` (upper-cased Seq built from the stripped second field).
    """
    with open(file_path, 'r') as handle:
        rows = handle.read().splitlines()

    names = []
    sequences = []
    recording = False

    for row in rows:
        # A line mentioning "primer" (re)starts a primer section
        if "primer" in row.lower():
            recording = True
            continue

        # A blank line ends the current primer section
        if not row.strip():
            recording = False

        if not recording:
            continue

        fields = row.split('\t')
        if len(fields) != 2:
            continue

        name, raw_sequence = fields
        names.append(name)
        sequences.append(Seq(raw_sequence.strip()).upper())

    return pd.DataFrame({'Primer': names, 'Sequence': sequences})

In [331]:
def pcr_amplification_with_sticky_ends(template, forward_primer, reverse_primer, anneal_len=15):
    """Simulate a PCR whose primers carry 5' extensions.

    The last ``anneal_len`` bases of each primer are taken as the annealing
    part. The forward annealing part is searched in ``template`` as-is; the
    reverse annealing part is searched as its reverse complement, and only
    downstream of the forward site. The product is the full forward primer,
    the template stretch between the two sites, and the reverse complement of
    the full reverse primer.

    Args:
        template: Template sequence; must support ``find`` and slicing.
        forward_primer: Forward primer, 5'->3'.
        reverse_primer: Reverse primer, 5'->3'; must support ``reverse_complement``.
        anneal_len: Number of 3' bases of each primer used for the search
            (default 15, the value that used to be hard-coded).

    Returns:
        The simulated PCR product, or ``None`` (after printing a warning) when
        the forward site is absent or the reverse site is not found downstream
        of the forward site.

    Raises:
        ValueError: if ``anneal_len`` is smaller than 1.
    """
    if anneal_len < 1:
        raise ValueError("anneal_len must be a positive integer")

    # 3' annealing parts of the primers, both expressed on the template strand
    fwd_anneal = forward_primer[-anneal_len:]
    rev_anneal_rc = reverse_primer[-anneal_len:].reverse_complement()

    fwd_anneal_pos = template.find(fwd_anneal)
    rev_anneal_pos = -1
    if fwd_anneal_pos != -1:
        region_start = fwd_anneal_pos + len(fwd_anneal)
        # Search only downstream of the forward site: a reverse site located
        # upstream would otherwise give an inverted (empty) slice and a bogus
        # "product" consisting of the two primers alone.
        rev_anneal_pos = template.find(rev_anneal_rc, region_start)

    if rev_anneal_pos == -1:
        # One or both annealing sites are missing (or in the wrong order)
        print(f"WARNING: primers {str(forward_primer)} and {str(reverse_primer)} do NOT amplify the template.")
        return None

    # Template stretch strictly between the two annealing sites
    template_region = template[region_start:rev_anneal_pos]

    pcr_product = forward_primer + template_region + reverse_primer.reverse_complement()
    print(f"Amplified template with {str(forward_primer)} and {str(reverse_primer)}.")

    return pcr_product

In [332]:
def batch_pcr(p_df, temp_seq, amplify=None):
    """Run one simulated PCR per consecutive primer pair.

    Rows 0 and 1 of ``p_df['Sequence']`` form the first forward/reverse pair,
    rows 2 and 3 the second, and so on. Rows are addressed by position, so the
    DataFrame index does not have to be 0..n-1.

    Args:
        p_df: DataFrame with a ``'Sequence'`` column holding the primers.
        temp_seq: Template sequence passed to every PCR.
        amplify: Optional callable ``(template, forward, reverse) -> product``.
            Defaults to ``pcr_amplification_with_sticky_ends``.

    Returns:
        List with one entry per primer pair, in order. An entry is whatever
        ``amplify`` returned for that pair, so a non-amplifying pair yields
        ``None`` at its position and the list stays aligned with the pairs.

    Raises:
        ValueError: if the number of primers is odd, i.e. the last forward
            primer has no reverse partner.
    """
    if amplify is None:
        amplify = pcr_amplification_with_sticky_ends

    primers = p_df['Sequence']
    n_primers = len(primers)
    if n_primers % 2 != 0:
        raise ValueError(
            f"Expected an even number of primers (forward/reverse pairs), got {n_primers}."
        )

    # The former `try ... except None: pass` was removed on purpose: `except None`
    # is not a valid handler (Python raises TypeError when it tries to match it),
    # so it could only mask the real error. Exceptions now propagate unchanged.
    pcr_prod_list = []
    for i in range(0, n_primers, 2):
        product = amplify(temp_seq, primers.iloc[i], primers.iloc[i + 1])
        pcr_prod_list.append(product)

    return pcr_prod_list

In [333]:
def batch_digestion_esp3i(pcr_list):
    """Digest every PCR product with Esp3I.

    Args:
        pcr_list: List of sequences to digest.

    Returns:
        List holding the result of ``Esp3I.catalyze`` for each input, in the
        same order as ``pcr_list``.
    """
    return [Esp3I.catalyze(product) for product in pcr_list]

In [0]:
def gga_ligation(vect_dig, insert_list):
    """Assemble digested inserts into a digested vector, junction by junction.

    Two fragments are treated as compatible when their first four characters
    are equal. Starting from ``vect_dig[0]``:
      * the first insert's element [1] is appended if it matches ``vect_dig[1]``;
      * each later insert's element [1] is appended if it matches element [2]
        of the insert before it;
      * ``vect_dig[2]`` is appended if it matches element [2] of the last insert.
    A junction that does not match is skipped silently; later junctions are
    still evaluated.

    Args:
        vect_dig: Indexable with at least three fragments of the digested vector.
        insert_list: Non-empty list of digested inserts, each indexable with
            at least three fragments.

    Returns:
        The concatenated product.
    """
    def overhang(fragment):
        # The first four characters of a fragment are what gets compared
        return fragment[:4]

    assembled = vect_dig[0]

    # Junction between the vector and the first insert
    if overhang(vect_dig[1]) == overhang(insert_list[0][1]):
        assembled = assembled + insert_list[0][1]
        print('ligating fragment 1')

    # Junctions between neighbouring inserts
    neighbours = zip(insert_list, insert_list[1:])
    for number, (upstream, downstream) in enumerate(neighbours, start=2):
        if overhang(upstream[2]) == overhang(downstream[1]):
            assembled = assembled + downstream[1]
            print(f'ligating fragment {number}')

    # Junction between the last insert and the closing vector fragment
    if overhang(insert_list[-1][2]) == overhang(vect_dig[2]):
        assembled = assembled + vect_dig[2]

    return assembled

In [0]:
def gga_ligation_2(vect_dig, insert_list):
    """Assemble digested inserts into a digested vector, junction by junction.

    Fragments are considered compatible when their first four characters are
    equal. The product starts as ``vect_dig[0]``; element [1] of an insert is
    appended when it matches the preceding fragment (``vect_dig[1]`` for the
    first insert, element [2] of the previous insert otherwise), and
    ``vect_dig[2]`` is appended when it matches element [2] of the last insert.
    Non-matching junctions are skipped without aborting the assembly.

    Args:
        vect_dig: Indexable with at least three fragments of the digested vector.
        insert_list: Non-empty list of digested inserts, each indexable with
            at least three fragments.

    Returns:
        The concatenated product.
    """
    final_prod = vect_dig[0]

    # Vector -> first insert
    if vect_dig[1][:4] == insert_list[0][1][:4]:
        final_prod = final_prod + insert_list[0][1]
        print('ligating fragment 1')

    # Insert i -> insert i+1
    for i in range(len(insert_list) - 1):
        if insert_list[i][2][:4] == insert_list[i + 1][1][:4]:
            final_prod = final_prod + insert_list[i + 1][1]
            print(f'ligating fragment {i+2}')

    # Last insert -> closing vector fragment
    if insert_list[-1][2][:4] == vect_dig[2][:4]:
        final_prod = final_prod + vect_dig[2]

    return final_prod

In [389]:
def make_sticky_ends_df(vect_frag, insert_list):
    """Tabulate the four-character ends of the vector and insert fragments.

    Row 0 describes the vector's opening side, rows 1..n the inserts in order,
    and the last row the vector's closing side.

    Args:
        vect_frag: Indexable with at least three fragments of the digested vector.
        insert_list: List of digested inserts, each indexable with at least
            three fragments.

    Returns:
        pandas.DataFrame with two string columns:
          * ``'Sticky_End1'``: ``'NA'``, then the first four characters of
            element [1] of each insert, then those of ``vect_frag[2]``;
          * ``'Sticky_End2'``: the first four characters of ``vect_frag[1]``,
            then those of element [2] of each insert, then ``'NA'``.
    """
    def first_four(fragment):
        return str(fragment[:4])

    vector_opening = first_four(vect_frag[1])

    # One pass over the inserts, collecting (element [1] end, element [2] end)
    insert_ends = [(first_four(frags[1]), first_four(frags[2])) for frags in insert_list]

    vector_closing = first_four(vect_frag[2])

    return pd.DataFrame({
        'Sticky_End1': ['NA', *(leading for leading, _ in insert_ends), vector_closing],
        'Sticky_End2': [vector_opening, *(trailing for _, trailing in insert_ends), 'NA'],
    })

In [404]:
def gga_ligation_3(vect_dig, insert_list):
    """Assemble all inserts into the vector after diagnosing their sticky ends.

    Two checks are run on the table built by ``make_sticky_ends_df``; both only
    print a message and never stop the assembly:
      1. each row's ``Sticky_End2`` must equal the next row's ``Sticky_End1``;
         only the first mismatch is reported;
      2. the ``Sticky_End1`` values (first row excluded) together with their
         reverse complements must all be distinct.

    The product is then ``vect_dig[0]``, followed by element [1] of every
    insert in order, followed by ``vect_dig[2]``, regardless of the checks.

    Args:
        vect_dig: Indexable with at least three fragments of the digested vector.
        insert_list: List of digested inserts, each indexable with at least
            three fragments.

    Returns:
        The concatenated product.
    """
    # Start from the left part of the digested vector
    assembled = vect_dig[0]

    ends = make_sticky_ends_df(vect_dig, insert_list)
    incoming_ends = ends['Sticky_End1'].tolist()
    outgoing_ends = ends['Sticky_End2'].tolist()

    # Check 1: every outgoing end must match the incoming end of the next row
    junctions = zip(outgoing_ends, incoming_ends[1:])
    for number, (outgoing, incoming) in enumerate(junctions, start=1):
        if outgoing != incoming:
            print(f'Sticky end #{number} is not compatible')
            break

    # Check 2: no end may repeat, either directly or as a reverse complement
    reverse_complements = [str(Seq(end).reverse_complement()) for end in incoming_ends]
    candidates = incoming_ends[1:] + reverse_complements[1:]
    if len(set(candidates)) != len(candidates):
        print('WARNING: some sticky ends are not unique')

    for number, fragments in enumerate(insert_list, start=1):
        assembled = assembled + fragments[1]
        print(f'ligating fragment {number}')

    assembled = assembled + vect_dig[2]

    return assembled

In [408]:
# Read the template and vector plasmid sequences, then the primer table
template_sequence = load_fasta_sequence(file_path=template)
vector_sequence = load_fasta_sequence(file_path=vector)
primer_df = load_primer_pairs_2(file_path=primer_path)

In [409]:
# Simulate one PCR per primer pair, then cut the PCR products and the vector with Esp3I
pcr_amplication_list = batch_pcr(p_df=primer_df, temp_seq=template_sequence)
pcr_digestion_list = batch_digestion_esp3i(pcr_list=pcr_amplication_list)
vector_digestion = Esp3I.catalyze(vector_sequence)

Amplified template with ACGTCTCCGACTTTAGAAAGTGAGTCATTTTGGGGTTTTAGAGCTAGAAATAGCAAGTTA and ACGTCTCCCCACAGCATGGTTGCGCAAGCCCGGAATCGAACCGGG.
Amplified template with ACGTCTCCGTGGCCAAGCAGGTTTTAGAGCTAGAAATAGCAAGTTA and ACGTCTCCATGCTCAGCCTTTGCGCAAGCCCGGAATCGAACCGGG.
Amplified template with ACGTCTCCGCATCTGCGCAGGTTTTAGAGCTAGAAATAGCAAGTTA and ACGTCTCCTCTATCAGAATATGCGCAAGCCCGGAATCGAACCGGG.
Amplified template with ACGTCTCCTAGATAACTCTAGTTTTAGAGCTAGAAATAGCAAGTTA and ACGTCTCCCCTAGAGGCTTCATGCGCAAGCCCGGAATCGAACCGGG.
Amplified template with ACGTCTCCTAGGGCTTCTCGTTTTAGAGCTAGAAATAGCAAGTTA and ACGTCTCCTAGTAATTTTTCTGCGCAAGCCCGGAATCGAACCGGG.
Amplified template with ACGTCTCCACTAGCCATAATGTTTTAGAGCTAGAAATAGCAAGTTA and ACGTCTCCTTTTACTACCGGTGCGCAAGCCCGGAATCGAACCGGG.
Amplified template with ACGTCTCCAAAAATTCATTTGTTTTAGAGCTAGAAATAGCAAGTTA and ACGTCTCCGTTAGTGGTGGCTGCGCAAGCCCGGAATCGAACCGGG.
Amplified template with ACGTCTCCTAACAGCCACAGGTTTTAGAGCTAGAAATAGCAAGTTA and ACGTCTCCAGAAGGAAGCCATGCGCAAGCCCGGAATCGAACCGGG.
Amplified 

In [410]:
# Ligate the digested inserts into the digested vector
final_product = gga_ligation_3(vector_digestion, pcr_digestion_list)

# Wrap the assembled sequence in a SeqRecord so it can be written as FASTA
record = SeqRecord(
    final_product,
    id='test',
    description='Golden Gate Assembly final product',
)

# Time-stamp the output file name so that repeated runs do not overwrite each other
current_datetime = datetime.datetime.now()
date_time_str = current_datetime.strftime("%Y-%m-%d_%H-%M-%S")
gga_product_path = f"{desktop}/{gga_product_file}{date_time_str}.fa"

# Kept as the last expression: the cell displays the value returned by SeqIO.write
SeqIO.write(record, gga_product_path, 'fasta')

ligating fragment 1
ligating fragment 2
ligating fragment 3
ligating fragment 4
ligating fragment 5
ligating fragment 6
ligating fragment 7
ligating fragment 8
ligating fragment 9
ligating fragment 10
ligating fragment 11
ligating fragment 12
ligating fragment 13
ligating fragment 14
ligating fragment 15
ligating fragment 16
ligating fragment 17
ligating fragment 18


1