In [1]:
import pandas as pd
from Bio import SeqIO
from Bio.Seq import Seq
import openpyxl

In [6]:

def load_template(fasta_file):
    """Read the first record of a FASTA file.

    Returns a ``(record_id, sequence)`` tuple in which the sequence has been
    converted to a plain ``str``. Only the first record is consumed; any
    further records in the file are ignored.
    """
    records = SeqIO.parse(fasta_file, "fasta")
    first_record = next(records)
    sequence = str(first_record.seq)
    return first_record.id, sequence

def find_binding_site(template_seq, primer_seq):
    """Locate the first exact occurrence of ``primer_seq`` in ``template_seq``.

    Returns the 0-based offset of the match, or -1 when the primer does not
    occur in the template.
    """
    try:
        offset = template_seq.index(primer_seq)
    except ValueError:
        # No exact match anywhere in the template.
        offset = -1
    return offset

def write_bed_line(chrom, start, end, name, score, strand, out_handle):
    """Emit one six-column, tab-separated BED record to ``out_handle``.

    The columns are written in the order chrom, start, end, name, score,
    strand, followed by a newline.
    """
    columns = (chrom, start, end, name, score, strand)
    record = "\t".join(f"{column}" for column in columns)
    out_handle.write(record + "\n")

def generate_bed_from_excel(excel_file, fasta_file, bed_output, score=60):
    """Map primers from an Excel sheet onto a template and write a BED file.

    The spreadsheet is read with ``pd.read_excel`` and must provide the
    columns 'Primer_Name', 'Type' ('L' or 'R') and 'Sequence'. The template
    is the first record of ``fasta_file``; its record id is used as the BED
    chromosome name.

    Left primers are searched as given and reported on the '+' strand.
    Right primers are searched as their reverse complement and reported on
    the '-' strand. Matching is exact and case-insensitive, and only the
    first occurrence in the template is reported. Coordinates follow the BED
    convention: 0-based start, end-exclusive.

    Args:
        excel_file: Path to the Excel file with the primer table.
        fasta_file: Path to the FASTA file holding the template sequence.
        bed_output: Path of the BED file to create (overwritten if present).
        score: Value written to the BED score column for every primer.

    Rows with a missing type or sequence, and primers that cannot be found in
    the template, are skipped with a warning printed to stdout.
    """
    df = pd.read_excel(excel_file)

    chrom, template_seq = load_template(fasta_file)
    # Primers are upper-cased below; normalise the template the same way so
    # lower-case / soft-masked FASTA records still match.
    template_seq = template_seq.upper()

    with open(bed_output, "w") as bed_out:
        for _, row in df.iterrows():
            primer_name = row['Primer_Name']  # e.g., SNVSSeg_1_L
            primer_type = row['Type']  # 'L' or 'R'
            raw_seq = row['Sequence']

            # Empty spreadsheet cells come back as NaN, which has no .strip().
            if pd.isna(primer_type) or pd.isna(raw_seq):
                print(f"Warning: primer {primer_name} skipped (missing type or sequence).")
                continue

            primer_seq = str(raw_seq).strip().upper()
            if not primer_seq:
                # An empty query would "match" at offset 0 and yield a bogus entry.
                print(f"Warning: primer {primer_name} skipped (empty sequence).")
                continue

            if str(primer_type).strip().upper() == 'R':
                # Right primers anneal to the reverse strand, so the forward
                # template contains their reverse complement.
                query = str(Seq(primer_seq).reverse_complement())
                strand = '-'
            else:
                query = primer_seq
                strand = '+'

            pos = find_binding_site(template_seq, query)
            if pos == -1:
                print(f"Warning: primer {primer_name} ({primer_type}) not found in template {chrom}; skipped.")
                continue

            start = pos
            end = pos + len(primer_seq)
            write_bed_line(chrom, start, end, primer_name, score, strand, bed_out)
            print(f"Primer {primer_name} ({primer_type}) mapped at {start}-{end} on {strand} strand.")

# Example usage: declare the input/output paths, then build the BED file.
excel_file = "../Primers/Primers_tiling.xlsx"  # primer table (Excel)
fasta_file = "../Reference/NM11_LRef.fasta"  # template sequence (FASTA)
bed_output = "../Primers/SNV_L_primers.bed"  # BED file to be written

generate_bed_from_excel(
    excel_file=excel_file,
    fasta_file=fasta_file,
    bed_output=bed_output,
)

Primer SNVLSeg_1_L (L) mapped at 7-33 on + strand.
Primer SNVLSeg_1_R (R) mapped at 303-325 on - strand.
Primer SNVLSeg_2_L (L) mapped at 241-268 on + strand.
Primer SNVLSeg_2_R (R) mapped at 509-531 on - strand.
Primer SNVLSeg_3_L (L) mapped at 423-448 on + strand.
Primer SNVLSeg_3_R (R) mapped at 683-705 on - strand.
Primer SNVLSeg_4_L (L) mapped at 626-650 on + strand.
Primer SNVLSeg_4_R (R) mapped at 875-899 on - strand.
Primer SNVLSeg_5_L_3 (L) mapped at 715-739 on + strand.
Primer SNVLSeg_5_R_3 (R) mapped at 1192-1214 on - strand.
Primer SNVLSeg_6_L (L) mapped at 1034-1064 on + strand.
Primer SNVLSeg_6_R (R) mapped at 1328-1354 on - strand.
Primer SNVLSeg_7_L (L) mapped at 1262-1286 on + strand.
Primer SNVLSeg_7_R (R) mapped at 1538-1561 on - strand.
Primer SNVLSeg_8_L_2 (L) mapped at 1436-1458 on + strand.
Primer SNVLSeg_8_R_2 (R) mapped at 1772-1795 on - strand.
Primer SNVLSeg_9_L (L) mapped at 1716-1744 on + strand.
Primer SNVLSeg_9_R (R) mapped at 1978-2000 on - strand.
Prime