In [14]:
INPUT_FILE = "sequences/P_1-Genome.fasta"
OUTPUT_FILE = "sequences/P_1-Spike.fasta"

from Bio import SeqIO

# adjust these if your sequence has extra leading Ns/UTRs trimmed

for rec in SeqIO.parse(INPUT_FILE, "fasta"):
    spike_nt = rec
    # Write out as a new FASTA record:
    rec.id += "_Spike"
    rec.description = f"P_1-Proteome"
    spike_aa = spike_nt.translate(to_stop=False)
    rec.seq = spike_aa
    
    # Save the spike sequence to the output file
    with open(OUTPUT_FILE, "w") as output_handle:
        SeqIO.write(rec, output_handle, "fasta")
    
    print(f"Spike sequence saved to {OUTPUT_FILE}")
    print(f"Sequence length: {len(spike_nt)} nucleotides")
    print(rec.format("fasta"))



TypeError: data should be a string, bytes, bytearray, Seq, or MutableSeq object

## Sanity Check

In [12]:
from Bio.Seq import Seq
from Bio import SeqIO

# Read the saved spike sequence file
spike_record = SeqIO.read("sequences/P_1-Spike.fasta", "fasta")
spike_nt = spike_record.seq

# Translate to amino acids
spike_aa = spike_nt.translate(to_stop=False)

print(f"Nucleotide length: {len(spike_nt)}")
print(f"Amino acid length: {len(spike_aa)}")
print(f"Expected AA length: ~1270-1276")
print(f"Match expected range: {1270 <= len(spike_aa) <= 1276}")


Nucleotide length: 3822
Amino acid length: 1274
Expected AA length: ~1270-1276
Match expected range: True
