# In [None]:
from Bio import SeqIO

# Path to the input FASTA file
input_fasta = "/home/yuki_ito/work2/Data/Jamaican_v3/gass_v1.1.genome.sm.fasta"
# Path to the output FASTA file
output_fasta = "/home/yuki_ito/work2/Tasks/mosdepth/Jamaican_v1.1/Jamaican_chr_1-15.fasta"

# Inclusive range of scaffold numbers to extract
start_scaffold = 1
end_scaffold = 15

# Header prefix that precedes the scaffold number, e.g. "Super-Scaffold_7"
SCAFFOLD_PREFIX = "Super-Scaffold_"


def parse_scaffold_number(header, prefix=SCAFFOLD_PREFIX):
    """Return the scaffold number encoded in a FASTA record ID, or None.

    A header is accepted only when it is exactly ``prefix`` followed by one
    or more decimal digits (e.g. ``"Super-Scaffold_12"`` -> ``12``). Anything
    else -- a different prefix, a missing number, or extra ``_``-separated
    fields after the number -- yields ``None``.

    Args:
        header: Record ID taken from the FASTA header line.
        prefix: Text that must precede the number.

    Returns:
        The scaffold number as an ``int``, or ``None`` if ``header`` does not
        have the expected form.
    """
    if not header.startswith(prefix):
        return None
    suffix = header[len(prefix):]
    # isdecimal() rather than isdigit(): isdigit() also accepts characters
    # such as superscript digits, which int() cannot parse.
    if not suffix.isdecimal():
        return None
    return int(suffix)


def is_selected_scaffold(header, start=start_scaffold, end=end_scaffold):
    """Return True when ``header`` names a scaffold numbered in [start, end]."""
    number = parse_scaffold_number(header)
    return number is not None and start <= number <= end


# Guarded so that importing this file does not touch the filesystem; when the
# file is run as a script (or as a notebook cell) __name__ is "__main__".
if __name__ == "__main__":
    # Collect every record whose ID falls in the requested scaffold range
    with open(input_fasta, "r") as infile:
        extracted_contigs = [
            record
            for record in SeqIO.parse(infile, "fasta")
            if is_selected_scaffold(record.id)
        ]

    # Save the extracted contigs; SeqIO.write returns the number of records written
    with open(output_fasta, "w") as outfile:
        written_count = SeqIO.write(extracted_contigs, outfile, "fasta")

    if written_count:
        print(
            f">{SCAFFOLD_PREFIX}{start_scaffold} to >{SCAFFOLD_PREFIX}{end_scaffold} "
            f"have been saved to {output_fasta}."
        )
        print(f"{written_count} sequence(s) written.")
    else:
        # Do not claim success when nothing matched: the output file is empty.
        print(
            f"Warning: no headers from >{SCAFFOLD_PREFIX}{start_scaffold} to "
            f">{SCAFFOLD_PREFIX}{end_scaffold} were found in {input_fasta}; "
            f"{output_fasta} is empty."
        )