In [None]:
!pip install biopython


Collecting biopython
  Downloading biopython-1.81-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (3.1 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m3.1/3.1 MB[0m [31m24.3 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: biopython
Successfully installed biopython-1.81


In [None]:
from Bio import Entrez
from Bio import SeqIO
import os

# Identify this client to NCBI. Set the ENTREZ_EMAIL environment variable to
# send requests under your own address; the original author's address is kept
# as the fallback so the notebook still runs unchanged when it is not set.
Entrez.email = os.environ.get("ENTREZ_EMAIL", "uygareren@posta.mu.edu.tr")
Entrez.tool = "week3-Homework.ipynb"

# Optional NCBI API key, read from the environment so it is never stored in the
# notebook. Stays None (Biopython's default) when NCBI_API_KEY is not set.
Entrez.api_key = os.environ.get("NCBI_API_KEY")


In [None]:
# Search the NCBI Assembly database for "Ralstonia solanacearum" and keep the
# UIDs of the matching records (at most retmax=100 are returned).
# Note: despite its name, `accession_uids` holds Assembly UIDs taken from the
# search result's "IdList", not accession numbers.
handle = Entrez.esearch(db="assembly", term="Ralstonia solanacearum", retmax=100)
try:
    record = Entrez.read(handle)
finally:
    # Release the connection even if parsing the response fails.
    handle.close()
accession_uids = record["IdList"]


In [None]:
# Collect the nucleotide records linked to each assembly UID.
# Every element appended is a link entry carrying an "Id" key, which is how the
# download cell reads it (`nucleotide_uid["Id"]`).
nucleotide_uids = []

# Iterate through each assembly UID and link to nucleotide entries
for assembly_uid in accession_uids:
    handle = Entrez.elink(dbfrom="assembly", db="nucleotide", from_uid=assembly_uid)
    try:
        links = Entrez.read(handle)
    finally:
        # One request is made per assembly; close each handle so connections
        # are not left open for the lifetime of the kernel.
        handle.close()

    # An assembly without nucleotide links yields an empty "LinkSetDb" list;
    # indexing [0] on it would raise IndexError and abort the whole loop.
    if not links or not links[0]["LinkSetDb"]:
        print(f"No nucleotide links found for assembly {assembly_uid}; skipping.")
        continue

    # Extract nucleotide UIDs from the first link set only and add them to the list
    nucleotide_uids.extend(links[0]["LinkSetDb"][0]["Link"])



In [None]:
# Import StringIO from the io module
from io import StringIO

# Create a directory to store the downloaded sequences
output_dir = "genomes"
os.makedirs(output_dir, exist_ok=True)

# Limit to the first 20 nucleotide UIDs. The name is rebound on purpose: the
# cell that combines the files iterates over this same truncated list.
nucleotide_uids = nucleotide_uids[:20]

# Download each nucleotide record and save it as its own FASTA file.
for nucleotide_uid in nucleotide_uids:
    try:
        # Fetch the record from NCBI as FASTA text
        handle = Entrez.efetch(db="nucleotide", id=nucleotide_uid["Id"], rettype="fasta", retmode="text")
        try:
            data = handle.read()  # Read the whole response into memory
        finally:
            # Always release the connection, even when reading fails.
            handle.close()

        # Progress marker only; the fetched text itself is not printed.
        print(f"Data for {nucleotide_uid}:")

        # Parse the data as a single FASTA record (raises if there is not
        # exactly one record in the response)
        record = SeqIO.read(StringIO(data), "fasta")

        # Write the sequence to a FASTA file.
        # NOTE: `nucleotide_uid` is the whole link entry, so the file name is
        # its repr (e.g. "{'Id': '2533054996'}.fasta"). The combining cell
        # rebuilds exactly this name, so change both places together.
        filename = os.path.join(output_dir, f"{nucleotide_uid}.fasta")
        with open(filename, "w") as fasta_file:
            SeqIO.write(record, fasta_file, "fasta")

        print(f"Sequence for {nucleotide_uid} downloaded and saved.")
    except Exception as e:
        # Best effort: report the failure and continue with the next record.
        print(f"Error for {nucleotide_uid}: {str(e)}")


Data for {'Id': '2533054996'}:
Sequence for {'Id': '2533054996'} downloaded and saved.
Data for {'Id': '2533054995'}:
Sequence for {'Id': '2533054995'} downloaded and saved.
Data for {'Id': '2533054994'}:
Sequence for {'Id': '2533054994'} downloaded and saved.
Data for {'Id': '2533054993'}:
Sequence for {'Id': '2533054993'} downloaded and saved.
Data for {'Id': '2533054992'}:
Sequence for {'Id': '2533054992'} downloaded and saved.
Data for {'Id': '2533054991'}:
Sequence for {'Id': '2533054991'} downloaded and saved.
Data for {'Id': '2533054990'}:
Sequence for {'Id': '2533054990'} downloaded and saved.
Data for {'Id': '2533054989'}:
Sequence for {'Id': '2533054989'} downloaded and saved.
Data for {'Id': '2533054988'}:
Sequence for {'Id': '2533054988'} downloaded and saved.
Data for {'Id': '2533054987'}:
Sequence for {'Id': '2533054987'} downloaded and saved.
Data for {'Id': '2533054986'}:
Sequence for {'Id': '2533054986'} downloaded and saved.
Data for {'Id': '2533054985'}:
Sequence for

In [None]:
# Combine all downloaded sequences into a single multi-FASTA file.
combined_filename = os.path.join(output_dir, "ralstonia_genomes.fasta")

with open(combined_filename, "w") as combined_file:
    for nucleotide_uid in nucleotide_uids:
        # Same per-record file name the download cell wrote to.
        filename = os.path.join(output_dir, f"{nucleotide_uid}.fasta")

        # The download cell only reports failures, so a record that could not
        # be fetched has no file on disk. Skip it instead of crashing with
        # FileNotFoundError and leaving a half-written combined file.
        if not os.path.isfile(filename):
            print(f"Skipping {nucleotide_uid}: {filename} not found.")
            continue

        with open(filename, "r") as fasta_file:
            contents = fasta_file.read()

        combined_file.write(contents)
        # Keep the next record's header on its own line if this file does not
        # end with a newline.
        if contents and not contents.endswith("\n"):
            combined_file.write("\n")
