# Download virus genome
### NCBI `datasets` CLI tool
Equivalent shell command (the function below additionally passes `--assembly-level complete`):

`datasets download genome taxon 10298 --filename virus-temp-download/genome.zip`

In [13]:
import pathlib
import subprocess
import zipfile
import shutil

def download_genome(taxon_id) -> None:
    """Download one genome FASTA for an NCBI taxon with the ``datasets`` CLI.

    The function keeps a plain-text ledger (``downloaded-taxonids.txt`` in the
    current working directory) of taxon IDs that were already fetched. If
    ``taxon_id`` is listed there, nothing is downloaded. Otherwise the
    ``datasets download genome taxon <id> --assembly-level complete`` command
    is run, the resulting zip is unpacked in ``virus-temp-download/``, the
    first ``.fna`` file (in sorted path order) is moved to
    ``virus-references/<taxon_id>/`` and the temporary directory is removed.

    The taxon ID is written to the ledger only after the FASTA file has been
    moved into place, so a failed attempt can simply be retried.

    Parameters
    ----------
    taxon_id
        NCBI taxonomy ID; it is converted with ``str()`` before use.

    Raises
    ------
    subprocess.CalledProcessError
        If the ``datasets`` command exits with a non-zero status.
    FileNotFoundError
        If the downloaded archive contains no ``.fna`` file (also raised by
        ``subprocess`` when the ``datasets`` executable cannot be found).
    """
    # Already downloaded taxon ids
    downloaded_taxon = pathlib.Path("downloaded-taxonids.txt")
    # A missing ledger just means nothing has been downloaded yet.
    if downloaded_taxon.exists():
        downloaded_taxon_text = downloaded_taxon.read_text().split()
    else:
        downloaded_taxon_text = []

    if str(taxon_id) in downloaded_taxon_text:
        print(f"------- {taxon_id} aldready downloaded! -------")
        return None

    # Download virus genome to temp dir first
    temp_dir = pathlib.Path("virus-temp-download")
    temp_dir.mkdir(parents=True, exist_ok=True)
    temp_genome = temp_dir / "genome.zip"

    # Download the genome using the NCBI datasets CLI tool
    download_command = [
        "datasets",
        "download",
        "genome",
        "taxon",
        str(taxon_id),
        "--assembly-level",
        "complete",
        "--filename",
        str(temp_genome),
    ]

    try:
        try:
            # check=True turns a non-zero exit status into CalledProcessError;
            # subprocess.call would only return the status code.
            subprocess.run(download_command, check=True)
        except subprocess.CalledProcessError as e:
            print(f"Error downloading genome for taxon ID {taxon_id}: {e}")
            raise

        # Unzip the content of the downloaded file
        with zipfile.ZipFile(temp_genome) as archive:
            archive.extractall(path=temp_dir)

        # Take the first of all fasta files, skipping anything under a path
        # containing ".ipynb" (e.g. notebook checkpoint folders).
        fasta_files = sorted(
            x for x in temp_dir.rglob("*.fna") if ".ipynb" not in str(x)
        )
        if not fasta_files:
            raise FileNotFoundError(
                f"No .fna file found in the archive downloaded for taxon ID {taxon_id}"
            )
        fasta_file = fasta_files[0]

        # Move the file to the virus-references folder
        dest_folder = pathlib.Path("virus-references") / str(taxon_id)
        dest_folder.mkdir(parents=True, exist_ok=True)

        dest_file = dest_folder / f"{fasta_file.stem}.fna"
        fasta_file.rename(dest_file)
    finally:
        # Remove the temp folder whether or not the download succeeded, so a
        # retry never picks up stale files from a previous attempt.
        shutil.rmtree(temp_dir, ignore_errors=True)

    # Record the taxon only now that the genome is safely in place.
    with open(downloaded_taxon, mode="a") as f:
        f.write(f"{taxon_id}\n")

    print(f"--- {taxon_id} downloaded succesfully!")

  


In [14]:
# Fetch the genome for NCBI taxon ID 10298 (skipped if already recorded as downloaded).
download_genome(taxon_id=10298)

Collecting 3  records [------------------------------------------------]   0% 0/3
[1A[2KCollecting 3  records [------------------------------------------------]   0% 0/3
[1A[2KCollecting 3  records [------------------------------------------------]   0% 0/3
[1A[2KCollecting 3  records [------------------------------------------------]   0% 0/3
[1A[2KCollecting 3  records [------------------------------------------------]   0% 0/3
[1A[2KCollecting 3  records [------------------------------------------------]   0% 0/3
[1A[2KCollecting 3  records [------------------------------------------------]   0% 0/3
[1A[2KCollecting 3  records [------------------------------------------------]   0% 0/3
[1A[2KCollecting 3  records [------------------------------------------------]   0% 0/3
[1A[2KCollecting 3  records [------------------------------------------------]   0% 0/3
[1A[2KCollecting 3  records [------------------------------------------------]   0% 0/3
[1A[2KCollecting

--- 10298 downloaded succesfully!


Downloading: virus-temp-download/genome.zip    46.6kB 134kB/s
Downloading: virus-temp-download/genome.zip    46.6kB 134kB/s
Downloading: virus-temp-download/genome.zip    91.4kB 218kB/s
Downloading: virus-temp-download/genome.zip    98.3kB 234kB/s
Downloading: virus-temp-download/genome.zip    98.3kB 234kB/s
Downloading: virus-temp-download/genome.zip    98.3kB 234kB/s
Downloading: virus-temp-download/genome.zip    98.3kB 234kB/s
Downloading: virus-temp-download/genome.zip    98.3kB 234kB/s
Downloading: virus-temp-download/genome.zip    131kB 277kB/s
Downloading: virus-temp-download/genome.zip    131kB 277kB/s
Downloading: virus-temp-download/genome.zip    131kB 277kB/s
Downloading: virus-temp-download/genome.zip    131kB 277kB/s
Downloading: virus-temp-download/genome.zip    137kB done
