In [29]:
import os
import shutil
import zipfile
from urllib.parse import urljoin, urlsplit

import nibabel as nib
import requests
from bs4 import BeautifulSoup

In [30]:
# Function to download and extract all zip files
def download_and_extract_zip(url, download_folder):
    """
    Downloads and extracts ZIP files if they haven't been downloaded and processed.
    """
    response = requests.get(url)
    soup = BeautifulSoup(response.text, 'html.parser')

    zip_links = [a['href'] for a in soup.find_all('a', href=True) if 'zip' in a['href']]

    for link in zip_links:
        # Clean the file name by removing query parameters
        file_url = 'https://zenodo.org' + link if link.startswith('/') else link
        file_name = file_url.split('/')[-1].split('?')[0]  # Remove query parameters
        file_path = os.path.join(download_folder, file_name)

        # Create a folder for each ZIP file
        folder_name = os.path.join(download_folder, file_name.replace(".zip", ""))
        
        # Skip download and extraction if the folder already exists
        if os.path.exists(folder_name):
            print(f"Folder {folder_name} already exists. Skipping download and extraction.")
            continue

        if not os.path.exists(file_path):
            print(f"Downloading {file_name}...")
            response = requests.get(file_url)
            with open(file_path, 'wb') as file:
                file.write(response.content)
            print(f"Downloaded {file_name}")

        # Extract the ZIP file into its corresponding folder
        try:
            with zipfile.ZipFile(file_path, 'r') as zip_ref:
                zip_ref.extractall(folder_name)
                print(f"Extracted {file_name} to {folder_name}")
            # Delete the ZIP file after extraction
            os.remove(file_path)
            print(f"Deleted ZIP file: {file_path}")
        except zipfile.BadZipFile:
            print(f"Error: {file_name} is not a valid zip file.")



In [31]:
# Function to convert .nii.gz to .nii
def convert_nii_gz_to_nii(input_folder, output_folder=None):
    """
    Converts all .nii.gz files in the input folder to .nii files.

    Parameters:
        input_folder (str): Path to the folder containing .nii.gz files.
        output_folder (str): Path to save .nii files (optional).
    """
    if not os.path.exists(input_folder):
        raise FileNotFoundError(f"The folder {input_folder} does not exist.")
    
    if output_folder is None:
        output_folder = input_folder
    else:
        os.makedirs(output_folder, exist_ok=True)

    for file_name in os.listdir(input_folder):
        if file_name.endswith(".nii.gz"):
            input_file = os.path.join(input_folder, file_name)
            output_file = os.path.join(output_folder, file_name.replace(".nii.gz", ".nii"))

            img = nib.load(input_file)
            nib.save(img, output_file)
            print(f"Converted {input_file} to {output_file}")

            # Delete the original .nii.gz file
            os.remove(input_file)
            print(f"Deleted original file: {input_file}")


In [32]:
def cleanup_residual_files(directory):
    """
    Deletes all files in the directory except folders.
    """
    for item in os.listdir(directory):
        item_path = os.path.join(directory, item)
        if os.path.isfile(item_path):
            os.remove(item_path)
            print(f"Deleted residual file: {item_path}")


In [35]:
# Main workflow
url = 'https://zenodo.org/record/3757476'  # Replace with the actual URL
download_folder = '../data'  # Set your folder path

# Ensure the download folder exists
os.makedirs(download_folder, exist_ok=True)

# Download, extract, and convert files
download_and_extract_zip(url, download_folder)

# Convert all extracted .nii.gz files to .nii
for folder in os.listdir(download_folder):
    folder_path = os.path.join(download_folder, folder)
    if os.path.isdir(folder_path):
        convert_nii_gz_to_nii(folder_path)


# Remove any remaining residual files
cleanup_residual_files(download_folder)


Folder ../data/COVID-19-CT-Seg_20cases already exists. Skipping download and extraction.
Folder ../data/COVID-19-CT-Seg_20cases already exists. Skipping download and extraction.
Folder ../data/COVID-19-CT-Seg_20cases already exists. Skipping download and extraction.
Folder ../data/Infection_Mask already exists. Skipping download and extraction.
Folder ../data/Infection_Mask already exists. Skipping download and extraction.
Folder ../data/Infection_Mask already exists. Skipping download and extraction.
Folder ../data/Lung_and_Infection_Mask already exists. Skipping download and extraction.
Folder ../data/Lung_and_Infection_Mask already exists. Skipping download and extraction.
Folder ../data/Lung_and_Infection_Mask already exists. Skipping download and extraction.
Folder ../data/Lung_Mask already exists. Skipping download and extraction.
Folder ../data/Lung_Mask already exists. Skipping download and extraction.
Folder ../data/Lung_Mask already exists. Skipping download and extraction.
D