In [10]:
import tarfile
import os

def extract_tar_contents(file_path, output_folder):
    """
    Extracts the contents of a .tar.gz file to a specified directory.

    :param file_path: Path to the .tar.gz file.
    :param output_folder: Directory where files should be extracted.
    """
    try:
        with tarfile.open(file_path, 'r:gz') as tar:
            tar.extractall(path=output_folder)
            print(f"Extracted contents of {file_path} to {output_folder}")
    except Exception as e:
        print(f"Error extracting from {file_path}: {e}")

def main():
    data_folder = './data/'  # Replace with the path to your data folder
    new_data_folder = './data2/'  # New folder for extracted contents
    os.makedirs(new_data_folder, exist_ok=True)  # Create the new folder if it doesn't exist

    disc1_file = os.path.join(data_folder, 'oasis_cross-sectional_disc1.tar.gz')
    disc2_file = os.path.join(data_folder, 'oasis_cross-sectional_disc2.tar.gz')
    disc3_file = os.path.join(data_folder, 'OAS2_RAW_PART1.tar.gz')

    extract_tar_contents(disc1_file, new_data_folder)
    extract_tar_contents(disc2_file, new_data_folder)
    extract_tar_contents(disc3_file, new_data_folder)

if __name__ == '__main__':
    main()


Extracted contents of ./data/oasis_cross-sectional_disc1.tar.gz to ./data2/
Extracted contents of ./data/oasis_cross-sectional_disc2.tar.gz to ./data2/
Extracted contents of ./data/OAS2_RAW_PART1.tar.gz to ./data2/


In [22]:
%pip install nibabel
%pip install Pillow

Collecting nibabel
  Using cached nibabel-5.1.0-py3-none-any.whl (3.3 MB)
Installing collected packages: nibabel
Successfully installed nibabel-5.1.0

[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m23.2.1[0m[39;49m -> [0m[32;49m23.3.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpython3.11 -m pip install --upgrade pip[0m
Note: you may need to restart the kernel to use updated packages.

[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m23.2.1[0m[39;49m -> [0m[32;49m23.3.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpython3.11 -m pip install --upgrade pip[0m
Note: you may need to restart the kernel to use updated packages.


In [26]:
import os
import shutil
import nibabel as nib
from PIL import Image
import numpy as np

def generate_unique_filename(output_folder, filename):
    base, extension = os.path.splitext(filename)
    counter = 1
    unique_filename = filename

    while os.path.exists(os.path.join(output_folder, unique_filename)):
        unique_filename = f"{base}_{counter}{extension}"
        counter += 1

    return unique_filename

def convert_nifti_to_image(nifti_file, output_folder, format='PNG'):
    img = nib.load(nifti_file)
    data = img.get_fdata()

    # Handle the possibility of 4D data (including time or multiple volumes)
    if len(data.shape) == 4:
        # Only take the first volume for visualization
        data = data[:, :, :, 0]

    for i in range(data.shape[2]):
        slice = data[:, :, i]
        slice_normalized = (slice - np.min(slice)) / (np.max(slice) - np.min(slice))
        slice_8bit = (slice_normalized * 255).astype(np.uint8)
        output_file = os.path.join(output_folder, f'OAS2{i}.{format.lower()}')
        Image.fromarray(slice_8bit).save(output_file)
        print(f"Saved {output_file}")


def extract_and_convert_images(source_folders, nifti_output_folder, image_output_folder, image_extensions):
    nifti_pairs = set()

    for source_folder in source_folders:
        for root, dirs, files in os.walk(source_folder):
            for file in files:
                if file.lower().endswith('.hdr') or file.lower().endswith('.img'):
                    source_file_path = os.path.join(root, file)
                    unique_filename = generate_unique_filename(nifti_output_folder, file)
                    destination_file_path = os.path.join(nifti_output_folder, unique_filename)
                    shutil.copy(source_file_path, destination_file_path)
                    print(f"Copied NIfTI {source_file_path} to {destination_file_path}")

                    nifti_base = os.path.splitext(unique_filename)[0]
                    nifti_pairs.add((nifti_base, root))  # Store base name and original path

                elif any(file.lower().endswith(ext) for ext in image_extensions):
                    source_file_path = os.path.join(root, file)
                    unique_filename = generate_unique_filename(image_output_folder, file)
                    destination_file_path = os.path.join(image_output_folder, unique_filename)
                    shutil.copy(source_file_path, destination_file_path)
                    print(f"Copied {source_file_path} to {destination_file_path}")

    for nifti_base, original_path in nifti_pairs:
        hdr_file = os.path.join(nifti_output_folder, nifti_base + '.hdr')
        img_file = os.path.join(nifti_output_folder, nifti_base + '.img')
        if os.path.exists(hdr_file) and os.path.exists(img_file):
            convert_nifti_to_image(hdr_file, image_output_folder, format='GIF')  # or 'PNG'

def main():
    source_folders = ['./data2/disc1/', './data2/disc2/', './data2/OAS2_RAW_PART1/']
    nifti_output_folder = './NIFTI_Images/'
    image_output_folder = './images/'
    image_extensions = ['.png', '.gif']  # Only for non-NIfTI images

    os.makedirs(nifti_output_folder, exist_ok=True)
    os.makedirs(image_output_folder, exist_ok=True)
    extract_and_convert_images(source_folders, nifti_output_folder, image_output_folder, image_extensions)

if __name__ == '__main__':
    main()

Copied NIfTI ./data2/disc1/OAS1_0016_MR1/FSL_SEG/OAS1_0016_MR1_mpr_n3_anon_111_t88_masked_gfc_fseg.hdr to ./NIFTI_Images/OAS1_0016_MR1_mpr_n3_anon_111_t88_masked_gfc_fseg.hdr
Copied ./data2/disc1/OAS1_0016_MR1/FSL_SEG/OAS1_0016_MR1_mpr_n3_anon_111_t88_masked_gfc_fseg_tra_90.gif to ./images/OAS1_0016_MR1_mpr_n3_anon_111_t88_masked_gfc_fseg_tra_90.gif
Copied NIfTI ./data2/disc1/OAS1_0016_MR1/FSL_SEG/OAS1_0016_MR1_mpr_n3_anon_111_t88_masked_gfc_fseg.img to ./NIFTI_Images/OAS1_0016_MR1_mpr_n3_anon_111_t88_masked_gfc_fseg.img
Copied NIfTI ./data2/disc1/OAS1_0016_MR1/PROCESSED/MPRAGE/SUBJ_111/OAS1_0016_MR1_mpr_n3_anon_sbj_111.img to ./NIFTI_Images/OAS1_0016_MR1_mpr_n3_anon_sbj_111.img
Copied ./data2/disc1/OAS1_0016_MR1/PROCESSED/MPRAGE/SUBJ_111/OAS1_0016_MR1_mpr_n3_anon_sbj_111_sag_88.gif to ./images/OAS1_0016_MR1_mpr_n3_anon_sbj_111_sag_88.gif
Copied NIfTI ./data2/disc1/OAS1_0016_MR1/PROCESSED/MPRAGE/SUBJ_111/OAS1_0016_MR1_mpr_n3_anon_sbj_111.hdr to ./NIFTI_Images/OAS1_0016_MR1_mpr_n3_anon_

  slice_normalized = (slice - np.min(slice)) / (np.max(slice) - np.min(slice))
  slice_8bit = (slice_normalized * 255).astype(np.uint8)


Saved ./images/OAS136.gif
Saved ./images/OAS137.gif
Saved ./images/OAS138.gif
Saved ./images/OAS139.gif
Saved ./images/OAS140.gif
Saved ./images/OAS141.gif
Saved ./images/OAS142.gif
Saved ./images/OAS143.gif
Saved ./images/OAS144.gif
Saved ./images/OAS145.gif
Saved ./images/OAS146.gif
Saved ./images/OAS147.gif
Saved ./images/OAS148.gif
Saved ./images/OAS149.gif
Saved ./images/OAS150.gif
Saved ./images/OAS151.gif
Saved ./images/OAS152.gif
Saved ./images/OAS153.gif
Saved ./images/OAS154.gif
Saved ./images/OAS155.gif
Saved ./images/OAS156.gif
Saved ./images/OAS157.gif
Saved ./images/OAS158.gif
Saved ./images/OAS159.gif
Saved ./images/OAS160.gif
Saved ./images/OAS161.gif
Saved ./images/OAS162.gif
Saved ./images/OAS163.gif
Saved ./images/OAS164.gif
Saved ./images/OAS165.gif
Saved ./images/OAS166.gif
Saved ./images/OAS167.gif
Saved ./images/OAS168.gif
Saved ./images/OAS169.gif
Saved ./images/OAS170.gif
Saved ./images/OAS171.gif
Saved ./images/OAS172.gif
Saved ./images/OAS173.gif
Saved ./imag

In [None]:
import pandas as pd

def extract_info_from_csv(file_path, columns):
    """
    Extract specified information from a CSV file.

    :param file_path: Path to the CSV file.
    :param columns: Columns to be extracted.
    :return: Extracted DataFrame.
    """
    try:
        df = pd.read_csv(file_path)
        return df[columns]
    except Exception as e:
        print(f"Error reading {file_path}: {e}")
        return pd.DataFrame()

def main():
    data_folder = './data/'
    longitudinal_file = data_folder + 'oasis_longitudinal.csv'
    cross_sectional_file = data_folder + 'oasis_cross-sectional.csv'

    # Columns to extract for each file
    longitudinal_columns = ['Subject ID', 'MRI ID', 'Group', 'Visit', 'MR Delay', 'M/F', 'Hand', 'Age', 'EDUC', 'SES']
    cross_sectional_columns = ['ID', 'M/F', 'Hand', 'Age', 'Educ', 'SES', 'MMSE', 'CDR', 'eTIV', 'nWBV']

    # Extract info
    longitudinal_info = extract_info_from_csv(longitudinal_file, longitudinal_columns)
    cross_sectional_info = extract_info_from_csv(cross_sectional_file, cross_sectional_columns)

    # Displaying the extracted information
    print("Longitudinal Data:")
    print(longitudinal_info)
    print("\nCross-Sectional Data:")
    print(cross_sectional_info)

if __name__ == '__main__':
    main()

In [7]:
import pandas as pd

# Load datasets
longitudinal = pd.read_csv('data/oasis_longitudinal.csv')
cross_sectional = pd.read_csv('data/oasis_cross-sectional.csv')

# Rename columns in cross-sectional to match longitudinal for consistency
# Assuming 'ID' in cross-sectional corresponds to 'Subject ID' in longitudinal
cross_sectional.rename(columns={'ID': 'Subject ID', 'Educ': 'EDUC'}, inplace=True)

# Handle missing values
# This is a simple fill with a placeholder, but you might want more sophisticated handling
longitudinal.fillna(-1, inplace=True)  # -1 is just an example, adjust as needed
cross_sectional.fillna(-1, inplace=True)

# Merge datasets
# This is an example of an outer join, which includes all records from both datasets
combined = pd.merge(longitudinal, cross_sectional, on='Subject ID', how='outer', suffixes=('_long', '_cross'))

# Save the combined dataset
combined.to_csv('data/combined_oasis.csv', index=False)