In [36]:
%matplotlib inline
import os
import sys
import re
import glob

import pandas as pd
import numpy as np
import torch
import torch.utils.data
import torch.nn

from random import randrange
from PIL import Image
import matplotlib.pyplot as plt

### Download Files

In [42]:
import requests
import os
import tarfile

def download_file(url, local_filename, timeout=60):
    """
    Downloads a file from a given URL and saves it to a local path.

    The body is streamed into a temporary "<local_filename>.part" file that is
    renamed to local_filename only once every chunk has been written. An
    interrupted transfer therefore never leaves a truncated file at the final
    path, where an "already exists" check would mistake it for a finished
    download.

    Parameters:
    url: Address of the file to fetch.
    local_filename: Path the finished download is stored at.
    timeout: Value forwarded to requests.get as its timeout (seconds).

    Returns:
    local_filename, unchanged.

    Raises:
    Whatever requests.get / raise_for_status / the file writes raise; the
    partial file is removed before the exception propagates.
    """
    partial_filename = f"{local_filename}.part"
    try:
        with requests.get(url, stream=True, timeout=timeout) as r:
            r.raise_for_status()
            with open(partial_filename, 'wb') as f:
                for chunk in r.iter_content(chunk_size=8192):
                    f.write(chunk)
        # Publish the file under its final name only after a complete write.
        os.replace(partial_filename, local_filename)
    except BaseException:
        # BaseException so that a KeyboardInterrupt (interrupting the cell)
        # also cleans up the half-written file.
        if os.path.exists(partial_filename):
            os.remove(partial_filename)
        raise
    return local_filename

def download_oasis1(base_dir="/Users/valenetjong/Downloads/"):
    """
    Downloads the 12 OASIS-1 cross-sectional disc archives into base_dir.

    Archives whose target file already exists are skipped. A failed download
    is reported with a message and the loop moves on to the next disc.

    Parameters:
    base_dir: Directory the .tar.gz archives are stored in; created if missing.
    """
    base_url = "https://download.nrg.wustl.edu/data/oasis_cross-sectional_disc"
    total_disks = 12

    # Without this, a missing base_dir makes every single download fail when
    # the destination file is opened.
    os.makedirs(base_dir, exist_ok=True)

    for i in range(1, total_disks + 1):
        url = f"{base_url}{i}.tar.gz"
        local_filename = f"oasis_cross-sectional_disc{i}.tar.gz"
        full_file_path = os.path.join(base_dir, local_filename)

        # Check if the file already exists
        if os.path.exists(full_file_path):
            print(f"File {local_filename} already exists. Skipping download.")
            continue

        print(f"Downloading: {url}")

        try:
            download_file(url, full_file_path)
            print(f"Downloaded {local_filename}")
        except Exception as e:
            # Best effort: report the failure and continue with the next disc.
            print(f"Failed to download {local_filename}: {e}")

def extract_tar_gz(tar_path, extract_to_path):
    """
    Extracts a .tar.gz file to a specified directory.

    Parameters:
    tar_path: Path of the gzip-compressed tar archive.
    extract_to_path: Directory the archive members are extracted into.

    Raises:
    Whatever tarfile.open / extractall raise (e.g. a missing or corrupt
    archive, or a member rejected by the extraction filter).
    """
    # The archives come from a download, so refuse members that would land
    # outside extract_to_path. Extraction filters only exist on Python
    # versions that expose tarfile.data_filter; older ones keep the previous
    # unfiltered behaviour.
    extract_kwargs = {'filter': 'data'} if hasattr(tarfile, 'data_filter') else {}
    with tarfile.open(tar_path, 'r:gz') as tar:
        tar.extractall(path=extract_to_path, **extract_kwargs)
        print(f"Extracted {tar_path} to {extract_to_path}")

def extract_all_discs(base_disc_path="/Users/valenetjong/Downloads/", 
                    extract_to_path="/Users/valenetjong/Downloads/"):
    """
    Extracts the 12 OASIS-1 disc archives found in base_disc_path.

    A disc is skipped when its "disc<i>" folder already exists under
    extract_to_path, or when its archive is not present (for example because
    the download failed); the remaining discs are still processed.

    Parameters:
    base_disc_path: Directory holding the oasis_cross-sectional_disc<i>.tar.gz files.
    extract_to_path: Directory the archives are extracted into; created if missing.
    """
    total_disks = 12

    for i in range(1, total_disks + 1):
        if os.path.isdir(os.path.join(extract_to_path, f"disc{i}")):
            print(f"Folder for disc{i} already exists. Skipping extraction.")
            continue
        tar_path = os.path.join(base_disc_path, f"oasis_cross-sectional_disc{i}.tar.gz")
        if not os.path.isfile(tar_path):
            # One missing archive must not abort the extraction of the others.
            print(f"Archive {tar_path} not found. Skipping extraction.")
            continue
        os.makedirs(extract_to_path, exist_ok=True)
        extract_tar_gz(tar_path, extract_to_path)

        # Remove the tar.gz file after extraction
        # os.remove(tar_path)
        # print(f"Removed the archive: {tar_path}")

In [38]:
# Fetch the OASIS-1 disc archives into the default download directory;
# archives that are already present are skipped.
download_oasis1()

Downloading: https://download.nrg.wustl.edu/data/oasis_cross-sectional_disc1.tar.gz
Downloaded oasis_cross-sectional_disc1.tar.gz
Downloading: https://download.nrg.wustl.edu/data/oasis_cross-sectional_disc2.tar.gz
Downloaded oasis_cross-sectional_disc2.tar.gz
Downloading: https://download.nrg.wustl.edu/data/oasis_cross-sectional_disc3.tar.gz
Downloaded oasis_cross-sectional_disc3.tar.gz
Downloading: https://download.nrg.wustl.edu/data/oasis_cross-sectional_disc4.tar.gz
Downloaded oasis_cross-sectional_disc4.tar.gz
File oasis_cross-sectional_disc5.tar.gz already exists. Skipping download.
File oasis_cross-sectional_disc6.tar.gz already exists. Skipping download.
File oasis_cross-sectional_disc7.tar.gz already exists. Skipping download.
File oasis_cross-sectional_disc8.tar.gz already exists. Skipping download.
File oasis_cross-sectional_disc9.tar.gz already exists. Skipping download.
File oasis_cross-sectional_disc10.tar.gz already exists. Skipping download.
File oasis_cross-sectional_di

In [45]:
# Unpack the downloaded archives using the default paths; discs whose
# folder already exists are skipped.
extract_all_discs()

Folder for disc1 already exists. Skipping extraction.
Extracted /Users/valenetjong/Downloads/oasis_cross-sectional_disc2.tar.gz to /Users/valenetjong/Downloads/
Extracted /Users/valenetjong/Downloads/oasis_cross-sectional_disc3.tar.gz to /Users/valenetjong/Downloads/
Extracted /Users/valenetjong/Downloads/oasis_cross-sectional_disc4.tar.gz to /Users/valenetjong/Downloads/
Extracted /Users/valenetjong/Downloads/oasis_cross-sectional_disc5.tar.gz to /Users/valenetjong/Downloads/
Extracted /Users/valenetjong/Downloads/oasis_cross-sectional_disc6.tar.gz to /Users/valenetjong/Downloads/
Extracted /Users/valenetjong/Downloads/oasis_cross-sectional_disc7.tar.gz to /Users/valenetjong/Downloads/
Extracted /Users/valenetjong/Downloads/oasis_cross-sectional_disc8.tar.gz to /Users/valenetjong/Downloads/
Extracted /Users/valenetjong/Downloads/oasis_cross-sectional_disc9.tar.gz to /Users/valenetjong/Downloads/
Extracted /Users/valenetjong/Downloads/oasis_cross-sectional_disc10.tar.gz to /Users/valen

### Pre-processing

In [56]:
import skimage.filters
import skimage.morphology
import cv2 as cv
import tempfile
import shutil

""" Pre-processing Functions """

# Maps str(CDR value) to the class label; process_image uses the label as the
# name of the output sub-directory.
# NOTE(review): the Clinical Dementia Rating scale is usually described as
# 0.5 = very mild, 1 = mild, 2 = moderate, so these labels look shifted by one
# severity level -- confirm before relying on them (renaming changes the
# output folder names).
DEMENTIA_MAP = {
    '0.0': "nondemented",
    '0.5': "mildly demented",
    '1.0': 'moderately demented',
    '2.0': 'severely demented'
}

# Pre-determined max dimensions of cropped images
# (process_image zero-pads every cropped image to this width x height).
CONV_WIDTH = 137
CONV_HEIGHT = 167

def normalize_intensity(img):
    """
    Normalizes the intensity of an image to the range [0, 255].

    The arithmetic is done in float64 so that narrow integer inputs cannot
    wrap around, and a constant image (max == min) yields an all-zero result
    instead of dividing by zero.

    Parameters:
    img: The image to be normalized (array-like of numbers).

    Returns:
    Normalized image as a uint8 array of the same shape (values are
    truncated, not rounded, when cast to uint8).
    """
    pixels = np.asarray(img, dtype=np.float64)
    img_min = pixels.min()
    img_max = pixels.max()
    intensity_range = img_max - img_min
    if intensity_range == 0:
        # Flat image: there is no contrast to stretch.
        return np.zeros(pixels.shape, dtype=np.uint8)
    normalized_img = (pixels - img_min) / intensity_range * 255
    return normalized_img.astype(np.uint8)

def pad_image_to_size(img, width, height):
    """
    Pads an image with zeros to the specified width and height.

    The image is centred on the new canvas; when the spare space is odd, the
    extra row/column goes to the bottom/right. Any axes after the first two
    (e.g. colour channels) are carried over unchanged.

    Parameters:
    img: The image to be padded (array with at least two dimensions).
    width: The desired width.
    height: The desired height.

    Returns:
    Padded image with the same dtype as img.

    Raises:
    ValueError: If img is larger than the requested size in either dimension.
    """
    img_height, img_width = img.shape[:2]
    if img_height > height or img_width > width:
        # Negative offsets would otherwise end in an opaque broadcasting error.
        raise ValueError(
            f"Image of size {img_width}x{img_height} does not fit into "
            f"{width}x{height}"
        )
    padded_img = np.zeros((height, width) + tuple(img.shape[2:]), dtype=img.dtype)
    y_offset = (height - img_height) // 2
    x_offset = (width - img_width) // 2
    padded_img[y_offset:y_offset+img_height, x_offset:x_offset+img_width] = img
    return padded_img

def crop_black_boundary(mri_image):
    """
    Trims an MRI image down to the bounding box of its largest bright region.

    Parameters:
    mri_image: Input MRI image.

    Returns:
    The sub-image covered by the bounding rectangle of the largest external
    contour found after thresholding.
    """
    # Binary mask of the pixels above the threshold of 1.
    _retval, foreground = cv.threshold(mri_image, 1, 255, cv.THRESH_BINARY)
    outlines, _hierarchy = cv.findContours(
        foreground, cv.RETR_EXTERNAL, cv.CHAIN_APPROX_SIMPLE
    )
    # Only the contour with the greatest area decides the crop window.
    left, top, box_w, box_h = cv.boundingRect(max(outlines, key=cv.contourArea))
    return mri_image[top:top + box_h, left:left + box_w]

def extract_files(base_dir, target_dir, oasis_csv_path):
    """
    Extracts and processes MRI files from a given directory.

    Each entry of base_dir named like "<prefix>_<digits>_..." is treated as a
    subject folder. Its CDR value is looked up in the CSV under the ID
    "OAS1_<digits>_MR1" (always the MR1 row, whatever suffix the folder has),
    and the n3/n4 segmentation GIFs found in its FSL_SEG folder are passed to
    process_image.

    Entries that do not match the naming scheme (e.g. .DS_Store), subjects
    without a CSV row and subjects whose CDR is missing are skipped. If several
    rows share an ID, the first one is used.

    Parameters:
    base_dir: Directory containing MRI files.
    target_dir: Directory where processed files will be saved.
    oasis_csv_path: Path to the CSV file containing metadata (needs the
        columns 'ID' and 'CDR').
    """
    oasis_df = pd.read_csv(oasis_csv_path)

    for subdir in os.listdir(base_dir):
        name_parts = subdir.split('_')
        if len(name_parts) < 2 or not name_parts[1].isdecimal():
            # Not a subject folder (.DS_Store, stray files, ...).
            continue

        source_dir = os.path.join(base_dir, subdir, "FSL_SEG")
        print("source_dir", source_dir)
        num = name_parts[1]
        session_id = f'OAS1_{num}_MR1'
        num = int(num)

        cdr_values = oasis_df.loc[oasis_df['ID'] == session_id, 'CDR'].tolist()
        if not cdr_values:
            print(f"No metadata row for {session_id}. Skipping.")
            continue
        dementia_type = cdr_values[0]

        if pd.isna(dementia_type):
            continue

        for n_suffix in ['n3', 'n4']:
            fn = os.path.join(source_dir, f"{subdir}_mpr_{n_suffix}_anon_"
                                  f"111_t88_masked_gfc_fseg_tra_90.gif")
            if os.path.exists(fn):
                process_image(fn, target_dir, dementia_type, num)

def process_image(fn, target_dir, dementia_type, num):
    """
    Processes a single MRI image file and saves it to the target directory.

    The image is converted to grayscale, cropped with crop_black_boundary,
    rescaled with normalize_intensity and zero-padded to
    CONV_WIDTH x CONV_HEIGHT. The result is written to
    <target_dir>/<class label>/<num>/tra.png.

    Parameters:
    fn: Path of the file to be processed.
    target_dir: Directory where the processed file will be saved.
    dementia_type: Type of dementia associated with the image; str() of it
        must be a key of DEMENTIA_MAP.
    num: Identifier number for the image.

    Raises:
    KeyError: If str(dementia_type) is not a key of DEMENTIA_MAP.
    OSError: If the processed image could not be written.
    """
    with Image.open(fn) as img:
        img = np.array(img.convert('RGB'))
        img = cv.cvtColor(img, cv.COLOR_RGB2GRAY)
    img = crop_black_boundary(img)
    img = normalize_intensity(img)
    img = pad_image_to_size(img, CONV_WIDTH, CONV_HEIGHT)

    target_subdir = os.path.join(target_dir, DEMENTIA_MAP[str(dementia_type)], str(num))
    os.makedirs(target_subdir, exist_ok=True)
    target_path = os.path.join(target_subdir, "tra.png")
    # cv.imwrite signals failure through its return value rather than an
    # exception, so check it instead of silently losing the image.
    if not cv.imwrite(target_path, img):
        raise OSError(f"Failed to write processed image to {target_path}")

def process_all_discs(base_disc_path, base_extraction_path, oasis_csv_path):
    """
    Runs extract_files over each of the disc1 ... disc12 folders.

    Parameters:
    base_disc_path: Directory that contains the disc<i> folders.
    base_extraction_path: Directory the processed data is written to.
    oasis_csv_path: Path to the OASIS CSV file.
    """
    disc_count = 12

    for disc_no in range(1, disc_count + 1):
        disc_dir = f'{base_disc_path}/disc{disc_no}'
        if os.path.exists(disc_dir):
            extract_files(disc_dir, base_extraction_path, oasis_csv_path)
            print(f"Processed Disc {disc_no}")

            # Cleanup: delete the folder after processing
            # cleanup_directory(disc_dir)
        else:
            # Missing discs are reported and skipped, not treated as errors.
            print(f"Disc {disc_no} does not exist at path {disc_dir}. Skipping.")

def cleanup_directory(path):
    """
    Recursively removes a directory tree.

    A failure is printed rather than raised.

    Parameters:
    path: Path of the directory to be deleted.
    """
    try:
        shutil.rmtree(path)
        print(f"Cleaned up and deleted the directory: {path}")
    except OSError as err:
        failed_on, reason = err.filename, err.strerror
        print(f"Error: {failed_on} - {reason}")

In [57]:
# NOTE(review): these absolute paths are specific to one machine; adjust
# them (or derive them from a configurable data directory) before re-running.
base_disc_path = '/Users/valenetjong/Downloads'  # folder containing disc1 ... disc12
base_extraction_path = '/Users/valenetjong/alzheimer-classification/data'  # output root for processed images
oasis_csv_path = '/Users/valenetjong/alzheimer-classification/datacsv/oasis_cross-sectional.csv'  # metadata CSV read by extract_files

# Pre-process every disc folder that exists under base_disc_path.
process_all_discs(base_disc_path, base_extraction_path, oasis_csv_path)

source_dir /Users/valenetjong/Downloads/disc1/OAS1_0016_MR1/FSL_SEG
source_dir /Users/valenetjong/Downloads/disc1/OAS1_0002_MR1/FSL_SEG
source_dir /Users/valenetjong/Downloads/disc1/OAS1_0003_MR1/FSL_SEG
source_dir /Users/valenetjong/Downloads/disc1/OAS1_0017_MR1/FSL_SEG
source_dir /Users/valenetjong/Downloads/disc1/OAS1_0001_MR1/FSL_SEG
source_dir /Users/valenetjong/Downloads/disc1/OAS1_0015_MR1/FSL_SEG
source_dir /Users/valenetjong/Downloads/disc1/OAS1_0029_MR1/FSL_SEG
source_dir /Users/valenetjong/Downloads/disc1/OAS1_0028_MR1/FSL_SEG
source_dir /Users/valenetjong/Downloads/disc1/OAS1_0014_MR1/FSL_SEG
source_dir /Users/valenetjong/Downloads/disc1/OAS1_0038_MR1/FSL_SEG
source_dir /Users/valenetjong/Downloads/disc1/OAS1_0004_MR1/FSL_SEG
source_dir /Users/valenetjong/Downloads/disc1/OAS1_0010_MR1/FSL_SEG
source_dir /Users/valenetjong/Downloads/disc1/OAS1_0011_MR1/FSL_SEG
source_dir /Users/valenetjong/Downloads/disc1/OAS1_0005_MR1/FSL_SEG
source_dir /Users/valenetjong/Downloads/disc1/OA