In [11]:
%matplotlib inline
import os
import sys
import re
import glob

import pandas as pd
import numpy as np
import torch
import torch.utils.data
import torch.nn

from random import randrange
from PIL import Image
import matplotlib.pyplot as plt

In [10]:
%pip install scikit-image

[33mDEPRECATION: Configuring installation scheme with distutils config files is deprecated and will no longer work in the near future. If you are using a Homebrew or Linuxbrew Python, please see discussion at https://github.com/Homebrew/homebrew-core/issues/76621[0m[33m
[0mCollecting scikit-image
  Obtaining dependency information for scikit-image from https://files.pythonhosted.org/packages/71/23/9421f904701927d46b497489a48aacc5805284bdb93cb1a34f27d2b0ddfc/scikit_image-0.22.0-cp39-cp39-macosx_12_0_arm64.whl.metadata
  Downloading scikit_image-0.22.0-cp39-cp39-macosx_12_0_arm64.whl.metadata (13 kB)
Collecting tifffile>=2022.8.12 (from scikit-image)
  Obtaining dependency information for tifffile>=2022.8.12 from https://files.pythonhosted.org/packages/f5/72/68ea763b5f3e3d9871492683059ed4724fd700dbe54aa03cdda7a9692129/tifffile-2023.9.26-py3-none-any.whl.metadata
  Downloading tifffile-2023.9.26-py3-none-any.whl.metadata (30 kB)
Collecting lazy_loader>=0.3 (from scikit-image)
  Obtain

### Pre-processing

In [13]:
import skimage.filters
import skimage.morphology

""" Pre-processing Functions """

# Maps str(CDR) -> class label.  process_image() looks a score up with
# DEMENTIA_MAP[str(dementia_type)] and uses the label as an output
# sub-directory name, so keys must match the string form of the CSV's CDR
# values and any score not listed here raises KeyError.
# NOTE(review): the usual CDR wording is 0.5 = "very mild", 1 = "mild",
# 2 = "moderate" -- confirm the labels below are the intended class names.
DEMENTIA_MAP = {
    "0.0": "nondemented",
    "0.5": "mildly demented",
    "1.0": "moderately demented",
    "2.0": "severely demented",
}

def extract_files(base_dir, target_dir, oasis_csv_path, id_column="ID"):
    """Pre-process every available scan under ``base_dir`` into ``target_dir``.

    Each sub-directory of ``base_dir`` is treated as one imaging session whose
    name looks like ``<prefix>_<number>_...``.  The session's CDR score is
    read from the CSV and, for every scan orientation whose GIF exists under
    ``<session>/PROCESSED/MPRAGE/T88_111``, ``process_image`` is called.

    Args:
        base_dir: Directory containing one sub-directory per session.
        target_dir: Output root, forwarded unchanged to ``process_image``.
        oasis_csv_path: Path to the CSV holding a ``CDR`` column.
        id_column: CSV column whose values are the session directory names.
            If the CSV has no such column, the row is chosen by position
            from the subject number (the previous behaviour).

    Returns:
        list[str]: The source GIF paths that were handed to
        ``process_image``, in processing order.

    Raises:
        KeyError: If the CSV has no ``CDR`` column.
    """
    oasis_df = pd.read_csv(oasis_csv_path)
    scan_types = ["cor_110", "sag_95", "tra_90"]

    # Look the score up by session name rather than by row position: the
    # subject number is 1-based and may have gaps, so ``iloc[num]`` can land
    # on a different subject's row.
    if id_column in oasis_df.columns:
        cdr_by_session = dict(
            zip(oasis_df[id_column].astype(str), oasis_df['CDR'])
        )
    else:
        cdr_by_session = None

    processed = []
    for subdir in sorted(os.listdir(base_dir)):
        # Only directories are sessions; this skips '.DS_Store' and any other
        # stray file instead of crashing on its name.
        if not os.path.isdir(os.path.join(base_dir, subdir)):
            continue

        parts = subdir.split('_')
        if len(parts) < 2:
            continue
        try:
            num = int(parts[1])
        except ValueError:
            continue  # directory that does not follow <prefix>_<number>_...

        if cdr_by_session is not None:
            if subdir not in cdr_by_session:
                continue  # session has no row in the CSV
            dementia_type = cdr_by_session[subdir]
        else:
            # No id column to match on: positional lookup, guarded against
            # running past the end of the table.
            if num >= len(oasis_df):
                continue
            dementia_type = oasis_df.iloc[num]['CDR']

        if pd.isna(dementia_type):
            continue  # unlabeled session, nothing to file it under

        source_dir = os.path.join(base_dir, subdir, "PROCESSED", "MPRAGE",
                                  "T88_111")
        for scan_type in scan_types:
            # Both suffixes are tried; process_image names its output after
            # scan_type only, so if both files exist the later one wins.
            for n_suffix in ['n3', 'n4']:
                fn = os.path.join(source_dir, f"{subdir}_mpr_{n_suffix}_anon_"
                                  f"111_t88_gfc_{scan_type}.gif")
                if os.path.exists(fn):
                    process_image(fn, target_dir, dementia_type, num,
                                  scan_type)
                    processed.append(fn)

    return processed

def process_image(fn, target_dir, dementia_type, num, scan_type):
    """Skull-strip one scan and write it out as a PNG.

    The result is saved to
    ``<target_dir>/<DEMENTIA_MAP[str(dementia_type)]>/<num>/<scan_type>.png``;
    missing directories are created.

    Args:
        fn: Path of the source image to open.
        target_dir: Root directory of the output tree.
        dementia_type: Score whose ``str()`` form is a key of ``DEMENTIA_MAP``.
        num: Subject number, used as the per-subject directory name.
        scan_type: Scan orientation tag, used as the output file stem.

    Raises:
        KeyError: If ``str(dementia_type)`` is not a key of ``DEMENTIA_MAP``.
    """
    with Image.open(fn) as source_img:
        stripped = skull_strip(source_img)

        label = DEMENTIA_MAP[str(dementia_type)]
        out_dir = os.path.join(target_dir, label, str(num))
        os.makedirs(out_dir, exist_ok=True)

        out_path = os.path.join(out_dir, f"{scan_type}.png")
        stripped.convert('RGB').save(out_path)

def skull_strip(img):
    """Zero out everything at or below the Otsu threshold of ``img``.

    The image is converted to 8-bit grayscale, thresholded with Otsu's
    method, and connected foreground regions smaller than 100 pixels are
    dropped from the mask.  Pixels outside the mask are set to 0.

    Args:
        img: PIL image to process; it is not modified.

    Returns:
        A new grayscale PIL image built from the masked uint8 array.
    """
    pixels = np.array(img.convert('L'))

    # Foreground = brighter than the Otsu level, minus small specks.
    otsu_level = skimage.filters.threshold_otsu(pixels)
    foreground = skimage.morphology.remove_small_objects(
        pixels > otsu_level, min_size=100
    )
    pixels[~foreground] = 0

    # Scale to [0, 1] and straight back to the 0-255 range before the
    # uint8 cast.
    scaled = pixels / 255.0
    return Image.fromarray((scaled * 255).astype(np.uint8))

# Machine-specific locations: point 'path_to_disc1' at your 'disc1' directory
# and 'oasis_csv_path' at the OASIS cross-sectional CSV before running.
path_to_disc1 = '/Users/msturman00/Documents/GitHub/alzheimer-classification/data2/disc1'
oasis_csv_path = '/Users/msturman00/Documents/GitHub/alzheimer-classification/datacsv/oasis_cross-sectional.csv'

# Run the pre-processing; whatever extract_files returns is kept in
# 'extracted_files'.
extracted_files = extract_files(
    base_dir=path_to_disc1,
    target_dir='/Users/msturman00/Documents/GitHub/alzheimer-classification/data',
    oasis_csv_path=oasis_csv_path,
)