In [None]:
#this first block loads several modules that we may or may not use
#it serves as a check that you have them installed

from math import floor, sqrt, pi
from random import sample

import numpy as np
from sklearn.tree import DecisionTreeClassifier
import pandas as pd

%matplotlib inline
import matplotlib.pyplot as plot
plot.rcParams["figure.figsize"] = (20, 10) # (w, h)
import torch
import torch.nn as nn

#scikit image libs
from skimage import io, exposure
from skimage.util import img_as_float
from skimage.color import rgb2gray
from skimage.filters import frangi, meijering, prewitt, gabor
from skimage.feature import hog

#pathing libraries
import os
import glob

#loading images from Google Drive
from google.colab import drive

print("Modules loaded")

In [None]:
#prepares n subplot boxes
def npaxes(n):
    """Create a figure with ``n`` subplots and return their axes as a flat list.

    The grid uses the largest row count ``r <= sqrt(n)`` that divides ``n``
    evenly, so exactly ``n`` axes are created (a prime ``n`` therefore ends up
    as a single row). Axes are returned in row-major order: left to right,
    then top to bottom.

    Parameters
    ----------
    n : int
        Number of subplots to create; must be at least 1.

    Returns
    -------
    list
        The ``n`` matplotlib axes of the new figure.

    Raises
    ------
    ValueError
        If ``n`` is smaller than 1.
    """
    if n < 1:
        raise ValueError("npaxes needs at least one subplot, got %r" % (n,))

    #largest divisor of n that does not exceed sqrt(n)
    rows = floor(sqrt(n))
    while n % rows != 0:
        rows -= 1
    cols = n // rows

    #squeeze=False makes subplots always hand back a 2-D (rows x cols) array,
    #so the 1x1 case (a bare Axes otherwise) and the single-row case need no
    #special indexing
    _fig, grid = plot.subplots(rows, cols, squeeze=False)

    #.flat walks the grid row by row, matching the documented ordering
    return list(grid.flat)

In [None]:
#Access to Google Drive content
#mounts the Drive under /content/drive, which is the prefix the directory paths below are built on
drive.mount('/content/drive')

In [None]:
#basic directory information

#root folder on the mounted Drive
basePath = '/content/drive/My Drive/CSCE633HW5'

#quick sanity check: show what the root folder contains
print(os.listdir(basePath))

#folders where the preprocessed images are saved; train_dir is read by the cells below
train_dir = f'{basePath}/train_PREPROCESSED/'
test_dir = f'{basePath}/test_PREPROCESSED/'

In [None]:
#file listing and  loading

def test_extension(f):
  extensions = ['.jpg', '.JPG', '.jpeg', '.JPEG', '.png', '.PNG']
  ftitle, fext = os.path.splitext(os.path.basename(f))
  for ext in extensions:
    if(fext == ext):
      return True
  return False

#gets files in the given directory that match our extensions (images)
def get_files(dir):
  """List the image file names found directly inside ``dir``.

  Entries are kept when ``test_extension`` accepts them. The result is sorted
  because ``os.listdir`` returns names in arbitrary order; sorting makes
  positional access such as ``get_files(d)[0]`` reproducible between runs.

  Returns a list of bare file names (not joined with ``dir``).
  """
  return sorted(f for f in os.listdir(dir) if test_extension(f))

#opens the image file as an skimage
def get_image(dir, f):
  """Read the file ``f`` inside ``dir`` with skimage and return it passed through ``img_as_float``."""
  path = os.path.join(dir, f)
  pixels = io.imread(path)
  return img_as_float(pixels)

def normalize(img):
  """Standardise ``img`` to zero mean and unit standard deviation.

  The mean and standard deviation are taken over the whole array.
  A constant image has a standard deviation of 0; instead of dividing by zero
  (which would fill the result with NaN), the centred image - all zeros - is
  returned unchanged.
  """
  centered = img - img.mean()
  sigma = img.std()
  if sigma == 0:
    #nothing to scale: every pixel equals the mean
    return centered
  return centered / sigma

In [None]:
#load one training image up front: the first entry returned by get_files(train_dir)
img = get_image(train_dir, get_files(train_dir)[0])

def get_random_img(dir):
  """Load one randomly chosen image from ``dir``.

  The file name is drawn with ``random.sample`` from the names reported by
  ``get_files`` and then opened with ``get_image``.
  """
  candidates = get_files(dir)
  (chosen,) = sample(candidates, 1)
  return get_image(dir, chosen)

Feature Extraction and Visualization begins below.

Feature 1: Image mean and standard deviation, no visualization (just print the tuple)

Feature 2: gabor filters (horizontal and vertical) and prewitt edge detector, visualization through filtered image plot + histogram of filtered image values (which may also be useful for processing)

Feature 3: Histogram of Oriented Gradients (HOG), visualized through the scikit visualization support; the actual data comes out as a giant feature vector, ready for further processing

In [None]:
#feature 1: image mean and stdev (come shipped with scikit, yay!)
#while we often want to normalize images,
#it may be useful in this context to keep the mean/stdev
#(apparently the lightness/darkness of the lungs in x-rays can indicate their health)
#we can then normalize the images after extracting these features

def feature1(img):
  """Feature 1: return the pair ``(mean, standard deviation)`` computed over the whole image."""
  mean_value = img.mean()
  std_value = img.std()
  return mean_value, std_value

def feature1_display(img):
  """Print the ``feature1`` tuple for ``img``; there is no plot for this feature."""
  stats = feature1(img)
  print(stats)

In [None]:
#print the (mean, stdev) tuple of a randomly picked training image
feature1_display(get_random_img(train_dir))

In [None]:
#feature 2: a couple scikit-supported filters

def feature2(img):
  """Feature 2: run the image through a few scikit-image filters.

  The image is converted to grayscale first. Returns a 3-tuple of filtered
  images, in this order:
    1. real part of the gabor response at frequency 1, theta = 0
    2. real part of the gabor response at frequency 1, theta = pi / 2
    3. output of the prewitt edge filter
  """
  gray = rgb2gray(img)

  #gabor filters are basically edge detectors (or "orientation" detectors);
  #scikit returns "real and imaginary responses" and only the real one is kept
  real_theta_0, _ = gabor(gray, 1, theta = 0)
  real_theta_90, _ = gabor(gray, 1, theta = pi / 2)

  #edge filter
  edges = prewitt(gray)

  #also tried, but neither worked well - left here for anyone who wants to tinker:
  #  frangi(gray, black_ridges = False)     a filter for branching shapes
  #  meijering(gray, black_ridges = False)  "neuriteness" filter, meant to highlight
  #                                         the internal structure of the lungs
  return (real_theta_0, real_theta_90, edges)

#the way we'd use these filters, other than plugging them into a NN,
#is probably to compute their means or counts - this will give us a general notion
#of the "amount" of the input image that matches that filter
#or, similar to the HOG below, we can create histograms of the values in the image after application of the filters

#returns (hist, bin_edges)
#see https://numpy.org/doc/stable/reference/generated/numpy.histogram.html
def feature2_histogram(feat):
  """Histogram the values of a filtered image using numpy's 'sqrt' bin rule.

  Returns the ``(hist, bin_edges)`` pair produced by ``np.histogram``.
  """
  counts, edges = np.histogram(feat, bins='sqrt')
  return counts, edges

  #this function displays the results of feature-extraction 2
#i.e.: a picture of the image along with the result after the image goes through each filter
#as well as histograms calculated over the values in each picture
def feature2_display(img):
  """Plot the grayscale image and each ``feature2`` output, with a histogram for every panel.

  The first half of the axes obtained from ``npaxes`` shows the images; the
  second half shows the value histogram ('sqrt' bins) of the image at the same
  position.
  """
  panels = (rgb2gray(img),) + feature2(img)
  count = len(panels)

  #one axis for each picture plus one for each histogram
  axes = npaxes(2 * count)

  for idx, panel in enumerate(panels):
    axes[idx].imshow(panel, cmap="gray")
    axes[idx + count].hist(panel.flatten(), bins="sqrt")

In [None]:
#display extracted features from a random training image:
#the grayscale image and each filter output, plus a histogram of the values in each
feature2_display(get_random_img(train_dir))

In [None]:
#feature 3: HOG (histogram of oriented gradients)
#this extracts a huge amount of information about the orientation of edges in the picture
#and returns a giant feature vector.
#it has several options, but the defaults should be good enough for us (we can test this).
#the two options we are most likely to modify are pixels_per_cell and cells_per_block, which control the way that HOG splits up the image.

#this returns a feature vector
#it is not suitable for visualization
def feature3(img):
  """Feature 3: return the HOG (histogram of oriented gradients) feature vector of ``img``.

  The last axis of ``img`` is treated as the colour channel. All other HOG
  options are left at the scikit-image defaults.
  """
  try:
    #current scikit-image: the channel axis is named explicitly
    return hog(img, channel_axis=-1)
  except TypeError:
    #older scikit-image releases only know the boolean `multichannel` flag
    return hog(img, multichannel=True)

#this plots the corresponding HOG image
def feature3_display(img):
  """Plot the HOG visualisation image of ``img`` in grayscale.

  The last axis of ``img`` is treated as the colour channel. The feature
  vector that ``hog`` returns alongside the picture is not needed here and is
  discarded.
  """
  try:
    #current scikit-image: the channel axis is named explicitly
    _, hog_img = hog(img, channel_axis=-1, visualize=True)
  except TypeError:
    #older scikit-image releases only know the boolean `multichannel` flag
    _, hog_img = hog(img, multichannel=True, visualize=True)

  #rescale using (0, 10) as the input intensity range before plotting
  hog_img = exposure.rescale_intensity(hog_img, in_range=(0, 10))
  plot.imshow(hog_img, cmap="gray")

In [None]:
#show the HOG visualisation for a randomly picked training image
feature3_display(get_random_img(train_dir))