# Utility Functions

In [1]:
%%writefile code/threshold.py
import glob
import numpy
import matplotlib.pyplot
import scipy.ndimage
import pytesseract
import PIL.Image
from toolz.curried import map, pipe, compose, get, do, curry, count, pluck, juxt, flip
import pandas
import skimage
import skimage.measure
import skimage.filters
import skimage.morphology

import json
import sys
import pickle

def fcompose(*funcs):
    """Compose ``funcs`` so that they are applied in the order given.

    The arguments are reversed before being handed to ``compose``, so
    ``fcompose(f, g)(x)`` evaluates ``g(f(x))``.
    """
    return compose(*reversed(funcs))

def mapdict(**kwargs):
    """Build a curried ``map`` that adds computed entries to each dict.

    Every keyword names a new key and gives a function of the whole record.
    Keys already present in a record take precedence over computed ones
    because the record is applied last.
    """
    def extend(data):
        computed = dict((key, func(data)) for key, func in kwargs.items())
        return dict(computed, **data)

    return map(extend)

## Helper functions
@curry
def dfassign(df, **kwargs):
    """Return ``df.assign`` with each keyword's function evaluated on ``df``.

    Args:
      df: the object whose ``assign`` method is called (a DataFrame in this file)
      **kwargs: column name -> function taking ``df`` and returning the column

    Returns:
      the result of ``df.assign`` with the computed columns
    """
    new_columns = {name: func(df) for name, func in kwargs.items()}
    return df.assign(**new_columns)

## View the images
def reshape(arr):
    """Return ``arr`` untouched when it is 2D, otherwise index 0 of its last axis."""
    if len(arr.shape) == 2:
        return arr
    return arr[..., 0]


def to_array(image):
    """Convert ``image`` to mode "L" and return it as an array passed through ``reshape``."""
    grey = image.convert("L")
    return reshape(numpy.asarray(grey))

def plt_arrays(arrs):
    """Show each array in ``arrs`` as a greyscale image on a square grid of subplots.

    The grid side is the ceiling of the square root of ``len(arrs)``; both
    axes of every subplot are hidden.
    """
    figure = matplotlib.pyplot.figure(figsize=(7, 7))
    side = int(numpy.ceil(numpy.sqrt(len(arrs))))
    for position, arr in enumerate(arrs, start=1):
        axes = figure.add_subplot(side, side, position)
        axes.imshow(arr, cmap='Greys_r', interpolation='none')
        for axis in (axes.get_xaxis(), axes.get_yaxis()):
            axis.set_visible(False)
    matplotlib.pyplot.tight_layout()
    matplotlib.pyplot.show()

## Extract the metadata
@curry
def crop_image(image, cutoff=960):
    """Split an image horizontally at row ``cutoff``.

    The part above the cutoff is returned as "upper" and the remainder as
    "lower"; both span the full image width.

    Args:
      image: a PIL image
      cutoff: the row at which the upper part ends and the lower part begins

    Returns:
      {'upper' : upper_image, 'lower': lower_image}
    """
    width, height = image.size
    upper = image.crop(box=(0, 0, width, cutoff))
    lower = image.crop(box=(0, cutoff, width, height))
    return {'upper': upper, 'lower': lower}

def plt_array(arr):
    """Show one array as a greyscale image with both axes hidden.
    """
    shown = matplotlib.pyplot.imshow(arr, cmap='Greys_r', interpolation='none')
    axes = shown.axes
    axes.get_xaxis().set_visible(False)
    axes.get_yaxis().set_visible(False)
    matplotlib.pyplot.tight_layout()
    matplotlib.pyplot.show()

# NBVAL_IGNORE_OUTPUT
def repair_string(string):
    """Turn a scale token into a float.

    The exact token 'mum' maps to 10.0; any other token has every 'pm'
    substring removed before conversion.
    NOTE(review): 'mum' and 'pm' look like OCR misreadings of the micron
    unit - confirm against real tesseract output.
    """
    if string == 'mum':
        return float('10')
    return float(string.replace('pm', ''))

def scale_pixels(image):
    """Return the bounding-box width of the second labelled region of ``image``.

    The image is converted with ``to_array``, labelled with 0 as background,
    and the region at index 1 of ``regionprops`` is measured.
    NOTE(review): presumably that region is the scale bar - confirm.
    """
    labels = skimage.measure.label(to_array(image), background=0)
    regions = skimage.measure.regionprops(labels)
    bbox = regions[1].bbox
    return bbox[3] - bbox[1]

def extract_strings(image):
    """OCR ``image`` and pick the scale, date and time tokens out of the text.

    The recognised text is split on whitespace; token 1 is the scale
    (converted with ``repair_string``), token 3 the date (dashes removed)
    and the last token the time.

    Returns:
      {'scale_microns': float, 'date': str, 'time': str}
    """
    tokens = pytesseract.image_to_string(image).split()
    scale_token, date_token, time_token = tokens[1], tokens[3], tokens[-1]
    return dict(scale_microns=repair_string(scale_token),
                date=date_token.replace('-', ''),
                time=time_token)

def extract_metadata(filename):
    """Open ``filename`` and read the metadata from the lower part of the image.

    Returns:
      the dict from ``extract_strings`` plus a 'scale_pixels' entry
    """
    footer = crop_image(PIL.Image.open(filename))['lower']
    return dict(scale_pixels=scale_pixels(footer), **extract_strings(footer))

## Rescale the images
def extract_image(filename):
    """Open ``filename`` and return the upper part produced by ``crop_image``."""
    opened = PIL.Image.open(filename)
    return crop_image(opened)['upper']

def scale_image(image, rescale_factor):
    """Scale the image using PIL's thumbnail

    thumbnail is an inplace operation so copies are required.

    Args:
      image: a PIL image
      rescale_factor: how much to rescale the image by

    Returns:
      a new image
    """
    copy_image = image.copy()
    target_size = numpy.array(copy_image.size) * rescale_factor
    # LANCZOS is the filter formerly exposed as ANTIALIAS; the ANTIALIAS alias
    # was removed in Pillow 10, while LANCZOS exists in old and new releases.
    copy_image.thumbnail(target_size, PIL.Image.LANCZOS)
    return copy_image

def get_df(pattern):
    """Build a DataFrame of per-file metadata for every file matching ``pattern``.

    Files are processed in sorted order. Two columns are added on top of the
    extracted metadata:
      pixel_size: scale_microns / scale_pixels
      rescale_factor: pixel_size divided by the largest pixel_size
    """
    records = [
        dict(filename=name, **extract_metadata(name))
        for name in sorted(glob.glob(pattern))
    ]
    frame = pandas.DataFrame(records)
    frame = frame.assign(pixel_size=frame['scale_microns'] / frame['scale_pixels'])
    return frame.assign(rescale_factor=frame['pixel_size'] / max(frame['pixel_size']))

def scaled_images(pattern):
    """Return one dict per file matching ``pattern`` with the images attached.

    Each row of ``get_df(pattern)`` becomes a dict that gains an 'image'
    entry (from ``extract_image``) and a 'scaled_image' entry (that image
    passed through ``scale_image`` with the row's rescale_factor).
    """
    add_image = mapdict(image=lambda data: extract_image(data['filename']))
    add_scaled = mapdict(
        scaled_image=lambda data: scale_image(data['image'], data['rescale_factor'])
    )
    rows = get_df(pattern).T.to_dict().values()
    return list(add_scaled(add_image(rows)))

## Threshold the images into the ferrite and cementite phase
def threshold_image(filename):
    """Open ``filename`` and threshold the upper part of the image with Otsu's method.

    Returns:
      a boolean array, True where a pixel is above the Otsu threshold
    """
    pixels = to_array(extract_image(filename))
    cutoff = skimage.filters.threshold_otsu(pixels)
    return pixels > cutoff

def threshold(filename):
    """Run the threshold stage for one image file.

    Returns:
      a dict holding 'filename', 'threshold_image' and every entry
      produced by ``extract_metadata``
    """
    binary = threshold_image(filename)
    metadata = extract_metadata(filename)
    return dict(filename=filename, threshold_image=binary, **metadata)

if __name__ == '__main__':
    # Usage: python threshold.py <image file>
    filename = sys.argv[1]
    # Base name without directory and without anything after the first dot.
    filename_cleaned = filename.split("/")[-1].split(".")[0]
    output_name = "{0}-threshold.data".format(filename_cleaned)
    result = threshold(filename)

    # The context manager closes (and therefore flushes) the pickle file
    # before its name is announced to the next stage.
    with open(output_name, 'wb') as output:
        pickle.dump(result, output)

    print(output_name)

Overwriting code/threshold.py


In [2]:
%%writefile code/min_size.py
import glob
import numpy
import matplotlib.pyplot
import scipy.ndimage
import pytesseract
import PIL.Image
from toolz.curried import map, pipe, compose, get, do, curry, count, pluck, juxt, flip
import pandas
import skimage
import skimage.measure
import skimage.filters
import skimage.morphology

import sys
import json
import pickle

## Remove white specks
def f_min_size(scale_microns, scale_pixels, island_size=0.2):
    """Return the square of ``island_size`` rescaled by scale_pixels / scale_microns.

    Args:
      scale_microns: length of the scale in microns
      scale_pixels: length of the same scale in pixels
      island_size: island side length, in the same unit as ``scale_microns``

    Returns:
      (island_size * scale_pixels / scale_microns) squared
    """
    island_pixels = island_size * scale_pixels / scale_microns
    return island_pixels ** 2

def min_size(data):
    """Store ``f_min_size`` of the record's scale under 'min_size'.

    ``data`` is modified in place and also returned.
    """
    size = f_min_size(data['scale_microns'], data['scale_pixels'])
    data['min_size'] = size
    return data

if __name__ == '__main__':
    # Usage: python min_size.py <pickle written by the previous stage>
    filename = sys.argv[1]
    filename_cleaned = filename.split("/")[-1].split(".")[0]
    # Drop the trailing "-<stage>" part of the previous stage's file name.
    filename_cleaned = "-".join(filename_cleaned.split("-")[0:-1])
    output_name = "{0}-min_size.data".format(filename_cleaned)

    # Pickle data is binary: text mode ("r") makes pickle.load fail on Python 3.
    # NOTE(security): pickle.load can execute arbitrary code; only pass files
    # written by this pipeline.
    with open(filename, "rb") as intermediate:
        data = pickle.load(intermediate)

    result = min_size(data)
    # Close the output before announcing its name.
    with open(output_name, 'wb') as output:
        pickle.dump(result, output)

    print(output_name)


Overwriting code/min_size.py


In [3]:
%%writefile code/clean.py
import glob
import numpy
import matplotlib.pyplot
import scipy.ndimage
import pytesseract
import PIL.Image
from toolz.curried import map, pipe, compose, get, do, curry, count, pluck, juxt, flip
import pandas
import skimage
import skimage.measure
import skimage.filters
import skimage.morphology

import json
import sys
import pickle

## Remove small holes from the thresholded image
# Curried wrapper so the function can also be partially applied.
remove_small_holes = curry(skimage.morphology.remove_small_holes)


def clean(data):
    """Store a cleaned copy of the threshold image under 'clean_image'.

    The threshold image is inverted, passed to ``remove_small_holes`` with
    the record's 'min_size', and inverted back. ``data`` is modified in
    place and also returned.
    """
    inverted = ~data['threshold_image']
    filled = remove_small_holes(inverted, data['min_size'])
    data['clean_image'] = ~filled
    return data

if __name__ == '__main__':
    # Usage: python clean.py <pickle written by the previous stage>
    filename = sys.argv[1]
    filename_cleaned = filename.split("/")[-1].split(".")[0]
    # Drop the trailing "-<stage>" part of the previous stage's file name.
    filename_cleaned = "-".join(filename_cleaned.split("-")[0:-1])
    output_name = "{0}-clean.data".format(filename_cleaned)

    # Pickle data is binary: text mode ("r") makes pickle.load fail on Python 3.
    # NOTE(security): pickle.load can execute arbitrary code; only pass files
    # written by this pipeline.
    with open(filename, "rb") as intermediate:
        data = pickle.load(intermediate)

    result = clean(data)
    # Close the output before announcing its name.
    with open(output_name, 'wb') as output:
        pickle.dump(result, output)

    print(output_name)


Overwriting code/clean.py


In [4]:
%%writefile code/reveal.py
import glob
import numpy
import matplotlib.pyplot
import scipy.ndimage
import pytesseract
import PIL.Image
from toolz.curried import map, pipe, compose, get, do, curry, count, pluck, juxt, flip
import pandas
import skimage
import skimage.measure
import skimage.filters
import skimage.morphology
import sys

import json
import pickle

def fcompose(*funcs):
    """Compose ``funcs`` so that they are applied in the order given.

    The arguments are reversed before being handed to ``compose``, so
    ``fcompose(f, g)(x)`` evaluates ``g(f(x))``.
    """
    return compose(*reversed(funcs))

## Binary closing to reveal the Pearlite Phase
# ``closing`` takes its arguments flipped: the structuring element first, the image second.
closing = curry(flip(skimage.morphology.closing))
remove_small_holes = curry(skimage.morphology.remove_small_holes)


def reveal_pearlite(image):
    """Close ``image`` with a 5x5 square, then remove small holes (min_size=1000)."""
    closed = closing(skimage.morphology.square(5), image)
    return remove_small_holes(closed, min_size=1000)

def reveal(data):
    """Store ``reveal_pearlite`` of the clean image under 'pearlite_image'.

    ``data`` is modified in place and also returned.
    """
    revealed = reveal_pearlite(data['clean_image'])
    data['pearlite_image'] = revealed
    return data

if __name__ == '__main__':
    # Usage: python reveal.py <pickle written by the previous stage>
    filename = sys.argv[1]
    filename_cleaned = filename.split("/")[-1].split(".")[0]
    # Drop the trailing "-<stage>" part of the previous stage's file name.
    filename_cleaned = "-".join(filename_cleaned.split("-")[0:-1])
    output_name = "{0}-reveal.data".format(filename_cleaned)

    # Pickle data is binary: text mode ("r") makes pickle.load fail on Python 3.
    # NOTE(security): pickle.load can execute arbitrary code; only pass files
    # written by this pipeline.
    with open(filename, "rb") as intermediate:
        data = pickle.load(intermediate)

    result = reveal(data)
    # Close the output before announcing its name.
    with open(output_name, 'wb') as output:
        pickle.dump(result, output)

    print(output_name)


Overwriting code/reveal.py


In [5]:
%%writefile code/pearlite.py
import glob
import numpy
import matplotlib.pyplot
import scipy.ndimage
import pytesseract
import PIL.Image
from toolz.curried import map, pipe, compose, get, do, curry, count, pluck, juxt, flip
import pandas
import skimage
import skimage.measure
import skimage.filters
import skimage.morphology
import sys

import json
import pickle

## Volume function
def frac1(image):
    """Return the sum of ``image`` divided by its number of elements, as a float."""
    total = float(image.sum())
    return total / image.size


def pearlite(data):
    """Store ``frac1`` of the pearlite image under 'pearlite_fraction'.

    ``data`` is modified in place and also returned.
    """
    fraction = frac1(data['pearlite_image'])
    data['pearlite_fraction'] = fraction
    return data

if __name__ == '__main__':
    # Usage: python pearlite.py <pickle written by the previous stage>
    filename = sys.argv[1]
    filename_cleaned = filename.split("/")[-1].split(".")[0]
    # Drop the trailing "-<stage>" part of the previous stage's file name.
    filename_cleaned = "-".join(filename_cleaned.split("-")[0:-1])
    output_name = "{0}-pearlite.data".format(filename_cleaned)

    # Pickle data is binary: text mode ("r") makes pickle.load fail on Python 3.
    # NOTE(security): pickle.load can execute arbitrary code; only pass files
    # written by this pipeline.
    with open(filename, "rb") as intermediate:
        data = pickle.load(intermediate)

    result = pearlite(data)
    # Close the output before announcing its name.
    with open(output_name, 'wb') as output:
        pickle.dump(result, output)

    print(output_name)


Overwriting code/pearlite.py


In [6]:
%%writefile code/ferrite.py
import glob
import numpy
import matplotlib.pyplot
import scipy.ndimage
import pytesseract
import PIL.Image
from toolz.curried import map, pipe, compose, get, do, curry, count, pluck, juxt, flip
import pandas
import skimage
import skimage.measure
import skimage.filters
import skimage.morphology
import json
import sys
import pickle

## Volume function
def frac1(image):
    """Return the sum of ``image`` divided by its number of elements, as a float."""
    total = float(image.sum())
    return total / image.size


def frac0(image):
    """Return the complement of ``frac1(image)``, i.e. ``1 - frac1(image)``."""
    return 1 - frac1(image)

def ferrite(data):
    """Store ``frac0`` of the clean image under 'ferrite_fraction'.

    ``data`` is modified in place and also returned.
    """
    fraction = frac0(data['clean_image'])
    data['ferrite_fraction'] = fraction
    return data

if __name__ == '__main__':
    # Usage: python ferrite.py <pickle written by the previous stage>
    filename = sys.argv[1]
    filename_cleaned = filename.split("/")[-1].split(".")[0]
    # Drop the trailing "-<stage>" part of the previous stage's file name.
    filename_cleaned = "-".join(filename_cleaned.split("-")[0:-1])
    output_name = "{0}-ferrite.data".format(filename_cleaned)

    # Pickle data is binary: text mode ("r") makes pickle.load fail on Python 3.
    # NOTE(security): pickle.load can execute arbitrary code; only pass files
    # written by this pipeline.
    with open(filename, "rb") as intermediate:
        data = pickle.load(intermediate)

    result = ferrite(data)
    # Close the output before announcing its name.
    with open(output_name, 'wb') as output:
        pickle.dump(result, output)

    print(output_name)


Overwriting code/ferrite.py


In [7]:
%%writefile code/cemmentite.py
import glob
import numpy
import matplotlib.pyplot
import scipy.ndimage
import pytesseract
import PIL.Image
from toolz.curried import map, pipe, compose, get, do, curry, count, pluck, juxt, flip
import pandas
import skimage
import skimage.measure
import skimage.filters
import skimage.morphology

import json
import sys
import pickle

## Volume function
def frac1(image):
    """Return the sum of ``image`` divided by its number of elements, as a float."""
    total = float(image.sum())
    return total / image.size

def cemmentite(data):
    """Store ``frac1`` of the clean image under 'cemmentite_fraction'.

    ``data`` is modified in place and also returned.
    """
    fraction = frac1(data['clean_image'])
    data['cemmentite_fraction'] = fraction
    return data

if __name__ == '__main__':
    # Usage: python cemmentite.py <pickle written by the previous stage>
    filename = sys.argv[1]
    filename_cleaned = filename.split("/")[-1].split(".")[0]
    # Drop the trailing "-<stage>" part of the previous stage's file name.
    filename_cleaned = "-".join(filename_cleaned.split("-")[0:-1])
    output_name = "{0}-cemmentite.data".format(filename_cleaned)

    # Pickle data is binary: text mode ("r") makes pickle.load fail on Python 3.
    # NOTE(security): pickle.load can execute arbitrary code; only pass files
    # written by this pipeline.
    with open(filename, "rb") as intermediate:
        data = pickle.load(intermediate)

    result = cemmentite(data)
    # Close the output before announcing its name.
    with open(output_name, 'wb') as output:
        pickle.dump(result, output)

    print(output_name)


Overwriting code/cemmentite.py


In [8]:
%%writefile code/save.py
import glob
import numpy
import matplotlib.pyplot
import scipy.ndimage
import pytesseract
import PIL.Image
from toolz.curried import map, pipe, compose, get, do, curry, count, pluck, juxt, flip
import pandas
import skimage
import skimage.measure
import skimage.filters
import skimage.morphology
import sys

import json
import pickle

def save(data):
    """Write the phase fractions of one record to "<name>.json".

    ``<name>`` is the record's 'filename' without its directory and without
    anything after the first dot. The file is written relative to the current
    working directory and holds the name plus the pearlite, ferrite and
    cemmentite fractions, with sorted keys and 4-space indentation.
    """
    clean_name = data['filename'].split("/")[-1].split(".")[0]
    filtered_data = {'filename': clean_name}
    for key in ('pearlite_fraction', 'ferrite_fraction', 'cemmentite_fraction'):
        filtered_data[key] = data[key]
    with open("{0}.json".format(clean_name), "w") as save_file:
        save_file.write(
            json.dumps(filtered_data, sort_keys=True, indent=4, separators=(',', ': '))
        )

if __name__ == '__main__':
    # Usage: python save.py <pickle written by the previous stage>
    filename = sys.argv[1]

    # Pickle data is binary: text mode ("r") makes pickle.load fail on Python 3.
    # NOTE(security): pickle.load can execute arbitrary code; only pass files
    # written by this pipeline.
    with open(filename, "rb") as intermediate:
        data = pickle.load(intermediate)

    # save() derives the output name from data['filename'] and returns nothing.
    save(data)


Overwriting code/save.py


# Combined Study Code (run with Sumatra)

In [9]:
%%writefile code/combined.py
import glob
import numpy
import matplotlib.pyplot
import scipy.ndimage
import pytesseract
import PIL.Image
from toolz.curried import map, pipe, compose, get, do, curry, count, pluck, juxt, flip
import pandas
import skimage
import skimage.measure
import skimage.filters
import skimage.morphology

import dask
import json
from dask.multiprocessing import get as dak_get

from dask.diagnostics import ResourceProfiler, Profiler, CacheProfiler, ProgressBar, visualize
from dask import compute
from dask.dot import dot_graph

from threshold import *
from min_size import *
from clean import *
from reveal import *
from pearlite import *
from ferrite import *
from cemmentite import *
from save import *

def finalize(saves):
    """Terminal task of the graph: print "done." once every save task has run.

    ``saves`` holds the results of the per-file save tasks and is not used.
    """
    message = "done."
    print(message)
    
data_path = "/Users/fyc/Desktop/MRaDS-2017/MRaDS-2017-Demo-Study/data"

# Per-image stages in execution order. Each stage receives the result of the
# one before it; the first stage receives the image file path.
stages = [
    ('threshold', threshold),
    ('min_size', min_size),
    ('clean', clean),
    ('reveal', reveal),
    ('pearlite', pearlite),
    ('ferrite', ferrite),
    ('cemmentite', cemmentite),
    ('save', save),
]


def build_graph(files):
    """Build the dask graph that runs every stage on every file.

    Args:
      files: image file paths

    Returns:
      a dict with one "<stage>-<name>" task per stage and file, chained in
      stage order, plus a 'finalize' task depending on every save task
    """
    dsk = {}
    final_saves = []
    for filename in files:
        filename_cleaned = filename.split("/")[-1].split(".")[0]
        upstream = filename  # the threshold stage takes the raw file path
        for stage_name, stage_func in stages:
            key = '{0}-{1}'.format(stage_name, filename_cleaned)
            dsk[key] = (stage_func, upstream)
            upstream = key
        # After the inner loop ``upstream`` is this file's save task key.
        final_saves.append(upstream)
    dsk['finalize'] = (finalize, final_saves)
    return dsk


# The multiprocessing scheduler may re-import this module in worker processes;
# the guard keeps those imports from launching the study again.
if __name__ == '__main__':
    files = sorted(glob.glob("{0}/*.tif".format(data_path)))
    dsk = build_graph(files)

    dot_graph(dsk)

    with ResourceProfiler(0.25) as rprof, Profiler() as prof, CacheProfiler() as cprof, ProgressBar():
        dak_get(dsk, 'finalize')

    visualize([prof, rprof, cprof])

Overwriting code/combined.py


# Running the study

In [29]:
! git add --all

In [30]:
! git commit -m "Run updates..."

[master 49283fc] Run updates...
 2 files changed, 765 insertions(+), 1363 deletions(-)
 rewrite mydask.png (93%)
 rewrite sumatra.ipynb (69%)


In [31]:
! smt run --executable=python --main=code/combined.py 

Multiple versions found, using /Users/fyc/anaconda2/envs/demo-3.5/bin/python. If you wish to use a different version, please specify it explicitly
Multiple versions found, using /Users/fyc/anaconda2/envs/demo-3.5/bin/python. If you wish to use a different version, please specify it explicitly
b'\r[                                        ] | 0% Completed |  0.0s\r[                                        ] | 0% Completed |  0.1s\r[                                        ] | 0% Completed |  0.2s\r[                                        ] | 0% Completed |  0.3s\r[                                        ] | 0% Completed |  0.4s\r[                                        ] | 0% Completed |  0.5s\r[                                        ] | 0% Completed |  0.6s\r[                                        ] | 0% Completed |  0.7s\r[                                        ] | 0% Completed |  0.8s\r[                                        ] | 0% Completed |  0.9s\r[                               