# Study Spec Maestrowf

In [None]:
# %load sem-study.yaml
# Study metadata: a short identifier plus a human-readable summary.
description:
  name: SEM-Images-Stats
  description: >-
    The goal of this work is to analyze images of steel from SEM.
    The initial data set consists of 9 images.

env:
  # Named values referenced elsewhere in this spec as $(NAME).
  variables:
    OUTPUT_PATH: ./sample_output/sem-image-stats
    SEM_IMG_STATS_URL: 'https://github.com/wd15/sem-image-stats'
    # NOTE(review): machine-specific absolute path; adjust it before running
    # the study on another machine.
    PROJECT_PATH: /Users/fyc/Desktop/Hackaton-09-11-2017/Demo/corr-maestrowf/samples/sem-images-stats
    # Both locations below are derived from PROJECT_PATH.
    CODE_PATH: $(PROJECT_PATH)/study/code
    DATA_PATH: $(PROJECT_PATH)/study/data
  labels:
    outfile: sem-image-stats.log
study:
    # The steps form a linear chain: every step lists its predecessor in
    # `depends` and is handed the "$(DATA)-<previous stage>.data" file as its
    # only argument.

    # Entry point: the only step that reads the raw .tif from DATA_PATH.
    - name: run-threshold
      description: Do some thresholding.
      run:
          cmd: |
            python $(CODE_PATH)/threshold.py $(DATA_PATH)/$(DATA).tif
          depends: []
    - name: run-min_size
      description: Do some min_size.
      run:
          cmd: |
            python $(CODE_PATH)/min_size.py $(DATA)-threshold.data
          depends: [run-threshold]
    - name: run-clean
      description: Do some clean.
      run:
          cmd: |
            python $(CODE_PATH)/clean.py $(DATA)-min_size.data
          depends: [run-min_size]
    - name: run-reveal
      description: Do some reveal.
      run:
          cmd: |
            python $(CODE_PATH)/reveal.py $(DATA)-clean.data
          depends: [run-clean]
    # NOTE(review): pearlite.py is not written by any cell visible in this
    # notebook; confirm that it exists under CODE_PATH.
    - name: run-pearlite
      description: Do some pearlite.
      run:
          cmd: |
            python $(CODE_PATH)/pearlite.py $(DATA)-reveal.data
          depends: [run-reveal]
    - name: run-ferrite
      description: Do some ferrite.
      run:
          cmd: |
            python $(CODE_PATH)/ferrite.py $(DATA)-pearlite.data
          depends: [run-pearlite]
    - name: run-cemmentite
      description: Do some cemmentite.
      run:
          cmd: |
            python $(CODE_PATH)/cemmentite.py $(DATA)-ferrite.data
          depends: [run-ferrite]
    # Final step: save.py receives the last intermediate file of the chain.
    - name: run-save
      description: Do some save.
      run:
          cmd: |
            python $(CODE_PATH)/save.py $(DATA)-cemmentite.data
          depends: [run-cemmentite]

# One entry per SEM image; each value is substituted for $(DATA) in the steps.
global.parameters:
  DATA:
    values:
      - '1045_Steel_Nital-etch-1'
      - '1045_Steel_Nital-etch-2'
      - '1045_Steel_Nital-etch-3'
      - '1045_Steel_Nital-etch-4'
      - '1045_Steel_Nital-etch-5'
      - '1045_Steel_Nital-etch-6'
      - '1045_Steel_Nital-etch-7'
      - '20150911_1045_Nital_etch-1'
      - '20150911_1045_Nital_etch-2'
    label: '%%'


# Utility Functions

In [1]:
%%writefile code/threshold.py
import glob
import numpy
import matplotlib.pyplot
import scipy.ndimage
import pytesseract
import PIL.Image
from toolz.curried import map, pipe, compose, get, do, curry, count, pluck, juxt, flip
import pandas
import skimage
import skimage.measure
import skimage.filters
import skimage.morphology

import json
import sys
import pickle

def fcompose(*args):
    """Compose functions in reading order.

    The functions are handed to ``compose`` in reversed order, so the first
    argument is the first function applied.
    """
    return compose(*reversed(args))

def mapdict(**kwargs):
    """Build a mapper that adds computed entries to each dict of a sequence.

    Every keyword names a new key and supplies a function of the record that
    produces its value.  Keys already present in the record take precedence
    over computed keys of the same name.
    """
    def extend(data):
        computed = dict((key, func(data)) for key, func in kwargs.items())
        return dict(computed, **data)

    return map(extend)

## Helper functions
@curry
def dfassign(df, **kwargs):
    """Assign new columns to ``df``, each computed by calling a function on ``df``.

    Args:
      df: the object whose ``assign`` method is called
      **kwargs: column name -> function taking ``df`` and returning the column

    Returns:
      whatever ``df.assign`` returns for the computed columns
    """
    new_columns = {name: func(df) for name, func in kwargs.items()}
    return df.assign(**new_columns)

## View the images
def reshape(arr):
    """Return ``arr`` as is when it is 2D, otherwise its slice ``arr[..., 0]``."""
    if len(arr.shape) == 2:
        return arr
    return arr[..., 0]


def to_array(image):
    """Convert ``image`` to mode "L" and return the result as a 2D array."""
    greyscale = image.convert("L")
    return reshape(numpy.asarray(greyscale))

def plt_arrays(arrs):
    """Show several 2D arrays together on a square grid of sub plots.

    The grid side is the ceiling of the square root of the number of arrays.
    Every array is drawn in reversed greys without interpolation and with
    both axes hidden.
    """
    figure = matplotlib.pyplot.figure(figsize=(7, 7))
    side = int(numpy.ceil(numpy.sqrt(len(arrs))))
    for position, arr in enumerate(arrs, 1):
        subplot = figure.add_subplot(side, side, position)
        shown = subplot.imshow(arr, cmap='Greys_r', interpolation='none')
        shown.axes.get_xaxis().set_visible(False)
        shown.axes.get_yaxis().set_visible(False)
    matplotlib.pyplot.tight_layout()
    matplotlib.pyplot.show()

## Extract the metadata
@curry
def crop_image(image, cutoff=960):
    """Split an image horizontally at row ``cutoff``.

    The part above the cutoff is the micrograph itself; the part below it
    carries the embedded metadata.

    Args:
      image: a PIL image
      cutoff: the cutoff height for the upper image

    Returns:
      {'upper' : upper_image, 'lower': lower_image}
    """
    width = image.size[0]
    upper_box = (0, 0, width, cutoff)
    lower_box = (0, cutoff, width, image.size[1])
    return {
        'upper': image.crop(box=upper_box),
        'lower': image.crop(box=lower_box),
    }

def plt_array(arr):
    """Show one 2D array in reversed greys with both axes hidden.
    """
    shown = matplotlib.pyplot.imshow(arr, cmap='Greys_r', interpolation='none')
    axes = shown.axes
    axes.get_xaxis().set_visible(False)
    axes.get_yaxis().set_visible(False)
    matplotlib.pyplot.tight_layout()
    matplotlib.pyplot.show()

# NBVAL_IGNORE_OUTPUT
def repair_string(string):
    """Turn a scale token read from the image into a float.

    The exact token 'mum' is mapped to 10.0.  Any other token has every 'pm'
    removed before being parsed as a float.
    NOTE(review): both cases look like work-arounds for OCR misreads of the
    scale label; confirm against real tesseract output.
    """
    if string == 'mum':
        return float('10')
    return float(string.replace('pm', ''))

def scale_pixels(image):
    """Measure a width in pixels from the metadata strip of an image.

    The strip is converted to an array and labelled with 0 as background.
    The result is ``bbox[3] - bbox[1]`` of the region properties entry at
    index 1.
    NOTE(review): index 1 is presumably the scale bar; confirm on real data.
    """
    labels = skimage.measure.label(to_array(image), background=0)
    regions = skimage.measure.regionprops(labels)
    bar = regions[1]
    return bar.bbox[3] - bar.bbox[1]

def extract_strings(image):
    """Read the text of an image with tesseract and pick out three fields.

    The recognised text is split on whitespace.  Token 1 becomes the scale
    (parsed by ``repair_string``), token 3 the date with dashes removed, and
    the last token the time.

    Returns:
      {'scale_microns': float, 'date': str, 'time': str}
    """
    tokens = pytesseract.image_to_string(image).split()
    scale, date, clock = tokens[1], tokens[3], tokens[-1]
    return dict(scale_microns=repair_string(scale),
                date=date.replace('-', ''),
                time=clock)

def extract_metadata(*args, **kwargs):
    """Open an image file and read the metadata embedded in its lower strip.

    All arguments are forwarded unchanged to ``PIL.Image.open``.

    Returns:
      the dict produced by ``extract_strings`` plus a ``scale_pixels`` entry
    """
    footer = crop_image(PIL.Image.open(*args, **kwargs))['lower']
    return dict(scale_pixels=scale_pixels(footer), **extract_strings(footer))

## Rescale the images
def extract_image(*args, **kwargs):
    """Open an image file and return the part above the crop cutoff.

    All arguments are forwarded unchanged to ``PIL.Image.open``.
    """
    opened = PIL.Image.open(*args, **kwargs)
    return crop_image(opened)['upper']

def scale_image(image, rescale_factor):
    """Return a rescaled copy of ``image`` using PIL's thumbnail.

    thumbnail is an inplace operation, so it is applied to a copy and the
    input image is left untouched.

    Args:
      image: a PIL image
      rescale_factor: how much to rescale the image by

    Returns:
      a new image
    """
    # Image.ANTIALIAS was removed in Pillow 10; LANCZOS is the same filter
    # under its current name.  ANTIALIAS is only used as a fallback on
    # Pillow releases old enough not to define LANCZOS.
    resample = getattr(PIL.Image, 'LANCZOS', None)
    if resample is None:
        resample = PIL.Image.ANTIALIAS
    copy_image = image.copy()
    copy_image.thumbnail(numpy.array(copy_image.size) * rescale_factor, resample)
    return copy_image

def get_df(*args, **kwargs):
    """Build a metadata table for every file matching a glob pattern.

    All arguments are forwarded unchanged to ``glob.glob``.  The matches are
    processed in sorted order, one row per file, holding the filename and the
    output of ``extract_metadata``.  Two columns are then added:

      pixel_size: scale_microns / scale_pixels
      rescale_factor: pixel_size divided by the largest pixel_size

    Returns:
      a pandas DataFrame
    """
    records = []
    for filename in sorted(glob.glob(*args, **kwargs)):
        records.append(dict(filename=filename, **extract_metadata(filename)))
    frame = pandas.DataFrame(records)
    frame = frame.assign(pixel_size=frame['scale_microns'] / frame['scale_pixels'])
    frame = frame.assign(rescale_factor=frame['pixel_size'] / max(frame['pixel_size']))
    return frame

# Pipeline that turns the input of get_df into a list of per-image records,
# each carrying the cropped image and a rescaled copy of it.
scaled_images = fcompose(
    # Build the metadata table.
    get_df,
    # Transpose so that to_dict() is keyed by row; keep only the row dicts.
    lambda df: df.T.to_dict().values(),
    # Add the cropped image loaded from the record's filename.
    mapdict(image=lambda data: extract_image(data['filename'])),
    # Add a copy of that image scaled by the record's rescale_factor.
    mapdict(scaled_image=lambda data: scale_image(data['image'], data['rescale_factor'])),
    list
)

## Threshold the images into the ferrite and cementite phase
def threshold_image(*args, **kwargs):
    """Open an image file and binarise the part above the crop cutoff.

    All arguments are forwarded unchanged to ``PIL.Image.open``.

    Returns:
      a boolean array that is True where the pixel value exceeds the Otsu
      threshold of the cropped image
    """
    micrograph = crop_image(PIL.Image.open(*args, **kwargs))['upper']
    pixels = to_array(micrograph)
    return pixels > skimage.filters.threshold_otsu(pixels)

def threshold(filename):
    """Collect the thresholded image and the metadata of one image file.

    Returns:
      a dict holding ``filename``, ``threshold_image`` and every entry
      returned by ``extract_metadata``
    """
    binary = threshold_image(filename)
    metadata = extract_metadata(filename)
    return dict(filename=filename, threshold_image=binary, **metadata)

if __name__ == '__main__':
    # Usage: python threshold.py <path/to/image>
    filename = sys.argv[1]
    # Sample name: last path component, cut at its first dot.
    filename_cleaned = filename.split("/")[-1].split(".")[0]
    output_name = "{0}-threshold.data".format(filename_cleaned)
    result = threshold(filename)

    # The context manager flushes and closes the pickle file; the bare
    # open() used before left the handle for the interpreter to clean up.
    with open(output_name, 'wb') as output:
        pickle.dump(result, output)

    # Single-argument call form prints the same text on Python 2 and 3.
    print(output_name)

Overwriting code/threshold.py


In [2]:
%%writefile code/min_size.py
import glob
import numpy
import matplotlib.pyplot
import scipy.ndimage
import pytesseract
import PIL.Image
from toolz.curried import map, pipe, compose, get, do, curry, count, pluck, juxt, flip
import pandas
import skimage
import skimage.measure
import skimage.filters
import skimage.morphology

import sys
import json
import pickle

## Remove white specks
def f_min_size(scale_microns, scale_pixels, island_size=0.2):
    """Return ``(island_size * scale_pixels / scale_microns) ** 2``.

    NOTE(review): going by the argument names this is the area, in pixels,
    of a square whose side is ``island_size`` microns; confirm the units.
    """
    side = island_size * scale_pixels / scale_microns
    return side ** 2

def min_size(data):
    """Store the result of ``f_min_size`` for this record under 'min_size'.

    The size is computed from the record's 'scale_microns' and
    'scale_pixels'.  ``data`` is modified in place and also returned.
    """
    size = f_min_size(data['scale_microns'], data['scale_pixels'])
    data['min_size'] = size
    return data

if __name__ == '__main__':
    # Usage: python min_size.py <name>-threshold.data
    filename = sys.argv[1]
    # Sample name: last path component, cut at its first dot, with the
    # trailing "-<stage>" part removed.
    filename_cleaned = filename.split("/")[-1].split(".")[0]
    filename_cleaned = "-".join(filename_cleaned.split("-")[0:-1])
    output_name = "{0}-min_size.data".format(filename_cleaned)

    # Pickle data is binary: read it with "rb".  Text mode "r" fails on
    # Python 3 and may alter the bytes on Windows.
    # NOTE(review): pickle.load can execute code embedded in the file; only
    # feed it files produced by this pipeline.
    data = None
    with open(filename, "rb") as intermediate:
        data = pickle.load(intermediate)

    result = min_size(data)
    # Close the output file deterministically instead of leaking the handle.
    with open(output_name, 'wb') as output:
        pickle.dump(result, output)

    # Single-argument call form prints the same text on Python 2 and 3.
    print(output_name)


Overwriting code/min_size.py


In [3]:
%%writefile code/clean.py
import glob
import numpy
import matplotlib.pyplot
import scipy.ndimage
import pytesseract
import PIL.Image
from toolz.curried import map, pipe, compose, get, do, curry, count, pluck, juxt, flip
import pandas
import skimage
import skimage.measure
import skimage.filters
import skimage.morphology

import json
import sys
import pickle

## Remove small specks from the thresholded image
remove_small_holes = curry(skimage.morphology.remove_small_holes)

def clean(data):
    """Store a cleaned copy of the thresholded image under 'clean_image'.

    The threshold image is inverted, passed through ``remove_small_holes``
    with the record's 'min_size', and inverted back.  ``data`` is modified in
    place and also returned.
    """
    inverted = ~data['threshold_image']
    filled = remove_small_holes(inverted, data['min_size'])
    data['clean_image'] = ~filled
    return data

if __name__ == '__main__':
    # Usage: python clean.py <name>-min_size.data
    filename = sys.argv[1]
    # Sample name: last path component, cut at its first dot, with the
    # trailing "-<stage>" part removed.
    filename_cleaned = filename.split("/")[-1].split(".")[0]
    filename_cleaned = "-".join(filename_cleaned.split("-")[0:-1])
    output_name = "{0}-clean.data".format(filename_cleaned)

    # Pickle data is binary: read it with "rb".  Text mode "r" fails on
    # Python 3 and may alter the bytes on Windows.
    # NOTE(review): pickle.load can execute code embedded in the file; only
    # feed it files produced by this pipeline.
    data = None
    with open(filename, "rb") as intermediate:
        data = pickle.load(intermediate)

    result = clean(data)
    # Close the output file deterministically instead of leaking the handle.
    with open(output_name, 'wb') as output:
        pickle.dump(result, output)

    # Single-argument call form prints the same text on Python 2 and 3.
    print(output_name)


Overwriting code/clean.py


In [4]:
%%writefile code/reveal.py
import glob
import numpy
import matplotlib.pyplot
import scipy.ndimage
import pytesseract
import PIL.Image
from toolz.curried import map, pipe, compose, get, do, curry, count, pluck, juxt, flip
import pandas
import skimage
import skimage.measure
import skimage.filters
import skimage.morphology
import sys

import json
import pickle

def fcompose(*args):
    """Compose functions in reading order.

    The functions are handed to ``compose`` in reversed order, so the first
    argument is the first function applied.
    """
    return compose(*reversed(args))

## Binary closing to reveal the Pearlite Phase
# flip swaps the two arguments, so closing(selem)(image) ends up calling
# skimage.morphology.closing(image, selem).
closing = curry(flip(skimage.morphology.closing))
remove_small_holes = curry(skimage.morphology.remove_small_holes)

# Close the image with a 5x5 square, then apply remove_small_holes with a
# size of 1000.  The size is passed positionally, as clean.py does, because
# the keyword was renamed across scikit-image releases (min_size became
# area_threshold); the positional form is accepted by both.
reveal_pearlite = fcompose(
    closing(skimage.morphology.square(5)),
    lambda image: remove_small_holes(image, 1000)
)

def reveal(data):
    """Store the output of ``reveal_pearlite`` under 'pearlite_image'.

    The input is the record's 'clean_image'.  ``data`` is modified in place
    and also returned.
    """
    revealed = reveal_pearlite(data['clean_image'])
    data['pearlite_image'] = revealed
    return data

if __name__ == '__main__':
    # Usage: python reveal.py <name>-clean.data
    filename = sys.argv[1]
    # Sample name: last path component, cut at its first dot, with the
    # trailing "-<stage>" part removed.
    filename_cleaned = filename.split("/")[-1].split(".")[0]
    filename_cleaned = "-".join(filename_cleaned.split("-")[0:-1])
    output_name = "{0}-reveal.data".format(filename_cleaned)

    # Pickle data is binary: read it with "rb".  Text mode "r" fails on
    # Python 3 and may alter the bytes on Windows.
    # NOTE(review): pickle.load can execute code embedded in the file; only
    # feed it files produced by this pipeline.
    data = None
    with open(filename, "rb") as intermediate:
        data = pickle.load(intermediate)

    result = reveal(data)
    # Close the output file deterministically instead of leaking the handle.
    with open(output_name, 'wb') as output:
        pickle.dump(result, output)

    # Single-argument call form prints the same text on Python 2 and 3.
    print(output_name)


Overwriting code/reveal.py


In [5]:
%%writefile code/ferrite.py
import glob
import numpy
import matplotlib.pyplot
import scipy.ndimage
import pytesseract
import PIL.Image
from toolz.curried import map, pipe, compose, get, do, curry, count, pluck, juxt, flip
import pandas
import skimage
import skimage.measure
import skimage.filters
import skimage.morphology
import json
import sys
import pickle

## Volume fraction
def frac1(image):
    """Return the sum of ``image`` divided by its number of elements."""
    total = float(image.sum())
    return total / image.size


def frac0(image):
    """Return the complement of ``frac1``: ``1 - frac1(image)``."""
    return 1 - frac1(image)

def ferrite(data):
    """Store ``frac0`` of the record's 'clean_image' under 'ferrite_fraction'.

    ``data`` is modified in place and also returned.
    """
    fraction = frac0(data['clean_image'])
    data['ferrite_fraction'] = fraction
    return data

if __name__ == '__main__':
    # Usage: python ferrite.py <name>-pearlite.data
    filename = sys.argv[1]
    # Sample name: last path component, cut at its first dot, with the
    # trailing "-<stage>" part removed.
    filename_cleaned = filename.split("/")[-1].split(".")[0]
    filename_cleaned = "-".join(filename_cleaned.split("-")[0:-1])
    output_name = "{0}-ferrite.data".format(filename_cleaned)

    # Pickle data is binary: read it with "rb".  Text mode "r" fails on
    # Python 3 and may alter the bytes on Windows.
    # NOTE(review): pickle.load can execute code embedded in the file; only
    # feed it files produced by this pipeline.
    data = None
    with open(filename, "rb") as intermediate:
        data = pickle.load(intermediate)

    result = ferrite(data)
    # Close the output file deterministically instead of leaking the handle.
    with open(output_name, 'wb') as output:
        pickle.dump(result, output)

    # Single-argument call form prints the same text on Python 2 and 3.
    print(output_name)


Overwriting code/ferrite.py


In [6]:
%%writefile code/cemmentite.py
import glob
import numpy
import matplotlib.pyplot
import scipy.ndimage
import pytesseract
import PIL.Image
from toolz.curried import map, pipe, compose, get, do, curry, count, pluck, juxt, flip
import pandas
import skimage
import skimage.measure
import skimage.filters
import skimage.morphology

import json
import sys
import pickle

## Volume fraction
def frac1(image):
    """Return the sum of ``image`` divided by its number of elements."""
    total = float(image.sum())
    return total / image.size

def cemmentite(data):
    """Store ``frac1`` of the record's 'clean_image' under 'cemmentite_fraction'.

    ``data`` is modified in place and also returned.
    """
    fraction = frac1(data['clean_image'])
    data['cemmentite_fraction'] = fraction
    return data

if __name__ == '__main__':
    # Usage: python cemmentite.py <name>-ferrite.data
    filename = sys.argv[1]
    # Sample name: last path component, cut at its first dot, with the
    # trailing "-<stage>" part removed.
    filename_cleaned = filename.split("/")[-1].split(".")[0]
    filename_cleaned = "-".join(filename_cleaned.split("-")[0:-1])
    output_name = "{0}-cemmentite.data".format(filename_cleaned)

    # Pickle data is binary: read it with "rb".  Text mode "r" fails on
    # Python 3 and may alter the bytes on Windows.
    # NOTE(review): pickle.load can execute code embedded in the file; only
    # feed it files produced by this pipeline.
    data = None
    with open(filename, "rb") as intermediate:
        data = pickle.load(intermediate)

    result = cemmentite(data)
    # Close the output file deterministically instead of leaking the handle.
    with open(output_name, 'wb') as output:
        pickle.dump(result, output)

    # Single-argument call form prints the same text on Python 2 and 3.
    print(output_name)


Overwriting code/cemmentite.py


In [7]:
%%writefile code/save.py
import glob
import numpy
import matplotlib.pyplot
import scipy.ndimage
import pytesseract
import PIL.Image
from toolz.curried import map, pipe, compose, get, do, curry, count, pluck, juxt, flip
import pandas
import skimage
import skimage.measure
import skimage.filters
import skimage.morphology
import sys

import json
import pickle

def save(data):
    """Write the three phase fractions of a record to '<name>.json'.

    ``<name>`` is the last path component of ``data['filename']`` cut at its
    first dot.  The file is created in the current working directory and
    holds the name plus the pearlite, ferrite and cemmentite fractions as
    indented JSON with sorted keys.  Nothing is returned.
    """
    clean_name = data['filename'].split("/")[-1].split(".")[0]
    filtered_data = {'filename': clean_name}
    for key in ('pearlite_fraction', 'ferrite_fraction', 'cemmentite_fraction'):
        filtered_data[key] = data[key]
    text = json.dumps(filtered_data, sort_keys=True, indent=4, separators=(',', ': '))
    with open("{0}.json".format(clean_name), "w") as save_file:
        save_file.write(text)

if __name__ == '__main__':
    # Usage: python save.py <name>-cemmentite.data
    filename = sys.argv[1]

    # No output name is derived from the argument here: save() builds its
    # own file name from data['filename'].

    # Pickle data is binary: read it with "rb".  Text mode "r" fails on
    # Python 3 and may alter the bytes on Windows.
    # NOTE(review): pickle.load can execute code embedded in the file; only
    # feed it files produced by this pipeline.
    data = None
    with open(filename, "rb") as intermediate:
        data = pickle.load(intermediate)

    result = save(data)


Overwriting code/save.py


# Running the study

In [8]:
! maestro -s -d 1 -y -c -t 2 sem-study.yaml

INFO:maestrowf.maestro:INFO Logging Level -- Enabled
2017-09-14 11:54:25,102 - maestrowf.maestro:setup_logging:131 - INFO - INFO Logging Level -- Enabled
CRITICAL:maestrowf.maestro:CRITICAL Logging Level -- Enabled
2017-09-14 11:54:25,102 - maestrowf.maestro:setup_logging:133 - CRITICAL - CRITICAL Logging Level -- Enabled
DEBUG:maestrowf.maestro:DEBUG Logging Level -- Enabled
2017-09-14 11:54:25,102 - maestrowf.maestro:setup_logging:134 - DEBUG - DEBUG Logging Level -- Enabled
DEBUG:maestrowf.datastructures.core.study:Used Parameters - 
{'run-cemmentite': set(['DATA']), 'run-reveal': set(['DATA']), 'run-ferrite': set(['DATA']), 'run-pearlite': set(['DATA']), 'run-clean': set(['DATA']), 'run-threshold': set(['DATA']), 'run-min_size': set(['DATA']), 'run-save': set(['DATA'])}
2017-09-14 11:54:25,103 - maestrowf.datastructures.core.study:_setup_parameterized:368 - DEBUG - Used Parameters - 
{'run-cemmentite': set(['DATA']), 'run-reveal': set(['DATA']), 'run-ferrite': set(['DATA']

DEBUG:maestrowf.datastructures.core.study:Resulting step name: run-threshold_1045_Steel_Nital-etch-3
2017-09-14 11:54:25,132 - maestrowf.datastructures.core.study:_setup_parameterized:405 - DEBUG - Resulting step name: run-threshold_1045_Steel_Nital-etch-3
DEBUG:root:Adding run-threshold_1045_Steel_Nital-etch-3...
2017-09-14 11:54:25,132 - root:add_node:60 - DEBUG - Adding run-threshold_1045_Steel_Nital-etch-3...
DEBUG:maestrowf.datastructures.dag:Node run-threshold_1045_Steel_Nital-etch-3 added. Value is of type <class 'maestrowf.datastructures.core.executiongraph._StepRecord'>.
2017-09-14 11:54:25,132 - maestrowf.datastructures.dag:add_node:66 - DEBUG - Node run-threshold_1045_Steel_Nital-etch-3 added. Value is of type <class 'maestrowf.datastructures.core.executiongraph._StepRecord'>.
INFO:root:Edge (_source, run-threshold_1045_Steel_Nital-etch-3) added.
2017-09-14 11:54:25,132 - root:add_edge:106 - INFO - Edge (_source, run-threshold_1045_Steel_Nital-etch-3) added.
INFO:mae

DEBUG:maestrowf.datastructures.core.study:Resulting step name: run-ferrite_1045_Steel_Nital-etch-6
2017-09-14 11:54:25,188 - maestrowf.datastructures.core.study:_setup_parameterized:405 - DEBUG - Resulting step name: run-ferrite_1045_Steel_Nital-etch-6
DEBUG:root:Adding run-ferrite_1045_Steel_Nital-etch-6...
2017-09-14 11:54:25,188 - root:add_node:60 - DEBUG - Adding run-ferrite_1045_Steel_Nital-etch-6...
DEBUG:maestrowf.datastructures.dag:Node run-ferrite_1045_Steel_Nital-etch-6 added. Value is of type <class 'maestrowf.datastructures.core.executiongraph._StepRecord'>.
2017-09-14 11:54:25,188 - maestrowf.datastructures.dag:add_node:66 - DEBUG - Node run-ferrite_1045_Steel_Nital-etch-6 added. Value is of type <class 'maestrowf.datastructures.core.executiongraph._StepRecord'>.
INFO:root:Edge (run-pearlite_1045_Steel_Nital-etch-6, run-ferrite_1045_Steel_Nital-etch-6) added.
2017-09-14 11:54:25,188 - root:add_edge:106 - INFO - Edge (run-pearlite_1045_Steel_Nital-etch-6, run-ferrite

2017-09-14 11:54:25,244 - maestrowf.datastructures.core.executiongraph:generate_scripts:233 - INFO - Step -- run-ferrite_1045_Steel_Nital-etch-1
Script: /Users/fyc/Desktop/Hackaton-09-11-2017/Demo/corr-maestrowf/samples/sem-images-stats/study/sample_output/sem-images-stats/SEM-Images-Stats_20170914-115425/1045_Steel_Nital-etch-1/run-ferrite_1045_Steel_Nital-etch-1.sh
Restart: None
Scheduled?: False
INFO:maestrowf.datastructures.core.executiongraph:Generating scripts...
2017-09-14 11:54:25,244 - maestrowf.datastructures.core.executiongraph:generate_scripts:226 - INFO - Generating scripts...
INFO:maestrowf.datastructures.core.executiongraph:Step -- run-cemmentite_1045_Steel_Nital-etch-1
Script: /Users/fyc/Desktop/Hackaton-09-11-2017/Demo/corr-maestrowf/samples/sem-images-stats/study/sample_output/sem-images-stats/SEM-Images-Stats_20170914-115425/1045_Steel_Nital-etch-1/run-cemmentite_1045_Steel_Nital-etch-1.sh
Restart: None
Scheduled?: False
2017-09-14 11:54:25,245 - maestrowf.

DEBUG:maestrowf.maestro:nohup conductor -t 2 -d 1 /Users/fyc/Desktop/Hackaton-09-11-2017/Demo/corr-maestrowf/samples/sem-images-stats/study/sample_output/sem-images-stats/SEM-Images-Stats_20170914-115425 &> /Users/fyc/Desktop/Hackaton-09-11-2017/Demo/corr-maestrowf/samples/sem-images-stats/study/sample_output/sem-images-stats/SEM-Images-Stats_20170914-115425/SEM-Images-Stats.txt
2017-09-14 11:54:25,307 - maestrowf.maestro:main:196 - DEBUG - nohup conductor -t 2 -d 1 /Users/fyc/Desktop/Hackaton-09-11-2017/Demo/corr-maestrowf/samples/sem-images-stats/study/sample_output/sem-images-stats/SEM-Images-Stats_20170914-115425 &> /Users/fyc/Desktop/Hackaton-09-11-2017/Demo/corr-maestrowf/samples/sem-images-stats/study/sample_output/sem-images-stats/SEM-Images-Stats_20170914-115425/SEM-Images-Stats.txt
