In [None]:
# Cap the memory the kernel process may allocate for its data segment.
# The `resource` module exists only on Unix-like systems; where it cannot be
# imported the cap is skipped instead of aborting the notebook.
try:
    import resource
except ImportError:
    resource = None

# Requested limits for RLIMIT_DATA, in bytes (8 GiB soft, 10 GiB hard).
soft_limit = 8*1024*1024*1024
hard_limit = 10*1024*1024*1024

if resource is not None:
    # A hard limit can be lowered but not raised by an unprivileged process,
    # so the request is clamped to the hard limit currently in force.
    _current_soft, _current_hard = resource.getrlimit(resource.RLIMIT_DATA)
    if _current_hard != resource.RLIM_INFINITY:
        hard_limit = min(hard_limit, _current_hard)
    # The soft limit may never exceed the hard limit.
    soft_limit = min(soft_limit, hard_limit)
    resource.setrlimit(resource.RLIMIT_DATA, (soft_limit, hard_limit))


In [1]:
from skimage import data, util, measure
from skimage.filters import threshold_otsu
from skimage.measure import label, regionprops
from skimage.segmentation import watershed
import numpy as np
import skimage.io as io
import os
import pandas as pd
from PIL import Image
import matplotlib.pyplot as plt
from scipy import ndimage 
from skimage.feature import peak_local_max
from skimage import morphology
from skimage import segmentation
import plotly
from skimage import (
    exposure, util
)
from concurrent.futures import ThreadPoolExecutor

In [2]:
# Binarise a whole 3D stack with a single Otsu threshold, computed on an
# 8-bit copy of the data to keep memory use down.
def calculate_treshold(images):
    """Return a boolean mask of the values above the Otsu threshold.

    The input is multiplied by 255 and cast to ``uint8``, the Otsu threshold
    of that 8-bit copy is printed, and the element-wise comparison
    ``copy > threshold`` is returned.

    NOTE(review): the ``* 255`` rescale looks like it expects float
    intensities in [0, 1]; integer input would wrap around -- confirm against
    the data that is actually loaded.
    """
    scaled = (images * 255).astype('uint8')
    otsu_level = threshold_otsu(scaled)
    print(otsu_level)
    return scaled > otsu_level
# Only for 3D images: the filter window is a cube.
def median_filter_3d(image, x):
    """Median-filter a 3D volume with a cubic ``x`` x ``x`` x ``x`` window.

    Parameters
    ----------
    image : array-like
        Three-dimensional input volume.
    x : int
        Edge length of the cubic filter window.

    Returns
    -------
    numpy.ndarray
        Filtered volume with the same shape as ``image``.
    """
    # The notebook imports `ndimage` directly (`from scipy import ndimage`);
    # the previous `sc.ndimage` referred to a name that is never defined and
    # raised NameError on the first call.
    return ndimage.median_filter(image, size=(x, x, x))

def plot_results(df, directory, directory_name):
    """Scatter-plot area against maximum Feret diameter, save it and show it.

    Parameters
    ----------
    df : pandas.DataFrame
        Must provide the columns ``"area"`` and ``"ferret_diameter_max"``.
    directory : str
        Prefix the output file name is appended to verbatim, so it has to end
        with a path separator.
    directory_name : str
        Measurement identifier shown in the plot title.
    """
    fig, ax = plt.subplots()
    try:
        ax.scatter(df["area"], df["ferret_diameter_max"])
        ax.set_title("Measurment number: " + directory_name)
        ax.set_xlabel('Area')
        ax.set_ylabel('Ferret diameter max')
        # Write the file through the Figure object before it is displayed, so
        # the saved image does not depend on what the backend does in show().
        fig.savefig(directory + "area_ferret_diamether")
        plt.show()
    finally:
        # pyplot keeps a reference to every figure until it is closed; this
        # function runs once per measurement, so close the figure explicitly
        # to stop figures piling up in memory.
        plt.close(fig)
    
def save_dataframe(df, directory):
    """Export ``df`` to an Excel workbook with a fixed file name.

    The file name is appended to ``directory`` verbatim, so ``directory``
    must already end with a path separator.
    """
    workbook_name = "pandas_watershed_3d_ferret_20_cores.xlsx"
    target = directory + workbook_name
    df.to_excel(target)
    
# Runs in the calling thread only; apply_watershed_multiprocessing() spreads
# the same computation over several workers and is usually the better choice.
def apply_watershed(image):
    """Separate touching objects in a binary image with a watershed.

    Seeds are placed at the local maxima of the Euclidean distance transform
    of ``image``; the negated distance map is then flooded from those seeds,
    restricted to ``image``. The label image produced by ``watershed`` is
    returned.
    """
    edt = ndimage.distance_transform_edt(image)
    peak_idx = peak_local_max(edt, labels=image)

    # Turn the peak coordinates into a boolean seed image.
    seeds = np.zeros(edt.shape, dtype=bool)
    seeds[tuple(peak_idx.T)] = True

    # Give every connected seed its own marker id.
    markers = ndimage.label(seeds)[0]
    return watershed(-edt, markers, mask=image)

# Fails on an array concatenated from separately watershed-ed chunks, so call
# it only on the output of label().
def measure_props(labels, directory_name=None):
    """Measure per-region properties of a label image.

    Parameters
    ----------
    labels : numpy.ndarray
        Integer label image.
    directory_name : str, optional
        Measurement name used in the error message. When omitted, a
        notebook-level variable called ``directory_name`` is used if one
        exists, otherwise ``"<unknown>"``.

    Returns
    -------
    pandas.DataFrame
        One row per region with the columns from ``regionprops_table``
        ("area", "area_filled", "axis_major_length", "extent",
        "equivalent_diameter_area", "label") plus "area regionprops" and
        "ferret_diameter_max". The latter is -1 for regions whose Feret
        diameter could not be computed.
    """
    props1 = measure.regionprops_table(
        labels,
        properties=("area", "area_filled", "axis_major_length", "extent",
                    "equivalent_diameter_area", "label"),
    )
    df = pd.DataFrame(props1)
    print("Basic properties measured, start to measure ferret diameter max")

    if directory_name is None:
        # The message used to read the loop variable of the calling cell
        # directly, which raised NameError inside the error handler whenever
        # that variable did not exist. Look it up defensively instead.
        directory_name = globals().get("directory_name", "<unknown>")

    regions = regionprops(labels)
    total = len(df)
    areas = []
    ferets = []
    for i in range(total):
        region = regions[i]
        areas.append(region.area)
        try:
            ferets.append(region.feret_diameter_max)
        except Exception:
            # Best effort: flag the region with -1 and carry on. Catching
            # Exception rather than everything lets KeyboardInterrupt stop a
            # long run.
            print(f'error ferret_diameter max measurment {directory_name} region {i} from {total}')
            ferets.append(-1)

    # Assign whole columns at once instead of growing them cell by cell; the
    # columns also exist (empty) when there are no regions at all.
    df['area regionprops'] = np.asarray(areas, dtype=float)
    df['ferret_diameter_max'] = np.asarray(ferets, dtype=float)
    return df
    
# Every chunk is segmented on its own, so marker ids start over in each chunk;
# run label() on the result before handing it to measure_props().
def apply_watershed_multiprocessing(space, n_chunks=18):
    """Run apply_watershed() on slabs of a 3D volume in parallel threads.

    The volume is split along axis 2, each slab is segmented by a worker of a
    ``ThreadPoolExecutor`` (threads, despite the function name), and the
    results are concatenated back along axis 2.

    Parameters
    ----------
    space : numpy.ndarray
        Three-dimensional binary volume.
    n_chunks : int, optional
        Number of slabs and worker threads. Defaults to 18, the value that
        used to be hard-coded. It is reduced to the number of slices along
        axis 2 when the volume is thinner than that, so no empty slab is ever
        passed to the watershed.

    Returns
    -------
    numpy.ndarray
        Segmented volume with the same shape as ``space``.
    """
    depth = space.shape[2]
    n_chunks = max(1, min(int(n_chunks), depth))
    subregions = np.array_split(space, n_chunks, axis=2)

    def worker(subregion):
        return np.array(apply_watershed(subregion))

    with ThreadPoolExecutor(n_chunks) as executor:
        results = list(executor.map(worker, subregions))

    contencated = np.concatenate(results, axis=2)
    print(f"From multiprocessing watershed is returning image with shape: {contencated.shape}")
    return contencated

In [3]:
# Root folder that contains one sub-folder per measurement. It has to be set
# before running: os.listdir("") raises FileNotFoundError.
path = ""

# os.listdir() returns names in arbitrary order and also lists plain files.
# Keep only the sub-folders and sort them, so the processing order (and any
# "skip the first N" step) is the same on every run.
file_list = sorted(
    name for name in os.listdir(path)
    if os.path.isdir(os.path.join(path, name))
)

In [None]:
# Number of leading entries of file_list to drop -- presumably to resume after
# measurements that were already processed. The original `range()` call had no
# argument and raised TypeError; 0 keeps the whole list.
N_SKIP = 0

# Removes the entries in place; note that re-running this cell drops another
# N_SKIP entries.
del file_list[:N_SKIP]

# Show which entry will be processed first (guarded so an empty list does not
# raise IndexError).
print(file_list[0] if file_list else "file_list is empty")

In [None]:
# Process every measurement folder: load the TIFF stack, binarise, denoise,
# segment, measure, then save the table and the scatter plot next to the data.
for directory_name in file_list:
    # The trailing "" makes join() end the result with the platform's path
    # separator, so file names can be appended to `directory` directly. The
    # previous hard-coded '\\' only worked on Windows.
    directory = os.path.join(path, directory_name, "")
    print("file: "+directory)
    # Release the previous measurement's volumes before loading the next
    # stack, so two measurements are never held in memory at the same time.
    image = binary = filtered = watersheded = labeled = None
    image = np.array(io.imread_collection(directory+"*.tif", plugin='tifffile'))
    print(image.shape)
    #apply treshold
    print('treshold calculation start')
    binary=calculate_treshold(image)
    print('treshold applied')
    print("Start to median filter")
    filtered= median_filter_3d(binary, 4)
    print('Median filter done')
    #watershed for 3d
    watersheded = apply_watershed_multiprocessing(filtered)
    # Chunks were segmented independently, so relabel before measuring.
    labeled = label(watersheded)
    print('labeling done')
    df = measure_props(labeled)
    print(df.shape)
    save_dataframe(df, directory)
    plot_results(df, directory, directory_name)
    print("Pandas excel sheet saved")


print('all done')