In [2]:
import numpy as np
import numpy.random as nr
import scipy.stats as ss
import tifffile, os
import multiprocess as mp
from numba import jit, prange
from dask import delayed

In [4]:


"""
    This script is for simulating multi-dimensional data for Klaus. The raw 3D
    is grabbed from https://www.digitalrocksportal.org/projects/6
    The 3D data is cropped from the original data.
"""
######### numba
ph1_cen = 41
ph1_std = 11

ph2_cen = 68
ph2_std = 9.5

ph3_cen = 82
ph3_std = 9.3

ph4_cen = 141
ph4_std = 9.2

y_p1 = ss.norm(ph1_cen, ph1_std).pdf
y_p2 = ss.norm(ph2_cen, ph2_std).pdf
y_p3 = ss.norm(ph3_cen, ph3_std).pdf
y_p4 = ss.norm(ph4_cen, ph4_std).pdf

p = np.ndarray([4])

#@jit
def ratio(gray_val):   
    p[0] = y_p1(gray_val)
    p[1] = y_p2(gray_val)
    p[2] = y_p3(gray_val)
    p[3] = y_p4(gray_val)
    ps = p.sum()
    #return (p[:i].sum()/ps for i in range(4))
    p0 = p[:1].sum()/ps
    p1 = p[:2].sum()/ps
    p2 = p[:3].sum()/ps
    p3 = p[:4].sum()/ps
    return p0, p1, p2, p3

#@jit
def discriminator(gray_val):
    c1 = 0
    c2 = 0 
    c3 = 0
    c4 = 0
    p1, p2, p3, p4 = ratio(gray_val)
    for ii in prange(100):
        seat = nr.random()
        if seat<p1:
            c1 += 1
        elif seat<p2:
            c2 += 1
        elif seat<p3:
            c3 += 1
        else:
            c4 += 1
    return c1, c2, c3, c4


data = tifffile.imread("/media/xiao_usb/originating/256x256x256.tif").reshape([-1])
n_cpu = os.cpu_count()
dim = data.shape
with mp.Pool(n_cpu-1) as pool:
    rlt = pool.starmap(discriminator, [data[ii] for ii in range(int(1e4))])
pool.join()
pool.close()

rlt = []
for ii in prange(dim[0]):
    rlt.append(discriminator(data[ii]))

TypeError: 'numpy.uint8' object is not iterable

In [None]:
a = [data[ii] for ii in range(int(1e4))]
print(type(a))
dir(tuple.__iter__)

In [None]:
########## dask
ph1_cen = 41
ph1_std = 11

ph2_cen = 68
ph2_std = 9.5

ph3_cen = 82
ph3_std = 9.3

ph4_cen = 141
ph4_std = 9.2

y_p1 = ss.norm(ph1_cen, ph1_std).pdf
y_p2 = ss.norm(ph2_cen, ph2_std).pdf
y_p3 = ss.norm(ph3_cen, ph3_std).pdf
y_p4 = ss.norm(ph4_cen, ph4_std).pdf

p = np.ndarray([4])

def ratio(gray_val):   
    p[0] = y_p1(gray_val)
    p[1] = y_p2(gray_val)
    p[2] = y_p3(gray_val)
    p[3] = y_p4(gray_val)
    ps = p.sum()
    #return (p[:i].sum()/ps for i in range(4))
    p0 = p[:1].sum()/ps
    p1 = p[:2].sum()/ps
    p2 = p[:3].sum()/ps
    p3 = p[:4].sum()/ps
    return p0, p1, p2, p3

def discriminator(gray_val):
    c1 = 0
    c2 = 0 
    c3 = 0
    c4 = 0
    p1, p2, p3, p4 = ratio(gray_val)
    #for ii in prange(100):
    seat = nr.random()
    if seat<p1:
        c1 += 1
    elif seat<p2:
        c2 += 1
    elif seat<p3:
        c3 += 1
    else:
        c4 += 1
    return c1, c2, c3, c4


data = tifffile.imread("/media/xiao_usb/originating/256x256x256.tif").reshape([-1])
n_cpu = os.cpu_count()
dim = data.shape
print(dim)


results = []
for ii in range(dim[0]):
    rlt = delayed(discriminator)(data[ii])
    results.append(rlt)
dask.compute(*results)

In [1]:
import numpy as np
import numpy.random as nr
import scipy.stats as ss
import tifffile, os, time
import multiprocess as mp

In [61]:
########## multiprocess

"""
    This script is for simulating multi-dimensional data for Klaus. The raw 3D
    is grabbed from https://www.digitalrocksportal.org/projects/6
    The 3D data is cropped from the original data.
"""

ph1_cen = 41
ph1_std = 11

ph2_cen = 68
ph2_std = 9.5

ph3_cen = 82
ph3_std = 9.3

ph4_cen = 141
ph4_std = 9.2

y_p1 = ss.norm(ph1_cen, ph1_std).pdf
y_p2 = ss.norm(ph2_cen, ph2_std).pdf
y_p3 = ss.norm(ph3_cen, ph3_std).pdf
y_p4 = ss.norm(ph4_cen, ph4_std).pdf

p = np.ndarray([4])

def ratio(gray_val):   
    p[0] = y_p1(gray_val)
    p[1] = y_p2(gray_val)
    p[2] = y_p3(gray_val)
    p[3] = y_p4(gray_val)
    ps = p.sum()
    # print(ps)
    return list(p[:i].sum()/ps for i in range(1, 5))
    
def discriminator(gray_val):
    c1 = c2 = c3 = c4 = 0
    p1, p2, p3, p4 = ratio(gray_val)
    for ii in range(1000):
        seat = nr.random()
        if seat<p1:
            c1 += 1
        elif seat<p2:
            c2 += 1
        elif seat<p3:
            c3 += 1
        else:
            c4 += 1
    return c1, c2, c3, c4
    # return nr.random()

# if __name__ == "__main__":
data = tifffile.imread("/media/xiao_usb/originating/256x256x256.tif").reshape([-1])
n_cpu = os.cpu_count()
dim = data.shape
print(data.shape, data[100])
results = []

print(time.asctime())
with mp.Pool(maxtasksperchild=10) as pool:
    rlt = pool.map(discriminator, [data[ii] for ii in np.int32(np.arange(dim[0]))])
    # results.append(rlt)
    # print(rlt)       
pool.close()
pool.join()

print(time.asctime())
print(len(rlt))

(16777216,) 96
Fri Jun 12 23:03:07 2020
Fri Jun 12 23:34:40 2020
16777216


In [47]:

print(ratio(80))
print(y_p1(80))
print(rlt[11910600])

[0.0, 0.001110046443019528, 0.31166018420855524, 0.9999999997976192]
6.759626871616375e-05
(0, 8, 559, 433)


In [62]:
results2 = np.array(rlt).reshape([256, 256, 256, 4])

In [63]:
tifffile.imsave("/media/xiao_usb/originating/channel1.tif", results2[:, :, :, 0].astype(np.float32))
tifffile.imsave("/media/xiao_usb/originating/channel2.tif", results2[:, :, :, 1].astype(np.float32))
tifffile.imsave("/media/xiao_usb/originating/channel3.tif", results2[:, :, :, 2].astype(np.float32))
tifffile.imsave("/media/xiao_usb/originating/channel4.tif", results2[:, :, :, 3].astype(np.float32))

In [57]:
%matplotlib qt
import matplotlib.pyplot as plt
import napari
plt.imshow(results2[:, :, 0, 0])

<matplotlib.image.AxesImage at 0x7f01bc343450>

In [58]:
with napari.gui_qt():
    napari.view_image(results2)



In [None]:
(16777216,) 96
Fri Jun 12 21:50:17 2020
Fri Jun 12 21:50:24 2020
100000

(16777216,) 96
Fri Jun 12 21:51:12 2020
Fri Jun 12 21:51:18 2020
100000



(16777216,) 96
Fri Jun 12 21:09:54 2020
Fri Jun 12 21:22:50 2020
16777216

In [128]:
a = np.array([2, 1], dtype=np.float32)
b = np.tile(a[:, np.newaxis, np.newaxis], [1, 1000, 1000])
c = np.tile(np.arange(100).astype(np.float32)[:, np.newaxis], [1, 2])

e = (a * c)
e1 = (a * c).sum(axis=1)
#d = (b * c[:, :, np.newaxis, np.newaxis, np.newaxis])
d1 = (b * c[:, :, np.newaxis, np.newaxis]).sum(axis=1)


print(a.shape)
print(c.shape)
print(e.shape)
#print(e[:, :, ...])
print(e1.shape)

print(b.shape)
#print(d.shape)
print(d1.shape)



(2,)
(100, 2)
(100, 2)
(100,)
(2, 1000, 1000)
(100, 1000, 1000)


In [134]:
np.vstack((np.ones([10])*2, np.ones([10]))).T.shape

(10, 2)

In [136]:
d1.sum(axis=0).shape

(1000, 1000)

In [1]:
%matplotlib qt
from larch.xafs import preedge
from larch.math.fitpeak import fit_peak
import h5py, os, time, timeit, numpy as np
import multiprocessing as mp
import matplotlib.pyplot as plt

[numexpr.utils] INFO : Note: NumExpr detected 24 cores but "NUMEXPR_MAX_THREADS" not set, so enforcing safe limit of 8.
[numexpr.utils] INFO : NumExpr defaulting to 8 threads.


In [2]:
fn = '/media/xiao_usb/3D_trial_reg_scan_id_29773-29873_2020-06-01-20-38-09.h5'
f = h5py.File(fn, 'r')
#data = f['/registration_results/reg_results/registered_xanes3D'][:, 0, 50:200, 420:550]
data = f['/registration_results/reg_results/registered_xanes3D'][:, 0, :, :]*1e4
eng = f['/registration_results/reg_results/eng_list'][:]*1e3
f.close()
# eng -= eng[0]

dim = data.shape
print(dim)

### pre_edge
# ids = list(np.arange(20)) + list(np.arange(40, 49))
### fit_peak
ids = list(np.arange(40, 54))
x = eng[ids]
y = data[ids, :].reshape([len(ids), -1])

n_cpu = os.cpu_count()
par_dict = {'amplitude':1e-3, 'center':x[-1], 'sigma':3.0,
            'slope':0.1, 'intercept':0.0}
par_bounds = ((1e-5, x[-8], 0.1, 0, 0), (1e-2, x[-1], 10, 1, 2))

print(x.shape, y.shape)
print(time.asctime())    
############ multiprocessing
with mp.Pool(n_cpu-1) as pool:
    # rlt = pool.starmap(preedge, [(x, y[:, ii]) for ii in np.int32(np.arange(dim[1]*dim[2]))])
    rlt = pool.starmap(fit_peak, [(x, y[:, ii], 'lorentzian') for ii in np.int32(np.arange(dim[1]*dim[2]))])
pool.join()
pool.close()

print(time.asctime())
#print(len(rlt))

(101, 383, 1040)
(14,) (14, 398320)
Sun Jun 14 14:06:40 2020
Sun Jun 14 15:00:28 2020


In [8]:
print((rlt[0].fit))

[-6.5672310e-07 -7.5151883e-07 -8.6970380e-07 -1.0166663e-06
 -1.2063750e-06 -1.4518193e-06 -1.7843104e-06 -2.2402783e-06
 -2.9040248e-06 -3.9033016e-06 -5.5066685e-06 -8.4416870e-06
 -1.4391217e-05 -2.9843643e-05]


In [10]:
print(rlt[0]._members().keys())

dict_keys(['x', 'y', 'dy', 'model', 'background', 'form', 'init_params', 'fit', 'fit_init', 'fit_details', 'chi_square', 'chi_reduced', 'aic', 'bic', 'covar', 'rfactor', 'params', 'nvarys', 'nfree', 'ndata', 'var_names', 'nfev', 'success', 'errorbars', 'message', 'lmdif_message', 'residual'])


In [25]:
plt.figure(1)
for ii in range(50, 55):
    for jj in range(75, 80):
        idx = ii*dim[2] + jj
        plt.plot(rlt[0].x, rlt[idx].y)
        #plt.plot(rlt[0].x, rlt[idx].fit)

In [20]:
plt.figure(2)
plt.imshow(y[10].reshape([dim[1], dim[2]]))

<matplotlib.image.AxesImage at 0x7f723034fc90>

In [56]:
a = data
b = np.reshape(a, (a.shape[0], -1))
print(a.shape, b.shape)

(101, 150, 130) (101, 19500)


In [1]:
import numpy as np
from scipy.optimize import minimize, curve_fit, least_squares as lsq, lsq_linear, nnls
import multiprocess as mp

from lmfit.lineshapes import (gaussian, lorentzian, voigt, pvoigt, moffat, pearson7,
                         breit_wigner, damped_oscillator, dho, logistic, lognormal,
                         students_t, expgaussian, donaich, skewed_gaussian,
                         skewed_voigt, step, rectangle,  
                         exponential, powerlaw, linear, parabolic, sine, 
                         expsine, split_lorentzian)

functions = {'gaussian':gaussian, 'lorentzian':lorentzian, 'voigt':voigt, 
             'pvoigt':pvoigt, 'moffat':moffat, 'pearson7':pearson7,
             'breit_wigner':breit_wigner, 'damped_oscillator':damped_oscillator,
             'dho':dho, 'logistic':logistic, 'lognormal':lognormal,
             'students_t':students_t, 'expgaussian':expgaussian, 
             'donaich':donaich, 'skewed_gaussian':skewed_gaussian,
             'skewed_voigt':skewed_voigt, 'step':step, 'rectangle':rectangle, 
             'exponential':exponential, 'powerlaw':powerlaw, 
             'linear':linear, 'parabolic':parabolic, 'sine':sine, 
             'expsine':expsine, 'split_lorentzian':split_lorentzian}

In [13]:
import h5py, os, time
fn = '/media/xiao_usb/3D_trial_reg_scan_id_29773-29873_2020-06-01-20-38-09.h5'
f = h5py.File(fn, 'r')
#data = f['/registration_results/reg_results/registered_xanes3D'][:, 0, 50:200, 420:550]
data = f['/registration_results/reg_results/registered_xanes3D'][:, 0, :, :]*1e4
eng = f['/registration_results/reg_results/eng_list'][:]*1e3
f.close()

ids = list(np.arange(40, 54))
x = eng[ids]
y = data[ids, :].reshape([len(ids), -1])

func = functions['lorentzian']   
dim = data.shape
print(dim)
print(x.shape, y.shape)

def res(x0, y0=None):
    return func(x0) - y0
    #rlt_t = lsq(func(x0)-y0, x0, bounds=(x0[0], x0[-1]))
    #return rlt_t
def local_lsq(x0, y0):
    lsq(res, x0, **{y0:y0})

n_cpu = os.cpu_count()
pars_ini = {}
#bnds = (x0[0], x0[-1])
print(time.asctime())
############ multiprocessing
with mp.Pool(n_cpu-1) as pool:
    rlt = pool.starmap(local_lsq, [(x, y[:, ii]) for ii in np.int32(np.arange(dim[1]*dim[2]))])
pool.join()
pool.close()
print(time.asctime())

(101, 383, 1040)
(14,) (14, 398320)
Mon Jun 15 08:00:12 2020


TypeError: unhashable type: 'numpy.ndarray'

In [25]:
"""
The res definition here works but it IS WRONG in meanings. func is a Lorentzian lineshape defined in lmfit. Its first argument is x, and Lorentzian
parmeters are defined as keywords with default values. The definition below assume the Lorentzian function has fixed parameters defined as their
default values and fit x to minimize the the fitted value and the given value.

To use lsq for curve fitting, we need to define a loss function in form of loss(func, fvars, x, y): return sum((func(fvars, x) - y)**2)
"""
def res(x0, *args):
    return func(x0) - args[0]

def local_lsq(x0, y0):
    lsq(res, x0, args=(y0))
    
res(x, y[:, 0])
r = lsq(res, (1, x[10], 2), args=(y[:, 0]))

scipy.optimize.curve_fit

In [None]:
"""
use scipy.optimize.curve_fit to fit experimental data
The easiest way to do curve fitting is to use curve_fit. The loss function is defined internally.
However, the convergence tolerance cannot be changed in curve_fit. The below case failed for exceeding
maximum allowed iterations.
"""
%matplotlib qt
import matplotlib.pyplot as plt
import numpy as np
from scipy.optimize import minimize, curve_fit, least_squares as lsq, lsq_linear, nnls
import multiprocess as mp
import h5py, os, time

from lmfit.lineshapes import (gaussian, lorentzian, voigt, pvoigt, moffat, pearson7,
                         breit_wigner, damped_oscillator, dho, logistic, lognormal,
                         students_t, expgaussian, donaich, skewed_gaussian,
                         skewed_voigt, step, rectangle,  
                         exponential, powerlaw, linear, parabolic, sine, 
                         expsine, split_lorentzian)

functions = {'gaussian':gaussian, 'lorentzian':lorentzian, 'voigt':voigt, 
             'pvoigt':pvoigt, 'moffat':moffat, 'pearson7':pearson7,
             'breit_wigner':breit_wigner, 'damped_oscillator':damped_oscillator,
             'dho':dho, 'logistic':logistic, 'lognormal':lognormal,
             'students_t':students_t, 'expgaussian':expgaussian, 
             'donaich':donaich, 'skewed_gaussian':skewed_gaussian,
             'skewed_voigt':skewed_voigt, 'step':step, 'rectangle':rectangle, 
             'exponential':exponential, 'powerlaw':powerlaw, 
             'linear':linear, 'parabolic':parabolic, 'sine':sine, 
             'expsine':expsine, 'split_lorentzian':split_lorentzian}

In [31]:
import h5py, os, time
fn = '/media/xiao_usb/3D_trial_reg_scan_id_29773-29873_2020-06-01-20-38-09.h5'
f = h5py.File(fn, 'r')
#data = f['/registration_results/reg_results/registered_xanes3D'][:, 0, 50:200, 420:550]
data = f['/registration_results/reg_results/registered_xanes3D'][:, 0, :, :]*1e4
eng = f['/registration_results/reg_results/eng_list'][:]*1e3
f.close()

ids = list(np.arange(40, 54))
x = eng[ids]
y = data[ids, :].reshape([len(ids), -1])

func = functions['lorentzian']   
dim = data.shape
print(dim)
print(x.shape, y.shape)

def fitting(func, x0, y0, xinits, bnds=None):
    curve_fit(func, x0, y0, p0=xinits, bounds=bnds)

print(time.asctime())

bnds = ((0.01, x[0], 0.1), (5, x[-1], 13))
n_cpu = os.cpu_count()
with mp.Pool(n_cpu - 1) as pool:
    rlt = pool.starmap(fitting, [(func, x, y[:,ii], (1, x[10], 2), bnds) for ii in np.int32(np.arange(dim[1]*dim[2]))])
pool.close()
pool.join()

print(time.asctime())

(101, 383, 1040)
(14,) (14, 398320)
Mon Jun 15 10:52:49 2020


RuntimeError: Optimal parameters not found: The maximum number of function evaluations is exceeded.

define a customized curve fitting function with scipy.optimize.least_squares

In [34]:
"""
use scipy.optimize.curve_fit to fit experimental data
The easiest way to do curve fitting is to use curve_fit. The loss function is defined internally.
However, the convergence tolerance cannot be changed in curve_fit. The below case failed for exceeding
maximum allowed iterations.
"""
%matplotlib qt
import matplotlib.pyplot as plt
import numpy as np
from scipy.optimize import minimize, curve_fit, least_squares as lsq, lsq_linear, nnls
import multiprocess as mp
import h5py, os, time

from lmfit.lineshapes import (gaussian, lorentzian, voigt, pvoigt, moffat, pearson7,
                         breit_wigner, damped_oscillator, dho, logistic, lognormal,
                         students_t, expgaussian, donaich, skewed_gaussian,
                         skewed_voigt, step, rectangle,  
                         exponential, powerlaw, linear, parabolic, sine, 
                         expsine, split_lorentzian)

functions = {'gaussian':gaussian, 'lorentzian':lorentzian, 'voigt':voigt, 
             'pvoigt':pvoigt, 'moffat':moffat, 'pearson7':pearson7,
             'breit_wigner':breit_wigner, 'damped_oscillator':damped_oscillator,
             'dho':dho, 'logistic':logistic, 'lognormal':lognormal,
             'students_t':students_t, 'expgaussian':expgaussian, 
             'donaich':donaich, 'skewed_gaussian':skewed_gaussian,
             'skewed_voigt':skewed_voigt, 'step':step, 'rectangle':rectangle, 
             'exponential':exponential, 'powerlaw':powerlaw, 
             'linear':linear, 'parabolic':parabolic, 'sine':sine, 
             'expsine':expsine, 'split_lorentzian':split_lorentzian}

In [50]:
fn = '/media/xiao_usb/3D_trial_reg_scan_id_29773-29873_2020-06-01-20-38-09.h5'
f = h5py.File(fn, 'r')
#data = f['/registration_results/reg_results/registered_xanes3D'][:, 0, 50:200, 420:550]
data = f['/registration_results/reg_results/registered_xanes3D'][:, 0, :, :]*1e4
eng = f['/registration_results/reg_results/eng_list'][:]*1e3
f.close()

ids = list(np.arange(40, 54))
x = eng[ids]
y = data[ids, :].reshape([len(ids), -1])

func = functions['lorentzian']   
dim = data.shape
print(dim)
print(x.shape, y.shape)

def f(fvars, func, x, y):
    return (func(x, fvars[0], fvars[1], fvars[2]) - y)

def my_lsq(f, fvars, func, x, y, bnds, ftol, xtol):
    #return lsq(f, fvars, bounds=bnds, ftol=ftol, xtol=xtol, args=(func, x, y))
    return lsq(f, fvars, ftol=ftol, xtol=xtol, args=(func, x, y))

print(time.asctime())

bnds = ([0.01, x[0], 0.1], [5, x[-1], 13])
ftol = 1e-7
xtol = 1e-7
n_cpu = os.cpu_count()
with mp.Pool(n_cpu - 1) as pool:
    rlt = pool.starmap(my_lsq, [(f, (1, x[10], 2), func, x, y[:, ii], bnds, ftol, xtol) for ii in np.int32(np.arange(dim[1]*dim[2]))])
pool.close()
pool.join()

print(time.asctime())

(101, 383, 1040)
(14,) (14, 398320)
Mon Jun 15 22:02:17 2020
Mon Jun 15 22:03:19 2020


In [68]:
def f(fvars, func, x, y):
    return (func(x, fvars[0], fvars[1], fvars[2]) - y)

def my_lsq(f, fvars, func, x, y, bnds, ftol, xtol):
    return lsq(f, fvars, bounds=bnds, ftol=ftol, xtol=xtol, args=(func, x, y))
    #return lsq(f, fvars, args=(func, x, y))
bnds = (-np.inf, np.inf)
r1 = my_lsq(f, (1, x[10], 2), func, x, y[:, 1000], bnds, ftol, xtol)
fr = f((1, x[10], 2), func, x, y[:, 0])
#func(x, 1, x[10], 2)
print(r1)
print(fr)

 active_mask: array([0., 0., 0.])
        cost: 115.13583374023438
         fun: array([-3.6105313, -2.6567092, -4.0474324, -2.8279493, -3.762611 ,
       -3.474504 , -6.886277 , -4.192997 , -3.652342 , -4.9720206,
       -6.003239 , -1.748344 , -2.124915 , -3.5910625], dtype=float32)
        grad: array([0., 0., 0.])
         jac: array([[0., 0., 0.],
       [0., 0., 0.],
       [0., 0., 0.],
       [0., 0., 0.],
       [0., 0., 0.],
       [0., 0., 0.],
       [0., 0., 0.],
       [0., 0., 0.],
       [0., 0., 0.],
       [0., 0., 0.],
       [0., 0., 0.],
       [0., 0., 0.],
       [0., 0., 0.],
       [0., 0., 0.]])
     message: '`gtol` termination condition is satisfied.'
        nfev: 1
        njev: 1
  optimality: 0.0
      status: 1
     success: True
           x: array([1.00000000e+00, 8.35198828e+03, 2.00000000e+00])
[-3.2673974 -6.8749404 -2.3300834 -6.7442718 -5.4636497 -4.573359
 -2.9513004 -6.435964  -7.0283213 -3.3106365 -2.4589443 -5.1177034
 -4.0046678 -3.4856863]


In [56]:
print(rlt[12100])

 active_mask: array([0, 0, 0])
        cost: 244.14503479003906
         fun: array([22.097286], dtype=float32)
        grad: array([0., 0., 0.])
         jac: array([[0., 0., 0.]])
     message: '`gtol` termination condition is satisfied.'
        nfev: 1
        njev: 1
  optimality: 0.0
      status: 1
     success: True
           x: array([1.00000000e+00, 8.35198828e+03, 2.00000000e+00])
