In [None]:
%matplotlib inline
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

## Outflow from lake level drop after rain event

In [None]:
def df_resample(df):
    """
    Resample a time-indexed dataframe to a daily sample rate.

    The data are first placed on a regular 15 minute grid that runs from
    midnight of the first day to midnight of the last day found in the
    index, gaps are filled by linear interpolation, and the result is
    averaged per calendar day.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with a monotonic ``DatetimeIndex``.

    Returns
    -------
    pandas.DataFrame
        Daily means, labelled with the start (midnight) of each day.
    """
    # First upsample to 15 min intervals. An original sample is only copied
    # onto a grid point when it lies within 15 minutes of it; the remaining
    # grid points are filled by linear interpolation.
    # The alias '15min' replaces '15T', which is deprecated in recent pandas.
    ndates = pd.date_range(start=df.index.date[0], end=df.index.date[-1],
                           freq='15min')
    ndf = df.reindex(ndates, method='nearest',
                     tolerance=np.timedelta64(15, 'm')).interpolate()
    # Then downsample to 1 day intervals; each value is labelled with the
    # left edge (midnight) of its day.
    ndf = ndf.resample('1D', label='left').mean()
    return ndf


In [None]:
# Lake level ('z') observations of site RU001A from the GeoNet FITS service.
url = "https://fits.geonet.org.nz/observation?siteID={}&networkID=VO&typeID=z"
names = ['h', 'h_err']
ldf = pd.read_csv(url.format('RU001A'), names=names, skiprows=1,
                  index_col=0, parse_dates=True)

# Cut out four time windows (both bounds exclusive); presumably each one
# covers the lake level drop following a rain event.
stamps = ldf.index
ldf1 = ldf.loc[(stamps > '2018-01-05 9:00:00') & (stamps < '2018-01-12')]
ldf2 = ldf.loc[(stamps > '2018-02-01 14:00:00') & (stamps < '2018-02-08')]
ldf3 = ldf.loc[(stamps > '2018-02-20 22:00:00') & (stamps < '2018-02-27')]
ldf4 = ldf.loc[(stamps > '2018-03-07 19:00:00') & (stamps < '2018-03-12')]

# Visual check of the last window.
plt.figure()
ldf4['h'].plot()

In [None]:
# Overview of the lake level between 2017-11-01 and 2018-04-02 (both bounds
# exclusive), i.e. the period that contains the four windows selected above.
plt.figure()
ldf['h'].loc[(ldf.index > '2017-11-01') & (ldf.index < '2018-04-2') ].plot()
#ldf['h'].plot()

In [None]:
def normalize(vals):
    """
    Rescale ``vals`` linearly so that its minimum maps to 0 and its
    maximum maps to 1.

    ``vals`` has to provide ``min()`` and ``max()`` methods (e.g. a numpy
    array or a pandas Series).
    """
    lowest = vals.min()
    span = vals.max() - lowest
    return (vals - lowest)/span

In [None]:
# Daily mean lake level for each of the four windows.
rldf1 = df_resample(ldf1)
rldf2 = df_resample(ldf2)
rldf3 = df_resample(ldf3)
rldf4 = df_resample(ldf4)
# Pad the fourth window to 7 daily rows (days without data become NaN) so
# that it can be combined with the other windows below.
rldf4 = rldf4.reindex(pd.date_range(start=rldf4.index[0], periods=7))

# Stack the windows side by side. The values are aligned by position (day
# number within the window), not by date; the dates of the first window are
# only used as a common index.
ndf = pd.DataFrame({'h1': rldf1['h'].values,
                    'h2': rldf2['h'].values,
                    'h3': rldf3['h'].values,
                    'h4': rldf4['h'].values}, index=rldf1.index)

# Mean and standard deviation across the windows for every day, ignoring NaN.
mn = ndf.mean(axis=1, skipna=True)
std = ndf.std(axis=1, skipna=True)

# Individual windows (black) on top of the mean (dashed red) and the
# +/- one standard deviation band, plotted against the day number.
fig = plt.figure()
ax = fig.add_subplot(111)
x = np.arange(mn.size)
ax.fill_between(x, mn+std, mn-std, color='red', alpha=0.5)
ax.plot(x, mn, 'r--')
ndf.plot(ax=ax, use_index=False, legend=False, color='k')

In [None]:
import pymc3 as pm

In [None]:

# Exponential recession model for the mean daily lake level:
#   g(x) = offset + (v0 - offset) * exp(-lambda * x)
# where x is the day number defined in an earlier cell.
basic_model = pm.Model()

with basic_model:
    # Decay constant, uniform prior on [0, 2]
    lambda_ = pm.Uniform('lambda',0., 2.)
    # Level at x = 0, uniform prior on [2, 3]
    v0 = pm.Uniform('v0', 2, 3)
    # Level that g approaches for large x; normal prior around 2.0.
    # NOTE: the name `os` is reused for a different object in a later cell.
    os = pm.Normal('offset', mu=2.0, sd=0.1)
    g = os + (v0-os)*np.exp(-lambda_*x)
    # Gaussian likelihood of the across-window mean `mn`, using the
    # across-window standard deviation `std` as the observation error.
    d_obs = pm.Normal('d_obs', mu=g, sd=std.values, observed=mn)
    # Draw posterior samples with 4 chains (all other settings are defaults)
    trace = pm.sample(chains=4)

In [None]:
# Trace plot of the lake level model; the return value is assigned to `_`
# so that it is not echoed as cell output.
_ = pm.traceplot(trace)

In [None]:
# Posterior mean and standard deviation of the decay constant and the offset.
print(trace['lambda'].mean(), trace['lambda'].std())
print(trace['offset'].mean(), trace['offset'].std())

In [None]:
# Posterior predictive check: request `nresample` simulated data sets
# from the fitted lake level model.
nresample=200
ppc = pm.sample_ppc(trace, samples=nresample, model=basic_model)

In [None]:
# Mean and spread of the posterior predictive draws for every day.
ppc_draws = ppc['d_obs']
mn_ppc = ppc_draws.mean(axis=0)
std_ppc = ppc_draws.std(axis=0)
x = np.arange(mn_ppc.size)

# Predictive mean (dashed red) with a +/- one standard deviation band,
# overlaid with the observed windows (black).
fig, ax = plt.subplots()
band_top = mn_ppc + std_ppc
band_bottom = mn_ppc - std_ppc
ax.fill_between(x, band_top, band_bottom, color='red', alpha=0.5)
ax.plot(x, mn_ppc, 'r--')
ndf.plot(ax=ax, use_index=False, legend=False, color='k')

In [None]:
from clemb.forward_model import fullness

# Monte Carlo estimate of the outflow: pick random posterior samples,
# simulate a lake level curve for each of them and convert the level above
# the fitted offset into a discharge.
len_trace = len(trace)
nchain = trace.nchains
nsamples=600
outflow = np.zeros((nsamples, ppc['d_obs'].shape[1]))
# Flat indices over all chains; split below into (chain, position in chain)
indices = np.random.randint(0, nchain*len_trace, nsamples)
for i, idx in enumerate(indices):
    chain_idx, point_idx = np.divmod(idx, len_trace)
    param = trace._straces[chain_idx].point(point_idx)
    _l = param['lambda']
    _o = param['offset']
    # Simulated observations for this parameter set
    _d = d_obs.distribution.random(point=param)
    # Lake level relative to the fitted offset
    ll = _d - _o
    a, vol = fullness(ll+2529.35)
    nvol = a*ll*1e3 # volume in liters
    # l/day converted to l/s
    dvol = nvol*_l/86400
    outflow[i] = dvol

# Mean outflow (dashed red) with a +/- one standard deviation band.
mn_outf = outflow.mean(axis=0)
std_outf = outflow.std(axis=0)
x = np.arange(mn_outf.size)
fig, ax = plt.subplots()
ax.fill_between(x, mn_outf+std_outf, mn_outf-std_outf, color='red', alpha=0.5)
ax.plot(x, mn_outf, 'r--')
ax.set_yticks(np.arange(0, 450,50))
ax.grid()


In [None]:
# Inspect entry 100 of the lake level trace (presumably the parameter
# values of a single posterior sample).
trace[100]

In [None]:
# Area of a circle with radius 500 (units are not stated; presumably metres,
# as a rough lake surface area -- TODO confirm).
np.pi*(500.*500.)

### Nested Sampling

In [None]:
from sampling import NestedSampling, Uniform, Callback, Normal, SamplingException

In [None]:
class PyCallback(Callback):
    """
    Likelihood callback for the nested sampler.

    Evaluates the exponential decay model
    ``offset + (v0 - offset) * exp(-lambda * x)`` against the stored data
    and returns ``-0.5 * r^T * prec * r`` with ``r`` the data residual.
    """

    def __init__(self):
        Callback.__init__(self)

    def set_data(self, x, d, prec):
        """
        Store the sample positions ``x``, the observations ``d`` and the
        matrix ``prec`` that weights the residuals in :meth:`run`.
        """
        self.x, self.d, self.prec = x, d, prec

    def run(self, vals):
        """
        Return the log-likelihood value for ``vals = (lambda, v0, offset)``.

        Any error raised during the evaluation is printed and re-raised as
        ``SamplingException``.
        """
        lambda_, v0, offset = vals[0], vals[1], vals[2]
        try:
            model = offset + (v0-offset)*np.exp(-lambda_*self.x)
            resid = self.d - model
            return -0.5*np.dot(resid, np.dot(self.prec, resid))
        except Exception as err:
            print(err)
            raise SamplingException()

In [None]:
# Hand the data to the callback. The last argument is a diagonal matrix with
# 1/std**2 on the diagonal (the identity matrix scaled column-wise).
pycb = PyCallback()
pycb.set_data(x, mn.values, 1./(std.values*std.values) *np.eye(x.size))
# Evaluate the likelihood once for (lambda, v0, offset) = (0.5, 2.25, 2)
pycb.run((0.5,2.25, 2))

In [None]:
# Priors with the same parameters as in the pymc3 model above.
# NOTE: `v` and `os` are rebound here and are reused for other objects in
# later cells.
l = Uniform('lambda', 0, 2)
v = Uniform('v0', 2, 3)
os = Normal('offset', 2.0, 0.1)
ns = NestedSampling()
# Presumably hands ownership of the callback object to the sampler's
# native side before registering it (SWIG director idiom) -- TODO confirm.
pycb.__disown__()
ns.setCallback(pycb)
nsamples = 1000
# Order of `vars` must match the order in which PyCallback.run unpacks
# `vals`: lambda, v0, offset.
rs = ns.explore(vars=[l, v, os], initial_samples=100,
                        maximum_steps=nsamples)

In [None]:
# Resample the nested sampling result; the argument 100 is presumably the
# number of posterior samples to draw -- TODO confirm against `sampling`.
rs1 = rs.resample_posterior(100)

In [None]:
def forward(x, smp):
    """
    Evaluate the exponential decay model for every sample in ``smp``.

    Each sample provides its parameters through ``_vars[k].get_value()``
    in the order lambda, v0, offset.

    Returns a tuple ``(median, p16, p84)``: the median and the 16th and
    84th percentile of the model curves at every position in ``x``.
    """
    curves = np.zeros((len(smp), x.size))
    for row, sample in zip(curves, smp):
        lmb = sample._vars[0].get_value()
        v0 = sample._vars[1].get_value()
        off = sample._vars[2].get_value()
        # `row` is a view, so this fills the matching row of `curves`
        row[:] = off + (v0-off)*np.exp(-lmb*x)
    lower, upper = (np.percentile(curves, q, axis=0) for q in (16., 84.))
    return (np.median(curves, axis=0), lower, upper)

In [None]:
plt.figure(figsize=(12,4))

# Left: observed mean with +/- one standard deviation band (red) and the
# median model with its 16th/84th percentile curves (blue).
plt.subplot(1,2,1)
plt.fill_between(x, mn+std, mn-std, color='red', alpha=0.5)
plt.plot(x, mn, 'r')
md, miny, maxy = forward(x, rs1)
plt.plot(x, md, color='#1f77b4')
for bound in (maxy, miny):
    plt.plot(x, bound, color='#1f77b4', linestyle='--')

# Right: histogram of the resampled decay constants.
plt.subplot(1,2,2)
lmbds = [s._vars[0].get_value() for s in rs1]
_ = plt.hist(lmbds, bins=30)

## Outflow from Mg++ concentration

### Get the raw data from FITS

In [None]:
from clemb.forward_model import fullness

def get_data(siteID, tstart=None, tend=None):
    """
    Download Mg++ concentrations and lake levels from FITS and combine them.

    Parameters
    ----------
    siteID : str
        FITS site the Mg++ ('Mg-w') observations are requested for. The
        lake level is always taken from the sites 'RU001' and 'RU001A'.
    tstart, tend : str or None
        Optional inclusive bounds applied to both time series.

    Returns
    -------
    pandas.DataFrame
        One row per calendar day (mean of all measurements of that day)
        with the columns ``obs``/``obs_err`` (Mg++ observation and error),
        ``h`` (lake level nearest in time, at most one day away), ``V``
        (volume from ``fullness`` times 1e6) and ``X`` (``obs * V``).
    """
    # Get Mg++ concentration
    url = "https://fits.geonet.org.nz/observation?siteID={}&typeID=Mg-w"
    names = ['obs', 'obs_err']
    mg_df = pd.read_csv(url.format(siteID),
                        index_col=0, names=names, skiprows=1,
                        parse_dates=True)
    if tstart is not None:
        mg_df = mg_df.loc[mg_df.index >= tstart]
    if tend is not None:
        mg_df = mg_df.loc[mg_df.index <= tend]

    # Get lake level; values of RU001 take precedence, RU001A fills the gaps
    url = "https://fits.geonet.org.nz/observation?siteID={}&typeID=z"
    names = ['h', 'h_err']
    ldf = pd.read_csv(url.format('RU001'),
                      index_col=0, names=names, skiprows=1,
                      parse_dates=True)
    ldf1 = pd.read_csv(url.format('RU001A'),
                       index_col=0, names=names, skiprows=1,
                       parse_dates=True)
    ldf = ldf.combine_first(ldf1)

    # Shift the readings of each period by a period-specific constant
    # (presumably to convert relative readings to absolute elevation).
    # NOTE(review): readings from 2012-12-31 up to 2016-01-01 are left
    # untouched -- confirm that this is intended.
    early = ldf.index < '1997-01-01'
    ldf.loc[early, 'h'] = 2530. + ldf.loc[early, 'h']
    middle = (ldf.index > '1997-01-01') & (ldf.index < '2012-12-31')
    ldf.loc[middle, 'h'] = 2529.5 + (ldf.loc[middle, 'h'] - 1.3)
    late = ldf.index > '2016-01-01'
    ldf.loc[late, 'h'] = 2529.35 + (ldf.loc[late, 'h'] - 2.0)
    if tstart is not None:
        ldf = ldf.loc[ldf.index >= tstart]
    if tend is not None:
        ldf = ldf.loc[ldf.index <= tend]

    # Pick for every Mg++ sample the lake level nearest in time (at most one
    # day away) and fill the remaining gaps by linear interpolation.
    ldf = ldf.reindex(index=mg_df.index, method='nearest',
                      tolerance=np.timedelta64(1,'D')).interpolate()

    a, vol = fullness(ldf['h'].values)
    mg_df['h'] = ldf['h']
    mg_df['V'] = vol*1e6
    mg_df['X'] = mg_df['obs']*mg_df['V']

    def common_date(date):
        """
        Map a timestamp to midnight of its calendar day so that all
        measurements of one day fall into the same group.
        """
        ndt = pd.Timestamp(year=date.year,
                           month=date.month,
                           day=date.day)
        return ndt

    # If there are several measurements in one day, take the mean. Rows are
    # grouped by default; the deprecated `axis` keyword is not passed.
    mg_df = mg_df.groupby(common_date).mean()
    return mg_df

# Mg++ data of site RU003 from 2009-09-25 onwards.
tstart = '2009-09-25'
df1 = get_data('RU003', tstart)

fig, axs = plt.subplots(nrows=2, ncols=2, figsize=(14,8))
y = df1['obs'].values
v = df1['V'].values
# Elapsed time in days since the first sample. Dividing by a one-day
# timedelta is independent of the time resolution of the index and of the
# platform's default integer size.
x = np.r_[0, np.cumsum(np.diff(df1.index) / np.timedelta64(1, 'D'))]
# Top left: concentration against elapsed days
axs[0,0].plot(x, y, 'k+')
axs[0,0].set_ylabel('Concentration [mg/l]')
# Top right: concentration and lake level (green, second y-axis) against date
df1['obs'].plot(ax=axs[0, 1])
ax_01_1 = axs[0, 1].twinx()
ax_01_1.plot(df1['h'], color='g')
# Bottom left: volume
axs[1,0].plot(df1['V'])
# Bottom right: obs*V with the concentration (dashed red, second y-axis)
axs[1,1].plot(df1['X'])
ax_11_1 = axs[1,1].twinx()
ax_11_1.plot(df1['obs'], 'r--')


In [None]:
# First rows of the combined RU003 frame returned by get_data.
df1.head()

In [None]:
# Mg++ data of site RU001 between 1982-09-01 and 1988-12-31.
tstart = '1982-09-01'
tend = '1988-12-31'
df2 = get_data('RU001', tstart, tend)

fig, axs = plt.subplots(nrows=2, ncols=2, figsize=(14,8))
y1 = df2['obs'].values
v1 = df2['V'].values
# Elapsed time in days since the first sample. Dividing by a one-day
# timedelta is independent of the time resolution of the index and of the
# platform's default integer size.
x1 = np.r_[0, np.cumsum(np.diff(df2.index) / np.timedelta64(1, 'D'))]
# Top left: concentration against elapsed days
axs[0,0].plot(x1, y1, 'k+')
axs[0,0].set_ylabel('Concentration [mg/l]')
# Top right: concentration and lake level (green, second y-axis) against date
df2['obs'].plot(ax=axs[0, 1])
ax_01_1 = axs[0, 1].twinx()
ax_01_1.plot(df2['h'], color='g')
# Bottom left: volume
axs[1,0].plot(df2['V'])
# Bottom right: obs*V with the concentration (dashed red, second y-axis)
axs[1,1].plot(df2['X'])
ax_11_1 = axs[1,1].twinx()
ax_11_1.plot(df2['obs'], 'r--')



In [None]:
# Compare obs*V of both data sets against the days elapsed since the start
# of the respective record (RU003 as markers, RU001 as a line).
plt.plot(x, df1['X'].values, 'k+')
plt.plot(x1, df2['X'].values, 'k')

In [None]:
from theano import shared

def fit_model(x, y, v):
    """
    Fit an exponential dilution model to Mg++ concentrations with pymc3.

    The model is ``g = offset + (v0 - offset) * exp(-lambda * x / v)``.

    Parameters
    ----------
    x : array of elapsed times (days in the calling cells)
    y : array of observed concentrations
    v : array of volumes, same length as ``x``

    Returns
    -------
    tuple
        ``(trace, model)``: the sampled trace and the pymc3 model.
    """
    mg_model = pm.Model()
    with mg_model:
        # Uniform prior on [1e6, 3e7]; since lambda*x/v must be
        # dimensionless, lambda has the units of v per unit of x.
        lambda_ = pm.Uniform('lambda',1e6, 3e7)
        # Concentration at x = 0
        v0 = pm.Uniform('v0', 900, 1300)
        # Concentration approached for large x
        os = pm.Normal('offset', mu=300, sd=50)
        g = os + (v0-os)*np.exp(-lambda_*x/v)
        # Observation noise, shared by all samples.
        # NOTE(review): a Normal prior does not exclude negative values for
        # a standard deviation -- confirm that this is acceptable.
        sigma = pm.Normal('sigma', mu=50, sd=10)
        mg_obs = pm.Normal('mg_obs', mu=g, sd=np.ones(x.size)*sigma, observed=y)
        trace = pm.sample(draws=2000, tune=1000, chains=4)
    return (trace, mg_model)
# Fit both records: trace1/model1 for RU003 (x, y, v) and
# trace2/model2 for RU001 (x1, y1, v1).
trace1, model1 = fit_model(x, y, v)
trace2, model2 = fit_model(x1, y1, v1)

In [None]:
# Trace plot of the Mg++ model fitted to the RU003 record.
_ = pm.traceplot(trace1)

In [None]:
# Trace plot of the Mg++ model fitted to the RU001 record.
_ = pm.traceplot(trace2)

In [None]:
fig = plt.figure()
ax = fig.add_subplot(111)
def plot_model(ax, trace, model, x, y, nresample=200, color='red'):
    """
    Plot the posterior predictive distribution of a fitted Mg++ model.

    Parameters
    ----------
    ax : matplotlib axes to draw on
    trace, model : result of ``fit_model``
    x, y : elapsed times and observed concentrations used for the fit
    nresample : number of posterior predictive samples requested
    color : colour of the predictive mean line and of the
        +/- one standard deviation band
    """
    ppc = pm.sample_ppc(trace, samples=nresample, model=model)
    mn_mg = ppc['mg_obs'].mean(axis=0)
    std_mg = ppc['mg_obs'].std(axis=0)
    ax.fill_between(x, mn_mg+std_mg, mn_mg-std_mg, color=color, alpha=0.5)
    # Draw the mean in the requested colour; it used to be red regardless
    # of `color`, which made the two models indistinguishable.
    ax.plot(x, mn_mg, linestyle='--', color=color)
    # Observations
    ax.plot(x, y, 'k')
plot_model(ax, trace1, model1, x, y)
plot_model(ax, trace2, model2, x1, y1, color='blue')

In [None]:
# Volume at a lake level of 2529.35 and the outflow that follows from the
# posterior mean decay constant (per-day rate divided by 86400 s/day).
# NOTE(review): `trace` is the trace of the lake level model, not trace1 or
# trace2 of the Mg++ models fitted above -- confirm that this is intended.
a, vol = fullness(np.array([2529.35]))
mean = trace.get_values('lambda').mean()
print(vol, vol*(mean)*1e6/86400.)

In [None]:
from scipy.interpolate import interp1d
# Linear interpolator for the volume as a function of elapsed days. Outside
# the sampled range it returns the last volume value instead of raising.
f = interp1d(x, v, kind='linear', bounds_error=False, 
             fill_value=v[-1])
plt.plot(x,v)
# Evaluate on a daily grid of 3600 days and overlay it on the samples
x_new = np.arange(0,3600)
plt.plot(x_new, f(x_new))

In [None]:
def predict(x, v, trace, samples):
    """
    Simulate Mg++ concentrations from randomly chosen posterior samples.

    Parameters
    ----------
    x : array of elapsed times
    v : array of volumes, same length as ``x``
    trace : posterior samples; either an object with ``nchains`` and
        ``_straces`` or a plain sequence of parameter dictionaries with the
        keys 'lambda', 'offset', 'v0' and 'sigma'
    samples : number of curves to simulate

    Returns
    -------
    numpy.ndarray
        Array of shape ``(samples, x.size)``; every row is one noisy
        realisation of ``offset + (v0 - offset) * exp(-lambda * x / v)``.
    """
    n_draws = len(trace)
    # A trace without `nchains` is treated as a single chain
    nchain = getattr(trace, 'nchains', 1)
    picks = np.random.randint(0, nchain * n_draws, samples)
    y_predict = np.zeros((samples, x.size))
    for row, idx in enumerate(picks):
        if nchain > 1:
            # Split the flat index into (chain, position within chain)
            chain_idx, point_idx = np.divmod(idx, n_draws)
            param = trace._straces[chain_idx].point(point_idx)
        else:
            param = trace[idx]
        offset = param['offset']
        decay = np.exp(-param['lambda']*x/v)
        y_predict[row] = np.random.normal(
            loc=offset + (param['v0']-offset)*decay,
            scale=param['sigma'])
    return y_predict
# Simulate 10 curves on a daily grid of 3600 days, using the interpolated
# volume and the RU003 fit.
xp = np.arange(3600)
vp = f(xp)
yp = predict(xp, vp, trace1, 10)

In [None]:
# Mean and spread of the simulated curves (blue) against the data (black).
yp_mean, yp_std = yp.mean(axis=0), yp.std(axis=0)
fig, ax = plt.subplots()
band_top = yp_mean + yp_std
band_bottom = yp_mean - yp_std
ax.fill_between(xp, band_top, band_bottom, color='blue', alpha=0.5)
ax.plot(xp, yp_mean, 'b--')
ax.plot(x, y, 'k')
yp_mean.shape

### Modeling Mg++ using Gaussian Processes

In [None]:
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import RBF, WhiteKernel, DotProduct, ExpSineSquared

In [None]:
# Kernel components: k1 scaled dot-product kernel, k2 scaled RBF kernel
# with length scale 10, k3 white noise.
k1 = 1e-3**2*DotProduct(sigma_0=y[0])
k2 = 10.**2*RBF(length_scale=10.0)
# NOTE(review): the initial noise_level of 1 lies outside the given bounds
# (1e1, 1e3) -- confirm that this is intended.
k3 =  2.* WhiteKernel(noise_level=1, noise_level_bounds=(1e1, 1e+3))
gp = GaussianProcessRegressor(kernel=k1*k1 + k2 + k3, alpha=0, normalize_y=True)
X_new = np.linspace(0,3500,350)
# The regressor has not been fitted yet, so these 10 curves are drawn
# before any data are taken into account.
y_mean = gp.sample_y(X_new[:,np.newaxis], n_samples=10)

In [None]:
# Plot the 10 curves sampled from the not yet fitted Gaussian process.
_ = plt.plot(y_mean)

In [None]:
# Fit the Gaussian process to all samples with x < 4000.
idx = np.where(x<4000)
# Column vector of inputs as required by scikit-learn
X = x[idx[0], np.newaxis]
# NOTE: Y is not used below; the fit uses the 1-d array y[idx[0]].
Y = y[idx[0], np.newaxis]
gp.fit(X,y[idx[0]])

In [None]:
# Predictive mean with a +/- 3 standard deviation band (black) and the
# observations (red markers).
y_mean, y_cov = gp.predict(X_new[:,np.newaxis], return_cov=True)
y_band = 3*np.sqrt(np.diag(y_cov))
plt.figure(figsize=(12,6))
plt.plot(X_new, y_mean, 'k', lw=1, zorder=9)
plt.fill_between(X_new, y_mean - y_band, y_mean + y_band,
                 alpha=0.5, color='k')
plt.plot(x,y,'r+')

### Modeling Mg++ using Kalman Filter

In [None]:
from filterpy.kalman import UnscentedKalmanFilter as UKF
from filterpy.kalman import MerweScaledSigmaPoints, JulierSigmaPoints
from filterpy.common import Q_discrete_white_noise

In [None]:
def f_x(x, dt):
    """
    State transition function for the unscented Kalman filter.

    The state is ``x = [y, k]``. ``y`` is propagated over one time step
    ``dt`` according to ``dy/dt = -(k/8.8e2) * y`` with a 4th order
    Runge-Kutta step; ``k`` is passed on unchanged.

    ``dt`` may be a scalar or a numpy array, in which case its first
    element is used.
    """
    level = x[0]
    rate = x[1]/8.8e2
    step = dt[0] if isinstance(dt, np.ndarray) else dt

    def increment(value):
        # Change of the level over one step for the given level
        return -rate * value * step

    # 4th order Runge-Kutta
    k0 = increment(level)
    k1 = increment(level + 0.5 * k0)
    k2 = increment(level + 0.5 * k1)
    k3 = increment(level + k2)
    return np.array([level + 1./6.*(k0 + 2 * k1 + 2 * k2 + k3), x[1]])

def h_x(x):
    """
    Measurement function for the unscented Kalman filter: only the first
    state component is observed. Returns it as a one-element list.
    """
    observed = x[0]
    return [observed]


In [None]:
def interpolate_mg(x, y, ndays, dt=1):
    """
    Inter- and extrapolate Mg++ measurements using a
    non-linear Kalman filter.

    The measurements are placed on a regular time grid that starts at
    ``x[0]``, has the spacing ``dt`` and extends ``ndays`` beyond ``x[-1]``.
    Every measurement is assigned to the grid point nearest to its time;
    grid points without a measurement are passed to the filter as ``None``.
    The filtered states are smoothed with an RTS smoother afterwards.

    Parameters
    ----------
    x : numpy array of measurement times (days)
    y : numpy array of measured concentrations, same length as ``x``
    ndays : number of days to extrapolate beyond the last measurement
    dt : spacing of the time grid

    Returns
    -------
    tuple
        ``(t, y_new, y_std, k, k_std)``: grid times, smoothed first state
        component and its standard deviation, smoothed second state
        component and its standard deviation. The first element of each
        output is the initial value, not a smoothed estimate.
    """
    # Initial guess of the second state component and initial standard
    # deviations of both components; also reported as first output element.
    k_init, y_init_std, k_init_std = .6, 100., 3.
    # Standard deviation assumed for a measurement
    obs_std = 50.

    dts = np.arange(x[0], x[-1]+ndays+1, dt)[:, np.newaxis]
    ny = np.full(dts.size, None)
    # Map measurement times to grid positions. Using the offset from x[0]
    # in units of dt keeps this correct when x does not start at 0 or when
    # dt differs from 1.
    grid_idx = np.rint((np.asarray(x) - x[0])/dt).astype(int)
    ny[grid_idx] = y[:]

    points = MerweScaledSigmaPoints(n=2, alpha=.01, beta=2., kappa=1.)
    kf = UKF(dim_x=2, dim_z=1, dt=dt, fx=f_x, hx=h_x, points=points)
    kf.x = np.array([y[0], k_init])
    kf.Q = Q_discrete_white_noise(2, dt=dt, var=1e-5)
    kf.P = np.diag([y_init_std**2, k_init_std**2])
    kf.R = obs_std**2
    npoints = dts.size
    means = np.zeros((npoints-1, 2))
    covariances = np.zeros((npoints-1, 2, 2))
    # The first grid point is the initial state, so filtering starts at the
    # second one.
    for i, z_n in enumerate(ny[1:]):
        kf.predict()
        kf.update(z_n)
        means[i,:] = kf.x
        covariances[i, :, :] = kf.P
    Ms, P, K = kf.rts_smoother(means, covariances)
    y_new = np.r_[ny[0], Ms[:,0]]
    k = np.r_[k_init, Ms[:, 1]]
    y_std = np.r_[y_init_std, np.sqrt(P[:, 0, 0])]
    k_std = np.r_[k_init_std, np.sqrt(P[:, 1, 1])]
    return (dts[:,0], y_new, y_std, k, k_std)

In [None]:
# Filter/smooth the RU003 record on a daily grid and extrapolate 300 days
# beyond the last measurement.
t, y_ext, y_std_ext, k, k_std = interpolate_mg(x, y, 300)

In [None]:
plt.figure(figsize=(14,6))

# Left: smoothed concentration with a +/- 3 standard deviation band and
# the measurements (markers).
plt.subplot(1, 2, 1)
y_band = 3*y_std_ext
plt.plot(t, y_ext, 'k--')
plt.fill_between(t, y_ext-y_band, y_ext+y_band, alpha=0.5)
plt.plot(x, y, 'k+')

# Right: smoothed second state component with a +/- 3 standard deviation
# band.
plt.subplot(1, 2, 2)
k_band = 3*k_std
plt.plot(t, k)
plt.fill_between(t, k-k_band, k+k_band, alpha=0.5)
plt.ylim(-1,1)

In [None]:
# Smoothed second state component scaled by 1e-3 times the first volume
# value and divided by 86400 (presumably an outflow per second -- TODO
# confirm units).
plt.plot(k*1e-3*v[0]/86400.)

In [None]:
# Scratch cell: display the volume array of the RU003 record.
v

In [None]:
# Scratch cell: rebuild the daily measurement vector the way interpolate_mg
# does (None where there is no measurement). Note that this rebinds `t`.
t = np.arange(x[0], x[-1]+1, 1)
ny = np.full(t.size, None)
ny[x.astype(int)] = y[:]

In [None]:
# Scratch cell: number of dimensions of the measurement vector.
ny[1:].ndim

In [None]:
# Scratch cell: display the Mg++ observations of the RU003 record.
y

In [None]:
# Scratch cell: check what np.full returns when the fill value is None.
np.full(5, None)