In [None]:
%matplotlib inline
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
#import pymc3 as pm

### MCMC for measured outflow

In [None]:
# GeoNet FITS observation endpoint, queried with typeID=z for each site.
url = "https://fits.geonet.org.nz/observation?siteID={}&typeID=z"
names = ['h', 'h_err']
csv_opts = dict(index_col=0, names=names, skiprows=1, parse_dates=True)

# Read both site records and merge them: entries from RU001 are kept and
# RU001A only fills positions where RU001 has no value.
ldf = pd.read_csv(url.format('RU001'), **csv_opts)
ldf1 = pd.read_csv(url.format('RU001A'), **csv_opts)
ldf = ldf.combine_first(ldf1)

# Three disjoint time windows, each with its own level correction.
# NOTE(review): a row stamped exactly '1997-01-01', and every row from
# '2012-12-31' up to and including '2016-01-01', matches none of these
# masks and keeps its raw value -- confirm this is intended.
early = ldf.index < '1997-01-01'
middle = (ldf.index > '1997-01-01') & (ldf.index < '2012-12-31')
late = ldf.index > '2016-01-01'

ldf.loc[early, 'h'] = 2530. + ldf.loc[early, 'h']
ldf.loc[middle, 'h'] = 2529.5 + (ldf.loc[middle, 'h'] - 1.3)
ldf.loc[late, 'h'] = 2529.35 + (ldf.loc[late, 'h'] - 2.0)

# Resample (helper defined outside this cell) and keep only the rows from
# 2016-03-04 onwards.
df = df_resample(ldf)
df = df.loc[df.index >= '2016-03-04']

In [None]:
# Outflow measurements as [date, value] rows. Mixing str and float in one
# array makes NumPy store every entry as a string, hence the cast below.
of = np.array([
    ['2019-02-26', 70.],
    ['2018-06-10', 200.],
    ['2017-12-12', 86.],
    ['2015-04-24', 8.9],
    ['2012-01-25', 23.],
    ['2011-12-09', 79.5],
    ['2010-03-04', 86.],
    ['2010-01-29', 175.],
])
date_col, value_col = of.T
dates = pd.DatetimeIndex(date_col)
vals = value_col.astype(float)
ofs = pd.Series(data=vals, index=dates)

# Column assignment aligns on the index: rows of df without a matching
# measurement date end up as NaN.
df['of'] = ofs
# The error attached to each measurement is 30% of its value.
df['of_err'] = pd.Series(data=0.3 * vals, index=dates)

In [None]:
# From field observations we assume that there is no
# outflow below 1.9 m, which is 10 cm below the
# assumed reference level.
H_0 = 2529.25

# Rows whose lake level lies below the no-outflow threshold.
below_h0 = df['h'] < H_0

# Set the outflow below H_0 to 0. A single .loc[rows, column] assignment is
# used on purpose: the chained form df['of'].loc[...] = ... may write to a
# temporary copy and leave df unchanged.
df.loc[below_h0, 'of'] = 0.0

# For lake levels below H_0 assign a linearly increasing error to the
# outflow: 0 at the lowest observed level, approaching 25 at H_0.
h_tmp = df.loc[below_h0, 'h']
h_min = h_tmp.min()
df.loc[below_h0, 'of_err'] = 25*(h_tmp - h_min)/(H_0 - h_min)

# Keep only rows where every column has a value.
df = df.dropna()

In [None]:
# Plain NumPy views of the outflow values and their errors.
of = df['of'].values
of_err = df['of_err'].values

# Lowest lake level in the data; levels are expressed relative to it below
# and it is added back when plotting/saving absolute levels.
offset = df['h'].values.min()

# Build the shifted levels as a NEW array. Subtracting in place on the
# array returned by .values can modify df['h'] itself, which makes a re-run
# of this cell yield offset == 0, and fails when that array is read-only.
h = df['h'].values - offset

In [None]:
import pickle
import theano
# `pm` is needed by this cell and the following ones; the import at the top
# of the notebook is commented out, so bring it into scope here to keep the
# notebook runnable on a fresh kernel.
import pymc3 as pm

of_model = pm.Model()

# Order all three arrays by increasing (offset-relative) lake level.
idx = h.argsort()
hx = h[idx]
ofy = of[idx]
ofe = of_err[idx]
# Override the error of the lowest-level sample with 1.
# NOTE(review): presumably because the linearly increasing error is exactly
# 0 at the minimum level -- confirm.
ofe[0] = 1.

# NOTE(review): these shared variables are created but the model below is
# built from the plain arrays hx / ofy / ofe, so updating them has no effect
# on the model -- confirm whether they were meant to be used.
x_shared = theano.shared(hx)
y_shared = theano.shared(ofy)
e_shared = theano.shared(ofe)

with of_model:
    # Uniform priors on the three curve parameters.
    a = pm.Uniform('a',1.e-6, 1e1)
    b = pm.Uniform('b', 1.1, 1e3)
    c = pm.Uniform('c', 1.1, 1e3)
    # Expected outflow as a function of level: a * exp(b*h + c*h^2).
    g = a*np.exp(b*hx + c*hx*hx)
    # One noise scale per observation, centred on its assigned error.
    sigma = pm.Normal('sigma', mu=ofe, sd=20, shape=(ofe.size))
    obs = pm.Normal('obs', mu=g, sd=sigma, observed=ofy)
    step = pm.Metropolis()
    trace = pm.sample(draws=40000, tune=1000, chains=4, step=step)

# Persist the model together with its trace.
with open('outflow_model.pkl', 'wb') as buff:
    pickle.dump({'model': of_model, 'trace': trace}, buff)

In [None]:
# Plot the sampled trace; the return value is bound to `_` so that it is not
# echoed as the cell's output.
_ = pm.traceplot(trace)

In [None]:
# Draw 1000 posterior-predictive samples from the fitted model and summarise
# the simulated 'obs' values per data point.
ppc = pm.sample_ppc(trace, samples=1000, model=of_model)
obs_draws = ppc['obs']
mn_of = obs_draws.mean(axis=0)
std_of = obs_draws.std(axis=0)

# Shaded band: mean +/- one standard deviation; dashed red: mean;
# black crosses: the values the model was fitted to.
band_hi = mn_of + std_of
band_lo = mn_of - std_of
plt.fill_between(hx, y1=band_hi, y2=band_lo, alpha=0.5)
plt.plot(hx, mn_of, 'r--')
plt.plot(hx, ofy, 'k+')

In [None]:
from scipy.interpolate import interp1d

def predict(x, xorig, trace, samples):
    """Simulate outflow curves at `x` from randomly chosen trace points.

    For each of `samples` randomly selected points of `trace` (chosen with
    replacement), the curve ``a * exp(b*x + c*x**2)`` is evaluated and
    Gaussian noise is added. The noise scale is the point's 'sigma' array,
    linearly interpolated (and extrapolated) from `xorig` onto `x`;
    non-positive scales are replaced by 0.001.

    Parameters
    ----------
    x : numpy.ndarray
        Positions at which to predict.
    xorig : array-like
        Positions the 'sigma' entries of each trace point refer to.
    trace : sequence of points, or multi-chain trace
        Each point is a mapping with keys 'a', 'b', 'c' and 'sigma'. If the
        object has an ``nchains`` attribute greater than 1, points are taken
        from ``trace._straces[chain].point(i)``; otherwise ``trace[i]``.
    samples : int
        Number of curves to simulate.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(samples, x.size)``, one simulated curve per row.
    """
    draws_per_chain = len(trace)
    # Containers without an `nchains` attribute are treated as one chain.
    n_chains = getattr(trace, 'nchains', 1)
    picks = np.random.randint(0, n_chains * draws_per_chain, samples)

    simulated = np.zeros((samples, x.size))
    for row, flat_idx in enumerate(picks):
        if n_chains > 1:
            # Split the flat index into (chain, position within chain).
            chain_no, draw_no = np.divmod(flat_idx, draws_per_chain)
            point = trace._straces[chain_no].point(draw_no)
        else:
            point = trace[flat_idx]

        a, b, c = point['a'], point['b'], point['c']
        to_grid = interp1d(xorig, point['sigma'], fill_value='extrapolate')
        scale = to_grid(x)
        # A normal distribution needs a positive scale.
        scale = np.where(scale > 0, scale, 0.001)
        simulated[row] = np.random.normal(loc=a*np.exp(b*x + c*x*x),
                                          scale=scale)
    return simulated
# Evaluation grid of 1000 offset-relative levels from 0 to 0.7, and 10
# simulated outflow curves on that grid.
xp = np.linspace(0, .7, num=1000)
yp = predict(xp, hx, trace, samples=10)


In [None]:
# Point-wise mean and spread over the simulated curves.
yp_mean = yp.mean(axis=0)
yp_std = yp.std(axis=0)

# Plot against absolute level by adding the offset back.
level = xp + offset
fig, ax = plt.subplots()
# Shaded band: mean +/- three standard deviations.
ax.fill_between(level, yp_mean + 3*yp_std, yp_mean - 3*yp_std,
                color='blue', alpha=0.5)
ax.plot(level, yp_mean, 'b--')
ax.plot(hx + offset, ofy, 'k')
yp_mean.shape

In [None]:
# Mean -/+ three standard deviations of the simulated outflow.
lo_3s = yp_mean - 3*yp_std
hi_3s = yp_mean + 3*yp_std

# Scale by 0.0864, flooring the lower bound at 0 and capping the upper
# bound at 100.
# NOTE(review): 0.0864 looks like a unit conversion (e.g. per-second to
# per-day with a factor 1000) -- confirm the intended units.
o_min = np.where(lo_3s > 0., lo_3s*0.0864, 0.)
o_max = np.where(hi_3s*0.0864 < 100., hi_3s*0.0864, 100.)

fig, ax = plt.subplots()
ax.fill_between(xp+offset, o_min, o_max, color='blue', alpha=0.5)

# Store the bounds together with the absolute levels they refer to.
np.savez('outflow_prior.npz', z=xp+offset, o_min=o_min, o_max=o_max)

In [None]:
# Reload the archive written by the np.savez call and display its 'z' array
# as a quick check of what was stored.
# NOTE: this rebinds `a`, which the model cell uses for one of its parameters.
a = np.load('outflow_prior.npz')
a['z']