In [None]:
%matplotlib inline
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from scipy.interpolate import interp1d
from pumahu import get_data


### MCMC for measured outflow

In [None]:
def df_resample(df):
    """
    Resample dataframe to daily sample rate.

    The frame is first placed on a regular 15-minute grid running from
    00:00 of the first index date to 00:00 of the last index date. Grid
    points that have an observation within 15 minutes take that
    observation's value (nearest match); the remaining gaps are filled by
    linear interpolation. The 15-minute series is then averaged per day.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame indexed by a DatetimeIndex (``df.index.date`` is used to
        derive the grid limits).

    Returns
    -------
    pandas.DataFrame
        Daily means; each row is labelled with the start of its day.
    """
    # Upsample to a regular 15 min grid. The 'min' alias is used because
    # the older 'T' alias is deprecated in recent pandas releases.
    ndates = pd.date_range(start=df.index.date[0], end=df.index.date[-1],
                           freq='15min')
    # Only grid points within 15 min of an observation receive a value
    # here; all others stay NaN until interpolate() fills them linearly.
    ndf = df.reindex(ndates, method='nearest',
                     tolerance=np.timedelta64(15, 'm')).interpolate()
    # Downsample to daily means; label='left' stamps every mean with the
    # left edge (start) of its daily bin.
    ndf = ndf.resample('1D', label='left').mean()
    return ndf

In [None]:
# Observations of type 'z' for sites RU001 and RU001A from the GeoNet FITS
# service. The CSV header row is skipped and the two data columns are
# named 'h' and 'h_err'; the first column becomes the datetime index.
# NOTE(review): 'z' is presumably the lake level -- confirm against FITS.
url = "https://fits.geonet.org.nz/observation?siteID={}&typeID=z"
names = ['h', 'h_err']
ldf = pd.read_csv(url.format('RU001'),
                  index_col=0, names=names, skiprows=1,
                  parse_dates=True)
ldf1 = pd.read_csv(url.format('RU001A'),
                   index_col=0, names=names, skiprows=1,
                   parse_dates=True)
# RU001 takes precedence; RU001A only fills timestamps/values RU001 lacks.
ldf = ldf.combine_first(ldf1)
# Drop the timezone so the index can be compared with naive timestamps.
ldf = ldf.tz_localize(None)

df = df_resample(ldf)
# Take an explicit copy of the filtered rows: columns are assigned to `df`
# in a later cell, and assigning to a boolean-filtered slice triggers
# pandas' SettingWithCopyWarning.
df = df.loc[df.index >= '2016-03-04'].copy()

In [None]:
# Outflow table shipped with the package. The file's own header row is
# skipped and replaced by the names below; 'Date' is parsed and used as
# the index, leaving the columns 'of' and 'of_err'.
dfo = pd.read_csv(
    get_data('data/outflow.csv'),
    skiprows=1,
    names=['Date', 'of', 'of_err'],
    index_col=0,
    parse_dates=True,
)
dfo

In [None]:
# Attach the outflow columns to the daily frame. pandas aligns the
# assigned Series on the index, so days without an entry in dfo get NaN.
for _col in ('of', 'of_err'):
    df[_col] = dfo[_col]
# Show only the days that carry a positive outflow value.
df.loc[df['of'] > 0]

In [None]:
# Rows with a positive outflow, as an array with columns (h, of, of_err).
data = df.loc[df['of'] > 0, ['h', 'of', 'of_err']].values

# Three synthetic zero-outflow points, evenly spaced between lowest_level
# and 95 % of the smallest observed h. Their error grows linearly from 0
# at lowest_level to 30 at min_level.
min_level = 0.95*data[:, 0].min()
lowest_level = 1.
levels = np.linspace(lowest_level, min_level, 3)
level_errors = 30*(levels - lowest_level)/(min_level - lowest_level)
zerolevels = np.column_stack((levels, np.zeros_like(levels), level_errors))
data = np.vstack((zerolevels, data))

# Split into x (h), y (of) and yerr (of_err), all ordered by increasing x.
idx = data[:, 0].argsort()
x, y, yerr = (data[idx, col] for col in range(3))
# Overwrite the error of the point with the smallest x.
# NOTE(review): presumably this replaces the zero error of the synthetic
# point at lowest_level so that no sigma is zero -- confirm.
yerr[0] = 1.

In [None]:
from nsampling import NestedSampling, Uniform, Normal
from functools import partial
from scipy.stats import multivariate_normal

def f(b, c, k, x, a=250.):
    """
    Logistic curve ``a / (1 + exp(-k * (x * c - b)))``.

    Parameters
    ----------
    b : float
        Offset subtracted from the scaled input.
    c : float
        Factor applied to ``x`` before the offset is subtracted.
    k : float
        Steepness of the curve.
    x : float or numpy.ndarray
        Input value(s); arrays are evaluated element-wise.
    a : float, optional
        Upper asymptote of the curve. Defaults to 250., the value that
        was previously hard-coded.

    Returns
    -------
    float or numpy.ndarray
        Curve value(s), same shape as ``x``.
    """
    return a / (1 + np.exp(-k * (x * c - b)))

def likelihood(vals, sid, x, y, sigma):
    """
    Gaussian log-likelihood of the logistic model ``f`` given the data.

    The errors are treated as independent, i.e. the covariance is the
    diagonal matrix ``diag(sigma**2)``. The log-density is evaluated in
    closed form instead of building that N x N matrix and handing it to
    ``multivariate_normal``, which would decompose it on every call.

    Parameters
    ----------
    vals : sequence
        Parameter values; the first three entries are b, c and k.
    sid :
        Not used by this function; kept so the call signature is unchanged.
    x : numpy.ndarray
        Points at which the model is evaluated.
    y : numpy.ndarray
        Observed values at ``x``.
    sigma : numpy.ndarray
        One standard deviation per observation.

    Returns
    -------
    float
        Log-density of the model prediction under N(y, diag(sigma**2)).

    Raises
    ------
    numpy.linalg.LinAlgError
        If any entry of ``sigma`` is zero (or NaN), i.e. the covariance
        matrix would be singular.
    """
    b, c, k = vals[0], vals[1], vals[2]
    sigma = np.asarray(sigma, dtype=float)
    var = sigma * sigma
    # A zero variance makes the covariance singular; fail loudly rather
    # than return inf/nan to the sampler.
    if not np.all(var > 0):
        raise np.linalg.LinAlgError("singular matrix")
    resid = f(b, c, k, x) - y
    # log N(m | y, diag(var)) =
    #   -0.5 * (n*log(2*pi) + sum(log(var)) + sum(resid**2 / var))
    return -0.5 * (var.size * np.log(2 * np.pi)
                   + np.sum(np.log(var))
                   + np.sum(resid * resid / var))

In [None]:
# Uniform priors for the three free parameters of the logistic model,
# given as (name, lower bound, upper bound).
prior_bounds = (('b', 1., 1e2), ('c', 1., 1e2), ('k', 1e-6, 1.))
b, c, k = (Uniform(name, lower, upper) for name, lower, upper in prior_bounds)

ns = NestedSampling(seed=42)
# Bind the data so the sampler only has to supply the leading arguments.
lh = partial(likelihood, x=x, y=y, sigma=yerr)
# NOTE(review): tolZ / tolH are presumably stopping tolerances of the
# sampler -- see the nsampling documentation for their exact meaning.
rs = ns.explore(
    vars=[b, c, k],
    likelihood=lh,
    initial_samples=100,
    maximum_steps=10000,
    tolZ=-1,
    tolH=3,
)

In [None]:
import matplotlib.pyplot as plt
from matplotlib.pyplot import cm
from matplotlib.colors import Normalize
# Colour scale for the log-likelihood of each sample.
# NOTE(review): the limits (-300, -150) are fixed by hand; adjust them if
# the log-likelihood range of a run differs.
cmap = cm.ScalarMappable(norm=Normalize(vmin=-300, vmax=-150), cmap='RdBu_r')
smp = rs.get_samples()
smp1 = rs.resample_posterior(100)

# Per-sample quantities collected from the sampler output.
logLs = []
bs = []
cs = []
ks = []
wt = []
h = []
z = []
for _s in smp:
    b, c, k = _s.get_value()
    logLs.append(_s.get_logL())
    bs.append(b)
    cs.append(c)
    ks.append(k)
    wt.append(_s.get_logWt())
    z.append(_s.get_logZ())
    h.append(_s.get_H())

# b/k coordinates of the samples returned by resample_posterior().
post_b = []
post_k = []
for _s in smp1:
    b, c, k = _s.get_value()
    post_b.append(b)
    post_k.append(k)

fig, axs = plt.subplots(nrows=1, ncols=2, figsize=(18,5))
# Left panel: all samples in the (b, k) plane, coloured by log-likelihood,
# drawn with one scatter call instead of one line artist per sample
# (s=25 is the area equivalent of marker size 5).
axs[0].scatter(bs, ks, s=25, color=cmap.to_rgba(logLs), alpha=0.3)
# Resampled posterior points on top, as black crosses.
axs[0].plot(post_b, post_k, ms=5, color='k', marker='+', linestyle='none')
axs[0].set_xlabel('b')
axs[0].set_ylabel('k')
cmap.set_array(logLs)
cb = plt.colorbar(cmap, ax=axs[0])
cb.set_label('Log-Likelihood')
# Right panel: log evidence reported by each sample, in sample order.
axs[1].plot(z)
axs[1].set_ylabel('Log Evidence')
axs[1].set_xlabel('Sample #')

In [None]:
def predict(smp, x, xorig, sigma, error=True, rng=None):
    """
    Evaluate the logistic model at ``x`` for every sample in ``smp``.

    Parameters
    ----------
    smp : sequence
        Samples; each must provide ``get_value()`` returning ``(b, c, k)``.
    x : numpy.ndarray
        Points at which the model is evaluated.
    xorig : array-like
        Points at which ``sigma`` is given.
    sigma : array-like
        Standard deviations at ``xorig``. They are linearly interpolated
        (and extrapolated) onto ``x``; non-positive results are replaced
        by 0.001. Only used when ``error`` is true.
    error : bool, optional
        If true (default), each curve is perturbed with Gaussian noise of
        the interpolated standard deviation. If false, the noise-free
        curves are returned.
    rng : numpy.random.Generator or numpy.random.RandomState, optional
        Source of the Gaussian noise. When omitted, NumPy's global random
        state (``np.random.normal``) is used, as before.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(len(smp), x.size)``, one row per sample.
    """
    a = 250.  # upper asymptote of the logistic curve
    if error:
        # The interpolated error depends only on x, so it is computed once
        # rather than once per sample.
        sigma_at = interp1d(xorig, sigma, fill_value='extrapolate')
        n_sigma = sigma_at(x)
        # A normal scale must be positive; extrapolation can go <= 0.
        n_sigma = np.where(n_sigma > 0, n_sigma, 0.001)
        draw = np.random.normal if rng is None else rng.normal
    y_predict = np.zeros((len(smp), x.size))
    for row, _s in enumerate(smp):
        b, c, k = _s.get_value()
        curve = a / (1 + np.exp(-k * (x * c - b)))
        if error:
            y_predict[row] = draw(loc=curve, scale=n_sigma)
        else:
            y_predict[row] = curve
    return y_predict
# Grid on which the predictive outflow curves are evaluated.
xp = np.linspace(1.0, 2.3, 1000)
# predict() draws its Gaussian noise from NumPy's global random state;
# seed it so that a fresh Restart & Run All reproduces the same curves.
np.random.seed(42)
yp = predict(smp1, xp, x, yerr)


In [None]:
nstd = 2            # half-width of the band in standard deviations
offset = 2527.35    # added to xp when the result is saved
# Factor applied to every outflow value before plotting / saving.
# NOTE(review): presumably a unit conversion (l/s -> 1000 m^3/day) -- confirm.
unit_factor = 0.0864

# Mean and spread of the predictive curves at every grid point.
yp_mean = yp.mean(axis=0)
yp_std = yp.std(axis=0)
# Lower edge of the band, floored at zero; upper edge is left as is.
lower = yp_mean - nstd*yp_std
o_min = np.where(lower > 0., lower, 0.)
o_max = yp_mean + nstd*yp_std

fig, ax = plt.subplots()
ax.fill_between(xp, o_min*unit_factor, o_max*unit_factor,
                color='blue', alpha=0.1)
ax.errorbar(x, y*unit_factor, yerr=yerr*unit_factor,
            marker='o', elinewidth=1, linewidth=0)
ax.plot(xp, yp_mean*unit_factor, 'k-')
# Saving is switched off; flip the condition to write the band to disk.
if False:
    np.savez(get_data('data/outflow_prior.npz'),
             z=xp+offset,
             o_min=o_min*unit_factor,
             o_max=o_max*unit_factor,
             o_mean=yp_mean*unit_factor)