In [None]:
%matplotlib inline
import matplotlib as mpl
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import datetime
import pandas as pd
import numpy as np
import math
import os
from obspy import read, UTCDateTime, Stream
from scipy.optimize import brentq
# KernelDensity is imported from the public package path; the private
# `sklearn.neighbors.kde` module is not available in current scikit-learn.
from sklearn.neighbors import KernelDensity
import xarray as xr

from filterpy.kalman import UnscentedKalmanFilter as UKF
from filterpy.kalman import KalmanFilter as KF
from filterpy.kalman import MerweScaledSigmaPoints
from filterpy.common import Q_continuous_white_noise

import progressbar

from clemb.forward_model import Forwardmodel
from clemb import clemb
from clemb.data import LakeData, WindData

## Table of contents
---------------------------
1. [Inversion](#Inversion)
2. [Forecasting](#Forecasting)

## Inversion

In [None]:
# Date range handed to Clemb as `start` / `end` in the next cell.
startdate = '2019-01-01'
enddate = '2019-08-26'
# Directory the figure files are written to by the plotting cells.
plot_dir = './'
# NOTE(review): the two paths below are absolute, machine-specific locations;
# adjust them before running on another machine.
# Directory containing the *.rsam files read in the RSAM comparison cell.
rsam_dir = '/media/win_home/Geosciences_2018/'
# Directory containing 'outflow_prior.npz' (only read by the disabled
# run_forward branch).
prior_dir = '/home/yannik/workspace/crater_lake_temperature/src/clemb/notebook/'


In [None]:
# Build the model from LakeData for the configured date range; the second
# positional argument is left as None.
c = clemb.Clemb(LakeData(), None, start=startdate, end=enddate, h=2.8)
c.drmg = True
# Backward-run result; its 'pwr', 'fmelt', 'inf' and 'evfl' entries are
# overlaid (blue lines) on the overview figure below.
rsb = c.run_backward()

In [None]:
# The first branch is switched off by the literal `False`; it is kept as an
# alternative configuration that reads an outflow prior from `prior_dir`.
if False:
    rs = c.run_forward(nsamples=1000, m_out_prior=os.path.join(prior_dir, 'outflow_prior.npz'), new=True,
                       intmethod='rk4')
else:
    # Active configuration. `rs` is the result object every plotting cell
    # below reads from (rs['dates'], rs['exp'], rs['var'], rs['model'], ...).
    # A fixed seed (42) is passed to run_forward.
    rs = c.run_forward(nsamples=2000, nresample=-1, m_out_max=40.,
                       m_in_max=40., q_in_max=1500., new=True,
                       prior_sampling=True, tolZ=-1,
                       prior_resample=10000, tolH=3e30, seed=42, intmethod='rk4',
                       gradient=True)

In [None]:
def density_plot(ax, dates, data, prm_lim=(0, 1600, 1000),  bw=60., mode='kde',
                 likelihood=1.):
    """
    Draw the per-time-step sample distribution of one quantity on `ax`.

    Parameters
    ----------
    ax : matplotlib axes
        Axes to draw on.
    dates : array-like
        Time axis; only `dates.size` is used in 'kde' mode, while 'scatter'
        mode uses `dates[k].data` as the x position of step k.
    data : array-like
        Samples indexed by time step first. In 'kde' mode `data[i].data`
        must be a 1-D float array (NaNs are dropped before fitting); in
        'scatter' mode `data.shape[1]` is the number of samples per step.
    prm_lim : tuple
        `(start, stop, num)` passed to `np.linspace` to build the value grid
        on which the density is evaluated ('kde' mode only).
    bw : float
        Bandwidth of the Gaussian kernel ('kde' mode only).
    mode : {'kde', 'scatter'}
        'kde' draws a filled contour plot of the kernel density estimate
        against the step index; 'scatter' draws the raw samples.
    likelihood : scalar or array-like
        Passed to `ax.scatter` as the colour values `c` ('scatter' mode only).

    Raises
    ------
    ValueError
        If `mode` is neither 'kde' nor 'scatter'.
    """
    if mode not in ('kde', 'scatter'):
        raise ValueError("mode must be 'kde' or 'scatter', got %r" % (mode,))

    nsteps = dates.size
    if nsteps == 0:
        # Nothing to draw; also avoids handing an empty array to contourf.
        return

    # pyplot.get_cmap is available across matplotlib versions, unlike
    # matplotlib.cm.get_cmap.
    cmap = plt.get_cmap('RdBu_r')

    if mode == 'kde':
        # The evaluation grid is the same for every time step, so build it once.
        X_plot = np.linspace(*prm_lim)
        grid = X_plot[:, np.newaxis]
        m = []
        for i in range(nsteps):
            y = data[i].data
            samples = y[~np.isnan(y)].reshape(-1, 1)
            _kde = KernelDensity(kernel='gaussian', bandwidth=bw).fit(samples)
            # score_samples returns log-densities.
            m.append(np.exp(_kde.score_samples(grid)))
        m = np.array(m)
        # x axis is the step index, not the date; use adjust_labels to
        # relabel the ticks with dates afterwards.
        ax.contourf(np.arange(nsteps), X_plot, m.T, 30, cmap=cmap)
    else:
        nresample = data.shape[1]
        for k in range(nsteps):
            ax.scatter([dates[k].data]*nresample, data[k], s=2, c=likelihood,
                       cmap=cmap, alpha=0.3)
    return


def adjust_labels(ax, dates, rotation=30):
    """
    Relabel index-based x ticks of `ax` with the corresponding dates.

    The plots in this notebook use the sample index as x coordinate. This
    function keeps only the ticks that fall on a valid index of `dates` and
    labels each of them with `dates[int(tick)]` formatted as YYYY-MM-DD.

    Parameters
    ----------
    ax : matplotlib axes
        Axes whose x ticks are replaced.
    dates : numpy array of datetime64
        Dates indexed by sample position.
    rotation : float
        Rotation of the tick labels in degrees.
    """
    n_dates = len(dates)
    new_labels = []
    new_ticks = []
    for _xt in ax.get_xticks():
        pos = int(_xt)
        # Ticks left of the first sample or right of the last one have no
        # date. Negative positions must be rejected explicitly: Python would
        # otherwise index from the end of `dates` and produce a wrong label.
        if _xt < 0 or pos >= n_dates:
            continue
        dt = dates[pos].astype('datetime64[us]').min()
        new_labels.append(pd.to_datetime(str(dt)).strftime("%Y-%m-%d"))
        new_ticks.append(_xt)
    ax.set_xticks(new_ticks)
    ax.set_xticklabels(new_labels, rotation=rotation, horizontalalignment='right')


In [None]:
# Display the forward-run result to inspect which variables it contains.
rs

In [None]:
# Overview figure: 5x2 panels comparing observations (c._df), the forward
# run (rs) and the backward run (rsb). All panels use the sample index as
# x coordinate; adjust_labels() turns the ticks into dates.
days = mdates.DayLocator()  # every day
months = mdates.MonthLocator()
monthFmt = mdates.DateFormatter('%Y-%m-%d')
dayFmt = mdates.DateFormatter('%d')

datet = rs['dates']
data = c._df
t_data = data.index
nsteps = datet.size
nresample = 500

mpl.rcParams['figure.subplot.hspace'] = .9
fig, axs = plt.subplots(nrows=5, ncols=2, figsize=(18, 12))

# Constant line at 4.5 (the same wind speed is passed as `ws` to the
# forecast further down).
axs[0,0].plot(np.ones(t_data.size)*4.5, ls='--')
adjust_labels(axs[0,0], t_data.values)
axs[0,0].set_xlim(0, data.shape[0])
axs[0,0].set_title('Wind speed [m/s]')

# Observed series with a +/- 3 * error band, and modelled mean +/- 3 std.
axs[0,1].plot(np.arange(data.shape[0]), data['X'], ls='--')
model_X = rs['model'].sel(obs='X')
x_index = np.arange(model_X.shape[0])
# The x range has to match the length of the observed series, as in the
# temperature and mass panels below.
axs[0,1].fill_between(np.arange(data.shape[0]), data['X']-3*data['X_err'],
                       data['X']+3*data['X_err'], alpha=0.5)
axs[0,1].plot(x_index, model_X.mean(axis=1), 'k-')
axs[0,1].plot(x_index, model_X.mean(axis=1)+3*model_X.std(axis=1), 'k--')
axs[0,1].plot(x_index, model_X.mean(axis=1)-3*model_X.std(axis=1), 'k--')
adjust_labels(axs[0,1], datet.values)
axs[0,1].set_xlim(0, data.shape[0])
axs[0,1].set_title('Mg++ amount [kt]')

axs[1,0].plot(np.arange(data.shape[0]), data['T'], ls='--')
axs[1,0].fill_between(np.arange(data.shape[0]), data['T']-3*data['T_err'],
                      data['T']+3*data['T_err'], alpha=0.5)

model_T = rs['model'].sel(obs='T')
axs[1,0].plot(x_index, model_T.mean(axis=1), 'k-')
axs[1,0].plot(x_index, model_T.mean(axis=1)+3*model_T.std(axis=1), 'k--')
axs[1,0].plot(x_index, model_T.mean(axis=1)-3*model_T.std(axis=1), 'k--')
adjust_labels(axs[1,0], datet.values)
axs[1,0].set_xlim(0, data.shape[0])
# Raw string: '\c' is not a valid escape sequence in a normal string literal.
axs[1,0].set_title(r'Lake temperature [$^{\circ}C$]')

axs[1,1].plot(np.arange(data.shape[0]), data['M'], ls='--')
axs[1,1].fill_between(np.arange(data.shape[0]), data['M']-3*data['M_err'],
                      data['M']+3*data['M_err'], alpha=0.5)
model_M = rs['model'].sel(obs='M')
axs[1,1].plot(x_index, model_M.mean(axis=1), 'k-')
axs[1,1].plot(x_index, model_M.mean(axis=1)+3*model_M.std(axis=1), 'k--')
axs[1,1].plot(x_index, model_M.mean(axis=1)-3*model_M.std(axis=1), 'k--')
adjust_labels(axs[1,1], datet.values)
axs[1,1].set_xlim(0, data.shape[0])
axs[1,1].set_title('Lake mass [kt]')

# density_plot expects (ax, dates, samples): pass the time axis and the
# per-sample variable explicitly.
# NOTE(review): assumes rs holds a (dates, samples) variable for each of
# 'q_in', 'h', 'm_in', 'm_out' and 'mevap' -- only rs['q_in'] is confirmed by
# other cells of this notebook; verify the remaining names.
density_plot(axs[2,0], datet, rs['q_in'], prm_lim=(0,1200,1000))
exp_q_in = rs['exp'].loc[:,'q_in']
var_q_in = rs['var'].loc[:,'q_in']
idx = np.argmax(rs['lh'], axis=1)
map_q_in = rs['q_in'][:, idx]
axs[2,0].plot(exp_q_in, 'k')
axs[2,0].plot(exp_q_in - 3*np.sqrt(var_q_in), 'k--')
axs[2,0].plot(exp_q_in + 3*np.sqrt(var_q_in), 'k--')
axs[2,0].plot(rsb['pwr'], 'b-')
#axs[2,0].plot(map_q_in, 'g+')
adjust_labels(axs[2,0], datet.data)
axs[2,0].set_ylim(0,1200)
axs[2,0].set_title('Heat input rate [MW]')

density_plot(axs[2,1], datet, rs['h'], bw=0.01, prm_lim=(2.6,2.9,10))
exp_h = rs['exp'].loc[:,'h']
axs[2,1].plot(exp_h, 'k')
#axs[2,1].plot(np.arange(nsteps), exp[:,3] - 3*np.sqrt(var[:,3]), 'k--')
#axs[2,1].plot(np.arange(nsteps), exp[:,3] + 3*np.sqrt(var[:,3]), 'k--')
#axs[2,1].plot(np.arange(nsteps+1), np.ones(ndf.index.size)*6.0, ls='--')
adjust_labels(axs[2,1], datet.data)
axs[2,1].set_title('Enthalpy [TJ/kt]')

density_plot(axs[3,0], datet, rs['m_in'], bw=1., prm_lim=(0,50,100))
exp_m_in = rs['exp'].loc[:,'m_in']
var_m_in = rs['var'].loc[:,'m_in']
axs[3,0].plot(exp_m_in, 'k')
axs[3,0].plot(exp_m_in - 3*np.sqrt(var_m_in), 'k--')
axs[3,0].plot(exp_m_in + 3*np.sqrt(var_m_in), 'k--')
axs[3,0].plot(rsb['fmelt'], 'b-')
adjust_labels(axs[3,0], datet.data)
axs[3,0].set_ylim(0, 50)
axs[3,0].set_title('Inflow [kt/day]')

density_plot(axs[3,1], datet, rs['m_out'], bw=1., prm_lim=(0,50,100))
exp_m_out = rs['exp'].loc[:,'m_out']
var_m_out = rs['var'].loc[:,'m_out']
axs[3,1].plot(exp_m_out, 'k')
axs[3,1].plot(exp_m_out - 3*np.sqrt(var_m_out), 'k--')
axs[3,1].plot(exp_m_out + 3*np.sqrt(var_m_out), 'k--')
axs[3,1].plot(rsb['inf'], 'b-')
adjust_labels(axs[3,1], datet.data)
axs[3,1].set_ylim(0, 50)
axs[3,1].set_title('Outflow [kt/day]')

# Steam panel is switched off; axs[4,0] stays empty.
if False:
    density_plot(axs[4,0], datet, rs['steam'], bw=1., prm_lim=(0,30,100))
    axs[4,0].plot(rsb['steam'], 'b-')
    adjust_labels(axs[4,0], datet.data)
    axs[4,0].set_ylim(0, 30)
    axs[4,0].set_title('Steam input [kt/day]')
    

density_plot(axs[4,1], datet, rs['mevap'], bw=0.5, prm_lim=(0,6,10))
axs[4,1].plot(rsb['evfl'], 'b-')
adjust_labels(axs[4,1], datet.data)
axs[4,1].set_ylim(0, 6)
_ = axs[4,1].set_title('Evaporation mass loss [kt/day]')

# Date locators/formatters are switched off because the x axes hold sample
# indices, not dates.
if False:
    for row in range(5):
        for col in range(2):
            axs[row,col].xaxis.set_major_locator(months)
            axs[row,col].xaxis.set_major_formatter(monthFmt)
            axs[row,col].xaxis.set_minor_locator(days)
            #axs[row,col].set_xlim(datet[0].data, datet[-1].data)
fout = os.path.join(plot_dir, 'clemb_inference_result_overview.png')
fig.savefig(fout, dpi=300, bbox_inches='tight')

In [None]:
# Load a stored forward-run result from the repository's test data for
# comparison with the run computed above.
rs1 = xr.open_dataset('../../../tests/data/forward_2019-01-01_2019-08-26.nc')

In [None]:
# Expected q_in of the stored run (rs1) ...
plt.plot(rs1['exp'].loc[:, 'q_in'])
# ... against the current run, restricted to dates on or after the first
# date of rs1 (idx is a boolean mask over rs['dates']).
idx = rs['dates'] >= rs1['dates'][0]
plt.plot(rs['exp'].loc[idx, 'q_in'])

In [None]:
# Modelled q_in divided by 0.0864 (the same factor f_x multiplies Q_in by
# before calling the forward model; presumably MW <-> TJ/day -- confirm).
y = rs['model'].loc[:, :, 'q_in']/0.0864

In [None]:
# Check the dimensions of the rescaled model output.
y.shape

In [None]:
#fig, axs = plt.subplots(nrows=1, ncols=2, figsize=(20, 5),
#                        gridspec_kw={''})

# Two manually placed axes: a wide panel for the time evolution of the
# q_in density and a narrow panel for the densities of the last four steps.
fig = plt.figure(figsize=(16,5))
ax1 = fig.add_axes([0.01, 0.1, 0.8, 0.9])
ax2 = fig.add_axes([0.82, 0.1, 0.15, 0.9])
axs = [ax1, ax2]

density_plot(axs[0], rs['dates_p'], rs['model'].loc[:,:,'q_in']/0.0864, prm_lim=(0,1100,1000), mode='kde')
#axs[0].plot(rsb['pwr'], 'k')
exp_q_in = rs['exp'].loc[:,'q_in']
axs[0].plot(exp_q_in, 'k')
adjust_labels(axs[0], rs['dates'].data)
axs[0].set_ylabel('Energy input rate [MW]')
# Take the number of steps from rs directly so this cell does not depend on
# a variable left over from the overview cell.
axs[0].set_xlim(0, rs['dates'].size - 1)
axs[0].set_ylim(0, 1100)

# Kernel density of the q_in samples for each of the last four time steps;
# the most recent one is drawn with a thicker line.
X_plot = np.linspace(0, 1600, 1000)[:, np.newaxis]
for i in [-1, -2, -3, -4]:
    y = rs['q_in'][i].data
    idx = np.isnan(y)
    kde = KernelDensity(kernel='gaussian', bandwidth=60.).fit(y[~idx].reshape(-1,1))
    log_dens = kde.score_samples(X_plot)
    Y = np.exp(log_dens)
    date_str = pd.to_datetime(rs['dates'].data[i]).strftime("%Y-%m-%d")
    if i == -1:
        axs[1].plot(Y, X_plot[:, 0], linewidth=5., label=date_str)
    else:
        axs[1].plot(Y, X_plot[:, 0], label=date_str)

axs[1].legend()
#axs[1].set_xlabel('Energy input rate [MW]')
# Density on x, value on y, so the right panel shares the left panel's
# value axis range (0-1100).
axs[1].yaxis.tick_right()
axs[1].set_xticks([])
axs[1].set_ylim(0, 1100)
fout = os.path.join(plot_dir, 'clemb_inference_result.png')
# Save through the figure handle instead of pyplot's implicit current figure.
fig.savefig(fout, dpi=300, bbox_inches='tight')

In [None]:
# Compare the inferred energy input rate with RSAM over the same time span.
dates = rs['dates'].data
t1 = UTCDateTime(pd.to_datetime(dates[0]))
t2 = UTCDateTime(pd.to_datetime(dates[-1]))
# Collect the first trace of each yearly RSAM file into one stream.
st = Stream()
for _fn in ['2018.MAVZ.10-HHZ.NZ.bp_1.00-4.00.rsam', '2019.MAVZ.10-HHZ.NZ.bp_1.00-4.00.rsam']:
    st_tmp = read(os.path.join(rsam_dir, _fn))
    tr = st_tmp[0]
    # Overwrite the sample spacing with 86400 s (one sample per day) and keep
    # sampling_rate consistent with it.
    # NOTE(review): presumably the files hold daily values with a wrong or
    # missing header -- confirm.
    tr.stats.delta = 86400.0
    tr.stats.sampling_rate = 1./tr.stats.delta
    st += tr

# Join the traces into one, interpolating across gaps, then cut to the
# inversion period.
st.merge(fill_value='interpolate')
st1 = st.trim(t1, t2)
tr = st1[0]

# Mean q_in with a +/- 1 standard deviation band; the lower bound is
# clipped at zero.
exp_q_in = rs['exp'].loc[:,'q_in']
std_q_in = np.sqrt(rs['var'].loc[:,'q_in'])
min_q = exp_q_in - std_q_in
max_q = exp_q_in + std_q_in
min_q = np.where(min_q < 0, 0., min_q)
fig = plt.figure(figsize=(10,6))
ax = fig.add_subplot(111)
ax.plot(dates, exp_q_in, 'k')
ax.fill_between(dates, min_q, max_q, color='k', alpha=0.2)
ax.set_ylabel('Energy input rate [MW]')
# RSAM goes on a second y axis that shares the date axis; the axis is
# coloured like the RSAM line.
ax1 = ax.twinx()
ax1.plot(tr.times('matplotlib'), tr.data, color='#1f77b4')
ax1.set_ylabel('RSAM', color='#1f77b4')
ax1.spines['right'].set_color('#1f77b4')
ax1.xaxis.label.set_color('#1f77b4')
ax1.tick_params(axis='y', colors='#1f77b4')
fout = os.path.join(plot_dir, 'comp_Q_rsam.png')
plt.savefig(fout, dpi=300, bbox_inches='tight')

## Forecasting

In [None]:
def fullness_inv(v):
    """
    Return the lake level that corresponds to the lake volume `v`.

    The level is found numerically as the root, within the bracket
    [2400, 2600], of a cubic level-to-volume polynomial minus `v`.
    """
    def volume_residual(level, target):
        # Cubic level -> volume polynomial, scaled by 1/1000, minus the target.
        cubic = 4.747475*np.power(level, 3)
        quadratic = 34533.8*np.power(level, 2)
        linear = 83773360.*level
        return (cubic - quadratic + linear - 67772125000.)/1000. - target

    level = brentq(volume_residual, 2400, 2600, args=(v,))
    return level

def f_x(x, dt, dQin, month, time, H, ws):
    """
    State transition function for the unscented Kalman filter.

    Advances the state by one step of length `dt` using the lake forward
    model and a linear trend for the heat input.

    Parameters
    ----------
    x : array-like of length 8
        State vector `[Q_in, M_in, M_out, T, M, X, a, v]`.
    dt : float
        Time step.
    dQin : float
        Rate of change of `Q_in` per unit time; `Q_in` is advanced by
        `dQin * dt`.
    month : int
        Passed to `esol`.
    time : float
        Not used by this function.
    H, ws :
        Passed through to `forward_model`.

    Returns
    -------
    numpy.ndarray of length 8
        Next state, in the same order as `x`. `M_in` and `M_out` are carried
        over unchanged.

    NOTE(review): `esol` and `forward_model` are neither defined nor imported
    in the visible part of this notebook (only the `Forwardmodel` class is
    imported) -- confirm where they are meant to come from.
    """
    Q_in = x[0]
    M_in = x[1]
    M_out = x[2]
    T = x[3]
    M = x[4]
    X = x[5]
    a = x[6]
    v = x[7]
    y = np.array([T, M, X])
    solar = esol(dt, a, month)
    # Q_in is scaled by 0.0864 before it is handed to the forward model.
    y_next, st, me = forward_model(y, dt, a, v, Q_in * 0.0864,
                                   M_in, M_out, solar, H, ws)
    # Temperature-dependent factor used to turn mass into volume
    # (presumably the water density -- confirm).
    p0 = 1.003 - 0.00033 * T
    p1 = 1.003 - 0.00033 * y_next[0]
    v0 = M/p0
    v1 = y_next[1]/p1
    h0 = fullness_inv(v0)
    h1 = fullness_inv(v1)
    dh = h1 - h0
    if dh != 0:
        # Volume change over level change.
        a_next = (v1 - v0)*1e3/dh
    else:
        # No level change: the ratio is undefined, so keep the current value
        # instead of propagating NaN/inf into the filter state.
        a_next = a
    q_in_next = Q_in + dQin * dt
    return np.array([q_in_next, M_in, M_out,
                     y_next[0], y_next[1], y_next[2],
                     a_next, v1])

def h_x(x):
    """
    Measurement function: map a state vector to the observed quantity.

    Only the first state component is observed; it is returned as a
    one-element list.
    """
    first_component = x[0]
    return [first_component]

# Forecast with an unscented Kalman filter: start from the last inversion
# estimate and run predict steps only (no measurement updates).
dt = 1.
points = MerweScaledSigmaPoints(n=8, alpha=.1, beta=2., kappa=0.)
kf = UKF(dim_x=8, dim_z=1, dt=dt, fx=f_x, hx=h_x, points=points)
# Initial state and (diagonal) covariance from the last time step of the
# inversion. Column 3 of rs['exp'] / rs['var'] is left out of the state; it
# is passed to f_x separately as H in the loop below.
x = np.r_[rs['exp'][-1,0:3].data, rs['exp'][-1,4:].data]
P = np.eye(8)*np.r_[rs['var'][-1,0:3].data,rs['var'][-1,4:].data]
kf.x = x
kf.Q = np.eye(8)*1e-7
kf.P = P
kf.R = 50.**2
nperiods=10
# `datet` is defined in the overview-figure cell (datet = rs['dates']).
datetime_new = pd.date_range(start=datet[-1].data, periods=nperiods, freq='D')
# Trend of the first state component, taken as the difference between the
# last two inversion estimates.
dQin = rs['exp'][-1,0].data - rs['exp'][-2,0].data
means = np.zeros((nperiods, 8))
covariances = np.zeros((nperiods, 8, 8))
for i in range(nperiods):
    # The keyword arguments are forwarded to f_x.
    kf.predict(dQin=dQin, time=i*dt,
               month=datetime_new.month[i],
               H=rs['exp'][-1,3].data,
               ws=4.5)
    means[i,:] = kf.x
    covariances[i, :, :] = kf.P

In [None]:
# Observed temperature followed by the forecast mean (state component 3).
T_pred = np.r_[data['T'], means[:,3]]
# Each forecast value belongs to the step after the corresponding entry of
# datetime_new, so shift the index by one period. Adding a plain integer to a
# DatetimeIndex is not supported by current pandas; add the index's own
# frequency instead.
dates = np.r_[t_data, datetime_new + datetime_new.freq]
# Observation error followed by the forecast standard deviation.
nvar = np.r_[data['T_err'], np.sqrt(covariances[:, 3, 3])]
plt.figure(figsize=(12,6))
plt.plot(dates, T_pred)
plt.fill_between(dates, T_pred -2*nvar, T_pred + 2*nvar, alpha=0.5)
_ = plt.ylabel(r'Temperature [$^\circ$C]')
#plt.savefig('RCL_T_prediction.png')

In [None]:
# Synthetic test: generate a synthetic data set and feed it to a fresh
# Clemb instance.
# NOTE: this rebinds `c` (and, two cells further down, `rs`), so the cells
# above will see the synthetic run if they are re-executed afterwards.
from clemb.syn_model import SynModel
from clemb import Clemb
import matplotlib.pyplot as plt
s = SynModel()
df = s.run(1000., mode='test', gradient=True)
# No data sources are passed; the synthetic frame is injected directly
# through the private attributes below.
c = Clemb(None, None, None, None, pre_txt='syn1',
          resultsd='./data', save_results=False)
c._df = df
c._dates = df.index

In [None]:
# Display the synthetic data frame.
df

In [None]:
# Forward run on the synthetic data with the same settings as the run on
# the real data above, except for a larger number of samples (5000).
rs = c.run_forward(nsamples=5000, nresample=-1, m_out_max=40.,
                   m_in_max=40., q_in_max=1500., new=True,
                   prior_sampling=True, tolZ=-1,
                   prior_resample=10000, tolH=3e30, seed=42, intmethod='rk4',
                   gradient=True)

In [None]:
# Histogram of the dq_in samples at the third time step (index 2).
plt.hist(rs['dq_in'].isel(dates=2).data)

In [None]:
# Histogram of the q_in samples at the third time step (index 2).
plt.hist(rs['q_in'].isel(dates=2).data)

In [None]:
# Histogram of the modelled q_in on 2017-01-04, divided by 0.0864 as in the
# inversion section.
plt.hist(rs['model'].loc['2017-01-04',:,'q_in']/0.0864)

In [None]:
# Expected q_in over time for the synthetic run.
plt.plot(rs['exp'].loc[:,'q_in'])

In [None]:
# NOTE(review): same plot as the previous cell -- one of the two can
# probably be removed.
plt.plot(rs['exp'].loc[:, 'q_in'])