In [None]:
import numpy as np
import hickle as hkl
from scipy.sparse.linalg import splu
from scipy import sparse
import scipy
from tqdm import tqdm_notebook, tnrange
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline
%run ../../src/downloading/utils.py

The idea is to:

* Look at the MSE only for the original data
* Identify whether to horizontally or linearly interpolate the data in calc_and_save
* Look at a graph to see what the best value of lambda is

Do this for different ranges of len(data_x) / 72 and plot the best value of lambda for each range of data availability.
Adjust the scatterplot to show the original data and the smoothed data side by side with the residuals (?)

In [None]:
# Load the raw arrays for tile 0_0 in one pass: the s2 data, the s2 dates, and the
# cloud array. The last axis of `data_old` is reduced to index 0 in a later cell.
data_old, dates, clouds = (
    hkl.load(path)
    for path in (
        "../../tile_data/uganda-agroforestry-2/2019/raw/s2/0_0.hkl",
        "../../tile_data/uganda-agroforestry-2/2019/raw/misc/s2_dates_0_0.hkl",
        "../../tile_data/uganda-agroforestry-2/2019/raw/clouds/clouds_0_0.hkl",
    )
)

In [None]:
# Average `clouds` over axes 1 and 2 to get one value per entry along axis 0, then drop
# every entry whose average exceeds 0.02 from both the imagery and the dates.
# NOTE: this overwrites `data_old` and `dates`, so re-running the cell without reloading
# applies the stale indices to the already-filtered arrays.
cloud_means = np.mean(clouds, axis=(1, 2))
to_remove = np.argwhere(cloud_means > 0.02)
data_old, dates = (np.delete(arr, to_remove, 0) for arr in (data_old, dates))
dates.shape

In [None]:
# Regular grid 0, 5, ..., 355 (72 values) that `dates` is matched against
# (presumably day-of-year values -- confirm against the dates file).
closest = np.arange(0, 360, 5)

# For every grid value that coincides exactly with an entry of `dates`, remember its
# position on the grid and the position of the first matching entry in `dates`.
matches = [
    (grid_idx, np.argmin(abs(grid_val - dates)))
    for grid_idx, grid_val in enumerate(closest)
    if np.min(abs(grid_val - dates)) == 0
]
idxes = np.array([grid_idx for grid_idx, _ in matches])
idxes_original = np.array([date_idx for _, date_idx in matches])

In [None]:
# calculate_and_save_best_images is not defined in this notebook; presumably it comes
# from the utils.py script %run in the imports cell. Only its first return value is
# kept here; the second is discarded.
data, _ = calculate_and_save_best_images(data_old, dates)

In [None]:
# Keep only index 0 of the last axis for both arrays.
# NOTE: both names are overwritten, so re-running this cell slices the arrays again.
data, data_old = data[..., 0], data_old[..., 0]

In [None]:
# Display the shape of `data`; its first dimension is the system size later passed to
# initialize_smoother.
data.shape

In [None]:
def initialize_smoother(lmbd: float = 800, dim: int = 29, order: int = 2) -> "scipy.sparse.linalg.SuperLU":
    '''
    Pre-factorize the Whittaker smoothing system ``I + lmbd * D'D``.

    ``D`` is the ``order``-th finite-difference operator of shape
    ``(dim - order, dim)``. The returned factorization can be reused to smooth
    any number of series of length ``dim`` via its ``solve`` method.

    Parameters:
     lmbd: weight of the roughness penalty; 0 leaves the input unchanged
     dim: length of the series that will be smoothed
     order: order of the difference penalty (2 penalizes curvature, the
            previously hard-coded behavior)

    Returns:
     The sparse LU factorization (SuperLU object) of the coefficient matrix

    Raises:
     ValueError: if ``order`` is negative or ``dim`` is smaller than ``order``
    '''
    if order < 0:
        raise ValueError(f"order must be non-negative, got {order}")
    if dim < order:
        raise ValueError(f"dim ({dim}) must be at least order ({order})")

    # Differencing a unit impulse `order` times yields the finite-difference
    # stencil, e.g. [1, -2, 1] for order 2.
    stencil = np.zeros(2 * order + 1)
    stencil[order] = 1.
    for _ in range(order):
        stencil = stencil[:-1] - stencil[1:]

    # Each stencil coefficient fills one diagonal of the difference operator.
    offsets = np.arange(order + 1)
    diff_op = sparse.diags(stencil, offsets, shape=(dim - order, dim))
    penalty = diff_op.conj().T.dot(diff_op) * lmbd

    identity = sparse.eye(dim, format='csc')
    # splu expects CSC input; convert explicitly rather than rely on the sum's format.
    coefmat = (identity + penalty).tocsc()
    return splu(coefmat)

def smooth(y: np.ndarray, splu_coef: "scipy.sparse.linalg.SuperLU") -> np.ndarray:
    '''
    Apply Whittaker smoothing by solving the pre-factorized system for ``y``.

    Parameters:
     y: series to smooth, of shape (n,), or several series stacked as the
        columns of an (n, k) array; n must match the size of the factorized
        system
     splu_coef: factorization returned by initialize_smoother (any object
                exposing a compatible ``solve`` method)

    Returns:
     The smoothed values as a new array with the same shape as ``y``
    '''
    # asarray accepts lists as well as arrays without forcing an extra copy.
    return splu_coef.solve(np.asarray(y))

In [None]:
# Time series of `data_old` at pixel (55, 55). x and y are passed by keyword because
# seaborn >= 0.12 no longer accepts them positionally.
sns.scatterplot(x=np.arange(data_old.shape[0]), y=data_old[:, 55, 55])

In [None]:
# Time series of `data` (output of calculate_and_save_best_images) at pixel (55, 55).
# x and y are passed by keyword because seaborn >= 0.12 no longer accepts them positionally.
sns.scatterplot(x=np.arange(data.shape[0]), y=data[:, 55, 55])

In [None]:
# Same plot of `data` at pixel (55, 55) as the previous cell. x and y are passed by
# keyword because seaborn >= 0.12 no longer accepts them positionally.
sns.scatterplot(x=np.arange(data.shape[0]), y=data[:, 55, 55])

In [None]:
# Sweep lambda and record, for each value, the relative absolute error between the
# smoothed series and the original observations at the time steps where both exist.
# Results go into `errors` (not `errors2`) so that the final comparison cell can
# subtract the two sweeps; previously both sweeps wrote to `errors2` and `errors`
# was never defined.
print("Horizintaol method")
errors = []

n_steps = int(data.shape[0])
# One column per pixel, so a single solve() smooths every pixel's series at once
# instead of looping over pixels in Python.
flat_series = data.reshape(n_steps, -1)
# The reference observations do not depend on lambda.
original_data = data_old[idxes_original]

for lmbd in range(0, 1000, 100):
    splu_coef = initialize_smoother(lmbd, n_steps)

    # Cast back to the input dtype, matching the earlier in-place assignment into a copy of `data`.
    smoothed = smooth(flat_series, splu_coef).reshape(data.shape).astype(data.dtype)

    # x and y are passed by keyword because seaborn >= 0.12 rejects positional x/y.
    sns.scatterplot(x=np.arange(n_steps), y=smoothed[:, 35, 35])
    plt.show()

    new_data = smoothed[idxes]
    mean_error = np.sum(np.abs(new_data - original_data)) / np.sum(original_data)
    errors.append(mean_error)
    print(f"Lambda {lmbd}: error: {mean_error}")

In [None]:
# Sweep lambda and record, for each value, the relative absolute error between the
# smoothed series and the original observations at the time steps where both exist.
errors2 = []

n_steps = int(data.shape[0])
# One column per pixel, so a single solve() smooths every pixel's series at once
# instead of looping over pixels in Python.
flat_series = data.reshape(n_steps, -1)
# The reference observations do not depend on lambda.
original_data = data_old[idxes_original]

for lmbd in range(0, 1000, 100):
    splu_coef = initialize_smoother(lmbd, n_steps)

    # Cast back to the input dtype, matching the earlier in-place assignment into a copy of `data`.
    smoothed = smooth(flat_series, splu_coef).reshape(data.shape).astype(data.dtype)

    # x and y are passed by keyword because seaborn >= 0.12 rejects positional x/y.
    sns.scatterplot(x=np.arange(n_steps), y=smoothed[:, 35, 35])
    plt.show()

    new_data = smoothed[idxes]
    mean_error = np.sum(np.abs(new_data - original_data)) / np.sum(original_data)
    errors2.append(mean_error)
    print(f"Lambda {lmbd}: error: {mean_error}")

In [None]:
# (27, ) out of 72
# Error per lambda, skipping the first entry (lambda = 0); x is the position in the
# remaining list. x and y are passed by keyword because seaborn >= 0.12 rejects
# positional x/y.
sns.scatterplot(x=np.arange(len(errors2) - 1), y=np.array(errors2)[1:])

In [None]:
# (10, ) out of 72
# Error per lambda scaled by 1000, skipping the first entry (lambda = 0); x is the
# position in the remaining list. x and y are passed by keyword because
# seaborn >= 0.12 rejects positional x/y.
sns.scatterplot(x=np.arange(len(errors2) - 1), y=np.array(errors2)[1:]*1000)

In [None]:
# Element-wise difference between two lambda sweeps.
# NOTE(review): `errors` must hold the error list of the other sweep (presumably the
# "horizontal" interpolation run -- verify); both lists need the same length.
np.array(errors2) - np.array(errors)