In [1]:
import os 
import numpy as np
import pandas as pd
from copy import deepcopy
import seaborn

import matplotlib.pyplot as plt
from matplotlib.animation import FuncAnimation
%matplotlib qt

In [2]:
# Kernel bandwidth; it appears in the result-folder name and in the figure title.
bw = 5.2

# Raw geyser data as float64.
true_data = np.load('data/geyser.npy').astype(np.float64)

# First column with every entry equal to 108 dropped, as an (n, 1) column vector.
waiting = true_data[true_data[:, 0] != 108., 0].reshape(-1, 1)

In [3]:
# Candidate outlier locations: a coarse grid 90, 100, ..., 400 merged with a
# fine grid 101, 102, ..., 109, sorted in ascending order.
outlier_list = np.sort(
    np.concatenate((
        np.arange(90., 410., 10),
        np.arange(101., 110., 1),
    ))
)
outlier_list

array([ 90., 100., 101., 102., 103., 104., 105., 106., 107., 108., 109.,
       110., 120., 130., 140., 150., 160., 170., 180., 190., 200., 210.,
       220., 230., 240., 250., 260., 270., 280., 290., 300., 310., 320.,
       330., 340., 350., 360., 370., 380., 390., 400.])

In [4]:
# Penalty parameter on the log scale; result paths embed it as lambda-exp(...).
log_pen_param = -13

# Labels identifying the kernel and the plotted variable.
kernelfunction_name = 'Gaussian'
var_name = 'waiting'

# Horizontal plotting range and the evenly spaced grid the curve is drawn on.
xlimit = (21., 410.)
plot_pts_cnt = 2000
newx = np.linspace(*xlimit, num=plot_pts_cnt)

# Font sizes for the different text elements of the figure.
fontsize_tick = 10
fontsize_title = 12
fontsize_label = 20
fontsize_info = 20
fontsize_suptitle = 22

# Line width of the plotted curve and scientific-notation limits.
linewidth = 3.0
scilimits = (0, 3)

In [6]:
# Tag used in the result-folder name and in the GIF file name.
base = 'uniform'

# A single large axes; the layout is adjusted by hand instead of using
# constrained layout, leaving room at the top for the figure title.
fig, ax = plt.subplots(1, 1, figsize=(20, 10), constrained_layout=False)
fig.subplots_adjust(top=0.9)

# Read the raw first column once instead of on every animation frame; each
# frame derives its own array from this one and never modifies it.
_waiting_raw = np.load('data/geyser.npy').astype(np.float64)[:, 0]


def update_plots(outlier):
    """Redraw the shared axes for one animation frame.

    Every entry of the raw data equal to 108.0 is replaced by ``outlier``.
    The axes are cleared and then repopulated with: a rug of all data values
    (blue) with the shifted value re-drawn in red, the pre-computed curve for
    this ``outlier`` loaded from disk and plotted against ``newx``, a dashed
    vertical line at ``outlier``, and a text box reading ``Add <outlier>``.

    Reads the notebook-level names ``ax``, ``xlimit``, ``var_name``,
    ``fontsize_label``, ``fontsize_tick``, ``fontsize_info``, ``base``, ``bw``,
    ``log_pen_param``, ``newx`` and ``linewidth``.

    Parameters
    ----------
    outlier : float
        Value substituted for the 108.0 entries; it is also formatted into the
        name of the ``.npy`` file that is loaded.

    Returns
    -------
    matplotlib.axes.Axes
        The axes that were redrawn (``ax``).
    """
    # data for this frame: 108.0 entries moved to `outlier` (new array, cache untouched)
    vals = np.where(_waiting_raw == 108.0, outlier, _waiting_raw)

    ax.clear()

    # ---------------------------------------------------------------------------------------
    # axes cosmetics (ax.clear() resets them, so they are re-applied every frame)
    ax.set_xlim(xlimit)
    ax.set_xlabel(var_name, fontsize = fontsize_label)
    ax.set_ylabel('unnormalized log density', fontsize = fontsize_label)
    ax.tick_params(axis = 'both', labelsize = fontsize_tick)
    ax.ticklabel_format(axis = 'y')

    # Rug of all data values in blue, then the shifted value re-drawn in red on top.
    # The data are passed positionally and land on the x axis by default;
    # rugplot's `axis=` keyword is deprecated in seaborn >= 0.11.
    seaborn.rugplot(vals, ax = ax, color = 'tab:blue')
    seaborn.rugplot(np.array([outlier]), ax = ax, color = 'red')

    # pre-computed values for this outlier, evaluated on the `newx` grid
    file_name_grid = f'data/geyser-waiting-gaussiankernel-{base}base-bw{bw}-lambda-exp({log_pen_param})/' \
                     f'log-density-value-shift-{outlier}.npy'
    denvals_grid = np.load(file_name_grid)
    ax.plot(newx.flatten(), denvals_grid.flatten(), color = 'tab:blue', linewidth = linewidth)

    # draw a vertical line at the outlier
    ax.axvline(outlier, 0, 1, ls = '--', color = 'tab:purple', alpha = 0.5)

    # add plot information in the top-right corner of the axes
    info = f'Add {outlier}'
    ax.text(0.988, 0.988,
            info,
            fontsize = fontsize_info,
            multialignment = 'left',
            horizontalalignment = 'right',
            verticalalignment = 'top',
            transform = ax.transAxes,
            bbox = {'facecolor': 'none',
                    'boxstyle': 'Round, pad=0.2'})

    return ax

# One frame per candidate outlier value, 500 ms between frames.
ani = FuncAnimation(
    fig,
    update_plots,
    frames = outlier_list,
    interval = 500)

# The title reports the same penalty parameter that selects the data folder and
# names the GIF (log_pen_param), so the figure cannot be mislabelled.
fig.suptitle(r'Logarithm of Score Matching Density Estimates with $\sigma$={bw} and $\lambda$=exp({pen})'.format(
    bw=bw, pen=log_pen_param),
             fontsize = fontsize_suptitle, y = 0.98)

# Save the animation as a GIF via matplotlib's 'imagemagick' writer.
# Comment out the two lines below to skip saving.
os.makedirs('gif', exist_ok=True)  # ani.save does not create missing directories
ani.save(f'gif/waiting_Gaussian_bw={bw}_{base}base_pen_exp({log_pen_param}).gif', writer='imagemagick')

plt.show()