# Interactive Visualizer (InVis)
This is the notebook where I'm currently developing the visualizer. I'm using Holoviews and Panel to construct the dashboard and implement the interactivity.

In [None]:
import holoviews as hv
from holoviews import dim, opts, streams
from holoviews.selection import link_selections
import hvplot.pandas
import pandas as pd
from itertools import combinations
import numpy as np
from tqdm import tqdm
import re
import panel as pn
import spatialpandas
import os

# Load the Bokeh plotting backend for HoloViews and make it the current backend.
# NOTE(review): `width=100` is presumably the notebook output width in percent — confirm.
hv.extension('bokeh',width=100)
hv.Store.set_current_backend('bokeh')
# Initialise Panel for the notebook, requesting the 'tabulator' extension used by
# the pn.widgets.Tabulator tables built in plot_scatter_table.
# NOTE(review): the second, argument-less pn.extension() call looks redundant — confirm.
pn.extension('tabulator')
pn.extension()

## Functions
First we must load the data. `load_params` reads the `.paramnames` file and returns a list of the parameter names. `load_data` takes a `.txt` chain file and a list of column names, drops the first two columns of the file, and returns a DataFrame of the remaining columns labelled with those names. `plot_scatter_table` constructs the dashboard.

In [None]:
def load_params(filename):
    """Read the parameter names from a .paramnames file.

    Each non-blank line is stripped of surrounding whitespace and split on the
    literal separator space-tab-space; only the text before the first
    separator (the parameter name) is kept.

    Args:
        filename: path to the .paramnames file.

    Returns:
        A list of parameter-name strings, one per non-blank line, in file order.
    """
    names = []
    with open(filename, 'r') as f:
        for line in f:
            line = line.strip()
            if not line:
                # A blank line (e.g. a trailing newline at the end of the file)
                # holds no parameter; keeping it would yield an empty name and
                # a column-count mismatch when the names label the chain data.
                continue
            # The separator is a fixed string, so a plain split is sufficient.
            names.append(line.split(' \t ', 1)[0])
    return names


def load_data(filename, column_names):
    """Load a whitespace-delimited numeric text file into a DataFrame.

    The first two columns of the file are discarded and the remaining columns
    are labelled with ``column_names``.
    NOTE(review): the two dropped columns are presumably the sample weight and
    the likelihood written by the sampler — confirm against the chain format.

    Args:
        filename: path to the .txt file readable by ``np.loadtxt``.
        column_names: labels for the columns from the third one onward; the
            count must match the number of remaining columns.

    Returns:
        A pandas DataFrame with one row per line of the file.
    """
    # ndmin=2 keeps a one-row file two-dimensional; without it np.loadtxt
    # returns a 1-D array and the column slice below raises IndexError.
    data = np.loadtxt(filename, ndmin=2)
    return pd.DataFrame(data[:, 2:], columns=column_names)


def plot_scatter_table(data, params, plots):
    """Assemble the dashboard: linked 2-D plots on top, a selectable table below.

    Args:
        data: a pandas DataFrame.
        params: list of parameters that correspond to column names in ``data``.
        plots: the number of parameter-pair plots to display.

    Returns:
        A ``pn.Column`` holding the selection-mode control, the row of plots,
        and the row with the table, the action buttons and the selection view.
    """
    # One shared link_selections instance so a selection made on any plot is
    # applied to every plot wrapped by it.
    linker = link_selections.instance()

    # All unordered parameter pairs; only the first `plots` of them are drawn.
    shown_pairs = [list(pair) for pair in combinations(params, 2)][:plots]

    grid = hv.Layout()
    for x_name, y_name in shown_pairs:
        # To carry the remaining parameters along as value dimensions, pass
        # them as a third `vdims` argument to hv.Dataset.
        dataset = hv.Dataset(data, [x_name, y_name])
        points = hv.Points(dataset)
        points = points.opts(opts.Points(color='black', size=2))

        density = hv.Bivariate(data[[x_name, y_name]].values, [x_name, y_name], [])
        density = density.opts(
            opts.Bivariate(
                bandwidth=0.5,
                cut=0,
                cmap="blues",
                levels=5,
                colorbar=False,
                show_legend=False,
                filled=True,
                toolbar='above',
                width=350,
                alpha=0.75,
            )
        )

        # Only the points are wrapped by the linker; the density is overlaid on top.
        grid += (linker(points) * density).opts(width=300, height=300)

    # Arrange the plots four per row.
    grid = grid.cols(4)

    # Read-only table of the chosen parameters plus an initially empty
    # read-only table that will show the highlighted rows.
    table = pn.widgets.Tabulator(data[params], disabled=True)
    selection = pn.widgets.Tabulator(disabled=True)
    # NOTE(review): selection_expr is assigned once at construction time; if
    # the widget rejects it with ValueError the table starts with no rows
    # selected. Confirm whether a live link to the plot selection was intended.
    try:
        table.selection = linker.selection_expr
    except ValueError:
        table.selection = []

    show_button = pn.widgets.Button(
        name='Show selected rows only', button_type='primary', width_policy='auto')
    download_button = pn.widgets.Button(
        name='Download selections as CSV', button_type='primary', width_policy='auto')
    reset_button = pn.widgets.Button(name='Reset', width_policy='auto')

    def _sorted_selection():
        # Rows currently highlighted in the table, ordered by index.
        return table.selected_dataframe.sort_index()

    def on_show(event):
        # Copy the highlighted rows into the secondary table.
        selection.value = _sorted_selection()

    def on_download(event):
        # Write the highlighted rows to a fixed CSV path, creating the folder if needed.
        os.makedirs('data/selections', exist_ok=True)
        _sorted_selection().to_csv('data/selections/selected_output.csv', index=False)

    def on_reset(event):
        # Clear the highlight, then refresh the secondary table from the now-empty selection.
        table.selection = []
        selection.value = table.selected_dataframe

    show_button.on_click(on_show)
    download_button.on_click(on_download)
    reset_button.on_click(on_reset)

    button_bar = pn.Row(show_button, download_button, reset_button)
    dashboard = pn.Row(table, pn.Column(button_bar, selection))
    # Widget exposing the linker's `selection_mode` parameter.
    mode_control = pn.Param(linker, parameters=['selection_mode'])
    return pn.Column(mode_control, pn.Row(grid), dashboard)

Read in the `.paramnames` file and construct a single DataFrame from all of the chains, using the parameter names as column names.

In [None]:
if __name__=='__main__':
    # Read in data: the parameter names label the columns of every chain file.
    param_names = load_params('data/test_IDM_n_0/2022-05-04_75000_.paramnames')
    # Load chain files 1..55 into a list and concatenate once. Concatenating
    # inside the loop re-copies the accumulated frame on every iteration and
    # starts from an empty object-dtype frame; a single concat avoids both.
    chains = [
        load_data('data/test_IDM_n_0/2022-05-04_75000__{}.txt'.format(i), column_names=param_names)
        for i in tqdm(range(1,56))
    ]
    # ignore_index=True renumbers the rows 0..N-1 across all chains.
    df = pd.concat(chains, ignore_index=True)

## Visualizing
We only need to look at a few of the parameters, so for now they're hardcoded here. Now we just call `plot_scatter_table` to produce the dashboard. The top row holds the 2-D plots; any area-based selection made on one plot is reflected in the others. The bottom section holds a table of all the values, in which you can highlight one or more rows. The highlighted rows can then be displayed as their own table or downloaded as a CSV file by clicking the corresponding buttons.

In [None]:
# Parameters to visualize; each name is used as a column of the DataFrame passed below.
params = ['omega_b', 'omega_dmeff', 'n_s', 'tau_reio', 'sigma_dmeff', 'H0', 'A_s', 'sigma8']
# Downsample for speed by keeping every 1000th row of `df`, then renumber the rows from 0.
# A better downsampling method may be implemented later.
df_slice = df[::1000]
new_df = df_slice.reset_index(drop=True)
# Build the dashboard for the first 4 parameter pairs; the bare `viz` displays it as the cell output.
viz = plot_scatter_table(new_df, params, plots=4)
viz

In [None]:
# Dataset keyed on two of the parameters, built from the downsampled frame.
ds = hv.Dataset(new_df, ['omega_b', 'omega_dmeff'])
def plot_points(data):
    """Return an ``hv.Points`` element for ``data`` drawn as black, size-2 markers.

    Args:
        data: the dataset (or other input accepted by ``hv.Points``) to plot.

    Returns:
        The styled ``hv.Points`` element.
    """
    # Plot the argument itself; previously the function ignored `data` and
    # always read the module-level `ds`.
    return hv.Points(data).opts(opts.Points(color='black', size=2))
plot_points(ds)

In [None]:
# Scatter element built from `ds` with the 'tap' tool enabled and marker size 3.
# It is only assigned here, so this cell produces no output.
scatter = hv.Scatter(ds).opts(opts.Scatter(tools=['tap'], size=3))