In [1]:
import plotly.figure_factory as ff
from typing import List, Dict
import pandas as pd


In [2]:
# -- Local imports
from utils.elasticsearch_utils import get_all_hits, flatten_dict, flatten_dict_keys, \
    get_hits_dsl_query, \
    get_hits_dict_query, update_fields_select_df, get_server_reports

In [None]:
# -- Useful paths (all relative to the notebook's working directory)
OUT_DIR = "results/reports"

# Files handed to update_fields_select_df as `update_file` for eData and
# raw reporting data respectively.
eDATA_FILTER_FILE, rDATA_FILTER_FILE = (
    "configs/edata_fields.csv",
    "configs/reporting_fields.csv",
)

In [None]:
# -- Examples of fetching experiment data (eData) from elasticsearch.
# NOTE: the two queries below both bind `edata`; only the last result is kept.

# -- Simple: gather all data from elasticsearch
full_edata: List[Dict] = get_all_hits()

# -- DSL query: phrase match on the title argument
edata: List[Dict] = get_hits_dsl_query({
    "match": {
        "args.title": {
            "query": "andrei",
            "type": "phrase"
        }
    }
})

# NOTE(review): a stray `df.to_csv("3dDataset")` used to sit here. `df` is only
# created by the server-report cell further down, so on a fresh kernel
# (Restart & Run All) this cell raised NameError. Re-add the export after `df`
# is built if the "3dDataset" dump is still needed.

# -- Dict query: one list of accepted values per field; `exclude_keys`
# presumably drops the listed keys from the returned hits -- confirm in
# utils.elasticsearch_utils.
edata: List[Dict] = get_hits_dict_query({
    "args.title": ["andrei"],
    "args.experiment": ["3d_datasets_ewc_256_mlp"]
}, exclude_keys=["task_info", ])

In [None]:
# -- Fetch the reports of the listed experiment ids from the server
rdata: List[Dict] = get_server_reports(
    e_ids=[
        "E1sZ62MBm5wd3rDHL-EF",
    ],
)

In [4]:
# -- Key filters for reports or eData
# Leave a list empty if you do not want to update its keys
rdata_keys = []
edata_keys = []

# eData columns: flattened keys of the first hit (use to update / initialize)
edata_keys = sorted(flatten_dict_keys(edata[0]))

# Reporting columns: flattened keys of the first report (use to update / initialize)
rdata_keys = sorted(flatten_dict_keys(rdata[0]))

# -- Read / update the selections stored on file
# Key filters and smart_group property for eData
e_select_df, e_select_k, e_select_sg = update_fields_select_df(
    None, edata_keys, update_file=eDATA_FILTER_FILE
)

# Key filters and smart_group property for raw reporting data
r_select_df, r_select_k, r_select_sg = update_fields_select_df(
    None, rdata_keys, update_file=rDATA_FILTER_FILE
)

In [None]:

# Re-fetch eData for one experiment, passing the selected eData keys as include_keys
edata: List[Dict] = get_hits_dict_query(
    {"args.experiment": ["3d_datasets_ewc_mlp_multi"]},
    include_keys=e_select_k,
)

In [None]:
# Flatten every hit and collect the results into a single dataframe
data = pd.DataFrame([flatten_dict(hit) for hit in edata])

In [None]:
# Example of gathering raw report data from the server

import seaborn as sns; sns.set(color_codes=True)
import matplotlib.pyplot as plt

full_report, df_return = get_server_reports(experiments=["3d_datasets_ewc_mlp_multi"],
                                            include_keys=r_select_k, smart_group=r_select_sg,
                                            df_format=True)
df, report_info = df_return

# Select finished experiments
# TODO next repair -> new argument with argument_finished
# For this experiment all finished runs should have reached the same `seen`
# value, so keep only the reports whose maximum `seen` equals the global maximum.
# Only the `seen` column is aggregated; taking max() over the whole frame first
# would also aggregate every unrelated (possibly list-valued) column.
max_seen = df.groupby("reporting_idx")["seen"].max()
select_reports = max_seen[max_seen == max_seen.max()].index
df = df[df["reporting_idx"].isin(select_reports)]

# Check whether any column contains null values (printed, since a bare
# expression in the middle of a cell is never displayed)
print(df.isnull().any())

#  df_task["c_reporting_idx"] = df_task.groupby(["seen", optim_key]).cumcount()

# Print columns
print(df.columns)

# Distinct row counts per (reporting_idx, seen) pair
print(df.groupby(["reporting_idx", "seen"])["task_idx"].count().unique())

# Flattened column names used for filtering and plotting
optim_key = "_args.train._optimizer.__name."
acc_key = '_eval_trace.0.acc.2'
merge_e_key = '_args.lifelong.merge_elasticities.'
dataset_key = '_args.tasks.datasets.'
reporting_idx_key = "reporting_idx"
run_id_key = '_args.run_id.'
batch_size_key = "_args.train.batch_size."
scale_key = '_args.lifelong.scale.'
optimizers = df[optim_key].unique()
seen_idxs = df["seen"].unique()

# Make some filters. Boolean indexing already returns a new frame; the final
# .copy() makes the selection independent of `df`, so adding a column below
# does not trigger SettingWithCopyWarning.
df_select = df[df[dataset_key].apply(lambda x: x == ['cifar10', 'fashion', 'mnist'])]
df_select = df_select[df_select[merge_e_key] == True]
df_select = df_select[df_select[optim_key] == 'Adam 0.001'].copy()

# Column that distinguishes the plotted curves.
# (Set condition = "task_idx" and skip the assignment below for one curve per task.)
condition = "combination"
# Combined label "<task_idx>_<optimizer>_". Parentheses replace the original
# backslash continuations, which only worked because the commented-out terms
# happened to follow the last backslash.
df_select[condition] = (
    df_select["task_idx"].apply(str) + "_"
    + df_select[optim_key].apply(str) + "_"
    # + df_select[scale_key].apply(str)
    # + df_select[batch_size_key].apply(str)
)

# Maximum accuracy per (seen, condition, run); aggregate only the accuracy column
df_tsplot = df_select.groupby(["seen", condition, run_id_key])[acc_key].max()
df_tsplot = df_tsplot.reset_index()

# NOTE(review): `sns.tsplot` is deprecated in newer seaborn releases in favour
# of `sns.lineplot` -- confirm against the installed seaborn version.
ax = sns.tsplot(time="seen", value=acc_key, condition=condition, unit=run_id_key,
                data=df_tsplot)
plt.show()



In [4]:
# Write the current `df` to a CSV file in the working directory
df.to_csv(path_or_buf="test2.csv")

In [None]:
# Test weight importance

import torch
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
import pandas as pd
import glob

import plotly.plotly as py
import plotly.graph_objs as go

plt.ion()

# NOTE(review): absolute, machine-specific path -- point this at your own copy
# of the experiment before running.
experiment_path = "/media/andrei/CE04D7C504D7AF291/rl/lifelong-learning/xdata/1530208894_test_weight_importance"

# glob returns matches in arbitrary order; sort them so that positional indices
# used afterwards (`all_data[1]`, the box-plot trace names) refer to the same
# report on every run.
reports = sorted(glob.glob(experiment_path + "/**/reporting.pkl", recursive=True))

# NOTE(security): torch.load unpickles the file, which can execute arbitrary
# code -- only load reports from a trusted source.
# The map_location callable returns the storage unchanged, i.e. tensors are
# kept on the CPU instead of being moved to the device they were saved from.
full_data = [torch.load(x, map_location=lambda storage, loc: storage) for x in reports]

# Per report: the "info" entry and the first task's "acc" of the first
# `_task_train_tick` record.
all_data = [x["_task_train_tick"][0]["info"] for x in full_data]
all_data_acc = [x["_task_train_tick"][0]["task"][0]["acc"] for x in full_data]

# Report used for the single-model examples (requires at least two reports)
data = all_data[1]

# Keys of the per-report info dict
key_mode, key_constraints, key_res_size, key_results = (
    "mode",
    "constraint",
    "res_size",
    "results",
)

# -- Example data
# Consider 1 single model, 1 data set trained ( => 1 task train tick)
mode = data[key_mode]

# {param_name: torch.tensor(torch.Size(param))}
constraints = data[key_constraints]

# res_size: (no_segments, no_samples)
no_segments, no_samples = res_size = data[key_res_size]

# {param_name: {acc: torch.tensor(torch.Size(param) + res_size), loss: ... } }
results = data[key_results]

print(constraints.keys())

# -- Plot constraint value stats: one box trace per report, with the values
# grouped on the x axis by parameter name.
plot_data = []
for ix, report_constraints in enumerate([info[key_constraints] for info in all_data]):
    y = []
    x = []
    # Distinct names for the dict and its tensors (the original rebound the
    # outer loop variable inside the inner loop).
    for param_name, values in report_constraints.items():
        y += values.view(-1).cpu().numpy().tolist()
        x += (values.numel() * [param_name])
    trace = go.Box(y=y, x=x, name=str(ix))
    plot_data.append(trace)

layout = go.Layout(
    yaxis=dict(
        # Was 'normalized moisture', a leftover from a plotly example
        title='constraint value',
        zeroline=False
    ),
    boxmode='group'
)

fig = go.Figure(data=plot_data, layout=layout)
# NOTE(review): `py` is plotly.plotly (online plotting); newer plotly releases
# moved it to the separate chart_studio package -- confirm installed version.
py.plot(fig, auto_open=False)

# Scatter plot value (e.g. acc) vs noise
# Builds, for every parameter, one tensor pairing each "acc" result with the
# constraint value of the weight it belongs to, concatenated over all reports.
no_layers = len(constraints.keys())

table_data = dict({k: [] for k in constraints.keys()})

for pi, param_name in enumerate(constraints.keys()):
    # One (no_segments, W, no_samples, 2) tensor per report, W = number of
    # elements of the flattened parameter.
    td = []
    for ix in range(len(all_data)):
        # Flattened constraint reshaped to (1, W, 1, 1) so it can be expanded
        # against the results tensor below.
        constraint = all_data[ix][key_constraints][param_name].view(-1).unsqueeze(0)\
            .unsqueeze(2).unsqueeze(2).detach()
        # acc results viewed as (W, no_segments, no_samples) ...
        results = all_data[ix][key_results][param_name]["acc"].view(-1, no_segments, no_samples).detach()
        # ... reordered to (no_segments, W, no_samples) plus a trailing axis of size 1
        results = results.permute([1, 0, 2]).unsqueeze(3)
        # Divide by the accuracy recorded for this report
        results = results.div(all_data_acc[ix])
        constraint = constraint.expand_as(results)
        # Last axis holds the pair (acc, constraint)
        td.append(torch.cat([results, constraint], dim=3))
    # Stack the reports along the samples axis
    td = torch.cat(td, dim=2)
    table_data[param_name] = td

# table_data: {param_name: torch.tensor of shape (
#     no_segments,
#     weights,
#     samples (no_samples * no_experiments),
#     2 -> (acc, constraint)
# )}

param_name = "fc.0.weight"
# Print the distinct values of every leading-axis slice of `results` that is
# not constant.
# NOTE(review): `results` is whatever the preceding table-building loop left
# bound, and `param_name` is not read below -- confirm this is the intended
# tensor.
for i in range(results.size(0)):
    if len(results[i, :].unique()) <= 1:
        continue
    print(results[i, :].unique())

# Flatten the (acc, constraint) table of one parameter into scatter coordinates
td = table_data["heads.0.weight"]
# Segment position in [0, 1], broadcast to every (segment, weight, sample) entry
segments = (
    torch.linspace(0, 1, no_segments)
    .unsqueeze(1)
    .unsqueeze(1)
    .expand(td.size()[:-1])
    .contiguous()
)
# Column 0 of the last axis is acc (z), column 1 is the constraint (x)
z, x = (td.view(-1, 2)[:, col].numpy() for col in (0, 1))
y = segments.view(-1).numpy()

# 3D scatter of the flattened table:
# x = constraint value, y = segment position in [0, 1], z = acc result
trace = go.Scatter3d(
    x=x,
    y=y,
    z=z,
    mode='markers',
    marker=dict(
        size=1,
        line=dict(
            color='rgba(217, 217, 217, 0.14)',
            width=0.5
        ),
        opacity=0.8
    )
)

data = [trace]
layout = go.Layout(
    margin=dict(
        l=0,
        r=0,
        b=0,
        t=0
    )
)
fig = go.Figure(data=data, layout=layout)
py.plot(fig, filename='simple-3d-scatter', auto_open=False)

# NOTE(review): a stray, indented `ax.scatter(x, y, c=color, s=scale, ...)` line
# used to end this cell. The unexpected indent made the whole cell fail to
# compile (IndentationError), and `color` / `scale` are never defined, so it
# was removed.

