In [10]:
import gzip, sklearn
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn import linear_model
from sklearn.metrics import mean_squared_error
from sklearn.metrics import mean_absolute_error

from joblib import dump, load
import os, json
import collections

from bokeh.io import output_notebook, show
from bokeh.plotting import figure, output_file, save
from bokeh.models import CustomJS, ColumnDataSource
from bokeh.models.tools import HoverTool


#dump(reg, 'best_regressor_sklearn.joblib')

In [2]:
# Deserialize the regressor and the scaler used by the cells below.
# joblib.load unpickles its input, so only point these at trusted files.
regressor_path = 'Neuron_predictor/sklearn_logreg/best_regressor_sklearn_after40.joblib'
scaler_path = 'Neuron_predictor/sklearn_logreg/nn_activations_scaler_sklearn_after40.joblib'

reg = load(regressor_path)
scaler = load(scaler_path)

In [3]:
activations_no = 1000  # how many leading activation values go into each feature row
usefulness_per_neuron = collections.defaultdict(dict)
target = 'usefulness_loss'  # key under which each record stores its gold usefulness
with open(os.path.join('neuron_logs', 'train_data', 'visualize_video_data.json'), 'r') as f:
    neuron_data = json.load(f)

features_ = []  # never filled in this cell; kept so the name stays defined


def _feature_row(record, n_activations):
    """Return the flat feature list for one neuron record.

    The order is: depth, inverse_depth, width, reg_loss_in_layer, followed by
    the first ``n_activations`` entries of ``record['activations']``.
    ``input_weights`` / ``output_weights`` are intentionally not included.

    Raises:
        KeyError: if ``record`` lacks one of the keys read here.
    """
    return [
        record['depth'],
        record['inverse_depth'],
        record['width'],
        record['reg_loss_in_layer'],
        *record['activations'][:n_activations],
    ]


# Fill usefulness_per_neuron[e][neuron] = (gold usefulness, predicted usefulness).
for e, records_in_epoch in neuron_data.items():
    # Entries whose key contains no space are skipped.
    neuron_keys = [neuron for neuron in records_in_epoch if ' ' in neuron]
    if not neuron_keys:
        # Nothing to predict; also avoids creating an empty entry for this key.
        continue

    # One row per neuron, so the scaler and regressor are called once per
    # outer key instead of once per neuron. Each row is processed
    # independently, so the values should match the row-by-row computation
    # up to floating-point rounding.
    feature_matrix = np.array(
        [_feature_row(records_in_epoch[neuron], activations_no) for neuron in neuron_keys],
        dtype=np.float32,
    )
    predictions = reg.predict(scaler.transform(feature_matrix))

    for neuron, usefulness_prediction in zip(neuron_keys, predictions):
        usefulness_gold = records_in_epoch[neuron][target]
        usefulness_per_neuron[e][neuron] = (usefulness_gold, usefulness_prediction)


In [15]:
# Pick a single neuron; its key has the form "<layer> <pos>".
e = '9'  # NOTE(review): not read below; the comprehension walks every key of usefulness_per_neuron
layer = '4'
pos = '50'
neuron_key = f'{layer} {pos}'

# One (gold, predicted) row per top-level key of usefulness_per_neuron.
data = np.array([
    usefulness_per_neuron[epoch_key][neuron_key]
    for epoch_key in usefulness_per_neuron
])

In [23]:
# Column-oriented data for the Bokeh plots: one list entry per neuron position.
plot_neuron_data = collections.defaultdict(list)

layer = 4  # same for every position, so set once instead of on each iteration

for pos in range(100):
    neuron_key = f'{layer} {pos}'
    # Build the (gold, predicted) table once and slice both columns from it,
    # rather than rebuilding the same list for each column.
    gold_and_pred = np.array([usefulness_per_neuron[e][neuron_key] for e in usefulness_per_neuron])
    gold = gold_and_pred[:, 0]
    pred = gold_and_pred[:, 1]

    plot_neuron_data['usefulness_gold'].append(gold)
    plot_neuron_data['usefulness_pred'].append(pred)
    plot_neuron_data['pos'].append(pos)
    plot_neuron_data['mean'].append(np.mean(gold))
    plot_neuron_data['std'].append(f'{np.std(gold):.3f}')
    # x-values follow the number of collected points, so xs and ys always have
    # equal length (identical to the former fixed range(101) when there are
    # 101 entries).
    plot_neuron_data['range'].append(list(range(len(gold))))
    plot_neuron_data['color'].append('grey')
    plot_neuron_data['hovercolor'].append('red')
    plot_neuron_data['hovercolor_pred'].append('blue')
    plot_neuron_data['mean_pred'].append(np.mean(pred))
    plot_neuron_data['std_pred'].append(f'{np.std(pred):.3f}')

plot_source = ColumnDataSource(plot_neuron_data)

In [24]:
from bokeh.layouts import gridplot


def _usefulness_figure(y_column, hover_color_column, title):
    """Create a figure that draws one line per row of ``plot_source``.

    Args:
        y_column: name of the ``plot_source`` column holding the y-values;
            it is also used as the y-axis label.
        hover_color_column: name of the column holding the line colour used
            while a line is hovered.
        title: figure title.

    Returns:
        The configured figure (no hover tool attached yet).
    """
    fig = figure(width=1300, height=350, x_axis_label='epoch', y_axis_label=y_column, title=title)
    fig.multi_line(xs='range', ys=y_column,
                   line_width=2, line_color='color', line_alpha=0.2,
                   hover_line_color=hover_color_column, hover_line_alpha=1.0,
                   source=plot_source)
    return fig


def _line_hover(tooltips):
    """Return a HoverTool with the settings shared by both figures."""
    return HoverTool(show_arrow=False, line_policy='next', tooltips=tooltips)


f1 = _usefulness_figure('usefulness_gold', 'hovercolor', f'Gold usefulness on layer {layer}')
f2 = _usefulness_figure('usefulness_pred', 'hovercolor_pred', 'Predicted usefulness')

f1.add_tools(_line_hover([
    ('index', '@pos'),
    ('mean_gold', '@mean'),
    ('stdev_gold', '@std'),
    ('mean_pred', '@mean_pred'),
    ('stdev_pred', '@std_pred'),
]))

f2.add_tools(_line_hover([
    ('index', '@pos'),
    ('mean', '@mean_pred'),
    ('stdev', '@std_pred'),
    ('mean_gold', '@mean'),
    ('stdev_gold', '@std'),
]))

# Gold plot on top, predicted plot below.
p = gridplot([[f1], [f2]])

output_path = f'visualization/gold_v_pred_usefulness_l_{layer}.html'
# Create the target directory first so saving also works on a fresh checkout.
os.makedirs(os.path.dirname(output_path), exist_ok=True)
output_file(output_path)
# Call show(p) instead to render inline in the notebook.
save(p)


'/home/levai/Renyi/resistant-neurons/visualization/gold_v_pred_usefulness_l_4.html'