In this notebook, we will show how to build and train a fully connected spiking network in NengoDL, and then deploy that network on Loihi.

We will assume here that the reader is somewhat familiar with NengoDL, and focus on the issue of how to use NengoDL to train a network for Loihi.

In [1]:
%matplotlib inline

import os

import nengo
import nengo_dl
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import tensorflow as tf
import uproot
from myutils import *

try:
    import requests

    has_requests = True
except ImportError:
    has_requests = False

import nengo_loihi

# Run configuration: number of training epochs, and the upper bound that the
# input features are min-max scaled to.
epoch, scale = 30, 10

# Directory that receives the saved parameters and plots; its name encodes
# the settings above.
train_outdir = f"./SNN0425_{epoch}_scale{scale}"
os.makedirs(train_outdir, exist_ok=True)



We'll define a helper function for drawing the SNN results.

We’ll use the ttH(bb) and ttbb data to demonstrate the steps. The samples are generated with MadGraph5_aMC@NLO and simulated with Delphes.

In [2]:
# set up training data
# `variables` lists the dataframe columns that are used as network inputs.
# Names that appear only inside trailing comments (e.g. the *_phi columns)
# are deliberately left out of the input set.
variables = [
    'njets', 'nbjets', 'ncjets', 'nElectron', 'nMuon', 'MET_met', 'HT', # 'nLepton', 'MET_px', 'MET_py',
    'Lepton_pt', 'Lepton_eta', 'Lepton_e',# 'Lepton_phi',
    'Jet1_pt', 'Jet1_eta', 'Jet1_e', 'Jet1_btag', 'Jet2_pt', 'Jet2_eta', 'Jet2_e', 'Jet2_btag',# 'Jet_phi1', 'Jet_phi2',
    'Jet3_pt', 'Jet3_eta', 'Jet3_e', 'Jet3_btag', 'Jet4_pt', 'Jet4_eta', 'Jet4_e', 'Jet4_btag',# 'Jet_phi3', 'Jet_phi4',
    #'bjet1_pt', 'bjet1_eta', 'bjet1_e', 'bjet2_pt', 'bjet2_eta', 'bjet2_e',# 'bjet1_phi', 'bjet2_phi',
    'selbjet1_pt', 'selbjet1_eta', 'selbjet1_e', 'selbjet2_pt', 'selbjet2_eta', 'selbjet2_e',# 'selbjet1_phi', 'selbjet2_phi',

    'bbdR',   'bbdEta',   'bbdPhi',   'bbPt',   'bbEta',   'bbMass',   'bbHt',   'bbMt',  # 'bbPhi',
    'nub1dR', 'nub1dEta', 'nub1dPhi', 'nub1Pt', 'nub1Eta', 'nub1Mass', 'nub1Ht', 'nub1Mt',# 'nub1Phi',
    'nub2dR', 'nub2dEta', 'nub2dPhi', 'nub2Pt', 'nub2Eta', 'nub2Mass', 'nub2Ht', 'nub2Mt',# 'nub2Phi',
    'nubbdR', 'nubbdEta', 'nubbdPhi', 'nubbPt', 'nubbEta', 'nubbMass', 'nubbHt', 'nubbMt',# 'nubbPhi',
    'lb1dR',  'lb1dEta',  'lb1dPhi',  'lb1Pt',  'lb1Eta',  'lb1Mass',  'lb1Ht',  'lb1Mt', # 'lb1Phi',
    'lb2dR',  'lb2dEta',  'lb2dPhi',  'lb2Pt',  'lb2Eta',  'lb2Mass',  'lb2Ht',  'lb2Mt', # 'lb2Phi',
    'lbbdR',  'lbbdEta',  'lbbdPhi',  'lbbPt',  'lbbEta',  'lbbMass',  'lbbHt',  'lbbMt', # 'lbbPhi',
    'Wjb1dR', 'Wjb1dEta', 'Wjb1dPhi', 'Wjb1Pt', 'Wjb1Eta', 'Wjb1Mass', 'Wjb1Ht', 'Wjb1Mt',# 'Wjb1Phi',
    'Wjb2dR', 'Wjb2dEta', 'Wjb2dPhi', 'Wjb2Pt', 'Wjb2Eta', 'Wjb2Mass', 'Wjb2Ht', 'Wjb2Mt',# 'Wjb2Phi',
    'Wlb1dR', 'Wlb1dEta', 'Wlb1dPhi', 'Wlb1Pt', 'Wlb1Eta', 'Wlb1Mass', 'Wlb1Ht', 'Wlb1Mt',# 'Wlb1Phi',
    'Wlb2dR', 'Wlb2dEta', 'Wlb2dPhi', 'Wlb2Pt', 'Wlb2Eta', 'Wlb2Mass', 'Wlb2Ht', 'Wlb2Mt',# 'Wlb2Phi',
 
]
# Display names of the output classes. Later cells index this list with the
# integer labels, so the position is presumably the value stored in the
# "category" column -- confirm against the sample production.
class_names = ["tthbb", "ttbb", "ttbj", "ttcc", "ttlf"]
# Output and input sizes of the network, derived from the two lists above.
nClass, nVariables = len(class_names), len(variables)

# Load the full event table from the HDF5 sample file and show a preview.
pd_data = pd.read_hdf('samples/data.h5', key='df', mode='r')
print (pd_data)
print (pd_data.columns)

# DataFrame.filter silently drops names that are not columns. That would
# leave fewer features than nVariables and only fail much later, when the
# network is built. Fail here with a clear message instead.
missing_columns = [name for name in variables if name not in pd_data.columns]
if missing_columns:
    raise KeyError(f"columns missing from samples/data.h5: {missing_columns}")

# Min-max scale every input feature to the range [0, scale].
# NOTE(review): the minimum and maximum are taken over the whole table, i.e.
# including the rows that are later used as the test set.
features = pd_data.filter(items = variables)
feature_min = features.min()
feature_span = features.max() - feature_min
# A constant column has zero span; dividing by it would fill the whole column
# with NaN. Dividing by 1 instead maps such a column to all zeros.
feature_span = feature_span.replace(0, 1)
train_data = ((features - feature_min) * scale / feature_span).to_numpy(dtype=float)

# Class label of every event as a flat array of length len(pd_data).
train_out = pd_data["category"].to_numpy()

print (train_out)

# The first `trainlen` rows are used for training, the remainder for testing.
trainlen = 267767
if not 0 < trainlen < len(pd_data):
    raise ValueError(
        f"trainlen={trainlen} must leave both a training and a test set "
        f"for a table of {len(pd_data)} rows"
    )

# The *_images names are kept because later cells refer to them; the rows
# are feature vectors, not images.
train_images = train_data[:trainlen]
train_labels = train_out[:trainlen]
test_images = train_data[trainlen:]
test_labels = train_out[trainlen:]

        category  njets  nbjets  ncjets  nElectron  nMuon  nLepton  \
0              1      5       4       1          0      1        1   
1              3      9       2       2          1      0        1   
2              2      4       2       0          1      0        1   
3              4      7       3       1          1      0        1   
4              2      6       3       0          1      0        1   
...          ...    ...     ...     ...        ...    ...      ...   
382520         2      6       3       0          0      1        1   
382521         0      6       4       1          1      0        1   
382522         1      8       4       2          1      0        1   
382523         0      4       4       0          1      0        1   
382524         1      4       2       0          1      0        1   

          bjet1_pt    bjet2_pt  bjet1_eta  ...   Wlb1Eta   Wlb2Eta   Wlb1Phi  \
0       118.250000  116.328125   0.293274  ...  0.347168  0.350647 -1.378906   

Next we define the structure of our network. Because we need to keep the number of neurons and axons per core below the Loihi hardware limits, we adopt a somewhat unusual network architecture. We’ll have a relatively small core network, so that each layer fits on one Loihi core, and then repeat that network several times in parallel, summing their output. We can think of this as a variation on ensemble learning. See the CIFAR-10 example for a different approach that uses NengoLoihi’s BlockShape functionality to automatically split larger layers across cores.



In [3]:
# --- simulation constants -------------------------------------------------
dt = 0.001  # simulation timestep
presentation_time = 0.1  # how long each input sample is presented
max_rate = 120  # firing rate assigned to every neuron
amp = 1 / max_rate  # spike amplitude, scaled so that the overall output is ~1


def _glorot_connection(pre, post):
    """Connect ``pre`` to ``post`` with a Glorot-initialised transform."""
    return nengo.Connection(pre, post, transform=nengo_dl.dists.Glorot())


# NOTE: the objects below are created in a fixed order on purpose; keep the
# order when editing so the seeded network stays reproducible.
with nengo.Network(seed=0) as net:
    nengo_loihi.add_params(net)

    # one LIF description shared by the defaults and by every layer
    neuron_type = nengo.LIF(tau_rc=0.02, tau_ref=0.001, amplitude=amp)

    # network-wide defaults for ensembles and connections
    ensemble_defaults = net.config[nengo.Ensemble]
    ensemble_defaults.max_rates = nengo.dists.Choice([max_rate])
    ensemble_defaults.intercepts = nengo.dists.Choice([0])
    ensemble_defaults.neuron_type = neuron_type
    net.config[nengo.Connection].synapse = None

    # input node: presents the test samples one after another, each for
    # `presentation_time`
    inp = nengo.Node(
        nengo.processes.PresentInput(test_images, presentation_time),
        size_out=nVariables,
    )

    # output node: one dimension per class (nClass)
    out = nengo.Node(size_in=nClass)

    layer_kwargs = dict(dimensions=1, neuron_type=neuron_type)

    # the first layer runs off-chip and turns the input values into spikes
    layer_1 = nengo.Ensemble(n_neurons=nVariables, label="Layer 1", **layer_kwargs)
    net.config[layer_1].on_chip = False
    _glorot_connection(inp, layer_1.neurons)

    layer_2 = nengo.Ensemble(n_neurons=256, label="Layer 2", **layer_kwargs)
    _glorot_connection(layer_1.neurons, layer_2.neurons)

    layer_3 = nengo.Ensemble(n_neurons=256, label="Layer 3", **layer_kwargs)
    _glorot_connection(layer_2.neurons, layer_3.neurons)

    _glorot_connection(layer_3.neurons, out)

    # raw output probe (used for training) and an Alpha-filtered one (used
    # when the network is evaluated over time)
    out_p = nengo.Probe(out, label="out_p")
    out_p_filt = nengo.Probe(out, synapse=nengo.Alpha(0.01), label="out_p_filt")

The next step is to optimize the parameters of the network using NengoDL.

First we set up the input/target data for the training and test datasets.

In [4]:
# set up training data, adding the time dimension (with size 1)
minibatch_size = 200

# reshape() is used instead of indexing with None so that the cell can be
# re-run safely: arrays that already have the time axis keep their shape
# instead of growing another dimension.
train_images = train_images.reshape(len(train_images), 1, train_images.shape[-1])
train_labels = train_labels.reshape(len(train_labels), 1, 1)

# for the test data evaluation we'll be running the network over time
# using spiking neurons, so we need to repeat the input/target data
# for a number of timesteps (based on the presentation_time)
n_steps = int(presentation_time / dt)

# Take the first timestep of each sample before tiling. On the first run that
# is the only timestep; on a re-run it undoes the previous tiling, so the
# result always has shape (n_samples, n_steps, ...).
test_images = np.tile(
    test_images.reshape(len(test_images), -1, test_images.shape[-1])[:, :1, :],
    (1, n_steps, 1),
)
test_labels = np.tile(
    test_labels.reshape(len(test_labels), -1, 1)[:, :1, :],
    (1, n_steps, 1),
)

Next we need to define our error functions.

For training we will use the standard categorical cross-entropy loss function.

For evaluation we will use classification accuracy (the % of images classified correctly) as an intuitive measure of how well the network is doing. Since we will be running the network over time during evaluation, we modify the loss function slightly so that it only assesses the accuracy on the last timestep.



In [5]:
def classification_accuracy(y_true, y_pred):
    """Return the per-sample classification accuracy in percent.

    Only the last entry along axis 1 of both tensors is compared, so when the
    network is run over several timesteps the score reflects the final
    timestep only.

    Parameters
    ----------
    y_true : tensor of integer class labels, indexed as ``[:, -1]``.
    y_pred : tensor of network outputs, indexed as ``[:, -1]``.

    Returns
    -------
    ``100 *`` the result of ``tf.metrics.sparse_categorical_accuracy``.
    """
    final_true = y_true[:, -1]
    final_pred = y_pred[:, -1]
    return 100 * tf.metrics.sparse_categorical_accuracy(final_true, final_pred)


Now we create the NengoDL simulator and run the training using the sim.fit function.

More details on how to use NengoDL to optimize a model can be found here: https://www.nengo.ai/nengo-dl/user-guide.html.

To speed up this example we can set do_training=False to load some pre-trained parameters. 

Note that in order to run do_training=True, you will need to have TensorFlow installed with GPU support.

In [6]:
# Set to False to skip training and load previously saved parameters from
# train_outdir instead.
do_training = True

with nengo_dl.Simulator(net, minibatch_size=minibatch_size, seed=0) as sim:
    if do_training:
        # baseline: accuracy of the untrained network on the test set
        sim.compile(loss={out_p_filt: classification_accuracy})
        print(
            "accuracy before training: %.2f%%"
            % sim.evaluate(test_images, {out_p_filt: test_labels}, verbose=0)["loss"]
        )

        # run training on the unfiltered probe with cross-entropy loss
        sim.compile(
            optimizer=tf.optimizers.RMSprop(0.001),
            loss={out_p: tf.losses.SparseCategoricalCrossentropy(from_logits=True)},
            metrics={out_p: classification_accuracy},
        )
        sim.fit(train_images, {out_p: train_labels}, epochs=epoch)

        # Evaluate once and reuse the result: each pass over the test set is
        # expensive, and the value was previously computed twice (once for
        # `acc`, once inside the print call).
        sim.compile(loss={out_p_filt: classification_accuracy})
        acc = "%.2f%%" % sim.evaluate(test_images, {out_p_filt: test_labels}, verbose=0)["loss"]
        print("accuracy after training: %s" % acc)
        sim.save_params(train_outdir+"/ttbar_params")
    else:
        sim.load_params(train_outdir+"/ttbar_params")

    # store trained parameters back into the network
    sim.freeze_params(net)

Build finished in 0:00:00                                                      
Optimization finished in 0:00:00                                               
|             Constructing graph: build stage (0%)             | ETA:  --:--:--



Construction finished in 0:00:00                                               




|             Constructing graph: build stage (0%)             | ETA:  --:--:--

2024-04-24 16:00:16.253428: W tensorflow/tsl/framework/cpu_allocator_impl.cc:83] Allocation of 5500800000 exceeds 10% of free system memory.


accuracy before training: 19.95%finished in 0:00:00                            




Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30
|             Constructing graph: build stage (0%)             | ETA:  --:--:--

2024-04-24 16:17:25.264209: W tensorflow/tsl/framework/cpu_allocator_impl.cc:83] Allocation of 5500800000 exceeds 10% of free system memory.


Constructing graph: build stage finished in 0:00:00                            

2024-04-24 16:23:54.878982: W tensorflow/tsl/framework/cpu_allocator_impl.cc:83] Allocation of 5500800000 exceeds 10% of free system memory.


accuracy after training: 35.38%


As we built it, the network has no synaptic filters on the neural connections. This works well during training, but we can see that the error is still somewhat high when we evaluate it using spiking neurons. We can improve performance by adding synaptic filters to our trained network.

In [None]:
# Add a synaptic filter to every connection of the trained network.
for conn in net.all_connections:
    conn.synapse = 0.005

if do_training:
    with nengo_dl.Simulator(net, minibatch_size=minibatch_size) as sim:
        sim.compile(loss={out_p_filt: classification_accuracy})
        # Evaluate once and reuse the result; the test-set evaluation is
        # expensive and was previously run twice for the same number.
        acc = "%.2f%%" % sim.evaluate(test_images, {out_p_filt: test_labels}, verbose=0)["loss"]
        print("accuracy w/ synapse: %s" % acc)
        sim.save_params(train_outdir+"/ttbar_synapse")

Build finished in 0:00:00                                                      
Optimization finished in 0:00:00                                               
Construction finished in 0:00:00                                               
|             Constructing graph: build stage (0%)             | ETA:  --:--:--

2024-04-24 16:31:24.550435: W tensorflow/tsl/framework/cpu_allocator_impl.cc:83] Allocation of 5500800000 exceeds 10% of free system memory.


Constructing graph: build stage finished in 0:00:00                            

Now we can load our trained network, with synaptic filters, onto Loihi. This is as easy as passing the network to nengo_loihi.Simulator and running it; there is no extra work required. We will give the network `n_presentations` test samples (2000 in the cell below), and use them to evaluate the classification error.

In [None]:
# number of test samples presented to the Loihi simulation
n_presentations = 2000

# if running on Loihi, increase the max input spikes per step
hw_opts = {"snip_max_spikes_per_step": max_rate}

with nengo_loihi.Simulator(
    net, dt=dt, precompute=False, hardware_options=hw_opts
) as sim:
    # present every sample for `presentation_time`
    sim.run(n_presentations * presentation_time)

    # keep the filtered output at the last timestep of each presentation
    step = int(presentation_time / dt)
    output = sim.data[out_p_filt][step - 1 :: step]

    # predicted class = largest output; compare with the true labels
    predicted = np.argmax(output, axis=-1).astype(int)
    correct = test_labels[:n_presentations, -1, 0].astype(int)
    acc = 100 * np.mean(predicted == correct)

    print("loihi accuracy: %.2f%%" % acc)
    print("Predicted labels:\t", predicted)
    print("Correct labels: \t", correct)
    print("loihi acc: %.2f%%" % acc)

    np.set_printoptions(precision=2)

    # raw-count matrix as pdf, normalised matrix as pdf and png
    for extra_kwargs, filename in (
        ({}, "/loihi_confusion_matrix_val.pdf"),
        ({"normalize": True}, "/loihi_norm_confusion_matrix_val.pdf"),
        ({"normalize": True}, "/loihi_norm_confusion_matrix_val.png"),
    ):
        plot_confusion_matrix(
            correct,
            predicted,
            classes=class_names,
            savename=train_outdir + filename,
            **extra_kwargs,
        )
    

In [None]:
# number of samples in the test set (length of the first axis)
print(test_labels.shape[0])

We can also plot the output activity from the Loihi network as we show it different test images, to see what this performance looks like in practice.

In [None]:
# Plot the softmax of the filtered Loihi output for 10 groups of n_plots
# consecutive presentations each, one figure per group.
n_plots = 5  # presentations per figure (loop-invariant, so set once)
times = sim.trange()

for i in range(10):
    labels = test_labels[n_plots*i:n_plots*(i+1), -1, 0]
    print (labels)

    # title: the true class names, padded so they spread across the figure
    title = "             " + "".join(
        class_names[label] + "           " for label in labels
    )
    # NOTE(review): the file name is built from the label sequence only, so
    # two groups with identical labels overwrite each other's file.
    label_tag = "".join(map(str, labels))

    window = slice(n_plots * i * step, n_plots * (i + 1) * step)
    # Shift the time axis so every figure starts at the same point as the
    # first one. Subtracting the window offset is exact for every i, unlike
    # the earlier `% 5.00001` wrap, which drifted with i and only worked for
    # n_plots == 5.
    window_t = times[window] - (times[window.start] - times[0])
    probabilities = tf.nn.softmax(sim.data[out_p_filt][window])

    plt.figure()
    plt.plot(window_t, probabilities)
    # dashed lines mark the boundaries between presentations
    for k in range(n_plots + 1):
        plt.axvline(x=k * presentation_time, color='r', linestyle='--', linewidth=1)
    plt.legend(class_names)
    plt.xlabel("time [s]")
    plt.ylabel("probability")
    plt.title(title)

    plt.savefig(train_outdir+"/label_"+label_tag+".png")

In [None]:
# Plot the unfiltered network output for the first n_plots presentations.
n_plots = 10
plt.figure()

print (test_labels[:n_plots, -1, 0])

print (test_labels.shape)
plt.subplot(2, 1, 1)
plt.plot(sim.trange()[: n_plots * step], sim.data[out_p][: n_plots * step])
# The output node has one dimension per class, so the lines are labelled with
# the class names. The previous digit labels 0-9 did not match the number of
# plotted lines.
plt.legend(class_names, loc="best")
plt.xlabel("time [s]")