In [1]:
%matplotlib widget

Check the current GPU usage. Please try to be nice!

In [2]:
!nvidia-smi

Fri Dec  7 03:04:02 2018       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 396.44                 Driver Version: 396.44                    |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|   0  TITAN V             On   | 00000000:03:00.0 Off |                  N/A |
| 28%   29C    P8    23W / 250W |   1397MiB / 12066MiB |      0%      Default |
+-------------------------------+----------------------+----------------------+
|   1  Tesla P100-PCIE...  On   | 00000000:83:00.0 Off |                    0 |
| N/A   28C    P0    29W / 250W |      0MiB / 16280MiB |      0%      Default |
+-------------------------------+----------------------+----------------------+
|   2  TITAN V             On   | 00000000:84:00.0 Off |                  N/

> **WARNING**: The card numbers here are *not* the same as in CUDA. You have been warned. However, these numbers are correct if you use the `select_gpu` helper function.

## Imports

In [3]:
import matplotlib.pyplot as plt
import numpy as np
import time
import torch
import pandas as pd

# Python 3 standard library
from pathlib import Path

## Get the helper functions

In [4]:
from model.collectdata import collect_data
from model.loss import Loss
from model.training import trainNet, select_gpu, Results
from model.plots import dual_train_plots, replace_in_ax

from model.models import SimpleCNN2Layer as Model

### Set up local parameters

In [5]:
# This gets built up during the run - do not rerun this cell
results = pd.DataFrame([], columns=Results._fields)

In [6]:
n_epochs = 10

# Size of batches
batch_size = 128

# How fast to learn
learning_rate = 4e-6

# Name is the output file name
name = 'Dec_6_mask_120000_2layer_'+str(learning_rate)

# Make an output folder named "name" (change if you want)
output = Path(name)


Make the output directory if it does not exist:

In [7]:
output.mkdir(exist_ok=True)

Set up Torch device configuration. All tensors and model parameters need to know where to be put.
This takes a BUS ID number: The BUS ID is the same as the listing at the top of this script.

In [8]:
device = select_gpu(0)

1 available GPUs (initially using device 0):
  0 TITAN V


## Loading data

Load the dataset, split into parts, then move to device if `device=device` is present. If this line is commented out, then load the datasets as the calculations progress. Allows larger datasets and plays nicer with memory, but very slightly slower. See `collectdata.py` in the `../model` directory for the source. Datasets are listed in the model directory README, repeated here:

|        From       |          To         |         Events          |
|-------------------|---------------------|-------------------------|
| `kernel_20181003` | `Oct03_20K_val`     | 1,2                     |
| `kernel_20181003` | `Oct03_20K_test`    | 3,4                     |
| `kernel_20181003` | `Oct03_40K_train`   | 5,6,7,8                 |
| `kernel_20181003` | `Oct03_80K_train`   | 9,10,11,12,13,14,15,16  |
| `kernel_20181003` | `Oct03_80K2_train`  | 17,18,19,20,21,22,23,24 |
| `kernel_20180814` | `Aug14_80K_train`   | 1,2,3,4,5,6,7,8         |

In [9]:
# Training dataset. You can put as many files here as desired.
train_loader = collect_data('data/Oct03_80K_train.h5',
                            'data/Oct03_80K2_train.h5',
                            batch_size=batch_size,
                            device=device,
                            masking=True, shuffle=True)

# Validation dataset. You can slice to reduce the size.
val_loader = collect_data('data/Oct03_20K_val.h5',
                          batch_size=batch_size,
                          slice=slice(256 * 39),
                          device=device,
                          masking=True, shuffle=False)

Loading data...
Loaded data/Oct03_80K_train.h5 in 10.3 s
Loaded data/Oct03_80K2_train.h5 in 9.919 s
Constructing 160000 event dataset took 27.03 s
Loading data...
Loaded data/Oct03_20K_val.h5 in 2.501 s
Constructing 9984 event dataset took 0.1894 s


In [10]:
print(train_loader)

<torch.utils.data.dataloader.DataLoader object at 0x7f165b415198>


# Preparing the model

Prepare a model, use multiple GPUs if they are VISIBLE, and move the model to the device.

In [14]:
model = Model()
loss = Loss(epsilon=1e-5)
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

This should support multi-gpu, but doesn't work very well.

In [15]:
print("Let's use", torch.cuda.device_count(), "GPUs!")
if torch.cuda.device_count() > 1:
    model = torch.nn.DataParallel(model)

Let's use 1 GPUs!


Let's move the model's weight matricies to the GPU:

In [16]:
model = model.to(device)

## Train

The body of this loop runs once per epoch. Results is a named tuple of values (loss per epoch for training and validation, time each). Start by setting up a plot first:

In [17]:
ax, tax, lax, lines = dual_train_plots()
fig = ax.figure
plt.tight_layout()

In [18]:
for result in trainNet(model, optimizer, loss,
                        train_loader, val_loader,
                        n_epochs, epoch_start=len(results),
                        notebook=True):
    
    results = results.append(pd.Series(result._asdict()), ignore_index=True)
    
    xs = results.index
    
    # Update the plot above
    lines['train'].set_data(results.index,results.cost)
    lines['val'].set_data(results.index,results.val)
    
    #filter first cost epoch (can be really large)
    max_cost = max(max(results.cost if len(results.cost)<2 else results.cost[1:]), max(results.val))
    min_cost = min(min(results.cost), min(results.val))
    
    # The plot limits need updating too
    ax.set_ylim(min_cost*.9, max_cost*1.1)  
    ax.set_xlim(-.5, len(results.cost) - .5)
    
    replace_in_ax(lax, lines['eff'], xs, results['eff_val'].apply(lambda x: x.eff_rate))
    replace_in_ax(tax, lines['fp'], xs, results['eff_val'].apply(lambda x: x.fp_rate))
    
    # Redraw the figure
    fig.canvas.draw()

    # Save each model state dictionary
    torch.save(model.state_dict(), output / f'{name}_{result.epoch}.pyt')

Number of batches: train = 1250, val = 78


Epoch 0: train=1236.4, val=1168.79, took 16.427 s
  Validation Found 320 of 54700, added 9497 (eff 0.59%) (0.951 FP/event)


Epoch 1: train=1025.27, val=843.218, took 15.623 s
  Validation Found 316 of 54700, added 9500 (eff 0.58%) (0.951 FP/event)


Epoch 2: train=599.539, val=330.895, took 16.234 s
  Validation Found 0 of 54700, added 0 (eff 0.00%) (0.0 FP/event)


Epoch 3: train=125.446, val=27.0962, took 15.281 s
  Validation Found 0 of 54700, added 0 (eff 0.00%) (0.0 FP/event)


Epoch 4: train=17.1095, val=12.6463, took 15.269 s
  Validation Found 0 of 54700, added 0 (eff 0.00%) (0.0 FP/event)


Epoch 5: train=11.4317, val=10.6708, took 15.227 s
  Validation Found 0 of 54700, added 0 (eff 0.00%) (0.0 FP/event)


Epoch 6: train=10.3271, val=10.0969, took 15.722 s
  Validation Found 0 of 54700, added 0 (eff 0.00%) (0.0 FP/event)


Epoch 7: train=9.9617, val=9.87929, took 15.623 s
  Validation Found 0 of 54700, added 0 (eff 0.00%) (0.0 FP/event)


Epoch 8: train=9.81472, val=9.78649, took 15.215 s
  Validation Found 0 of 54700, added 0 (eff 0.00%) (0.0 FP/event)


Epoch 9: train=9.75021, val=9.74474, took 15.13 s
  Validation Found 0 of 54700, added 0 (eff 0.00%) (0.0 FP/event)



## Results

Let's save some results: (even though if you have not changed the code above, it saves the model every epoch)

In [19]:
A = results.outA

In [20]:
B = A.values

In [21]:
L1 = B[9][0]

In [22]:
L2 = B[9][1]

In [None]:
# Plot the surface.
surf = ax.plot_surface(L1[], Y, Z, cmap=cm.coolwarm,
                       linewidth=0, antialiased=False)

# Customize the z axis.
ax.set_zlim(-1.01, 1.01)
ax.zaxis.set_major_locator(LinearLocator(10))
ax.zaxis.set_major_formatter(FormatStrFormatter('%.02f'))

# Add a color bar which maps values to colors.
fig.colorbar(surf, shrink=0.5, aspect=5)

plt.show()

In [73]:
import numpy as np
import math
import matplotlib.pyplot as plot
import mpl_toolkits.mplot3d.axes3d as axes3d
from mpl_toolkits.mplot3d import Axes3D
import matplotlib.pyplot as plt
from matplotlib import cm
from matplotlib.ticker import LinearLocator

def cube_marginals(cube, normalize=False):
    c_fcn = np.mean if normalize else np.sum
    xy = c_fcn(cube, axis=0)
    xz = c_fcn(cube, axis=1)
    yz = c_fcn(cube, axis=2)
    return(xy,xz,yz)

def plotcube(cube,x=None,y=None,z=None,normalize=False,plot_front=False):
    """Use contourf to plot cube marginals"""
    (Z,Y,X) = cube.shape
    (xy,xz,yz) = cube_marginals(cube,normalize=normalize)
    if x == None: x = np.arange(X)
    if y == None: y = np.arange(Y)
    if z == None: z = np.arange(Z)

    fig = plot.figure()
    ax = fig.gca(projection='3d')

    cset = ax.plot_surface(x[None,:].repeat(Y,axis=0), y[:,None].repeat(X,axis=1), xy,   cmap=plot.cm.coolwarm, alpha=0.75)
    cset = ax.plot_surface(x[None,:].repeat(Z,axis=0), xz, z[:,None].repeat(X,axis=1),  cmap=plot.cm.coolwarm, alpha=0.75)
    cset = ax.plot_surface(yz, y[None,:].repeat(Z,axis=0), z[:,None].repeat(Y,axis=1),  cmap=plot.cm.coolwarm, alpha=0.75)

    fig.colorbar(surf, shrink=0.5, aspect=5)
    ax.set_xlabel('X')
    ax.set_ylabel('Y')
    ax.set_zlabel('Z')
    plot.show()

In [75]:
plotcube(B[9][1].cpu().detach().numpy())



NameError: name 'surf' is not defined

In [44]:
plotcube(B[5][0].cpu().detach().numpy())

In [27]:
import numpy as np
import math
import matplotlib.pyplot as plot
import mpl_toolkits.mplot3d.axes3d as axes3d

def cube_marginals(cube, normalize=False):
    c_fcn = np.mean if normalize else np.sum
    xy = c_fcn(cube, axis=0)
    xz = c_fcn(cube, axis=1)
    yz = c_fcn(cube, axis=2)
    return(xy,xz,yz)

def plotcube(cube,x=None,y=None,z=None,normalize=False,plot_front=False):
    """Use contourf to plot cube marginals"""
    (Z,Y,X) = cube.shape
    (xy,xz,yz) = cube_marginals(cube,normalize=normalize)
    if x == None: x = np.arange(X)
    if y == None: y = np.arange(Y)
    if z == None: z = np.arange(Z)

    ax = fig.gca(projection='3d')

    # draw edge marginal surfaces
    offsets = (Z-1,0,X-1) if plot_front else (0, Y-1, 0)
    cset = ax.contourf(x[None,:].repeat(Y,axis=0), y[:,None].repeat(X,axis=1), xy, zdir='z', offset=offsets[0], cmap=plot.cm.coolwarm, alpha=0.75)
    cset = ax.contourf(x[None,:].repeat(Z,axis=0), xz, z[:,None].repeat(X,axis=1), zdir='y', offset=offsets[1], cmap=plot.cm.coolwarm, alpha=0.75)
    cset = ax.contourf(yz, y[None,:].repeat(Z,axis=0), z[:,None].repeat(Y,axis=1), zdir='x', offset=offsets[2], cmap=plot.cm.coolwarm, alpha=0.75)

    # draw wire cube to aid visualization
    ax.plot([0,X-1,X-1,0,0],[0,0,Y-1,Y-1,0],[0,0,0,0,0],'k-')
    ax.plot([0,X-1,X-1,0,0],[0,0,Y-1,Y-1,0],[Z-1,Z-1,Z-1,Z-1,Z-1],'k-')
    ax.plot([0,0],[0,0],[0,Z-1],'k-')
    ax.plot([X-1,X-1],[0,0],[0,Z-1],'k-')
    ax.plot([X-1,X-1],[Y-1,Y-1],[0,Z-1],'k-')
    ax.plot([0,0],[Y-1,Y-1],[0,Z-1],'k-')

    ax.set_xlabel('X')
    ax.set_ylabel('Y')
    ax.set_zlabel('Z')

In [32]:
import matplotlib.animation as animation

fig, ax = plt.subplots()


ani = animation.FuncAnimation(
    fig, plotcube,range(10), init_func=init, interval=100, blit=True, save_count=50)
plt.show()

In [None]:
from mpl_toolkits.mplot3d import Axes3D
import matplotlib.pyplot as plt
from matplotlib import cm
from matplotlib.ticker import LinearLocator, FormatStrFormatter
import numpy as np


fig = plt.figure()
ax = fig.gca(projection='3d')

# Make data.
X = np.arange(-5, 5, 0.25)
Y = np.arange(-5, 5, 0.25)
X, Y = np.meshgrid(X, Y)
R = np.sqrt(X**2 + Y**2)
Z = np.sin(R)

# Plot the surface.
surf = ax.plot_surface(X, Y, Z, cmap=cm.coolwarm,
                       linewidth=0, antialiased=False)

# Customize the z axis.
ax.set_zlim(-1.01, 1.01)
ax.zaxis.set_major_locator(LinearLocator(10))
ax.zaxis.set_major_formatter(FormatStrFormatter('%.02f'))

# Add a color bar which maps values to colors.
fig.colorbar(surf, shrink=0.5, aspect=5)

plt.show()


Go ahead and save the final model (even though it was also saved above):

In [17]:
torch.save(model.state_dict(), output / f'{name}_final.pyt')

Save the output results:

In [19]:
np.savez(output / f'{name}_stats.npz', **results.to_dict())

Save the plot (remake the plot just in case the one above has broken):

In [20]:
dual_train_plots(results.index,
                 results.cost, results.val, 
                 results['eff_val'].apply(lambda x: x.eff_rate),
                 results['eff_val'].apply(lambda x: x.fp_rate))
plt.tight_layout()
plt.savefig(str(output / f'{name}_stats_a.png'))

Quit the kernel (try to be nice to other users)

In [16]:
quit()

In [64]:
z,x,y = (B[5][0].cpu().detach().numpy()).nonzero()
C = B[5][0].cpu().detach().numpy()

In [67]:
fig = plt.figure()
ax = fig.gca(projection='3d')
n = 1
surf = ax.plot_surface(C[n,:,:], C[:,n,:],C[:,:,n], cmap=cm.coolwarm,
                       linewidth=0, antialiased=False)
plt.show()




ValueError: shape mismatch: objects cannot be broadcast to a single shape