In [1]:
%load_ext autoreload
%autoreload 2
%cd ..

import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline


# Experiment configuration. Paths are relative to the working directory
# selected by the `%cd ..` magic at the top of this cell.
experiment_name = "baseline.classifier.cnn_4_retinas.vggsmall"
metadata_path = "data/8retinas"
data_path = "data/raw"

# Preprocessing is triggered only when the metadata directory is absent;
# the contents of an existing directory are not checked here.
generate_metadata = not os.path.exists(metadata_path)

if generate_metadata:
    # print() call form: same output under Python 2, and valid Python 3 syntax.
    print("Metadata does not exist. This run might be MUCH longer due to preprocessing the data.")

/home/jheuristic/yozhik/ksfinder
Metadata does not exist. This run might be MUCH longer due to preprocessing the data.


In [None]:
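# Import the Theano/Lasagne stack; THEANO_FLAGS is set first so the device choice is in place before the imports.
# NOTE(review): the value is passed with literal double quotes (they are echoed in the cell output) - confirm Theano parses it as intended.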
%env THEANO_FLAGS="device=gpu1"

import lasagne
import theano
import theano.tensor as T
from lib.retina_compiled import retinize_events

# Shorthand for the float dtype name configured in Theano (theano.config.floatX).
floatX = theano.config.floatX


env: THEANO_FLAGS="device=gpu1"


Using gpu device 1: Tesla K40m (CNMeM is disabled, CuDNN 4004)


## Preprocess/Load data

In [None]:
# Locations of the cached arrays: retina images (X) and per-event decay counts (y).
retina_images_path = os.path.join(metadata_path,"retina_images.npy")
answers_path = os.path.join(metadata_path,"decay_counts.npy")

if generate_metadata:
    index_df_path = os.path.join(data_path,"paths_and_targets.csv")

    # Index table of events; the columns used below are X_filename and
    # relevant_decay_count. pd.read_csv replaces the deprecated
    # pd.DataFrame.from_csv(path, index_col=None), which was removed from pandas.
    df_ref = pd.read_csv(index_df_path, index_col=None)

    from lib.retina_compiled import retina_view
    # Eight retina views are defined here.
    retinas = [
        retina_view(0,0,2000,0,0,50**.5,64,64,np.pi/8,np.pi/8), #r_velo
        retina_view(1016.31,172.755,12969.2,0.393943,0.0705397,500**.5,64,64,np.pi/3,np.pi/3),#r_ecal
        retina_view(0,0,-200,0,0,50**.5,64,64,np.pi/8,np.pi/8), #r_beforeTT
        retina_view(0,500,6000,0,0,500**.5,64,64,0.5472,0.5472), #r_middle
        retina_view(0,0,12000,0,0,500**.5,64,64,np.pi/3,np.pi/3),
        retina_view(0,0,1000,0,0,500**.5,64,64,np.pi/3,np.pi/3),
        # NOTE(review): this call passes 8 positional arguments while every
        # other view passes 10 (two values appear to be missing after the
        # third argument) - confirm against retina_view's signature.
        retina_view(-5000,-5000,-2000,500**.5,64,64,np.pi/3,np.pi/3),
        retina_view(0,500,6000,0,0,500**.5,64,64,np.pi/3,np.pi/3),
        ]


    # Convert every event file listed in the index into retina images.
    X = retinize_events(df_ref.X_filename.values,
                             data_path,retina_views=retinas,
                             max_hits_block=15000,report_rate=100)

    y = df_ref.relevant_decay_count.values

    # The directory is created only after the expensive step has succeeded, so
    # an interrupted run does not leave an empty cache directory that the
    # existence check in the configuration cell would accept as valid.
    os.mkdir(metadata_path)
    np.save(retina_images_path,X)
    np.save(answers_path, y)

    generate_metadata = False
else:
    #load metadata
    X = np.load(retina_images_path)
    y = np.load(answers_path)

In [None]:
# Display the shape of the loaded/generated image array.
X.shape

In [None]:
# Reshape to 4-D: free leading axis, 1 channel, 4*64 rows, 64 columns.
# NOTE(review): the preprocessing cell defines eight retina views, while this
# reshape assumes four 64x64 images per sample; if X holds eight retinas per
# event, the leading axis will no longer match len(y) - confirm.
X = X.reshape((-1, 1, 4 * 64, 64))

In [None]:
# sklearn.cross_validation is deprecated and was later removed; prefer
# sklearn.model_selection and fall back for old scikit-learn installations.
try:
    from sklearn.model_selection import train_test_split
except ImportError:
    from sklearn.cross_validation import train_test_split

# Hold out 25% of the samples; random_state is fixed so the split is reproducible.
Xtr, Xts, Ytr,Yts = train_test_split(X,y,test_size=0.25,random_state=1337)

# print() call form; produces the same "<shape> <shape>" text as the
# Python 2 statement `print Ytr.shape,Yts.shape`.
print("{} {}".format(Ytr.shape, Yts.shape))

# define NN

In [None]:
# Shape passed to the InputLayer: a free (None) leading axis followed by the
# per-sample axes of X.
input_dim = (None,)+X.shape[1:]

# Symbolic 4-D tensor for the retina images, created with dtype "floatX".
retina_images = T.tensor4("input_images","floatX")

# Symbolic integer label vector; used as the target of the cross-entropy and
# accuracy expressions.
# NOTE(review): y is loaded from relevant_decay_count while the output layer
# has 2 units - confirm the labels are restricted to {0, 1}.
any_interesting_decays = T.ivector("mctruith_n_decays")

In [None]:
from lasagne.layers import InputLayer
from lasagne.layers import Conv2DLayer as ConvLayer
from lasagne.layers import MaxPool2DLayer as PoolLayer
from lasagne.layers import DenseLayer, DropoutLayer

#a short version of vgg16: vgg10 so to say
def _build_vggsmall(input_shape, input_var):
    """Build the layer dictionary of the small VGG-style classifier.

    Layers are created in order: input, four stages of 3x3 convolutions
    (pad=1, flip_filters=False) each followed by a size-2 max-pool, one
    4096-unit dense layer with 50% dropout, and a 2-unit softmax output.
    Returns a dict mapping layer names ('input', 'conv1_1', ..., 'out')
    to layer objects.
    """
    layers = {}
    layers['input'] = InputLayer(input_shape, input_var=input_var)
    previous = layers['input']

    # (stage number, filters per convolution, convolutions in the stage)
    stages = [(1, 64, 2), (2, 128, 2), (3, 256, 3), (4, 512, 3)]
    for stage, n_filters, n_convs in stages:
        for position in range(1, n_convs + 1):
            previous = ConvLayer(
                previous, n_filters, 3, pad=1, flip_filters=False)
            layers['conv{}_{}'.format(stage, position)] = previous
        previous = PoolLayer(previous, 2)
        layers['pool{}'.format(stage)] = previous

    layers['fc6'] = DenseLayer(previous, num_units=4096)
    layers['fc6_dropout'] = DropoutLayer(layers['fc6'], p=0.5)

    layers['out'] = DenseLayer(layers['fc6_dropout'], num_units=2,
                               nonlinearity=lasagne.nonlinearities.softmax)
    return layers


net = _build_vggsmall(input_dim, retina_images)



In [None]:
# Collect the trainable parameters of all layers feeding net["out"];
# these are the variables the optimizer updates.
weights = lasagne.layers.get_all_params(net["out"],trainable=True)
# Display the parameter list.
weights

# train updates

In [None]:
# Training-time network output (get_output called without deterministic=True).
train_prediction = lasagne.layers.get_output(net["out"])
# Mean categorical cross-entropy and mean accuracy against the integer labels.
train_loss_ce = lasagne.objectives.categorical_crossentropy(train_prediction,any_interesting_decays).mean()
train_accuracy = lasagne.objectives.categorical_accuracy(train_prediction,any_interesting_decays).mean()
# Adadelta update rules for all trainable weights, minimizing the cross-entropy.
updates = lasagne.updates.adadelta(train_loss_ce,weights,learning_rate=0.05) 
# Author's note: the learning rate is not expected to matter much here;
# it is kept small only to avoid the loss blowing up.

In [None]:
# Compile one training step: takes an image batch and its labels, applies the
# optimizer updates, and returns [mean cross-entropy, mean accuracy].
train_fun = theano.function(
    inputs=[retina_images, any_interesting_decays],
    outputs=[train_loss_ce, train_accuracy],
    updates=updates,
)

# evaluation

In [None]:
# Evaluation-time network output: deterministic=True requests the
# deterministic forward pass (no dropout noise).
prediction = lasagne.layers.get_output(net["out"],deterministic=True)
# Evaluation metrics are built from the deterministic `prediction`.
# They were previously built from `train_prediction`, which made validation
# and test numbers depend on dropout noise.
loss_ce = lasagne.objectives.categorical_crossentropy(prediction,any_interesting_decays).mean()
accuracy = lasagne.objectives.categorical_accuracy(prediction,any_interesting_decays).mean()

In [None]:
# Compile the evaluation step: returns [loss_ce, accuracy] for a labelled
# batch and performs no parameter updates.
eval_fun = theano.function(
    inputs=[retina_images, any_interesting_decays],
    outputs=[loss_ce, accuracy],
)
# Compile the inference step: returns the network output for an image batch.
predict_fun = theano.function(inputs=[retina_images], outputs=prediction)

# main loop
* Closely follows the layout of Lasagne's basic training-loop example.

In [None]:
def iterate_minibatches(inputs, targets, batchsize, shuffle=False,crop_at=None):
    """Yield consecutive (inputs, targets) mini-batches of exactly `batchsize` items.

    inputs, targets -- equally long indexable arrays (checked with an assert).
    batchsize       -- number of samples per batch; a trailing remainder that
                       does not fill a whole batch is not yielded.
    shuffle         -- when true, samples are visited in a random order drawn
                       with np.random.shuffle; otherwise contiguous slices
                       are taken in order.
    crop_at         -- optional start offset; iteration stops before the batch
                       that would begin at exactly this offset.
    """
    n_samples = len(inputs)
    assert n_samples == len(targets)

    order = None
    if shuffle:
        order = np.arange(n_samples)
        np.random.shuffle(order)

    for offset in range(0, n_samples - batchsize + 1, batchsize):
        if offset == crop_at:
            return
        stop = offset + batchsize
        # Index array for shuffled access, plain slice otherwise.
        selector = order[offset:stop] if shuffle else slice(offset, stop)
        yield inputs[selector], targets[selector]
        


In [None]:
from collections import defaultdict
# metrics[name][epoch] -> per-epoch value, averaged over mini-batches.
metrics = defaultdict(dict)

import time
num_epochs = 100
batch_size = 50

# We iterate over epochs:
for epoch in range(num_epochs):
    # In each epoch, we do a full pass over the training data:
    train_err = 0
    train_acc = 0
    train_batches = 0
    start_time = time.time()
    for batch in iterate_minibatches(Xtr, Ytr, batch_size, shuffle=True):
        batch_ce,batch_acc= train_fun(*batch)
        train_err +=batch_ce
        train_acc +=batch_acc
        train_batches += 1

    # And a full pass over the validation data:
    val_err = 0
    val_acc = 0
    val_batches = 0
    for batch in iterate_minibatches(Xts, Yts, batch_size, shuffle=False):
        err, acc = eval_fun(*batch)
        val_err += err
        val_acc += acc
        val_batches += 1

    # Per-batch averages. The accumulators above are sums over batches, and
    # the train and validation passes have different batch counts.
    mean_train_loss = train_err / train_batches
    mean_train_acc = train_acc / train_batches
    mean_val_loss = val_err / val_batches
    mean_val_acc = val_acc / val_batches

    # Store the averaged accuracies (previously the raw sums were stored, so
    # the recorded curves did not match the printed percentages).
    metrics["acc_train"][epoch] = mean_train_acc
    metrics["acc_test"][epoch] = mean_val_acc

    # Then we print the results for this epoch:
    print("Epoch {} of {} took {:.3f}s".format(
        epoch + 1, num_epochs, time.time() - start_time))
    print("  training loss:\t\t{:.6f}".format(mean_train_loss))
    print("  training accuracy:\t\t{:.2f} %".format(
        mean_train_acc * 100))
    print("  validation loss:\t\t{:.6f}".format(mean_val_loss))
    print("  validation accuracy:\t\t{:.2f} %".format(
        mean_val_acc * 100))


In [None]:
# Final pass over the held-out split (Xts, Yts): accumulate the per-batch loss
# and accuracy returned by eval_fun, then report their averages.
test_err, test_acc, test_batches = 0, 0, 0
for inputs, targets in iterate_minibatches(Xts, Yts, batch_size, shuffle=False):
    err, acc = eval_fun(inputs, targets)
    test_err = test_err + err
    test_acc = test_acc + acc
    test_batches = test_batches + 1
print("Final results:")
print("  test loss:\t\t\t{:.6f}".format(test_err / test_batches))
print("  test accuracy:\t\t{:.2f} %".format(
    test_acc / test_batches * 100))

In [None]:
# Same evaluation on the TRAINING split (Xtr, Ytr) with eval_fun, for
# comparison with the held-out numbers. Separate variable names and labels
# are used so the test-set results computed in the previous cell are not
# overwritten or mislabelled.
train_eval_err = 0
train_eval_acc = 0
train_eval_batches = 0
for batch in iterate_minibatches(Xtr, Ytr, batch_size, shuffle=False):
    inputs, targets = batch
    err, acc = eval_fun(inputs, targets)
    train_eval_err += err
    train_eval_acc += acc
    train_eval_batches += 1
print("Final results (training set):")
print("  train loss:\t\t\t{:.6f}".format(train_eval_err / train_eval_batches))
print("  train accuracy:\t\t{:.2f} %".format(
    train_eval_acc / train_eval_batches * 100))