In [1]:
import h5py
import pandas as pd
import numpy as np
import os
import tensorflow as tf
import glob


  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])
  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])


In [2]:
home_dir = os.path.dirname(os.path.realpath('__file__'))
train_dir = home_dir+'/data/image_format_single_plane/plane-xy/*/*'
vocab = ['electrons','protons','muons','pions','gamma']
image_size = 256

minibatch_size = 256 
epochs = 2
learning_rate=0.0001


In [3]:
def get_label(file_path):
  # convert the path to a list of path components and one hot them
    parts = tf.string_split([file_path], '/')
    label = parts.values[-2] 
    matches = tf.stack([tf.equal(label, s) for s in vocab], axis=-1)
    onehot = tf.cast(matches, tf.float32)
    return onehot


In [4]:
def decode_img(img):
  # convert the compressed string to a uint8 tensor
  img = tf.image.decode_jpeg(img, channels=3)
  # Use `convert_image_dtype` to convert to floats in the [0,1] range.
  img = tf.image.convert_image_dtype(img, tf.float32)
  # resize the image to the desired size.
  return tf.image.resize(img, [image_size, image_size])

In [5]:
def process_path(file_path):
  label = get_label(file_path)
  # load the raw data from the file as a string
  img = tf.io.read_file(file_path)
  img = decode_img(img)
  return img, label

In [6]:
def initialize_variables():
    with tf.variable_scope('model',reuse=tf.AUTO_REUSE):
        #W1 is filter for first cnn layer. shape is (height,width,in_channel,out_channel)
        W1 = tf.get_variable('W1',shape=[4,4,3,16],initializer=tf.contrib.layers.xavier_initializer())
        W2 = tf.get_variable('W2',shape=[3,3,16,32],initializer= tf.contrib.layers.xavier_initializer())
        W3 = tf.get_variable('W3',shape=[3,3,32,16],initializer= tf.contrib.layers.xavier_initializer())
        parameters = {'W1':W1, 'W2':W2,'W3':W3}
    return parameters


def forward_prop(X,parameters):
    '''Arguments:
               X of shape (num_featues,batch_size)
               parameters-- dictionary of parameters
      Returns:
      logits-- without calculating activation function on the last layer as the cost function doesn't need it. 
    '''
    
    W1 = parameters['W1']
    W2 = parameters['W2']
    W3 = parameters['W3']
    
    Z1 = tf.nn.conv2d(X,W1,strides=[1,1,1,1],padding='VALID')
    A1 = tf.nn.relu(Z1)
    P1 = tf.nn.max_pool(A1,ksize=[1,8,8,1],strides=[1,8,8,1],padding='VALID') 
    
    Z2 = tf.nn.conv2d(P1,W2, strides=[1,1,1,1],padding='VALID')
    A2 = tf.nn.relu(Z2)
    P2 = tf.nn.max_pool(A2,ksize=[1,8,8,1],strides=[1,8,8,1],padding='VALID') 
    
    Z3 = tf.nn.conv2d(P2,W3, strides=[1,1,1,1],padding='VALID')
    A3 = tf.nn.relu(Z3)
    P3 = tf.nn.max_pool(A3,ksize=[1,8,8,1],strides=[1,8,8,1],padding='VALID') 
    
    f1 = tf.contrib.layers.flatten(P3)
    logits = tf.contrib.layers.fully_connected(f1,5,activation_fn=None)
    return logits


def compute_cost(Z,Y):
    logits = tf.transpose(Z)
    labels = tf.transpose(Y)
    cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(logits=logits,labels = labels))
    return cost

In [7]:
from tensorflow.python.client import device_lib
print(device_lib.list_local_devices())

[name: "/device:CPU:0"
device_type: "CPU"
memory_limit: 268435456
locality {
}
incarnation: 17095025020369186999
, name: "/device:XLA_GPU:0"
device_type: "XLA_GPU"
memory_limit: 17179869184
locality {
}
incarnation: 4113245910530307688
physical_device_desc: "device: XLA_GPU device"
, name: "/device:XLA_CPU:0"
device_type: "XLA_CPU"
memory_limit: 17179869184
locality {
}
incarnation: 18350249221306066515
physical_device_desc: "device: XLA_CPU device"
]


In [8]:
%%time

def model(training_dataset,learning_rate=0.00002,epochs=10):   
    
    with tf.variable_scope('model',reuse=tf.AUTO_REUSE):
        with tf.device('/cpu:0'):
            batched_training_dataset = training_dataset.batch(minibatch_size).prefetch(1)
            iterator = tf.data.Iterator.from_structure(batched_training_dataset.output_types,batched_training_dataset.output_shapes)

            training_init_op = iterator.make_initializer(batched_training_dataset)
    #        validation_init_op = iterator.make_initializer(batched_validation_dataset)
            next_element = iterator.get_next()
            data = next_element     
            X = data[0]
            Y = data[1]
    #        assert(X.get_shape == (minibatch_size))
            # assert(Y.shape == (depth,minibatch_size))
        
        with tf.device('/gpu:0'):
            parameters = initialize_variables()
            logits =  forward_prop(X,parameters)    
            loss = compute_cost(logits,Y)
            optimizer = tf.train.AdamOptimizer(learning_rate).minimize(loss)
            predictions = tf.reshape(tf.argmax(logits,axis=0),shape=[-1,1])
            labels = tf.reshape(tf.argmax(Y,axis=0), shape=[-1,1])
            accuracy_calculator,accuracy_updater = tf.metrics.accuracy(predictions=predictions,labels=labels,name='metric')
            running_vars = tf.get_collection(tf.GraphKeys.LOCAL_VARIABLES,scope = 'metric')
            running_vars_initializer = tf.variables_initializer(var_list=running_vars)
            saver = tf.train.Saver(parameters, max_to_keep=1)
        
        with tf.Session(config=tf.ConfigProto(allow_soft_placement=True, log_device_placement=True)) as sess:
            sess.run(tf.global_variables_initializer())
            sess.run(tf.local_variables_initializer())

            for epoch in range(epochs):
                sess.run(running_vars_initializer)
                epoch_cost = 0     
           # num_minibatches  = int(training_size/minibatch_size)
                sess.run(training_init_op)
                while True:
                    try:
                        _, cost = sess.run([optimizer,loss])
                        sess.run(accuracy_updater)
                        epoch_cost = epoch_cost + cost
                    except tf.errors.OutOfRangeError:
                        if epoch%1 == 0:
                            accuracy = sess.run(accuracy_calculator)  
                            _, cost = sess.run([optimizer,loss])
                        sess.run(accuracy_updater)
                        epoch_cost = epoch_cost + cost
                    except tf.errors.OutOfRangeError:
                        if epoch%1 == 0:
                  
                         #   epoch_cost = epoch_cost/num_minibatches
                            print(' accuracy is {1} for epoch {0}'.format(epoch,accuracy))
                        break

            print("Final accuracy " + str(sess.run(accuracy_calculator)))

            saver.save(sess, home_dir+'weights/model_parameters_'+epochs)



CPU times: user 4 µs, sys: 2 µs, total: 6 µs
Wall time: 9.54 µs


In [9]:
%%time
list_ds = tf.data.Dataset.list_files(train_dir)
# Set `num_parallel_calls` so multiple images are loaded/processed in parallel.
training_dataset = list_ds.map(process_path, num_parallel_calls=5) 
model(training_dataset,learning_rate=learning_rate,epochs=epochs)
print('works')

Instructions for updating:
Use `tf.compat.v1.data.get_output_types(dataset)`.
Instructions for updating:
Use `tf.compat.v1.data.get_output_shapes(dataset)`.
Instructions for updating:
Use `tf.compat.v1.data.get_output_types(iterator)`.
Instructions for updating:
Use `tf.compat.v1.data.get_output_shapes(iterator)`.
Instructions for updating:
Use `tf.compat.v1.data.get_output_classes(iterator)`.
The TensorFlow contrib module will not be included in TensorFlow 2.0.
For more information, please see:
  * https://github.com/tensorflow/community/blob/master/rfcs/20180907-contrib-sunset.md
  * https://github.com/tensorflow/addons
  * https://github.com/tensorflow/io (for I/O related ops)
If you depend on functionality not listed there, please file an issue.



ValueError: Negative dimension size caused by subtracting 8 from 1 for 'model/MaxPool_2' (op: 'MaxPool') with input shapes: [?,1,1,16].