# TensorFlow model

In [1]:
from six.moves import cPickle as pickle


def read_pickle_file(path):
    """Load one pickle file and return the object stored under its 'volume' key.

    NOTE: unpickling can execute arbitrary code, so only use this on
    files from a trusted source.
    """
    with open(path, 'rb') as handle:
        # The rest of the unpickled dict is dropped as soon as we return.
        return pickle.load(handle)['volume']

In [2]:
# Load the 'volume' entry of a single pickle file.
# NOTE(review): the path is absolute and machine-specific; `data` is rebound
# later by the loop that loads every file.
pickle_file = '/home/ldm/proj/TensorFlow/temp/' +  '20160705.pickle'
data = read_pickle_file(pickle_file)

In [3]:
import numpy as np
from __future__ import print_function
import os

In [4]:
# Collect the names of the regular files directly inside `path` whose last
# six characters are 'pickle'.
# NOTE(review): absolute, machine-specific directory.
path = '/home/ldm/proj/TensorFlow/temp/'
file_list = [f for f in os.listdir(path) if os.path.isfile(os.path.join(path, f)) and f[-6:] == 'pickle']

In [5]:
# Load every pickle file found by the directory scan into a dict keyed by
# file name.
data_dict = {}
for file_name in file_list:
    # os.path.join avoids the doubled separator that `path + '/' + file_name`
    # produced (`path` already ends in '/') and matches how file_list was built.
    file_path = os.path.join(path, file_name)
    data = read_pickle_file(file_path)
    data_dict[file_name] = data


In [6]:
# Pick the two loaded files that the following cells work on.
# NOTE(review): raises KeyError if either file name was not found by the
# directory scan that filled data_dict.
data1 = data_dict['20160703.pickle']
data2 = data_dict['20160705.pickle']

In [7]:
'''
f0 = feature[:,:,:,0]
f0[f0 <= -2]=np.nan
f2 = features[:,:,:,2]
f2[f0 <= -999]=np.nan
f3 = features[:,:,:,3]
f3[f3 <= -999]=np.nan
f4 = features[:,:,:,4]
f4[f4 <= -999]=np.nan
'''

'\nf0 = feature[:,:,:,0]\nf0[f0 <= -2]=np.nan\nf2 = features[:,:,:,2]\nf2[f0 <= -999]=np.nan\nf3 = features[:,:,:,3]\nf3[f3 <= -999]=np.nan\nf4 = features[:,:,:,4]\nf4[f4 <= -999]=np.nan\n'

## dataset partition

In [8]:
class Preprocess:
    """Helpers for flattening, cleaning, standardising and splitting datasets.

    Two helper objects are created on construction:

    * ``dataset_preprocess``  -- reshaping, NaN-row removal, feature/label split.
    * ``dataset_standerlize`` -- per-column mean/std standardisation.
    """

    def __init__(self):
        self.dataset_standerlize = self.Dataset_standerlize()
        self.dataset_preprocess = self.Dataset_preprocess()

    class Dataset_preprocess:
        """Reshape 4-D arrays to 2-D and back, drop NaN rows, split columns."""

        def __init__(self):
            # Shape of the array most recently passed to data_flat(); 0 until then.
            self.shape = 0

        def data_flat(self, dataset):
            """Collapse the first three axes of a 4-D array into a single axis.

            The input shape is remembered so data_reformat() can undo the
            flattening later.
            """
            fshp = dataset.shape
            self.shape = fshp
            return dataset.reshape(fshp[0] * fshp[1] * fshp[2], fshp[3])

        def data_reformat(self, dataset):
            """Reshape flat (rows, columns) data back into a 4-D array.

            Axes 1 and 2 are taken from the shape remembered by the most
            recent data_flat() call; axis 0 is derived from the row count and
            axis 3 is the column count of ``dataset``. data_flat() must have
            been called first.
            """
            fshp = self.shape
            plane = fshp[1] * fshp[2]
            # Floor division: `/` yields a float on Python 3, which reshape
            # rejects. A row count that is not a multiple of `plane` still
            # fails inside reshape with a size-mismatch ValueError.
            return dataset.reshape(dataset.shape[0] // plane, fshp[1], fshp[2], dataset.shape[1])

        def nan_remove(self, dataset):
            """Return the rows of a 2-D array that contain no NaN."""
            has_nan = np.isnan(dataset).any(axis=1)
            return dataset[~has_nan]

        def feature_labels(self, dataset, start = 2, end = 5):
            """Split columns into (features, labels).

            Columns ``start:end`` become the labels; all remaining columns,
            in their original order, become the features.
            """
            labels = dataset[:, start:end]
            features = np.hstack([dataset[:, :start], dataset[:, end:]])
            return features, labels

    def simple_process(self, data):
        """Flatten ``data``, drop NaN rows and split into features and labels.

        Returns a dict with keys 'features' and 'labels'.
        """
        p = self.dataset_preprocess
        flat = p.data_flat(data)
        clean = p.nan_remove(flat)
        fs, ls = p.feature_labels(clean)
        return {'features': fs, 'labels': ls}

    class Dataset_standerlize:
        """Standardise columns with a mean and std learned by fit()."""

        def __init__(self):
            # Identity transform until fit() is called.
            self.mean = 0
            self.std = 1

        def fit(self, dataset):
            """Learn mean and std along axis 0 of ``dataset``."""
            self.mean = dataset.mean(axis = 0)
            self.std = dataset.std(axis = 0)

        def transform(self, dataset):
            """Return ``(dataset - mean) / std`` using the fitted parameters."""
            return (dataset - self.mean) / self.std

        def show_parameters(self):
            """Print the current mean and std."""
            print(self.mean, self.std)

    def dataset_split(self, features, labels, ratio = [0.7, 0.1, 0.2], has_validate = False):
        """Split features/labels sequentially (no shuffling) along axis 0.

        ``features`` has shape (samples, features) and ``labels`` has shape
        (samples, labels).

        ``ratio`` is (train, validate, test) and is only read, never modified.
        The test part is always the remainder, so ``ratio[2]`` is not used.
        Sizes are truncated with int().

        Returns (train, validate, test) when ``has_validate`` is truthy,
        otherwise (train, test) with the validate share folded into train.
        Each part is a dict with keys 'data' and 'label'.
        """
        length = features.shape[0]
        # Integer index array: indexing with it returns copies and also works
        # for empty parts.
        index = np.arange(length)

        def subset(rows):
            return {
                'data' : features[rows],
                'label' : labels[rows],
            }

        if has_validate:
            train_end = int(length * ratio[0])
            validate_end = train_end + int(length * ratio[1])
            return (subset(index[:train_end]),
                    subset(index[train_end:validate_end]),
                    subset(index[validate_end:]))

        train_end = int(length * (ratio[0] + ratio[1]))
        return subset(index[:train_end]), subset(index[train_end:])

In [9]:
prep = Preprocess()

In [None]:
# NOTE(review): `features` and `labels` are not assigned in any cell shown
# before this one and the cell has no execution count, so it fails on a fresh
# kernel unless they are defined elsewhere. `train` / `test` are not used in
# the cells visible here.
train, test = prep.dataset_split(features, labels)

In [10]:
# Flatten each dataset, drop rows containing NaN and split the columns into
# a {'features', 'labels'} dict.
# NOTE(review): data1/data2 are rebound to those dicts, so running this cell a
# second time passes a dict to data_flat (which reads `.shape`) and fails.
data1 = prep.simple_process(data1)
data2 = prep.simple_process(data2)

## dataset normalize

In [11]:
# Learn the per-column mean/std from data1's features only, then apply that
# same scaling to both datasets.
prep.dataset_standerlize.fit(data1['features'])
data1_f_n = prep.dataset_standerlize.transform(data1['features'])
data2_f_n = prep.dataset_standerlize.transform(data2['features'])

In [12]:
data1['features'].shape

(353495, 10)

In [13]:
data1_l = data1['labels']
data2_l = data2['labels']

In [14]:
data1_l.shape

(353495, 3)

In [15]:
# Reshape the flat, standardised features back to 4-D.
# data_reformat takes axes 1 and 2 from the shape remembered by the most
# recent data_flat call (made inside simple_process) and derives axis 0 from
# the row count, so the row count left after NaN removal must be a multiple
# of shape[1] * shape[2].
data1_f = prep.dataset_preprocess.data_reformat(data1_f_n)
data2_f = prep.dataset_preprocess.data_reformat(data2_f_n)

In [16]:
data1_f.shape

(95, 61, 61, 10)

In [17]:
# Reshape the flat label arrays back to 4-D the same way as the features
# (labels are not standardised).
data1_ll = prep.dataset_preprocess.data_reformat(data1_l)
data2_ll = prep.dataset_preprocess.data_reformat(data2_l)

In [18]:
data1_f.shape

(95, 61, 61, 10)

In [19]:
# For every sample keep the label columns from index 2 onward at grid
# position (30, 30); the trailing slice `2:` keeps the last axis, so the
# result stays 2-D.
data1_l_s = data1_ll[:, 30, 30, 2:]
data2_l_s = data2_ll[:, 30, 30, 2:]

In [20]:
# Keep only the last feature channel; slicing with `-1:` (instead of indexing
# with -1) preserves the channel axis.
data1_n_VII = data1_f[:, :, :, -1:]
data2_n_VII = data2_f[:, :, :, -1:]

In [21]:
data1_n_VII.shape

(95, 61, 61, 1)

In [22]:
def create_batch(features, offset = 0, batch_size = 4, time_duration = 8):
    """Stack overlapping windows of ``features`` into one batch.

    Window ``i`` (for ``i`` in ``0 .. batch_size - 1``) is
    ``features[offset + i : offset + i + time_duration]``, so consecutive
    windows are shifted by one step along axis 0.

    Returns an array of shape
    ``(batch_size, time_duration) + features.shape[1:]``.

    Raises ValueError if ``offset`` is negative or if the last window would
    run past the end of ``features``.
    """
    # Exclusive end index of the last window.
    last_stop = offset + (batch_size - 1) + time_duration
    # The original guard joined the two checks with `and`, so it almost never
    # fired and negative offsets were silently accepted.
    if offset < 0 or last_stop > features.shape[0]:
        raise ValueError(
            'Incorrect offset value %s: need 0 <= offset <= '
            'features.shape[0] - time_duration - batch_size + 1' % (offset,))
    windows = [features[offset + i:offset + i + time_duration]
               for i in range(batch_size)]
    return np.stack(windows)
    

In [23]:
# Build 80 windows from data1 with the default offset (0) and window length
# (8); window i starts at sample i.
b1 = create_batch(data1_n_VII, batch_size = 80)
b1.shape

(80, 8, 61, 61, 1)

In [24]:
# Labels of the first 80 samples, one per window in b1.
# NOTE(review): label i is therefore paired with the window that STARTS at
# sample i (see create_batch) -- confirm this is the intended alignment.
c1 = data1_l_s[:80]
c1.shape

(80, 1)

In [25]:
b2 = create_batch(data2_n_VII, batch_size = 4)
b2.shape


(4, 8, 61, 61, 1)

In [26]:
c2 = data2_l_s[:4]
c2.shape

(4, 1)

## TensorFlow model

In [27]:
import tensorflow as tf

In [None]:
class tfmodel:
    """Linear regression trained with mini-batch gradient descent.

    NOTE(review): a second class named ``tfmodel`` is defined in a later cell
    and replaces this one as soon as that cell is run.
    """

    def __init__(self):
        self.batch_size = 128
        self.steps = 25000
        # Session holding the trained variables; created by train().
        self.session = None

    def fit(self, train_data, train_label):
        """Build the graph for ``train_data``'s column count and train it.

        ``train_data`` has shape (samples, features). Only column 0 of
        ``train_label`` is used as the regression target.
        """
        dshp = train_data.shape
        self.create_model(dshp[1], 1)
        self.train(train_data, train_label)

    def predict(self, data):
        """Return the model output for ``data`` using the trained variables.

        The original implementation opened a new session and re-initialised
        every variable, so predictions came from fresh random weights. The
        session created by train() is reused instead.

        Raises RuntimeError if the model has not been trained yet.
        """
        if self.session is None:
            raise RuntimeError('tfmodel.predict() called before fit()/train()')
        feed_dict = {self.tf_train_dataset : data}
        return self.session.run(self.predicted_label, feed_dict = feed_dict)

    def close(self):
        """Release the training session, if any."""
        if self.session is not None:
            self.session.close()
            self.session = None

    def model_init(self, feature_num, label_num):
        """Create the weight matrix and bias vector of the linear model."""
        self.weights = tf.Variable(tf.truncated_normal([feature_num, label_num]))
        self.biases = tf.Variable(tf.zeros([label_num]))

    def model(self, X):
        """Return ``X . weights + biases``."""
        return tf.matmul(X, self.weights) + self.biases

    def create_model(self, feature_num, label_num):
        """Build placeholders, model, mean-squared-error loss and optimiser."""
        self.graph = tf.Graph()
        with self.graph.as_default():
            self.tf_train_dataset = tf.placeholder(tf.float32, shape=(None, feature_num))
            self.tf_train_label = tf.placeholder(tf.float32, shape=(None, label_num))
            self.model_init(feature_num, label_num)

            self.predicted_label = self.model(self.tf_train_dataset)
            self.loss = tf.reduce_mean(tf.square(self.predicted_label - self.tf_train_label))

            # Learning rate decay: multiplied by 0.90 every 500 steps.
            global_step = tf.Variable(0)
            starter_learning_rate = 0.01
            self.learning_rate = tf.train.exponential_decay(starter_learning_rate, global_step, 500, 0.90, staircase=True)
            self.op = tf.train.GradientDescentOptimizer(self.learning_rate).minimize(self.loss, global_step = global_step)
            # Created last, inside this graph, so it covers every variable above.
            self.init_op = tf.initialize_all_variables()

    def train(self, train_data, train_label):
        """Run ``self.steps`` gradient steps over sequential mini-batches.

        The session is kept open afterwards so predict() can use the learned
        variables; call close() to release it.
        """
        n_samples = train_label.shape[0]
        # Never ask for more rows than exist.
        batch_size = min(self.batch_size, n_samples)
        span = n_samples - batch_size

        self.close()
        self.session = tf.Session(graph = self.graph)
        session = self.session
        session.run(self.init_op)
        print('Initialized')
        for step in range(self.steps):
            # Note: we could use better randomization across epochs.
            # span == 0 (one batch covers all data) used to divide by zero.
            offset = (step * batch_size) % span if span > 0 else 0
            # Generate a minibatch.
            batch_data = train_data[offset:(offset + batch_size), :]
            batch_labels = train_label[offset:(offset + batch_size), 0].reshape(batch_size, 1)
            feed_dict = {self.tf_train_dataset : batch_data, self.tf_train_label : batch_labels}
            l, _, r = session.run([self.loss, self.op, self.learning_rate], feed_dict=feed_dict)

            if (step % 500 == 0):
                print('step = %d, learning rate = %f, loss = %f' % (step, r, l))

In [None]:
model = tfmodel()

In [None]:
# NOTE(review): `train_data_n` and `train_label` are not assigned in any cell
# visible here and this cell has no execution count; it fails on a fresh
# kernel unless they are defined elsewhere.
model.fit(train_data_n, train_label)

In [None]:
model.predict(test_data_n[:2000, :]).shape

# TFclass optimization

In [28]:
class Model_base:
    """Single linear layer: ``X . weights + biases``."""

    def __init__(self, feature_num, label_num):
        self.info()
        self.model_init_parameters(feature_num, label_num)

    def info(self):
        """Announce which model variant is being built."""
        print('Using basic model:')

    def model_init_parameters(self, feature_num, label_num):
        """Create the weight matrix and the bias vector (weights first)."""
        weight_shape = [feature_num, label_num]
        bias_shape = [label_num]
        self.weights = tf.Variable(tf.truncated_normal(weight_shape))
        self.biases = tf.Variable(tf.zeros(bias_shape))

    def model(self, X):
        """Return the linear projection of ``X``."""
        projected = tf.matmul(X, self.weights)
        return projected + self.biases
        
class Model_complex(Model_base):
    """Two-layer network: linear -> ReLU -> linear with a 5-unit hidden layer."""

    def info(self):
        """Announce which model variant is being built."""
        print('Using complex model:')

    def model_init_parameters(self, feature_num, label_num):
        """Create both layers' variables (both weights first, then both biases)."""
        hidden_units = 5
        w1 = tf.Variable(tf.truncated_normal([feature_num, hidden_units]))
        w2 = tf.Variable(tf.truncated_normal([hidden_units, label_num]))
        self.weights = {'layer1': w1, 'layer2': w2}
        b1 = tf.Variable(tf.zeros([hidden_units]))
        b2 = tf.Variable(tf.zeros([label_num]))
        self.biases = {'layer1': b1, 'layer2': b2}

    def model(self, X):
        """Return the network output for ``X``."""
        hidden = tf.matmul(X, self.weights['layer1']) + self.biases['layer1']
        activated = tf.nn.relu(hidden)
        return tf.matmul(activated, self.weights['layer2']) + self.biases['layer2']

class simple_DL():
    """Fully connected network on flattened 5-D input.

    The input is flattened to (batch, features), passed through a 61-unit
    hidden layer with ReLU6 and then a linear output layer.
    """

    def __init__(self, fshp, lshp):
        self.info()
        self.model_init_parameters(fshp, lshp)

    def model(self, X):
        """Return the network output for a 5-D input tensor ``X``."""
        # Flatten every non-batch axis. The original multiplied axes 1-3 only,
        # which disagrees with the layer-1 weight size (axes 1-4) whenever
        # the last axis is larger than 1.
        reshape = tf.reshape(X, [-1, self.feature_num])
        hidden = tf.matmul(reshape, self.weights['layer1']) + self.biases['layer1']
        return tf.matmul(tf.nn.relu6(hidden), self.weights['layer2']) + self.biases['layer2']

    def model_init_parameters(self, fshp, lshp):
        """Create the variables for a 5-D feature shape and a 2-D label shape.

        Axis 0 of both shapes is the batch axis and is ignored.

        Raises ValueError if ``fshp`` is not 5-D or ``lshp`` is not 2-D.
        """
        # `or`, not `and`: either shape being wrong is an error.
        if len(fshp) != 5 or len(lshp) != 2:
            raise ValueError('Incorrect shape for fshp %s and lshp %s' % (fshp, lshp))
        feature_num = fshp[1] * fshp[2] * fshp[3] * fshp[4]
        label_num = lshp[1]
        # Remembered so model() can flatten its input to the matching width.
        self.feature_num = feature_num
        self.weights = {'layer1': tf.Variable(tf.truncated_normal([feature_num, 61])),
                       'layer2': tf.Variable(tf.truncated_normal([61, label_num])),}
        self.biases = {'layer1': tf.Variable(tf.zeros([61])),
                       'layer2': tf.Variable(tf.zeros([label_num])),}

    def info(self):
        """Announce which model variant is being built."""
        print('Using simple Deep learning model:')

In [36]:
class CNN_3D():
    """Two strided 3-D convolutions followed by two fully connected layers."""

    # Stride 2 along the three middle axes, 1 along batch and channels.
    _STRIDES = [1, 2, 2, 2, 1]

    def __init__(self, fshp, lshp):
        print(fshp, lshp)
        self.info()
        self.model_init_parameters(fshp, lshp)

    def model(self, X):
        """Return the network output for a 5-D input tensor ``X``."""
        conv = tf.nn.conv3d(X, self.weights['layer1'], self._STRIDES, padding='SAME')
        hidden = tf.nn.relu6(conv + self.biases['layer1'])
        conv = tf.nn.conv3d(hidden, self.weights['layer2'], self._STRIDES, padding='SAME')
        hidden = tf.nn.relu6(conv + self.biases['layer2'])
        # Flatten everything except the batch axis for the dense layers.
        reshape = tf.reshape(hidden, [-1, self.flat_size])
        hidden = tf.nn.relu6(tf.matmul(reshape, self.weights['layer3']) + self.biases['layer3'])
        return tf.matmul(hidden, self.weights['layer4']) + self.biases['layer4']

    @staticmethod
    def _same_conv_size(size, stride = 2, layers = 2):
        """Length of one axis after ``layers`` SAME-padded strided convolutions."""
        for _ in range(layers):
            size = -(-size // stride)  # ceil(size / stride) with integers
        return size

    def model_init_parameters(self, fshp, lshp):
        """Create the variables for a 5-D feature shape and a 2-D label shape.

        Axis 0 of both shapes is the batch axis and is ignored; axes 1-3 of
        ``fshp`` must be known integers.

        Raises ValueError if ``fshp`` is not 5-D or ``lshp`` is not 2-D.
        """
        # `or`, not `and`: either shape being wrong is an error.
        if len(fshp) != 5 or len(lshp) != 2:
            raise ValueError('Incorrect shape for fshp %s and lshp %s' % (fshp, lshp))
        patch_size = 3
        channels = fshp[4]
        depth1 = 16
        depth2 = 8
        label_num = lshp[1]
        num_hidden = 100

        # Flattened size after both convolutions. For an (8, 61, 61) input
        # this is 2 * 16 * 16 * 8 = 4096, the value that was hard-coded before.
        flat_size = depth2
        for axis_len in fshp[1:4]:
            flat_size *= self._same_conv_size(axis_len)
        self.flat_size = flat_size

        self.weights = {'layer1': tf.Variable(tf.truncated_normal([patch_size, patch_size, patch_size, channels, depth1])),
                       'layer2':tf.Variable(tf.truncated_normal([patch_size, patch_size, patch_size, depth1, depth2])),
                       'layer3': tf.Variable(tf.truncated_normal([flat_size, num_hidden], stddev=0.1)),
                       'layer4': tf.Variable(tf.truncated_normal([num_hidden, label_num], stddev=0.1)),}

        self.biases = {'layer1': tf.Variable(tf.zeros([depth1])),
                       'layer2': tf.Variable(tf.zeros([depth2])),
                       'layer3': tf.Variable(tf.zeros([num_hidden])),
                       'layer4': tf.Variable(tf.zeros([label_num])),}

    def info(self):
        """Announce which model variant is being built."""
        print('Using 3D CNN learning model:')

In [53]:
class tfmodel:
    """Regression trainer around a pluggable model class.

    ``model_class`` is called as ``model_class(feature_shape, label_shape)``
    and must provide a ``model(X)`` method. Trained variables are passed
    between train(), partial_fit() and predict() through a checkpoint file at
    ``checkpoint_path``.
    """

    def __init__(self, batch_size = 128, steps = 25000, learning_rate = 0.01, model_class = CNN_3D,
                 checkpoint_path = "/home/ldm/proj/TensorFlow/temp/model.ckpt"):
        self.batch_size = batch_size
        self.steps = steps
        self.learn_rate = learning_rate
        self.model_class = model_class
        # Used for every save and restore (previously hard-coded three times).
        self.checkpoint_path = checkpoint_path

    def fit(self, train_data, train_label):
        """Build a new graph for the given array shapes and train from scratch.

        The batch axis of both placeholders is left unspecified (None).
        """
        dshp = (None,) + tuple(train_data.shape[1:])
        lshp = (None,) + tuple(train_label.shape[1:])
        self.create_model(dshp, lshp)
        self.train(train_data, train_label, self.batch_size, self.steps, inital = True)

    def partial_fit(self, train_data, train_label, steps):
        """Continue training from the saved checkpoint for ``steps`` steps.

        NOTE(review): the batch size is fixed to 1 here rather than
        ``self.batch_size`` -- confirm this is intended.
        """
        self.train(train_data, train_label, 1, steps)

    def predict(self, data):
        '''return the predict value'''
        with tf.Session(graph = self.graph) as session:
            self.saver.restore(session, self.checkpoint_path)
            print('Model restored')
            feed_dict = {self.tf_train_dataset : data}
            return self.predicted_label.eval(feed_dict = feed_dict)

    def create_model(self, feature_shape, label_shape):
        """Build placeholders, model, mean-squared-error loss, optimiser and saver."""
        self.graph = tf.Graph()
        with self.graph.as_default():
            self.tf_train_dataset = tf.placeholder(tf.float32, shape=feature_shape)
            self.tf_train_label = tf.placeholder(tf.float32, shape=label_shape)
            self.base = self.model_class(feature_shape, label_shape)
            self.predicted_label = self.base.model(self.tf_train_dataset)
            self.loss = tf.reduce_mean(tf.square(self.predicted_label - self.tf_train_label))

            # Learning rate decay: multiplied by 0.90 every 20 steps.
            global_step = tf.Variable(0)
            starter_learning_rate = self.learn_rate
            self.learning_rate = tf.train.exponential_decay(starter_learning_rate, global_step, 20, 0.90, staircase=True)
            self.op = tf.train.GradientDescentOptimizer(self.learning_rate).minimize(self.loss, global_step = global_step)
            self.saver = tf.train.Saver()

    def train(self, train_data, train_label, batch_size, steps, inital = False):
        """Run ``steps`` gradient steps and save a checkpoint afterwards.

        When ``inital`` is truthy the variables are freshly initialised;
        otherwise they are restored from the checkpoint. The loss on the
        full training data is printed about 50 times during the run.
        """
        span = train_label.shape[0] - batch_size
        # At least 1: `steps / 50` was 0 for steps < 50 on Python 2 (division
        # by zero in the modulo) and a float on Python 3.
        report_every = max(steps // 50, 1)

        with tf.Session(graph = self.graph) as session:
            if inital:
                tf.initialize_all_variables().run()
                print('Initialized')
            else:
                self.saver.restore(session, self.checkpoint_path)
                print('Model restored')
            for step in range(steps):
                # Note: we could use better randomization across epochs.
                # span <= 0 means one batch covers all data: start at row 0
                # instead of taking a modulo by zero or a negative number.
                offset = (step * batch_size) % span if span > 0 else 0
                # Generate a minibatch.
                batch_data = train_data[offset:(offset + batch_size), :]
                batch_labels = train_label[offset:(offset + batch_size), :]
                feed_dict = {self.tf_train_dataset : batch_data, self.tf_train_label : batch_labels}
                l, _, r = session.run([self.loss, self.op, self.learning_rate], feed_dict=feed_dict)

                if step % report_every == 0:
                    # Report the loss on the whole training set, not just the batch.
                    loss_feed = {self.tf_train_dataset : train_data, self.tf_train_label : train_label}
                    tl = self.loss.eval(feed_dict = loss_feed)
                    print('step = %d, learning rate = %f, loss = %f' % (step, r, tl))
            save_path = self.saver.save(session, self.checkpoint_path)
            print('Model save in file: %s' % (save_path))

In [54]:
# Batches of 2, 250 steps, initial learning rate 0.001; model_class keeps its
# default (CNN_3D).
model = tfmodel(batch_size = 2, steps = 250, learning_rate = 0.001)

In [None]:
model.fit(b1, c1)

(None, 8, 61, 61, 1) (None, 1)
Using 3D CNN learning model:
Initialized
step = 0, learning rate = 0.001000, loss = 2.086961


In [35]:
# Continue training from the saved checkpoint on the second dataset for 50
# steps; partial_fit passes a batch size of 1 to train.
# NOTE(review): this cell's execution count (35) is older than the cells that
# define and build the model (53, 54), so the output below comes from an
# earlier run.
model.partial_fit(b2, c2, 50)

Model restored
step = 0, learning rate = 0.000282, loss = 0.451763
step = 1, learning rate = 0.000282, loss = 0.936149
step = 2, learning rate = 0.000282, loss = 0.038624
step = 3, learning rate = 0.000282, loss = 0.085584
step = 4, learning rate = 0.000282, loss = 0.740418
step = 5, learning rate = 0.000282, loss = 0.196060
step = 6, learning rate = 0.000282, loss = 0.092863
step = 7, learning rate = 0.000282, loss = 0.465139
step = 8, learning rate = 0.000282, loss = 0.226488
step = 9, learning rate = 0.000282, loss = 0.186419


KeyboardInterrupt: 

In [None]:
result = model.predict(b2)

In [None]:
result