# TensorFlow model

In [1]:
from six.moves import cPickle as pickle
# Path of the pickled sample that the rest of the notebook works on.
pickle_file = '/home/htan/proj/TensorFlow/data/MRMS/' + '1timeslice.pickle'

# Read the archive once, then keep only the two entries used by later cells.
with open(pickle_file, 'rb') as f:
    archive = pickle.load(f)
name = archive['name']
feature = archive['features']
# Drop the container so only the extracted entries stay referenced.
del archive

In [2]:
import numpy as np

In [4]:
# Remember the shape as loaded; the next cell uses its first three entries
# to flatten the two leading axes.
fshp = feature.shape

In [5]:
# Collapse the two leading axes into one, giving one row per sample and
# fshp[2] columns per row.
feature = feature.reshape((fshp[0] * fshp[1], fshp[2]))

In [None]:
# Replace sentinel values in columns 0, 2, 3 and 4 with NaN so that the
# NaN-row filter in the following cell discards those samples.
# NOTE(review): the thresholds (-2 for column 0, -999 for the others) are
# presumably "missing data" markers - confirm against the data source.
#
# `feature[..., k]` selects column k of the last axis whatever the rank of the
# array, so this works on the flattened 2-D table as well as on the original
# grid. Each selection is a view, so the assignments edit `feature` in place.
# Assumes `feature` has a floating dtype (NaN cannot be stored in an integer
# array).
f0 = feature[..., 0]
f0[f0 <= -2] = np.nan
f2 = feature[..., 2]
# Each column is tested against its own values.
f2[f2 <= -999] = np.nan
f3 = feature[..., 3]
f3[f3 <= -999] = np.nan
f4 = feature[..., 4]
f4[f4 <= -999] = np.nan

In [6]:
# Boolean mask with one entry per row: True where the row holds any NaN.
b = np.any(np.isnan(feature), axis=1)
# Keep only the rows that are completely NaN-free.
feature = feature[np.logical_not(b)]
feature.shape

(24500000, 13)

In [8]:
# Columns 2, 3 and 4 of the cleaned table are set aside as the labels.
labels = feature[:,2:5]

In [9]:
# Sanity check: one row per sample, three label columns.
labels.shape

(24500000, 3)

In [10]:
# Model inputs are every column except the label columns 2-4: the first two
# columns joined side by side with everything from column 5 onwards.
features = np.concatenate((feature[:, :2], feature[:, 5:]), axis=1)
features.shape

(24500000, 10)

## Dataset partition

In [11]:
# Split sizes: 70% of the rows for training, 10% for validation and whatever
# remains (about 20%) for testing.
# Floor division (//) keeps the sizes integral on Python 3 as well as on
# Python 2, so they stay usable as slice bounds in the next cell; with plain
# `/` Python 3 would produce floats and the slicing would raise TypeError.
length = features.shape[0]
length_list = range(0, length)
train_len = length * 7 // 10
validate_len = length // 10
test_len = length - train_len - validate_len
print(train_len, validate_len, test_len)

(17150000, 2450000, 4900000)


In [12]:
# Carve the row indices into three contiguous, non-overlapping runs:
# training rows first, then validation rows, then the test rows.
train_list = length_list[:train_len]
validate_list = length_list[train_len:(train_len + validate_len)]
test_list = length_list[(train_len + validate_len):]

In [13]:
# Bundle each split's inputs ('data') and targets ('label') together.
train_dataset = dict(
    data=features[train_list],
    label=labels[train_list],
)
validate_dataset = dict(
    data=features[validate_list],
    label=labels[validate_list],
)
test_dataset = dict(
    data=features[test_list],
    label=labels[test_list],
)

In [18]:
# Pull the input arrays out of the three split bundles.
train_data, validate_data, test_data = (
    split['data'] for split in (train_dataset, validate_dataset, test_dataset)
)


In [20]:
# Pull the target arrays out of the three split bundles.
train_label, validate_label, test_label = (
    split['label'] for split in (train_dataset, validate_dataset, test_dataset)
)

## Dataset normalization

In [19]:
# Standardize every split column-wise using statistics taken from the
# training split only, so validation and test data are scaled the same way.
mean = np.mean(train_data, axis=0)
std = np.std(train_data, axis=0)
print(mean.shape, std.shape)
train_data_n, validate_data_n, test_data_n = (
    (split - mean) / std for split in (train_data, validate_data, test_data)
)

((10,), (10,))


## TensorFlow model

In [22]:
import tensorflow as tf

In [41]:
class tfmodel:
    """Single-output linear regression trained with mini-batch gradient descent.

    The graph is one affine map ``X . w + b`` fitted to a mean-squared-error
    loss, with a learning rate that decays exponentially in steps. It is
    written against the graph/session TensorFlow API (``tf.placeholder``,
    ``tf.Session``).
    """

    def __init__(self):
        # Rows per mini-batch. The placeholders are created with this fixed
        # leading dimension, so every fed batch must have exactly this many rows.
        self.batch_size = 128
        # Number of optimizer steps executed by train().
        self.steps = 25000

    def fit(self, train_data, train_label):
        """Build the graph for ``train_data`` and train it.

        Args:
            train_data: 2-D array of shape (samples, features).
            train_label: 2-D array with one row per sample. Only column 0 is
                used as the regression target (see train()), so extra label
                columns are ignored.
        """
        dshp = train_data.shape
        self.create_model(dshp[1], 1)
        self.train(train_data, train_label)

    def create_model(self, feature_num, label_num):
        """Create a fresh graph holding the model, its loss and the training op.

        Args:
            feature_num: number of input columns.
            label_num: number of output columns.
        """
        self.graph = tf.Graph()
        with self.graph.as_default():
            self.tf_train_dataset = tf.placeholder(
                tf.float32, shape=(self.batch_size, feature_num))
            # Must be self.batch_size: a bare `batch_size` is not defined in
            # this method and would only resolve if a global of that name
            # happened to exist.
            self.tf_train_label = tf.placeholder(
                tf.float32, shape=(self.batch_size, label_num))
            self.weights = tf.Variable(tf.truncated_normal([feature_num, label_num]))
            self.biases = tf.Variable(tf.zeros([label_num]))

            def model(X, w, b):
                return tf.matmul(X, w) + b

            self.predicted_label = model(self.tf_train_dataset, self.weights, self.biases)
            self.loss = tf.reduce_mean(tf.square(self.predicted_label - self.tf_train_label))

            # Learning rate decay: multiply the rate by 0.90 every 500 steps.
            # The step counter is bookkeeping only, so keep it out of the set
            # of trainable variables.
            global_step = tf.Variable(0, trainable=False)
            starter_learning_rate = 0.01
            self.learning_rate = tf.train.exponential_decay(
                starter_learning_rate, global_step, 500, 0.90, staircase=True)
            self.op = tf.train.GradientDescentOptimizer(self.learning_rate).minimize(
                self.loss, global_step=global_step)

    def train(self, train_data, train_label):
        """Run ``self.steps`` optimizer steps over sequential mini-batches.

        Progress (step, learning rate, batch loss) is printed every 500 steps.
        create_model() must have been called first.

        Args:
            train_data: 2-D array of shape (samples, features).
            train_label: 2-D array with one row per sample; column 0 is the
                target.

        Raises:
            ValueError: if there are fewer samples than one batch needs.
        """
        batch_size = self.batch_size
        num_samples = train_label.shape[0]
        # Number of distinct batch start offsets the loop cycles through.
        span = num_samples - batch_size
        if span < 0:
            raise ValueError(
                'need at least %d samples to fill one batch, got %d'
                % (batch_size, num_samples))

        with tf.Session(graph=self.graph) as session:
            tf.initialize_all_variables().run()
            print('Initialized')
            for step in range(self.steps):
                # Batches are taken in order and wrap around; there is no
                # shuffling between passes. With exactly one batch worth of
                # data the only valid offset is 0 (and the modulus would be 0).
                offset = (step * batch_size) % span if span else 0
                stop = offset + batch_size
                batch_data = train_data[offset:stop, :]
                batch_labels = train_label[offset:stop, 0].reshape(batch_size, 1)
                feed_dict = {
                    self.tf_train_dataset: batch_data,
                    self.tf_train_label: batch_labels,
                }
                # The predictions are not needed here, so they are not fetched.
                loss_value, _, rate = session.run(
                    [self.loss, self.op, self.learning_rate], feed_dict=feed_dict)

                if step % 500 == 0:
                    print('step = %d, learning rate = %f, loss = %f'
                          % (step, rate, loss_value))

In [42]:
# Instantiate the regressor with its built-in settings
# (batch size 128, 25000 training steps).
model = tfmodel()

In [43]:
# Train on the standardized training inputs. train_label has three columns,
# but the model's train() only reads column 0 as the target.
model.fit(train_data_n, train_label)

Initialized
step = 0, learning rate = 0.010000, loss = 1.142225
step = 500, learning rate = 0.009000, loss = 0.059786
step = 1000, learning rate = 0.008100, loss = 0.000162
step = 1500, learning rate = 0.007290, loss = 0.000022
step = 2000, learning rate = 0.006561, loss = 0.000000
step = 2500, learning rate = 0.005905, loss = 0.000000
step = 3000, learning rate = 0.005314, loss = 0.000000
step = 3500, learning rate = 0.004783, loss = 0.000000
step = 4000, learning rate = 0.004305, loss = 0.000023
step = 4500, learning rate = 0.003874, loss = 0.000060
step = 5000, learning rate = 0.003487, loss = 0.000001
step = 5500, learning rate = 0.003138, loss = 0.000000
step = 6000, learning rate = 0.002824, loss = 0.000000
step = 6500, learning rate = 0.002542, loss = 0.000000
step = 7000, learning rate = 0.002288, loss = 0.000000
step = 7500, learning rate = 0.002059, loss = 0.000000
step = 8000, learning rate = 0.001853, loss = 0.000000
step = 8500, learning rate = 0.001668, loss = 0.000000
st