In [1]:
import tensorflow as tf
import scipy as sp
from scipy import ndimage
import numpy as np
import os
import sklearn
import imageio
from matplotlib import pyplot as plt
%matplotlib inline

In [2]:
# The inputs and outptus are the maps downloaded from https://www.nnvl.noaa.gov/view/globaldata.html
# The inputs to the model are Land Surface Temperature, Moisture and Longwave Energy. 
# The output is the average precipitation map. 
# All maps are in the average weekly format. The idea is to find correlation between the input maps and 
# the output. 

files = ['fire_data.txt', 'frac_data.txt', 'land_data.txt', 'mois_data.txt', 
         'ndvi_data.txt', 'olwr_data.txt', 'wind_data.txt']

xdim, ydim = 168, 512
#xdim, ydim = 84, 128 # For partitioned data
in_chan, out_chan = 7, 1

# Load training and test input data
input_metrics, input_metrics_test = [], []
for path in files:
    loaded_train = np.loadtxt('Train_Data/' + path, delimiter=',').reshape(-1, xdim, ydim, 1)
    loaded_test = np.loadtxt('Test_Data/' + path, delimiter=',').reshape(-1, xdim, ydim, 1)
    
    if len(input_metrics) == 0:
        input_metrics, input_metrics_test = loaded_train, loaded_test
    else:
        input_metrics = np.concatenate((input_metrics, loaded_train), axis=3)
        input_metrics_test = np.concatenate((input_metrics_test, loaded_test), axis=3)
    
    print path, input_metrics.shape, input_metrics_test.shape
    
samples = input_metrics.shape[0];

# Load training and test output data
path = 'rain_data.txt'
output_metric = np.loadtxt('Train_Data/' + path, delimiter=',').reshape(-1, xdim, ydim, 1)
output_metric_test = np.loadtxt('Test_Data/' + path, delimiter=',').reshape(-1, xdim, ydim, 1)

print path, output_metric.shape, output_metric_test.shape

fire_data.txt (85, 168, 512, 1) (4, 168, 512, 1)
frac_data.txt (85, 168, 512, 2) (4, 168, 512, 2)
land_data.txt (85, 168, 512, 3) (4, 168, 512, 3)
mois_data.txt (85, 168, 512, 4) (4, 168, 512, 4)
ndvi_data.txt (85, 168, 512, 5) (4, 168, 512, 5)
olwr_data.txt (85, 168, 512, 6) (4, 168, 512, 6)
wind_data.txt (85, 168, 512, 7) (4, 168, 512, 7)
rain_data.txt (85, 168, 512, 1) (4, 168, 512, 1)


In [3]:
# Function to obtain the next batch based on the input size and the batch size
def next_batch(indices, i):
    
    ind0, ind1 = i*batch_size, np.minimum((i+1)*batch_size, samples)
            
    return input_metrics[indices[ind0:ind1], :, :, :], output_metric[indices[ind0:ind1], :, :, :]

In [4]:
# Building a simple CNN model that looks like an auto-encoder. This is the section to change for a new model.

def conv_net(x):
    
    x = tf.reshape(x, shape=[-1, xdim, ydim, in_chan], name='reshape_x');
    x = tf.cast(x, tf.float32) 
    
    l2_reg = tf.contrib.layers.l2_regularizer(5.0);
    # Encoder
    
    # Scale down by 2x2. Out Channels = 16
    conv1 = tf.nn.relu(tf.contrib.layers.conv2d(x, 8, [4, 4], stride=2, padding='SAME', 
                                                biases_initializer=tf.zeros_initializer(),
                                                weights_regularizer=l2_reg));
    
    
    # Scale down by 2x2. Out Channels = 8
    conv2 = tf.nn.relu(tf.contrib.layers.conv2d(conv1, 16, [4, 4], stride=2, padding='SAME', 
                                                biases_initializer=tf.zeros_initializer(),
                                                weights_regularizer=l2_reg));
    
    # Scale down by 2x2. Out Channels = 8
    conv3 = tf.nn.relu(tf.contrib.layers.conv2d(conv2, 32, [4, 4], stride=2, padding='SAME', 
                                                biases_initializer=tf.zeros_initializer(),
                                                weights_regularizer=l2_reg));
    
    # Decoder
    # Scale up by 2x2. Out Channels = 16
    conv4 = tf.nn.relu(tf.contrib.layers.conv2d_transpose(conv3, 32, [4, 4], stride=2, padding='SAME',
                                                          biases_initializer=tf.zeros_initializer(),
                                                          weights_regularizer=l2_reg));
    
    # Scale up by 2x2. Out Channels = 32
    conv5 = tf.nn.relu(tf.contrib.layers.conv2d_transpose(conv2, 16, [4, 4], stride=2, padding='SAME',
                                                          biases_initializer=tf.zeros_initializer(),
                                                          weights_regularizer=l2_reg));
    
    # Scale up by 2x2. Out Channels = 1
    conv6 = tf.nn.relu(tf.contrib.layers.conv2d_transpose(conv5, out_chan, [4, 4], stride=2, padding='SAME',
                                                          biases_initializer=tf.zeros_initializer(),
                                                          weights_regularizer=l2_reg));
    
    
    return conv6;

Y = tf.placeholder(tf.float32, shape=(None,xdim,ydim,out_chan));
X = tf.placeholder(tf.float32, shape=[None,xdim,ydim,in_chan]);

optimizer = tf.train.AdamOptimizer(learning_rate=0.001);

out = conv_net(X);
loss_op = tf.reduce_sum(tf.multiply(Y-out,Y-out));
train_op = optimizer.minimize(loss_op);

In [5]:
# Load colorbar for rain
color_bar = imageio.imread('rain_colorbar.png')
color_bar = color_bar[0, :, 0:3].astype(float)

c_n = np.shape(color_bar)[0]

color_bar_data = np.exp(np.linspace(np.log(1), np.log(375), c_n)).reshape((c_n, 1, 1, 1))
color_bar_data = (color_bar_data - 1) / (375 - 1)

# Function for reconstructing rain image based on output from network
def generate_rainfall_images(rain_data, filename):
    ind = np.argmin(abs(color_bar_data - rain_data), axis=0)
    
    rain_image = np.uint8(color_bar[ind, :].reshape((xdim, ydim, 3)).astype(int))
    
    imageio.imwrite(filename, rain_image)

In [6]:
num_epochs = 300;
batch_size = 8;
num_batches = int(np.ceil(float(samples) / batch_size));
print(num_batches)

loss_arr = [];
test_arr = [];

saver = tf.train.Saver()

with tf.Session() as sess:

    # Run the initializer
    sess.run(tf.global_variables_initializer())
    
    for epoch in range(num_epochs):
        indices = np.random.permutation(samples);
        
        # Compute the loss across all the batches
        total_loss = 0;
        for i in range(num_batches):
            x_train, y_train = next_batch(indices, i);
            [loss, train] = sess.run([loss_op, train_op], feed_dict={X: x_train, Y: y_train});            
            total_loss += loss;
        
        loss_arr.append(total_loss / num_batches / batch_size)
        
        # Test data
        x_test = input_metrics_test
        y_test = output_metric_test
        
        [test_acc] = sess.run([loss_op], feed_dict={X: x_test, Y: y_test});
        
        test_arr.append(test_acc / x_test.shape[0]);
        print(epoch, total_loss / num_batches / batch_size, test_acc / x_test.shape[0]);
        
    save_path = saver.save(sess, "./model.ckpt")
       

11
(0, 2007.7711791992188, 2413.689453125)
(1, 1857.9171031605113, 2266.639404296875)
(2, 1720.9480979225852, 2034.3892822265625)
(3, 1610.2781704989347, 1995.27490234375)
(4, 1605.9779274680398, 1993.1787109375)
(5, 1585.4443803267045, 1994.8837890625)
(6, 1578.4806795987215, 2072.066650390625)
(7, 1591.4343705610795, 2032.2562255859375)
(8, 1578.0230102539062, 1983.0694580078125)
(9, 1571.1915116743608, 2000.27099609375)
(10, 1565.4895685369318, 2003.6744384765625)
(11, 1564.1190962357955, 1987.916259765625)
(12, 1563.299754749645, 1985.380126953125)
(13, 1564.7982843572443, 1985.026123046875)
(14, 1570.9652377041903, 1980.4622802734375)
(15, 1571.8665383078835, 1990.396728515625)
(16, 1567.1482932350852, 2042.99951171875)
(17, 1569.7254083806818, 2046.8890380859375)
(18, 1568.2755681818182, 1996.39306640625)
(19, 1564.725147594105, 1992.5302734375)
(20, 1557.8305996981535, 1996.9312744140625)
(21, 1557.843905362216, 1976.904296875)
(22, 1568.8158957741477, 2009.9500732421875)
(23, 1

(187, 1502.4021439985795, 1908.3983154296875)
(188, 1501.4712025035512, 1938.52197265625)
(189, 1502.6438099254262, 1927.2239990234375)
(190, 1499.0689308860085, 1980.639892578125)
(191, 1507.4155994762075, 1937.91455078125)
(192, 1515.058183149858, 1898.1279296875)
(193, 1513.8890158913352, 1922.6153564453125)
(194, 1504.1540638316762, 1935.2879638671875)
(195, 1498.842373934659, 1916.7095947265625)
(196, 1494.873418634588, 1950.84521484375)
(197, 1493.8329634232955, 1903.347900390625)
(198, 1509.083740234375, 1909.419677734375)
(199, 1498.5910866477273, 1929.574951171875)
(200, 1492.3135986328125, 1923.7786865234375)
(201, 1491.5584938742898, 1920.4091796875)
(202, 1492.0900712446733, 1929.163330078125)
(203, 1493.5512362393465, 1917.1072998046875)
(204, 1492.5585771040483, 1912.7127685546875)
(205, 1496.309414950284, 1977.339111328125)
(206, 1510.4814564098012, 1978.8450927734375)
(207, 1508.4414506392045, 1906.654541015625)
(208, 1498.65234375, 1908.414794921875)
(209, 1494.9139792

In [8]:
# Make rain plots from network outputs
with tf.Session() as sess:
    saver.restore(sess, "./model.ckpt")
    
    test_samples = input_metrics_test.shape[0];
    for i in range(test_samples):
        x_test = input_metrics_test[i, :, :, :].reshape((1, xdim, ydim, in_chan))
        y_test = output_metric_test[i, :, :, :].reshape((1, xdim, ydim, out_chan))
        
        [output_data] = sess.run([out], feed_dict={X: x_test, Y: y_test});
        
        generate_rainfall_images(output_data, 'rainfall' + str(i) + '_generated.png')
        generate_rainfall_images(y_test, 'rainfall' + str(i) + '_true.png')

INFO:tensorflow:Restoring parameters from ./model.ckpt
