In [1]:
from __future__ import division
import rawpy
import numpy as np
from matplotlib.pyplot import imshow
from scipy.misc import imread
import tensorflow as tf
import tensorflow.contrib.slim as slim
import glob
import os, time, scipy.io

In [2]:
# Collect the ids of the training set.
# File names are expected to look like 'aNNNN*.dng'; characters 1..4 of the
# base name are parsed as the integer image id.
input_dir = './dataset/HUAWEIMATE20/'
# glob returns files in arbitrary (filesystem) order; sort so that the id list
# is identical from run to run.
train_fns = sorted(glob.glob(input_dir + 'a*.dng'))
# Several files may share one id prefix. Keep each id once so that an image is
# not loaded and cached twice by code that looks files up by id.
train_ids = sorted({int(os.path.basename(train_fn)[1:5]) for train_fn in train_fns})
# Side length (in packed-Bayer pixels) of the random training crop.
ps = 512

In [3]:
def relu(x):
    """Leaky ReLU: element-wise maximum of `x` and `0.2 * x`.

    Despite the name this is not a plain ReLU: negative inputs are scaled by
    0.2 instead of being zeroed.
    """
    leaked = x * 0.2
    return tf.maximum(leaked, x)

In [4]:
# data preprocessing
def pack_raw(raw, black_level=128, white_level=4096):
    """Pack a Bayer mosaic into a half-resolution 4-channel float image.

    Args:
        raw: object exposing `raw_image_visible`, a 2-D array holding the
            sensor mosaic (e.g. the object returned by `rawpy.imread`).
        black_level: value subtracted from every sample before scaling.
        white_level: value that maps to 1.0 after scaling (4096 corresponds
            to 12-bit data).
            NOTE(review): the defaults 128 / 4096 were inherited from a
            comment about a different camera; confirm them for the sensor
            that produced the files, or pass the correct values explicitly.

    Returns:
        float32 array of shape (H // 2, W // 2, 4). The channels hold the
        mosaic samples at offsets (row, col) = (0, 0), (0, 1), (1, 1), (1, 0)
        of every 2x2 cell, in that order. Values below the black level are
        clamped to 0; values above the white level are NOT clamped here.

    Raises:
        ValueError: if `white_level` is not greater than `black_level`.
    """
    if white_level <= black_level:
        raise ValueError('white_level must be greater than black_level')

    im = raw.raw_image_visible.astype(np.float32)
    im = np.maximum(im - black_level, 0) / (white_level - black_level)

    # Drop a trailing odd row/column: the four sub-sampled planes must have
    # identical shapes or the concatenation below fails.
    H = im.shape[0] - im.shape[0] % 2
    W = im.shape[1] - im.shape[1] % 2
    im = im[:H, :W, np.newaxis]

    out = np.concatenate((im[0:H:2, 0:W:2, :],
                          im[0:H:2, 1:W:2, :],
                          im[1:H:2, 1:W:2, :],
                          im[1:H:2, 0:W:2, :]), axis=2)
    return out

In [5]:
def upsample_and_concat(x1, x2, output_channels, in_channels):
    """Upsample `x1` with a learned transposed convolution and join it to `x2`.

    A new 2x2 filter variable (truncated-normal init, stddev 0.02) is created
    on every call. The transposed convolution uses stride 2 and is asked to
    produce exactly the dynamic shape of `x2`, after which the result and `x2`
    are concatenated along axis 3.

    Args:
        x1: tensor to upsample; its channel count should equal `in_channels`.
        x2: skip-connection tensor whose shape the upsampled tensor takes.
            Since the requested output shape is `tf.shape(x2)`, `x2` is
            presumably expected to have `output_channels` channels.
        output_channels: channel count produced by the transposed convolution.
        in_channels: channel count of `x1`.

    Returns:
        Tensor whose static shape is set to
        [None, None, None, 2 * output_channels].
    """
    stride = 2
    kernel_shape = [stride, stride, output_channels, in_channels]
    kernel = tf.Variable(tf.truncated_normal(kernel_shape, stddev=0.02))
    upsampled = tf.nn.conv2d_transpose(x1, kernel, tf.shape(x2), strides=[1, stride, stride, 1])

    merged = tf.concat([upsampled, x2], 3)
    # tf.shape(x2) is dynamic, so the static channel count is lost; restore it.
    merged.set_shape([None, None, None, output_channels * 2])

    return merged

In [6]:
def network(data):
    """Build the encoder/decoder graph that maps `data` to the output image.

    Structure, as built below:
      * four encoder stages (32, 64, 128, 256 filters), each made of two 3x3
        convolutions followed by 2x2 max pooling;
      * a 512-filter bottleneck of two 3x3 convolutions;
      * four decoder stages (256, 128, 64, 32 filters), each upsampling via
        `upsample_and_concat` with the matching encoder output and then
        applying two 3x3 convolutions;
      * a 1x1 convolution to 12 channels without activation, rearranged by
        `tf.depth_to_space` with block size 2.

    Variable scopes are 'g_conv<stage>_<1|2>' and 'g_conv10'.

    Args:
        data: input tensor fed to the first convolution.

    Returns:
        The tensor produced by `tf.depth_to_space(conv10, 2)`.
    """
    def conv_pair(inputs, channels, stage):
        # Two consecutive 3x3 convolutions of equal width for one stage.
        for part in (1, 2):
            inputs = slim.conv2d(inputs, channels, [3, 3], rate=1, activation_fn=relu,
                                 scope='g_conv%d_%d' % (stage, part))
        return inputs

    print("1")

    # Encoder: remember each stage's pre-pooling output for the skip links.
    skips = []
    features = data
    for stage, channels in enumerate((32, 64, 128, 256), start=1):
        features = conv_pair(features, channels, stage)
        skips.append(features)
        features = slim.max_pool2d(features, [2, 2], padding='SAME')

    # Bottleneck.
    features = conv_pair(features, 512, 5)

    # Decoder: consume the skip tensors deepest-first.
    for stage, channels in zip((6, 7, 8, 9), (256, 128, 64, 32)):
        features = upsample_and_concat(features, skips.pop(), channels, channels * 2)
        features = conv_pair(features, channels, stage)

    conv10 = slim.conv2d(features, 12, [1, 1], rate=1, activation_fn=None, scope='g_conv10')
    return tf.depth_to_space(conv10, 2)



In [7]:
# Build the training graph, then optimise the network on random crops.

# Create a session.
sess = tf.Session()
# Placeholders for the packed 4-channel input and the 3-channel target.
input_image = tf.placeholder(tf.float32, [None, None, None, 4])
target_image = tf.placeholder(tf.float32, [None, None, None, 3])
output_image = network(input_image)
# Loss: mean absolute difference between network output and target.
G_loss = tf.reduce_mean(tf.abs(output_image - target_image))

t_vars = tf.trainable_variables()
# The learning rate is fed per step so it can be lowered during training.
lr = tf.placeholder(tf.float32)
# Define optimizer for the flow.
G_opt = tf.train.AdamOptimizer(learning_rate=lr).minimize(G_loss)

# In-memory caches, filled lazily the first time each image is visited.
input_images = [None] * len(train_ids)
target_images = [None] * len(train_ids)
learning_rate = 1e-4
# Not written to in this cell; kept because other cells may read it.
g_loss = np.zeros((5000, 1))

sess.run(tf.global_variables_initializer())

for epoch in range(4001):
    if epoch > 2000:
        learning_rate = 1e-5
    for ind in np.random.permutation(len(train_ids)):
        if input_images[ind] is None:
            # Resolve the file only on a cache miss; no need to hit the
            # filesystem on every step.
            train_id = train_ids[ind]
            in_files = glob.glob(input_dir + 'a%04d*.dng' % train_id)
            in_path = in_files[0]

            # Use the context manager so the raw file handle and decoder
            # buffers are released once both arrays have been extracted.
            with rawpy.imread(in_path) as raw:
                input_images[ind] = np.expand_dims(pack_raw(raw), axis=0)
                im = raw.postprocess(use_camera_wb=True, half_size=False, no_auto_bright=True, output_bps=16)
            target_images[ind] = np.expand_dims(np.float32(im / 65535.0), axis=0)

        H = input_images[ind].shape[1]
        W = input_images[ind].shape[2]
        for z in range(10):
            # randint's upper bound is exclusive: +1 lets the last valid
            # offset be drawn and makes an image of exactly ps pixels work
            # instead of raising ValueError.
            xx = np.random.randint(0, W - ps + 1)
            yy = np.random.randint(0, H - ps + 1)

            input_patch = input_images[ind][:, yy:yy + ps, xx:xx + ps, :]
            # The target is at twice the packed resolution, so the crop
            # window is scaled by 2 in both directions.
            target_patch = target_images[ind][:, yy * 2:yy * 2 + ps * 2, xx * 2:xx * 2 + ps * 2, :]

            if np.random.randint(2, size=1)[0] == 1:  # random flip
                input_patch = np.flip(input_patch, axis=1)
                target_patch = np.flip(target_patch, axis=1)
            if np.random.randint(2, size=1)[0] == 1:
                input_patch = np.flip(input_patch, axis=2)
                target_patch = np.flip(target_patch, axis=2)
            if np.random.randint(2, size=1)[0] == 1:  # random transpose
                input_patch = np.transpose(input_patch, (0, 2, 1, 3))
                target_patch = np.transpose(target_patch, (0, 2, 1, 3))

            # pack_raw only clamps at the black level; cap the top end here.
            input_patch = np.minimum(input_patch, 1.0)

            _, G_current, output = sess.run([G_opt, G_loss, output_image],
                                        feed_dict={input_image: input_patch, target_image: target_patch, lr: learning_rate})
            output = np.minimum(np.maximum(output, 0), 1)
            print('input_shape', input_patch.shape)
            print('output_shape', output.shape)
            print('loss', G_current)

1
Instructions for updating:
Colocations handled automatically by placer.
input_shape (1, 512, 512, 4)
output_shape (1, 1024, 1024, 3)
loss 0.17549431
input_shape (1, 512, 512, 4)
output_shape (1, 1024, 1024, 3)
loss 0.09846735
input_shape (1, 512, 512, 4)
output_shape (1, 1024, 1024, 3)
loss 0.12337265
input_shape (1, 512, 512, 4)
output_shape (1, 1024, 1024, 3)
loss 0.16338319
input_shape (1, 512, 512, 4)
output_shape (1, 1024, 1024, 3)
loss 0.17046161
input_shape (1, 512, 512, 4)
output_shape (1, 1024, 1024, 3)
loss 0.1599279
input_shape (1, 512, 512, 4)
output_shape (1, 1024, 1024, 3)
loss 0.19059773
input_shape (1, 512, 512, 4)
output_shape (1, 1024, 1024, 3)
loss 0.12339434
input_shape (1, 512, 512, 4)
output_shape (1, 1024, 1024, 3)
loss 0.1409844
input_shape (1, 512, 512, 4)
output_shape (1, 1024, 1024, 3)
loss 0.22422183
input_shape (1, 512, 512, 4)
output_shape (1, 1024, 1024, 3)
loss 0.21742569
input_shape (1, 512, 512, 4)
output_shape (1, 1024, 1024, 3)
loss 0.18269403
inpu

KeyboardInterrupt: 