### 한글 인식 디버그용 Notebook

#### Import

In [1]:
# -*-coding: utf-8-*-
from dataset.datasets import DataSet
from dataset.korean_utils import *

import unicodedata

import cv2
import numpy as np

import tensorflow as tf
from attention.attention import glimpse_sensor, model, loglikelihood

from attention.config import *

  return f(*args, **kwds)


#### Set Arguments

In [2]:
# Dataset configuration consumed by DataSet(args).
# NOTE(review): 'dataset_path' is an absolute, machine-specific path; adjust it
# for your environment before running.
args = dict(
    dataset='phd08',
    dataset_path='/Users/kimsu/datasets/korean_image/phd08',
    width=28,
    height=28,
    sampling=True,
    n_sample=50,
    train_set_ratio=0.7,
)
# Number of pixels per image, derived from the width/height above.
args.update(data_size=args['width'] * args['height'])

#### Datasets : PHD08

In [3]:
# Build the dataset wrapper from the configuration dict defined above.
# Its `train_data.next_batch(n)` is used below to pull sample batches.
dataset = DataSet(args)

100%|██████████| 3/3 [00:00<00:00, 97.32it/s]


In [4]:
# Dataset Test: pull one batch of 10 (images, labels) pairs from the training
# split; `images[0]` is used further down as the single debug sample.
images, labels = dataset.train_data.next_batch(10)
# images, labels

#### Model Build

In [5]:
# Graph inputs.
# `x` is the input image tensor. Its shape is derived from the same config
# names the rest of the notebook uses (batch_size for `Y`, img_sz for the
# image reshape, channels for the first conv kernels) instead of a hard-coded
# [1, 28, 28, 1], so the placeholders cannot drift apart from the config.
# NOTE(review): the recorded outputs of this notebook imply batch_size == 1,
# img_sz == 28 and channels == 1 in attention.config, i.e. the shape is
# unchanged -- confirm against the config module.
x = tf.placeholder(tf.float32, [batch_size, img_sz, img_sz, channels])
# `Y` holds one integer class label per character element; the loss cell
# reads column i via Y[:, i].
Y = tf.placeholder(tf.int64, shape=[batch_size, 3])

In [6]:
# Weight and Bias variables
# Both dicts map a short key to a float32 TF variable of the same name.
# The (name, shape) tables below are walked in order, so variables are created
# in exactly the sequence in which they are listed.
w = {
    name: tf.get_variable(name, shape, tf.float32)
    for name, shape in [
        # for context network
        ('wc1', [3, 3, channels, 16]),
        ('wc2', [3, 3, 16, 64]),
        ('wc3', [1, 1, 64, 3]),
        ('wc_fc', [img_len * 3, lstm_size*2]),
        # for emission network
        ('we_bl', [lstm_size, 1]),
        ('we_h_nl', [lstm_size, 2]),
        # for action network
        ('wai', [lstm_size, n_initial_character]),
        ('wam', [lstm_size, n_middle_character]),
        ('waf', [lstm_size, n_final_character]),
        # for glimpse network
        ('wg1', [3, 3, channels, 16]),
        ('wg2', [3, 3, 16, 64]),
        ('wg3', [1, 1, 64, 3]),
        ('wg_fc', [sensor_bandwidth*sensor_bandwidth * 3, lstm_size]),
        ('wg_lh', [2, lstm_size]),
        ('wg_gh_gf', [lstm_size, lstm_size]),
        ('wg_lh_gf', [lstm_size, lstm_size]),
        # for core network
        ('wo', [lstm_size, lstm_size]),
    ]
}

b = {
    name: tf.get_variable(name, shape, tf.float32)
    for name, shape in [
        # for context network
        ('bc1', [16]),
        ('bc2', [64]),
        ('bc3', [3]),
        ('bc_fc', [lstm_size*2]),
        # for emission network
        ('be_bl', [1]),
        ('be_h_nl', [2]),
        # for action network
        ('bai', [n_initial_character]),
        ('bam', [n_middle_character]),
        ('baf', [n_final_character]),
        # for glimpse network
        ('bg1', [16]),
        ('bg2', [64]),
        ('bg3', [3]),
        ('bg_fc', [lstm_size]),
        ('bg_lh', [lstm_size]),
        ('bg_glh_gf', [lstm_size]),
        # for core network
        ('bo', [lstm_size]),
    ]
}

In [7]:
 # Build the attention model graph from the input placeholder `x` and the
 # weight/bias dicts `w` and `b`. The call returns five values; the later cells
 # index `actions`, `baselines`, `mean_locs` and `sampled_locs` per glimpse.
 outputs, mean_locs, sampled_locs, baselines, actions = model(x, w, b)



### Loss Function and Reward

In [15]:
# Build the training objective from the per-glimpse model outputs.
#
# For every character element i (n_element_per_character of them) the block of
# n_glimpse_per_element glimpses ending at index `idx` is used to compute:
#   * a classification cross-entropy from the action at the block's last glimpse,
#   * a 0/1 reward (prediction == label) repeated for every glimpse in the block,
#   * the log-likelihood of the sampled locations weighted by the advantage
#     (reward - baseline, with the gradient stopped through the baseline),
#   * the squared error between reward and baseline.
#
# Fixes relative to the previous version of this cell:
#   * `equal` is now appended to `equals` (it used to stay an empty list),
#   * `total_loss` uses the averaged `logllratios` instead of the loop-local
#     `logllratio`, which only held the last element's term,
#   * the per-element baseline tensor is named `baseline` so that the global
#     bias dict `b` passed to model(x, w, b) is no longer overwritten.
cross_entropies = []
equals = []
sq_errs = []
logllratios = []

for i in range(n_element_per_character):
    # Glimpses [idx - n_glimpse_per_element, idx) belong to element i; the
    # action produced at the last of them is used to classify the element.
    idx = (i + 1) * n_glimpse_per_element
    logits = actions[idx-1]
    # NOTE(review): the recorded output names this tensor "Softmax_*", which
    # suggests `actions` are already softmax probabilities, whereas
    # sparse_softmax_cross_entropy_with_logits expects unnormalised logits --
    # confirm in attention.attention.model.
    print('logits        : ', logits)

    pred_label = tf.argmax(logits, 1)
    equal = tf.equal(pred_label, Y[:, i])
    equals.append(equal)
    print('pred_label    : ', pred_label)
    print('equal         : ', equal)
    print()

    cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=Y[:, i])
    cross_entropies.append(cross_entropy)
    print('cross_entropy : ', cross_entropy)

    # Same reward for every glimpse of this element: (batch,) -> (batch, n_glimpse).
    reward = tf.cast(equal, tf.float32)
    rewards = tf.expand_dims(reward, 1)
    rewards = tf.tile(rewards, (1, n_glimpse_per_element))
    print('reward        : ', reward)
    print('rewards       : ', rewards)
    print()

    # Baselines of this element's glimpses, laid out as (batch, n_glimpse).
    baseline = baselines[idx - n_glimpse_per_element:idx]
    baseline = tf.stack(baseline, 1)
    baseline = tf.reshape(baseline, [batch_size, n_glimpse_per_element])

    m_locs = mean_locs[idx - n_glimpse_per_element: idx]
    s_locs = sampled_locs[idx - n_glimpse_per_element: idx]

    logll = loglikelihood(m_locs, s_locs, loc_sd)
    # The baseline must not receive gradient through the advantage term; it is
    # trained only through the squared-error term below.
    advs = rewards - tf.stop_gradient(baseline)
    logllratio = tf.reduce_mean(logll * advs)
    sq_err = tf.square(rewards - baseline)
    sq_errs.append(sq_err)
    logllratios.append(logllratio)
    print('baseline      : ', baseline)
    print('loglikelihood : ', logll)
    print('advs          : ', advs)
    print('logll_ratio   : ', logllratio)
    print('sq_error      : ', sq_err)
    print()
    print('-'*100)

# Stack the per-element tensors along a new leading axis, then reduce to scalars.
equals = tf.stack(equals)
sq_errs = tf.stack(sq_errs)
cross_entropies = tf.stack(cross_entropies)
logllratios = tf.stack(logllratios)

baseline_mse = tf.reduce_mean(sq_errs)
cross_entropies = tf.reduce_mean(cross_entropies)
logllratios = tf.reduce_mean(logllratios)

var_list = tf.trainable_variables()
# The log-likelihood ratio is to be maximised, hence the minus sign.
total_loss = -logllratios + cross_entropies + baseline_mse  # '-' to minimize
grads = tf.gradients(total_loss, var_list)
max_grad_norm = 5.
grads, _ = tf.clip_by_global_norm(grads, max_grad_norm)

print('equals          : ', equals)
print('sq_errs         : ', sq_errs)
print('cross entropies : ', cross_entropies)
print('baseline mse    : ', baseline_mse)
print('logllratios     : ', logllratios)
print('total_loss      : ', total_loss)
print('gradients       : ', len(grads))

logits        :  Tensor("Softmax_4:0", shape=(1, 19), dtype=float32)
pred_label    :  Tensor("ArgMax_17:0", shape=(1,), dtype=int64)
equal         :  Tensor("Equal_17:0", shape=(1,), dtype=bool)

cross_entropy :  Tensor("SparseSoftmaxCrossEntropyWithLogits_17/SparseSoftmaxCrossEntropyWithLogits:0", shape=(1,), dtype=float32)
reward        :  Tensor("Cast_32:0", shape=(1,), dtype=float32)
rewards       :  Tensor("Tile_62:0", shape=(1, 5), dtype=float32)

Tensor("mul_83:0", shape=(5, 1, 2), dtype=float32)
baseline      :  Tensor("Reshape_228:0", shape=(1, 5), dtype=float32)
loglikelihood :  Tensor("transpose_15:0", shape=(1, 5), dtype=float32)
advs          :  Tensor("sub_93:0", shape=(1, 5), dtype=float32)
logll_ratio   :  Tensor("Mean_22:0", shape=(), dtype=float32)
sq_error      :  Tensor("Square_51:0", shape=(1, 5), dtype=float32)

----------------------------------------------------------------------------------------------------
logits        :  Tensor("Softmax_9:0", shape=(1, 21),

In [1]:
# Display the five values returned by model(x, w, b).
# NOTE(review): the recorded run of this cell (execution count 1) ended in
# NameError, presumably because the model-build cell had not yet been executed
# in that kernel -- run the notebook top to bottom before evaluating this cell.
outputs, mean_locs, sampled_locs, baselines, actions

NameError: name 'outputs' is not defined

### Model 함수를 호출해서 결과가 나오는지 확인하고, 그 결과를 Session으로 실행해 그림에 시각화해 보자.
### Batch Size를 고정시켜 두었는데, 이 부분을 None으로 할 때에는 어떻게 해야 할지 고민해 볼 것.
###   첫 번째 context network 다음에 RNN으로 입력하는 텐서의 값을 초기에는 zeros로 만드는데,
###  이때 zeros를 만들 때 batch size가 undefined 되어 있으면 안 됨.

In [8]:
def visualize_glimpse_movement(img, locs):
    """Draw a glimpse trajectory on a copy of ``img`` and return the copy.

    Args:
        img: image array, either (rows, cols, channels) or a 2-D grayscale
            (rows, cols) array. Single-channel input is converted to three
            channels with ``cv2.COLOR_GRAY2BGR`` so coloured markers can be
            drawn. ``img`` itself is never modified.
        locs: iterable of glimpse locations. Each entry holds an (x, y) pair,
            either as a (1, 2) array (as returned by ``sess.run(mean_locs)``
            for a batch of one) or as a flat length-2 sequence. A coordinate
            of -1 maps to pixel 0 and +1 maps to ``cols`` / ``rows``.

    Returns:
        The annotated copy. Every location is marked with a (0, 255, 0)
        circle and consecutive locations are joined by a line; the first
        location is additionally marked (255, 0, 0) and the last (0, 0, 255).
        If ``locs`` is empty the (colour-converted) copy is returned unmarked.
    """
    # Debug aid kept from the original notebook: shows the shape being drawn on.
    print(img.shape)
    rows = img.shape[0]
    cols = img.shape[1]
    # A 2-D array has no channel axis; treat it as single-channel instead of
    # failing on img.shape[2].
    n_channel = img.shape[2] if len(img.shape) > 2 else 1
    disp = img.copy()
    if n_channel == 1:
        disp = cv2.cvtColor(disp, cv2.COLOR_GRAY2BGR)

    pts = []
    for loc in locs:
        # Flatten so that both (1, 2) and (2,) shaped locations are accepted;
        # for a (1, 2) array this reads the same values as loc[0, 0], loc[0, 1].
        loc_x, loc_y = np.reshape(loc, -1)[:2]
        # Map [-1, 1] to pixel coordinates; the +0.5 rounds to nearest.
        px = int((loc_x + 1) * 0.5 * cols + 0.5)
        py = int((loc_y + 1) * 0.5 * rows + 0.5)
        pts.append((px, py))

        cv2.circle(disp, (px, py), 1, (0, 255, 0), 2)

    # Nothing to highlight or connect when no locations were supplied.
    if not pts:
        return disp

    cv2.circle(disp, pts[0], 1, (255, 0, 0), 2)
    cv2.circle(disp, pts[-1], 1, (0, 0, 255), 2)

    for start, end in zip(pts, pts[1:]):
        cv2.line(disp, start, end, (0, 255, 0), 1)
    return disp

In [9]:
# Turn the first sample of the batch into a single-image float32 batch of
# shape [1, img_sz, img_sz, 1], scaled by 1/255.
image = np.reshape(images[0], [1, img_sz, img_sz, 1]).astype(np.float32) / 255

In [10]:
# Evaluate the mean glimpse locations for the preprocessed image.
# Variables are freshly initialised here, so the locations come from an
# untrained model.
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())

    locs = sess.run(fetches=mean_locs, feed_dict={x: image})

In [11]:
# Display the evaluated mean locations: a list holding one (1, 2) array per glimpse.
locs

[array([[-0.3561387 ,  0.11891598]], dtype=float32),
 array([[-0.33642894,  0.06717372]], dtype=float32),
 array([[-0.30380607,  0.03299333]], dtype=float32),
 array([[-0.27188158,  0.00953914]], dtype=float32),
 array([[-0.24474992, -0.00537742]], dtype=float32),
 array([[-0.22312959, -0.01584165]], dtype=float32),
 array([[-0.20406395, -0.02282955]], dtype=float32),
 array([[-0.18730672, -0.02806933]], dtype=float32),
 array([[-0.17508894, -0.03297839]], dtype=float32),
 array([[-0.16327296, -0.03718998]], dtype=float32),
 array([[-0.15269616, -0.04123304]], dtype=float32),
 array([[-0.14532149, -0.0455807 ]], dtype=float32),
 array([[-0.13793853, -0.04933016]], dtype=float32),
 array([[-0.13146052, -0.05360417]], dtype=float32),
 array([[-0.1253789 , -0.05730192]], dtype=float32),
 array([[-0.11995378, -0.06115675]], dtype=float32)]

In [12]:
# Sanity check: the fed image batch should be (1, img_sz, img_sz, 1).
image.shape

(1, 28, 28, 1)

In [13]:
# Upscale the image 15x so the trajectory is visible. cv2.resize takes the
# target size as (width, height); the channel axis is re-added afterwards
# because the function below reads img.shape[2].
r_image = cv2.resize(image[0], (15 * image.shape[2], 15 * image.shape[1]))[..., np.newaxis]
disp = visualize_glimpse_movement(img=r_image, locs=locs)

(420, 420, 1)


In [14]:
# Show the annotated image in an OpenCV window named "disp", then wait for a
# key press before the cell returns.
cv2.imshow("disp", disp)
cv2.waitKey()

0

In [16]:
# Close every OpenCV window opened by this notebook.
cv2.destroyAllWindows()

In [15]:
# Close only the "disp" window created by the imshow cell above.
cv2.destroyWindow('disp')

In [18]:
# Shape of the single image (batch axis dropped) that was passed to cv2.resize.
image[0].shape

(28, 28, 1)

In [None]:
if __name__ == '__main__':
    # Stand-alone smoke test for `glimpse_sensor`: extract glimpses at one
    # fixed location from a few training images and show them with OpenCV.
    # Press 'q' in an image window to stop early; any other key advances to
    # the next image.
    #
    # NOTE: inside a notebook kernel __name__ is '__main__', so running this
    # cell rebinds `args`, `dataset`, `images`, `labels` and the placeholder
    # `x` used by the cells above.
    args = {
        'dataset': 'phd08',
        'dataset_path': '/Users/kimsu/datasets/korean_image/phd08',
        'width': 28,
        'height': 28,
        'sampling': True,
        'n_sample': 50,
        'train_set_ratio': 0.7
    }
    args['data_size'] = args['width'] * args['height']

    dataset = DataSet(args)

    images, labels = dataset.train_data.next_batch(10)
    print(images.shape, labels.shape)

    x = tf.placeholder(tf.float32, [None, 28, 28, 1])

    # One fixed glimpse location, used for every image.
    loc = tf.constant([[0.5, 0.5]])

    # Alternative kept for reference -- plain TensorFlow glimpse extraction:
    # glimpse = tf.image.extract_glimpse(x, (14, 14), [(0.5, 0.5)],
    #                                    normalized=True, centered=False)
    #
    # Author's notes on tf.image.extract_glimpse arguments:
    #   input      : the image
    #   size       : patch size
    #   offsets    : loc coordinates
    #   normalized : when True, loc coordinates are expressed in the 0~1 range
    #   centered   : presumably makes the image centre the origin -- see the
    #                TensorFlow documentation to confirm
    glimpse = glimpse_sensor(x, loc)

    try:
        # The context manager closes the session (it used to be leaked).
        with tf.Session() as sess:
            for image, label in zip(images, labels):
                print(image.shape)
                img_flat = np.reshape(image, [1, 28, 28, 1])
                img_flat = img_flat.astype(np.float32) / 255

                g_list = sess.run(glimpse, feed_dict={x: img_flat})
                print(g_list.shape)
                # Separate index name: the old inner loop reused `i` and
                # shadowed the outer loop variable.
                for patch_idx, patch in enumerate(g_list[0]):
                    print(patch_idx, patch.shape)
                    cv2.imshow('%d th Glimpse' % (patch_idx+1), patch)

                cv2.imshow(str(label), image)
                key = cv2.waitKey(0)

                if key == ord('q'):
                    break
    finally:
        # Always close the windows, even if a step above raised.
        cv2.destroyAllWindows()


# import numpy as np
#
# a = [1, 2, 3, 4, 5, 6]
# a = np.array(a)
# a = a.reshape([3, 2])
# # a = np.append(a, [7, 8], 0)
#
# np.random.shuffle(a)
# c = np.array([])
# # print(a.shape, a, c, np.vstack((c, a)), np.random.random_integers(0, 10, 11))

