[View in Colaboratory](https://colab.research.google.com/github/ysterin/deep-pointing/blob/master/deep_pointing_keras.ipynb)

In [0]:
# http://pytorch.org/
from os import path
from wheel.pep425tags import get_abbr_impl, get_impl_ver, get_abi_tag
platform = '{}{}-{}'.format(get_abbr_impl(), get_impl_ver(), get_abi_tag())

accelerator = 'cu80' if path.exists('/opt/bin/nvidia-smi') else 'cpu'

!pip install -q http://download.pytorch.org/whl/{accelerator}/torch-0.3.0.post4-{platform}-linux_x86_64.whl torchvision
import torch

In [2]:
from google.colab import files

uploaded = files.upload()

for fn in uploaded.keys():
  print('User uploaded file "{name}" with length {length} bytes'.format(
      name=fn, length=len(uploaded[fn])))

Saving ta.txt to ta.txt
User uploaded file "ta.txt" with length 3311023 bytes


In [44]:
with open('ta.txt') as f:
  text = f.read()
text[:100], text[-100:]

(' בְּרֵאשִׁית, בָּרָא אֱלֹהִים, אֵת הַשָּׁמַיִם, וְאֵת הָאָרֶץ.\xa0\n וְהָאָרֶץ, הָיְתָה תֹהוּ וָבֹהוּ, ו',
 'ְלֹשִׁים וַחֲמִשָּׁה.\xa0\n וְאַתָּה, לֵךְ לַקֵּץ; וְתָנוּחַ וְתַעֲמֹד לְגֹרָלְךָ, לְקֵץ הַיָּמִין.\xa0 {ש}')

In [45]:
import re
chars = sorted(set(text))
pointing_chars = [c for c in chars if 1455<ord(c)<1480]
non_pointing_chars = [c for c in chars if not 1455<ord(c)<1480]
pointing_chars, non_pointing_chars
spiecial_chars = ['.', '[',']','}', '{', '(', ')']
np_chars_pattern = ['\\'+c for c in non_pointing_chars if c in spiecial_chars] + [c for c in non_pointing_chars if c not in spiecial_chars]
print(np_chars_pattern)
np_pattern = '|'.join(np_chars_pattern)
np_pattern

['\\(', '\\)', '\\.', '\\]', '\\{', '\\}', '\n', ' ', ',', '-', ':', ';', '\xa0', 'א', 'ב', 'ג', 'ד', 'ה', 'ו', 'ז', 'ח', 'ט', 'י', 'ך', 'כ', 'ל', 'ם', 'מ', 'ן', 'נ', 'ס', 'ע', 'ף', 'פ', 'ץ', 'צ', 'ק', 'ר', 'ש', 'ת', '�']


'\\(|\\)|\\.|\\]|\\{|\\}|\n| |,|-|:|;|\xa0|א|ב|ג|ד|ה|ו|ז|ח|ט|י|ך|כ|ל|ם|מ|ן|נ|ס|ע|ף|פ|ץ|צ|ק|ר|ש|ת|�'

In [24]:
len(non_pointing_chars)

40

In [0]:
unpointed_text = ''.join(re.findall(np_pattern, text))
pointings = re.split(np_pattern, text)[1:]
len(unpointed_text), len(pointings)
len(''.join([''.join(z) for z in zip(pointings, unpointed_text)])) == len(text)
_text = ''.join([''.join(z) for z in zip(unpointed_text, pointings)])
for i in range(len(text)):
  if _text[i] != text[i]:
    print(i, text[i], _text[i])
    

In [0]:
ps2ids = {p:i for i, p in enumerate(pointing_chars)}
cs2ids = {c:i for i, c in enumerate(non_pointing_chars)}
ids2ps = {i:p for i, p in enumerate(pointing_chars)}
ids2cs = {i:c for i, c in enumerate(non_pointing_chars)}

In [59]:
import numpy as np
from keras import utils
X = np.asarray([cs2ids[c] for c in unpointed_text])
X = utils.to_categorical(X)
N_points = len(pointing_chars)
N = len(unpointed_text)
y = np.zeros((N, N_points), dtype=np.float32)
for i in range(N):
  for j in range(N_points):
    if ids2ps[j] in pointings[i]:
      y[i, j] = 1
print(y[:10])
print(X[:10])

[[0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0.]
 [0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 1. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 1. 0. 0.]]
[[0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
  0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0.
  0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
  0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
  0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.

In [0]:
from keras import layers, preprocessing
sequence = preprocessing.sequence.TimeseriesGenerator(X, y, 60, stride=10, batch_size=64)

In [0]:
from keras.utils import Sequence
class batchSeq(Sequence):
    def __init__(self, X, y, length, stride, batch_size):
      assert X.shape[0]==y.shape[0]
      self._N = X.shape[0]
      self.X = X
      self.y = y
      self.length = length
      self.stride = stride
      self.bs = batch_size
      self.index = 0
    
    def __len__(self):
      return ((self._N-self.length)//self.stride)//self.bs

    def __getitem__(self, idx):
      bx = [self.X[self.stride*(self.bs*idx+i):self.stride*(self.bs*idx+i)+self.length] for i in range(self.bs)]
      by = [self.y[self.stride*(self.bs*idx+i):self.stride*(self.bs*idx+i)+self.length] for i in range(self.bs)]
      return np.asarray(bx), np.asarray(by)
    
    def __iter__(self):
      self.index = 0
      return self
    
    def __next__(self):
      if self.index <= self.__len__():
        self.index += 1
        return self.__getitem__(self.index-1)
      else:
        raise StopIteration

In [75]:
import tensorflow as tf
device_name = tf.test.gpu_device_name()
if device_name != '/device:GPU:0':
  print('GPU device not found')
print('Found GPU at: {}'.format(device_name))
from keras.models import Sequential
from keras.regularizers import Regularizer, l2, l1
from keras.layers import Dense, Activation, Input, Dropout, GaussianNoise, concatenate, Reshape, Input
from keras.layers import LSTM, SimpleRNN, Bidirectional, GRU, CuDNNLSTM, CuDNNGRU
from keras.optimizers import RMSprop, Nadam, SGD
from keras.models import Model
from keras.layers import ActivityRegularization, Masking, TimeDistributed, Concatenate, Multiply
from keras.callbacks import TerminateOnNaN
if device_name == '/device:GPU:0':
  lstm = CuDNNLSTM 
else:
  lstm = LSTM
lstm

Found GPU at: /device:GPU:0


keras.layers.cudnn_recurrent.CuDNNLSTM

In [0]:
def create_model():
  input_dim = X.shape[-1]
  output_dim = y.shape[-1]
  bptt = 60
  model = Sequential()
  model.add(Bidirectional(lstm(64, return_sequences=True), input_shape=(bptt, input_dim)))
  model.add(Bidirectional(lstm(64, return_sequences=True)))
  model.add(Dense(output_dim, activation='sigmoid'))
  model.summary()
  return model


In [97]:
model = create_model()
lr = 1e-6
i = 0
while lr<10:
  opt = RMSprop(lr)
  model.compile(loss='categorical_crossentropy', optimizer=opt, metrics=['accuracy'])
  bx, by = seq[i]
  loss = model.train_on_batch(bx, by)
  print(lr, loss)
  lr *= 2
  #i += 1


_________________________________________________________________
Layer (type)                 Output Shape              Param #   
bidirectional_7 (Bidirection (None, 60, 64)            19200     
_________________________________________________________________
dense_6 (Dense)              (None, 60, 14)            910       
Total params: 20,110
Trainable params: 20,110
Non-trainable params: 0
_________________________________________________________________
1e-06 [1.5303694, 0.07102864]
2e-06 [1.530361, 0.07109375]
4e-06 [1.5303441, 0.07124023]
8e-06 [1.5303102, 0.07163086]
1.6e-05 [1.5302422, 0.07239583]
3.2e-05 [1.5301065, 0.07374675]
6.4e-05 [1.5298352, 0.076188155]
0.000128 [1.5292931, 0.08063151]
0.000256 [1.5282098, 0.088151045]
0.000512 [1.5260365, 0.097770184]
0.001024 [1.5215746, 0.10496419]
0.002048 [1.5116903, 0.09412435]
0.004096 [1.4823364, 0.07773437]
0.008192 [1.3740413, 0.082128905]
0.016384 [1.3376431, 0.09060872]
0.032768 [1.4134853, 0.029003907]
0.065536 [1.37525

In [109]:
seq = batchSeq(X, y, bptt, 20, 64)
model = create_model()
opt = RMSprop(0.005)
model.compile(loss='categorical_crossentropy', optimizer=opt, metrics=['accuracy'])
model.fit_generator(seq, epochs=10)

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
bidirectional_11 (Bidirectio (None, 60, 128)           54784     
_________________________________________________________________
bidirectional_12 (Bidirectio (None, 60, 128)           99328     
_________________________________________________________________
dense_10 (Dense)             (None, 60, 14)            1806      
Total params: 155,918
Trainable params: 155,918
Non-trainable params: 0
_________________________________________________________________
Epoch 1/10
114/885 [==>...........................] - ETA: 1:32 - loss: 1.1648 - acc: 0.2226

Epoch 2/10
187/885 [=====>........................] - ETA: 47s - loss: 0.5117 - acc: 0.5282

Epoch 3/10

Epoch 4/10

Epoch 5/10

Epoch 6/10

Epoch 7/10

Epoch 8/10

Epoch 9/10

Epoch 10/10



<keras.callbacks.History at 0x7fdefdf05e48>

In [93]:
lr = 0.002



1e-06 [2.4407284, 0.5895508]
2e-06 [2.4669514, 0.5914551]
4e-06 [2.5604484, 0.5858887]
8e-06 [2.5810928, 0.58969724]
1.6e-05 [2.5716844, 0.58761394]
3.2e-05 [2.5063097, 0.5940267]
6.4e-05 [2.8076706, 0.5880697]
0.000128 [2.7708557, 0.583431]
0.000256 [2.5982711, 0.5925293]
0.000512 [2.4584138, 0.5932617]
0.001024 [2.5994668, 0.6007162]
0.002048 [2.8436828, 0.60589194]


KeyboardInterrupt: ignored