In [1]:
import numpy as np
import tensorflow as tf
from integrated import BasicRNNCell, MultiRNNCell, dynamic_rnn
%autosave 0

from tensorflow.python.ops import variable_scope as vs
from tensorflow.python.util import nest
from tensorflow.python.ops import math_ops
from tensorflow.python.ops import array_ops
from tensorflow.python.ops import init_ops
from tensorflow.python.ops import nn_ops
from tensorflow.python.ops.math_ops import tanh

Autosave disabled


In [2]:
def _linear(args, output_size, bias, bias_start=0.0):
  """Linear map: sum_i(args[i] * W[i]), where W[i] is a variable.
  Args:
    args: a 2D Tensor or a list of 2D, batch x n, Tensors.
    output_size: int, second dimension of W[i].
    bias: boolean, whether to add a bias term or not.
    bias_start: starting value to initialize the bias; 0 by default.
  Returns:
    A 2D Tensor with shape [batch x output_size] equal to
    sum_i(args[i] * W[i]), where W[i]s are newly created matrices.
  Raises:
    ValueError: if some of the arguments has unspecified or wrong shape.
  """
  if args is None or (nest.is_sequence(args) and not args):
    raise ValueError("`args` must be specified")
  if not nest.is_sequence(args):
    args = [args]

  # Calculate the total size of arguments on dimension 1.
  total_arg_size = 0
  shapes = [a.get_shape() for a in args]
  for shape in shapes:
    if shape.ndims != 2:
      raise ValueError("linear is expecting 2D arguments: %s" % shapes)
    if shape[1].value is None:
      raise ValueError("linear expects shape[1] to be provided for shape %s, "
                       "but saw %s" % (shape, shape[1]))
    else:
      total_arg_size += shape[1].value

  dtype = [a.dtype for a in args][0]

  # Now the computation.
  scope = vs.get_variable_scope()
  with vs.variable_scope(scope) as outer_scope:
    weights = vs.get_variable(
        "weights", [total_arg_size, output_size], dtype=dtype)
    
    # either one argument or more
    if len(args) == 1:
      res = math_ops.matmul(args[0], weights)
    else:
      res = math_ops.matmul(array_ops.concat(args, 1), weights)
    
    # add biases if they exist
    if not bias:
      return res
    with vs.variable_scope(outer_scope) as inner_scope:
      inner_scope.set_partitioner(None)
      biases = vs.get_variable(
          "biases", [output_size],
          dtype=dtype,
          initializer=init_ops.constant_initializer(bias_start, dtype=dtype))
        
    return nn_ops.bias_add(res, biases)

class RNNCell2(object):
  """
  This definition of cell differs from the definition used in the literature.
  In the literature, 'cell' refers to an object with a single scalar output.
  This definition refers to a horizontal array of such units.
  An RNN cell, in the most abstract setting, is anything that has
  a state and performs some operation that takes a matrix of inputs.
  This operation results in an output matrix with `self.output_size` columns.
  If `self.state_size` is an integer, this operation also results in a new
  state matrix with `self.state_size` columns.  If `self.state_size` is a
  tuple of integers, then it results in a tuple of `len(state_size)` state
  matrices, each with a column size corresponding to values in `state_size`.
  """
  def __init__(self, num_units, activation=tf.tanh, reuse=None):
    self._num_units = num_units
    self._activation = activation
    self._reuse = reuse
    
  def __call__(self, inputs, state, scope=None):
    """Run this RNN cell on inputs, starting from the given state.
    Args:
      inputs: `2-D` tensor with shape `[batch_size x input_size]`.
      state: if `self.state_size` is an integer, this should be a `2-D Tensor`
        with shape `[batch_size x self.state_size]`.  Otherwise, if
        `self.state_size` is a tuple of integers, this should be a tuple
        with shapes `[batch_size x s] for s in self.state_size`.
      scope: VariableScope for the created subgraph; defaults to class name.
    Returns:
      A pair containing:
      - Output: A `2-D` tensor with shape `[batch_size x self.output_size]`.
      - New state: Either a single `2-D` tensor, or a tuple of tensors matching
        the arity and shapes of `state`.
    """
    """Most basic RNN: output = new_state = act(W * input + U * state + B)."""
    """ U is W.hh in karpathy's code ???"""
    output = self._activation(_linear([inputs, state], self._num_units, True))
    return output, output


  @property
  def state_size(self):
    """size(s) of state(s) used by this cell.
    It can be represented by an Integer, a TensorShape or a tuple of Integers
    or TensorShapes.
    """
    return self._num_units

  @property
  def output_size(self):
    """Integer or TensorShape: size of outputs produced by this cell."""
    return self._num_units

  def zero_state(self, batch_size, dtype):
    """Return zero-filled state tensor(s).
    Args:
      batch_size: int, float, or unit Tensor representing the batch size.
      dtype: the data type to use for the state.
    Returns:
      If `state_size` is an int or TensorShape, then the return value is a
      `N-D` tensor of shape `[batch_size x state_size]` filled with zeros.
      If `state_size` is a nested list or tuple, then the return value is
      a nested list or tuple (of the same structure) of `2-D` tensors with
      the shapes `[batch_size x s]` for each s in `state_size`.
    """
    with ops.name_scope(type(self).__name__ + "ZeroState", values=[batch_size]):
      state_size = self.state_size
      return _zero_state_tensors(state_size, batch_size, dtype)

### Hello Example

In [3]:
def one_hot_generator(label,X):
    X = np.array(X)
    for x in X:
        this_one = np.zeros(len(X))
        this_one[np.where(X==label)] = 1
    return this_one

In [9]:
def one_hot_array(X, voc):
    one_hot_array = []
    for char in X:
        one_hot_stuff = one_hot_generator(char,voc)
        one_hot_array.append(one_hot_stuff)
    one_hot_array = np.array(one_hot_array)
    # print(one_hot_array.shape)
    # print()
    return one_hot_array

In [12]:

num_units = 5

batch_size = 1

X = "hello"*3

voc = np.unique(list(X))
voc_size = len(voc)

X_one_hot = one_hot_array(X, voc)
x = X_one_hot[:batch_size,:].transpose()


has_state = False

W_size = (batch_size + num_units*has_state, num_units)
W = np.arange(W_size[0]*W_size[1])[::-1].reshape(W_size)
h = np.ones(voc_size*num_units).reshape((voc_size, num_units)) # adding second dimension
# B = np.arange(num_units)

print(X_one_hot.shape, x.shape)
print(W.shape)
# print(W.shape,h.shape,B.shape)

(15, 4) (4, 1)
(1, 5)


In [None]:
x

In [13]:
tf.reset_default_graph()

scope = vs.get_variable_scope()

inputs = tf.Variable(initial_value=x, dtype=tf.float32)
hidden_state = tf.Variable(initial_value=h, dtype=tf.float32)

cell = RNNCell2(5)

with tf.Session() as sess:
    tf.global_variables_initializer()
    output, _ = sess.run(cell.__call__(inputs, hidden_state))

RuntimeError: min: Conversion function <function _constant_tensor_conversion_function at 0x115056f28> for type <class 'object'> returned incompatible dtype: requested = float32_ref, actual = float32