In [34]:
from sklearn.feature_extraction.text import CountVectorizer
import numpy as np
from sklearn import metrics
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation, Flatten
from keras.layers import Embedding
from keras.layers import LSTM, GRU
from keras.preprocessing import text
from keras.preprocessing import sequence
from keras.utils import np_utils
from keras.callbacks import EarlyStopping
from __future__ import print_function
from keras.layers.core import Activation, TimeDistributedDense, RepeatVector
from keras.layers import recurrent
import numpy as np

In [35]:
class CharacterTable(object):
    """One-hot encoder/decoder for sequences of integer token ids.

    ``vocab`` is used only for its length, which fixes the width of every
    one-hot row; ``maxlen`` is the default number of rows (time steps).
    """

    def __init__(self, vocab, maxlen):
        self.vocab = vocab
        self.maxlen = maxlen

    def encode(self, C, maxlen=None):
        """Return a ``(maxlen, len(vocab))`` one-hot matrix for the ids in ``C``.

        Each element of ``C`` is used directly as a column index. Rows past
        ``len(C)`` stay all-zero. A falsy ``maxlen`` (``None`` or ``0``)
        falls back to ``self.maxlen``. NumPy raises ``IndexError`` when ``C``
        has more than ``maxlen`` items or an id is out of bounds.
        """
        maxlen = maxlen or self.maxlen
        X = np.zeros((maxlen, len(self.vocab)))
        for i, c in enumerate(C):
            X[i, c] = 1
        return X

    def decode(self, X, calc_argmax=True):
        """Return the ids in ``X`` as a comma-separated string.

        With ``calc_argmax=True`` ``X`` is reduced with ``argmax`` over its
        last axis first (one-hot / probability rows -> ids); otherwise ``X``
        is taken to be an iterable of ids already.
        """
        if calc_argmax:
            X = X.argmax(axis=-1)
        # str.join accepts only strings, so every id is converted explicitly.
        return ','.join(str(x) for x in X)
    
def generateRandSeq(min, max, len):
    """Return a list of ``len`` integers, each drawn with ``np.random.randint(min, max)``.

    Note that the parameter names shadow the ``min``/``max``/``len`` builtins
    inside this function, so those builtins are unavailable in the body.
    """
    seq = []
    for _step in range(len):
        # One independent draw per element, taken from NumPy's global RNG.
        seq.append(np.random.randint(min, max))
    return seq

In [42]:
# Number of generated sequences in the training and test sets.
TRAINING_SIZE = 150000
TEST_SIZE = 10000
# Number of integers per sequence; inputs and targets share this length.
DIGITS = 25
MAXLEN = DIGITS
# Vocabulary of integer ids 0..999. `range` is used instead of the
# Python-2-only `xrange` so the cell runs on both Python 2 and Python 3.
voc = list(range(1000))
ctable = CharacterTable(voc, MAXLEN)

In [43]:
print('Generating data...')
# Each input is DIGITS random ids drawn from [0, len(voc)); each target is the
# same ids in ascending order.
# (Swap sorted(seq) for seq[::-1] to train sequence reversal instead.)
inputs = [generateRandSeq(0, len(voc), DIGITS) for _n in range(TRAINING_SIZE)]
outputs = [sorted(seq) for seq in inputs]

# Held-out pairs, generated the same way after the training pairs.
inputs_t = [generateRandSeq(0, len(voc), DIGITS) for _n in range(TEST_SIZE)]
outputs_t = [sorted(seq) for seq in inputs_t]

# Show one input/target pair as a sanity check.
print(inputs[12])
print(outputs[12])

Generating data...
[86, 636, 674, 750, 775, 618, 474, 596, 65, 168, 965, 975, 19, 999, 236, 57, 80, 29, 984, 468, 600, 624, 494, 538, 688]
[19, 29, 57, 65, 80, 86, 168, 236, 468, 474, 494, 538, 596, 600, 618, 624, 636, 674, 688, 750, 775, 965, 975, 984, 999]


In [44]:
def _vectorize(sources, targets):
    """Turn id sequences into model arrays.

    Returns ``(x_arr, y_arr)`` where ``x_arr`` is an int32 array of shape
    ``(len(sources), MAXLEN)`` holding the raw ids and ``y_arr`` is a boolean
    array of shape ``(len(targets), MAXLEN, len(voc))`` holding the one-hot
    encoding produced by ``ctable.encode``.
    """
    x_arr = np.zeros((len(sources), MAXLEN), dtype=np.int32)
    # Builtin `bool` replaces the deprecated `np.bool` alias, which recent
    # NumPy releases no longer provide.
    y_arr = np.zeros((len(targets), MAXLEN, len(voc)), dtype=bool)
    for row, seq in enumerate(sources):
        x_arr[row] = seq
    for row, seq in enumerate(targets):
        y_arr[row] = ctable.encode(seq, maxlen=MAXLEN)
    return x_arr, y_arr


print('Vectorization...')
# Inputs stay as integer ids; targets are one-hot encoded into a 3-D array.
X, y = _vectorize(inputs, outputs)
X_test, y_test = _vectorize(inputs_t, outputs_t)

print(X.shape)
print(y.shape)
print(X_test.shape)
print(y_test.shape)

Vectorization...
(150000, 25)
(150000, 25, 1000)
(10000, 25)
(10000, 25, 1000)


In [45]:
HIDDEN_SIZE = 256      # units in every LSTM layer
BATCH_SIZE = 200
LAYERS = 2             # stacked LSTM layers in the encoder and in the decoder
EMBEDDING_SIZE = 300   # width of the embedding vectors
DENSE_SIZE = 300       # width of the intermediate time-distributed dense layer

# Why the targets are one-hot (3-D): the Embedding layer turns the 2-D integer
# input (n_batch, sequence_length) into a 3-D tensor
# (n_batch, sequence_length, embedding_size) for the LSTMs, but nothing
# performs a similar conversion on a 2-D integer target. The decoder needs a
# 3-D target, so feeding (n_sample, sequence_length) integers fails with a
# shape-mismatch error; encoding y as a 3-D binary one-hot tensor fixes it.
print('Build model...')
model = Sequential()
model.add(Embedding(len(voc), EMBEDDING_SIZE, input_length=MAXLEN))

# Encoder: LAYERS stacked LSTMs. All but the last return the full sequence;
# the last one returns only its final output vector.
for _depth in range(LAYERS - 1):
    model.add(LSTM(HIDDEN_SIZE, return_sequences=True))
model.add(LSTM(HIDDEN_SIZE))

# Repeat the encoder's output vector MAXLEN times to feed the decoder.
model.add(RepeatVector(MAXLEN))

# Decoder: LAYERS stacked LSTMs, each returning the full sequence.
for _depth in range(LAYERS):
    model.add(LSTM(HIDDEN_SIZE, return_sequences=True))

# Per-time-step projection to a softmax over the vocabulary.
model.add(TimeDistributedDense(input_dim=HIDDEN_SIZE, output_dim=DENSE_SIZE))
model.add(Activation('tanh'))
model.add(TimeDistributedDense(input_dim=DENSE_SIZE, output_dim=len(voc)))
model.add(Activation('softmax'))

model.compile(optimizer='RMSprop',
              loss='categorical_crossentropy',
              metrics=['accuracy'])

Build model...


In [47]:
# Monitor validation loss with a patience of 7 epochs.
early_stopping = EarlyStopping(monitor='val_loss', patience=7)
# 10% of the training data is held out for validation.
hist = model.fit(X, y, batch_size=BATCH_SIZE, nb_epoch=50,
                 callbacks=[early_stopping],
                 validation_split=0.1, shuffle=True)

# The deprecated `show_accuracy` flag is not passed: accuracy is requested via
# metrics=['accuracy'] when the model is compiled, so evaluate() returns both
# the loss and the accuracy.
score, acc = model.evaluate(X_test, y_test,
                            batch_size=BATCH_SIZE)
print('Test score:', score)
print('Test accuracy:', acc)

Train on 135000 samples, validate on 15000 samples
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Test score: 0.670910172462
Test accuracy: 0.797108012438


In [9]:
# Re-evaluate on the held-out set. The deprecated `show_accuracy` flag is not
# passed: accuracy is requested via metrics=['accuracy'] at compile time, so
# evaluate() returns both the loss and the accuracy.
score, acc = model.evaluate(X_test, y_test,
                            batch_size=BATCH_SIZE)
print('Test score:', score)
print('Test accuracy:', acc)

`model.compile(optimizer, loss, metrics=["accuracy"])`


Test score: 0.0201964445598
Test accuracy: 0.993400006294


In [10]:
# Persist the architecture (JSON) and the weights (HDF5) as two files.
json_string = model.to_json()
# `with` closes the file handle (and flushes the write) deterministically.
with open('sortemd_100_256_150k_model.json', 'w') as json_file:
    json_file.write(json_string)
model.save_weights('sortemd_100_256_150k_weights.h5')

In [6]:
from keras.models import model_from_json
# Rebuild the architecture from its JSON description, then restore the weights.
# `with` closes the file handle instead of leaving it to the garbage collector.
with open('sortemd_100_256_150k_model.json') as json_file:
    model = model_from_json(json_file.read())
model.load_weights('sortemd_100_256_150k_weights.h5')

In [12]:
# Continue training the network restored from disk, which is bound to `model`.
# It is compiled here before fit()/evaluate() are called on it.
model.compile(optimizer='RMSprop',
              loss='categorical_crossentropy',
              metrics=['accuracy'])
# Monitor validation loss with a patience of 5 epochs.
early_stopping = EarlyStopping(monitor='val_loss', patience=5)
hist = model.fit(X, y, batch_size=BATCH_SIZE, nb_epoch=2,
                 callbacks=[early_stopping],
                 validation_split=0.1, shuffle=True)

# The deprecated `show_accuracy` flag is not passed: the 'accuracy' metric
# requested in compile() makes evaluate() return both loss and accuracy.
score, acc = model.evaluate(X_test, y_test,
                            batch_size=BATCH_SIZE)
print('Test score:', score)
print('Test accuracy:', acc)

Train on 135000 samples, validate on 15000 samples
Epoch 1/2
  2600/135000 [..............................] - ETA: 701s - loss: 0.1208 - acc: 0.9708

KeyboardInterrupt: 

In [3]:
from keras import backend as K
def get_activations(model, layer, X_batch):
    """Evaluate the output of ``model.layers[layer]`` for the batch ``X_batch``.

    A backend function is built from the first layer's input plus the
    learning-phase placeholder to the requested layer's output, then called
    with ``[X_batch, 0]`` (0 is the learning-phase value; presumably test
    mode in Keras' convention). The backend function's result is returned
    unchanged; the caller in this notebook takes element ``[0]`` of it.
    """
    graph_inputs = [model.layers[0].input, K.learning_phase()]
    graph_outputs = [model.layers[layer].output]
    run_layer = K.function(graph_inputs, graph_outputs)
    return run_layer([X_batch, 0])

In [5]:
# Probe batch: the ids 0..99 laid out as 4 rows of 25 consecutive ids each.
X_voc = np.arange(100, dtype=np.int32).reshape(4, 25)
X_voc

array([[ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,
        17, 18, 19, 20, 21, 22, 23, 24],
       [25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41,
        42, 43, 44, 45, 46, 47, 48, 49],
       [50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66,
        67, 68, 69, 70, 71, 72, 73, 74],
       [75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91,
        92, 93, 94, 95, 96, 97, 98, 99]], dtype=int32)

In [7]:
print(model.layers[0])
print(model.layers[0].output)
# Output of layer 0 (the layer printed above) for the probe batch, taken from
# the same `model` that is inspected above. [0] unwraps the one-element
# result, since a single output was requested.
embeddings = get_activations(model, 0, X_voc)[0]

<keras.layers.embeddings.Embedding object at 0x7f926f392250>
Reshape{3}.0


  **kwargs)


In [8]:
# Flatten the (4 rows x 25 ids) probe output into one row per id:
# row k * 25 + j of `embed` is the vector at embeddings[k][j].
embed = np.zeros((100, 300))
for block_idx in range(4):
    for offset in range(25):
        embed[block_idx * 25 + offset] = embeddings[block_idx][offset]

In [26]:
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
from sklearn import decomposition
from sklearn import datasets

# Fit a 2-component PCA on the first 40 rows of `embed`, then project those
# same rows onto the fitted components.
pca = decomposition.PCA(n_components=2).fit(embed[:40])
V = pca.transform(embed[:40])



In [27]:
# Scatter the two PCA components and label every 4th point with its row index.
fig, ax = plt.subplots()
ax.plot(V[:, 0], V[:, 1], "o")
ax.axis([-1.2, 1.2, -1.2, 1.2])
for idx in range(0, V.shape[0], 4):
    ax.annotate(idx, xy=V[idx], textcoords='data')

plt.show()

In [30]:
# Same projection as before but onto a single principal component, so V has
# one column; printed to inspect the component values row by row.
pca = decomposition.PCA(n_components=1).fit(embed[:40])
V = pca.transform(embed[:40])
print(V)

[[-0.60725524]
 [-0.5478629 ]
 [-0.64999643]
 [-0.59897736]
 [-0.5413074 ]
 [-0.64889847]
 [-0.61972691]
 [-0.58120159]
 [-0.56387784]
 [-0.58660659]
 [-0.56847293]
 [-0.54236413]
 [-0.52597181]
 [-0.4776906 ]
 [-0.44901819]
 [-0.46480098]
 [-0.37702839]
 [-0.28938999]
 [-0.19230272]
 [-0.15344832]
 [-0.16317479]
 [-0.10045423]
 [-0.00275577]
 [ 0.06050427]
 [ 0.1355235 ]
 [ 0.14400794]
 [ 0.31399033]
 [ 0.43521893]
 [ 0.46121149]
 [ 0.6210367 ]
 [ 0.67228949]
 [ 0.63967616]
 [ 0.76844663]
 [ 0.88829104]
 [ 0.90041505]
 [ 0.92397774]
 [ 0.90445206]
 [ 0.88113638]
 [ 0.85624798]
 [ 0.64615786]]


In [32]:
# Plot the single PCA component along x with every point at y = 0, and label
# every 4th point with its row index.
fig, ax = plt.subplots()
# V has one column; take it as a flat vector so x and y are both 1-D.
coords = V[:, 0]
ax.plot(coords, [0] * len(coords), "o")
ax.axis([-1.2, 1.2, -0.3, 0.3])
for i in range(0, len(coords), 4):
    # xy must be a pair of scalars; V[i] would be a length-1 array.
    ax.annotate(str(i), xy=(coords[i], 0.001), textcoords='data')

plt.show()