In [1]:
import itertools
import numpy as np
import random

from keras.models import Sequential
from keras.layers import LSTM,RepeatVector,Dense,Activation,Dropout
from keras.layers.wrappers import TimeDistributed,Bidirectional
from keras.layers.normalization import BatchNormalization
from keras.optimizers import Adam
from keras.callbacks import ModelCheckpoint





  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


In [2]:
# Vocabulary: the ten digits, the three operators, the padding space and the
# '\0' end-of-sequence marker.
CHARS = list('0123456789') + ['*', '+', '-', ' ', '\0']
CHAR_TO_INDEX = {char: index for index, char in enumerate(CHARS)}
INDEX_TO_CHAR = dict(enumerate(CHARS))

# Operand bounds.
MIN_NUMBER = 100
MAX_NUMBER = 999

# Dataset size and sequence geometry.
MAX_N_EXAMPLES = (MAX_NUMBER - MIN_NUMBER) ** 2
N_EXAMPLES = 100000
N_FEATURES = len(CHARS)  # width of a single one-hot vector
MAX_NUMBER_LENGTH_LEFT_SIDE = len(str(MAX_NUMBER))
MAX_NUMBER_LENGTH_RIGHT_SIDE = 2 * MAX_NUMBER_LENGTH_LEFT_SIDE
MAX_EQUATION_LENGTH = 2 * MAX_NUMBER_LENGTH_LEFT_SIDE + 4
MAX_RESULT_LENGTH = 1 + MAX_NUMBER_LENGTH_RIGHT_SIDE

# Training hyper-parameters.
# NOTE(review): in the code visible in this notebook only SPLIT and BATCH_SIZE
# are actually read; the remaining values are declared but not consulted.
SPLIT = 0.1
EPOCHS = 500
LEARNING_RATE = 0.001
BATCH_SIZE = 256
HIDDEN_SIZE = 128
ENCODER_DEPTH = 1
DECODER_DEPTH = 1
DROPOUT = 0
BATCH_NORM = True

##数据参数设置：
##最小数字为100，最大数字为999.
##数字范围是 1798
##总共生成data 的数目为100000
##考虑到乘法的情况下将 MAX_NUMBER_LENGTH_LEFT_SIDE *2
##最大input长度是 10（MAX_EQUATION_LENGTH），每个字符用 15 维的 one-hot 向量表示
##最大output长度是7

##模型参数设置
##encoder 和decoder 均为一层
##batch 为256
##hidden state 为128





In [3]:
def generate_equations(max_count=None, operators=('-', '+'), numbers=range(100, 1000)):
    """
    Lazily yield unique arithmetic equation strings such as '345+120'.

    Ordered pairs of distinct entries of `numbers` are collected (only the
    first `max_count` pairs, in itertools.permutations order, when
    `max_count` is given), shuffled, and each pair is joined with an operator
    picked uniformly at random from `operators`.

    Args:
        max_count: Upper bound on the number of pairs considered, or None to
            consider every pair.
        operators: Operator characters to choose from. The default keeps the
            previous behaviour, in which only '-' and '+' were ever produced
            (the old '*' branch could never be reached).
        numbers: Iterable of operand values.

    Yields:
        str: '<a><op><b>' without spaces. Subtractions always put the larger
        operand first, so their result is never negative.
    """
    pairs = itertools.permutations(numbers, 2)
    if max_count is not None:
        pairs = itertools.islice(pairs, max_count)
    pairs = list(pairs)
    random.shuffle(pairs)

    # (x, y) and (y, x) both normalise to the same subtraction string; without
    # this set the same equation could be yielded twice and end up in both
    # the test and the training split.
    seen = set()
    for x, y in pairs:
        operator = random.choice(operators)
        if operator == '-' and x < y:
            x, y = y, x
        equation = '{}{}{}'.format(x, operator, y)
        if equation in seen:
            continue
        seen.add(equation)
        yield equation
 
##随机生成操作数范围为100-999的equation




def one_hot_to_index(vector):
    """Return the position of the largest entry of `vector`, or -1 when every entry is zero/False."""
    has_hot_entry = np.any(vector)
    return np.argmax(vector) if has_hot_entry else -1

def one_hot_to_char(vector):
    """Decode one one-hot (or probability) vector to its character; an all-zero vector decodes to ''."""
    position = one_hot_to_index(vector)
    return INDEX_TO_CHAR[position] if position != -1 else ''

def one_hot_to_string(matrix):
    """Decode every row of `matrix` with one_hot_to_char and concatenate the characters."""
    decoded = [one_hot_to_char(row) for row in matrix]
    return ''.join(decoded)





def equations_to_x_y(equations,n):
    """
    One-hot encode up to `n` equations and their evaluated results.

    Each equation is terminated with '\\0' and written left-aligned into `x`;
    unused trailing time steps stay all-zero. Each result is left-padded with
    spaces to MAX_RESULT_LENGTH-1 characters and terminated with '\\0'.

    Args:
        equations: Iterator of equation strings; at most `n` are consumed, so
            the same iterator can be passed again to build the next split.
        n: Number of examples requested.

    Returns:
        (x, y): boolean arrays of shape (m, MAX_EQUATION_LENGTH, N_FEATURES)
        and (m, MAX_RESULT_LENGTH, N_FEATURES). m == n unless `equations` ran
        out early, in which case m is the number of equations actually encoded.
    """
    import warnings

    # Plain `bool`: the `np.bool` alias was deprecated in NumPy 1.20 and later removed.
    x=np.zeros((n,MAX_EQUATION_LENGTH,N_FEATURES),dtype=bool)
    y=np.zeros((n,MAX_RESULT_LENGTH,N_FEATURES),dtype=bool)
    n_encoded=0
    for i,equation in enumerate(itertools.islice(equations,n)):
        # NOTE(review): eval() is only acceptable because the equations are
        # generated locally; never feed external strings through here.
        result=str(eval(equation))
        result=result.rjust(MAX_RESULT_LENGTH-1)

        equation +='\0'
        result   +='\0'
        for t,char in enumerate(equation):
            x[i,t,CHAR_TO_INDEX[char]]=1
        for t,char in enumerate(result):
            y[i,t,CHAR_TO_INDEX[char]]=1
        n_encoded=i+1

    if n_encoded<n:
        # Rows that were never filled would otherwise stay all-zero and be
        # trained/evaluated on as if they were real examples.
        warnings.warn('equations_to_x_y: requested {} examples but only {} were available'.format(n,n_encoded))
        x,y=x[:n_encoded],y[:n_encoded]
    return x,y
 
##eval the equation and encode the data as one-hot vectors
##e.g. for the equation "123+345" we get the result string "468", and every character is then encoded as a one-hot vector such as "000100000000000"



def build_dataset(output_dir="C:/Users/hasee/Desktop/lstm-sub"):
    """
    Generate, encode and split N_EXAMPLES equations, optionally dumping them as text.

    The first round(SPLIT*N_EXAMPLES) equations taken from the generator form
    the test split, the following ones the training split.

    Args:
        output_dir: Directory that receives x_train.txt, y_train.txt,
            x_test.txt and y_test.txt (one comma-separated one-hot vector per
            line). It is created when missing. The default is the location
            that used to be hard-coded; pass None to skip writing the files.

    Returns:
        x_test, y_test, x_train, y_train (in that order).
    """
    import os

    generator=generate_equations(max_count=1000000)
    n_test=round(SPLIT*N_EXAMPLES)
    n_train=N_EXAMPLES-n_test

    # Both calls draw from the same generator, so the splits do not overlap.
    x_test,y_test=equations_to_x_y(generator,n_test)
    x_train,y_train=equations_to_x_y(generator,n_train)

    if output_dir is not None:
        # np.savetxt does not create directories, so make sure the target exists.
        os.makedirs(output_dir,exist_ok=True)
        splits=(("x_train",x_train),("y_train",y_train),("x_test",x_test),("y_test",y_test))
        for name,tensor in splits:
            # Flatten (examples, time steps) so each line holds one one-hot vector.
            rows=np.reshape(tensor,[-1,N_FEATURES])
            np.savetxt(os.path.join(output_dir,name+".txt"),rows,fmt="%s", delimiter=",")

    return x_test,y_test,x_train,y_train

##build dataset 
##0.9 of example to train
##0.1 of example to validate

def print_example_prediction(count,model,x_test,y_test):
    """
    Print `count` randomly chosen test equations with the model's answer.

    Each line shows the question, the predicted string, the true string
    (prefixed with 'T') and 'ok' / 'not ok' depending on whether both decoded
    strings are equal. Examples are drawn without replacement.
    """
    print('Examples:')
    chosen=np.random.choice(x_test.shape[0],size=count,replace=False)
    batch=x_test[chosen,:]
    print(np.array(batch).shape)
    predictions=model.predict(batch)
    for sample_id,predicted in zip(chosen,predictions):
        correct=one_hot_to_string(y_test[sample_id])
        guess=one_hot_to_string(predicted)
        question=one_hot_to_string(x_test[sample_id])

        print('Q {} - {} '.format(question,guess),end=' ')
        print('T', correct, end=' ')
        verdict='ok' if correct == guess else 'not ok'
        print( verdict,end=' ')
        print('\n')
      
        
def build_model(hidden_size=20, learning_rate=0.01):
    """
    Build and compile the sequence-to-sequence model.

    Only MAX_EQUATION_LENGTH, MAX_RESULT_LENGTH and N_FEATURES are taken from
    the global configuration. The layer width and the learning rate are
    arguments whose defaults reproduce the values that used to be hard-coded
    here; the HIDDEN_SIZE and LEARNING_RATE constants are not consulted.

    Args:
        hidden_size: Units per direction in each LSTM.
        learning_rate: Learning rate handed to the Adam optimizer.

    Returns:
        The compiled Sequential model.
    """
    input_shape = (MAX_EQUATION_LENGTH, N_FEATURES)

    model = Sequential()

    # Encoder: reads the whole equation and emits a single vector.
    model.add(Bidirectional(LSTM(hidden_size), input_shape=input_shape))
    model.add(BatchNormalization())

    # Feed the encoding to the decoder once per output time step.
    model.add(RepeatVector(MAX_RESULT_LENGTH))

    # Decoder: emits one vector per output time step.
    model.add(Bidirectional(LSTM(hidden_size, return_sequences=True)))
    model.add(BatchNormalization())

    # Per-time-step distribution over the character vocabulary.
    model.add(TimeDistributed(Dense(N_FEATURES)))
    model.add(Activation('softmax'))

    model.compile(
        loss='categorical_crossentropy',
        optimizer=Adam(lr=learning_rate),
        metrics=['accuracy'],
    )

    return model


### build model
### We choose a seq2seq model.
### Sequence-to-sequence learning deals with problems in which a source sequence of inputs
### has to be mapped to a target sequence of outputs, where each output does not necessarily depend directly on a single input.
### The classical example is translation: how do you learn that the Chinese input phrase “他现在已经在路上了。”
### equals “He is already on his way.” in English?

### An encoder based on RNNs learns an abstract representation.
### Then a decoder, also based on RNNs, learns to decode it into another language,
### generating a new sequence from the encoding as output.

### I chose a bidirectional LSTM for both the encoder and the decoder.
### After them, a fully connected layer produces a result that has the same length as the ground truth.


In [4]:
def main():
    """
    Run the experiment end to end.

    Builds the model and the dataset, prints 5 predictions of the untrained
    model, then performs 100 rounds of one training epoch each. After every
    round 10 sample predictions are printed and the model is saved to
    'my_model.h5'.
    """
    model=build_model()
    model.summary()
    print()

    x_test,y_test,x_train,y_train=build_dataset()

    # Baseline output before any training has happened.
    print()
    print_example_prediction(5,model,x_test,y_test)
    print()

    divider='-'*50
    held_out=(x_test, y_test)
    for round_number in range(100):
        print()
        print(divider)
        print('Iteration',round_number)
        model.fit(x_train, y_train,batch_size=BATCH_SIZE,epochs=1,validation_data=held_out)
        print_example_prediction(10,model,x_test,y_test)
        model.save('my_model.h5')
        
        
### First experiment: learn addition and subtraction together.

main()

In [1]:
def generate_equations_three_sum(max_count=None):
    """
    Lazily yield three-operand addition strings such as '345+120+678'.

    The first two operands are ordered pairs of distinct numbers from
    range(100, 1000) (only the first `max_count` pairs, in
    itertools.permutations order, when `max_count` is given), visited in
    shuffled order. The third operand is drawn at random for every pair.

    Args:
        max_count: Upper bound on the number of pairs considered, or None to
            consider every pair.

    Yields:
        str: '<x>+<y>+<z>' where every operand lies in 100..999.
    """
    pairs = itertools.permutations(range(100, 1000), 2)
    if max_count is not None:
        pairs = itertools.islice(pairs, max_count)
    pairs = list(pairs)
    random.shuffle(pairs)
    for x, y in pairs:
        # random.randint includes its upper bound, so 999 (not 1000) keeps z
        # in the same three-digit range as x and y.
        z = random.randint(100, 999)
        yield '{}+{}+{}'.format(x, y, z)
            
            

In [2]:
import itertools
import numpy as np
import random

from keras.models import Sequential
from keras.layers import LSTM,RepeatVector,Dense,Activation,Dropout
from keras.layers.wrappers import TimeDistributed,Bidirectional
from keras.layers.normalization import BatchNormalization
from keras.optimizers import Adam
from keras.callbacks import ModelCheckpoint

# Operand bounds.
MIN_NUMBER = 100
MAX_NUMBER = 999

# Vocabulary: the ten digits, the three operators, the padding space and the
# '\0' end-of-sequence marker.
CHARS = list('0123456789') + ['*', '+', '-', ' ', '\0']
CHAR_TO_INDEX = {char: index for index, char in enumerate(CHARS)}
INDEX_TO_CHAR = dict(enumerate(CHARS))

# Dataset size and sequence geometry (three operands per equation here).
MAX_N_EXAMPLES = (MAX_NUMBER - MIN_NUMBER) ** 2
N_EXAMPLES = 30000
N_FEATURES = len(CHARS)  # width of a single one-hot vector
MAX_NUMBER_LENGTH_LEFT_SIDE = len(str(MAX_NUMBER))
MAX_NUMBER_LENGTH_RIGHT_SIDE = 2 * MAX_NUMBER_LENGTH_LEFT_SIDE
MAX_EQUATION_LENGTH = 3 * MAX_NUMBER_LENGTH_LEFT_SIDE + 4
MAX_RESULT_LENGTH = 1 + MAX_NUMBER_LENGTH_RIGHT_SIDE

# Training hyper-parameters.
# NOTE(review): in the code visible in this notebook only SPLIT and BATCH_SIZE
# are actually read; the remaining values are declared but not consulted.
SPLIT = 0.1
EPOCHS = 500
LEARNING_RATE = 0.001
BATCH_SIZE = 256
HIDDEN_SIZE = 128
ENCODER_DEPTH = 1
DECODER_DEPTH = 1
DROPOUT = 0
BATCH_NORM = True

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


In [3]:
def build_dataset_three_sum():
    """
    Encode N_EXAMPLES three-operand additions and split them into test and train.

    Returns:
        x_test, y_test, x_train, y_train (in that order).
    """
    n_test=round(SPLIT*N_EXAMPLES)
    equations=generate_equations_three_sum(max_count=1000000)

    # The generator is shared: the test split consumes the first n_test
    # equations and the training split continues right after them.
    test_pair=equations_to_x_y(equations,n_test)
    train_pair=equations_to_x_y(equations,N_EXAMPLES-n_test)
    return test_pair+train_pair

In [4]:
def main2():
    """
    Run the three-operand addition experiment.

    Builds the model, encodes the dataset from build_dataset_three_sum(),
    prints 5 predictions of the untrained model, then performs 300 rounds of
    one training epoch each, printing 10 sample predictions after every round.
    """
    model=build_model()
    model.summary()
    print()

    x_test,y_test,x_train,y_train=build_dataset_three_sum()

    # Baseline output before any training has happened.
    print()
    print_example_prediction(5,model,x_test,y_test)
    print()

    divider='-'*50
    held_out=(x_test, y_test)
    for round_number in range(300):
        print()
        print(divider)
        print('Iteration',round_number)
        model.fit(x_train, y_train,batch_size=BATCH_SIZE,epochs=1,validation_data=held_out)
        print_example_prediction(10,model,x_test,y_test)
        

In [5]:
def generate_equations(max_count=None, operators=('-', '+'), numbers=range(100, 1000)):
    """
    Lazily yield unique arithmetic equation strings such as '345+120'.

    Ordered pairs of distinct entries of `numbers` are collected (only the
    first `max_count` pairs, in itertools.permutations order, when
    `max_count` is given), shuffled, and each pair is joined with an operator
    picked uniformly at random from `operators`.

    Args:
        max_count: Upper bound on the number of pairs considered, or None to
            consider every pair.
        operators: Operator characters to choose from. The default keeps the
            previous behaviour, in which only '-' and '+' were ever produced
            (the old '*' branch could never be reached).
        numbers: Iterable of operand values.

    Yields:
        str: '<a><op><b>' without spaces. Subtractions always put the larger
        operand first, so their result is never negative.
    """
    pairs = itertools.permutations(numbers, 2)
    if max_count is not None:
        pairs = itertools.islice(pairs, max_count)
    pairs = list(pairs)
    random.shuffle(pairs)

    # (x, y) and (y, x) both normalise to the same subtraction string; without
    # this set the same equation could be yielded twice and end up in both
    # the test and the training split.
    seen = set()
    for x, y in pairs:
        operator = random.choice(operators)
        if operator == '-' and x < y:
            x, y = y, x
        equation = '{}{}{}'.format(x, operator, y)
        if equation in seen:
            continue
        seen.add(equation)
        yield equation
 

def one_hot_to_index(vector):
    """Return the position of the largest entry of `vector`, or -1 when every entry is zero/False."""
    has_hot_entry = np.any(vector)
    return np.argmax(vector) if has_hot_entry else -1

def one_hot_to_char(vector):
    """Decode one one-hot (or probability) vector to its character; an all-zero vector decodes to ''."""
    position = one_hot_to_index(vector)
    return INDEX_TO_CHAR[position] if position != -1 else ''

def one_hot_to_string(matrix):
    """Decode every row of `matrix` with one_hot_to_char and concatenate the characters."""
    decoded = [one_hot_to_char(row) for row in matrix]
    return ''.join(decoded)
                
def equations_to_x_y(equations,n):
    """
    One-hot encode up to `n` equations and their evaluated results.

    Each equation is terminated with '\\0' and written left-aligned into `x`;
    unused trailing time steps stay all-zero. Each result is left-padded with
    spaces to MAX_RESULT_LENGTH-1 characters and terminated with '\\0'.

    Args:
        equations: Iterator of equation strings; at most `n` are consumed, so
            the same iterator can be passed again to build the next split.
        n: Number of examples requested.

    Returns:
        (x, y): boolean arrays of shape (m, MAX_EQUATION_LENGTH, N_FEATURES)
        and (m, MAX_RESULT_LENGTH, N_FEATURES). m == n unless `equations` ran
        out early, in which case m is the number of equations actually encoded.
    """
    import warnings

    # Plain `bool`: the `np.bool` alias was deprecated in NumPy 1.20 and later removed.
    x=np.zeros((n,MAX_EQUATION_LENGTH,N_FEATURES),dtype=bool)
    y=np.zeros((n,MAX_RESULT_LENGTH,N_FEATURES),dtype=bool)
    n_encoded=0
    for i,equation in enumerate(itertools.islice(equations,n)):
        # NOTE(review): eval() is only acceptable because the equations are
        # generated locally; never feed external strings through here.
        result=str(eval(equation))
        result=result.rjust(MAX_RESULT_LENGTH-1)

        equation +='\0'
        result   +='\0'
        for t,char in enumerate(equation):
            x[i,t,CHAR_TO_INDEX[char]]=1
        for t,char in enumerate(result):
            y[i,t,CHAR_TO_INDEX[char]]=1
        n_encoded=i+1

    if n_encoded<n:
        # Rows that were never filled would otherwise stay all-zero and be
        # trained/evaluated on as if they were real examples.
        warnings.warn('equations_to_x_y: requested {} examples but only {} were available'.format(n,n_encoded))
        x,y=x[:n_encoded],y[:n_encoded]
    return x,y
     
def build_dataset(output_dir="C:/Users/hasee/Desktop/lstm-sub"):
    """
    Generate, encode and split N_EXAMPLES equations, optionally dumping them as text.

    The first round(SPLIT*N_EXAMPLES) equations taken from the generator form
    the test split, the following ones the training split.

    Args:
        output_dir: Directory that receives x_train.txt, y_train.txt,
            x_test.txt and y_test.txt (one comma-separated one-hot vector per
            line). It is created when missing. The default is the location
            that used to be hard-coded; pass None to skip writing the files.

    Returns:
        x_test, y_test, x_train, y_train (in that order).
    """
    import os

    generator=generate_equations(max_count=1000000)
    n_test=round(SPLIT*N_EXAMPLES)
    n_train=N_EXAMPLES-n_test

    # Both calls draw from the same generator, so the splits do not overlap.
    x_test,y_test=equations_to_x_y(generator,n_test)
    x_train,y_train=equations_to_x_y(generator,n_train)

    if output_dir is not None:
        # np.savetxt does not create directories, so make sure the target exists.
        os.makedirs(output_dir,exist_ok=True)
        splits=(("x_train",x_train),("y_train",y_train),("x_test",x_test),("y_test",y_test))
        for name,tensor in splits:
            # Flatten (examples, time steps) so each line holds one one-hot vector.
            rows=np.reshape(tensor,[-1,N_FEATURES])
            np.savetxt(os.path.join(output_dir,name+".txt"),rows,fmt="%s", delimiter=",")

    return x_test,y_test,x_train,y_train
    
def print_example_prediction(count,model,x_test,y_test):
    """
    Print `count` randomly chosen test equations with the model's answer.

    Each line shows the question, the predicted string, the true string
    (prefixed with 'T') and 'ok' / 'not ok' depending on whether both decoded
    strings are equal. Examples are drawn without replacement.
    """
    print('Examples:')
    chosen=np.random.choice(x_test.shape[0],size=count,replace=False)
    batch=x_test[chosen,:]
    print(np.array(batch).shape)
    predictions=model.predict(batch)
    for sample_id,predicted in zip(chosen,predictions):
        correct=one_hot_to_string(y_test[sample_id])
        guess=one_hot_to_string(predicted)
        question=one_hot_to_string(x_test[sample_id])

        print('Q {} - {} '.format(question,guess),end=' ')
        print('T', correct, end=' ')
        verdict='ok' if correct == guess else 'not ok'
        print( verdict,end=' ')
        print('\n')
      
        
def build_model(hidden_size=20, learning_rate=0.01):
    """
    Build and compile the sequence-to-sequence model.

    Only MAX_EQUATION_LENGTH, MAX_RESULT_LENGTH and N_FEATURES are taken from
    the global configuration. The layer width and the learning rate are
    arguments whose defaults reproduce the values that used to be hard-coded
    here; the HIDDEN_SIZE and LEARNING_RATE constants are not consulted.

    Args:
        hidden_size: Units per direction in each LSTM.
        learning_rate: Learning rate handed to the Adam optimizer.

    Returns:
        The compiled Sequential model.
    """
    input_shape = (MAX_EQUATION_LENGTH, N_FEATURES)

    model = Sequential()

    # Encoder: reads the whole equation and emits a single vector.
    model.add(Bidirectional(LSTM(hidden_size), input_shape=input_shape))
    model.add(BatchNormalization())

    # Feed the encoding to the decoder once per output time step.
    model.add(RepeatVector(MAX_RESULT_LENGTH))

    # Decoder: emits one vector per output time step.
    model.add(Bidirectional(LSTM(hidden_size, return_sequences=True)))
    model.add(BatchNormalization())

    # Per-time-step distribution over the character vocabulary.
    model.add(TimeDistributed(Dense(N_FEATURES)))
    model.add(Activation('softmax'))

    model.compile(
        loss='categorical_crossentropy',
        optimizer=Adam(lr=learning_rate),
        metrics=['accuracy'],
    )

    return model

In [6]:
main2()
### Second experiment: increase the number of operands to three.

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
bidirectional_1 (Bidirection (None, 40)                5760      
_________________________________________________________________
batch_normalization_1 (Batch (None, 40)                160       
_________________________________________________________________
repeat_vector_1 (RepeatVecto (None, 7, 40)             0         
_________________________________________________________________
bidirectional_2 (Bidirection (None, 7, 40)             9760      
_________________________________________________________________
batch_normalization_2 (Batch (None, 7, 40)             160       
_________________________________________________________________
time_distributed_1 (TimeDist (None, 7, 15)             615       
_________________________________________________________________
activation_1 (Activation)    (None, 7, 15)             0         
Total para

Tested on sums of three numbers: the training-set accuracy is 0.9095 and the validation-set accuracy is 0.9390.

In [20]:
def generate_equations_random_digit(max_count=None, operators=('-', '+'), numbers=range(10, 1000)):
    """
    Lazily yield unique equation strings whose operands have two or three digits.

    Ordered pairs of distinct entries of `numbers` are collected (only the
    first `max_count` pairs, in itertools.permutations order, when
    `max_count` is given), shuffled, and each pair is joined with an operator
    picked uniformly at random from `operators`.

    Args:
        max_count: Upper bound on the number of pairs considered, or None to
            consider every pair.
        operators: Operator characters to choose from. The default keeps the
            previous behaviour, in which only '-' and '+' were ever produced
            (the old '*' branch could never be reached).
        numbers: Iterable of operand values; the default covers 10..999.

    Yields:
        str: '<a><op><b>' without spaces. Subtractions always put the larger
        operand first, so their result is never negative.
    """
    pairs = itertools.permutations(numbers, 2)
    if max_count is not None:
        pairs = itertools.islice(pairs, max_count)
    pairs = list(pairs)
    random.shuffle(pairs)

    # (x, y) and (y, x) both normalise to the same subtraction string; without
    # this set the same equation could be yielded twice and end up in both
    # the test and the training split.
    seen = set()
    for x, y in pairs:
        operator = random.choice(operators)
        if operator == '-' and x < y:
            x, y = y, x
        equation = '{}{}{}'.format(x, operator, y)
        if equation in seen:
            continue
        seen.add(equation)
        yield equation

In [21]:
from keras.models import Sequential
from keras.layers import LSTM,RepeatVector,Dense,Activation,Dropout
from keras.layers.wrappers import TimeDistributed,Bidirectional
from keras.layers.normalization import BatchNormalization
from keras.optimizers import Adam
from keras.models import load_model

import itertools
import numpy as np
import random

# Vocabulary: the ten digits, the three operators, the padding space and the
# '\0' end-of-sequence marker.
CHARS = list('0123456789') + ['*', '+', '-', ' ', '\0']
CHAR_TO_INDEX = {char: index for index, char in enumerate(CHARS)}
INDEX_TO_CHAR = dict(enumerate(CHARS))

# Operand bounds.
MIN_NUMBER = 10
MAX_NUMBER = 999

# Dataset size and sequence geometry.
MAX_N_EXAMPLES = (MAX_NUMBER - MIN_NUMBER) ** 2
N_EXAMPLES = 30000
N_FEATURES = len(CHARS)  # width of a single one-hot vector
MAX_NUMBER_LENGTH_LEFT_SIDE = len(str(MAX_NUMBER))
MAX_NUMBER_LENGTH_RIGHT_SIDE = 2 * MAX_NUMBER_LENGTH_LEFT_SIDE
MAX_EQUATION_LENGTH = 2 * MAX_NUMBER_LENGTH_LEFT_SIDE + 4
MAX_RESULT_LENGTH = 1 + MAX_NUMBER_LENGTH_RIGHT_SIDE

# Training hyper-parameters.
# NOTE(review): in the code visible in this notebook only SPLIT and BATCH_SIZE
# are actually read; the remaining values are declared but not consulted.
SPLIT = 0.1
EPOCHS = 500
LEARNING_RATE = 0.001
BATCH_SIZE = 256
HIDDEN_SIZE = 128
ENCODER_DEPTH = 1
DECODER_DEPTH = 1
DROPOUT = 0
BATCH_NORM = True

In [22]:
def generate_equations(max_count=None, operators=('-', '+'), numbers=range(100, 1000)):
    """
    Lazily yield unique arithmetic equation strings such as '345+120'.

    Ordered pairs of distinct entries of `numbers` are collected (only the
    first `max_count` pairs, in itertools.permutations order, when
    `max_count` is given), shuffled, and each pair is joined with an operator
    picked uniformly at random from `operators`.

    Args:
        max_count: Upper bound on the number of pairs considered, or None to
            consider every pair.
        operators: Operator characters to choose from. The default keeps the
            previous behaviour, in which only '-' and '+' were ever produced
            (the old '*' branch could never be reached).
        numbers: Iterable of operand values.

    Yields:
        str: '<a><op><b>' without spaces. Subtractions always put the larger
        operand first, so their result is never negative.
    """
    pairs = itertools.permutations(numbers, 2)
    if max_count is not None:
        pairs = itertools.islice(pairs, max_count)
    pairs = list(pairs)
    random.shuffle(pairs)

    # (x, y) and (y, x) both normalise to the same subtraction string; without
    # this set the same equation could be yielded twice and end up in both
    # the test and the training split.
    seen = set()
    for x, y in pairs:
        operator = random.choice(operators)
        if operator == '-' and x < y:
            x, y = y, x
        equation = '{}{}{}'.format(x, operator, y)
        if equation in seen:
            continue
        seen.add(equation)
        yield equation
 

def one_hot_to_index(vector):
    """Return the position of the largest entry of `vector`, or -1 when every entry is zero/False."""
    has_hot_entry = np.any(vector)
    return np.argmax(vector) if has_hot_entry else -1

def one_hot_to_char(vector):
    """Decode one one-hot (or probability) vector to its character; an all-zero vector decodes to ''."""
    position = one_hot_to_index(vector)
    return INDEX_TO_CHAR[position] if position != -1 else ''

def one_hot_to_string(matrix):
    """Decode every row of `matrix` with one_hot_to_char and concatenate the characters."""
    decoded = [one_hot_to_char(row) for row in matrix]
    return ''.join(decoded)
                
def equations_to_x_y(equations,n):
    """
    One-hot encode up to `n` equations and their evaluated results.

    Each equation is terminated with '\\0' and written left-aligned into `x`;
    unused trailing time steps stay all-zero. Each result is left-padded with
    spaces to MAX_RESULT_LENGTH-1 characters and terminated with '\\0'.

    Args:
        equations: Iterator of equation strings; at most `n` are consumed, so
            the same iterator can be passed again to build the next split.
        n: Number of examples requested.

    Returns:
        (x, y): boolean arrays of shape (m, MAX_EQUATION_LENGTH, N_FEATURES)
        and (m, MAX_RESULT_LENGTH, N_FEATURES). m == n unless `equations` ran
        out early, in which case m is the number of equations actually encoded.
    """
    import warnings

    # Plain `bool`: the `np.bool` alias was deprecated in NumPy 1.20 and later removed.
    x=np.zeros((n,MAX_EQUATION_LENGTH,N_FEATURES),dtype=bool)
    y=np.zeros((n,MAX_RESULT_LENGTH,N_FEATURES),dtype=bool)
    n_encoded=0
    for i,equation in enumerate(itertools.islice(equations,n)):
        # NOTE(review): eval() is only acceptable because the equations are
        # generated locally; never feed external strings through here.
        result=str(eval(equation))
        result=result.rjust(MAX_RESULT_LENGTH-1)

        equation +='\0'
        result   +='\0'
        for t,char in enumerate(equation):
            x[i,t,CHAR_TO_INDEX[char]]=1
        for t,char in enumerate(result):
            y[i,t,CHAR_TO_INDEX[char]]=1
        n_encoded=i+1

    if n_encoded<n:
        # Rows that were never filled would otherwise stay all-zero and be
        # trained/evaluated on as if they were real examples.
        warnings.warn('equations_to_x_y: requested {} examples but only {} were available'.format(n,n_encoded))
        x,y=x[:n_encoded],y[:n_encoded]
    return x,y
     
def build_dataset():
    """
    Encode N_EXAMPLES equations from generate_equations_random_digit and split them.

    Returns:
        x_test, y_test, x_train, y_train (in that order).
    """
    n_test=round(SPLIT*N_EXAMPLES)
    equations=generate_equations_random_digit(max_count=1000000)

    # The generator is shared: the test split consumes the first n_test
    # equations and the training split continues right after them.
    test_pair=equations_to_x_y(equations,n_test)
    train_pair=equations_to_x_y(equations,N_EXAMPLES-n_test)
    return test_pair+train_pair

def print_example_prediction(count,model,x_test,y_test):
    """
    Print `count` randomly chosen test equations with the model's answer.

    Each line shows the question, the predicted string, the true string
    (prefixed with 'T') and 'ok' / 'not ok' depending on whether both decoded
    strings are equal. Examples are drawn without replacement.
    """
    print('Examples:')
    chosen=np.random.choice(x_test.shape[0],size=count,replace=False)
    batch=x_test[chosen,:]
    print(np.array(batch).shape)
    predictions=model.predict(batch)
    for sample_id,predicted in zip(chosen,predictions):
        correct=one_hot_to_string(y_test[sample_id])
        guess=one_hot_to_string(predicted)
        question=one_hot_to_string(x_test[sample_id])

        print('Q {} - {} '.format(question,guess),end=' ')
        print('T', correct, end=' ')
        verdict='ok' if correct == guess else 'not ok'
        print( verdict,end=' ')
        print('\n')

In [23]:
def build_model(hidden_size=20, learning_rate=0.01):
    """
    Build and compile the sequence-to-sequence model.

    Only MAX_EQUATION_LENGTH, MAX_RESULT_LENGTH and N_FEATURES are taken from
    the global configuration. The layer width and the learning rate are
    arguments whose defaults reproduce the values that used to be hard-coded
    here; the HIDDEN_SIZE and LEARNING_RATE constants are not consulted.

    Args:
        hidden_size: Units per direction in each LSTM.
        learning_rate: Learning rate handed to the Adam optimizer.

    Returns:
        The compiled Sequential model.
    """
    input_shape = (MAX_EQUATION_LENGTH, N_FEATURES)

    model = Sequential()

    # Encoder: reads the whole equation and emits a single vector.
    model.add(Bidirectional(LSTM(hidden_size), input_shape=input_shape))
    model.add(BatchNormalization())

    # Feed the encoding to the decoder once per output time step.
    model.add(RepeatVector(MAX_RESULT_LENGTH))

    # Decoder: emits one vector per output time step.
    model.add(Bidirectional(LSTM(hidden_size, return_sequences=True)))
    model.add(BatchNormalization())

    # Per-time-step distribution over the character vocabulary.
    model.add(TimeDistributed(Dense(N_FEATURES)))
    model.add(Activation('softmax'))

    model.compile(
        loss='categorical_crossentropy',
        optimizer=Adam(lr=learning_rate),
        metrics=['accuracy'],
    )

    return model

In [24]:
def main3():
    """
    Run the experiment end to end.

    Builds the model and the dataset from build_dataset(), prints 5
    predictions of the untrained model, then performs 100 rounds of one
    training epoch each, printing 10 sample predictions after every round.
    """
    model=build_model()
    model.summary()
    print()

    x_test,y_test,x_train,y_train=build_dataset()

    # Baseline output before any training has happened.
    print()
    print_example_prediction(5,model,x_test,y_test)
    print()

    divider='-'*50
    held_out=(x_test, y_test)
    for round_number in range(100):
        print()
        print(divider)
        print('Iteration',round_number)
        model.fit(x_train, y_train,batch_size=BATCH_SIZE,epochs=1,validation_data=held_out)
        print_example_prediction(10,model,x_test,y_test)
         

In [26]:
main3()
## Third experiment: addition and subtraction on operands with different numbers of digits. The results were good.

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
bidirectional_9 (Bidirection (None, 40)                5760      
_________________________________________________________________
batch_normalization_9 (Batch (None, 40)                160       
_________________________________________________________________
repeat_vector_5 (RepeatVecto (None, 7, 40)             0         
_________________________________________________________________
bidirectional_10 (Bidirectio (None, 7, 40)             9760      
_________________________________________________________________
batch_normalization_10 (Batc (None, 7, 40)             160       
_________________________________________________________________
time_distributed_5 (TimeDist (None, 7, 15)             615       
_________________________________________________________________
activation_5 (Activation)    (None, 7, 15)             0         
Total para

In [1]:
import itertools
import numpy as np
import random

from keras.models import Sequential
from keras.layers import LSTM,RepeatVector,Dense,Activation,Dropout
from keras.layers.wrappers import TimeDistributed,Bidirectional
from keras.layers.normalization import BatchNormalization
from keras.optimizers import Adam
from keras.models import load_model

# Vocabulary: the ten digits, the three operators, the padding space and the
# '\0' end-of-sequence marker.
CHARS = list('0123456789') + ['*', '+', '-', ' ', '\0']
CHAR_TO_INDEX = {char: index for index, char in enumerate(CHARS)}
INDEX_TO_CHAR = dict(enumerate(CHARS))

# Operand bounds.
MIN_NUMBER = 1
MAX_NUMBER = 100

# Dataset size and sequence geometry.
MAX_N_EXAMPLES = (MAX_NUMBER - MIN_NUMBER) ** 2
N_EXAMPLES = 30000
N_FEATURES = len(CHARS)  # width of a single one-hot vector
MAX_NUMBER_LENGTH_LEFT_SIDE = len(str(MAX_NUMBER))
MAX_NUMBER_LENGTH_RIGHT_SIDE = 2 * MAX_NUMBER_LENGTH_LEFT_SIDE
MAX_EQUATION_LENGTH = 2 * MAX_NUMBER_LENGTH_LEFT_SIDE + 4
MAX_RESULT_LENGTH = 1 + MAX_NUMBER_LENGTH_RIGHT_SIDE

# Training hyper-parameters.
# NOTE(review): in the code visible in this notebook only SPLIT and BATCH_SIZE
# are actually read; the remaining values are declared but not consulted.
SPLIT = 0.1
EPOCHS = 500
LEARNING_RATE = 0.001
BATCH_SIZE = 256
HIDDEN_SIZE = 128
ENCODER_DEPTH = 1
DECODER_DEPTH = 1
DROPOUT = 0
BATCH_NORM = True

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.
  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


In [2]:
def generate_equations(max_count=None, operators=('*',), numbers=range(1, 100)):
    """
    Lazily yield unique arithmetic equation strings such as '52*90'.

    Ordered pairs of distinct entries of `numbers` are collected (only the
    first `max_count` pairs, in itertools.permutations order, when
    `max_count` is given), shuffled, and each pair is joined with an operator
    picked uniformly at random from `operators`.

    Args:
        max_count: Upper bound on the number of pairs considered, or None to
            consider every pair.
        operators: Operator characters to choose from. The default keeps the
            previous behaviour, in which only '*' was ever produced (the old
            '-' and '+' branches could never be reached).
        numbers: Iterable of operand values; the default covers 1..99.

    Yields:
        str: '<a><op><b>' without spaces. Subtractions always put the larger
        operand first, so their result is never negative.
    """
    pairs = itertools.permutations(numbers, 2)
    if max_count is not None:
        pairs = itertools.islice(pairs, max_count)
    pairs = list(pairs)
    random.shuffle(pairs)

    # (x, y) and (y, x) both normalise to the same subtraction string; the
    # set keeps any equation from being yielded twice.
    seen = set()
    for x, y in pairs:
        operator = random.choice(operators)
        if operator == '-' and x < y:
            x, y = y, x
        equation = '{}{}{}'.format(x, operator, y)
        if equation in seen:
            continue
        seen.add(equation)
        yield equation

In [3]:
def one_hot_to_index(vector):
    """Return the position of the largest entry of `vector`, or -1 when every entry is zero/False."""
    has_hot_entry = np.any(vector)
    return np.argmax(vector) if has_hot_entry else -1

def one_hot_to_char(vector):
    """Decode one one-hot (or probability) vector to its character; an all-zero vector decodes to ''."""
    position = one_hot_to_index(vector)
    return INDEX_TO_CHAR[position] if position != -1 else ''

def one_hot_to_string(matrix):
    """Decode every row of `matrix` with one_hot_to_char and concatenate the characters."""
    decoded = [one_hot_to_char(row) for row in matrix]
    return ''.join(decoded)
                
def equations_to_x_y(equations,n):
    """
    One-hot encode up to `n` equations and their evaluated results.

    Each equation is terminated with '\\0' and written left-aligned into `x`;
    unused trailing time steps stay all-zero. Each result is left-padded with
    spaces to MAX_RESULT_LENGTH-1 characters and terminated with '\\0'.

    Args:
        equations: Iterator of equation strings; at most `n` are consumed, so
            the same iterator can be passed again to build the next split.
        n: Number of examples requested.

    Returns:
        (x, y): boolean arrays of shape (m, MAX_EQUATION_LENGTH, N_FEATURES)
        and (m, MAX_RESULT_LENGTH, N_FEATURES). m == n unless `equations` ran
        out early, in which case m is the number of equations actually encoded.
    """
    import warnings

    # Plain `bool`: the `np.bool` alias was deprecated in NumPy 1.20 and later removed.
    x=np.zeros((n,MAX_EQUATION_LENGTH,N_FEATURES),dtype=bool)
    y=np.zeros((n,MAX_RESULT_LENGTH,N_FEATURES),dtype=bool)
    n_encoded=0
    for i,equation in enumerate(itertools.islice(equations,n)):
        # NOTE(review): eval() is only acceptable because the equations are
        # generated locally; never feed external strings through here.
        result=str(eval(equation))
        result=result.rjust(MAX_RESULT_LENGTH-1)

        equation +='\0'
        result   +='\0'
        for t,char in enumerate(equation):
            x[i,t,CHAR_TO_INDEX[char]]=1
        for t,char in enumerate(result):
            y[i,t,CHAR_TO_INDEX[char]]=1
        n_encoded=i+1

    if n_encoded<n:
        # Rows that were never filled would otherwise stay all-zero and be
        # trained/evaluated on as if they were real examples.
        warnings.warn('equations_to_x_y: requested {} examples but only {} were available'.format(n,n_encoded))
        x,y=x[:n_encoded],y[:n_encoded]
    return x,y


In [4]:
def build_dataset():
    """
    Encode up to N_EXAMPLES equations and split them into test and train.

    The split sizes are derived from the number of equations the generator
    actually delivers rather than from N_EXAMPLES alone: pairs of distinct
    numbers from range(1, 100) give only 99*98 = 9702 equations, fewer than
    the 30000 requested, and sizing the arrays from N_EXAMPLES would leave
    the missing rows as all-zero pseudo examples.

    Returns:
        x_test, y_test, x_train, y_train (in that order); the test split
        holds round(SPLIT * available) examples.
    """
    equations=list(itertools.islice(generate_equations(max_count=1000000),N_EXAMPLES))
    n_total=len(equations)
    n_test=round(SPLIT*n_total)
    n_train=n_total-n_test

    # Same order as before: the first n_test equations are held out.
    x_test,y_test=equations_to_x_y(iter(equations[:n_test]),n_test)
    x_train,y_train=equations_to_x_y(iter(equations[n_test:]),n_train)

    return x_test,y_test,x_train,y_train
    
def print_example_prediction(count,model,x_test,y_test):
    """
    Print `count` randomly chosen test equations with the model's answer.

    Each line shows the question, the predicted string, the true string
    (prefixed with 'T') and 'ok' / 'not ok' depending on whether both decoded
    strings are equal. Examples are drawn without replacement.
    """
    print('Examples:')
    chosen=np.random.choice(x_test.shape[0],size=count,replace=False)
    batch=x_test[chosen,:]
    print(np.array(batch).shape)
    predictions=model.predict(batch)
    for sample_id,predicted in zip(chosen,predictions):
        correct=one_hot_to_string(y_test[sample_id])
        guess=one_hot_to_string(predicted)
        question=one_hot_to_string(x_test[sample_id])

        print('Q {} - {} '.format(question,guess),end=' ')
        print('T', correct, end=' ')
        verdict='ok' if correct == guess else 'not ok'
        print( verdict,end=' ')
        print('\n')
      
        
def build_model(hidden_size=20, learning_rate=0.01):
    """
    Build and compile the sequence-to-sequence model.

    Only MAX_EQUATION_LENGTH, MAX_RESULT_LENGTH and N_FEATURES are taken from
    the global configuration. The layer width and the learning rate are
    arguments whose defaults reproduce the values that used to be hard-coded
    here; the HIDDEN_SIZE and LEARNING_RATE constants are not consulted.

    Args:
        hidden_size: Units per direction in each LSTM.
        learning_rate: Learning rate handed to the Adam optimizer.

    Returns:
        The compiled Sequential model.
    """
    input_shape = (MAX_EQUATION_LENGTH, N_FEATURES)

    model = Sequential()

    # Encoder: reads the whole equation and emits a single vector.
    model.add(Bidirectional(LSTM(hidden_size), input_shape=input_shape))
    model.add(BatchNormalization())

    # Feed the encoding to the decoder once per output time step.
    model.add(RepeatVector(MAX_RESULT_LENGTH))

    # Decoder: emits one vector per output time step.
    model.add(Bidirectional(LSTM(hidden_size, return_sequences=True)))
    model.add(BatchNormalization())

    # Per-time-step distribution over the character vocabulary.
    model.add(TimeDistributed(Dense(N_FEATURES)))
    model.add(Activation('softmax'))

    model.compile(
        loss='categorical_crossentropy',
        optimizer=Adam(lr=learning_rate),
        metrics=['accuracy'],
    )

    return model



In [5]:
def main():
    """
    Run the experiment end to end.

    Builds the model and the dataset from build_dataset(), prints 5
    predictions of the untrained model, then performs 100 rounds of one
    training epoch each, printing 10 sample predictions after every round.
    """
    model=build_model()
    model.summary()
    print()

    x_test,y_test,x_train,y_train=build_dataset()

    # Baseline output before any training has happened.
    print()
    print_example_prediction(5,model,x_test,y_test)
    print()

    divider='-'*50
    held_out=(x_test, y_test)
    for round_number in range(100):
        print()
        print(divider)
        print('Iteration',round_number)
        model.fit(x_train, y_train,batch_size=BATCH_SIZE,epochs=1,validation_data=held_out)
        print_example_prediction(10,model,x_test,y_test)
         

In [None]:
# Build the model and the dataset, then start the training loop.
main()


_________________________________________________________________
Layer (type)                 Output Shape              Param #   
bidirectional_1 (Bidirection (None, 40)                5760      
_________________________________________________________________
batch_normalization_1 (Batch (None, 40)                160       
_________________________________________________________________
repeat_vector_1 (RepeatVecto (None, 7, 40)             0         
_________________________________________________________________
bidirectional_2 (Bidirection (None, 7, 40)             9760      
_________________________________________________________________
batch_normalization_2 (Batch (None, 7, 40)             160       
_________________________________________________________________
time_distributed_1 (TimeDist (None, 7, 15)             615       
_________________________________________________________________
activation_1 (Activation)    (None, 7, 15)             0         
Total para

In [None]:
## seq2seq 学习乘法是比较困难的
## 尽管在train过程中 loss 很低，但是 acc 同样很低
## 在针对两位数的乘法训练中，seq2seq 已经比较难以拟合结果
## 它会比较容易学习 10的倍数的数相乘，比如52*90
## 观察实验结果可以发现，seq2seq 在错误的例子中也与正确答案有2-3个数字一样
## 可能需要更复杂的模型才能解决这个问题，
## 比如 CGRU
