In [7]:
from numpy import exp, array, random, dot
import nltk

class NeuronLayer():
    def __init__(self, number_of_neurons, number_of_inputs_per_neuron):
        self.synaptic_weights = 2 * random.random((number_of_inputs_per_neuron, number_of_neurons)) - 1


class NeuralNetwork():
    def __init__(self, layer1, layer2):
        self.layer1 = layer1
        self.layer2 = layer2

    # The Sigmoid function, which describes an S shaped curve.
    # We pass the weighted sum of the inputs through this function to
    # normalise them between 0 and 1.
    def __sigmoid(self, x):
        return 1 / (1 + exp(-x))

    # The derivative of the Sigmoid function.
    # This is the gradient of the Sigmoid curve.
    # It indicates how confident we are about the existing weight.
    def __sigmoid_derivative(self, x):
        return x * (1 - x)

    # We train the neural network through a process of trial and error.
    # Adjusting the synaptic weights each time.
    def train(self, training_set_inputs, training_set_outputs, number_of_training_iterations):
        for iteration in xrange(number_of_training_iterations):
            # Pass the training set through our neural network
            output_from_layer_1, output_from_layer_2 = self.think(training_set_inputs)

            # Calculate the error for layer 2 (The difference between the desired output
            # and the predicted output).
            layer2_error = training_set_outputs - output_from_layer_2
            layer2_delta = layer2_error * self.__sigmoid_derivative(output_from_layer_2)

            # Calculate the error for layer 1 (By looking at the weights in layer 1,
            # we can determine by how much layer 1 contributed to the error in layer 2).
            layer1_error = layer2_delta.dot(self.layer2.synaptic_weights.T)
            layer1_delta = layer1_error * self.__sigmoid_derivative(output_from_layer_1)

            # Calculate how much to adjust the weights by
            layer1_adjustment = training_set_inputs.T.dot(layer1_delta)
            layer2_adjustment = output_from_layer_1.T.dot(layer2_delta)

            # Adjust the weights.
            self.layer1.synaptic_weights += layer1_adjustment
            self.layer2.synaptic_weights += layer2_adjustment

    # The neural network thinks.
    def think(self, inputs):
        output_from_layer1 = self.__sigmoid(dot(inputs, self.layer1.synaptic_weights))
        output_from_layer2 = self.__sigmoid(dot(output_from_layer1, self.layer2.synaptic_weights))
        return output_from_layer1, output_from_layer2

    # The neural network prints its weights
    def print_weights(self):
        print "    Layer 1 (4 neurons, each with 3 inputs): "
        print self.layer1.synaptic_weights
        print "    Layer 2 (1 neuron, with 4 inputs):"
        print self.layer2.synaptic_weights

        
    # return bag of words array
    def bow(self, sentence, words):
        for s in sentence:
            bag = []
            for w in words:
                bag.append(1) if w in sentence else bag.append(0)
        return(array(bag))

In [12]:
if __name__ == "__main__":

    # try some sample sentences
    sentence = []
    sentence.append("this is a test")
    sentence.append("we might have a test")
    sentence.append("tests are fun")
    sentence.append("this is not a test")
    sentence.append("a test is not what you need")
    sentence.append("I am a dog")
    sentence.append("you are my friend")
    sentence.append("was that a beagle")

    words = []
    for s in sentence:
        words.extend(s.split(' '))
    words = list(set(words))
    print "# unique words:", len(words)
    print "# training sentences:", len(sentence)
    print "sentences:", sentence
    print "words:", words
    print 
    
    #Seed the random number generator
    random.seed(1)

    # Create layer 1 (4 neurons, each with an input for each unique word in corpus)
    layer1 = NeuronLayer(4, len(words))

    # Create layer 2 (a single neuron with 4 inputs)
    layer2 = NeuronLayer(1, 4)

    # Combine the layers to create a neural network
    neural_network = NeuralNetwork(layer1, layer2)

    print "Stage 1) Random starting synaptic weights: "
    #neural_network.print_weights()
    
    # training set, bag of words for each sentence
    training = []
    for s in sentence:
        bag = []
        for w in words:
            bag.append(1) if w in s else bag.append(0)
        training.append(bag)
    
    training_set_inputs = array(training)
    training_set_outputs = array([[1, 1, 1, 1, 1, 0, 0, 0]]).T
    
    # The training set. We have 7 examples, each consisting of 3 input values
    # and 1 output value.
#    training_set_inputs = array([[0, 0, 1], [0, 1, 1], [1, 0, 1], [0, 1, 0], [1, 0, 0], [1, 1, 1], [0, 0, 0]])
#    training_set_outputs = array([[0, 1, 1, 1, 1, 0, 0]]).T

    # Train the neural network using the training set.
    # Do it 50,000 times and make small adjustments each time.
    neural_network.train(training_set_inputs, training_set_outputs, 50000)

    print "Stage 2) New synaptic weights after training: "
    neural_network.print_weights()

    # Test the neural network with a new situation.
    #print "Stage 3) Considering a new situation [1, 1, 0] -> ?: "
    #hidden_state, output = neural_network.think(array([1, 1, 0]))

    sentence = "make me a sandwich"
    print "Stage 3) Considering a new sentence: ", sentence
    hidden_state, output = neural_network.think(neural_network.bow(sentence, words))
    
    print output


# unique words: 22
# training sentences: 8
sentences: ['this is a test', 'we might have a test', 'tests are fun', 'this is not a test', 'a test is not what you need', 'I am a dog', 'you are my friend', 'was that a beagle']
words: ['is', 'am', 'tests', 'are', 'have', 'need', 'what', 'beagle', 'test', 'you', 'was', 'friend', 'we', 'that', 'I', 'not', 'a', 'this', 'might', 'dog', 'fun', 'my']

Stage 1) Random starting synaptic weights: 
Stage 2) New synaptic weights after training: 
    Layer 1 (4 neurons, each with 3 inputs): 
[[  5.04862377e-01   7.92541067e-01  -1.99228323e+00  -5.91431697e-01]
 [ -1.42111166e+00  -9.80508409e-01  -2.34398368e-02  -1.16485994e-01]
 [  4.04388867e-01   3.06924620e-01  -1.00412024e+00  -4.46004490e-02]
 [ -4.66745631e-01   5.94058648e-01  -7.19311984e-01   2.02017749e-01]
 [  5.53438596e-01   4.06626986e-01  -1.03696326e+00  -7.46892541e-01]
 [  7.19729960e-01   9.56338397e-01  -4.37241393e-01   3.22571184e-01]
 [  8.71019127e-01   8.09028573e-01  -8.940