In [2]:
import numpy as np
import random

In [104]:
class ModelData:
    """this is the model data for our "last number" training set.  We
    produce input of length N, drawing from the NUM_SET randomly and
    then we set the output to be simply the last element of the input
    vector

    """
    def __init__(self, N=10):
        self.N = N

        # our model input data
        self.x = np.random.randint(0, high=10, size=N)
        
        # our scaled model output data
        self.y = np.array([self.x[-1]])

    def interpret_result(self, out):
        """take the network output and return the number from the allowed
        sequence we are closest to

        """
        return max(0, min(9, int(np.round(out, decimals=0))))

In [177]:
class NeuralNetwork:

    def __init__(self, num_training_unique=100, eta=0.2, data_class=None):
        
        self.num_training_unique = num_training_unique
        self.eta = eta

        self.train_set = []
        for _ in range(self.num_training_unique):
            self.train_set.append(data_class())

        # initialize our matrix with Gaussian normal random numbers
        # we get the size from the length of the input and output
        model = self.train_set[0]
        self.N_out = len(model.y)
        self.N_in = len(model.x)

        self.A = np.random.normal(0.0, 1.0/np.sqrt(self.N_in * self.N_out), (self.N_out, self.N_in))

    def g(self, xi):
        """our sigmoid function"""
        return 1.0/(1.0 + np.exp(-xi))

    def train(self, n_epochs=10):
        """Do the minimization for the training"""

        # train
        for _ in range(n_epochs * len(self.train_set)):
            model = random.choice(self.train_set)

            # gradient descent -- just a single improvement.  eta
            # here is our learning rate

            # make these column vectors
            x = model.x.reshape(self.N_in, 1)
            y = model.y.reshape(self.N_out, 1)

            b = self.A @ x
            z = self.g(b)

            self.A[:,:] += -self.eta * 2 * (z - y) * z * (1 - z) @ x.T

    def predict(self, model):
        """ predict the outcome using our trained matrix A """
        z = self.g(self.A @ model.x)
        return model.interpret_result(z)

In [178]:
nn = NeuralNetwork(num_training_unique=100, data_class=ModelData)
nn.train(n_epochs=100)

In [179]:
print(nn.A)

[[ 0.33768883  0.61156985  0.0997347   1.90597873  1.36356267 -0.01097751
   0.67079972  0.92404173  0.69467818  0.75549632]]


In [180]:
err = []
npts = 100
n_right = 0
for k in range(npts):
    model = ModelData()
    y_nn = nn.predict(model)
    e = float(y_nn - model.x[-1])
    if e == 0:
        n_right += 1
    err.append(e)
    
print(f"fraction correct: {n_right / npts}")

fraction correct: 0.1


Clearly we are not doing that great.  Let's look at a single attempt

In [181]:
model = ModelData()

In [182]:
model.x

array([9, 2, 4, 3, 0, 2, 1, 0, 5, 1])

In [183]:
model.y

array([1])

In [184]:
nn.predict(model)

1

In [185]:
nn.A @ model.x

array([15.25694615])

# Scaled Data
Let's try again, but this time, let's scale the output that we train to by 10 so it falls within $[0, 1]$.

In [186]:
class ModelDataScaled:
    """this is the model data for our "last number" training set.  We
    produce input of length N, drawing from the NUM_SET randomly and
    then we set the output to be simply the last element of the input
    vector

    """
    def __init__(self, N=10):
        self.N = N
        self.offset = 0.05
        
        # our model input data
        self.x = np.random.randint(0, high=10, size=N) / 10 + self.offset

        # our scaled model output data
        self.y = np.array([self.x[-1]])

    def interpret_result(self, out):
        """take the network output and return the number from the allowed
        sequence we are closest to

        """
        return np.round(out / self.offset) * self.offset

In [187]:
nn = NeuralNetwork(num_training_unique=100, data_class=ModelDataScaled)
nn.train(n_epochs=100)

In [188]:
nn.A

array([[-0.50666556, -0.42220143, -0.82062978, -0.54440861, -0.626418  ,
        -0.78876666, -0.14953013, -0.45645145, -0.19193429,  4.65100156]])

In [189]:
err = []
npts = 100
n_right = 0
for k in range(npts):
    model = ModelDataScaled()
    y_nn = nn.predict(model)
    e = float(y_nn - model.y)
    if e == 0:
        n_right += 1
    err.append(e)
    
print(f"fraction correct: {n_right / npts}")

fraction correct: 0.16


In [190]:
model = ModelDataScaled()

In [191]:
p = nn.g(nn.A @ model.x)

In [192]:
np.round(p / 0.05) * 0.05

array([0.75])

In [193]:
model.y

array([0.95])

## Categorical 

In [194]:
class ModelDataCategorical:
    """this is the model data for our "last number" training set.  We
    produce input of length N, drawing from the NUM_SET randomly and
    then we set the output to be simply the last element of the input
    vector

    """
    def __init__(self, N=10):
        self.N = N
        
        # our model input data
        self.x = np.random.randint(0, high=10, size=N)

        # our scaled model output data
        self.y = np.zeros(10) + 0.01
        self.y[self.x[-1]] = 0.99

    def interpret_result(self, out):
        """take the network output and return the number we predict"""
        return np.argmax(out)

In [195]:
nn = NeuralNetwork(num_training_unique=100, data_class=ModelDataCategorical)
nn.train(n_epochs=100)

In [199]:
err = []
npts = 100
n_right = 0
for k in range(npts):
    model = ModelDataCategorical()
    y_nn = nn.predict(model)
    e = float(y_nn - np.argmax(model.y))
    if e == 0:
        n_right += 1
    err.append(e)
    
print(f"fraction correct: {n_right / npts}")

fraction correct: 0.1


In [139]:
model = ModelDataCategorical()

In [140]:
model.x

array([2, 0, 2, 6, 3, 8, 5, 3, 3, 7])

In [141]:
model.y

array([0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.99, 0.01, 0.01])

In [142]:
nn.g(nn.A @ model.x)

array([8.82774251e-32, 3.93339100e-28, 1.36722111e-21, 7.80687234e-03,
       2.36845144e-09, 3.08940647e-08, 2.19174955e-09, 4.53080714e-25,
       6.80767078e-08, 1.82519567e-09])

In [143]:
print(err)

[2.0, -3.0, -2.0, -1.0, 0.0, -1.0, 5.0, -1.0, -1.0, 0.0, 2.0, -3.0, 1.0, 0.0, 0.0, 1.0, -1.0, 1.0, 0.0, 0.0, -2.0, -1.0, 1.0, 4.0, 0.0, 0.0, -1.0, 2.0, -4.0, -2.0, -3.0, -1.0, -1.0, 2.0, 0.0, -2.0, 1.0, -2.0, -4.0, -4.0, -2.0, 2.0, -5.0, 2.0, 1.0, 0.0, -2.0, 0.0, 0.0, 0.0, 3.0, 1.0, -1.0, -6.0, 0.0, -2.0, 0.0, -2.0, 0.0, 1.0, -2.0, -5.0, -2.0, -1.0, 1.0, -1.0, 1.0, 2.0, -1.0, 1.0, 0.0, -2.0, -6.0, -2.0, -5.0, 0.0, 0.0, -2.0, -1.0, -4.0, -1.0, 1.0, 0.0, -1.0, 0.0, -3.0, 0.0, 0.0, 2.0, -1.0, -1.0, -4.0, 1.0, -2.0, 0.0, 0.0, 0.0, 0.0, -4.0, 3.0]


In [74]:
nn.A.shape

(10, 10)