# Theano Tutorial

In [14]:
import numpy as np
import theano.tensor as T
import theano
from theano import function
from theano import pp

## 1. Basic Operation

In [15]:
#1. Declare symbolic inputs and compile a scalar addition
# Float64 scalars. The Python names are upper-case; the Theano graph names are 'x' and 'y'.
X, Y = T.dscalars('x', 'y')
# Float64 matrices used by the following cells.
x, y = T.dmatrices('x', 'y')
a, b = T.dmatrices('a', 'b')

# Symbolic sum of the two scalars, compiled into a callable.
Z = X + Y
f = function(inputs=[X, Y], outputs=Z)
print(f(2, 3))

5.0


In [16]:
# Elementwise logistic function 1 / (1 + e^-x) over the matrix input `x`.
s = 1 / (1 + T.exp(-x))
f1 = function(inputs=[x], outputs=s)
print(f1([[2, 3], [3, 5]]))

[[0.88079708 0.95257413]
 [0.95257413 0.99330715]]


In [17]:
# One compiled function returning three results at once:
# the elementwise difference, its absolute value, and that value squared.
diff = a - b
absdiff = abs(diff)
diff_square = absdiff ** 2
f2 = function(inputs=[a, b], outputs=[diff, absdiff, diff_square])
print(f2(np.ones((2, 2)), np.arange(4).reshape(2, 2)))

[array([[ 1.,  0.],
       [-1., -2.]]), array([[1., 0.],
       [1., 2.]]), array([[1., 0.],
       [1., 4.]])]


In [20]:
# Shared variable: a float64 scalar whose value persists between function calls.
state = theano.shared(value=np.zeros((), dtype=np.float64), name='state')
# The increment uses the same dtype as the shared value.
inc = T.scalar(name='inc', dtype=state.dtype)
# Each call outputs `state` and replaces the stored value with state + inc.
accumulator = function(
    inputs=[inc],
    outputs=state,
    updates=[(state, state + inc)],
)

In [21]:
# Show the stored value, then add 10 and 1 in turn, printing after each call.
print(state.get_value())
for step in (10, 1):
    accumulator(step)
    print(state.get_value())

# The stored value can also be overwritten directly before accumulating again.
state.set_value(1)
accumulator(2)
print(state.get_value())

0.0
10.0
11.0
3.0


In [22]:
# Temporarily replace the shared variable with another value inside one function.
# NOTE: this rebinds `a` and `b`, which were matrices in the earlier cells.
a = T.scalar(dtype=state.dtype)
b = state * 2 + inc
# `givens` substitutes `a` for `state` when this function is compiled.
accumulator1 = function(inputs=[inc, a], outputs=b, givens=[(state, a)])
print(accumulator1(2, 3))

# Print the shared value itself after the call.
print(state.get_value())

8.0
3.0


In [23]:
#2. Define layers
class Layer(object):
    """Dense layer computing ``activation_function(dot(inputs, W) + b)``.

    Parameters
    ----------
    inputs : symbolic variable multiplied by ``W`` through ``T.dot``.
    in_size : number of rows of the weight matrix ``W``.
    out_size : number of columns of ``W`` and length of the bias ``b``.
    activation_function : callable applied to the affine output, or ``None``
        to expose the affine output unchanged.
    """

    def __init__(self, inputs, in_size, out_size, activation_function=None):
        # Weights are sampled from a normal distribution with mean 0 and std 1.
        initial_weights = np.random.normal(loc=0, scale=1, size=(in_size, out_size))
        # Every bias starts at 0.1.
        initial_bias = np.full((out_size,), 0.1)

        self.W = theano.shared(initial_weights)
        self.b = theano.shared(initial_bias)
        self.Wx_plus_b = T.dot(inputs, self.W) + self.b
        self.activation_function = activation_function
        self.outputs = (
            self.Wx_plus_b
            if activation_function is None
            else activation_function(self.Wx_plus_b)
        )

## 2. Regressor

In [25]:
#1. Define layers
# NOTE: this repeats the Layer class from the "Basic Operation" section; running
# this cell replaces that earlier definition with an equivalent one.
class Layer(object):
    """Single affine layer with an optional activation.

    ``inputs`` is the symbolic variable fed to the layer, ``in_size`` and
    ``out_size`` give the shape of the weight matrix, and
    ``activation_function`` (when not ``None``) is applied to the affine output.
    """

    def __init__(self, inputs, in_size, out_size, activation_function=None):
        weight_shape = (in_size, out_size)
        # W: normal(0, 1) samples; b: constant 0.1 per output unit.
        self.W = theano.shared(np.random.normal(0, 1, weight_shape))
        self.b = theano.shared(np.zeros((out_size,)) + 0.1)
        self.activation_function = activation_function

        # Affine part, kept as its own attribute.
        self.Wx_plus_b = T.dot(inputs, self.W) + self.b

        outputs = self.Wx_plus_b
        if activation_function is not None:
            outputs = activation_function(outputs)
        self.outputs = outputs

In [30]:
#2. Build the regression graph: 1 -> 10 (ReLU) -> 1 (linear)
x, y = T.dmatrices('x', 'y')

l1 = Layer(inputs=x, in_size=1, out_size=10, activation_function=T.nnet.relu)
l2 = Layer(inputs=l1.outputs, in_size=10, out_size=1, activation_function=None)

# Mean squared error between the network output and the target.
cost = T.square(l2.outputs - y).mean()
# Gradients of the cost with respect to every weight and bias.
gW1, gb1, gW2, gb2 = T.grad(cost=cost, wrt=[l1.W, l1.b, l2.W, l2.b])

In [31]:
#3. Compile the training step and the predictor
learning_rate = 0.05
# Plain gradient descent: each parameter moves against its own gradient.
sgd_updates = [
    (param, param - learning_rate * grad)
    for param, grad in ((l1.W, gW1), (l1.b, gb1), (l2.W, gW2), (l2.b, gb2))
]
# One call returns the cost and applies the updates above.
train = theano.function(inputs=[x, y], outputs=cost, updates=sgd_updates)
# Forward pass only.
predict = theano.function(inputs=[x], outputs=l2.outputs)

In [32]:
# Synthetic regression data: 300 points on [-1, 1], stored as a column.
x_data = np.linspace(-1, 1, 300).reshape(-1, 1)
# Gaussian noise (mean 0, std 0.05) with the same shape as the inputs.
noise = np.random.normal(loc=0, scale=0.05, size=x_data.shape)
# y = x^2 - 0.5 + white noise
y_data = x_data ** 2 - 0.5 + noise

In [34]:
#4. Run training
for i in range(1000):
    # One gradient-descent step on the full data set; `train` returns the cost.
    err = train(x_data, y_data)
    if i % 50:
        continue
    # Report the cost every 50th step.
    print('loss = ',err)

loss =  0.0033993545126246587
loss =  0.003366331468226002
loss =  0.003334997770550666
loss =  0.003305281066661631
loss =  0.0032714251269618353
loss =  0.003243027361688797
loss =  0.003218678781197237
loss =  0.0031965899769304295
loss =  0.003173364033589988
loss =  0.0031526951627376953
loss =  0.003133079630045783
loss =  0.003115943042568304
loss =  0.003101909201799517
loss =  0.0030897221269296418
loss =  0.003079004012166864
loss =  0.0030674528851822353
loss =  0.003055307097527482
loss =  0.0030448543067851342
loss =  0.0030371360666536945
loss =  0.0030306063664698917


## 3. Classification

In [68]:
#1. Random classification data
feat = 784   # number of input features per sample
N = 200      # number of samples

# Standard-normal features and random 0/1 labels.
x_data = np.random.standard_normal((N, feat))
y_data = np.random.randint(0, 2, size=N)

In [72]:
#2. Build the logistic-regression graph
X = T.dmatrix('X')
Y = T.dvector('Y')

# Trainable parameters: one weight per feature and a scalar bias.
W = theano.shared(np.random.randn(feat), name='W')
b = theano.shared(0., name='b')

# Sigmoid of the affine score, thresholded at 0.5 for the prediction.
p_1 = T.nnet.sigmoid(T.dot(X, W) + b)
pred = p_1 > 0.5
# Per-sample cross-entropy between the targets Y and p_1.
xent = -(Y * T.log(p_1) + (1 - Y) * T.log(1 - p_1))
# Mean cross-entropy plus a squared-weight penalty scaled by 0.01.
cost = xent.mean() + 0.01 * (W ** 2).sum()
gW, gb = T.grad(cost=cost, wrt=[W, b])

In [73]:
#3. Compile the training step and the predictor
# Each call returns [predictions, mean cross-entropy] and then moves W and b
# against their gradients with a step size of 0.1.
train = function(
    inputs=[X, Y],
    outputs=[pred, xent.mean()],
    updates=(
        (W, W - 0.1 * gW),
        (b, b - 0.1 * gb),
    ),
)
predict = function(inputs=[X], outputs=pred)

In [76]:
#4. Run Training
for i in range(1000):
    # `train` returns the numeric predictions and the mean cross-entropy.
    # The predictions are bound to `pred_values` rather than `pred`: `pred` is the
    # symbolic variable that `train` and `predict` are compiled from, and
    # overwriting it with an array would break re-running the cell that
    # defines those functions.
    pred_values, err = train(x_data, y_data)
    if i % 50 == 0:
        print('loss = ',err)

loss =  0.024335293975827142
loss =  0.024334963861475486
loss =  0.024334649533150357
loss =  0.024334349458357246
loss =  0.024334062273674677
loss =  0.024333786773224165
loss =  0.024333521895151488
loss =  0.02433326670737744
loss =  0.02433302039345535
loss =  0.024332782239066316
loss =  0.024332551619465518
loss =  0.024332327988037895
loss =  0.0243321108660187
loss =  0.02433189983336484
loss =  0.024331694520721432
loss =  0.024331494602401808
loss =  0.02433129979028874
loss =  0.024331109828560394
loss =  0.02433092448914631
loss =  0.024330743567824137
