# Theano logistic regression

In [None]:
%pylab inline
import seaborn as sns
import numpy as np
import theano
from theano import tensor

In [None]:
# Build a synthetic dataset D = (input_values, target_class):
# N samples with `feats` normally distributed features each, paired
# with integer class labels drawn from {0, 1}.
rng = np.random
N, feats = 5000, 800
input_values = rng.randn(N, feats)
target_class = rng.randint(low=0, high=2, size=N)
D = (input_values, target_class)
# number of training iterations to run
training_steps = 200

In [None]:
# Symbolic placeholders for the Theano graph:
# `x` is a float64 matrix (inputs), `y` a float64 vector (targets).
x = tensor.dmatrix(name='x')
y = tensor.dvector(name='y')

In [None]:
# Model parameters are Theano shared variables, so their values
# persist from one training update to the next.

# weight vector: one randomly drawn coefficient per feature
initial_weights = rng.randn(feats)
w = theano.shared(value=initial_weights, name="w")

# bias term, starting at zero
initial_bias = 0.
b = theano.shared(value=initial_bias, name="b")

In [None]:
# Symbolic expression graph for L2-regularised logistic regression.

# Probability that target = 1 (logistic function of x.w + b)
neg_activation = -tensor.dot(x, w) - b
p_1 = 1 / (1 + tensor.exp(neg_activation))

# Hard class decision: true where the probability exceeds one half
prediction = p_1 > 0.5

# Element-wise cross-entropy loss
positive_term = -y * tensor.log(p_1)
negative_term = (1. - y) * tensor.log(1. - p_1)
xent = positive_term - negative_term

# Accuracy = 1 - (fraction of entries where prediction and y differ)
error_rate = tensor.sum(tensor.abs_(prediction - y) / y.size)
accuracy = 1. - error_rate

# The cost to minimize: mean cross-entropy plus a weight-decay penalty
l2_penalty = 0.01 * (w ** 2).sum()
cost = xent.mean() + l2_penalty

# Gradients of the cost with respect to the weights and the bias
gw, gb = tensor.grad(cost, wrt=[w, b])

In [None]:
# Compile the symbolic graph into callable functions.
#
# Each call to `train` returns the predictions, the element-wise
# cross-entropy and the accuracy for the data it is given, and moves
# w and b one step (size 0.5) against their gradients.
step = 0.5
sgd_updates = [
    (w, w - step * gw),
    (b, b - step * gb),
]
train = theano.function(
    inputs=[x, y],
    outputs=[prediction, xent, accuracy],
    updates=sgd_updates,
)

# `predict` only evaluates the hard class decision for given inputs
predict = theano.function(inputs=[x], outputs=prediction)

In [None]:
%%time
# Run `training_steps` updates on the full dataset, printing the step
# index, the mean cross-entropy and the accuracy reported by each call.
#
# The accuracy returned by `train` is bound to `train_accuracy` rather
# than `accuracy`: rebinding `accuracy` here would overwrite the
# symbolic expression of that name that `train` was compiled from, so
# the compile cell could no longer be re-run after training.
for i in range(training_steps):
    pred, err, train_accuracy = train(D[0], D[1])
    print(i, err.mean(), train_accuracy)

# Scikit-learn logistic regression

In [None]:
import numpy as np
from sklearn.linear_model import LogisticRegression

In [None]:
# scikit-learn baseline: logistic regression with no intercept term
# and a tight convergence tolerance.
# NOTE(review): the Theano model above also learns a bias `b`; confirm
# that disabling the intercept is intended for this comparison.
regressor = LogisticRegression(
    fit_intercept=False,
    tol=1e-8,
)

In [None]:
%%time
# fit the scikit-learn estimator on the dataset D = (inputs, targets)
model = regressor.fit(X=D[0], y=D[1])

In [None]:
# class labels the fitted model assigns to the training inputs
# NOTE(review): this rebinds `predict`, replacing the compiled Theano
# function of the same name defined earlier in the notebook.
predict = model.predict(X=D[0])

In [None]:
# accuracy = 1 - (fraction of samples where prediction and target differ)
targets = D[1]
mismatch = np.abs(predict - targets)
accuracy = 1. - np.sum(mismatch / targets.size)

In [None]:
# display the current value of `accuracy` as the cell output
accuracy