In [121]:
%matplotlib inline
from jupyterthemes import jtplot
jtplot.style()
import numpy as np
import matplotlib.pyplot as plt

def preprocess(path='monks-1.csv'):
    '''
    Load a comma-separated numeric dataset and split it into labels and features.

    param: path, location of the CSV file (defaults to 'monks-1.csv' so that
           existing zero-argument calls keep working)
    returns: a tuple (DATA, X, Y) where
             DATA is the first seven columns of the file (columns 0..6),
             X is columns 1..6 (the feature matrix),
             Y is column 0 (the label vector).
    '''
    raw = np.genfromtxt(path, delimiter=',')
    # Any columns beyond index 6 are deliberately ignored by the slices below.
    DATA = raw[:, 0:7]
    X = raw[:, 1:7]
    Y = raw[:, 0]
    return DATA, X, Y

# Load the dataset once; the functions below read X and Y as module globals.
DATA, X, Y = preprocess()


def H(x):
    '''
    Logistic (sigmoid) hypothesis for input x.

    NOTE: this reads the module-level name `theta` at call time, so `theta`
    must be assigned globally before H is first called.
    '''
    return 1 / (1 + np.exp(-x.dot(theta)))


M = Y.size            # number of labelled rows
ALPHA = 0.1           # multiplier applied to each update in descend()
K = 1 / float(M)      # 1/M scaling factor applied to each update in descend()
ITERATIONS = 100      # number of descend() calls made by the training loop

In [130]:
def cost(theta: np.ndarray, features=None, labels=None) -> float:
    '''
    Summed logistic (cross-entropy) cost of a parameter vector.

    param: theta, parameter vector
    param: features, optional feature matrix, one row per example
           (defaults to the module-level X)
    param: labels, optional 0/1 label vector (defaults to the module-level Y)
    returns: cost that theta incurs on the data, summed over all rows
             (not divided by the number of rows)
    '''
    features = X if features is None else features
    labels = Y if labels is None else labels
    # Evaluate the sigmoid with the theta that was passed in. The previous
    # version went through H(), which silently used the global `theta` and
    # therefore ignored this function's argument.
    probs = 1 / (1 + np.exp(-features.dot(theta)))
    per_row = -(labels * np.log(probs)) - ((1 - labels) * np.log(1 - probs))
    return float(np.sum(per_row))

def accuracy(theta: np.ndarray, features=None, labels=None, threshold: float = 0.5) -> float:
    '''
    Fraction of rows that theta classifies correctly.

    param: theta, parameter vector
    param: features, optional feature matrix, one row per example
           (defaults to the module-level X)
    param: labels, optional 0/1 label vector (defaults to the module-level Y)
    param: threshold, probability at or above which a row is predicted as 1
    returns: number of correct predictions divided by the number of rows
    '''
    features = X if features is None else features
    labels = Y if labels is None else labels
    # Evaluate the sigmoid with the theta that was passed in. The previous
    # version went through H(), which silently used the global `theta` and
    # therefore ignored this function's argument.
    probs = 1 / (1 + np.exp(-features.dot(theta)))
    # Keep both comparisons explicit so that NaN probabilities and labels
    # other than 0/1 count as incorrect, exactly as in the row-by-row version.
    true_pos = (labels == 1) & (probs >= threshold)
    true_neg = (labels == 0) & (probs < threshold)
    correct = np.count_nonzero(true_pos | true_neg)
    return correct / float(labels.size)

def distances(theta: np.ndarray, features=None, labels=None) -> np.ndarray:
    '''
    Per-row residuals between the model's predicted probability and the label.

    param: theta, parameter vector
    param: features, optional feature matrix, one row per example
           (defaults to the module-level X)
    param: labels, optional label vector (defaults to the module-level Y)
    returns: an array of differences between the model and the actual values,
             i.e. sigmoid(features . theta) - labels, one entry per row
    '''
    features = X if features is None else features
    labels = Y if labels is None else labels
    # Evaluate the sigmoid with the theta that was passed in. The previous
    # version went through H(), which silently used the global `theta` and
    # therefore ignored this function's argument.
    probs = 1 / (1 + np.exp(-features.dot(theta)))
    return probs - labels

def descend(theta):
    '''
    Perform one gradient-descent update.

    param: theta, current parameter vector
    returns: a new parameter vector; the input is not modified in place

    Relies on the module-level X, ALPHA and K, and on distances().
    '''
    # Project the per-row residuals onto the feature columns.
    gradient = distances(theta).dot(X)
    # Scale by -ALPHA * K so that adding the step moves against the gradient.
    step = -ALPHA * K * gradient
    return theta + step

In [135]:
# `theta` must stay a module-level name: H() looks it up globally.
theta = np.ones(X.shape[1])
for _ in range(ITERATIONS):
    theta = descend(theta)
    # To monitor convergence, uncomment: print(cost(theta))

# Report training accuracy, then each row's predicted probability and label.
print(accuracy(theta))
for x, y in zip(X, Y):
    print(H(x), y)


0.6666666666666666
0.5776439908067061 1.0
0.6515367954908758 1.0
0.3945381310137029 1.0
0.4711361950509247 1.0
0.23691631027337529 1.0
0.29797306288052905 1.0
0.12886341751304695 1.0
0.16821184158406474 1.0
0.6120994618320055 1.0
0.6832694532217028 1.0
0.4291698039209923 1.0
0.5068628028258155 1.0
0.26373911800340694 1.0
0.32873058970325886 1.0
0.14579009797425835 1.0
0.1891846258638079 1.0
0.6454692504742261 1.0
0.7133836017976714 1.0
0.4645091692748721 1.0
0.5425194577023338 1.0
0.29243492782478836 1.0
0.36103025083670587 1.0
0.16452025856161193 1.0
0.2121055144816608 1.0
0.6515367954908758 1.0
0.7187952101620023 1.0
0.4711361950509247 1.0
0.5491181607565321 1.0
0.29797306288052905 1.0
0.3671932937172169 1.0
0.16821184158406474 1.0
0.2165880390754355 1.0
0.6832694532217028 1.0
0.746783404991318 1.0
0.5068628028258155 1.0
0.5842257452999755 1.0
0.32873058970325886 1.0
0.401014463603693 1.0
0.18918462586380794 1.0
0.24183874957218435 1.0
0.7133836017976714 1.0
0.7728661917355564 1.0
0.