In [121]:
%matplotlib inline
from jupyterthemes import jtplot
jtplot.style()
import numpy as np
import matplotlib.pyplot as plt

def preprocess(source='monks-1.csv'):
    '''
    param: source, anything np.genfromtxt can read (file name, open file,
           or a list of comma-separated lines); defaults to 'monks-1.csv'
    returns: (DATA, X, Y) where DATA is the first seven columns of the file,
             X is columns 1 through 6 and Y is column 0
    '''
    raw = np.genfromtxt(source, delimiter=',')
    DATA = raw[:, 0:7]
    X = raw[:, 1:7]  # columns after the first are the inputs
    Y = raw[:, 0]    # first column is the target
    return DATA, X, Y

# Load the dataset once; X is the feature matrix and Y the target column.
DATA, X, Y = preprocess()


def H(x, weights=None):
    '''
    param: x, a feature row (or matrix of rows) to score
    param: weights, parameter vector; when omitted, the module-level `theta`
           is looked up at call time, so `theta` must exist before H(x) is called
    returns: the logistic sigmoid of x.dot(weights)
    '''
    if weights is None:
        weights = theta  # module-level theta, bound by the training cell
    return 1 / (1 + np.exp(-x.dot(weights)))


M = Y.size          # number of examples
ALPHA = 0.1         # step-size multiplier used by descend
K = 1/float(M)      # 1/M scaling applied to the summed gradient in descend
ITERATIONS = 100    # number of descent steps run by the training cell

In [130]:
def cost(theta: np.ndarray, features=None, labels=None) -> float:
    '''
    param: theta, parameter vector
    param: features, optional matrix with one row per example; defaults to
           the module-level X
    param: labels, optional vector of 0/1 targets; defaults to the
           module-level Y
    returns: summed (not averaged) cross-entropy cost that theta incurs on
             the data
    '''
    features = X if features is None else np.asarray(features, dtype=float)
    labels = Y if labels is None else np.asarray(labels, dtype=float)

    # Score with the theta that was passed in, not whatever global theta
    # happens to be bound when the function is called.
    predictions = 1 / (1 + np.exp(-features.dot(theta)))

    # Keep predictions strictly inside (0, 1): a saturated sigmoid would
    # otherwise produce 0 * log(0) = nan in the terms below.
    tiny = np.finfo(float).eps
    predictions = np.clip(predictions, tiny, 1 - tiny)

    losses = -(labels * np.log(predictions)) - ((1 - labels) * np.log(1 - predictions))
    return float(losses.sum())

def accuracy(theta: np.ndarray, features=None, labels=None) -> float:
    '''
    param: theta, parameter vector
    param: features, optional matrix with one row per example; defaults to
           the module-level X
    param: labels, optional vector of 0/1 targets; defaults to the
           module-level Y
    returns: fraction of examples classified correctly at a 0.5 threshold
    '''
    features = X if features is None else np.asarray(features, dtype=float)
    labels = Y if labels is None else np.asarray(labels, dtype=float)
    threshold = 0.5

    # Score with the theta that was passed in rather than a global one.
    probabilities = 1 / (1 + np.exp(-features.dot(theta)))

    # A row counts as correct only when its label is exactly 1 and the score
    # reaches the threshold, or its label is exactly 0 and the score is below it.
    true_pos = (labels == 1) & (probabilities >= threshold)
    true_neg = (labels == 0) & (probabilities < threshold)
    return np.count_nonzero(true_pos | true_neg) / float(labels.size)

def distances(theta: np.ndarray, features=None, labels=None) -> np.ndarray:
    '''
    param: theta, parameter vector
    param: features, optional matrix with one row per example; defaults to
           the module-level X
    param: labels, optional vector of targets; defaults to the module-level Y
    returns: an array of differences between the model and the actual values
             (prediction minus label, one entry per example)
    '''
    features = X if features is None else np.asarray(features, dtype=float)
    labels = Y if labels is None else np.asarray(labels, dtype=float)

    # Score with the theta that was passed in rather than a global one.
    predictions = 1 / (1 + np.exp(-features.dot(theta)))
    return predictions - labels

def descend(theta):
    '''
    param: theta, current parameter vector
    returns: theta after one gradient-descent step scaled by ALPHA and K
    '''
    # Residuals projected onto the feature columns give the summed gradient.
    gradient = distances(theta).dot(X)
    # Negative sign: move against the gradient.
    step = -ALPHA * K * gradient
    return theta + step

In [134]:
# Start from an all-ones parameter vector, one weight per feature column.
theta = np.ones(X.shape[1])
# Rebind the module-level theta on every step.
for _ in range(ITERATIONS):
    theta = descend(theta)
    # To watch convergence, print(cost(theta)) here.
print(accuracy(theta))
# Show each row's predicted probability beside its true label.
for x, y in zip(X, Y):
    print(H(x), y)


0.6666666666666666
