In [3]:
%matplotlib inline
import numpy as np
import numpy.random as ran
import matplotlib.pyplot as plt

# Raw dataset, parsed from a comma-separated file in the working directory.
DATA = np.genfromtxt('data.csv', delimiter=',')
# Feature matrix: the first five columns of DATA.
X = DATA[:, 0:5]
# Target vector: the sixth column of DATA.
Y = DATA[:, 5]
# When True, a later cell rescales every feature column by the reciprocal of its mean.
NORMALIZE = True

# NOTE(review): SEED is not referenced in the visible cells — confirm it is still needed.
SEED = 10
# Number of gradient-descent steps run by the training loop.
ITERATIONS = 1000
# Step-size factor applied to the gradient in descend().
ALPHA = 0.003
# NOTE(review): presumably the reference parameters the data was generated
# from (the fitted 'theta mod' output lands close to them) — confirm.
THETA = np.array([1, 1, 1/2, 2, 1/5])
# NOTE(review): not referenced in the visible cells; presumably one
# (mean, spread) pair per feature column — confirm against the data generator.
R = np.array([[3, 1], [6, 5/2], [10, 3], [2, 1], [20, 5]])
# Reciprocal of the number of rows in DATA; scales the gradient in descend().
K = 1/(DATA.shape[0])

In [4]:
def invertedMeans(X):
    """Return the reciprocal of every column mean of ``X``.

    For a matrix with ``n`` rows the result for each column is
    ``n / column_sum``, so multiplying the columns by it rescales each one
    to a mean of 1.

    Args:
        X: 2-D array-like of shape ``(n_rows, n_columns)``. Nested lists are
            accepted as well as NumPy arrays.

    Returns:
        1-D ``np.ndarray`` with one value per column.

    Note:
        A column whose sum is zero produces a non-finite entry under NumPy's
        division rules; no exception is raised.
    """
    X = np.asarray(X)
    # A single reduction over the row axis replaces the per-column Python loop.
    return X.shape[0] / X.sum(axis=0)

def normalized(X):
    """Scale each column of ``X`` by the reciprocal of that column's mean.

    The per-column factors come from ``invertedMeans(X)`` and are broadcast
    across the rows. ``X`` itself is not modified; a new array is returned.
    """
    return X * invertedMeans(X)

# Optional preprocessing, switched on by the NORMALIZE flag.
if NORMALIZE:
    # NOTE: despite the 'means' label, invertedMeans() yields the reciprocal
    # of each column mean (row count / column sum).
    print(f'means {invertedMeans(X)}')
    # X is rebound to its rescaled version, so re-running this cell without
    # re-running the cell that defines X operates on already-normalized data.
    X = normalized(X)

means [0.33371735 0.17242657 0.09769546 0.48057735 0.04842994]


In [5]:
def cost(theta: np.ndarray) -> float:
    """Sum of squared residuals of the linear model ``X @ theta`` against ``Y``.

    Reads the module-level feature matrix ``X`` and target vector ``Y``.

    Args:
        theta: 1-D parameter vector with one entry per column of ``X``.

    Returns:
        The squared error summed over all rows of ``X`` (neither averaged nor
        halved), as a plain Python ``float``.
    """
    # The rows are taken from X itself instead of DATA.shape[0], so the
    # function depends only on the arrays it actually reads.
    residuals = X @ theta - Y
    return float(residuals @ residuals)

def influence(theta: np.ndarray, index: int) -> float:
    """Unscaled gradient component of the squared error for one parameter.

    Computes ``sum_i (X[i] . theta - Y[i]) * X[i, index]`` over all rows of
    the module-level ``X`` and ``Y``, which is half the partial derivative of
    ``cost(theta)`` with respect to ``theta[index]``.

    Args:
        theta: 1-D parameter vector with one entry per column of ``X``.
        index: Column of ``X`` (and position in ``theta``) to differentiate by.

    Returns:
        The summed residual-times-feature value as a plain Python ``float``.
    """
    residuals = X @ theta - Y
    # Dot product with the selected feature column replaces the row-by-row loop.
    return float(residuals @ X[:, index])

def descend(theta: np.ndarray) -> np.ndarray:
    """Perform one gradient-descent step and return the updated parameters.

    The update is ``theta - K * ALPHA * X.T @ (X @ theta - Y)``, using the
    module-level ``X``, ``Y``, ``K`` and ``ALPHA``. Each gradient entry equals
    what ``influence(theta, j)`` computes for column ``j``; here all entries
    are obtained from a single residual vector instead of recomputing the
    residuals once per parameter.

    Args:
        theta: Current 1-D parameter vector, one entry per column of ``X``.

    Returns:
        A new ``np.ndarray`` with the stepped parameters; ``theta`` is not
        modified in place.
    """
    residuals = X @ theta - Y
    # The gradient length follows X / theta rather than the unrelated THETA constant.
    gradient = X.T @ residuals
    return theta - K * ALPHA * gradient

In [6]:
# Start from the zero vector, with one parameter per feature column of X
# (derived from X rather than hard-coding the column count).
theta = np.zeros(X.shape[1])
for _step in range(ITERATIONS):
    theta = descend(theta)
print(f'theta {theta}, cost {cost(theta)}')
if NORMALIZE:
    # theta was fitted on mean-normalized features (raw column * inverted
    # mean), so multiplying it by the raw columns' inverted means gives the
    # coefficients that apply to the raw features. Without normalization
    # theta is already in raw units and this rescaling would be meaningless.
    print(f'theta mod {theta * invertedMeans(DATA[:, 0:5])}')


theta [3.74007251 5.53720747 4.39462409 4.31471887 3.77750615], cost 1167.1011823663332
theta mod [1.24812707 0.9547617  0.42933484 2.07355618 0.18294439]
