In [19]:
from os import path
import numpy as np
import pandas as pd
import random
import matplotlib.pyplot as plt
from operator import add 
# https://www.geeksforgeeks.org/python-adding-two-list-elements/

In [20]:
train_1d = path.abspath("dataset/classification/cl_train_1.csv")
train_2d = path.abspath("dataset/classification/cl_train_2.csv")

test_1d = path.abspath("dataset/classification/cl_test_1.csv")
test_2d = path.abspath("dataset/classification/cl_test_2.csv")

In [21]:
def preprocessData(filePath):
    df = pd.read_csv(filePath, names=['x1', 'x2', 'y'], header=None)
    df.insert(0, 'x0', 1, True) # Add new dimension because of merging bias into weight vector --> w0 = bias
    X, y = df.values[:,[0,1,2]], df.values[:,-1]
    return X, y, df

In [22]:
def sigmoid(t):
    return 1/(1 + np.exp(-t))

In [23]:
def h(X, weights):
    return X.dot(weights)

In [24]:
def predict(X, weights):
    return sigmoid(h(X, weights))

In [25]:
def gradientDescent(weights, X, y, learningRate=0.01):
    y_pred = [predict(weights, X[i]) for i in range(len(X))]
    print(f"y_pred[0]: {y_pred[0]}\ny[0]: {y[0]}\nX[0]: {X[0]}")
    cumulative = [(np.subtract(y_pred[0], y[0]))* f for f in X[0]]
    
    for i in range(1, len(y)):
        error = y_pred[i] - y[i]
        gradient = np.dot(error, X)
        sumOf = [error * f for f in X[i]]
        cumulative = list(map(add, cumulative, sumOf))
    return np.subtract(weights, [learningRate*cum for cum in cumulative])

In [26]:
def crossEntropy(y_pred, y):
    for i in range(len(y)):
        ce = np.sum(y[i] * np.log(sigmoid(y_pred[i])) 
                + (1 - y[i]) * np.log(1 - sigmoid(y_pred[i])))
    return -(ce/len(y))

In [27]:
def fit(X, y, X_test, y_test, n_iterations):
    weights = np.zeros(X.shape[1])
    training_history = []
    test_history = []
    
    for _ in range(n_iterations):
        weights = gradientDescent(weights, X, y)
        
        y_pred_train = [predict(weights, x) for x in X]
        y_pred_test = [predict(weights, x_t) for x_t in X_test]
        
        training_history.append(crossEntropy(y_pred_train, y))
        test_history.append(crossEntropy(y_pred_test, y_test))
    return weights, training_history, test_history

In [28]:
X_train, y_train, df_train = preprocessData(train_1d)

In [29]:
df_train.head()
print(X_train[0])

[1.    0.567 0.182]


In [30]:
#print(f"type(X): {type(X)}\ntype(y): {type(y)}")
#print(f"type(X_train): {type(X_train)}")
#for i in X_train[:,[0,1,2]]:
#    print(i)

In [31]:
#weights_1d = np.zeros(X_train.shape[1])
#weights = gradientDescent(weights_1d, X_train, y_train)
#print(weights)

## 1D: Fit

In [32]:
X_test, y_test, df_test = preprocessData(test_1d)

In [33]:
w, train_hist, test_hist = fit(X_train, y_train, X_test, y_test, n_iterations=1000)

y_pred[0]: 0.5
y[0]: 0.0
X[0]: [1.    0.567 0.182]
y_pred[0]: 0.510100548205119
y[0]: 0.0
X[0]: [1.    0.567 0.182]
y_pred[0]: 0.5169797519361672
y[0]: 0.0
X[0]: [1.    0.567 0.182]
y_pred[0]: 0.5214049145856191
y[0]: 0.0
X[0]: [1.    0.567 0.182]
y_pred[0]: 0.5239661347888331
y[0]: 0.0
X[0]: [1.    0.567 0.182]
y_pred[0]: 0.5251141716327541
y[0]: 0.0
X[0]: [1.    0.567 0.182]
y_pred[0]: 0.5251927110915579
y[0]: 0.0
X[0]: [1.    0.567 0.182]
y_pred[0]: 0.5244639093036506
y[0]: 0.0
X[0]: [1.    0.567 0.182]
y_pred[0]: 0.5231279917909917
y[0]: 0.0
X[0]: [1.    0.567 0.182]
y_pred[0]: 0.5213381064769232
y[0]: 0.0
X[0]: [1.    0.567 0.182]
y_pred[0]: 0.5192115416045916
y[0]: 0.0
X[0]: [1.    0.567 0.182]
y_pred[0]: 0.5168382053685405
y[0]: 0.0
X[0]: [1.    0.567 0.182]
y_pred[0]: 0.5142870526714272
y[0]: 0.0
X[0]: [1.    0.567 0.182]
y_pred[0]: 0.5116109715212404
y[0]: 0.0
X[0]: [1.    0.567 0.182]
y_pred[0]: 0.5088505097476752
y[0]: 0.0
X[0]: [1.    0.567 0.182]
y_pred[0]: 0.5060367249931

y_pred[0]: 0.3188689839044343
y[0]: 0.0
X[0]: [1.    0.567 0.182]
y_pred[0]: 0.31831763226151455
y[0]: 0.0
X[0]: [1.    0.567 0.182]
y_pred[0]: 0.3177704135000428
y[0]: 0.0
X[0]: [1.    0.567 0.182]
y_pred[0]: 0.31722727192752415
y[0]: 0.0
X[0]: [1.    0.567 0.182]
y_pred[0]: 0.3166881528315086
y[0]: 0.0
X[0]: [1.    0.567 0.182]
y_pred[0]: 0.3161530024588992
y[0]: 0.0
X[0]: [1.    0.567 0.182]
y_pred[0]: 0.31562176799576386
y[0]: 0.0
X[0]: [1.    0.567 0.182]
y_pred[0]: 0.31509439754763663
y[0]: 0.0
X[0]: [1.    0.567 0.182]
y_pred[0]: 0.3145708401202969
y[0]: 0.0
X[0]: [1.    0.567 0.182]
y_pred[0]: 0.31405104560101077
y[0]: 0.0
X[0]: [1.    0.567 0.182]
y_pred[0]: 0.31353496474022574
y[0]: 0.0
X[0]: [1.    0.567 0.182]
y_pred[0]: 0.31302254913370336
y[0]: 0.0
X[0]: [1.    0.567 0.182]
y_pred[0]: 0.31251375120507896
y[0]: 0.0
X[0]: [1.    0.567 0.182]
y_pred[0]: 0.3120085241888391
y[0]: 0.0
X[0]: [1.    0.567 0.182]
y_pred[0]: 0.3115068221137013
y[0]: 0.0
X[0]: [1.    0.567 0.182]
y_

y_pred[0]: 0.2598684262413425
y[0]: 0.0
X[0]: [1.    0.567 0.182]
y_pred[0]: 0.25962672666540365
y[0]: 0.0
X[0]: [1.    0.567 0.182]
y_pred[0]: 0.2593858142406398
y[0]: 0.0
X[0]: [1.    0.567 0.182]
y_pred[0]: 0.25914568329746374
y[0]: 0.0
X[0]: [1.    0.567 0.182]
y_pred[0]: 0.2589063282266836
y[0]: 0.0
X[0]: [1.    0.567 0.182]
y_pred[0]: 0.2586677434787024
y[0]: 0.0
X[0]: [1.    0.567 0.182]
y_pred[0]: 0.25842992356273253
y[0]: 0.0
X[0]: [1.    0.567 0.182]
y_pred[0]: 0.2581928630460203
y[0]: 0.0
X[0]: [1.    0.567 0.182]
y_pred[0]: 0.25795655655308247
y[0]: 0.0
X[0]: [1.    0.567 0.182]
y_pred[0]: 0.25772099876495685
y[0]: 0.0
X[0]: [1.    0.567 0.182]
y_pred[0]: 0.25748618441846133
y[0]: 0.0
X[0]: [1.    0.567 0.182]
y_pred[0]: 0.2572521083054667
y[0]: 0.0
X[0]: [1.    0.567 0.182]
y_pred[0]: 0.25701876527217926
y[0]: 0.0
X[0]: [1.    0.567 0.182]
y_pred[0]: 0.25678615021843426
y[0]: 0.0
X[0]: [1.    0.567 0.182]
y_pred[0]: 0.25655425809700166
y[0]: 0.0
X[0]: [1.    0.567 0.182]
y

y_pred[0]: 0.2278878475157791
y[0]: 0.0
X[0]: [1.    0.567 0.182]
y_pred[0]: 0.22772763596597614
y[0]: 0.0
X[0]: [1.    0.567 0.182]
y_pred[0]: 0.22756773952718434
y[0]: 0.0
X[0]: [1.    0.567 0.182]
y_pred[0]: 0.22740815685650043
y[0]: 0.0
X[0]: [1.    0.567 0.182]
y_pred[0]: 0.22724888662056306
y[0]: 0.0
X[0]: [1.    0.567 0.182]
y_pred[0]: 0.2270899274954614
y[0]: 0.0
X[0]: [1.    0.567 0.182]
y_pred[0]: 0.22693127816664635
y[0]: 0.0
X[0]: [1.    0.567 0.182]
y_pred[0]: 0.2267729373288432
y[0]: 0.0
X[0]: [1.    0.567 0.182]
y_pred[0]: 0.2266149036859625
y[0]: 0.0
X[0]: [1.    0.567 0.182]
y_pred[0]: 0.2264571759510153
y[0]: 0.0
X[0]: [1.    0.567 0.182]
y_pred[0]: 0.22629975284602763
y[0]: 0.0
X[0]: [1.    0.567 0.182]
y_pred[0]: 0.22614263310195487
y[0]: 0.0
X[0]: [1.    0.567 0.182]
y_pred[0]: 0.22598581545860058
y[0]: 0.0
X[0]: [1.    0.567 0.182]
y_pred[0]: 0.22582929866453197
y[0]: 0.0
X[0]: [1.    0.567 0.182]
y_pred[0]: 0.22567308147699927
y[0]: 0.0
X[0]: [1.    0.567 0.182]


y_pred[0]: 0.20543398308646843
y[0]: 0.0
X[0]: [1.    0.567 0.182]
y_pred[0]: 0.20531193163450848
y[0]: 0.0
X[0]: [1.    0.567 0.182]
y_pred[0]: 0.20519005979589786
y[0]: 0.0
X[0]: [1.    0.567 0.182]
y_pred[0]: 0.20506836705555861
y[0]: 0.0
X[0]: [1.    0.567 0.182]
y_pred[0]: 0.2049468529009457
y[0]: 0.0
X[0]: [1.    0.567 0.182]
y_pred[0]: 0.20482551682202865
y[0]: 0.0
X[0]: [1.    0.567 0.182]
y_pred[0]: 0.2047043583112735
y[0]: 0.0
X[0]: [1.    0.567 0.182]
y_pred[0]: 0.20458337686362602
y[0]: 0.0
X[0]: [1.    0.567 0.182]
y_pred[0]: 0.20446257197649342
y[0]: 0.0
X[0]: [1.    0.567 0.182]
y_pred[0]: 0.20434194314972784
y[0]: 0.0
X[0]: [1.    0.567 0.182]
y_pred[0]: 0.2042214898856085
y[0]: 0.0
X[0]: [1.    0.567 0.182]
y_pred[0]: 0.20410121168882545
y[0]: 0.0
X[0]: [1.    0.567 0.182]
y_pred[0]: 0.20398110806646194
y[0]: 0.0
X[0]: [1.    0.567 0.182]
y_pred[0]: 0.20386117852797858
y[0]: 0.0
X[0]: [1.    0.567 0.182]
y_pred[0]: 0.20374142258519615
y[0]: 0.0
X[0]: [1.    0.567 0.182

y_pred[0]: 0.18740852710393294
y[0]: 0.0
X[0]: [1.    0.567 0.182]
y_pred[0]: 0.18731059038766418
y[0]: 0.0
X[0]: [1.    0.567 0.182]
y_pred[0]: 0.18721277292416058
y[0]: 0.0
X[0]: [1.    0.567 0.182]
y_pred[0]: 0.1871150744552543
y[0]: 0.0
X[0]: [1.    0.567 0.182]
y_pred[0]: 0.1870174947236943
y[0]: 0.0
X[0]: [1.    0.567 0.182]
y_pred[0]: 0.1869200334731412
y[0]: 0.0
X[0]: [1.    0.567 0.182]
y_pred[0]: 0.1868226904481626
y[0]: 0.0
X[0]: [1.    0.567 0.182]
y_pred[0]: 0.186725465394228
y[0]: 0.0
X[0]: [1.    0.567 0.182]
y_pred[0]: 0.186628358057705
y[0]: 0.0
X[0]: [1.    0.567 0.182]
y_pred[0]: 0.18653136818585364
y[0]: 0.0
X[0]: [1.    0.567 0.182]
y_pred[0]: 0.1864344955268214
y[0]: 0.0
X[0]: [1.    0.567 0.182]
y_pred[0]: 0.18633773982964005
y[0]: 0.0
X[0]: [1.    0.567 0.182]
y_pred[0]: 0.18624110084421913
y[0]: 0.0
X[0]: [1.    0.567 0.182]
y_pred[0]: 0.18614457832134326
y[0]: 0.0
X[0]: [1.    0.567 0.182]
y_pred[0]: 0.1860481720126659
y[0]: 0.0
X[0]: [1.    0.567 0.182]
y_pre

y_pred[0]: 0.17277731854811954
y[0]: 0.0
X[0]: [1.    0.567 0.182]
y_pred[0]: 0.17269603488925422
y[0]: 0.0
X[0]: [1.    0.567 0.182]
y_pred[0]: 0.17261483766481003
y[0]: 0.0
X[0]: [1.    0.567 0.182]
y_pred[0]: 0.1725337267202163
y[0]: 0.0
X[0]: [1.    0.567 0.182]
y_pred[0]: 0.17245270190132914
y[0]: 0.0
X[0]: [1.    0.567 0.182]
y_pred[0]: 0.1723717630544296
y[0]: 0.0
X[0]: [1.    0.567 0.182]
y_pred[0]: 0.17229091002622207
y[0]: 0.0
X[0]: [1.    0.567 0.182]
y_pred[0]: 0.1722101426638325
y[0]: 0.0
X[0]: [1.    0.567 0.182]
y_pred[0]: 0.17212946081480696
y[0]: 0.0
X[0]: [1.    0.567 0.182]
y_pred[0]: 0.1720488643271094
y[0]: 0.0
X[0]: [1.    0.567 0.182]
y_pred[0]: 0.17196835304912034
y[0]: 0.0
X[0]: [1.    0.567 0.182]
y_pred[0]: 0.1718879268296356
y[0]: 0.0
X[0]: [1.    0.567 0.182]
y_pred[0]: 0.17180758551786343
y[0]: 0.0
X[0]: [1.    0.567 0.182]
y_pred[0]: 0.17172732896342416
y[0]: 0.0
X[0]: [1.    0.567 0.182]
y_pred[0]: 0.1716471570163476
y[0]: 0.0
X[0]: [1.    0.567 0.182]
y

In [34]:
w

array([  4.22448447, -12.27806252,   6.36330889])