In [1]:
from os import path
import numpy as np
import pandas as pd
import random
import matplotlib.pyplot as plt
from sklearn.linear_model import LogisticRegression

In [2]:
train_1d = path.abspath("dataset/classification/cl_train_1.csv")
train_2d = path.abspath("dataset/classification/cl_train_2.csv")

test_1d = path.abspath("dataset/classification/cl_test_1.csv")
test_2d = path.abspath("dataset/classification/cl_test_2.csv")

In [3]:
def sigmoid(t):
    return 1/(1 + np.exp(-t))

In [4]:
def h(X, weights):
    #return np.dot(weights, np.matrix.transpose(np.array(X)))
    return X.dot(weights)

In [5]:
def preprocessData(filePath):
    df = pd.read_csv(filePath, names=['x1', 'x2', 'y'], header=None)
    X = df[['x1', 'x2']]
    X.insert(0, 'x0', 1, True) # Add new dimension because of merging bias into weight vector --> w0 = bias
    y = df[['y']]
    return X, y, df

In [6]:
def gradientDescent(X, y, n_iter, learningRate=0.5):
    weights = np.zeros(X.shape[1])
    error = []
    for i in range(n_iter):
        pred = sigmoid(h(X, weights.T))
        print("pred \n", pred)
        error = pred[1] - y["y"]
        gradient = np.dot(error, X)
        weights -= learningRate * gradient
    return weights

In [7]:
X_train, y_train, df_train = preprocessData(train_1d)

In [8]:
X_train.head()

Unnamed: 0,x0,x1,x2
0,1,0.567,0.182
1,1,0.713,0.257
2,1,0.199,0.11
3,1,0.685,0.008
4,1,0.39,0.836


In [9]:
weights = gradientDescent(X_train, y_train, 1500)
print(weights)

pred 
 0     0.5
1     0.5
2     0.5
3     0.5
4     0.5
5     0.5
6     0.5
7     0.5
8     0.5
9     0.5
10    0.5
11    0.5
12    0.5
13    0.5
14    0.5
15    0.5
16    0.5
17    0.5
18    0.5
19    0.5
20    0.5
21    0.5
22    0.5
23    0.5
24    0.5
25    0.5
26    0.5
27    0.5
28    0.5
29    0.5
30    0.5
31    0.5
32    0.5
33    0.5
34    0.5
35    0.5
36    0.5
37    0.5
38    0.5
39    0.5
40    0.5
41    0.5
42    0.5
43    0.5
44    0.5
45    0.5
46    0.5
47    0.5
48    0.5
49    0.5
50    0.5
51    0.5
52    0.5
53    0.5
54    0.5
55    0.5
56    0.5
57    0.5
58    0.5
59    0.5
dtype: float64
pred 
 0     0.882921
1     0.878984
2     0.922197
3     0.781305
4     0.987152
5     0.992633
6     0.952182
7     0.981763
8     0.921894
9     0.980034
10    0.982213
11    0.974918
12    0.878423
13    0.979005
14    0.945627
15    0.860871
16    0.982467
17    0.753655
18    0.967806
19    0.769943
20    0.964076
21    0.906428
22    0.968792
23    0.985235
24    0.911

dtype: float64
pred 
 0     1.000000e+00
1     1.000000e+00
2     1.000000e+00
3     5.144955e-25
4     1.000000e+00
5     1.000000e+00
6     1.000000e+00
7     1.000000e+00
8     1.000000e+00
9     1.000000e+00
10    1.000000e+00
11    1.000000e+00
12    1.000000e+00
13    1.000000e+00
14    1.000000e+00
15    1.699415e-04
16    1.000000e+00
17    2.404394e-50
18    1.000000e+00
19    1.758238e-34
20    1.000000e+00
21    1.000000e+00
22    1.000000e+00
23    1.000000e+00
24    1.000000e+00
25    1.000000e+00
26    7.878558e-14
27    1.000000e+00
28    1.000000e+00
29    1.000000e+00
30    1.000000e+00
31    1.000000e+00
32    1.000000e+00
33    1.000000e+00
34    1.000000e+00
35    1.000000e+00
36    1.000000e+00
37    1.000000e+00
38    1.000000e+00
39    1.000000e+00
40    1.000000e+00
41    1.000000e+00
42    1.000000e+00
43    1.000000e+00
44    1.000000e+00
45    1.000000e+00
46    1.000000e+00
47    1.000000e+00
48    9.908605e-07
49    7.137826e-14
50    1.000000e+00
51    1.0

 0      1.000000e+00
1      3.007989e-01
2      1.000000e+00
3      1.670728e-64
4      1.000000e+00
5      1.000000e+00
6      1.000000e+00
7      1.000000e+00
8      1.000000e+00
9      1.000000e+00
10     1.000000e+00
11     1.000000e+00
12     1.000000e+00
13     1.000000e+00
14     1.000000e+00
15     2.762272e-24
16     1.000000e+00
17    1.504885e-118
18     1.000000e+00
19     1.222173e-84
20     1.000000e+00
21     1.000000e+00
22     1.000000e+00
23     1.000000e+00
24     1.000000e+00
25     1.000000e+00
26     6.583427e-43
27     1.000000e+00
28     1.000000e+00
29     1.000000e+00
30     1.000000e+00
31     1.000000e+00
32     1.000000e+00
33     1.000000e+00
34     1.000000e+00
35     1.000000e+00
36     1.000000e+00
37     1.000000e+00
38     1.000000e+00
39     1.000000e+00
40     1.000000e+00
41     1.000000e+00
42     1.000000e+00
43     1.000000e+00
44     1.000000e+00
45     1.000000e+00
46     1.000000e+00
47     1.000000e+00
48     1.859588e-28
49     3.458232e-43

dtype: float64
pred 
 0      1.000000e+00
1      6.294632e-03
2      1.000000e+00
3      8.617180e-99
4      1.000000e+00
5      1.000000e+00
6      1.000000e+00
7      1.000000e+00
8      1.000000e+00
9      1.000000e+00
10     1.000000e+00
11     1.000000e+00
12     1.000000e+00
13     1.000000e+00
14     1.000000e+00
15     4.535225e-38
16     1.000000e+00
17    7.172975e-181
18     1.000000e+00
19    2.296411e-129
20     1.000000e+00
21     1.000000e+00
22     1.000000e+00
23     1.000000e+00
24     1.000000e+00
25     1.000000e+00
26     3.192303e-66
27     1.000000e+00
28     1.000000e+00
29     1.000000e+00
30     1.000000e+00
31     1.000000e+00
32     1.000000e+00
33     1.000000e+00
34     1.000000e+00
35     1.000000e+00
36     1.000000e+00
37     1.000000e+00
38     1.000000e+00
39     1.000000e+00
40     1.000000e+00
41     1.000000e+00
42     1.000000e+00
43     1.000000e+00
44     1.000000e+00
45     1.000000e+00
46     1.000000e+00
47     1.000000e+00
48     2.403108e-4

dtype: float64
pred 
 0      1.000000e+00
1      4.579230e-03
2      1.000000e+00
3     9.845279e-130
4      1.000000e+00
5      1.000000e+00
6      1.000000e+00
7      1.000000e+00
8      1.000000e+00
9      1.000000e+00
10     1.000000e+00
11     1.000000e+00
12     1.000000e+00
13     1.000000e+00
14     1.000000e+00
15     1.792197e-49
16     1.000000e+00
17    4.662526e-238
18     1.000000e+00
19    4.371691e-170
20     1.000000e+00
21     1.000000e+00
22     1.000000e+00
23     1.000000e+00
24     1.000000e+00
25     1.000000e+00
26     1.133419e-86
27     1.000000e+00
28     1.000000e+00
29     1.000000e+00
30     1.000000e+00
31     1.000000e+00
32     1.000000e+00
33     1.000000e+00
34     1.000000e+00
35     1.000000e+00
36     1.000000e+00
37     1.000000e+00
38     1.000000e+00
39     1.000000e+00
40     1.000000e+00
41     1.000000e+00
42     1.000000e+00
43     1.000000e+00
44     1.000000e+00
45     1.000000e+00
46     1.000000e+00
47     1.000000e+00
48     8.917585e-5

pred 
 0      1.000000e+00
1      1.000000e+00
2      1.000000e+00
3     3.230932e-150
4      1.000000e+00
5      1.000000e+00
6      1.000000e+00
7      1.000000e+00
8      1.000000e+00
9      1.000000e+00
10     1.000000e+00
11     1.000000e+00
12     1.000000e+00
13     1.000000e+00
14     1.000000e+00
15     5.284517e-50
16     1.000000e+00
17    2.540280e-282
18     1.000000e+00
19    1.811062e-199
20     1.000000e+00
21     1.000000e+00
22     1.000000e+00
23     1.000000e+00
24     1.000000e+00
25     1.000000e+00
26     3.242445e-96
27     1.000000e+00
28     1.000000e+00
29     1.000000e+00
30     1.000000e+00
31     1.000000e+00
32     1.000000e+00
33     1.000000e+00
34     1.000000e+00
35     1.000000e+00
36     1.000000e+00
37     1.000000e+00
38     1.000000e+00
39     1.000000e+00
40     1.000000e+00
41     1.000000e+00
42     1.000000e+00
43     1.000000e+00
44     1.000000e+00
45     1.000000e+00
46     1.000000e+00
47     1.000000e+00
48     1.609996e-60
49     8.4755

dtype: float64
pred 
 0      1.000000e+00
1      3.055714e-01
2      1.000000e+00
3     3.259341e-188
4      1.000000e+00
5      1.000000e+00
6      1.000000e+00
7      1.000000e+00
8      1.000000e+00
9      1.000000e+00
10     1.000000e+00
11     1.000000e+00
12     1.000000e+00
13     1.000000e+00
14     1.000000e+00
15     1.725749e-69
16     1.000000e+00
17     0.000000e+00
18     1.000000e+00
19    1.409838e-247
20     1.000000e+00
21     1.000000e+00
22     1.000000e+00
23     1.000000e+00
24     1.000000e+00
25     1.000000e+00
26    1.899456e-124
27     1.000000e+00
28     1.000000e+00
29     1.000000e+00
30     1.000000e+00
31     1.000000e+00
32     1.000000e+00
33     1.000000e+00
34     1.000000e+00
35     1.000000e+00
36     1.000000e+00
37     1.000000e+00
38     1.000000e+00
39     1.000000e+00
40     1.000000e+00
41     1.000000e+00
42     1.000000e+00
43     1.000000e+00
44     1.000000e+00
45     1.000000e+00
46     1.000000e+00
47     1.000000e+00
48     8.160482e-8

pred 
 0      1.000000e+00
1      1.303995e-03
2      1.000000e+00
3     1.119393e-222
4      1.000000e+00
5      1.000000e+00
6      1.000000e+00
7      1.000000e+00
8      1.000000e+00
9      1.000000e+00
10     1.000000e+00
11     1.000000e+00
12     1.000000e+00
13     1.000000e+00
14     1.000000e+00
15     8.158117e-84
16     1.000000e+00
17     0.000000e+00
18     1.000000e+00
19    2.264862e-292
20     1.000000e+00
21     1.000000e+00
22     1.000000e+00
23     1.000000e+00
24     1.000000e+00
25     1.000000e+00
26    3.774643e-148
27     1.000000e+00
28     1.000000e+00
29     1.000000e+00
30     1.000000e+00
31     1.000000e+00
32     1.000000e+00
33     1.000000e+00
34     1.000000e+00
35     1.000000e+00
36     1.000000e+00
37     1.000000e+00
38     1.000000e+00
39     1.000000e+00
40     1.000000e+00
41     1.000000e+00
42     1.000000e+00
43     1.000000e+00
44     1.000000e+00
45     1.000000e+00
46     1.000000e+00
47     1.000000e+00
48     3.386904e-98
49    3.89385

pred 
 0      1.000000e+00
1      1.000000e+00
2      1.000000e+00
3     4.140313e-244
4      1.000000e+00
5      1.000000e+00
6      1.000000e+00
7      1.000000e+00
8      1.000000e+00
9      1.000000e+00
10     1.000000e+00
11     1.000000e+00
12     1.000000e+00
13     1.000000e+00
14     1.000000e+00
15     2.217502e-84
16     1.000000e+00
17     0.000000e+00
18     1.000000e+00
19     0.000000e+00
20     1.000000e+00
21     1.000000e+00
22     1.000000e+00
23     1.000000e+00
24     1.000000e+00
25     1.000000e+00
26    3.850469e-158
27     1.000000e+00
28     1.000000e+00
29     1.000000e+00
30     1.000000e+00
31     1.000000e+00
32     1.000000e+00
33     1.000000e+00
34     1.000000e+00
35     1.000000e+00
36     1.000000e+00
37     1.000000e+00
38     1.000000e+00
39     1.000000e+00
40     1.000000e+00
41     1.000000e+00
42     1.000000e+00
43     1.000000e+00
44     1.000000e+00
45     1.000000e+00
46     1.000000e+00
47     1.000000e+00
48    4.466316e-101
49    3.93484

pred 
 0      1.000000e+00
1      9.999997e-01
2      1.000000e+00
3     1.840806e-281
4      1.000000e+00
5      1.000000e+00
6      1.000000e+00
7      1.000000e+00
8      1.000000e+00
9      1.000000e+00
10     1.000000e+00
11     1.000000e+00
12     1.000000e+00
13     1.000000e+00
14     1.000000e+00
15     7.334898e-99
16     1.000000e+00
17     0.000000e+00
18     1.000000e+00
19     0.000000e+00
20     1.000000e+00
21     1.000000e+00
22     1.000000e+00
23     1.000000e+00
24     1.000000e+00
25     1.000000e+00
26    3.108743e-183
27     1.000000e+00
28     1.000000e+00
29     1.000000e+00
30     1.000000e+00
31     1.000000e+00
32     1.000000e+00
33     1.000000e+00
34     1.000000e+00
35     1.000000e+00
36     1.000000e+00
37     1.000000e+00
38     1.000000e+00
39     1.000000e+00
40     1.000000e+00
41     1.000000e+00
42     1.000000e+00
43     1.000000e+00
44     1.000000e+00
45     1.000000e+00
46     1.000000e+00
47     1.000000e+00
48    6.470218e-118
49    2.13746

dtype: float64
pred 
 0      1.000000e+00
1      1.000000e+00
2      1.000000e+00
3      0.000000e+00
4      1.000000e+00
5      1.000000e+00
6      1.000000e+00
7      1.000000e+00
8      1.000000e+00
9      1.000000e+00
10     1.000000e+00
11     1.000000e+00
12     1.000000e+00
13     1.000000e+00
14     1.000000e+00
15    2.328050e-110
16     1.000000e+00
17     0.000000e+00
18     1.000000e+00
19     0.000000e+00
20     1.000000e+00
21     1.000000e+00
22     1.000000e+00
23     1.000000e+00
24     1.000000e+00
25     1.000000e+00
26    5.127136e-205
27     1.000000e+00
28     1.000000e+00
29     1.000000e+00
30     1.000000e+00
31     1.000000e+00
32     1.000000e+00
33     1.000000e+00
34     1.000000e+00
35     1.000000e+00
36     1.000000e+00
37     1.000000e+00
38     1.000000e+00
39     1.000000e+00
40     1.000000e+00
41     1.000000e+00
42     1.000000e+00
43     1.000000e+00
44     1.000000e+00
45     1.000000e+00
46     1.000000e+00
47     1.000000e+00
48    9.586887e-13

In [10]:
X_test, y_test, df_test = preprocessData(test_1d)

In [11]:
X_test.head()

Unnamed: 0,x0,x1,x2
0,1,0.452,0.068
1,1,0.945,0.6
2,1,0.69,0.284
3,1,0.3,0.189
4,1,0.953,0.993


In [12]:
y_pred = h(X_test, weights)

In [13]:
def predict(preds):
    classified_preds = []
    for y in preds:
        if y >= 0:
            classified_preds.append(1)
        else:
            classified_preds.append(0)
    return classified_preds
            

In [14]:
pred = predict(y_pred)

In [15]:
pred

[1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 0,
 1,
 1,
 0,
 1,
 1,
 0,
 1,
 1,
 0,
 1,
 1,
 1,
 1,
 1,
 0,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 0,
 1]