In [2]:
import pandas
from sklearn.svm import SVC

# Read the header-less CSV; with header=None the columns get integer labels.
data = pandas.read_csv('data-logistic.csv', header=None)

# Everything except column 0 becomes the feature frame. The original column
# labels are kept, so the features stay addressable as X[1] and X[2].
X = data.drop(labels=[0], axis=1)

# Column 0 is split off as the target vector
# (presumably the class labels -- verify against the data file).
Y = data[0]

In [25]:
import math
from scipy.spatial.distance import euclidean


def sigmoid(w1, w2, x1, x2):
    """Return the logistic function of the linear score ``w1 * x1 + w2 * x2``.

    Args:
        w1, w2: model weights for the first and second feature.
        x1, x2: feature values of a single sample.

    Returns:
        ``1 / (1 + exp(-(w1 * x1 + w2 * x2)))`` as a float in ``[0, 1]``.

    The value is computed in a numerically stable way: ``math.exp`` is only
    ever called with a non-positive argument, so it may underflow to 0.0 but
    can never raise ``OverflowError`` for large-magnitude scores.
    """
    z = w1 * x1 + w2 * x2
    if z >= 0:
        # exp(-z) <= 1 here, so the textbook formula is safe.
        return 1 / (1.0 + math.exp(-z))
    # For negative scores exp(-z) could exceed the float range; use the
    # algebraically equivalent form exp(z) / (1 + exp(z)) instead.
    e = math.exp(z)
    return e / (1.0 + e)

def distance(prev_w1, prev_w2, w1, w2):
    """Euclidean distance between two 2-D weight vectors.

    Args:
        prev_w1, prev_w2: components of the first (previous) weight vector.
        w1, w2: components of the second (current) weight vector.

    Returns:
        The value of ``scipy.spatial.distance.euclidean`` for the two points.
    """
    return euclidean((prev_w1, prev_w2), (w1, w2))

def get_weight_w1(prev_w1, prev_w2, k, C = 10, use_regularization=False):
    """Perform one gradient-descent update of the first weight ``w1``.

    Reads the notebook-level globals ``X`` (feature columns ``X[1]`` and
    ``X[2]``) and ``Y`` (targets); rows are consumed in positional order.
    The margin formula multiplies by ``yi``, so it assumes the labels in
    ``Y`` are +1 / -1 -- TODO confirm against the data file.

    Args:
        prev_w1: current value of the first weight.
        prev_w2: current value of the second weight.
        k: step size of the update.
        C: regularization coefficient; only used when
            ``use_regularization`` is true.
        use_regularization: when true, additionally shrink the result by
            ``k * C * prev_w1`` (an L2-style penalty term).

    Returns:
        The updated value of ``w1``.

    Raises:
        ZeroDivisionError: if ``Y`` is empty.
    """
    l = len(Y)
    summ = 0
    for xi1, xi2, yi in zip(X[1], X[2], Y):
        margin = yi * (prev_w1 * xi1 + prev_w2 * xi2)
        # 1 - 1 / (1 + exp(-margin)) equals 1 / (1 + exp(margin)).  Evaluate
        # it so that math.exp only ever sees a non-positive argument: the
        # naive form raises OverflowError for a badly misclassified sample
        # (very negative margin) and cancels to 0 for very positive margins.
        if margin >= 0:
            e = math.exp(-margin)
            factor = e / (1.0 + e)
        else:
            factor = 1.0 / (1.0 + math.exp(margin))
        summ += yi * xi1 * factor

    result = prev_w1 + k * (1.0 / l) * summ
    if use_regularization:
        return result - k * C * prev_w1
    return result

def get_weight_w2(prev_w1, prev_w2, k, C = 10, use_regularization=False):
    """Perform one gradient-descent update of the second weight ``w2``.

    Reads the notebook-level globals ``X`` (feature columns ``X[1]`` and
    ``X[2]``) and ``Y`` (targets); rows are consumed in positional order.
    The margin formula multiplies by ``yi``, so it assumes the labels in
    ``Y`` are +1 / -1 -- TODO confirm against the data file.

    Args:
        prev_w1: current value of the first weight.
        prev_w2: current value of the second weight.
        k: step size of the update.
        C: regularization coefficient; only used when
            ``use_regularization`` is true.
        use_regularization: when true, additionally shrink the result by
            ``k * C * prev_w2`` (an L2-style penalty term).

    Returns:
        The updated value of ``w2``.

    Raises:
        ZeroDivisionError: if ``Y`` is empty.
    """
    l = len(Y)
    summ = 0
    for xi1, xi2, yi in zip(X[1], X[2], Y):
        margin = yi * (prev_w1 * xi1 + prev_w2 * xi2)
        # 1 - 1 / (1 + exp(-margin)) equals 1 / (1 + exp(margin)).  Evaluate
        # it so that math.exp only ever sees a non-positive argument: the
        # naive form raises OverflowError for a badly misclassified sample
        # (very negative margin) and cancels to 0 for very positive margins.
        if margin >= 0:
            e = math.exp(-margin)
            factor = e / (1.0 + e)
        else:
            factor = 1.0 / (1.0 + math.exp(margin))
        summ += yi * xi2 * factor

    result = prev_w2 + k * (1.0 / l) * summ
    if use_regularization:
        return result - k * C * prev_w2
    return result

# Settings passed to the weight-update helpers below.
C = 10          # regularization coefficient
k = 0.01        # step size
epsilon = 1e-5  # stop once consecutive weight vectors are closer than this

# Both weights start at zero; the prev_* copies hold the previous iterate.
w1, w2 = 0.0, 0.0
prev_w1, prev_w2 = w1, w2
iteration = 0

# Gradient descent with regularization switched on.
# NOTE(review): counting from 0 with "<= 10000" permits up to 10001 updates --
# confirm whether the intended cap is 10000.
while iteration <= 10000:
    prev_w1, prev_w2 = w1, w2

    # Both updates are computed from the *previous* pair of weights.
    w1, w2 = (
        get_weight_w1(prev_w1, prev_w2, k, C, True),
        get_weight_w2(prev_w1, prev_w2, k, C, True),
    )
    dist = distance(prev_w1, prev_w2, w1, w2)

    print("iteration: {i}, w1 = {w}".format(i=iteration, w=w1))
    print("iteration: {i}, w2 = {w}".format(i=iteration, w=w2))
    print("iteration: {i}, distance = {dist}".format(i=iteration, dist=dist))

    # Converged: the step moved the weights by less than epsilon.
    if dist < epsilon:
        break

    iteration += 1

# Index of the iteration that converged, or 10001 if the cap was reached.
print(iteration)

iteration: 0, w1 = 0.00357312663987
iteration: 0, w2 = 0.00324599712784
iteration: 0, distance = 0.00482739384539
iteration: 1, w1 = 0.00701702944603
iteration: 1, w2 = 0.00635856085494
iteration: 1, distance = 0.00464203829083
iteration: 2, w1 = 0.0103367767574
iteration: 2, w2 = 0.00934319402824
iteration: 2, distance = 0.00446416368318
iteration: 3, w1 = 0.0135373916704
iteration: 3, w2 = 0.0122053700424
iteration: 3, distance = 0.00429371486682
iteration: 4, w1 = 0.0166238121662
iteration: 4, w2 = 0.0149504826904
iteration: 4, distance = 0.00413057319589
iteration: 5, w1 = 0.0196008589781
iteration: 5, w2 = 0.0175838055062
iteration: 5, distance = 0.00397456875313
iteration: 6, w1 = 0.0224732107591
iteration: 6, w2 = 0.0201104601101
iteration: 6, distance = 0.00382549189525
iteration: 7, w1 = 0.0252453858438
iteration: 7, w2 = 0.0225353927234
iteration: 7, distance = 0.00368310370196
iteration: 8, w1 = 0.0279217297693
iteration: 8, w2 = 0.024863357839
iteration: 8, distance = 0.003

In [21]:
# Hard-coded weights for the two models.
# NOTE(review): presumably pasted from earlier training runs (without and with
# regularization) -- they are not recomputed here; confirm they are current.
non_regulazed_w1, non_regulazed_w2 = 0.287811620472, 0.0919833021593
regulazed_w1, regulazed_w2 = 0.0285587545462, 0.0247801372497

# Sigmoid output of each model for every row of X, in row order.
predictions_non_regulazed = [
    sigmoid(non_regulazed_w1, non_regulazed_w2, X[1][row], X[2][row])
    for row in range(len(X))
]
predictions_regulazed = [
    sigmoid(regulazed_w1, regulazed_w2, X[1][row], X[2][row])
    for row in range(len(X))
]

In [22]:
from sklearn.metrics import roc_auc_score

# ROC AUC of each model's predicted scores against the targets in Y,
# first for the non-regularized model, then for the regularized one.
score_non_regulazed, score_regulazed = [
    roc_auc_score(Y, predicted)
    for predicted in (predictions_non_regulazed, predictions_regulazed)
]

print(score_non_regulazed)
print(score_regulazed)

0.926857142857
0.936285714286
