In [0]:
import pandas as pd
import numpy as np
import math
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import roc_auc_score

In [3]:
# Load the dataset. header=None makes pandas treat the first line as data and
# label the columns with integers (0, 1, 2 in the preview below).
df = pd.read_csv('data-logistic.csv', header=None)
# Preview the first rows.
df.head()

Unnamed: 0,0,1,2
0,-1,-0.663827,-0.138526
1,1,1.994596,2.468025
2,-1,-1.247395,0.749425
3,1,2.309374,1.899836
4,1,0.849143,2.40775


In [0]:
# Column 0 is used as the target; every remaining column is a feature.
# The frame was read with header=None, so column labels are 0..n-1 and
# positional selection keeps the same labels (1, 2) on X.
y = df.iloc[:, 0]
X = df.iloc[:, 1:]

In [0]:
def fw1(w1, w2, y, X, k, C):
    """Return the first weight after one gradient step of L2-regularised logistic regression.

    Computes
    ``w1 + k * (1/l) * sum_i y_i * x_i1 * (1 - 1 / (1 + exp(-y_i * (w1*x_i1 + w2*x_i2)))) - k * C * w1``.

    Parameters
    ----------
    w1, w2 : float
        Current weights for the features stored under ``X[1]`` and ``X[2]``.
    y : sequence of numbers
        Target labels, one per sample (the previewed data uses -1 / 1).
    X : mapping or DataFrame
        ``X[1]`` and ``X[2]`` must be the two feature columns, aligned with ``y``.
    k : float
        Step size.
    C : float
        L2 regularisation coefficient.

    Returns
    -------
    float
        The updated value of ``w1``.

    Raises
    ------
    ZeroDivisionError
        If ``y`` is empty.
    """
    l = len(y)
    S = 0.0
    # Iterate positionally instead of y[i] / X[1][i]: works for plain lists and
    # for pandas objects regardless of their index, and avoids slow chained lookups.
    for y_i, x1_i, x2_i in zip(y, X[1], X[2]):
        try:
            e = math.exp(-y_i * (w1 * x1_i + w2 * x2_i))
        except OverflowError:
            # exp() of a very large argument: treat as +inf so that the factor
            # below saturates to 1.0 instead of aborting the whole fit.
            e = math.inf
        S += y_i * x1_i * (1.0 - 1.0 / (1.0 + e))

    return w1 + (k * (1.0 / l) * S) - k * C * w1

In [0]:
def fw2(w1, w2, y, X, k, C):
    """Return the second weight after one gradient step of L2-regularised logistic regression.

    Computes
    ``w2 + k * (1/l) * sum_i y_i * x_i2 * (1 - 1 / (1 + exp(-y_i * (w1*x_i1 + w2*x_i2)))) - k * C * w2``.

    Parameters
    ----------
    w1, w2 : float
        Current weights for the features stored under ``X[1]`` and ``X[2]``.
    y : sequence of numbers
        Target labels, one per sample (the previewed data uses -1 / 1).
    X : mapping or DataFrame
        ``X[1]`` and ``X[2]`` must be the two feature columns, aligned with ``y``.
    k : float
        Step size.
    C : float
        L2 regularisation coefficient.

    Returns
    -------
    float
        The updated value of ``w2``.

    Raises
    ------
    ZeroDivisionError
        If ``y`` is empty.
    """
    l = len(y)
    S = 0.0
    # Iterate positionally instead of y[i] / X[2][i]: works for plain lists and
    # for pandas objects regardless of their index, and avoids slow chained lookups.
    for y_i, x1_i, x2_i in zip(y, X[1], X[2]):
        try:
            e = math.exp(-y_i * (w1 * x1_i + w2 * x2_i))
        except OverflowError:
            # exp() of a very large argument: treat as +inf so that the factor
            # below saturates to 1.0 instead of aborting the whole fit.
            e = math.inf
        S += y_i * x2_i * (1.0 - 1.0 / (1.0 + e))

    return w2 + (k * (1.0 / l) * S) - k * C * w2

In [0]:
def grad(y, X, C=0.0, w1=0.0, w2=0.0, k=0.1, err=1e-5, max_iter=10000):
    """Fit the two logistic-regression weights by gradient descent.

    Repeatedly applies ``fw1`` / ``fw2`` (both evaluated at the same current
    weights, i.e. a simultaneous update) until the Euclidean distance between
    consecutive weight vectors is at most ``err`` or ``max_iter`` steps were made.

    Parameters
    ----------
    y, X
        Targets and features, passed unchanged to ``fw1`` and ``fw2``.
    C : float
        L2 regularisation coefficient (0.0 disables regularisation).
    w1, w2 : float
        Initial weights.
    k : float
        Step size.
    err : float
        Convergence threshold on the length of the weight update.
    max_iter : int
        Upper bound on the number of update steps. At least one step is
        always performed, even if ``max_iter`` is less than 1.

    Returns
    -------
    list
        ``[w1, w2]`` as produced by the last update step.
    """
    n_steps = 0

    while True:
        n_steps += 1
        # Both new weights are computed from the *old* pair before either is replaced.
        w1_new, w2_new = fw1(w1, w2, y, X, k, C), fw2(w1, w2, y, X, k, C)
        step = math.sqrt((w1_new - w1) ** 2 + (w2_new - w2) ** 2)

        if n_steps >= max_iter or step <= err:
            break
        w1, w2 = w1_new, w2_new

    return [w1_new, w2_new]

In [0]:
# Fit twice with the default start and step size:
# once without regularisation and once with C = 10.
w1, w2 = grad(y, X, C=0.0)
rw1, rw2 = grad(y, X, C=10.0)

In [0]:
def a(X, w1, w2):
    """Return the sigmoid of the linear score ``w1 * X[1] + w2 * X[2]``.

    Parameters
    ----------
    X : mapping or Series
        A single sample; its two feature values are read from ``X[1]`` and ``X[2]``.
    w1, w2 : float
        Weights applied to ``X[1]`` and ``X[2]``.

    Returns
    -------
    float
        ``1 / (1 + exp(-(w1 * X[1] + w2 * X[2])))``, a value in [0, 1].
    """
    try:
        return 1.0 / (1.0 + math.exp(-w1 * X[1] - w2 * X[2]))
    except OverflowError:
        # exp() overflows only when the score is hugely negative; the sigmoid's
        # limit there is 0, so return that instead of crashing the scoring pass.
        return 0.0

In [32]:
# Score every row with the unregularised and the regularised weights;
# apply() forwards the extra positional arguments to a() after the row.
y_score = X.apply(a, axis=1, args=(w1, w2))
y_rscore = X.apply(a, axis=1, args=(rw1, rw2))

# ROC AUC of both score vectors against the targets.
auc, rauc = roc_auc_score(y, y_score), roc_auc_score(y, y_rscore)

# Both values rounded to three decimals, separated by a space.
ans = "{:0.3f} {:0.3f}".format(auc, rauc)
ans

'0.927 0.936'

In [60]:
# scikit-learn model with an L1 penalty, fitted on the same data.
# The solver is pinned explicitly: since scikit-learn 0.22 the default solver is
# 'lbfgs', which rejects penalty='l1'; 'liblinear' is the former default and
# supports it, so this cell keeps working on both old and new versions.
clf_auc = LogisticRegression(penalty='l1', C=1.0, tol=1e-5, random_state=0,
                             solver='liblinear', intercept_scaling=0.1)
clf_auc.fit(X, y)
# Keep only the probability column of the second class in clf_auc.classes_.
y_score = clf_auc.predict_proba(X)[:, 1:2]
c_auc = roc_auc_score(y, y_score)
c_auc



0.9274285714285713

In [85]:
# scikit-learn model with an L2 penalty, fitted on the same data.
# multi_class='ovr' was dropped: the argument is deprecated in scikit-learn 1.5
# and, for a two-class target like this one, the default resolves to the same
# scheme, so the fit is unchanged.
# NOTE(review): per the scikit-learn docs intercept_scaling only matters for the
# 'liblinear' solver, so it is presumably a no-op with 'lbfgs' - confirm before removing.
clf_rauc = LogisticRegression(penalty='l2', C=0.02, tol=1e-5, random_state=0,
                              solver='lbfgs', intercept_scaling=0.1)
clf_rauc.fit(X, y)
# Keep only the probability column of the second class in clf_rauc.classes_.
y_rscore = clf_rauc.predict_proba(X)[:, 1:2]
c_rauc = roc_auc_score(y, y_rscore)
c_rauc

0.9361904761904761

In [86]:
# Format the two scikit-learn AUC values with three decimals, separated by a
# space, in the same layout as the hand-written result above.
answer = "{:0.3f} {:0.3f}".format(c_auc, c_rauc)
answer

'0.927 0.936'