In [1]:
import numpy as np
import pandas as pd
#import torch
from math import factorial
from sklearn.metrics import roc_curve, auc
import matplotlib.pyplot as plt

In [2]:
def pos(i,t,prod):
    '''
    Compute the i-th positive function and its derivative.

    The function is (L/2 + prod)**i, where L is the module-level range
    constant. The derivative is taken with respect to prod only; the
    sample features xt are not multiplied in here.

    input:
        i - index (exponent) of function
        t - iteration (not used in the computation)
        prod - wt*xt

    output:
        fpt - positive function value
        gfpt - positive function derivative w.r.t. prod (no xt yet)
    '''
    base = L/2+prod
    fpt = base**i
    # For i == 0 the function is the constant 1, so its derivative is exactly
    # 0. Evaluating 0*base**(-1) instead yields nan (NumPy scalar) or raises
    # ZeroDivisionError (Python float) whenever base == 0, i.e. prod == -L/2.
    gfpt = i*base**(i-1) if i != 0 else 0.0
    return fpt,gfpt

In [3]:
def comb(n, k):
    '''
    Compute the binomial coefficient "n choose k".

    The quotient is evaluated in exact integer arithmetic and converted to
    float only at the end. Chained float division loses exactness and raises
    OverflowError as soon as factorial(n) exceeds the float range (n > 170),
    even when the final result is small.

    input:
        n - total number (non-negative integer)
        k - number of chosen (integer with 0 <= k <= n)

    output:
        c - number of combinations, returned as a float

    raises:
        ValueError - from factorial when n, k or n - k is negative
        OverflowError - when the result itself is too large for a float
    '''
    # The division is always exact, so floor division loses nothing.
    exact = factorial(n) // (factorial(k) * factorial(n - k))
    return float(exact)

In [4]:
def neg(loss,i,t,prod):
    '''
    Compute the i-th negative function and its gradient information.

    The value is sum over k = i..N of beta_k * (L/2 - prod)**(k-i), where
    beta_k is built from the k-th forward difference of loss sampled on the
    grid j/N. N and L are module-level constants. The returned gradient is
    the derivative with respect to (L/2 - prod); no chain-rule factor for
    prod or for the sample features xt is applied here.

    input:
        loss - loss function, called with a scalar j/N
        i - index of function
        t - iteration (not used in the computation)
        prod - wt*xt

    output:
        fnt - negative function value
        gfnt - negative function gradient (no xt yet)
    '''
    fnt = 0.0 # n stands for negative
    gfnt = 0.0
    base = L/2-prod
    for k in range(i,N+1):
        # k-th forward difference of the loss on the grid j/N
        delta = 0.0
        for j in range(k+1):
            delta += (-1)**(k-j)*comb(k,j)*loss(j/N)
        # coefficient of the (k-i)-th power term
        beta = comb(N,k)*comb(k,i)*(N+1)*delta/(2*L)**k
        # function value
        fnt += beta*base**(k-i)
        # The k == i term is constant in prod, so it contributes nothing to
        # the gradient. Skipping it avoids 0*base**(-1), which is nan (or
        # raises ZeroDivisionError) when base == 0, i.e. prod == L/2.
        if k != i:
            gfnt += beta*(k-i)*base**(k-i-1)
    return fnt,gfnt

In [5]:
def p_hat(t,yt,ptm1):
    '''
    Running estimate of the probability of a positive label.

    input:
        t - iteration (number of samples already averaged into ptm1)
        yt - label at t
        ptm1 - p at t-1 (m stands for minus)

    output:
        pt - p at t
    '''
    # (yt+1)/2 maps a label of +1 to 1 and a label of -1 to 0
    positive_indicator = (yt+1)/2
    weighted_sum = t*ptm1 + positive_indicator
    return weighted_sum/(t+1)

In [6]:
def a_hat(t,fpt,yt,ptm1,atm1):
    '''
    Running approximation of the primal variable a.

    Uses the previous probability estimate ptm1; p is not updated here.

    input:
        t - iteration
        fpt - positive function at t
        yt - sample label at t
        ptm1 - p at t-1
        atm1 - a at t-1

    output:
        at - a at t
    '''
    # contribution of the current sample; zero when yt == -1
    current = fpt*((yt+1)/2)
    # contribution carried over from the previous estimate
    history = t*ptm1*atm1
    return (current + history)/(t+1)

In [7]:
def b_hat(t,fmt,yt,ptm1,btm1):
    '''
    Running approximation of the primal variable b.

    input:
        t - iteration
        fmt - negative function at t
        yt - sample label at t
        ptm1 - p at t-1
        btm1 - b at t-1

    output:
        bt - b at t
    '''
    # (-yt+1)/2 is the indicator of yt == -1
    current = fmt*((-yt+1)/2)
    # contribution carried over from the previous estimate
    history = t*(1-ptm1)*btm1
    return (current + history)/(t+1)

In [8]:
def alpha_step(t,at,bt):
    '''
    Dual variable alpha, computed as the sum of the primal variables.

    input:
        t - iteration (not used in the computation)
        at - a at t
        bt - b at t

    output:
        alphat - alpha at t
    '''
    return at + bt

In [9]:
def w_grad(gfpt,gfnt,yt,pt,at,bt,alphat):
    '''
    Scalar part of the gradient with respect to w.

    Only the branch matching the sample label contributes: the positive
    function gradient for yt == 1, the negative one otherwise.

    input:
        gfpt - positive function gradient at t
        gfnt - negative function gradient at t
        yt - sample label at t
        pt - p at t
        at - a at t
        bt - b at t
        alphat - alpha at t
    output:
        gradwt - gradient w.r.t. w at t
    '''
    if yt == 1:
        return 2*(1-pt)*(alphat - at)*gfpt
    return 2*pt*(alphat - bt)*gfnt

In [10]:
def proj(wt,R):
    '''
    Project wt onto the ball of radius R.

    A vector whose norm does not exceed R is returned as-is (the same
    object); otherwise a rescaled copy with norm R is returned.

    input:
        wt - w at t
        R - radius

    output:
        proj - projected wt
    '''
    length = np.linalg.norm(wt)
    if length > R:
        return wt/length*R
    return wt

In [11]:
def a_grad(fpt,yt,pt,at):
    '''
    Gradient with respect to a.

    Samples with yt != 1 do not contribute, so the gradient is 0.0 for them.

    input:
        fpt - positive function at t
        yt - sample label at t
        pt - p at t
        at - a at t

    output:
        gradat - gradient w.r.t a at t
    '''
    if yt == 1:
        return -2*(1-pt)*(fpt-at)
    return 0.0

In [12]:
def b_grad(fnt,yt,pt,bt):
    '''
    Gradient with respect to b.

    Samples with yt == 1 do not contribute, so the gradient is 0.0 for them.

    input:
        fnt - negative function at t
        yt - sample label at t
        pt - p at t
        bt - b at t

    output:
        gradbt - gradient w.r.t b at t
    '''
    if yt == 1:
        return 0.0
    return -2*pt*(fnt-bt)

In [24]:
def obj(pt,fpt,fnt,at,bt,alphat,yt):
    '''
    Compute objective function value for one sample.

    Both label branches share the same expression; they differ only in
    which class weight, function value and primal variable are used.

    input:
        pt - p at t
        fpt - positive function at t
        fnt - negative function at t
        at - a at t
        bt - b at t
        alphat - alpha at t
        yt - sample label at t

    output:
        F - objective function value
    '''
    if yt == 1:
        outer, inner, fval, primal = 1-pt, pt, fpt, at
    else:
        outer, inner, fval, primal = pt, 1-pt, fnt, bt
    return outer*(-inner*alphat**2 + 2*alphat*fval+(fval-primal)**2 - fval**2)

In [25]:
def SOLAM(t,batch,loss,pt,wt,at,bt,alphat):
    '''
    One Stochastic Online AUC Maximization step over a batch of samples.

    Samples are read cyclically from the module-level arrays x and y at
    index (t*batch+k) % M. The entries of at, bt and alphat are assigned
    in place, and wt is updated with an in-place subtraction before being
    projected onto the unit ball.

    input:
        t - outer iteration index (also determines the step size)
        batch - number of samples processed in this step
        loss - loss function, passed through to neg
        pt - p at t
        wt - w at t
        at - a at t (one entry per function index 0..N)
        bt - b at t (one entry per function index 0..N)
        alphat - alpha at t (one entry per function index 0..N)
    output:
        pt - updated p
        wt - updated, projected w
        at - updated a
        bt - updated b
        alphat - updated alpha
    '''
    step = 1/np.sqrt(t+1)/2
    n_funcs = N+1
    for offset in range(batch):
        sample_no = t*batch+offset
        row = sample_no%M
        label = y[row]
        features = x[row]

        # Update pt
        pt = p_hat(sample_no,label,pt)

        # Per-index work arrays for this sample
        prod = np.inner(features,wt)
        fpt = np.zeros(n_funcs)
        gfpt = np.zeros(n_funcs)
        fnt = np.zeros(n_funcs)
        gfnt = np.zeros(n_funcs)
        gradwt = np.zeros(n_funcs)
        gradat = np.zeros(n_funcs)
        gradbt = np.zeros(n_funcs)

        for i in range(n_funcs): # add up info of each i
            fpt[i],gfpt[i] = pos(i,t,prod) # partial info
            fnt[i],gfnt[i] = neg(loss,i,t,prod)
            gradwt[i] = w_grad(gfpt[i],gfnt[i],label,pt,at[i],bt[i],alphat[i])
            gradat[i] = a_grad(fpt[i],label,pt,at[i])
            gradbt[i] = b_grad(fnt[i],label,pt,bt[i])

            # Update at, bt and alphat for this index
            at[i] -= step*gradat[i]/n_funcs/batch
            bt[i] -= step*gradbt[i]/n_funcs/batch
            alphat[i] = at[i]+bt[i]

        # Update wt with the summed scalar gradient, then project
        wt -= step*sum(gradwt)*label*features/n_funcs/batch
        wt = proj(wt,1)

    return pt,wt,at,bt,alphat

In [26]:
def loader(filename):
    '''
    Data file loader

    Each line is split on whitespace or ':'. Column 0 is taken as the
    label and every second column starting at column 2 as a feature value
    (presumably a dense "label idx:value idx:value ..." layout - verify
    against the data file). Missing values become 0 and every sample is
    scaled to unit Euclidean norm; all-zero samples are left as zeros.

    input:
        filename - filename (path or file-like object accepted by pandas)

    output:
        x - sample features, one row per sample
        y - sample labels
    '''
    # raw data; the separator is a regular expression, so use a raw string
    # to avoid the invalid '\s' escape sequence
    raw_df = pd.read_csv(filename,header=None,sep=r'\s+|:',engine='python')
    y = raw_df[0].values
    x = raw_df[raw_df.columns[2::2]].values
    # avoid nan
    x = np.nan_to_num(x)
    # normalize; a zero norm would turn the whole row into nan, so divide
    # such rows by 1 and keep them as zeros
    norm = np.linalg.norm(x,axis=1)
    norm[norm == 0] = 1.0
    x = x/norm[:,None]
    return x,y

In [33]:
# Load the data set and define the problem constants. All of these names are
# module-level globals that the functions defined above read directly.
x,y = loader('diabetes')
M,d = x.shape # M rows (samples), d columns (features)
L = 2 # range constant used in (L/2 +/- prod) and in the loss functions
N = 30 # degree: function indices run over 0..N
T = 1000 # number of iterations of the training loop

In [34]:
def hinge(x):
    '''Hinge-type loss max(0, 1+L-2*L*x), with L the module-level range.'''
    return max(0,1+L-2*L*x)


def logistics(x):
    '''Logistic-type loss log(1+exp(L-2*L*x)), with L the module-level range.'''
    return np.log(1+np.exp(L-2*L*x))


# Initial state: probability estimate, weights and one a/b/alpha per index
pt = 0.0
wt = np.zeros(d)
at = np.zeros(N+1)
bt = np.zeros(N+1)
alphat = np.zeros(N+1)

# AUC on the full data set after every iteration
roc_auc = np.zeros(T)
batch = 1
for t in range(T):
    pt,wt,at,bt,alphat = SOLAM(t,batch,hinge,pt,wt,at,bt,alphat)
    scores = np.dot(x,wt)
    fpr, tpr, _ = roc_curve(y, scores)
    roc_auc[t] = auc(fpr, tpr)
    print('iteration: %d AUC: %f' %(t+1,roc_auc[t]))

iteration: 1 AUC: 0.500000
iteration: 2 AUC: 0.500000
iteration: 3 AUC: 0.570231
iteration: 4 AUC: 0.716082
iteration: 5 AUC: 0.569657
iteration: 6 AUC: 0.635530
iteration: 7 AUC: 0.413015
iteration: 8 AUC: 0.452291
iteration: 9 AUC: 0.541097
iteration: 10 AUC: 0.533388
iteration: 11 AUC: 0.596246
iteration: 12 AUC: 0.649918
iteration: 13 AUC: 0.546694
iteration: 14 AUC: 0.565575
iteration: 15 AUC: 0.589888
iteration: 16 AUC: 0.556485
iteration: 17 AUC: 0.518485
iteration: 18 AUC: 0.493172
iteration: 19 AUC: 0.583112
iteration: 20 AUC: 0.535716
iteration: 21 AUC: 0.595396
iteration: 22 AUC: 0.589433
iteration: 23 AUC: 0.627627
iteration: 24 AUC: 0.612284
iteration: 25 AUC: 0.638545
iteration: 26 AUC: 0.628239
iteration: 27 AUC: 0.625127
iteration: 28 AUC: 0.695201
iteration: 29 AUC: 0.700291
iteration: 30 AUC: 0.721866
iteration: 31 AUC: 0.705940
iteration: 32 AUC: 0.668470
iteration: 33 AUC: 0.719164
iteration: 34 AUC: 0.726328
iteration: 35 AUC: 0.697679
iteration: 36 AUC: 0.728664
i

iteration: 291 AUC: 0.805918
iteration: 292 AUC: 0.807037
iteration: 293 AUC: 0.808560
iteration: 294 AUC: 0.809940
iteration: 295 AUC: 0.810709
iteration: 296 AUC: 0.810888
iteration: 297 AUC: 0.810784
iteration: 298 AUC: 0.810851
iteration: 299 AUC: 0.807060
iteration: 300 AUC: 0.808336
iteration: 301 AUC: 0.809716
iteration: 302 AUC: 0.809851
iteration: 303 AUC: 0.811149
iteration: 304 AUC: 0.809216
iteration: 305 AUC: 0.810328
iteration: 306 AUC: 0.810679
iteration: 307 AUC: 0.807515
iteration: 308 AUC: 0.808022
iteration: 309 AUC: 0.809112
iteration: 310 AUC: 0.809015
iteration: 311 AUC: 0.810567
iteration: 312 AUC: 0.810537
iteration: 313 AUC: 0.809463
iteration: 314 AUC: 0.809963
iteration: 315 AUC: 0.807806
iteration: 316 AUC: 0.808507
iteration: 317 AUC: 0.809455
iteration: 318 AUC: 0.808560
iteration: 319 AUC: 0.808694
iteration: 320 AUC: 0.805970
iteration: 321 AUC: 0.806970
iteration: 322 AUC: 0.805418
iteration: 323 AUC: 0.803701
iteration: 324 AUC: 0.800679
iteration: 325

iteration: 584 AUC: 0.814985
iteration: 585 AUC: 0.814216
iteration: 586 AUC: 0.814075
iteration: 587 AUC: 0.813612
iteration: 588 AUC: 0.813784
iteration: 589 AUC: 0.813903
iteration: 590 AUC: 0.813627
iteration: 591 AUC: 0.812978
iteration: 592 AUC: 0.812933
iteration: 593 AUC: 0.812821
iteration: 594 AUC: 0.812276
iteration: 595 AUC: 0.812261
iteration: 596 AUC: 0.812724
iteration: 597 AUC: 0.812769
iteration: 598 AUC: 0.812433
iteration: 599 AUC: 0.812351
iteration: 600 AUC: 0.812164
iteration: 601 AUC: 0.812201
iteration: 602 AUC: 0.812119
iteration: 603 AUC: 0.812060
iteration: 604 AUC: 0.811672
iteration: 605 AUC: 0.811963
iteration: 606 AUC: 0.811612
iteration: 607 AUC: 0.812828
iteration: 608 AUC: 0.812687
iteration: 609 AUC: 0.812657
iteration: 610 AUC: 0.812627
iteration: 611 AUC: 0.812507
iteration: 612 AUC: 0.812881
iteration: 613 AUC: 0.812731
iteration: 614 AUC: 0.812716
iteration: 615 AUC: 0.811761
iteration: 616 AUC: 0.811754
iteration: 617 AUC: 0.812134
iteration: 618

iteration: 877 AUC: 0.804590
iteration: 878 AUC: 0.804687
iteration: 879 AUC: 0.804679
iteration: 880 AUC: 0.804485
iteration: 881 AUC: 0.804522
iteration: 882 AUC: 0.804612
iteration: 883 AUC: 0.804463
iteration: 884 AUC: 0.803910
iteration: 885 AUC: 0.803403
iteration: 886 AUC: 0.803493
iteration: 887 AUC: 0.803545
iteration: 888 AUC: 0.803642
iteration: 889 AUC: 0.804224
iteration: 890 AUC: 0.804246
iteration: 891 AUC: 0.804291
iteration: 892 AUC: 0.804627
iteration: 893 AUC: 0.804448
iteration: 894 AUC: 0.805075
iteration: 895 AUC: 0.804963
iteration: 896 AUC: 0.804910
iteration: 897 AUC: 0.804731
iteration: 898 AUC: 0.804388
iteration: 899 AUC: 0.804075
iteration: 900 AUC: 0.803918
iteration: 901 AUC: 0.803940
iteration: 902 AUC: 0.804373
iteration: 903 AUC: 0.804381
iteration: 904 AUC: 0.804493
iteration: 905 AUC: 0.804358
iteration: 906 AUC: 0.804343
iteration: 907 AUC: 0.804373
iteration: 908 AUC: 0.804530
iteration: 909 AUC: 0.804821
iteration: 910 AUC: 0.804963
iteration: 911