In [1]:
import numpy as np
import pandas as pd
#import torch
from math import factorial
from sklearn.metrics import roc_curve, auc
import matplotlib.pyplot as plt
import csv

In [2]:
def pos(i,t,prod):
    '''
    Compute positive function and gradient information

    Evaluates the power term (L/2 + prod)**i and its derivative with
    respect to prod. L is read from the notebook's global scope.

    input:
        i - index of function (used as the exponent)
        t - iteration (not used by the computation)
        prod - wt*xt

    output:
        fpt - positive function value
        gfpt - positive function gradient w.r.t. prod (not multiplied by xt)
    '''
    base = L/2 + prod
    fpt = base**i # no xt yet!
    if i == 0:
        # The constant term has zero slope. The generic formula would compute
        # 0*base**(-1), which is nan for numpy floats (ZeroDivisionError for
        # plain floats) when base == 0. fpt equals 1 here, so scaling it by
        # zero gives a zero of the same numeric type as the other branch.
        gfpt = 0.0*fpt
    else:
        gfpt = i*base**(i-1)
    return fpt,gfpt

In [3]:
def comb(n, k):
    '''
    Compute combination

    Uses exact integer arithmetic. Dividing the factorials with float
    division rounds twice and raises OverflowError once an intermediate
    quotient exceeds the float range, even when the result itself fits.

    input:
        n - total number
        k - number of chosen

    output:
        c - number of combination (exact int)

    raises:
        ValueError - from factorial, when k < 0, n < 0 or k > n
    '''
    return factorial(n) // (factorial(k) * factorial(n - k))

In [4]:
def neg(loss,i,t,prod):
    '''
    Compute negative function and gradient information

    Sums, for k = i..N, a coefficient built from the k-th forward
    difference of loss on the grid j/N times the power (L/2 - prod)**(k-i),
    together with the derivative of that power. N, L and comb are read
    from the notebook's global scope.

    input:
        loss - loss function (callable of one number)
        i - index of function
        t - iteration (not used by the computation)
        prod - wt*xt

    output:
        fnt - negative function value
        gfnt - negative function gradient (power-rule factor only; the
               sign from d(L/2 - prod)/dprod and xt are not applied here)
    '''
    base = L/2 - prod
    fnt = 0.0 # n stands for negative
    gfnt = 0.0
    for k in range(i,N+1):
        # compute forward difference
        delta = 0.0
        for j in range(k+1):
            delta += (-1)**(k-j)*comb(k,j)*loss(j/N)
        # compute coefficient
        beta = comb(N,k)*comb(k,i)*(N+1)*delta/(2*L)**k
        order = k - i
        # compute function value
        fnt += beta*base**order
        # compute gradient
        if order > 0:
            # The order-0 term is constant in prod, so it adds nothing.
            # Evaluating it anyway would compute 0*base**(-1), which is nan
            # (or raises ZeroDivisionError) when base == 0.
            gfnt += beta*order*base**(order-1)  # no xt yet!
    return fnt,gfnt

In [5]:
def p_hat(t,yt,ptm1):
    '''
    Running estimate of the positive-label frequency

    input:
        t - iteration (weight given to the previous estimate)
        yt - label at t
        ptm1 - p at t-1 (m stands for minus)

    output:
        pt - p at t
    '''
    # (yt+1)/2 maps a label of +1 to 1 and a label of -1 to 0
    positive_indicator = (yt+1)/2
    return (t*ptm1 + positive_indicator)/(t+1)

In [6]:
def a_hat(t,fpt,yt,ptm1,atm1):
    '''
    Approximate primal a

    Blends the current positive function value (counted only when
    yt = +1) with the previous a, weighted by t and the previous p.

    input:
        t - iteration
        fpt - positive function at t
        yt - sample label at t
        ptm1 - p at t-1
        atm1 - a at t-1

    output:
        at - a at t
    '''
    current_term = fpt*((yt+1)/2)   # vanishes for yt = -1
    history_term = t*ptm1*atm1      # uses p from t-1; pt is not updated here
    return (current_term + history_term)/(t+1)

In [7]:
def b_hat(t,fmt,yt,ptm1,btm1):
    '''
    Approximate primal b

    Blends the current negative function value (counted only when
    yt = -1) with the previous b, weighted by t and (1 - previous p).

    input:
        t - iteration
        fmt - negative function at t
        yt - sample label at t
        ptm1 - p at t-1
        btm1 - b at t-1

    output:
        bt - b at t
    '''
    current_term = fmt*((-yt+1)/2)    # indicator of y=-1
    history_term = t*(1-ptm1)*btm1
    return (current_term + history_term)/(t+1)

In [8]:
def alpha_step(t,at,bt):
    '''
    Compute dual alpha as the sum of a and b

    input:
        t - iteration (not used by the computation)
        at - a at t
        bt - b at t

    output:
        alphat - alpha at t
    '''
    return at + bt

In [9]:
def w_grad(fpt,gfpt,fnt,gfnt,yt,pt,at,bt,alphat):
    '''
    Gradient with respect to w

    Picks the positive-side quantities when yt == 1 and the negative-side
    quantities otherwise, then evaluates one shared expression.

    input:
        fpt - positive function at t
        gfpt - positive function gradient at t
        fnt - negative function at t
        gfnt - negative function gradient at t
        yt - sample label at t
        pt - p at t
        at - a at t
        bt - b at t
        alphat - alpha at t
    output:
        gradwt - gradient w.r.t. w at t (scalar factor, no xt applied)
    '''
    if yt == 1:
        weight, value, center, slope = 1-pt, fpt, at, gfpt
    else:
        weight, value, center, slope = pt, fnt, bt, gfnt
    return (2*alphat*weight + 2*weight*(value-center) - 2*weight*value)*slope

In [10]:
def proj(wt,R):
    '''
    Projection onto the Euclidean ball of radius R

    input:
        wt - w at t
        R - radius

    output:
        proj - wt itself when its norm does not exceed R, otherwise a
               rescaled copy whose norm is R
    '''
    length = np.linalg.norm(wt)
    if not length > R:
        # already inside the ball: hand back the same object untouched
        return wt
    return wt/length*R

In [11]:
def a_grad(fpt,yt,pt,at):
    '''
    Gradient with respect to a

    Non-zero only for a positive sample (yt == 1).

    input:
        fpt - positive function at t
        yt - sample label at t
        pt - p at t
        at - a at t

    output:
        gradat - gradient w.r.t a at t
    '''
    return -2*(1-pt)*(fpt-at) if yt == 1 else 0.0

In [12]:
def b_grad(fnt,yt,pt,bt):
    '''
    Gradient with respect to b

    Non-zero only when the sample is not positive (yt != 1).

    input:
        fnt - negative function at t
        yt - sample label at t
        pt - p at t
        bt - b at t

    output:
        gradbt - gradient w.r.t b at t
    '''
    return 0.0 if yt == 1 else -2*pt*(fnt-bt)

In [13]:
def obj(pt,fpt,fnt,at,bt,alphat,yt):
    '''
    Compute objective function value for one sample

    Uses the positive-side quantities (1-pt, fpt, at) when yt == 1 and
    the negative-side quantities (pt, fnt, bt) otherwise.

    input:
        pt - p at t
        fpt - positive function at t
        fnt - negative function at t
        at - a at t
        bt - b at t
        alphat - alpha at t
        yt - sample label at t

    output:
        F - objective function value
    '''
    if yt == 1:
        weight, value, center = 1-pt, fpt, at
    else:
        weight, value, center = pt, fnt, bt
    return -pt*(1-pt)*alphat**2 + 2*alphat*weight*value + weight*(value-center)**2 - weight*value**2

In [14]:
def SOLAM(t,batch,loss,pt,wt,at,bt,alphat):
    '''
    Stochastic Online AUC Maximization step

    Processes `batch` consecutive samples taken from the notebook-level
    arrays x and y (indices wrap around modulo M) and updates the state.
    N is read from the notebook's global scope as well. at, bt, alphat
    and wt are updated in place; the returned wt may be a new array when
    the final projection rescales it.

    input:
        t - iteration (outer step index)
        batch - number of samples processed in this step
        loss - loss function
        pt - p at t
        wt - w at t
        at - a at t (one entry per function index 0..N)
        bt - b at t (one entry per function index 0..N)
        alphat - alpha at t (one entry per function index 0..N)
    output:
        pt - updated p
        wt - updated w, projected onto the unit ball
        at - updated a
        bt - updated b
        alphat - updated alpha
    '''
    step = 1/np.sqrt(t+1)/2   # step size 1/(2*sqrt(t+1))
    n_terms = N+1
    for offset in range(batch):
        seen = t*batch+offset   # number of samples processed before this one
        idx = seen%M            # wrap around the data set
        label = y[idx]
        feature = x[idx]

        # Update pt
        pt = p_hat(seen,label,pt)

        # Per-index function values and derivatives at the current score
        score = np.inner(wt,feature)
        pos_val = np.zeros(n_terms)
        pos_der = np.zeros(n_terms)
        neg_val = np.zeros(n_terms)
        neg_der = np.zeros(n_terms)
        w_dir = 0.0

        for i in range(n_terms): # add up info of each i
            pos_val[i],pos_der[i] = pos(i,t,score) # partial info
            neg_val[i],neg_der[i] = neg(loss,i,t,score)
            # w gradient uses at/bt/alphat from before this index is updated
            w_dir += w_grad(pos_val[i],pos_der[i],neg_val[i],neg_der[i],label,pt,at[i],bt[i],alphat[i])
            a_dir = a_grad(pos_val[i],label,pt,at[i])
            b_dir = b_grad(neg_val[i],label,pt,bt[i])
            at[i] -= step*a_dir/n_terms/batch
            bt[i] -= step*b_dir/n_terms/batch
            alphat[i] = at[i]+bt[i]

        # label*feature supplies the factor left out of the scalar gradient
        wt -= step*w_dir*label*feature/n_terms/batch

    # project once, after the whole batch
    wt = proj(wt,1)

    return pt,wt,at,bt,alphat

In [15]:
def loader(filename):
    '''
    Data file loader

    Reads a space-separated text file whose lines look like
    "<label> <key>:<value> <key>:<value> ...", builds a dense feature
    matrix (missing keys become 0) and scales every row to unit
    Euclidean norm.

    input:
        filename - filename

    output:
        x - sample features, one row per sample, rows normalized
        y - sample labels

    raises:
        ValueError - when a token after the label is not "<key>:<value>"
                     or a value is not a number
    '''
    # raw data
    rows = []
    with open(filename,'r') as file:
        for line in csv.reader(file, delimiter = ' '):
            # Leading, trailing or doubled spaces yield empty tokens; drop
            # all of them instead of requiring exactly one to be present.
            tokens = [tok for tok in line if tok]
            if not tokens:
                continue # blank line
            # store the label under key '0' so it becomes the first column
            tokens[0] = '0:'+tokens[0]
            rows.append({key: float(val) for key, val in (tok.split(':') for tok in tokens)})
    # NOTE: columns follow the order in which keys are first seen in the file
    df = pd.DataFrame(rows,dtype=float).fillna(0)
    x = df.iloc[:,1:].values
    y = df.iloc[:,0].values
    # normalize
    norm = np.linalg.norm(x,axis=1)
    # all-zero rows stay zero instead of turning into nan
    norm[norm == 0] = 1.0
    x = x/norm[:,None]
    return x,y

In [16]:
# Load features and labels from the file named 'diabetes' (path is relative
# to the working directory). x and y are read as globals by SOLAM.
x,y = loader('diabetes')
# M - number of rows of x (samples), d - number of columns of x (features)
M,d = x.shape

In [17]:
# Hyper-parameters (read as globals by pos, neg and SOLAM)
L, N, T = 2, 10, 100 # range, degree, iteration


def hinge(x):
    '''Hinge-style loss max(0, 1 + L - 2*L*x).'''
    return max(0,1+L-2*L*x)


def logistics(x):
    '''Logistic-style loss log(1 + exp(L - 2*L*x)); not used in this cell.'''
    return np.log(1+np.exp(L-2*L*x))


# Initial state: p, w and one (a, b, alpha) entry per function index 0..N
pt = 0.0
wt = np.zeros(d)
at, bt, alphat = (np.zeros(N+1) for _ in range(3))

# Run T steps and record the AUC on the full data set after each one
roc_auc = np.zeros(T)
batch = 1
for t in range(T):
    pt, wt, at, bt, alphat = SOLAM(t, batch, hinge, pt, wt, at, bt, alphat)
    fpr, tpr, _ = roc_curve(y, np.dot(x, wt))
    roc_auc[t] = auc(fpr, tpr)
    print('iteration: %d AUC: %f' % (t+1, roc_auc[t]))

iteration: 1 AUC: 0.500000
iteration: 2 AUC: 0.500000
iteration: 3 AUC: 0.568955
iteration: 4 AUC: 0.716127
iteration: 5 AUC: 0.571448
iteration: 6 AUC: 0.638388
iteration: 7 AUC: 0.415112
iteration: 8 AUC: 0.454776
iteration: 9 AUC: 0.541597
iteration: 10 AUC: 0.534075
iteration: 11 AUC: 0.597284
iteration: 12 AUC: 0.649910
iteration: 13 AUC: 0.547366
iteration: 14 AUC: 0.565843
iteration: 15 AUC: 0.589672
iteration: 16 AUC: 0.556104
iteration: 17 AUC: 0.518545
iteration: 18 AUC: 0.493418
iteration: 19 AUC: 0.582799
iteration: 20 AUC: 0.536134
iteration: 21 AUC: 0.595284
iteration: 22 AUC: 0.590590
iteration: 23 AUC: 0.627119
iteration: 24 AUC: 0.611560
iteration: 25 AUC: 0.637806
iteration: 26 AUC: 0.627649
iteration: 27 AUC: 0.624179
iteration: 28 AUC: 0.694358
iteration: 29 AUC: 0.698657
iteration: 30 AUC: 0.720358
iteration: 31 AUC: 0.705724
iteration: 32 AUC: 0.668343
iteration: 33 AUC: 0.718993
iteration: 34 AUC: 0.726142
iteration: 35 AUC: 0.698560
iteration: 36 AUC: 0.729134
i

In [18]:
wt

array([-0.0891991 , -0.07257935, -0.0165664 , -0.08278925, -0.1178296 ,
       -0.06898348, -0.08609111, -0.13859913])