In [1]:
import os
import numpy as np
import multiprocessing as mp
import pandas as pd
from sklearn.datasets import load_svmlight_file
from sklearn.utils import shuffle
from sklearn.model_selection import RepeatedKFold
from sauc_ import SAUC
from get_idx import get_idx

In [2]:
def single_run(para):

    '''
    Run one cross-validation task; mapping function for the multiprocessing
    pool used in cv.

    Reads the module-level globals X, y and options. The global options dict
    is copied, never modified.

    input:
        para - tuple (i, train_index, test_index, m)
            i           - index of the cross-validation split
            train_index - indices selecting the training rows of X and y
            test_index  - indices selecting the test rows of X and y
            m           - value stored under options['m'] for this run
    output:
        (i, m, roc_auc) - split index, the m value used, and the second
                          value returned by SAUC
    '''

    # unfold parameters
    i, train_index, test_index, m = para

    # Work on a private copy so the shared module-level options dict is not
    # mutated by individual runs.
    run_options = dict(options)
    run_options['ids'] = get_idx(len(train_index), run_options['n_pass'])

    # X and y are read from module globals instead of being passed in para
    # (original note: this avoids multiprocessing sharing)
    X_train, X_test = X[train_index], X[test_index]
    y_train, y_test = y[train_index], y[test_index]

    # Define model parameter
    run_options['m'] = m

    # implement algorithm; the first returned value is not used here
    _elapsed_time, roc_auc = SAUC(X_train, X_test, y_train, y_test, run_options)

    return i, m, roc_auc

In [3]:
def cv(num_cpus, n_splits, n_repeats, M):

    '''
    Repeated K-fold cross validation evaluated on a multiprocessing pool.

    One task is created for every (split, m) combination and handed to
    single_run. The splits are generated over the module-level array X.

    input:
        num_cpus  - number of worker processes in the pool
        n_splits  - n_splits passed to RepeatedKFold
        n_repeats - n_repeats passed to RepeatedKFold
        M         - iterable of m values evaluated on every split
    output:
        DataFrame with one column per (split index, m) tuple, holding the
        roc_auc value returned by single_run for that task
    '''

    # one task per (split, m) pair, splits outermost
    splitter = RepeatedKFold(n_splits=n_splits, n_repeats=n_repeats, random_state=7)
    tasks = [
        (fold, tr_idx, te_idx, m)
        for fold, (tr_idx, te_idx) in enumerate(splitter.split(X))
        for m in M
    ]

    # fan the tasks out over the worker processes
    with mp.Pool(processes=num_cpus) as workers:
        outcomes = workers.map(single_run, tasks)
        workers.close()
        workers.join()

    # gather the results, one column per (split, m)
    ROC_AUC = pd.DataFrame()
    for fold, m, auc in outcomes:
        ROC_AUC[(fold, m)] = auc

    return ROC_AUC

In [51]:
# Define what to run this time
datasets = ['svmguide1']
names = ['hinge']
num_cpus = 15
n_splits = 3
n_repeats = 1

# Later cells refer to the singular `dataset` and `name`; bind them here so the
# notebook also runs from a fresh kernel (Restart & Run All).
dataset = datasets[0]
name = names[0]

# Options dict handed to SAUC (single_run adds 'ids' and 'm' per task)
options = {}
options['n_pass'] = 1
options['rec'] = .5
options['name'] = 'hinge'

# Define model parameter
options['R'] = .01
options['c'] = 1

# Define model parameter to search
M = [25]

In [7]:
# Read the svmlight file for `dataset`, convert the features with .toarray(),
# then shuffle X and y together with a fixed seed.
print('Loading dataset = %s ......' % dataset, end=' ')
X, y = load_svmlight_file('/home/neyo/PycharmProjects/AUC/bi-datasets/%s' % dataset)
X, y = shuffle(X.toarray(), y, random_state=7)
print('Done!')

Loading dataset = svmguide1 ...... Done!


In [55]:
# Resume from previously saved results when the file exists, otherwise start empty.
results_path = '/home/neyo/PycharmProjects/AUC/results/deg_%s_%s.h5' % (name, dataset)

if os.path.isfile(results_path):
    # NOTE: despite the .h5 suffix this file is read as a pickle; only load
    # pickle files from a trusted source.
    df = pd.read_pickle(results_path)
else:
    print('No saved results at %s; starting from an empty table.' % results_path)
    # Define df on this branch too, so cells that display it do not raise NameError.
    df = pd.DataFrame()

# {column: {index: value}}; an empty frame yields {}
result = df.to_dict()

In [58]:
# Display the results table loaded above.
df

Unnamed: 0,0,1,2,5,10,25,40
0,,0 0.500000 1 0.607765 2 0.878580 3...,0 0.500000 1 0.672667 2 0.870154 3...,0 0.500000 1 0.676556 2 0.872584 3...,0 0.500000 1 0.679993 2 0.872315 3...,0 0.500000 1 0.679993 2 0.872315 3...,0 0.500000 1 0.679993 2 0.872441 3...
1,,0 0.500000 1 0.727633 2 0.835064 3...,0 0.500000 1 0.788411 2 0.852943 3...,0 0.500000 1 0.788411 2 0.849306 3...,0 0.500000 1 0.788411 2 0.849235 3...,0 0.500000 1 0.788411 2 0.849235 3...,0 0.500000 1 0.788411 2 0.849273 3...
2,0 0.500000 1 0.672667 2 0.870154 3...,0 0.500000 1 0.788411 2 0.852943 3...,0 0.500000 1 0.697927 2 0.794708 3...,0 0.500000 1 0.697189 2 0.788894 3...,0 0.500000 1 0.696460 2 0.788999 3...,0 0.500000 1 0.696460 2 0.788999 3...,0 0.500000 1 0.696645 2 0.789180 3...


In [53]:
roc_auc = cv(num_cpus, n_splits, n_repeats, M)

# Merge the new values into `result`, laid out as result[m][split index].
for m in M:

    # `result` may be empty or may not contain this m yet; create the inner
    # dict on first use instead of raising KeyError.
    per_split = result.setdefault(m, {})

    for i in range(n_splits * n_repeats):

        per_split[i] = roc_auc[(i, m)]

# Results
df = pd.DataFrame(result)
df.to_pickle('/home/neyo/PycharmProjects/AUC/results/deg_%s_%s.h5' % (name,dataset))

SAUC with loss = hinge N = 25 R = 0 gamma = 55028238054572798297324813986260909068123917836858949632.00 c = 1
SAUC with loss = hinge N = 25 R = 0 gamma = 55028238054572798297324813986260909068123917836858949632.00 c = 1
SAUC with loss = hinge N = 25 R = 0 gamma = 55028238054572798297324813986260909068123917836858949632.00 c = 1
iteration: 10 AUC: 0.872918 time elapsed: 0.01
iteration: 10 AUC: 0.868685 time elapsed: 0.01
iteration: 10 AUC: 0.867016 time elapsed: 0.01
iteration: 20 AUC: 0.867205 time elapsed: 0.03
iteration: 20 AUC: 0.872360 time elapsed: 0.04
iteration: 20 AUC: 0.869987 time elapsed: 0.04
iteration: 30 AUC: 0.867809 time elapsed: 0.09
iteration: 30 AUC: 0.874862 time elapsed: 0.09
iteration: 30 AUC: 0.872964 time elapsed: 0.09
iteration: 40 AUC: 0.867054 time elapsed: 0.15
iteration: 40 AUC: 0.872723 time elapsed: 0.15
iteration: 40 AUC: 0.874263 time elapsed: 0.16
iteration: 50 AUC: 0.872154 time elapsed: 0.22
iteration: 50 AUC: 0.866638 time elapsed: 0.23
iteration: 5

In [54]:
# Display the results DataFrame as the cell output.
df

Unnamed: 0,0,1,2,5,10,25,40
0,,0 0.500000 1 0.607765 2 0.878580 3...,0 0.500000 1 0.672667 2 0.870154 3...,0 0.500000 1 0.610467 2 0.878328 3...,0 0.500000 1 0.610882 2 0.878273 3...,0 0.500000 1 0.679993 2 0.872315 3...,0 0.500000 1 0.679993 2 0.872441 3...
1,,0 0.500000 1 0.727633 2 0.835064 3...,0 0.500000 1 0.788411 2 0.852943 3...,0 0.500000 1 0.727633 2 0.835076 3...,0 0.500000 1 0.727633 2 0.835072 3...,0 0.500000 1 0.788411 2 0.849235 3...,0 0.500000 1 0.788411 2 0.849273 3...
2,0 0.500000 1 0.672667 2 0.870154 3...,0 0.500000 1 0.788411 2 0.852943 3...,0 0.500000 1 0.697927 2 0.794708 3...,,,0 0.500000 1 0.696460 2 0.788999 3...,0 0.500000 1 0.696645 2 0.789180 3...


In [175]:
def _binomial_table(max_n):
    '''
    Build {n: {k: C(n, k)}} for 0 <= k <= n <= max_n using Pascal's rule
    C(n, k) = C(n-1, k-1) + C(n-1, k).
    '''
    table = {0: {0: 1}}
    for n in range(1, max_n + 1):
        above = table[n - 1]
        # entries missing from the previous row count as 0
        table[n] = {k: above.get(k - 1, 0) + above.get(k, 0) for k in range(n + 1)}
    return table


# Lookup table of binomial coefficients: comb_dict[n][k] == C(n, k) for n = 0..10
comb_dict = _binomial_table(10)