In [19]:
import time
import numpy as np
import optimization.quasi_newton as qn
import optimization.nconj_grad as ncg
import matplotlib.pyplot as plot
import opt_in_cv.log_reg as lr
from sklearn.model_selection import train_test_split
from sklearn.metrics import precision_score
from functools import partial

# Hyper-parameters (tune these here; later cells read them by name)
TEST_SIZE: float = 0.1  # passed to train_test_split as test_size (held-out fraction)
LMBDA: float = 0.1  # regularization parameter bound into the lr cost/grad/Hessian

In [20]:
# load data
X, y = lr.load_MNST_data()

# Fixed seed so the train/test split (and therefore every reported precision)
# is reproducible across kernel restarts.
SPLIT_SEED = 0
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=TEST_SIZE, random_state=SPLIT_SEED
)

# Binary task: only recognize digit 1 (label is True where the digit equals 1).
y_train = (y_train == 1).flatten()
y_test = (y_test == 1).flatten()

# Starting point for the optimizers: one zero coefficient per feature column.
train_size, feature_num = X_train.shape
theta = np.zeros(feature_num)

In [21]:
# Bind the training data and regularization parameter once, so that the loss,
# its gradient and its Hessian become functions of theta only.
_train_kwargs = dict(X=X_train, y=y_train, lmbda=LMBDA)
f = partial(lr.costFunctionReg, **_train_kwargs)
f_grad = partial(lr.gradRegularization, **_train_kwargs)
f_hassian = partial(lr.Hessian, **_train_kwargs)
def f_h_approx(theta):
    """Build an approximate Hessian at ``theta``, one row at a time.

    Row ``i`` is whatever ``lr.finite_diff(f_grad, theta, i)`` returns
    (presumably a finite-difference estimate of the gradient's derivative
    along coordinate ``i`` -- confirm against ``lr.finite_diff``).

    Returns a ``(feature_num, feature_num)`` array.
    """
    # np.zeros takes the shape as a single tuple; a second positional
    # argument would be interpreted as the dtype and raise TypeError.
    h_approx = np.zeros((feature_num, feature_num))
    for i in range(feature_num):
        h_approx[i] = lr.finite_diff(f_grad, theta, i)
    return h_approx

# Bundle loss, gradient and Hessian into a single callable.
def g(x):
    """Return the triple ``(f(x), f_grad(x), f_hassian(x))``."""
    value = f(x)
    gradient = f_grad(x)
    hessian = f_hassian(x)
    return (value, gradient, hessian)

# return train precision and test precision
def mnist_tester(theta):
    """Score ``theta`` on the train and test splits.

    @params theta: model parameter vector, multiplied against X_train / X_test
    return (train_prec, test_prec) as computed by sklearn's precision_score
    """
    def predict(X):
        # Assumes lr models P(y=1|x) = sigmoid(x @ theta) -- confirm against
        # opt_in_cv.log_reg. Under that model P > 0.5 is equivalent to a raw
        # score > 0, so the linear score must be thresholded at 0, not 0.5.
        return ((X @ theta) > 0).flatten()

    train_prec = precision_score(y_train, predict(X_train))
    test_prec = precision_score(y_test, predict(X_test))
    return (train_prec, test_prec)
def train(theta, func, optimizer, tester=None, eps=1e-12):
    '''
    given model and optimizer, return wanted stats
    @params theta: model parameter, should be a vector of size "feature_num"
    @params func: target function, should only take 1 parameter (which should be theta)
    @params optimizer: optimizer, called as optimizer(func, theta, epsilon=eps);
                       must return (final_params, num_iter)
    @params tester: optional tester, called with final_params and returning
                    (train_prec, test_prec); if not provided, both are None
    @params eps: forwarded to the optimizer as its "epsilon" keyword
                 (presumably the convergence tolerance -- confirm against the optimizer)
    return (num_iter, runtime, train_prec, test_prec, final_params),
           where runtime is the optimizer's wall-clock time in seconds
    '''
    # perf_counter is monotonic and high-resolution, so it is the right clock
    # for measuring an interval; the difference is already in seconds.
    start = time.perf_counter()
    soln, niters = optimizer(func, theta, epsilon=eps)
    runtime = time.perf_counter() - start

    if tester is not None:
        train_prec, test_prec = tester(soln)
    else:
        train_prec, test_prec = None, None
    return (niters, runtime, train_prec, test_prec, soln)

In [22]:
# Run nonlinear conjugate gradient on g from the zero start and show the stats.
# NOTE(review): g also evaluates the Hessian on every call, and the saved run
# of this cell ended in a KeyboardInterrupt -- presumably ncg does not need the
# Hessian; confirm the func signature it expects.
train(theta, g, ncg.nonlinear_conjugate_gradient, tester=mnist_tester)

KeyboardInterrupt: 