# Example

In [1]:
include("example_logistic.jl")

numberOfNonZero = 101
trainError = 0.038
validError = 0.106


0.106

# 2.1

In [2]:
include("logReg.jl")

logRegOnevsAll (generic function with 1 method)

In [3]:
using Statistics

# Load X and y variable
using JLD
data = load("logisticData.jld")
(X,y,Xtest,ytest) = (data["X"],data["y"],data["Xtest"],data["ytest"])

# Standardize columns and add bias
n = size(X,1)
include("misc.jl")
(X,mu,sigma) = standardizeCols(X)
X = [ones(n,1) X]

# Standardize columns of test data, using mean/std from train data
t = size(Xtest,1)
Xtest = standardizeCols(Xtest,mu=mu,sigma=sigma)
Xtest = [ones(t,1) Xtest]

# Fit logistic regression model
model = logReg(X,y)

# Count number of non-zeroes in model
numberOfNonZero = sum(model.w .!= 0)
@show(numberOfNonZero)

# Compute training and validation error
yhat = model.predict(X)
trainError = mean(yhat .!= y)
@show(trainError)
yhat = model.predict(Xtest)
validError = mean(yhat .!= ytest)
@show(validError)

User and numerical derivatives agree
Backtracking
Backtracking
Backtracking
Backtracking
     1     4.72491e-02     2.06433e+02     2.11866e+01
     2     3.92532e-02     5.75468e+01     1.00189e+01
     3     2.96095e-02     2.72500e+01     8.69643e+00
     4     2.43376e-02     1.92787e+01     4.09251e+00
     5     2.76717e-02     1.65306e+01     2.62678e+00
     6     7.27270e-02     1.48840e+01     2.19912e+00
     7     1.15654e-01     1.23718e+01     1.24129e+00
     8     8.35542e-02     1.06939e+01     1.02576e+00
     9     3.09735e-02     1.01964e+01     1.48468e+00
    10     2.91093e-02     9.74869e+00     8.36714e-01
    11     5.18505e-02     9.57607e+00     7.40687e-01
    12     3.80860e-01     9.30237e+00     7.03919e-01
    13     4.47726e-01     7.85848e+00     5.35069e-01
    14     5.12262e-02     7.12535e+00     1.02454e+00
    15     3.44280e-02     6.88496e+00     6.56831e-01
    16     3.67468e-02     6.76587e+00     4.95980e-01
    17     8.59505e-02     6.70

0.082

# 2.2

In [4]:
function logRegL2(X,y, lambda)

	(n,d) = size(X)

	# Initial guess
	w = zeros(d,1)

	# Function we're going to minimize (and that computes gradient)
	funObj(w) = logRegL2Obj(w,X,y,lambda)

	# Solve least squares problem
	w = findMin(funObj,w,derivativeCheck=true)

	# Make linear prediction function
	predict(Xhat) = sign.(Xhat*w)

	# Return model
	return LinearModel(predict,w)
end

function logRegL2Obj(w,X,y,lambda)
	yXw = y.*(X*w)
	f = sum(log.(1 .+ exp.(-yXw))) + lambda/2 * (w'*w)[1]
	g = -X'*(y./(1 .+ exp.(yXw))) .+ lambda * w
	return (f,g)
end

logRegL2Obj (generic function with 1 method)

In [5]:
using Statistics

# Load X and y variable
using JLD
data = load("logisticData.jld")
(X,y,Xtest,ytest) = (data["X"],data["y"],data["Xtest"],data["ytest"])

# Standardize columns and add bias
n = size(X,1)
include("misc.jl")
(X,mu,sigma) = standardizeCols(X)
X = [ones(n,1) X]

# Standardize columns of test data, using mean/std from train data
t = size(Xtest,1)
Xtest = standardizeCols(Xtest,mu=mu,sigma=sigma)
Xtest = [ones(t,1) Xtest]

# Fit logistic regression model
model = logRegL2(X,y,1)

# Count number of non-zeroes in model
numberOfNonZero = sum(model.w .!= 0)
@show(numberOfNonZero)

# Compute training and validation error
yhat = model.predict(X)
trainError = mean(yhat .!= y)
@show(trainError)
yhat = model.predict(Xtest)
validError = mean(yhat .!= ytest)
@show(validError)

User and numerical derivatives agree
Backtracking
Backtracking
Backtracking
     1     5.11540e-02     3.36073e+02     2.30538e+01
     2     4.19153e-02     1.53827e+02     1.03013e+01
     3     3.08338e-02     1.13033e+02     9.37973e+00
     4     2.43796e-02     1.00284e+02     5.65776e+00
     5     2.63624e-02     9.44993e+01     4.87980e+00
     6     7.07661e-02     9.00551e+01     4.64738e+00
     7     1.25564e-01     8.06443e+01     4.14572e+00
     8     9.49034e-02     6.94422e+01     3.31385e+00
     9     2.31279e-02     6.63664e+01     4.38126e+00
    10     1.96151e-02     6.29106e+01     3.10037e+00
    11     2.06794e-02     6.20246e+01     2.88225e+00
    12     1.42171e-01     6.12560e+01     2.74290e+00
    13     3.17413e-01     5.70009e+01     1.98575e+00
    14     5.16579e-02     5.44398e+01     2.84188e+00
Backtracking
    15     1.62792e-02     5.33997e+01     1.01619e+00
    16     1.71668e-02     5.32717e+01     6.02078e-01
    17     2.83400e-02     5.32

0.074

# 2.3

In [6]:
function logRegL1(X,y, lambda)

	(n,d) = size(X)

	# Initial guess
	w = zeros(d,1)

	# Function we're going to minimize (and that computes gradient)
	funObj(w) = logRegL1Obj(w,X,y,lambda)

	# Solve least squares problem
	w = findMinL1(funObj,w,lambda)

	# Make linear prediction function
	predict(Xhat) = sign.(Xhat*w)

	# Return model
	return LinearModel(predict,w)
end

function logRegL1Obj(w,X,y,lambda)
	yXw = y.*(X*w)
	f = sum(log.(1 .+ exp.(-yXw))) + lambda * sum(w)
	g = -X'*(y./(1 .+ exp.(yXw))) .+ lambda * sign.(w)
	return (f,g)
end

logRegL1Obj (generic function with 1 method)

In [7]:
using Statistics

# Load X and y variable
using JLD
data = load("logisticData.jld")
(X,y,Xtest,ytest) = (data["X"],data["y"],data["Xtest"],data["ytest"])

# Standardize columns and add bias
n = size(X,1)
include("misc.jl")
(X,mu,sigma) = standardizeCols(X)
X = [ones(n,1) X]

# Standardize columns of test data, using mean/std from train data
t = size(Xtest,1)
Xtest = standardizeCols(Xtest,mu=mu,sigma=sigma)
Xtest = [ones(t,1) Xtest]

# Fit logistic regression model
model = logRegL1(X,y,1)

# Count number of non-zeroes in model
numberOfNonZero = sum(model.w .!= 0)
@show(numberOfNonZero)

# Compute training and validation error
yhat = model.predict(X)
trainError = mean(yhat .!= y)
@show(trainError)
yhat = model.predict(Xtest)
validError = mean(yhat .!= ytest)
@show(validError)

Backtracking
Backtracking
Backtracking
Backtracking
     1     5.32557e-02     3.06850e+02     2.23078e+01
     2     4.26794e-02     1.32336e+02     9.99069e+00
     3     2.97789e-02     9.89444e+01     7.55319e+00
     4     2.55205e-02     8.71133e+01     3.98658e+00
     5     2.31706e-02     8.33453e+01     4.16008e+00
     6     1.93780e-02     8.13368e+01     4.05738e+00
     7     1.80533e-02     8.00931e+01     3.99930e+00
     8     6.50772e-03     7.91150e+01     3.84458e+00
     9     7.45228e-03     7.87918e+01     3.80092e+00
    10     3.13788e-04     7.84598e+01     3.76834e+00
    11     4.27337e-04     7.84459e+01     3.76663e+00
    12     1.00000e+00     7.84276e+01     3.76482e+00
Backtracking
Backtracking
Backtracking
    13     2.50317e-02     7.54797e+01     3.85228e+00
    14     2.74552e-02     7.40741e+01     3.42401e+00
    15     6.18214e-03     7.37072e+01     3.58578e+00
    16     5.67209e-03     7.35794e+01     3.48444e+00
    17     1.65766e-03     7.

Backtracking
Backtracking
Backtracking
Backtracking
Backtracking
Backtracking
Backtracking
Backtracking
Backtracking
Backtracking
Backtracking
Backtracking
Backtracking
Backtracking
Backtracking
Backtracking
Backtracking
Backtracking
Backtracking
Backtracking
Backtracking
Backtracking
Backtracking
Backtracking
Backtracking
Backtracking
Backtracking
Backtracking
    67     1.00000e+00     6.86025e+01     1.72813e+00
Backtracking
Backtracking
Backtracking
Backtracking
Backtracking
Backtracking
Backtracking
Backtracking
Backtracking
Backtracking
Backtracking
Backtracking
Backtracking
Backtracking
Backtracking
Backtracking
Backtracking
Backtracking
Backtracking
Backtracking
Backtracking
Backtracking
Backtracking
Backtracking
Backtracking
Backtracking
Backtracking
Backtracking
Backtracking
Backtracking
Backtracking
Backtracking
Backtracking
Backtracking
Backtracking
Backtracking
Backtracking
Backtracking
Backtracking
Backtracking
Backtracking
Backtracking
Backtracking
Backtracking
Backtrack

Backtracking
Backtracking
Backtracking
Backtracking
Backtracking
Backtracking
Backtracking
Backtracking
Backtracking
Backtracking
Backtracking
Backtracking
Backtracking
Backtracking
Backtracking
Backtracking
    78     1.00000e+00     6.86025e+01     1.72813e+00
Backtracking
Backtracking
Backtracking
Backtracking
Backtracking
Backtracking
Backtracking
Backtracking
Backtracking
Backtracking
Backtracking
Backtracking
Backtracking
Backtracking
Backtracking
Backtracking
Backtracking
Backtracking
Backtracking
Backtracking
Backtracking
Backtracking
Backtracking
Backtracking
Backtracking
Backtracking
Backtracking
Backtracking
Backtracking
Backtracking
Backtracking
Backtracking
Backtracking
Backtracking
Backtracking
Backtracking
Backtracking
Backtracking
Backtracking
Backtracking
Backtracking
Backtracking
Backtracking
Backtracking
Backtracking
Backtracking
Backtracking
Backtracking
Backtracking
Backtracking
Backtracking
Backtracking
Backtracking
Backtracking
Backtracking
Backtracking
Backtrack

Backtracking
Backtracking
Backtracking
Backtracking
    87     1.00000e+00     6.86025e+01     1.72813e+00
Backtracking
Backtracking
Backtracking
Backtracking
Backtracking
Backtracking
Backtracking
Backtracking
Backtracking
Backtracking
Backtracking
Backtracking
Backtracking
Backtracking
Backtracking
Backtracking
Backtracking
Backtracking
Backtracking
Backtracking
Backtracking
Backtracking
Backtracking
Backtracking
Backtracking
Backtracking
Backtracking
Backtracking
Backtracking
Backtracking
Backtracking
Backtracking
Backtracking
Backtracking
Backtracking
Backtracking
Backtracking
Backtracking
Backtracking
Backtracking
Backtracking
Backtracking
Backtracking
Backtracking
Backtracking
Backtracking
Backtracking
Backtracking
Backtracking
Backtracking
Backtracking
Backtracking
Backtracking
Backtracking
Backtracking
Backtracking
Backtracking
Backtracking
Backtracking
Backtracking
Backtracking
Backtracking
Backtracking
Backtracking
Backtracking
Backtracking
Backtracking
Backtracking
Backtrack

Backtracking
Backtracking
Backtracking
Backtracking
Backtracking
Backtracking
Backtracking
Backtracking
Backtracking
Backtracking
Backtracking
Backtracking
Backtracking
Backtracking
Backtracking
Backtracking
Backtracking
Backtracking
Backtracking
Backtracking
Backtracking
Backtracking
    95     1.00000e+00     6.86025e+01     1.72813e+00
Backtracking
Backtracking
Backtracking
Backtracking
Backtracking
Backtracking
Backtracking
Backtracking
Backtracking
Backtracking
Backtracking
Backtracking
Backtracking
Backtracking
Backtracking
Backtracking
Backtracking
Backtracking
Backtracking
Backtracking
Backtracking
Backtracking
Backtracking
Backtracking
Backtracking
Backtracking
Backtracking
Backtracking
Backtracking
Backtracking
Backtracking
Backtracking
Backtracking
Backtracking
Backtracking
Backtracking
Backtracking
Backtracking
Backtracking
Backtracking
Backtracking
Backtracking
Backtracking
Backtracking
Backtracking
Backtracking
Backtracking
Backtracking
Backtracking
Backtracking
Backtrack

0.038

# 2.3

In [8]:
# Variant where we use forward selection for feature selection
function logRegL0(X,y,lambda)
	(n,d) = size(X)

	# Define an objective that will operate on a subset of the data called Xs
	funObj(w) = logisticObj(w,Xs,y)

	# Start out just using the bias variable (assumed to be in first column),
	# and record 'score' which is the loss plus regularizer
	S = [1] # Candidate set of features
	Xs = X[:,S]
	w = zeros(length(S),1)
	w = findMin(funObj,w,verbose=false)
	(f,~) = funObj(w)
	score = f + lambda*length(S)
	minScore = score # Lowest score we've found
	minS = S # Best set of features we've found

	@show(minScore)
	@show(minS)

	# Greedily start adding the variable that improves the score the most
	oldScore = Inf
	while minScore != oldScore
		oldScore = minScore

		# Print out the variables we've selected so far
		@printf("Current set of selected variables (score = %f):\n",minScore)
		for j in 1:length(S)
			@printf("%d ",S[j])
		end
		@printf("\n")

		for j in setdiff(1:d,S)
			# Fit the model with 'j' added to the feature set 'S'
			# then compute the score and update 'minScore' and 'minS'
			Sj = [S;j]
			Xs = X[:,Sj]

			# PUT YOUR CODE HERE
            w = zeros(length(Sj),1)
            w = findMin(funObj,w,verbose=false)
            (f,~) = funObj(w)
            score = f + lambda*length(S)
            
            if score < minScore
                minScore = score
                minS = Sj
            end
		end
		S = minS
	end

	# Construct final 'w' vector
	w = zeros(d,1)
	S = minS
	Xs = X[:,S]
	w[S] = findMin(funObj,zeros(length(S),1),verbose=false)

	# Make linear prediction function
	predict(Xhat) = sign.(Xhat*w)

	# Return model
	return LinearModel(predict,w)
end

logRegL0 (generic function with 1 method)

In [9]:
using Statistics

# Load X and y variable
using JLD
data = load("logisticData.jld")
(X,y,Xtest,ytest) = (data["X"],data["y"],data["Xtest"],data["ytest"])

# Standardize columns and add bias
n = size(X,1)
include("misc.jl")
(X,mu,sigma) = standardizeCols(X)
X = [ones(n,1) X]

# Standardize columns of test data, using mean/std from train data
t = size(Xtest,1)
Xtest = standardizeCols(Xtest,mu=mu,sigma=sigma)
Xtest = [ones(t,1) Xtest]

# Fit logistic regression model
model = logRegL0(X,y,1)

# Count number of non-zeroes in model
numberOfNonZero = sum(model.w .!= 0)
@show(numberOfNonZero)

# Compute training and validation error
yhat = model.predict(X)
trainError = mean(yhat .!= y)
@show(trainError)
yhat = model.predict(Xtest)
validError = mean(yhat .!= ytest)
@show(validError)

minScore = 347.1734835449823
minS = [1]
Current set of selected variables (score = 347.173484):
1 
Current set of selected variables (score = 304.914385):
1 79 
Current set of selected variables (score = 276.768190):
1 79 37 
Current set of selected variables (score = 250.586141):
1 79 37 67 
Current set of selected variables (score = 224.511446):
1 79 37 67 71 
Current set of selected variables (score = 211.007657):
1 79 37 67 71 2 
Current set of selected variables (score = 197.745378):
1 79 37 67 71 2 7 
Current set of selected variables (score = 185.332050):
1 79 37 67 71 2 7 89 
Current set of selected variables (score = 171.630054):
1 79 37 67 71 2 7 89 31 
Current set of selected variables (score = 158.530607):
1 79 37 67 71 2 7 89 31 19 
Current set of selected variables (score = 144.884724):
1 79 37 67 71 2 7 89 31 19 43 
Current set of selected variables (score = 131.733022):
1 79 37 67 71 2 7 89 31 19 43 47 
Current set of selected variables (score = 123.925317):
1 79 37 67 

0.018