In [1]:
import numpy as np

In [3]:
class OptimisationProblem:
    """Skeleton showing the members an optimisation problem object carries.

    Every method body is empty, so each one returns ``None``. The concrete
    problem classes in this notebook do not inherit from this class; they
    provide the same kind of members themselves (note that they name their
    initial-guess method ``random_guess_at_minimiser`` rather than
    ``random_guess``).
    """

    def __init__(self, parameters):
        """Store the result of ``random_guess`` as the current iterate ``x``.

        ``parameters`` is accepted but not used by this skeleton.
        """
        self.x = self.random_guess()

    def __call__(self, x):
        """Empty placeholder (returns ``None``).

        The concrete problems in this notebook return the loss at ``x`` here.
        """
        return None

    def gradient(self, x):
        """Empty placeholder (returns ``None``).

        The concrete problems in this notebook return the gradient at ``x``.
        """
        return None

    def random_guess(self):
        """Empty placeholder for the initial guess (returns ``None``)."""
        return None

In [4]:
class GradientDescentOptimiser:
    """Fixed-step gradient descent acting on a problem object.

    The problem object must expose an attribute ``x`` (the current iterate)
    and a method ``gradient(x)``. The optimiser overwrites
    ``optimisation_problem.x`` as it iterates; nothing is returned.

    Parameters
    ----------
    optimisation_problem : object
        Provides ``x`` and ``gradient(x)``.
    step_length : float
        Factor multiplying the gradient in every update.
    n_iter : int
        Maximum number of update steps.
    tol : float
        Iteration stops once ``np.linalg.norm`` of the gradient is below this.
    """

    def __init__(self, optimisation_problem, step_length, n_iter, tol):
        self.optimisation_problem = optimisation_problem
        self.step_length = step_length
        self.n_iter = n_iter
        self.tol = tol

    def gradient_step(self, x, gradient=None):
        """Return ``x - step_length * gradient``.

        ``gradient`` may be supplied when it has already been computed at
        ``x``; otherwise it is evaluated through the problem object.
        """
        if gradient is None:
            gradient = self.optimisation_problem.gradient(x)
        return x - self.step_length*gradient

    def optimise(self):
        """Run gradient descent, updating ``optimisation_problem.x`` in place.

        The gradient is evaluated once per iteration and shared between the
        convergence test and the update (previously it was computed twice).
        Convergence is tested before stepping, so an iterate that already
        satisfies the tolerance is left untouched.
        """
        problem = self.optimisation_problem
        for _ in range(self.n_iter):
            gradient = problem.gradient(problem.x)
            if self.check_convergence(gradient):
                break
            problem.x = self.gradient_step(problem.x, gradient)

    def check_convergence(self, gradient=None):
        """Return whether the gradient norm is below ``tol``.

        With no argument the gradient is evaluated at the problem's current
        ``x``; pass ``gradient`` to reuse a value computed elsewhere.
        """
        if gradient is None:
            current_x = self.optimisation_problem.x
            gradient = self.optimisation_problem.gradient(current_x)
        return np.linalg.norm(gradient) < self.tol

In [5]:
class LinearRegressionProblem:
    """Least-squares problem: minimise ``0.5 * sum((data_matrix @ x - target_vector)**2)``.

    The factor 0.5 makes ``gradient`` the exact derivative of ``__call__``.
    Previously the loss was the full sum of squares while the gradient was
    that of half of it. The minimiser is the same either way, and the gradient
    (hence the behaviour of any step length tuned against it) is unchanged.

    Note that a later cell of this notebook rebinds the name
    ``LinearRegressionProblem`` to a subclass of ``RidgeRegressionProblem``.

    Parameters
    ----------
    data_matrix : 2-D array
        One column per entry of ``x``.
    target_vector : array
        First axis matches the rows of ``data_matrix``; may be 1-D or 2-D.
    """

    def __init__(self, data_matrix, target_vector):
        self.data_matrix = data_matrix
        self.target_vector = target_vector
        # Current iterate; an optimiser is expected to overwrite this.
        self.x = self.random_guess_at_minimiser()

    def _residual(self, x):
        """Return ``data_matrix @ x - target_vector``."""
        return self.data_matrix@x - self.target_vector

    def __call__(self, x):
        """Return the loss, half the sum of squared residuals, at ``x``."""
        return 0.5*np.sum(self._residual(x)**2)

    def gradient(self, x):
        """Return the gradient of the loss at ``x``: ``data_matrix.T @ residual``."""
        return self.data_matrix.T@self._residual(x)

    def random_guess_at_minimiser(self):
        """Draw a standard-normal initial guess shaped to match the target.

        For a 2-D target of shape ``(n, k)`` the guess is ``(n_columns, k)``
        as before; a 1-D target now yields a 1-D guess instead of raising
        ``IndexError``.
        """
        guess_shape = (self.data_matrix.shape[1],) + self.target_vector.shape[1:]
        return np.random.standard_normal(guess_shape)

## Let us test these classes

In [6]:
# Seed NumPy's global generator so that "Restart & Run All" reproduces the
# same data, true weights and noise. The global (legacy) generator is seeded
# because the problem classes in this notebook draw their initial guesses
# through np.random.standard_normal as well.
SEED = 0
np.random.seed(SEED)

# Synthetic regression data: 100 rows, 5 columns.
data_matrix = np.random.standard_normal((100, 5))

# Ground-truth weights the optimiser should recover.
weights = np.random.standard_normal((5, 1))

# Noise-free targets, then the same targets perturbed by Gaussian noise
# scaled by 0.01.
target_vector = data_matrix@weights
noisy_target = target_vector + np.random.standard_normal(target_vector.shape)*0.01

In [9]:
# Least-squares fit of the noisy targets, solved by gradient descent.
linear_regression_problem = LinearRegressionProblem(
    data_matrix=data_matrix,
    target_vector=noisy_target,
)

# NOTE(review): the name `optimisation_problem` is bound to the optimiser
# object here, not to a problem; the name is kept so other cells still work.
optimisation_problem = GradientDescentOptimiser(
    linear_regression_problem,
    step_length=0.01,
    n_iter=1000,
    tol=1e-8,  # i.e. 10^{-8}
)
optimisation_problem.optimise()

# optimise() writes its iterates to linear_regression_problem.x, so the
# fitted weights are read back from the problem object.
print('The relative error between the true weights and the discovered weights are:')
print((weights - linear_regression_problem.x)/weights)

The relative error between the true weights and the discovered weights are:
[[-0.00090508]
 [-0.00556443]
 [ 0.00077474]
 [-0.00161982]
 [-0.00127895]]


In [10]:
class RidgeRegressionProblem:
    """Ridge regression problem.

    Minimises
    ``0.5 * (sum((data_matrix @ x - target_vector)**2) + regularisation_penalty * sum(x**2))``.

    The factor 0.5 makes ``gradient`` the exact derivative of ``__call__``.
    Previously the loss lacked that factor while the gradient already
    corresponded to it. The minimiser is the same either way, and the gradient
    (hence the behaviour of any step length tuned against it) is unchanged.

    Parameters
    ----------
    data_matrix : 2-D array
        One column per entry of ``x``.
    target_vector : array
        First axis matches the rows of ``data_matrix``; may be 1-D or 2-D.
    regularisation_penalty : float
        Weight of the squared-norm penalty on ``x``; 0 gives plain least squares.
    """

    def __init__(self, data_matrix, target_vector, regularisation_penalty):
        self.data_matrix = data_matrix
        self.target_vector = target_vector
        # Assign every hyper-parameter before drawing the initial guess so the
        # guess method (or an override of it) sees a fully initialised object.
        self.regularisation_penalty = regularisation_penalty
        # Current iterate; an optimiser is expected to overwrite this.
        self.x = self.random_guess_at_minimiser()

    def _residual(self, x):
        """Return ``data_matrix @ x - target_vector``."""
        return self.data_matrix@x - self.target_vector

    def __call__(self, x):
        """Return the regularised loss at ``x``."""
        data_term = np.sum(self._residual(x)**2)
        penalty_term = self.regularisation_penalty*np.sum(x**2)
        return 0.5*(data_term + penalty_term)

    def gradient(self, x):
        """Return the gradient of the regularised loss at ``x``."""
        data_gradient = self.data_matrix.T@self._residual(x)
        penalty_gradient = self.regularisation_penalty*x
        return data_gradient + penalty_gradient

    def random_guess_at_minimiser(self):
        """Draw a standard-normal initial guess shaped to match the target.

        For a 2-D target of shape ``(n, k)`` the guess is ``(n_columns, k)``
        as before; a 1-D target now yields a 1-D guess instead of raising
        ``IndexError``.
        """
        guess_shape = (self.data_matrix.shape[1],) + self.target_vector.shape[1:]
        return np.random.standard_normal(guess_shape)

class LinearRegressionProblem(RidgeRegressionProblem):
    """Linear regression expressed as ridge regression with a penalty of 0."""

    def __init__(self, data_matrix, target_vector):
        """Forward the data to ``RidgeRegressionProblem`` with the penalty fixed at 0."""
        no_penalty = 0
        super().__init__(data_matrix, target_vector, no_penalty)

In [11]:
# Same experiment as the earlier linear-regression cell, now using the
# LinearRegressionProblem that subclasses RidgeRegressionProblem.
linear_regression_problem = LinearRegressionProblem(
    data_matrix=data_matrix,
    target_vector=noisy_target,
)

# NOTE(review): the name `optimisation_problem` is bound to the optimiser
# object here, not to a problem; the name is kept so other cells still work.
optimisation_problem = GradientDescentOptimiser(
    linear_regression_problem,
    step_length=0.01,
    n_iter=1000,
    tol=1e-8,  # i.e. 10^{-8}
)
optimisation_problem.optimise()

# optimise() writes its iterates to linear_regression_problem.x, so the
# fitted weights are read back from the problem object.
print('The relative error between the true weights and the discovered weights are:')
print((weights - linear_regression_problem.x)/weights)

The relative error between the true weights and the discovered weights are:
[[-0.00090508]
 [-0.00556443]
 [ 0.00077474]
 [-0.00161982]
 [-0.00127895]]


In [12]:
# Ridge regression on the noisy targets with a regularisation penalty of 0.1.
# NOTE(review): `linear_regression_problem` holds a RidgeRegressionProblem in
# this cell; the name is kept so other cells still work.
linear_regression_problem = RidgeRegressionProblem(
    data_matrix,
    noisy_target,
    regularisation_penalty=0.1,
)

# NOTE(review): likewise, `optimisation_problem` is bound to the optimiser
# object rather than to a problem.
optimisation_problem = GradientDescentOptimiser(
    linear_regression_problem,
    step_length=0.01,
    n_iter=1000,
    tol=1e-8,  # i.e. 10^{-8}
)
optimisation_problem.optimise()

# optimise() writes its iterates to linear_regression_problem.x, so the
# fitted weights are read back from the problem object.
print('The relative error between the true weights and the discovered weights are:')
print((weights - linear_regression_problem.x)/weights)

The relative error between the true weights and the discovered weights are:
[[ 9.61645523e-05]
 [-2.40643614e-03]
 [ 1.80312096e-03]
 [-2.54639353e-04]
 [-5.31864793e-04]]
