In [42]:
#https://lulaoshi.info/machine-learning/linear-model/minimise-loss-function

import numpy as np

class LinearRegression:
    """
    Linear regression fitted by gradient descent.

    The model is ``X.dot(W)`` with no separate intercept term; append a
    constant column to X if a bias is wanted.
    """

    def __init__(self):
        # the weight vector, shape (num_of_features, 1); stays None until
        # train() creates it or the caller assigns one
        self.W = None

    def train(self, X, y, method='bgd', learning_rate=1e-2, num_iters=100, verbose=False):
        """
        Train linear regression using batch gradient descent or stochastic gradient descent

        Training continues from the current self.W when one is already set,
        so calling train() repeatedly resumes optimization instead of
        restarting it.

        Parameters
        ----------
        X: training data, shape (num_of_samples x num_of_features), num_of_samples rows of training sample, each training sample has num_of_features-dimension features.
        y: target, shape (num_of_samples, 1).
        method: (string) 'sgd' for Stochastic Gradient Descent (one randomly
            chosen sample per step); any other value, including the default
            'bgd', runs Batch Gradient Descent on the full data set
        learning_rate: (float) learning rate or alpha
        num_iters: (integer) number of steps to iterate for optimization
        verbose: (boolean) if True, print out the progress

        Returns
        -------
        losses_history: (list) of losses at each training iteration
        """
        num_of_samples, num_of_features = X.shape

        if self.W is None:
            # initialize weights with small random values
            # shape (num_of_features, 1)
            self.W = np.random.randn(num_of_features, 1) * 0.001
        losses_history = []

        # Progress is reported about ten times per run. An integer interval
        # (at least 1) avoids comparing a float modulus to zero, which is
        # unreliable when num_iters is not a multiple of 10.
        report_every = max(1, num_iters // 10)

        for i in range(num_iters):

            if method == 'sgd':
                # randomly choose a sample; np.newaxis keeps it 2-D (1 row)
                idx = np.random.choice(num_of_samples)
                loss, grad = self.loss_and_gradient(X[idx, np.newaxis], y[idx, np.newaxis])
            else:
                loss, grad = self.loss_and_gradient(X, y)
            losses_history.append(loss)

            # Update weights using matrix computing (vectorized).
            # Rebind rather than use `-=`: an in-place update would modify an
            # array the caller assigned to self.W, and fails outright when
            # that array has an integer dtype.
            self.W = self.W - learning_rate * grad

            if verbose and i % report_every == 0:
                print('iteration %d / %d : loss %f' %(i, num_iters, loss))
        return losses_history


    def predict(self, X):
        """
        Predict value of y using trained weights

        Parameters
        ----------
        X: predict data, shape (num_of_samples x num_of_features), each row is a sample with num_of_features-dimension features.

        Returns
        -------
        pred_ys: predicted data, shape (num_of_samples, 1)
        """
        pred_ys = X.dot(self.W)
        return pred_ys


    def loss_and_gradient(self, X, y, vectorized=True):
        """
        Compute the loss and gradients

        Parameters
        ----------
        X, y: the same as in self.train
        vectorized: (boolean) if True use the NumPy implementation,
            otherwise the explicit Python-loop implementation

        Returns
        -------
        tuple of two items (loss, gradient)
        loss: (float)
        gradient: (array) with respect to self.W
        """
        if vectorized:
            return linear_loss_grad_vectorized(self.W, X, y)
        else:
            return linear_loss_grad_for_loop(self.W, X, y)


def linear_loss_grad_vectorized(W, X, y):
    """
    Compute the loss and gradients with weights, vectorized version

    Parameters
    ----------
    W: weights, shape (num_of_features, 1)
    X: data, shape (num_of_samples, num_of_features)
    y: targets, one per sample; reshaped to match the shape of X.dot(W),
       so both (num_of_samples, 1) and (num_of_samples,) are accepted

    Returns
    -------
    tuple (loss, gradient)
    loss: mean of the squared residuals, sum((XW - y)^2) / num_of_samples
    gradient: X.T.dot(XW - y) / num_of_samples, same shape as W. Note this
        is the gradient of *half* the reported loss; the missing factor 2
        only rescales the step and is absorbed by the learning rate.
    """
    num_of_samples = X.shape[0]
    # (num_of_samples, num_of_features) dot (num_of_features, 1)
    f_mat = X.dot(W)

    # Force the targets into the prediction's shape before subtracting.
    # Without this, a 1-D y against (num_of_samples, 1) predictions would
    # broadcast to a (num_of_samples, num_of_samples) matrix and silently
    # produce a wrong loss and a wrongly shaped gradient.
    targets = np.asarray(y).reshape(f_mat.shape)
    diff = f_mat - targets

    loss = np.sum(diff * diff) / num_of_samples

    # (num_of_features, num_of_samples) dot (num_of_samples, 1)
    gradient = np.dot(X.T, diff) / num_of_samples
    return (loss, gradient)


def linear_loss_grad_for_loop(W, X, y):
    """
    Compute the loss and gradients with weights, for loop version

    Produces the same values as linear_loss_grad_vectorized so the two can
    be swapped freely: the loss is the mean squared residual and the
    gradient is sum_i(residual_i * X_i) / num_of_samples.

    Parameters
    ----------
    W: weights, shape (num_of_features, 1)
    X: data, shape (num_of_samples, num_of_features)
    y: targets, shape (num_of_samples, 1)

    Returns
    -------
    tuple (loss, gradient); gradient has the same shape as W
    """

    # num_of_samples rows of training data
    num_of_samples = X.shape[0]

    # num_of_features columns of features
    num_of_features = X.shape[1]

    loss = 0.0

    # Same shape as W, but always floating point: inheriting an integer
    # dtype from W would truncate every accumulated term.
    gradient = np.zeros(W.shape, dtype=float)

    for i in range(num_of_samples):
        X_i = X[i, :] # i-th sample from training data
        f = 0.0
        for j in range(num_of_features):
            f += X_i[j] * W[j, 0]
        diff = f - y[i, 0]
        loss += diff * diff
        for j in range(num_of_features):
            gradient[j, 0] += diff * X_i[j]

    # Average over the samples, matching the vectorized implementation.
    # np.divide keeps the empty-batch case as nan (as the vectorized version
    # yields) instead of raising ZeroDivisionError on a plain Python float.
    loss = np.divide(loss, num_of_samples)
    gradient = np.divide(gradient, num_of_samples)

    return (loss, gradient)
    

In [35]:

# Synthetic single-feature regression data: 100 inputs drawn uniformly from
# [-100, 100), stored as a column of shape (100, 1).
train_x = np.random.uniform(-100,100,100)
train_x = train_x.reshape(-1,1)


num_of_sample , num_of_feature = train_x.shape

# Gaussian noise with standard deviation 5, one value per sample.
noise = np.random.randn(num_of_sample,1) * 5
print(noise)
# Ground-truth weight used to generate the targets.
t_w = np.array([5]).reshape(1,1)

# Build the noisy targets with the model's own predict() and the true weight.
# (Class name and the predict() argument fixed: the parameter is `X`.)
m = LinearRegression()
m.W = t_w
train_y = m.predict(train_x) + noise

print("train_x:")
print(train_x)
print("train_y:")
print(train_y)



[[  7.42404282]
 [ -0.873987  ]
 [ -2.74934807]
 [ -0.8399295 ]
 [ -0.78877784]
 [ -2.86842619]
 [ -4.3001756 ]
 [  7.34986157]
 [ 11.96124358]
 [ -7.25166141]
 [  0.34983773]
 [ -3.65561418]
 [ -2.7424915 ]
 [ -0.84360163]
 [  2.67004468]
 [ -4.79971037]
 [ -1.50465931]
 [ -0.79914875]
 [ -3.38245673]
 [ -1.09562372]
 [ -3.14420095]
 [  1.91753761]
 [  2.25410096]
 [ -3.63063017]
 [ -2.71305841]
 [  0.76646079]
 [ -7.34423679]
 [ -1.32243276]
 [ -0.96792529]
 [ -3.76161417]
 [ -1.03162163]
 [  1.20169436]
 [ -1.45874168]
 [  0.27626248]
 [ -5.72966832]
 [ 10.15432288]
 [  1.63250684]
 [  4.58829623]
 [  8.2436905 ]
 [ -7.21555785]
 [ -4.7617956 ]
 [ -0.83297179]
 [ -1.64450968]
 [ -1.61152045]
 [  1.83752979]
 [  1.02732348]
 [ -2.1947042 ]
 [  2.12598459]
 [  7.22280789]
 [  0.6976439 ]
 [  5.14998633]
 [  4.61549158]
 [ -1.39280113]
 [  3.02669322]
 [  3.21094576]
 [  1.42417595]
 [ -1.14536796]
 [  0.37920782]
 [ 10.24868581]
 [  2.4313054 ]
 [  2.2852372 ]
 [  3.45839257]
 [  0.40

In [15]:
# Initialize parameters: the iteration budget and a random starting weight
# with one row per feature.
num_iters = 100
init_W = np.random.randn(num_of_feature, 1)
print(init_W)

[[0.60400414]]


In [44]:
# Fit a fresh model with batch gradient descent. 'bgd' is passed explicitly
# instead of an empty string that only worked by falling through to the
# batch branch, and the per-iteration losses are kept for later inspection.
# NOTE(review): the very small learning rate is presumably needed because the
# inputs span roughly +/-100 and are not rescaled; confirm before changing it.
m = LinearRegression()
losses_history = m.train(train_x,train_y,method="bgd",learning_rate=1e-7,num_iters=100000,verbose=True)

print(m.W)


iteration 0 / 100000 : loss 85082.321693
iteration 10000 / 100000 : loss 113.586604
iteration 20000 / 100000 : loss 20.224283
iteration 30000 / 100000 : loss 20.121698
iteration 40000 / 100000 : loss 20.121585
iteration 50000 / 100000 : loss 20.121585
iteration 60000 / 100000 : loss 20.121585
iteration 70000 / 100000 : loss 20.121585
iteration 80000 / 100000 : loss 20.121585
iteration 90000 / 100000 : loss 20.121585
[[4.99693996]]
