In [1]:
from sklearn.datasets import make_regression
from matplotlib import pyplot
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error

# generate regression dataset
X, y = make_regression(n_samples=1000, n_features=4, noise=10)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=42)

pd.DataFrame(data = np.concatenate((X_train, np.array([y_train]).T), axis=1)).to_csv("train.csv", index=None)
pd.DataFrame(data = np.concatenate((X_test, np.array([y_test]).T), axis=1)).to_csv("test.csv", index=None)

In [2]:
reg = LinearRegression().fit(X_train, y_train)

In [3]:
mean_absolute_error(y_test, reg.predict(X_test))

7.748804690163286

In [4]:
class LinearRegression:
    def __init__(self):
        self.w = None
        self.b = None
        
    @staticmethod
    def loss(y_train, y_predicted):
        return ((y_predicted - y_train)**2)
        
    @staticmethod
    def sample_batch(X_train, y_train, batch_size=None):
        if not batch_size is None:
            rand_idx = np.random.permutation(X_train.shape[0])[:batch_size]
            X, y = X_train[rand_idx, ...], y_train[rand_idx, ...]
        else:
            X, y = X_train, y_train
        return X, y
        
    def fit(self, x, y, iters=100, lr_base=0.1, batch_size=None):
        self.N = x.shape[0]
        self.D = x.shape[1]
        self.O = y.shape[1]
        if  self.w is None or self.b is None or \
            self.w.shape != (self.D, self.O) or self.b.shape != (1., self.O):

            self.__init_weights()
            
        for i in range(iters):
            #sample data
            X_train, y_train = self.sample_batch(x, y, batch_size)
            
            # forward pass
            y_predicted = self.predict(X_train)
            loss = self.loss(y, y_predicted).mean()

            # backward pass
            dz = y_predicted - y_train
            dw = X_train.T.dot(dz) / X_train.shape[0]
            db = dz.mean(axis=0)

            # update params
            lr = lr_base / ((i + 10) // 10)
            self.w -= lr * dw
            self.b -= lr * db
            
            # log
            if (i + 1) % 100 == 0:
                print("iter: {}, loss: {:5.3f}".format(i + 1, ((self.predict(X_train) - y_train)**2).mean()))
            
        return self
    
    def __init_weights(self):
        self.w = np.random.uniform(0, 0.01, size=(self.D, self.O))
        self.b = np.ones(shape=(1, self.O)) / 100
    
    def predict(self, x):
        return x.dot(self.w) + self.b

In [5]:
reg = LinearRegression().fit(X_train, y_train.reshape(1, -1), iters=1000)

iter: 100, loss: 34.702
iter: 200, loss: 9.430
iter: 300, loss: 4.374
iter: 400, loss: 2.534
iter: 500, loss: 1.659
iter: 600, loss: 1.174
iter: 700, loss: 0.877
iter: 800, loss: 0.681
iter: 900, loss: 0.545
iter: 1000, loss: 0.446


In [6]:
mean_absolute_error(reg.predict(X_test)[:, 0], y_test)

153.1196770473175