In [1]:
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn import datasets
import matplotlib.pyplot as plt

In [2]:
class LinearRegression(object):
    
    def __init__(self, learning_rate=0.001, n_iters=1000, normal=False):
        self.lr = learning_rate
        self.n_iters = n_iters
        self.weights = None
        self.bias = None
        self.normal = normal
        self.costs = []
        
    def fit(self, X, y):
        n_samples, n_features = X.shape
        
        # init parameters
        self.weights = np.zeros(n_features)
        self.bias = 0
        
        if self.normal:
            X_b = np.c_[np.ones((n_samples, 1)), X]
            self.weights = np.linalg.inv(X_b.T.dot(X_b)).dot(X_b.T).dot(y)
        
        else:
            # gradient descent

            for _ in range(self.n_iters):
                y_predicted = np.dot(X, self.weights) + self.bias

                cost = np.mean(np.square(y - y_predicted))
                self.costs.append(cost)

                # compute gradients
                dw = (1 / n_samples) * np.dot(X.T, (y_predicted - y))
                db = (1 / n_samples) * np.sum(y_predicted - y)


                # update parameters
                self.weights -= self.lr * dw
                self.bias -= self.lr * db
            
    
    def predict(self, X):
        if self.normal:
            X_b = np.c_[np.ones((X.shape[0], 1)), X]
            y_approximated = X_b.dot(self.weights)
        else:
            y_approximated = np.dot(X, self.weights) + self.bias
            
        return y_approximated

In [3]:
# metric

def mean_squared_error(y_true, y_pred):
    return np.mean((y_true - y_pred)**2)

In [4]:
# test

X, y = datasets.make_regression(n_samples=400, n_features=1, noise=30, random_state=42)

transformer = StandardScaler()
X = transformer.fit_transform(X)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

NameError: name 'StandardScaler' is not defined

In [None]:
reg = LinearRegression(learning_rate=0.01, n_iters=1000, normal=True)
reg.fit(X_train, y_train)
predictions = reg.predict(X_test)
    
mse = mean_squared_error(y_test, predictions)
print("MSE:", mse)


In [None]:
y_pred_line = reg.predict(X)
mse = mean_squared_error(y, y_pred_line)
cmap = plt.get_cmap('viridis')

fig = plt.figure(figsize=(8,6))

plt.scatter(X_train, y_train, color=cmap(0.9), s=10, label="Train")
plt.scatter(X_test, y_test, color=cmap(0.5), s=10, label="Test")
plt.plot(X, y_pred_line, color='black', linewidth=2, label="Prediction")
plt.xlabel("X", fontsize=15)
plt.ylabel("Y", fontsize=15, rotation=0)
plt.title(f"$Error: {mse}$", fontsize=18)
plt.legend()
plt.show()

In [None]:
# ax = plt.axes(projection='3d')

# # Data for a three-dimensional line
# # zline = np.linspace(0, 15, 1000)
# # xline = np.sin(zline)
# # yline = np.cos(zline)
# # ax.plot3D(xline, yline, zline, 'gray')

# # # Data for three-dimensional scattered points
# # zdata = 15 * np.random.random(100)
# # xdata = np.sin(zdata) + 0.1 * np.random.randn(100)
# # ydata = np.cos(zdata) + 0.1 * np.random.randn(100)
# ax.scatter3D(X[:, 0], X[:, 1], y, cmap='Greens');

# ax.set_xlabel('X1')
# ax.set_ylabel('Y2')
# ax.set_zlabel('Z')
