In [2]:
from sklearn.datasets import load_boston
from sklearn.metrics import mean_squared_error
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from matplotlib import pyplot as plt
import pandas as pd
import numpy as np
np.set_printoptions(suppress=True)
%matplotlib inline

In [5]:
def prepare_boston_data():
    # подгружает boston dataset
    data = load_boston()
    X, y = data['data'], data['target']

    # нормализуем X
    X = (X - np.mean(X, axis=0))/np.std(X, axis=0)

    # добавляем столбец свободных членов (bias линейной модели)
    n = X.shape[0]  # количество строк
    X = np.hstack([np.ones(n).reshape(n, 1), X])

    return X, y

def print_regression_metrics(y_true, y_pred):
    mse = mean_squared_error(y_true, y_pred)
    rmse = np.sqrt(mse)
    print(f'mse: {mse:.2f}, rmse: {rmse:.2f}')

Обернем написанную нами линейную регрессию методом матричных операций в класс:

In [18]:
class LinRegAlgebra():
    def __init__(self):
        self.theta = None
        self.X = None
            
    def fit(self, X, y):
        self.theta = (np.linalg.inv((X.T).dot(X))).dot((X.T).dot(y))
            
    def predict(self, X):
        if self.theta is None:
            raise Exception("You should train the model first")
        return X.dot(self.theta)

In [23]:
# пробуем модель на всем датасете
X, y = prepare_boston_data()
model = LinRegAlgebra()
model.fit(X, y)
y_pred = model.predict(X)
print_regression_metrics(y,y_pred)

mse: 21.89, rmse: 4.68


In [65]:
# пробуем модель на train_test
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2)
model_train = LinRegAlgebra()
model_train.fit(X_train,y_train)
y_pred = model.predict(X_test)
print_regression_metrics(y_test,y_pred)

mse: 19.96, rmse: 4.47
