## **手写线性回归**

### **一、线性回归框架**

In [31]:
import numpy as np
import joblib

class LinearRegression:
    """Linear regression fitted in closed form with the normal equation.

    The weights are computed as ``pinv(X^T X) X^T y``. ``np.matrix`` is used
    throughout so that ``*`` means matrix multiplication and so that
    ``predict`` keeps returning an ``(n_samples, 1)`` matrix.

    Attributes:
        W: Column matrix of feature weights, ``None`` until ``fit`` is called.
        b: Intercept term, set by ``fit`` only when ``fit_intercept`` is True.
        fit_intercept: Whether a bias column of ones is added during fitting.
    """

    def __init__(self, fit_intercept=True):
        """Create an unfitted model.

        Args:
            fit_intercept: If True, ``fit`` also estimates an intercept term.
        """
        self.W = None
        self.b = None
        self.fit_intercept = fit_intercept

    def fit(self, X_train, y_train):
        """Estimate the weights (and optionally the intercept) from training data.

        Args:
            X_train: Array-like converted with ``np.matrix``; rows are samples.
            y_train: Array-like of targets; reshaped to a single column.
        """
        X = np.matrix(X_train)
        y = np.matrix(y_train).reshape(-1, 1)  # force shape (n_samples, 1)
        if self.fit_intercept:
            # Prepend a column of ones so the first solved coefficient is the bias.
            X = np.hstack([np.ones((X.shape[0], 1)), X])
        # Normal equation; pinv keeps this defined even when X^T X is singular.
        W = np.linalg.pinv(X.T * X) * X.T * y
        if self.fit_intercept:
            self.b = W[0, 0]   # coefficient of the ones column
            self.W = W[1:, 0]  # remaining rows are the feature weights
        else:
            self.W = W[:, 0]

    def predict(self, X_test):
        """Predict targets for ``X_test``.

        Args:
            X_test: Array-like converted with ``np.matrix``; rows are samples
                with the same number of features used in ``fit``.

        Returns:
            An ``(n_samples, 1)`` matrix of predictions.

        Raises:
            RuntimeError: If the model has not been fitted yet.
        """
        if self.W is None:
            raise RuntimeError("LinearRegression.predict() called before fit()")
        X = np.matrix(X_test)
        # self.W only ever holds feature weights (the bias lives in self.b), so
        # no ones column is added here in either mode. Adding one when
        # fit_intercept is False made X one column wider than W.
        y_predict = X * np.matrix(self.W)
        if self.fit_intercept:
            y_predict = y_predict + self.b
        return y_predict

    def score(self, X_test, y_test):
        """Evaluate the model with mean squared error (MSE).

        Note that this is an error measure: lower is better.

        Args:
            X_test: Features passed to ``predict``.
            y_test: True targets; reshaped to a single column.

        Returns:
            The MSE between ``y_test`` and the predictions.
        """
        Y_predict = self.predict(X_test)
        Y = np.matrix(y_test).reshape(-1, 1)  # force shape (n_samples, 1)
        return self.calculate_loss(Y, Y_predict)

    def calculate_loss(self, y_true, y_predict):
        """Return the mean squared error between ``y_true`` and ``y_predict``.

        Both inputs are flattened to 1-D first, so a flat list of targets and
        an ``(n, 1)`` column of predictions are compared element-wise instead
        of being broadcast into an ``(n, n)`` grid.

        Args:
            y_true: Array-like of true values.
            y_predict: Array-like of predicted values.

        Returns:
            The MSE as a NumPy float.
        """
        truth = np.asarray(y_true).reshape(-1)
        predicted = np.asarray(y_predict).reshape(-1)
        return np.sum(((truth - predicted) ** 2) / truth.size)

    def save(self, path):
        """Serialize this model to ``path`` with ``joblib.dump``."""
        joblib.dump(self, path)

    @staticmethod
    def load(path):
        """Load a model previously written by ``save``.

        SECURITY: ``joblib.load`` unpickles the file, which can execute
        arbitrary code. Only load files from a trusted source.
        """
        return joblib.load(path)



In [32]:
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Load the dataset.
dataset = fetch_california_housing()
# Split into training and test sets: 20% held out, seeded so the split is repeatable.
x_train, x_test, y_train, y_test = train_test_split(dataset.data, dataset.target, test_size=0.2, random_state=2)

# Feature engineering:
# standardization. The scaler is fitted on the training set only and then
# reused to transform the test set.
scaler = StandardScaler()
x_train = scaler.fit_transform(x_train)
x_test = scaler.transform(x_test)

linear = LinearRegression()
linear.fit(x_train, y_train)
# Evaluate on the test set.
# NOTE(review): LinearRegression.score returns the mean squared error, so lower
# is better; the label printed below reads "accuracy".
score1 = linear.score(x_test, y_test)
print(f'准确率: {score1}')

# Predict on the test set and print the predictions.
y_test_predict = linear.predict(x_test)
print(f'预测值: {y_test_predict}')


准确率: 0.5369436868087002
预测值: [[2.80991017]
 [1.25276895]
 [1.44720173]
 ...
 [1.57213375]
 [1.43077765]
 [2.60233972]]


In [33]:
# Persist the fitted model to 'model.pkl' (LinearRegression.save calls joblib.dump).
linear.save('model.pkl')

In [34]:
# Reload the saved model and score it on the same test set to check the round trip.
linear2 = LinearRegression.load('model.pkl')
# NOTE(review): score() returns the mean squared error (lower is better); the
# label printed below reads "accuracy".
score2 = linear2.score(x_test, y_test)
print(f'准确率: {score2}')

准确率: 0.5369436868087002
