# 线性回归--正规方程

In [3]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.preprocessing import StandardScaler

# Load the California housing dataset, using ../data as the dataset directory.
housing = fetch_california_housing(data_home='../data')
X, y = housing.data, housing.target

# Show the feature names and the shapes of the feature matrix and the target.
print("特征名称:", housing.feature_names)
print("数据集形状:", X.shape)
print("目标变量形状:", y.shape)

# Hold out 20% of the samples as a test split; the fixed seed makes the
# split reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)
print(y_train[:10])

# Standardize the features. The scaler is fitted on the training split only
# and then applied to both splits, so the test split never influences the
# scaling statistics.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Build the linear regression model and fit it on the standardized
# training data (fit() returns the estimator itself).
lr_model = LinearRegression().fit(X_train_scaled, y_train)

# Predict on the held-out split and report the mean squared error.
y_pred = lr_model.predict(X_test_scaled)
mse = mean_squared_error(y_test, y_pred)
print(f"均方误差 (MSE): {mse:.4f}")

特征名称: ['MedInc', 'HouseAge', 'AveRooms', 'AveBedrms', 'Population', 'AveOccup', 'Latitude', 'Longitude']
数据集形状: (20640, 8)
目标变量形状: (20640,)
[1.03    3.821   1.726   0.934   0.965   2.648   1.573   5.00001 1.398
 3.156  ]
均方误差 (MSE): 0.5559


  return X @ coef_ + self.intercept_
  return X @ coef_ + self.intercept_
  return X @ coef_ + self.intercept_


# 线性回归--梯度下降

In [4]:
import joblib
from sklearn.linear_model import SGDRegressor

# Load the California housing dataset, using ../data as the dataset directory.
housing = fetch_california_housing(data_home='../data')
X = housing.data
y = housing.target

# Hold out 20% of the samples as a test split; the fixed seed makes the
# split reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
print(y_train[:10])

# Standardize the features: fit the scaler on the training split only and
# reuse its statistics for the test split.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Build the SGD regressor.
# - max_iter: upper bound on the number of passes (epochs) over the training data.
# - tol: stopping tolerance. Per the scikit-learn documentation, training stops
#   once the loss has failed to improve by at least `tol` for
#   `n_iter_no_change` consecutive epochs (not merely when two adjacent
#   iterations differ by <= tol).
# - learning_rate="invscaling": the step size decays with the iteration
#   count as eta = eta0 / t**power_t. This is the library default for
#   SGDRegressor; it is passed explicitly so the code matches this description.
# - eta0: the initial learning rate.
sgd_model = SGDRegressor(
    max_iter=1000,
    tol=1e-3,
    learning_rate="invscaling",
    eta0=0.01,
    random_state=42,
)

# Train the model on the standardized training data.
sgd_model.fit(X_train_scaled, y_train)

# Predict on the held-out split.
y_pred = sgd_model.predict(X_test_scaled)

# Evaluate with the mean squared error.
mse = mean_squared_error(y_test, y_pred)

print(f"均方误差 (MSE): {mse:.4f}")

[1.03    3.821   1.726   0.934   0.965   2.648   1.573   5.00001 1.398
 3.156  ]
均方误差 (MSE): 0.5506


  ret = a @ b
  ret = a @ b
  ret = a @ b


## Lasso回归、岭回归

In [7]:
from sklearn.linear_model import Lasso, Ridge
from sklearn.metrics import mean_squared_error

# NOTE: this cell does not build its own data; it reuses X_train_scaled,
# X_test_scaled, y_train and y_test left in scope by an earlier cell.

# Lasso regression: construct the model and fit it in one step
# (fit() returns the estimator itself).
lasso_model = Lasso(alpha=0.01, random_state=42, max_iter=10000).fit(
    X_train_scaled, y_train
)

# Predict on the held-out split and report the mean squared error.
lasso_pred = lasso_model.predict(X_test_scaled)
lasso_mse = mean_squared_error(y_test, lasso_pred)
print(f"lasso均方误差 (MSE): {lasso_mse:.4f}")

# Ridge regression: same workflow with the Ridge estimator.
ridge_model = Ridge(alpha=1.0, random_state=42).fit(X_train_scaled, y_train)

# Predict on the held-out split and report the mean squared error.
ridge_pred = ridge_model.predict(X_test_scaled)
ridge_mse = mean_squared_error(y_test, ridge_pred)
print(f"ridge均方误差 (MSE): {ridge_mse:.4f}")


lasso均方误差 (MSE): 0.5483
ridge均方误差 (MSE): 0.5559


  return X @ coef_ + self.intercept_
  return X @ coef_ + self.intercept_
  return X @ coef_ + self.intercept_
  ret = a @ b
  ret = a @ b
  ret = a @ b
  return X @ coef_ + self.intercept_
  return X @ coef_ + self.intercept_
  return X @ coef_ + self.intercept_
