In [1]:
# 导入保存的特征矩阵和目标值
import numpy as np
from sklearn.linear_model import SGDRegressor
from sklearn.metrics import mean_squared_error, r2_score
data_path = '../data/'

In [2]:
X_accumulated = np.load(data_path + 'X_accumulated.npy')
P_accumulated = np.load(data_path + 'P_accumulated.npy')

# 打印数据以确认导入是否成功
print("Loaded X_accumulated:")
print(X_accumulated)

print("\nLoaded P_accumulated:")
print(P_accumulated)


Loaded X_accumulated:
[[ -0.          -0.           0.        ]
 [ -0.          -0.           0.        ]
 [ -0.          -0.           0.        ]
 ...
 [237.22801394 237.22801394   1.26510926]
 [234.33238717 234.33238717   1.22668185]
 [231.34799113 231.34799113   1.1886911 ]]

Loaded P_accumulated:
[0.86917192 0.77902669 0.80899501 ... 5.94081211 6.54024935 6.15057802]


In [3]:
remove_ = np.all(X_accumulated == 0, axis=1)
X_nonzero_rows = X_accumulated[~remove_]
P_nonzero_rows = P_accumulated[~remove_]

In [4]:
# model = SGDRegressor(
#     loss='huber',  # 损失函数类型
#     penalty='l2',  # 正则化类型
#     alpha=0.1,  # 正则化强度
#     fit_intercept=True,  # 是否计算截距
#     max_iter=50000,  # 最大迭代次数
#     tol=0.01,  # 收敛容忍度
#     shuffle=True,  # 是否打乱数据
#     verbose=0,  # 输出的详细程度
#     random_state=None,  # 随机数生成器的种子
#     learning_rate='optimal',  # 学习率类型
#     epsilon=1,
#     eta0=0.01,  # 初始学习率
#     power_t=0.25,  # 学习率调整的幂
#     early_stopping=False,  # 是否启用早期停止
#     validation_fraction=0.1,  # 验证集的比例
#     n_iter_no_change=10,  # 连续多少轮未改进则停止
#     warm_start=False,  # 是否使用温启动
#     average=False  # 是否使用平均梯度
# )

model = SGDRegressor(
    loss='huber',  # 损失函数类型
    penalty='l2',  # 正则化类型
    alpha=0.8,  # 正则化强度
    fit_intercept=True,  # 是否计算截距
    max_iter=100000,  # 最大迭代次数
    tol=0.01,  # 收敛容忍度
    shuffle=True,  # 是否打乱数据
    verbose=0,  # 输出的详细程度
    random_state=None,  # 随机数生成器的种子
    learning_rate='optimal',  # 学习率类型
    epsilon=1,
    eta0=0.01,  # 初始学习率
    power_t=0.25,  # 学习率调整的幂
    early_stopping=False,  # 是否启用早期停止
    validation_fraction=0.1,  # 验证集的比例
    n_iter_no_change=10,  # 连续多少轮未改进则停止
    warm_start=False,  # 是否使用温启动
    average=False  # 是否使用平均梯度
)


In [5]:
# 使用导入的数据进行训练
model.fit(X_nonzero_rows, P_nonzero_rows)

# 打印模型系数
print("\nModel coefficients:")
print(model.coef_)
print("Model intercept:")
print(model.intercept_)


Model coefficients:
[0.00479301 0.01457816 0.08997533]
Model intercept:
[0.85633447]


In [6]:
# 获取预测值
y_pred = model.predict(X_nonzero_rows)

# 输出结果
print("预测值:", y_pred)

# 计算均方误差和R^2分数
mse = mean_squared_error(P_nonzero_rows, y_pred)
r2 = r2_score(P_nonzero_rows, y_pred)

print("均方误差 (MSE):", mse)
print("R^2分数:", r2)

预测值: [0.8645541  0.86475036 0.86494895 ... 5.56554571 5.50599653 5.44476708]
均方误差 (MSE): 5.331645922512865
R^2分数: 0.9216147601355368
