In [12]:
import os
os.environ["KMP_DUPLICATE_LIB_OK"] = "TRUE"
from sklearn.linear_model import Lasso, LinearRegression
from sklearn.preprocessing import StandardScaler
import numpy as np
import pandas as pd

# 导入训练数据
df = pd.read_csv('../improved_data/xy_orbit.csv')
array = df.to_numpy()
array = array.T
t = array[0]
yy = array[1:]/1e10

# 假设我们有 n 个时间点的观测数据 (x, y)
n = t.size  # 时间点数量
x = yy[0]    # x坐标
y = yy[1]    # y坐标
vx = yy[2]   # vx
vy = yy[3]   # vy

# 加速度的计算
r = np.sqrt(x**2 + y**2)   # 距离
G = 6.67430e-11 * 86400**2   # 引力常数, m^3 kg^-1 s^-2
M = 1.989e30      # 太阳质量, kg
GM = G * M/1e30

print( "GM:", GM) 

x_ddot = -G * M * x / r**3
y_ddot = -G * M * y / r**3


# 构建设计矩阵
def build_design_matrix(x, y):
    denominator = (x**2 + y**2)**(3/2)
    Theta = np.column_stack([
        np.ones(len(x)),  # 常数项
        GM * x / denominator,  # GM * x / r^3
        GM * y / denominator,   # GM * y / r^3
        x,                # x
        y,                # y
        x**2,             # x^2
        y**2,             # y^2
        x * y            # x * y
    ])
    return Theta

# 简单稀疏化的函数
def lasso_with_refit(Theta, Theta_scaled, target, alpha, threshold=None):
    """
    使用 LASSO 模型并结合阈值稀疏化后重新拟合模型
    Args:
        Theta: 原始设计矩阵（未标准化）
        Theta_scaled: 标准化后的设计矩阵
        target: 目标值 (x_ddot 或 y_ddot)
        alpha: LASSO 正则化参数
        threshold: 稀疏化阈值，若为 None，则动态设置
    Returns:
        sparse_coefficients: 稀疏化后的系数
        refit_coefficients: 重新拟合后的系数
    """
    # 第一步：使用 Lasso 初次拟合
    lasso = Lasso(alpha=alpha, max_iter=10000)
    lasso.fit(Theta_scaled, target)
    coefficients = lasso.coef_
    
    # 打印初始 LASSO 系数
    print("Initial LASSO coefficients:", coefficients)

    # 动态设置阈值
    if threshold is None:
        # 剔除接近 0 的值后动态计算阈值
        valid_coefficients = coefficients[np.abs(coefficients) > 1e-6]
        if len(valid_coefficients) > 0:
            threshold = np.median(np.abs(valid_coefficients))
        else:
            threshold = 0.0
    print("Dynamic threshold set to:", threshold)

    # 第二步：应用阈值稀疏化
    mask = np.abs(coefficients) >= threshold  # 筛选重要特征
    sparse_coefficients = np.where(mask, coefficients, 0)
    
    # 第三步：重新拟合模型，仅使用非零特征（恢复未标准化数据）
    if np.sum(mask) > 0:  # 如果存在非零特征
        Theta_refit = Theta[:, mask]  # 筛选非零特征对应的列
        refit_model = LinearRegression()  # 使用无正则化线性回归
        refit_model.fit(Theta_refit, target)
        refit_coefficients = np.zeros_like(coefficients)  # 初始化为全零
        refit_coefficients[mask] = refit_model.coef_  # 更新非零特征的系数
    else:
        refit_coefficients = sparse_coefficients  # 如果所有系数都为零，返回稀疏化结果
    
    return sparse_coefficients, refit_coefficients

# 设置参数
alpha = 1e-3  # 降低 Lasso 正则化强度
threshold = None  # 动态设置阈值

# 生成目标值和设计矩阵
Theta = build_design_matrix(x, y)
scaler = StandardScaler()
Theta_scaled = scaler.fit_transform(Theta)

# 针对 x_ddot 稀疏回归
sparse_coefficients_x, refit_coefficients_x = lasso_with_refit(Theta, Theta_scaled, x_ddot, alpha, threshold)
print("Sparse coefficients for x_ddot:", sparse_coefficients_x)
print("Refit coefficients for x_ddot:", refit_coefficients_x)

# 针对 y_ddot 稀疏回归
sparse_coefficients_y, refit_coefficients_y = lasso_with_refit(Theta, Theta_scaled, y_ddot, alpha, threshold)
print("Sparse coefficients for y_ddot:", sparse_coefficients_y)
print("Refit coefficients for y_ddot:", refit_coefficients_y)



GM: 0.9909870784819198
Initial LASSO coefficients: [ 0.00000000e+00 -3.13220753e+27 -3.79427619e+09 -9.11511334e+11
 -6.02675754e+07  5.55013587e+10 -2.78826316e+08  2.28996031e+09]
Dynamic threshold set to: 3794276190.989694
Sparse coefficients for x_ddot: [ 0.00000000e+00 -3.13220753e+27 -3.79427619e+09 -9.11511334e+11
  0.00000000e+00  5.55013587e+10  0.00000000e+00  0.00000000e+00]
Refit coefficients for x_ddot: [ 0.00000000e+00 -1.00000000e+30 -3.53284196e+12 -1.95554157e+12
  0.00000000e+00  2.90741253e+09  0.00000000e+00  0.00000000e+00]
Initial LASSO coefficients: [ 0.00000000e+00 -7.64611579e+21 -3.13321733e+27  7.65812864e+21
 -1.82770315e+16  1.90480333e+20 -9.56929746e+17 -4.21513018e+14]
Dynamic threshold set to: 1.9048033262831737e+20
Sparse coefficients for y_ddot: [ 0.00000000e+00 -7.64611579e+21 -3.13321733e+27  7.65812864e+21
  0.00000000e+00  1.90480333e+20  0.00000000e+00  0.00000000e+00]
Refit coefficients for y_ddot: [ 0.00000000e+00 -3.32657519e+19 -1.00000000e+3