In [1]:
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LassoCV
from sklearn.metrics import mean_squared_error

from RFS import RandomizedForwardSelection, step, RandomizedForwardSelectionV2

# Generate synthetic data
np.random.seed(42)
n, p = 5000, 500  # 100 samples, 50 features

# Create base features
X = np.random.randn(n, p)

# Introduce correlations
X[:, 1] = 0 * X[:, 0] + 0.3 * np.random.randn(n)  # Correlated with feature 0
X[:, 3] = 0 * X[:, 2] + 0.4 * np.random.randn(n)  # Correlated with feature 2
X[:, 5] = 0 * X[:, 0] + 0 * X[:, 4] + 0.1 * np.random.randn(n)  # Correlated with features 0 and 4

# Create true coefficients (some zero, some non-zero)
true_coef = np.zeros(p)
true_coef[:6] = [1.5, -0.8, 1.2, -0.5, 0.7, -0.3]  # Only first 6 features are relevant

# Create the target variable (linear model)
y = X @ true_coef + np.random.normal(0, 1, n)  # Add some noise

# Split data into train and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardize features
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [10]:
rfs = RandomizedForwardSelectionV2(k=10, m=200, tol=1, resample=True)
rfs.fit(X, y)

In [4]:
mean_squared_error(rfs.predict(X_test), y_test), mean_squared_error(lasso.predict(X_test), y_test)

(1.0198521977881008, 1.0409735104433)

In [4]:
np.abs(rfs.coef_).argsort()

array([249, 336, 335, 334, 333, 332, 331, 330, 329, 328, 327, 326, 325,
       337, 324, 322, 320, 319, 318, 317, 316, 315, 314, 313, 312, 311,
       310, 323, 338, 340, 341, 368, 367, 366, 365, 364, 363, 362, 361,
       360, 359, 358, 357, 356, 355, 354, 353, 352, 351, 350, 349, 348,
       347, 346, 345, 344, 343, 342, 309, 369, 308, 306, 273, 272, 271,
       270, 269, 268, 267, 266, 265, 264, 263, 262, 274, 261, 259, 258,
       257, 256, 255, 254, 253, 252, 251, 250, 498, 247, 260, 276, 277,
       278, 305, 304, 303, 302, 301, 300, 299, 298, 297, 296, 295, 294,
       293, 292, 291, 290, 289, 288, 287, 286, 285, 284, 283, 282, 281,
       280, 279, 307, 370, 371, 372, 464, 463, 462, 461, 460, 459, 458,
       457, 456, 455, 454, 453, 465, 452, 450, 448, 447, 446, 445, 444,
       443, 442, 441, 440, 438, 437, 451, 466, 467, 468, 497, 496, 495,
       494, 492, 491, 490, 489, 488, 487, 486, 485, 484, 483, 482, 481,
       479, 478, 477, 476, 475, 474, 473, 472, 471, 470, 469, 43

In [3]:
lasso = LassoCV()
lasso.fit(X, y)
np.abs(lasso.coef_).argsort()

array([249, 336, 335, 334, 333, 332, 331, 330, 329, 328, 327, 326, 325,
       337, 324, 322, 320, 319, 318, 317, 316, 315, 314, 313, 312, 311,
       310, 323, 338, 340, 341, 369, 368, 367, 366, 365, 364, 363, 362,
       361, 360, 359, 358, 357, 356, 355, 354, 353, 352, 351, 350, 349,
       348, 347, 346, 345, 344, 342, 309, 370, 308, 306, 273, 272, 271,
       270, 269, 268, 267, 266, 265, 264, 263, 262, 274, 261, 258, 257,
       256, 255, 254, 253, 252, 251, 250, 498, 247, 246, 260, 275, 276,
       277, 305, 304, 302, 301, 300, 299, 298, 297, 296, 295, 294, 293,
       292, 291, 290, 289, 288, 287, 286, 285, 284, 283, 282, 281, 280,
       279, 278, 307, 245, 371, 373, 464, 463, 461, 460, 459, 458, 457,
       456, 455, 454, 453, 452, 465, 451, 449, 448, 447, 446, 445, 444,
       443, 442, 441, 440, 438, 437, 450, 466, 467, 468, 497, 496, 495,
       492, 491, 490, 489, 488, 487, 486, 485, 484, 483, 482, 481, 480,
       479, 478, 477, 476, 475, 474, 473, 472, 471, 470, 469, 43

In [8]:
sum(rfs.trajectory[8].values())

930

In [22]:
rfs2 = RandomizedForwardSelection(k=3, m=200)
rfs2.fit(X, y)

In [2]:
generator = np.random.default_rng(42)
result = step(frozenset(), X_train, y_train, 30, 1000, generator)

In [3]:
rfs.predict(X_test) - y_test

array([-2.05667258, -1.94949425,  0.70389263, -1.70329769,  0.47938517,
       -1.384011  , -1.73853927, -0.61452961, -0.04921108,  0.5504136 ,
       -1.00732492,  0.20927401, -1.53075781, -1.60142563, -0.33865719,
        0.49207626, -3.48415817,  1.71126122, -0.11015187,  0.69212383])

In [9]:
len(rfs.trajectory[3])

109

In [None]:
lasso.in

set()