In [63]:
# %% Imports
import ast
import itertools
from pathlib import Path

import pandas as pd
import statsmodels.api as sm
from sklearn.feature_selection import SelectFromModel
from sklearn.linear_model import LassoCV
from sklearn.preprocessing import StandardScaler

from P08_feature_importances.T00_lib.classes_ml import (
    DataHandler,
    MyEval,
    MyUtil,
)
from P08_feature_importances.T00_lib.optuna_ml import OptunaUtil
from P08_feature_importances.T00_lib.utils import check_jupyter


In [64]:
# Directory layout. Everything is anchored at the process working directory
# (Path.cwd()); the sibling task folders are reached through its parent.
BASE_DIR = CURRENT_DIR = Path.cwd()
OPTUNA_DIR = BASE_DIR.parent / "T03_optuna"
DATA_DIR = BASE_DIR.parent / "T02_combine_features"

In [65]:
# Study summary workbook: the "model_params" column is run through
# ast.literal_eval so each cell holds a Python object rather than its text form.
study_info_filename = "S02_combine_study.xlsx"
study_info = pd.read_excel(OPTUNA_DIR / study_info_filename)
study_info["model_params"] = study_info["model_params"].map(ast.literal_eval)

# Combined feature/target table used by the rest of the notebook
df = pd.read_excel(DATA_DIR / "S02_data_combined_loc.xlsx")
print(f"df.shape: {df.shape}")

df.shape: (378, 87)


In [66]:
# Column split: the trailing three columns of df are treated as targets,
# every column before them as a feature.
_dfX, _dfY = df.iloc[:, :-3], df.iloc[:, -3:]

# NumPy arrays of the two halves (shapes are reported before the target is narrowed)
_X, _Y = _dfX.to_numpy(), _dfY.to_numpy()
print(f"_X.shape: {_X.shape}")
print(f"_Y.shape: {_Y.shape}")

# Keep only the third target column; the 2:3 slice preserves the 2-D layout
_Y = _Y[:, 2:3]

_X.shape: (378, 84)
_Y.shape: (378, 3)


In [67]:
# Hand the raw arrays to a DataHandler, with a separate StandardScaler
# instance for the features and for the target.
data_handler = DataHandler(
    _X=_X,
    _Y=_Y,
    scalerX=StandardScaler(),
    scalerY=StandardScaler(),
)

In [68]:
# Split settings. With test_size = 0.0 the handler reports that no test set is
# made and all data is used for training.
idx, random_state, test_size = 1, 1, 0.0

data_handler.split_and_scale(test_size=test_size, random_state=random_state)
X_train, Y_train = data_handler.get_train()

# Wrap the arrays returned by get_train() in labelled DataFrames
df_X_train = pd.DataFrame(data=X_train, columns=_dfX.columns)
df_Y_train = pd.DataFrame(data=Y_train, columns=["target"])

No test set, using all data for training.


In [69]:
# Fit a Lasso (L1-penalised linear regression) on the training frame and let
# 5-fold cross-validation choose the penalty strength. No explicit `alphas`
# grid is passed, so LassoCV builds its own candidate path. max_iter is set to
# 10000 to give the solver more iterations to converge.
# The single-column target frame is flattened to 1-D before fitting.
model = LassoCV(cv=5, random_state=0, max_iter=10000).fit(
    df_X_train, df_Y_train.to_numpy().ravel()
)

# Penalty strength selected by the cross-validation
best_alpha = model.alpha_

In [70]:
# Display the penalty strength read from model.alpha_ after the LassoCV fit
best_alpha

np.float64(0.03754741764507284)

In [None]:
# Keep the features whose fitted Lasso coefficient is effectively non-zero.
# prefit=True reuses the already-fitted `model` instead of refitting it.
# SelectFromModel keeps a feature when |coef| >= threshold, so the tiny 1e-5
# threshold discards only the coefficients that are (numerically) zero.
selection = SelectFromModel(model, prefit=True, threshold=1e-5)

# Boolean support mask -> names of the surviving feature columns
selected_features = df_X_train.columns[selection.get_support()]
print(f"Selected features: {list(selected_features)}")

Selected features: ['position', 'Fx_location', 'Fx__dwell__partial_autocorrelation__lag_6', 'Fy__dwell__fft_coefficient__attr_"real"__coeff_71', 'Fy__dwell__autocorrelation__lag_3', 'Fy__dwell__fft_coefficient__attr_"imag"__coeff_95', 'Mz__dwell__fft_coefficient__attr_"angle"__coeff_13', 'Mz__dwell__fft_coefficient__attr_"real"__coeff_31', 'Fx__weld__fft_coefficient__attr_"abs"__coeff_58', 'Fx__weld__fft_coefficient__attr_"real"__coeff_72', 'Fy__weld__fft_coefficient__attr_"real"__coeff_51', 'Fy__weld__ar_coefficient__coeff_5__k_10', 'Fy__weld__cid_ce__normalize_True', 'Fz__weld__change_quantiles__f_agg_"mean"__isabs_False__qh_0.6__ql_0.4', 'Fz__weld__fft_coefficient__attr_"real"__coeff_84', 'Fz__weld__ratio_beyond_r_sigma__r_1', 'Mz__weld__change_quantiles__f_agg_"mean"__isabs_False__qh_0.8__ql_0.2', 'Mz__weld__fft_coefficient__attr_"angle"__coeff_15', 'Mz__weld__fft_coefficient__attr_"abs"__coeff_96']


In [72]:
# Display the pandas Index holding the names of the selected feature columns
selected_features

Index(['position', 'Fx_location', 'Fx__dwell__partial_autocorrelation__lag_6',
       'Fy__dwell__fft_coefficient__attr_"real"__coeff_71',
       'Fy__dwell__autocorrelation__lag_3',
       'Fy__dwell__fft_coefficient__attr_"imag"__coeff_95',
       'Mz__dwell__fft_coefficient__attr_"angle"__coeff_13',
       'Mz__dwell__fft_coefficient__attr_"real"__coeff_31',
       'Fx__weld__fft_coefficient__attr_"abs"__coeff_58',
       'Fx__weld__fft_coefficient__attr_"real"__coeff_72',
       'Fy__weld__fft_coefficient__attr_"real"__coeff_51',
       'Fy__weld__ar_coefficient__coeff_5__k_10',
       'Fy__weld__cid_ce__normalize_True',
       'Fz__weld__change_quantiles__f_agg_"mean"__isabs_False__qh_0.6__ql_0.4',
       'Fz__weld__fft_coefficient__attr_"real"__coeff_84',
       'Fz__weld__ratio_beyond_r_sigma__r_1',
       'Mz__weld__change_quantiles__f_agg_"mean"__isabs_False__qh_0.8__ql_0.2',
       'Mz__weld__fft_coefficient__attr_"angle"__coeff_15',
       'Mz__weld__fft_coefficient__attr_"a