In [33]:
import re
from pathlib import Path

import pandas as pd
from sklearn.feature_selection import SelectFromModel
from sklearn.linear_model import LassoCV
from sklearn.preprocessing import StandardScaler

from run1.lib.classes_ml import DataHandler

In [34]:
BASE_DIR = Path.cwd()  # Current directory of the running file
ROOT_DIR = BASE_DIR.parent.parent.parent
DATA_DIR = ROOT_DIR / "run1" / "P02_MF_1" / "T01_af_features"
CURRENT_DIR = BASE_DIR

In [35]:
_df = pd.read_excel(DATA_DIR / "S01_combined_data.xlsx")
print(f"df.shape: {_df.shape}")

df.shape: (378, 37)


In [36]:
# Select columns for features and targets
colsY = [c for c in _df.columns if re.search(r"stress_value", c)]
colsX = [c for c in _df.columns if c not in ["sample_no", "location", *colsY]]

# Select feature columns based on predefined names
colsY = [c for c in colsY if c in ["stress_value_center"]]

_dfY = _df[colsY]
_dfX = _df[colsX]
print("Selected feature columns:", colsX)
print("Selected target columns:", colsY)
print(f"dfX.shape: {_dfX.shape}")
print(f"dfY.shape: {_dfY.shape}")

Selected feature columns: ['position', 'R', 'W', 'D', 'Fx_location', 'Fy_location', 'Fz_location', 'Mz_location', 'Fx__dwell__fft_coefficient__attr_"abs"__coeff_11', 'Fx__dwell__quantile__q_0.7', 'Fx__dwell__partial_autocorrelation__lag_6', 'Fy__dwell__fft_coefficient__attr_"real"__coeff_71', 'Fy__dwell__last_location_of_minimum', 'Fy__dwell__change_quantiles__f_agg_"mean"__isabs_True__qh_0.8__ql_0.2', 'Fz__dwell__fft_coefficient__attr_"angle"__coeff_4', 'Fz__dwell__fft_coefficient__attr_"angle"__coeff_68', 'Fz__dwell__approximate_entropy__m_2__r_0.7', 'Mz__dwell__augmented_dickey_fuller__attr_"teststat"__autolag_"AIC"', 'Mz__dwell__fft_coefficient__attr_"real"__coeff_62', 'Mz__dwell__change_quantiles__f_agg_"mean"__isabs_True__qh_0.8__ql_0.2', 'Fx__weld__change_quantiles__f_agg_"var"__isabs_True__qh_0.6__ql_0.2', 'Fx__weld__fft_coefficient__attr_"real"__coeff_31', 'Fx__weld__fft_coefficient__attr_"abs"__coeff_58', 'Fy__weld__energy_ratio_by_chunks__num_segments_10__segment_focus_5', '

In [37]:
# %% Extract features and targets
_X = _dfX.values
_Y = _dfY.values
print(f"_X.shape: {_X.shape}")
print(f"_Y.shape: {_Y.shape}")

_X.shape: (378, 32)
_Y.shape: (378, 1)


In [38]:
# Create DataHandler instance
data_handler = DataHandler(
    _X=_X,
    _Y=_Y,
    scalerX=StandardScaler(),
    scalerY=StandardScaler(),
    colsX=colsX,
    colsY=colsY,
)

In [39]:
idx = 1
random_state = 1
test_size = 0.0
data_handler.split_and_scale(random_state=random_state, test_size=test_size)
df_X_train, df_Y_train = data_handler.get_train(as_dataframe=True)
display(df_X_train.head())
display(df_Y_train.head())

No test set, using all data for training.


Unnamed: 0,position,R,W,D,Fx_location,Fy_location,Fz_location,Mz_location,"Fx__dwell__fft_coefficient__attr_""abs""__coeff_11",Fx__dwell__quantile__q_0.7,...,"Fx__weld__fft_coefficient__attr_""abs""__coeff_58",Fy__weld__energy_ratio_by_chunks__num_segments_10__segment_focus_5,"Fy__weld__fft_coefficient__attr_""imag""__coeff_61","Fy__weld__fft_coefficient__attr_""real""__coeff_51","Fz__weld__change_quantiles__f_agg_""mean""__isabs_False__qh_1.0__ql_0.4","Fz__weld__fft_coefficient__attr_""real""__coeff_84",Fz__weld__ratio_beyond_r_sigma__r_1,Mz__weld__quantile__q_0.1,"Mz__weld__fft_coefficient__attr_""angle""__coeff_15","Mz__weld__fft_coefficient__attr_""abs""__coeff_58"
0,-0.5,-1.224745,0.0,-1.224745,-0.777006,-0.464968,-0.805576,-0.423257,-0.212769,-0.267954,...,0.601824,0.10259,-1.971528,-3.38779,0.358393,-0.865086,0.527619,-0.348236,1.361784,0.104113
1,-4.810966e-16,1.224745,1.224745,0.0,-0.032479,-0.377666,0.320237,-0.355252,-0.525585,0.190508,...,0.002835,-0.30258,0.160638,0.536173,-0.374696,2.500901,-1.23472,-0.315047,0.163715,0.052008
2,-1.0,0.0,-1.224745,-1.224745,0.064,-0.782369,-2.024732,-0.465982,0.203409,1.527986,...,-0.032205,1.093932,-1.5008,0.771406,0.329889,1.139794,1.94874,-0.5368,-2.594135,0.540846
3,-0.5,1.224745,1.224745,1.224745,-0.050435,-0.543303,-0.166273,-0.374813,-0.05391,0.267501,...,-1.123431,-0.944348,0.233331,-1.08193,-0.163097,2.072971,0.615044,-0.294181,0.100716,0.194849
4,-1.0,1.224745,0.0,-1.224745,-0.117693,-0.393217,-1.237342,-0.468396,-0.531406,-0.342628,...,0.319822,-0.398962,0.197916,0.044907,0.691684,-0.271261,0.606539,-0.318095,-0.064711,1.326479


Unnamed: 0,stress_value_center
0,-1.088213
1,0.678466
2,-0.49932
3,-0.05765
4,-0.646543


In [40]:
# Train LassoCV model
model = LassoCV(cv=5, random_state=0, max_iter=10000).fit(
    df_X_train, df_Y_train.values.ravel()
)

In [47]:
# Rank features from coefficients
selector = SelectFromModel(model, prefit=True, threshold="mean")
feature_idx = selector.get_support()
feature_names = df_X_train.columns[feature_idx]
feature_importance = abs(model.coef_[feature_idx])
feature_ranking = pd.DataFrame(
    {"Feature": feature_names, "Importance": feature_importance}
).sort_values(by="Importance", ascending=False) 
feature_ranking.reset_index(drop=True, inplace=True)
display(feature_ranking)

Unnamed: 0,Feature,Importance
0,position,0.48042
1,Fz__weld__ratio_beyond_r_sigma__r_1,0.186607
2,"Fy__weld__fft_coefficient__attr_""real""__coeff_51",0.100461
3,"Fx__weld__fft_coefficient__attr_""abs""__coeff_58",0.087345
4,Fx_location,0.069481
5,R,0.061424
6,Fx__dwell__partial_autocorrelation__lag_6,0.06
7,"Fz__weld__fft_coefficient__attr_""real""__coeff_84",0.054795


In [None]:
# Feature selection using Lasso coefficients
# Use a small threshold to capture exactly zero coefficients

sel = SelectFromModel(model, prefit=True, threshold=1e-2)

# To get the names of selected features (if X_train is a pandas DataFrame)
selected_features = df_X_train.columns[sel.get_support()]

print(f"Selected features: {list(selected_features)}")

Threshold: 0.0001, Selected features (17): ['position', 'R', 'W', 'D', 'Fx_location', 'Fx__dwell__partial_autocorrelation__lag_6', 'Fy__dwell__fft_coefficient__attr_"real"__coeff_71', 'Fy__dwell__change_quantiles__f_agg_"mean"__isabs_True__qh_0.8__ql_0.2', 'Fz__dwell__approximate_entropy__m_2__r_0.7', 'Mz__dwell__augmented_dickey_fuller__attr_"teststat"__autolag_"AIC"', 'Fx__weld__change_quantiles__f_agg_"var"__isabs_True__qh_0.6__ql_0.2', 'Fx__weld__fft_coefficient__attr_"abs"__coeff_58', 'Fy__weld__fft_coefficient__attr_"imag"__coeff_61', 'Fy__weld__fft_coefficient__attr_"real"__coeff_51', 'Fz__weld__fft_coefficient__attr_"real"__coeff_84', 'Fz__weld__ratio_beyond_r_sigma__r_1', 'Mz__weld__fft_coefficient__attr_"angle"__coeff_15']
Threshold: 0.0005, Selected features (17): ['position', 'R', 'W', 'D', 'Fx_location', 'Fx__dwell__partial_autocorrelation__lag_6', 'Fy__dwell__fft_coefficient__attr_"real"__coeff_71', 'Fy__dwell__change_quantiles__f_agg_"mean"__isabs_True__qh_0.8__ql_0.2', 