In [12]:
import re
from pathlib import Path

import pandas as pd
import statsmodels.api as sm
from sklearn.preprocessing import StandardScaler

from run1.lib.classes_ml import DataHandler

In [13]:
# Working directory of the kernel process (Path.cwd()), not necessarily the
# folder that contains this notebook file.
BASE_DIR = Path.cwd()
CURRENT_DIR = BASE_DIR

# Three levels above the working directory; the input data lives beneath it.
ROOT_DIR = BASE_DIR.parent.parent.parent
DATA_DIR = ROOT_DIR.joinpath("run1", "P02_MF_1", "T01_af_features")

In [14]:
# Read the combined-data workbook from DATA_DIR into a DataFrame and report
# its (rows, columns) shape.
_df = pd.read_excel(DATA_DIR.joinpath("S01_combined_data.xlsx"))
print(f"df.shape: {_df.shape}")

df.shape: (378, 37)


In [15]:
# Select columns for features and targets.
# Every column whose name contains "stress_value" is a candidate target. All of
# them are excluded from the feature list (together with the "sample_no" and
# "location" columns) BEFORE the target list is narrowed, so no stress column
# ends up among the predictors.
colsY = [c for c in _df.columns if re.search(r"stress_value", c)]
colsX = [c for c in _df.columns if c not in ["sample_no", "location", *colsY]]

# Narrow the targets to the predefined target name(s).
colsY = [c for c in colsY if c in ["stress_value_center"]]
if not colsY:
    # Without this guard a missing or renamed target column would silently
    # yield a zero-column target frame and only fail later, during fitting.
    raise KeyError(
        "Target column 'stress_value_center' was not found among the "
        "'stress_value*' columns of the loaded data."
    )

_dfY = _df[colsY]
_dfX = _df[colsX]
print("Selected feature columns:", colsX)
print("Selected target columns:", colsY)
print(f"dfX.shape: {_dfX.shape}")
print(f"dfY.shape: {_dfY.shape}")

Selected feature columns: ['position', 'R', 'W', 'D', 'Fx_location', 'Fy_location', 'Fz_location', 'Mz_location', 'Fx__dwell__fft_coefficient__attr_"abs"__coeff_11', 'Fx__dwell__quantile__q_0.7', 'Fx__dwell__partial_autocorrelation__lag_6', 'Fy__dwell__fft_coefficient__attr_"real"__coeff_71', 'Fy__dwell__last_location_of_minimum', 'Fy__dwell__change_quantiles__f_agg_"mean"__isabs_True__qh_0.8__ql_0.2', 'Fz__dwell__fft_coefficient__attr_"angle"__coeff_4', 'Fz__dwell__fft_coefficient__attr_"angle"__coeff_68', 'Fz__dwell__approximate_entropy__m_2__r_0.7', 'Mz__dwell__augmented_dickey_fuller__attr_"teststat"__autolag_"AIC"', 'Mz__dwell__fft_coefficient__attr_"real"__coeff_62', 'Mz__dwell__change_quantiles__f_agg_"mean"__isabs_True__qh_0.8__ql_0.2', 'Fx__weld__change_quantiles__f_agg_"var"__isabs_True__qh_0.6__ql_0.2', 'Fx__weld__fft_coefficient__attr_"real"__coeff_31', 'Fx__weld__fft_coefficient__attr_"abs"__coeff_58', 'Fy__weld__energy_ratio_by_chunks__num_segments_10__segment_focus_5', '

In [16]:
# %% Extract features and targets
# Pull the underlying arrays out of the feature and target frames.
_X, _Y = _dfX.values, _dfY.values
print(f"_X.shape: {_X.shape}")
print(f"_Y.shape: {_Y.shape}")

_X.shape: (378, 32)
_Y.shape: (378, 1)


In [17]:
# Create DataHandler instance.
# Arguments are grouped by role: raw arrays, their column names, then one
# independent StandardScaler each for the features and the target.
# NOTE(review): DataHandler is a project class; how it uses these arguments is
# not visible from this notebook.
data_handler = DataHandler(
    _X=_X,
    colsX=colsX,
    scalerX=StandardScaler(),
    _Y=_Y,
    colsY=colsY,
    scalerY=StandardScaler(),
)

In [18]:
# Split/scale settings. `idx` is not referenced anywhere else in this cell.
idx = 1
random_state, test_size = 1, 0.0  # test_size of 0.0 requests no hold-out portion

# Split and scale, then fetch the training portion as DataFrames and preview
# the first rows of each.
data_handler.split_and_scale(test_size=test_size, random_state=random_state)
df_X_train, df_Y_train = data_handler.get_train(as_dataframe=True)
display(df_X_train.head(), df_Y_train.head())

No test set, using all data for training.


Unnamed: 0,position,R,W,D,Fx_location,Fy_location,Fz_location,Mz_location,"Fx__dwell__fft_coefficient__attr_""abs""__coeff_11",Fx__dwell__quantile__q_0.7,...,"Fx__weld__fft_coefficient__attr_""abs""__coeff_58",Fy__weld__energy_ratio_by_chunks__num_segments_10__segment_focus_5,"Fy__weld__fft_coefficient__attr_""imag""__coeff_61","Fy__weld__fft_coefficient__attr_""real""__coeff_51","Fz__weld__change_quantiles__f_agg_""mean""__isabs_False__qh_1.0__ql_0.4","Fz__weld__fft_coefficient__attr_""real""__coeff_84",Fz__weld__ratio_beyond_r_sigma__r_1,Mz__weld__quantile__q_0.1,"Mz__weld__fft_coefficient__attr_""angle""__coeff_15","Mz__weld__fft_coefficient__attr_""abs""__coeff_58"
0,-0.5,-1.224745,0.0,-1.224745,-0.777006,-0.464968,-0.805576,-0.423257,-0.212769,-0.267954,...,0.601824,0.10259,-1.971528,-3.38779,0.358393,-0.865086,0.527619,-0.348236,1.361784,0.104113
1,-4.810966e-16,1.224745,1.224745,0.0,-0.032479,-0.377666,0.320237,-0.355252,-0.525585,0.190508,...,0.002835,-0.30258,0.160638,0.536173,-0.374696,2.500901,-1.23472,-0.315047,0.163715,0.052008
2,-1.0,0.0,-1.224745,-1.224745,0.064,-0.782369,-2.024732,-0.465982,0.203409,1.527986,...,-0.032205,1.093932,-1.5008,0.771406,0.329889,1.139794,1.94874,-0.5368,-2.594135,0.540846
3,-0.5,1.224745,1.224745,1.224745,-0.050435,-0.543303,-0.166273,-0.374813,-0.05391,0.267501,...,-1.123431,-0.944348,0.233331,-1.08193,-0.163097,2.072971,0.615044,-0.294181,0.100716,0.194849
4,-1.0,1.224745,0.0,-1.224745,-0.117693,-0.393217,-1.237342,-0.468396,-0.531406,-0.342628,...,0.319822,-0.398962,0.197916,0.044907,0.691684,-0.271261,0.606539,-0.318095,-0.064711,1.326479


Unnamed: 0,stress_value_center
0,-1.088213
1,0.678466
2,-0.49932
3,-0.05765
4,-0.646543


In [19]:
# Analyze model summary (all predictors): ordinary least squares of the target
# on every training feature plus an added constant column.
X = sm.add_constant(df_X_train)
model = sm.OLS(endog=df_Y_train, exog=X).fit()

# Text summary of the fitted model.
model_summary = model.summary()
print(model_summary)

# Coefficient table from summary2(), ordered from the smallest p-value upward
# so the most significant terms come first.
df_table = model.summary2().tables[1].sort_values(by="P>|t|")
display(df_table)


                             OLS Regression Results                            
Dep. Variable:     stress_value_center   R-squared:                       0.450
Model:                             OLS   Adj. R-squared:                  0.399
Method:                  Least Squares   F-statistic:                     8.811
Date:                 Thu, 29 Jan 2026   Prob (F-statistic):           6.61e-29
Time:                         14:07:58   Log-Likelihood:                -423.46
No. Observations:                  378   AIC:                             912.9
Df Residuals:                      345   BIC:                             1043.
Df Model:                           32                                         
Covariance Type:             nonrobust                                         
                                                                            coef    std err          t      P>|t|      [0.025      0.975]
--------------------------------------------------------------

Unnamed: 0,Coef.,Std.Err.,t,P>|t|,[0.025,0.975]
position,-0.8022147,0.09322,-8.605567,2.724742e-16,-0.985567,-0.618863
Mz_location,2.751546,0.672704,4.090277,5.367179e-05,1.428429,4.074663
Mz__weld__quantile__q_0.1,-2.591277,0.649716,-3.988326,8.127077e-05,-3.869179,-1.313375
Fz__weld__ratio_beyond_r_sigma__r_1,-0.2885729,0.08809,-3.275894,0.001160189,-0.461834,-0.115312
"Fy__weld__fft_coefficient__attr_""real""__coeff_51",-0.1460992,0.071576,-2.041168,0.04199473,-0.28688,-0.005318
Fx_location,0.1283855,0.068893,1.863558,0.06323315,-0.007117,0.263888
"Mz__weld__fft_coefficient__attr_""angle""__coeff_15",0.1343185,0.076104,1.764925,0.07846115,-0.015368,0.284005
"Fz__weld__fft_coefficient__attr_""real""__coeff_84",-0.1041619,0.068073,-1.530139,0.1268986,-0.238053,0.029729
"Mz__dwell__augmented_dickey_fuller__attr_""teststat""__autolag_""AIC""",-0.09205889,0.065319,-1.409374,0.1596253,-0.220532,0.036415
W,0.07053395,0.05275,1.337146,0.182056,-0.033217,0.174285
