In [11]:
from pathlib import Path
import pandas as pd

In [12]:
# Working directory of the kernel process. This equals the notebook's folder
# only when the kernel was started from there.
CURRENT_DIR = Path.cwd()
# The project root is taken to be two levels above the working directory.
ROOT_DIR = CURRENT_DIR.parent.parent

print(
    f"Current Directory: {CURRENT_DIR}",
    f"Root Directory: {ROOT_DIR}",
    sep="\n",
)

Current Directory: c:\Users\admin\Coding\research\weld-ml\run1\Q01_agg
Root Directory: c:\Users\admin\Coding\research\weld-ml


In [13]:
# One entry per (experiment, subtype) result workbook, in the order:
# all subtypes of P02_MF_1, then all subtypes of P03_MF_2.
# Each subtype maps to its task folder and the workbook file inside it.
load_infos = [
    {
        "path": ROOT_DIR / "run1" / experiment / task_dir / workbook,
        "experiment": experiment,
        "experiment_subtype": subtype,
    }
    for experiment in ("P02_MF_1", "P03_MF_2")
    for subtype, task_dir, workbook in (
        ("ols", "T21_ols", "S01.xlsx"),
        ("lasso", "T22_lasso", "S01.xlsx"),
        ("shap_ml", "T23_shap_ml", "S02.xlsx"),
        ("shap_tabPFN", "T24_shap_tabPFN", "S02.xlsx"),
    )
]

In [14]:
# Read every workbook listed in `load_infos`, tag its rows with the experiment
# and subtype they came from, then stack everything into one long frame.
df_arr = []
for info in load_infos:
    df_in = pd.read_excel(info["path"]).assign(
        experiment=info["experiment"],
        experiment_subtype=info["experiment_subtype"],
    )
    df_arr.append(df_in)
    print(f"Loaded data for experiment: {info['experiment']}")

# ignore_index=True renumbers the rows 0..n-1 across all workbooks.
df = pd.concat(df_arr, ignore_index=True)

Loaded data for experiment: P02_MF_1
Loaded data for experiment: P02_MF_1
Loaded data for experiment: P02_MF_1
Loaded data for experiment: P02_MF_1
Loaded data for experiment: P03_MF_2
Loaded data for experiment: P03_MF_2
Loaded data for experiment: P03_MF_2
Loaded data for experiment: P03_MF_2


In [15]:
# Preview the combined frame: one row per feature / measure / experiment / subtype.
df

Unnamed: 0,feature,value,measure,rank,experiment,experiment_subtype
0,position,2.724742e-16,OLS_p_value,1,P02_MF_1,ols
1,Mz_location,5.367179e-05,OLS_p_value,2,P02_MF_1,ols
2,Mz__weld__quantile__q_0.1,8.127077e-05,OLS_p_value,3,P02_MF_1,ols
3,Fz__weld__ratio_beyond_r_sigma__r_1,1.160189e-03,OLS_p_value,4,P02_MF_1,ols
4,"Fy__weld__fft_coefficient__attr_""real""__coeff_51",4.199473e-02,OLS_p_value,5,P02_MF_1,ols
...,...,...,...,...,...,...
280,Fy__dwell__last_location_of_minimum,2.715260e-03,SHAP_importance,52,P03_MF_2,shap_tabPFN
281,"Fz__dwell__change_quantiles__f_agg_""mean""__isa...",2.388679e-03,SHAP_importance,53,P03_MF_2,shap_tabPFN
282,Fx__dwell__quantile__q_0.7,2.321671e-03,SHAP_importance,54,P03_MF_2,shap_tabPFN
283,"Fz__weld__agg_linear_trend__attr_""rvalue""__chu...",2.197688e-03,SHAP_importance,55,P03_MF_2,shap_tabPFN


In [16]:
def select_rank(df_in, top_n=20, p_threshold=0.05):
    """Return the best-ranked rows of one (experiment, experiment_subtype) group.

    Meant to be passed to ``DataFrameGroupBy.apply`` on a frame grouped by
    ``["experiment", "experiment_subtype"]``; the group key is read from
    ``df_in.name``, which ``apply`` sets on each group.

    Parameters
    ----------
    df_in : pandas.DataFrame
        Rows of a single group. Must have ``value`` and ``rank`` columns and a
        ``name`` attribute whose first two items are the experiment and the
        experiment subtype.
    top_n : int, default 20
        Maximum number of rows kept after sorting by ``rank`` (ascending).
    p_threshold : float, default 0.05
        Applied only when the subtype is ``"ols"``: rows whose ``value``
        (the OLS p-value) is greater than this are dropped before ranking.

    Returns
    -------
    pandas.DataFrame
        At most ``top_n`` rows of ``df_in``, ordered by ascending ``rank``.
    """
    experiment = df_in.name[0]
    experiment_subtype = df_in.name[1]
    print(f"Selecting top ranks for Experiment: {experiment}, Measure: {experiment_subtype}")

    # For OLS the "value" column holds p-values: keep only significant
    # features, so an OLS group may contribute fewer than `top_n` rows.
    if experiment_subtype == "ols":
        df_in = df_in[df_in["value"] <= p_threshold]

    return df_in.sort_values(by=["rank"], ascending=[True]).head(top_n)


# Apply `select_rank` to every (experiment, experiment_subtype) group.
# The result is indexed by the two group keys plus the original row label;
# drop that innermost level and turn the group keys back into columns.
df_top = (
    df.groupby(["experiment", "experiment_subtype"])
    .apply(select_rank, include_groups=False)
    .droplevel(-1)
    .reset_index()
)
df_top

Selecting top ranks for Experiment: P02_MF_1, Measure: lasso
Selecting top ranks for Experiment: P02_MF_1, Measure: ols
Selecting top ranks for Experiment: P02_MF_1, Measure: shap_ml
Selecting top ranks for Experiment: P02_MF_1, Measure: shap_tabPFN
Selecting top ranks for Experiment: P03_MF_2, Measure: lasso
Selecting top ranks for Experiment: P03_MF_2, Measure: ols
Selecting top ranks for Experiment: P03_MF_2, Measure: shap_ml
Selecting top ranks for Experiment: P03_MF_2, Measure: shap_tabPFN


Unnamed: 0,experiment,experiment_subtype,feature,value,measure,rank
0,P02_MF_1,lasso,position,0.480420,Lasso_coefficient,1
1,P02_MF_1,lasso,Fz__weld__ratio_beyond_r_sigma__r_1,0.186607,Lasso_coefficient,2
2,P02_MF_1,lasso,"Fy__weld__fft_coefficient__attr_""real""__coeff_51",0.100461,Lasso_coefficient,3
3,P02_MF_1,lasso,"Fx__weld__fft_coefficient__attr_""abs""__coeff_58",0.087345,Lasso_coefficient,4
4,P02_MF_1,lasso,Fx_location,0.069481,Lasso_coefficient,5
...,...,...,...,...,...,...
111,P03_MF_2,shap_tabPFN,"Fz__weld__fft_coefficient__attr_""real""__coeff_84",0.013788,SHAP_importance,16
112,P03_MF_2,shap_tabPFN,Fz__dwell__approximate_entropy__m_2__r_0.9,0.013029,SHAP_importance,17
113,P03_MF_2,shap_tabPFN,"Fx__weld__fft_coefficient__attr_""abs""__coeff_58",0.012086,SHAP_importance,18
114,P03_MF_2,shap_tabPFN,"Mz__weld__fft_coefficient__attr_""abs""__coeff_83",0.011783,SHAP_importance,19


In [17]:
# Wide view: one row per feature, one rank column per (experiment, subtype).
# Cells stay NaN where a feature was not selected for that group. Duplicate
# entries, if any, are combined with pivot_table's default aggregation (mean).
df_pivot = pd.pivot_table(
    df_top,
    values=["rank"],
    index=["feature"],
    columns=["experiment", "experiment_subtype"],
)
df_pivot

Unnamed: 0_level_0,rank,rank,rank,rank,rank,rank,rank,rank
experiment,P02_MF_1,P02_MF_1,P02_MF_1,P02_MF_1,P03_MF_2,P03_MF_2,P03_MF_2,P03_MF_2
experiment_subtype,lasso,ols,shap_ml,shap_tabPFN,lasso,ols,shap_ml,shap_tabPFN
feature,Unnamed: 1_level_3,Unnamed: 2_level_3,Unnamed: 3_level_3,Unnamed: 4_level_3,Unnamed: 5_level_3,Unnamed: 6_level_3,Unnamed: 7_level_3,Unnamed: 8_level_3
D,,,,16.0,,7.0,,
Fx__dwell__ar_coefficient__coeff_0__k_10,,,,,,,19.0,
"Fx__dwell__fft_coefficient__attr_""abs""__coeff_11",,,16.0,10.0,,,,8.0
Fx__dwell__partial_autocorrelation__lag_6,7.0,,5.0,3.0,,,8.0,7.0
"Fx__weld__change_quantiles__f_agg_""var""__isabs_True__qh_0.6__ql_0.2",,,20.0,19.0,11.0,,,
"Fx__weld__fft_coefficient__attr_""abs""__coeff_5",,,,,13.0,6.0,,
"Fx__weld__fft_coefficient__attr_""abs""__coeff_58",4.0,,13.0,12.0,3.0,,,18.0
"Fx__weld__fft_coefficient__attr_""imag""__coeff_21",,,,,,,5.0,10.0
Fx_location,5.0,,7.0,9.0,4.0,,13.0,9.0
Fy__dwell__autocorrelation__lag_3,,,,,9.0,8.0,4.0,13.0


In [18]:
# Snapshot of the pivot's column MultiIndex, taken while `df_pivot` holds only
# the rank columns. Re-running this cell after summary columns have been added
# to `df_pivot` would capture those columns as well.
colsX = df_pivot.columns
colsX

MultiIndex([('rank', 'P02_MF_1',       'lasso'),
            ('rank', 'P02_MF_1',         'ols'),
            ('rank', 'P02_MF_1',     'shap_ml'),
            ('rank', 'P02_MF_1', 'shap_tabPFN'),
            ('rank', 'P03_MF_2',       'lasso'),
            ('rank', 'P03_MF_2',         'ols'),
            ('rank', 'P03_MF_2',     'shap_ml'),
            ('rank', 'P03_MF_2', 'shap_tabPFN')],
           names=[None, 'experiment', 'experiment_subtype'])

In [19]:
# Summarise each feature across all (experiment, subtype) rank columns.
# The rank columns are selected by their top-level "rank" label rather than
# from a column snapshot captured in another cell. The statistics therefore
# stay correct whatever order the cells are re-run in, even once "count" and
# "average_rank" already exist on the frame.
rank_values = df_pivot["rank"]

# Number of groups in which the feature made the selection (non-NaN ranks).
df_pivot["count"] = rank_values.count(axis=1)
# Mean rank over the groups where the feature appears (NaNs are skipped).
df_pivot["average_rank"] = rank_values.mean(axis=1).round(2)

# Most frequently selected features first; ties broken by the better average rank.
df_pivot = df_pivot.sort_values(by=["count", "average_rank"], ascending=[False, True])
df_pivot

Unnamed: 0_level_0,rank,rank,rank,rank,rank,rank,rank,rank,count,average_rank
experiment,P02_MF_1,P02_MF_1,P02_MF_1,P02_MF_1,P03_MF_2,P03_MF_2,P03_MF_2,P03_MF_2,Unnamed: 9_level_1,Unnamed: 10_level_1
experiment_subtype,lasso,ols,shap_ml,shap_tabPFN,lasso,ols,shap_ml,shap_tabPFN,Unnamed: 9_level_2,Unnamed: 10_level_2
feature,Unnamed: 1_level_3,Unnamed: 2_level_3,Unnamed: 3_level_3,Unnamed: 4_level_3,Unnamed: 5_level_3,Unnamed: 6_level_3,Unnamed: 7_level_3,Unnamed: 8_level_3,Unnamed: 9_level_3,Unnamed: 10_level_3
position,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,8,1.0
Fz__weld__ratio_beyond_r_sigma__r_1,2.0,4.0,2.0,2.0,2.0,,2.0,2.0,7,2.29
"Fy__weld__fft_coefficient__attr_""real""__coeff_51",3.0,5.0,3.0,8.0,5.0,,3.0,6.0,7,4.71
Mz_location,,2.0,10.0,6.0,,2.0,10.0,5.0,6,5.83
Fx_location,5.0,,7.0,9.0,4.0,,13.0,9.0,6,7.83
Fz_location,,,8.0,5.0,,3.0,9.0,4.0,5,5.8
Fx__dwell__partial_autocorrelation__lag_6,7.0,,5.0,3.0,,,8.0,7.0,5,6.0
Mz__weld__quantile__q_0.1,,3.0,12.0,13.0,,4.0,16.0,,5,9.6
"Fx__weld__fft_coefficient__attr_""abs""__coeff_58",4.0,,13.0,12.0,3.0,,,18.0,5,10.0
Fy__dwell__autocorrelation__lag_3,,,,,9.0,8.0,4.0,13.0,4,8.5


In [20]:
# Persist the ranked pivot. The relative path resolves against the kernel's
# current working directory.
df_pivot.to_excel(excel_writer="feature_ranks_pivot.xlsx")