In [1]:
import itertools
import os
import re
from pathlib import Path

import pandas as pd
from tsfresh.feature_selection.relevance import calculate_relevance_table

In [2]:
# Base directory for every workbook this notebook reads or writes:
# the kernel's current working directory at the time this cell runs.
CURRENT_DIR = Path.cwd()


In [3]:
# Load the measured stress values (the Y / target data) from the experiment workbook.
# The frame is kept in `stresses`; its stress_value_* columns serve as regression targets below.

stresses = pd.read_excel(CURRENT_DIR.joinpath("S02_data_exp.xlsx"))
stresses

Unnamed: 0,sample_no,location,R,W,D,stress_value_5052,stress_value_6061,stress_value_center
0,1,1,1400,60,10,28.0,51.0,12.0
1,2,1,1400,60,15,14.0,-21.0,17.0
2,3,1,1400,60,20,10.0,35.0,12.0
3,4,1,1400,70,10,10.0,-10.0,20.0
4,5,1,1400,70,15,6.0,41.0,14.0
...,...,...,...,...,...,...,...,...
373,50,7,1600,70,15,4.0,-23.0,2.0
374,51,7,1600,70,20,0.0,-1.0,2.0
375,52,7,1600,80,10,-2.0,-41.0,5.0
376,53,7,1600,80,15,10.0,-90.0,1.0


In [4]:
# Discover the feature workbooks (S01_af_features_<measure>.xlsx) in the working directory.
_filts = os.listdir(CURRENT_DIR)
# os.listdir returns entries in arbitrary order; sort so the file list (and therefore
# the load order in the next cell) is the same on every run and every machine.
files = sorted(
    f for f in _filts if f.startswith("S01_af_features_") and f.endswith(".xlsx")
)
print(files)

['S01_af_features_Fx.xlsx', 'S01_af_features_Fy.xlsx', 'S01_af_features_Fz.xlsx', 'S01_af_features_Mz.xlsx']


In [5]:
# Load the dwell- and weld-phase feature tables of every discovered workbook.
# Result layout: features_dict[<measure>] = {"dwell": DataFrame, "weld": DataFrame}
features_dict = {}
for file in files:
    name_match = re.match(r"S01_af_features_(\w+)\.xlsx", file)
    if name_match is None:
        # A name can pass the prefix/suffix filter and still carry no usable measure
        # part (e.g. "S01_af_features_.xlsx"); fail with a message naming the file
        # instead of an AttributeError on None.
        raise ValueError(f"Cannot extract a measure name from file name: {file!r}")
    measure = name_match.group(1)
    print(f"Loading features for: {measure}")

    # Passing a list of sheet names makes read_excel open the workbook once and
    # return a {sheet_name: DataFrame} dict, instead of parsing the file per sheet.
    sheets = pd.read_excel(
        CURRENT_DIR / file,
        sheet_name=["af_dwell", "af_weld"],
    )
    features_dict[measure] = {
        "dwell": sheets["af_dwell"],
        "weld": sheets["af_weld"],
    }

Loading features for: Fx
Loading features for: Fy
Loading features for: Fz
Loading features for: Mz


### Prototype


In [6]:
# Prototype: relevance table for one hand-picked (phase, measure, stress) combination.
stress_type = "stress_value_5052"  # alternatives: "stress_value_6061", "stress_value_center"
measure_type = "Fx"  # alternatives: "Fy", "Fz", "Mz"
phase_type = "dwell"  # alternative: "weld"

features = features_dict[measure_type][phase_type]

# Left-join the feature table onto the stress rows by sample_no so that the feature
# matrix has one row per stress row, then discard the join key itself.
features_expanded = pd.merge(
    stresses[["sample_no"]],
    features,
    how="left",
    on="sample_no",
).drop(columns="sample_no")

relevance = calculate_relevance_table(
    features_expanded,
    stresses[stress_type],
    ml_task="regression",
)

# The feature name is present both as index and as a regular column;
# replace the index with a plain integer range to avoid the duplication.
relevance = relevance.reset_index(drop=True)
display(relevance)

Unnamed: 0,feature,type,p_value,relevant
0,"Fx__fft_coefficient__attr_""abs""__coeff_11",real,1.565675e-10,True
1,Fx__ar_coefficient__coeff_0__k_10,real,2.866412e-08,True
2,Fx__variation_coefficient,real,1.952901e-07,True
3,"Fx__fft_coefficient__attr_""abs""__coeff_51",real,5.246075e-07,True
4,"Fx__fft_coefficient__attr_""angle""__coeff_75",real,1.000460e-06,True
...,...,...,...,...
777,Fx__number_crossing_m__m_1,constant,,False
778,Fx__ratio_beyond_r_sigma__r_5,constant,,False
779,Fx__ratio_beyond_r_sigma__r_6,constant,,False
780,Fx__ratio_beyond_r_sigma__r_7,constant,,False


### Real Functional Code

In [7]:
def calRelTable(phase_type, measure_type, stress_type):
    """Compute the tsfresh relevance table for one phase/measure/stress combination.

    Reads the notebook-level ``features_dict`` and ``stresses`` objects.

    Parameters
    ----------
    phase_type : str
        Key of the feature table inside ``features_dict[measure_type]``,
        e.g. ``"dwell"`` or ``"weld"``.
    measure_type : str
        Key into ``features_dict``, e.g. ``"Fx"``, ``"Fy"``, ``"Fz"`` or ``"Mz"``.
    stress_type : str
        Name of the target column in ``stresses``, e.g. ``"stress_value_5052"``,
        ``"stress_value_6061"`` or ``"stress_value_center"``.

    Returns
    -------
    pandas.DataFrame
        The table returned by ``calculate_relevance_table`` (``ml_task="regression"``)
        with its index replaced by a default integer index.

    Raises
    ------
    KeyError
        If ``measure_type``/``phase_type`` is missing from ``features_dict`` or
        ``stress_type`` is not a column of ``stresses``.
    pandas.errors.MergeError
        If ``sample_no`` is not unique in the selected feature table.
    """
    features = features_dict[measure_type][phase_type]

    # Attach the features to every stress row via sample_no, then drop the key.
    # validate="many_to_one" rejects duplicated sample_no values in the feature
    # table, which would otherwise silently multiply rows and break the
    # one-to-one correspondence between feature rows and target rows.
    features_expanded = (
        stresses[["sample_no"]]
        .merge(features, on="sample_no", how="left", validate="many_to_one")
        .drop(columns=["sample_no"])
    )
    # A left merge keeps the left row order but returns a fresh 0..n-1 index.
    # Re-attach the index of `stresses` so X and y are aligned by label even if
    # `stresses` does not carry a default index.
    features_expanded.index = stresses.index

    relevance = calculate_relevance_table(
        features_expanded, stresses[stress_type], ml_task="regression"
    )

    # The feature name appears both as index and as a data column; keep only the column.
    return relevance.reset_index(drop=True)

In [8]:
# Run the relevance calculation for every (phase, measure, stress) combination
# and stack the per-combination tables into one long frame.
phase_types = ["dwell", "weld"]
measure_types = ["Fx", "Fy", "Fz", "Mz"]
stress_types = ["stress_value_5052", "stress_value_6061", "stress_value_center"]

iter_list = list(itertools.product(phase_types, measure_types, stress_types))

df_arr = []
for phase_type, measure_type, stress_type in iter_list:
    print(
        f"Calculating relevance for: phase={phase_type}, measure={measure_type}, stress={stress_type}"
    )
    # assign() returns a new frame carrying the combination labels, so the table
    # returned by calRelTable is not modified in place.
    df_arr.append(
        calRelTable(phase_type, measure_type, stress_type).assign(
            phase_type=phase_type,
            measure_type=measure_type,
            stress_type=stress_type,
        )
    )

# ignore_index=True renumbers the stacked rows 0..n-1 in a single step.
relevances = pd.concat(df_arr, ignore_index=True)
relevances

Calculating relevance for: phase=dwell, measure=Fx, stress=stress_value_5052
Calculating relevance for: phase=dwell, measure=Fx, stress=stress_value_6061
Calculating relevance for: phase=dwell, measure=Fx, stress=stress_value_center
Calculating relevance for: phase=dwell, measure=Fy, stress=stress_value_5052
Calculating relevance for: phase=dwell, measure=Fy, stress=stress_value_6061
Calculating relevance for: phase=dwell, measure=Fy, stress=stress_value_center
Calculating relevance for: phase=dwell, measure=Fz, stress=stress_value_5052
Calculating relevance for: phase=dwell, measure=Fz, stress=stress_value_6061
Calculating relevance for: phase=dwell, measure=Fz, stress=stress_value_center
Calculating relevance for: phase=dwell, measure=Mz, stress=stress_value_5052
Calculating relevance for: phase=dwell, measure=Mz, stress=stress_value_6061
Calculating relevance for: phase=dwell, measure=Mz, stress=stress_value_center
Calculating relevance for: phase=weld, measure=Fx, stress=stress_val

Unnamed: 0,feature,type,p_value,relevant,phase_type,measure_type,stress_type
0,"Fx__fft_coefficient__attr_""abs""__coeff_11",real,1.565675e-10,True,dwell,Fx,stress_value_5052
1,Fx__ar_coefficient__coeff_0__k_10,real,2.866412e-08,True,dwell,Fx,stress_value_5052
2,Fx__variation_coefficient,real,1.952901e-07,True,dwell,Fx,stress_value_5052
3,"Fx__fft_coefficient__attr_""abs""__coeff_51",real,5.246075e-07,True,dwell,Fx,stress_value_5052
4,"Fx__fft_coefficient__attr_""angle""__coeff_75",real,1.000460e-06,True,dwell,Fx,stress_value_5052
...,...,...,...,...,...,...,...
18763,Mz__value_count__value_1,constant,,False,weld,Mz,stress_value_center
18764,Mz__value_count__value_-1,constant,,False,weld,Mz,stress_value_center
18765,Mz__ratio_beyond_r_sigma__r_6,constant,,False,weld,Mz,stress_value_center
18766,Mz__ratio_beyond_r_sigma__r_7,constant,,False,weld,Mz,stress_value_center


In [9]:
# Write the combined relevance table to a workbook in the working directory,
# leaving out the integer row index.
relevances.to_excel(
    CURRENT_DIR.joinpath("S03_af_feature_relevances.xlsx"),
    index=False,
)