In [39]:
from pathlib import Path
import pandas as pd
from tsfresh.feature_selection.relevance import calculate_relevance_table
import os
import re

In [15]:
CURRENT_DIR = Path.cwd()


In [16]:
# Load Y data

stresses = pd.read_excel(CURRENT_DIR / "S02_data_y_prepared.xlsx")
stresses

Unnamed: 0,sample_no,location,stress_value_5052,stress_value_6061,stress_value_center
0,1,1,28.0,51.0,12.0
1,2,1,14.0,-21.0,17.0
2,3,1,10.0,35.0,12.0
3,4,1,10.0,-10.0,20.0
4,5,1,6.0,41.0,14.0
...,...,...,...,...,...
373,50,7,4.0,-23.0,2.0
374,51,7,0.0,-1.0,2.0
375,52,7,-2.0,-41.0,5.0
376,53,7,10.0,-90.0,1.0


In [42]:
# Load features
_filts = os.listdir(CURRENT_DIR)
files = [f for f in _filts if f.startswith("S01_af_features_") and f.endswith(".xlsx")]
print(files)

['S01_af_features_Fx.xlsx', 'S01_af_features_Fy.xlsx', 'S01_af_features_Fz.xlsx', 'S01_af_features_Mz.xlsx']


In [44]:
# Load features
data_dict = {}
for file in files:
    measure = re.match(r"S01_af_features_(\w+)\.xlsx", file).group(1)
    print(f"Loading features for: {measure}")

    ext_dwell = pd.read_excel(
        CURRENT_DIR / file,
        sheet_name="af_dwell",
    )
    ext_weld = pd.read_excel(CURRENT_DIR / file, sheet_name="af_weld")
    data = {
        "dwell": ext_dwell,
        "weld": ext_weld,
    }
    data_dict[measure] = data

Loading features for: Fx
Loading features for: Fy
Loading features for: Fz
Loading features for: Mz


In [None]:
# Combine features into a single DataFrame
df_features = (
    stresses[["sample_no"]]
    .merge(ext_dwell, on="sample_no", how="left")
    .drop(columns=["sample_no"])
)

In [29]:
df_rel = calculate_relevance_table(
    df_features, stresses["stress_value_6061"], ml_task="regression"
)

In [31]:
df_rel

Unnamed: 0_level_0,feature,type,p_value,relevant
feature,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Fx__quantile__q_0.7,Fx__quantile__q_0.7,real,0.000509,False
"Fx__fft_coefficient__attr_""real""__coeff_62","Fx__fft_coefficient__attr_""real""__coeff_62",real,0.000730,False
"Fx__fft_coefficient__attr_""real""__coeff_42","Fx__fft_coefficient__attr_""real""__coeff_42",real,0.001160,False
Fx__quantile__q_0.8,Fx__quantile__q_0.8,real,0.002134,False
"Fx__fft_coefficient__attr_""angle""__coeff_62","Fx__fft_coefficient__attr_""angle""__coeff_62",real,0.002649,False
...,...,...,...,...
Fx__number_crossing_m__m_1,Fx__number_crossing_m__m_1,constant,,False
Fx__ratio_beyond_r_sigma__r_5,Fx__ratio_beyond_r_sigma__r_5,constant,,False
Fx__ratio_beyond_r_sigma__r_6,Fx__ratio_beyond_r_sigma__r_6,constant,,False
Fx__ratio_beyond_r_sigma__r_7,Fx__ratio_beyond_r_sigma__r_7,constant,,False


In [21]:
def calRelTable(df_features, df_y, col_y):
    # col_y = "AA5052_L2"
    df_relevance = calculate_relevance_table(
        df_features, df_y[col_y], ml_task="regression"
    )
    df_relevance["y"] = col_y
    print(f"Calculated relevance for {col_y}")
    return df_relevance

In [22]:
# Identify target columns
cols_y = [col for col in stresses.columns if col not in ["sample_no", "R", "W", "D"]]
print(cols_y)

['location', 'stress_value_5052', 'stress_value_6061', 'stress_value_center']


In [23]:
# Calculate relevance for dwell features
data_arr = []
for col_y in cols_y:
    r = calRelTable(ext_dwell, stresses, col_y)
    r = r.reset_index(
        drop=True
    )  # Remove extra "feature" columns since it appears in both index and data columns
    data_arr.append(r)

relevances_dwell = pd.concat(data_arr)


AssertionError: The index of X and y need to be the same

In [None]:
# Calculate relevance for weld features
data_arr = []
for col_y in cols_y:
    r = calRelTable(ext_weld, stresses, col_y)
    r = r.reset_index(
        drop=True
    )  # Remove extra "feature" columns since it appears in both index and data columns
    data_arr.append(r)

relevances_weld = pd.concat(data_arr)

Calculated relevance for AA5052_L1
Calculated relevance for AA5052_L2
Calculated relevance for AA5052_L3
Calculated relevance for AA5052_L4
Calculated relevance for AA5052_L5
Calculated relevance for AA5052_L6
Calculated relevance for AA5052_L7
Calculated relevance for AA6061_L1
Calculated relevance for AA6061_L2
Calculated relevance for AA6061_L3
Calculated relevance for AA6061_L4
Calculated relevance for AA6061_L5
Calculated relevance for AA6061_L6
Calculated relevance for AA6061_L7
Calculated relevance for Center_L1
Calculated relevance for Center_L2
Calculated relevance for Center_L3
Calculated relevance for Center_L4
Calculated relevance for Center_L5
Calculated relevance for Center_L6
Calculated relevance for Center_L7
Calculated relevance for AA5052_mean
Calculated relevance for AA6061_mean
Calculated relevance for Center_mean


In [None]:
with pd.ExcelWriter("S03_af_feature_relevance.xlsx", engine="openpyxl") as writer:
    relevances_dwell.to_excel(writer, sheet_name="af_dwell", index=False)
    relevances_weld.to_excel(writer, sheet_name="af_weld", index=False)