In [None]:
from pathlib import Path
import pandas as pd
from tsfresh.feature_selection.relevance import calculate_relevance_table
import os
import re

In [None]:
# Working directory of the kernel at the time this cell runs; the workbooks
# read and written in the cells below are addressed relative to it.
CURRENT_DIR = Path.cwd()


In [None]:
# Read the prepared target (Y) table from the working directory and show it.
stresses = pd.read_excel(CURRENT_DIR.joinpath("S02_data_y_prepared.xlsx"))
stresses

In [None]:
# Collect the feature workbooks (S01_af_features_<measure>.xlsx) found in the
# working directory. os.listdir returns entries in arbitrary order, so the
# names are sorted to keep the printed list and the load order reproducible.
files = sorted(
    name
    for name in os.listdir(CURRENT_DIR)
    if name.startswith("S01_af_features_") and name.endswith(".xlsx")
)
print(files)

In [None]:
# Load the feature tables: one workbook per measure, each with an "af_dwell"
# and an "af_weld" sheet. Result layout: features_dict[measure][phase].
features_dict = {}
for file in files:
    # Only names of the exact form S01_af_features_<measure>.xlsx are accepted.
    # A name that passes the prefix/suffix filter but not this pattern (for
    # example one containing "-" or an extra ".") is reported and skipped
    # instead of failing with an AttributeError on a None match.
    name_match = re.fullmatch(r"S01_af_features_(\w+)\.xlsx", file)
    if name_match is None:
        print(f"Skipping file with unexpected name: {file}")
        continue

    measure = name_match.group(1)
    print(f"Loading features for: {measure}")

    # A list of sheet names makes read_excel open the workbook once and
    # return a dict of DataFrames keyed by sheet name.
    sheets = pd.read_excel(
        CURRENT_DIR / file,
        sheet_name=["af_dwell", "af_weld"],
    )
    features_dict[measure] = {
        "dwell": sheets["af_dwell"],
        "weld": sheets["af_weld"],
    }

### Prototype

In [None]:
# Prototype run for a single (phase, measure, stress) combination.
phase_type = "dwell"  # alternative: "weld"
measure_type = "Fx"  # alternatives: "Fy", "Fz", "Mz"
stress_type = "stress_value_5052"  # alternatives: "stress_value_6061", "stress_value_center"

features = features_dict[measure_type][phase_type]

# Left-join the feature table onto the sample_no column of `stresses`, then
# discard the join key so only feature columns remain.
features_expanded = pd.merge(
    stresses[["sample_no"]],
    features,
    on="sample_no",
    how="left",
).drop(columns=["sample_no"])

# The table carries "feature" both as index and as a regular column, so the
# index is dropped to avoid the duplicate.
relevance = calculate_relevance_table(
    features_expanded,
    stresses[stress_type],
    ml_task="regression",
).reset_index(drop=True)
display(relevance)

In [None]:
def calRelTable(phase_type: str, measure_type: str, stress_type: str) -> pd.DataFrame:
    """Compute the tsfresh relevance table for one phase/measure/stress combination.

    Reads the notebook-level ``features_dict`` and ``stresses`` objects.

    Parameters
    ----------
    phase_type : str
        Key of the feature table inside ``features_dict[measure_type]``
        (the notebook uses "dwell" and "weld").
    measure_type : str
        Key into ``features_dict`` (the notebook uses "Fx", "Fy", "Fz", "Mz").
    stress_type : str
        Name of the target column in ``stresses``
        (e.g. "stress_value_5052", "stress_value_6061", "stress_value_center").

    Returns
    -------
    pandas.DataFrame
        The result of ``calculate_relevance_table`` (regression task) with its
        index replaced by a default integer index.

    Raises
    ------
    KeyError
        If ``measure_type`` or ``phase_type`` is missing from ``features_dict``,
        or ``stress_type`` is not a column of ``stresses``.
    pandas.errors.MergeError
        If ``sample_no`` is not unique in the selected feature table.
    """
    features = features_dict[measure_type][phase_type]

    # Left-join the features onto the sample_no column of `stresses` and drop
    # the join key. validate="many_to_one" requires sample_no to be unique in
    # the feature table: a duplicate would make the join emit extra rows, so
    # the feature matrix would no longer line up row-for-row with the target.
    features_expanded = (
        stresses[["sample_no"]]
        .merge(features, on="sample_no", how="left", validate="many_to_one")
        .drop(columns=["sample_no"])
    )

    relevance = calculate_relevance_table(
        features_expanded, stresses[stress_type], ml_task="regression"
    )

    # "feature" appears both as the index and as a data column; drop the index
    # so it is not duplicated.
    return relevance.reset_index(drop=True)

In [None]:
import itertools

# Axes of the sweep: every phase x measure x stress combination is evaluated.
phase_types = ["dwell", "weld"]
measure_types = ["Fx", "Fy", "Fz", "Mz"]
stress_types = ["stress_value_5052", "stress_value_6061", "stress_value_center"]

iter_list = list(itertools.product(phase_types, measure_types, stress_types))

# One relevance table per combination, each tagged with the combination that
# produced it so the rows stay identifiable after concatenation.
df_arr = []
for combo in iter_list:
    phase_type, measure_type, stress_type = combo
    print(f"Calculating relevance for: phase={phase_type}, measure={measure_type}, stress={stress_type}")
    _relevance = calRelTable(phase_type, measure_type, stress_type).assign(
        phase_type=phase_type,
        measure_type=measure_type,
        stress_type=stress_type,
    )
    df_arr.append(_relevance)

# Stack all tables into one frame with a fresh integer index.
relevances = pd.concat(df_arr, ignore_index=True)
relevances

In [None]:
# Write the combined relevance table to the working directory, without the
# integer index column.
relevances.to_excel(
    CURRENT_DIR.joinpath("S03_af_feature_relevance.xlsx"),
    index=False,
)