In [None]:
import pickle
from pathlib import Path
import pandas as pd
from tsfresh import extract_features
from sklearn.impute import SimpleImputer


In [None]:
# Resolve the AF data folder relative to the notebook's working directory:
# two levels up, then P02_data/T03_af.
CURRENT_DIR = Path.cwd()
PARENT_DIR = CURRENT_DIR.parent.parent
AF_DATA_DIR = PARENT_DIR.joinpath("P02_data", "T03_af")
print(AF_DATA_DIR)

In [None]:
# Load the pickled AF extract and keep the object stored under its "data" key.
# NOTE: pickle.load can execute arbitrary code; only open files from a trusted source.
pickle_filepath = AF_DATA_DIR.joinpath("S17_af_extract.pkl")
with pickle_filepath.open("rb") as handle:
    data = pickle.load(handle)
afs = data["data"]

In [None]:
# Preview only the first rows; rendering the whole frame bloats the saved notebook output.
afs.head()

In [None]:
def run_extract_features(af_phase, measure_type, num_samples=None, reduce_data=False):
    """Extract tsfresh features for one measurement column of one AF phase.

    Reads the module-level ``afs`` frame. For every selected sample, the
    per-sample frame stored in the ``af_phase`` column is reduced to the
    ``measure_type`` and ``sample_no`` columns, its index is shifted to start
    at 0, and (optionally) it is downsampled. All samples are concatenated
    into one long frame and passed to ``tsfresh.extract_features``.

    Parameters
    ----------
    af_phase : str
        Column of ``afs`` holding the per-sample frames
        (the notebook calls this with "af_dwell" and "af_weld").
    measure_type : str
        Column of each per-sample frame to compute features from.
    num_samples : int or None, default None
        Use only the first ``num_samples`` unique sample numbers.
        ``None`` uses every sample.
    reduce_data : bool, default False
        If True, keep only every 10th row of each per-sample frame.

    Returns
    -------
    pandas.DataFrame
        One row per sample and one column per feature. Features that are
        missing for every sample are dropped; remaining missing (NaN or
        +/-inf) entries are replaced by the column mean.

    Raises
    ------
    ValueError
        If no samples are selected.
    """
    # Ignore missing ids: a NaN id can never match the equality lookup below.
    # Slicing with None keeps every id, so no explicit default is needed.
    sample_ids = afs["sample_no"].dropna().unique()[:num_samples]
    if len(sample_ids) == 0:
        raise ValueError("No samples selected for feature extraction.")

    # Build one long frame containing the series of all selected samples.
    data_arr = []
    for sample_no in sample_ids:
        # First row matching this sample number holds the per-sample frame.
        af = afs.loc[afs["sample_no"] == sample_no, af_phase].values[0]
        _af = af[[measure_type, "sample_no"]].copy()
        _af.index = _af.index - _af.index[0]  # Reset time index to start from 0
        if reduce_data:
            # Reduce data size by downsampling (take every 10th point)
            _af = _af.iloc[::10, :]
        data_arr.append(_af)

    # NOTE(review): column_sort="Time" below requires the per-sample index to be
    # named "Time" so that reset_index() yields that column -- confirm upstream.
    df_af = pd.concat(data_arr).reset_index()

    # Extract features using tsfresh
    _ext = extract_features(
        df_af, column_id="sample_no", column_sort="Time", column_value=measure_type
    )

    # Treat +/-inf as missing: SimpleImputer rejects infinite values.
    _ext = _ext.replace([float("inf"), float("-inf")], float("nan"))

    # Drop only the columns that are NaN for every sample. (how="any" would
    # discard every column with a single gap and make the imputation a no-op.)
    _ext_drop = _ext.dropna(axis=1, how="all")

    # Fill the remaining gaps with the per-feature mean.
    imputer = SimpleImputer(strategy="mean")
    _ext_imputed = imputer.fit_transform(_ext_drop)
    ext = pd.DataFrame(_ext_imputed, columns=_ext_drop.columns, index=_ext_drop.index)
    return ext


In [None]:
# Extract dwell/weld features per measure type and write one workbook each.
# To cover all four channels instead, iterate over ["Fx", "Fy", "Fz", "Mz"].
for measure_type in ["Fz", "Mz"]:
    print(f"Extracting features for measure type: {measure_type}")

    # Both phases use every sample at full resolution.
    shared_kwargs = dict(
        measure_type=measure_type,
        num_samples=None,
        reduce_data=False,
    )
    ext_dwell = run_extract_features(af_phase="af_dwell", **shared_kwargs)
    ext_weld = run_extract_features(af_phase="af_weld", **shared_kwargs)

    # Name the index so it is written with a "sample_no" header.
    for ext_phase in (ext_dwell, ext_weld):
        ext_phase.index.name = "sample_no"

    # One workbook per measure type, one sheet per phase.
    output_name = f"S01_af_features_{measure_type}.xlsx"
    with pd.ExcelWriter(output_name, engine="openpyxl") as writer:
        ext_dwell.to_excel(writer, sheet_name="af_dwell")
        ext_weld.to_excel(writer, sheet_name="af_weld")