# Wide Format Conversion


In [1]:
import os
import pickle
import re
from pathlib import Path
import pandas as pd
from tsfresh import extract_features

In [2]:
# Resolve the input folder relative to the directory the kernel was started in:
# <cwd>/../P02_data/T01_explore/S04
CURRENT_DIR = Path.cwd()
PARENT_DIR = CURRENT_DIR.parent
STRESS_DATA_DIR = PARENT_DIR.joinpath("P02_data", "T01_explore", "S04")
print(STRESS_DATA_DIR)

c:\Users\admin\Coding\research\weld-ml\src\P02_data\T01_explore\S04


In [3]:
# Read the residual-stress workbook found under STRESS_DATA_DIR into a DataFrame
stress_filepath = STRESS_DATA_DIR.joinpath(
    "S04_residual_stress_imputed_20260104_114536.xlsx"
)
_data = pd.read_excel(io=stress_filepath)
_data

Unnamed: 0,idx_excel_post,section,sample_no,location,R,W,D,sigma_x_post,FWHM_post,idx_excel_pre,sigma_x_pre,FWHM_pre,diff_sigma_x
0,4,AA5052,1,1,1400,60,10,13.0,2.55,2,-15.0,2.50,28.0
1,25,AA5052,2,1,1400,60,15,16.0,2.51,9,2.0,2.47,14.0
2,46,AA5052,3,1,1400,60,20,19.0,2.47,16,9.0,2.48,10.0
3,67,AA5052,4,1,1400,70,10,20.0,2.45,23,10.0,2.48,10.0
4,88,AA5052,5,1,1400,70,15,6.0,2.47,30,0.0,2.49,6.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
1129,1050,Center,50,7,1600,70,15,2.0,2.45,0,0.0,0.00,2.0
1130,1071,Center,51,7,1600,70,20,2.0,2.45,0,0.0,0.00,2.0
1131,1092,Center,52,7,1600,80,10,5.0,2.54,0,0.0,0.00,5.0
1132,1113,Center,53,7,1600,80,15,1.0,2.41,0,0.0,0.00,1.0


In [4]:
# Reshape to wide format: one row per (sample_no, R, W, D) combination and one
# column per (section, location) pair holding diff_sigma_x. No aggfunc is given,
# so pivot_table's default aggregation applies to any repeated key combination.
# reset_index() turns the four row keys back into ordinary columns, which leaves
# the frame with two-level column labels.
data1 = pd.pivot_table(
    _data,
    values="diff_sigma_x",
    index=["sample_no", "R", "W", "D"],
    columns=["section", "location"],
).reset_index()
data1

section,sample_no,R,W,D,AA5052,AA5052,AA5052,AA5052,AA5052,AA5052,...,AA6061,AA6061,AA6061,Center,Center,Center,Center,Center,Center,Center
location,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,1,2,3,4,5,6,...,5,6,7,1,2,3,4,5,6,7
0,1,1400,60,10,28.0,12.0,33.0,42.0,23.0,10.0,...,-24.0,12.0,-2.942308,12.0,26.0,29.0,24.0,19.0,6.0,6.0
1,2,1400,60,15,14.0,10.0,26.0,27.0,21.0,-4.0,...,2.0,30.0,-83.0,17.0,25.0,23.0,14.0,10.0,3.0,2.0
2,3,1400,60,20,10.0,14.0,14.0,25.0,11.0,3.0,...,-35.0,-18.0,29.0,12.0,28.0,21.0,22.0,12.0,16.0,8.0
3,4,1400,70,10,10.0,36.0,22.0,11.0,14.0,23.0,...,100.0,-35.0,-46.0,20.0,24.0,29.0,18.0,11.0,18.0,3.0
4,5,1400,70,15,6.0,14.0,8.0,7.0,13.0,12.0,...,64.0,-33.0,31.0,14.0,4.0,8.0,11.0,9.0,4.0,6.0
5,6,1400,70,20,15.0,11.0,-13.0,-1.0,10.0,-20.0,...,46.0,-63.0,-76.0,11.0,12.0,13.0,4.0,7.0,2.0,12.0
6,7,1400,80,10,63.0,45.0,24.0,31.0,19.0,46.0,...,-12.169811,-87.0,-90.0,12.0,15.0,17.0,20.0,12.0,6.0,-1.0
7,8,1400,80,15,14.0,11.0,13.0,31.0,5.0,8.0,...,-28.0,16.0,-30.0,18.0,15.0,14.0,7.0,14.0,16.0,5.0
8,9,1400,80,20,26.0,40.0,32.0,29.0,32.0,13.0,...,15.0,7.0,33.0,17.0,21.0,18.0,24.0,13.0,4.0,-5.0
9,10,1500,60,10,23.0,30.0,22.0,20.0,14.0,18.0,...,-44.0,-46.0,19.0,11.0,18.0,9.0,10.0,6.0,-2.0,1.0


In [5]:
def _flat_column_name(col):
    """Return a single-string name for one column label of ``data1``.

    * ``(section, location)`` tuples become ``"<section>_L<location>"``.
    * Tuples whose first element is one of the identifier columns
      (``sample_no``, ``R``, ``W``, ``D``) collapse to that identifier.
    * Labels that are not tuples are returned unchanged. Without this guard,
      re-running the cell after the flat names have been assigned to
      ``data1.columns`` would index into the strings themselves
      (e.g. ``"sample_no"`` -> ``"s_La"``) and silently corrupt the names.
    """
    if not isinstance(col, tuple):
        return col
    if col[0] in ("sample_no", "R", "W", "D"):
        return col[0]
    return f"{col[0]}_L{col[1]}"


# Build the flat column names for the wide table and show them for inspection.
cols = [_flat_column_name(col) for col in data1.columns.to_flat_index()]
print(cols)

['sample_no', 'R', 'W', 'D', 'AA5052_L1', 'AA5052_L2', 'AA5052_L3', 'AA5052_L4', 'AA5052_L5', 'AA5052_L6', 'AA5052_L7', 'AA6061_L1', 'AA6061_L2', 'AA6061_L3', 'AA6061_L4', 'AA6061_L5', 'AA6061_L6', 'AA6061_L7', 'Center_L1', 'Center_L2', 'Center_L3', 'Center_L4', 'Center_L5', 'Center_L6', 'Center_L7']


In [6]:
# Replace data1's column labels with the flat names held in `cols`.
# This mutates data1 in place: after the assignment its columns are plain strings
# rather than (section, location) tuples.
data1.columns = cols
data1

Unnamed: 0,sample_no,R,W,D,AA5052_L1,AA5052_L2,AA5052_L3,AA5052_L4,AA5052_L5,AA5052_L6,...,AA6061_L5,AA6061_L6,AA6061_L7,Center_L1,Center_L2,Center_L3,Center_L4,Center_L5,Center_L6,Center_L7
0,1,1400,60,10,28.0,12.0,33.0,42.0,23.0,10.0,...,-24.0,12.0,-2.942308,12.0,26.0,29.0,24.0,19.0,6.0,6.0
1,2,1400,60,15,14.0,10.0,26.0,27.0,21.0,-4.0,...,2.0,30.0,-83.0,17.0,25.0,23.0,14.0,10.0,3.0,2.0
2,3,1400,60,20,10.0,14.0,14.0,25.0,11.0,3.0,...,-35.0,-18.0,29.0,12.0,28.0,21.0,22.0,12.0,16.0,8.0
3,4,1400,70,10,10.0,36.0,22.0,11.0,14.0,23.0,...,100.0,-35.0,-46.0,20.0,24.0,29.0,18.0,11.0,18.0,3.0
4,5,1400,70,15,6.0,14.0,8.0,7.0,13.0,12.0,...,64.0,-33.0,31.0,14.0,4.0,8.0,11.0,9.0,4.0,6.0
5,6,1400,70,20,15.0,11.0,-13.0,-1.0,10.0,-20.0,...,46.0,-63.0,-76.0,11.0,12.0,13.0,4.0,7.0,2.0,12.0
6,7,1400,80,10,63.0,45.0,24.0,31.0,19.0,46.0,...,-12.169811,-87.0,-90.0,12.0,15.0,17.0,20.0,12.0,6.0,-1.0
7,8,1400,80,15,14.0,11.0,13.0,31.0,5.0,8.0,...,-28.0,16.0,-30.0,18.0,15.0,14.0,7.0,14.0,16.0,5.0
8,9,1400,80,20,26.0,40.0,32.0,29.0,32.0,13.0,...,15.0,7.0,33.0,17.0,21.0,18.0,24.0,13.0,4.0,-5.0
9,10,1500,60,10,23.0,30.0,22.0,20.0,14.0,18.0,...,-44.0,-46.0,19.0,11.0,18.0,9.0,10.0,6.0,-2.0,1.0


In [7]:
# Pivot the data to have mean values for each section:
# one row per sample_no, one column per section, each cell being the mean of
# diff_sigma_x over all rows sharing that sample_no and section.
data2 = _data.pivot_table(
    index=["sample_no"],
    columns=["section"],
    values="diff_sigma_x",
    aggfunc="mean",
).reset_index()

# Suffix every section column with "_mean"; sample_no keeps its name so it can
# still serve as the key when this table is merged on "sample_no".
data2.columns = [col if col == "sample_no" else col + "_mean" for col in data2.columns]
data2

Unnamed: 0,sample_no,AA5052_mean,AA6061_mean,Center_mean
0,1,23.714286,16.293956,17.428571
1,2,12.0,-28.428571,13.428571
2,3,11.857143,-8.428571,17.0
3,4,17.714286,8.857143,17.571429
4,5,10.0,26.0,8.0
5,6,0.285714,-38.142857,8.714286
6,7,32.857143,-37.309973,11.571429
7,8,13.142857,-21.142857,12.714286
8,9,25.714286,17.142857,13.142857
9,10,20.571429,-35.571429,7.571429


In [8]:
# Attach the per-section mean columns to the per-location wide table.
# Left join on sample_no: every row of data1 is kept.
data = pd.merge(data1, data2, how="left", on="sample_no")
data

Unnamed: 0,sample_no,R,W,D,AA5052_L1,AA5052_L2,AA5052_L3,AA5052_L4,AA5052_L5,AA5052_L6,...,Center_L1,Center_L2,Center_L3,Center_L4,Center_L5,Center_L6,Center_L7,AA5052_mean,AA6061_mean,Center_mean
0,1,1400,60,10,28.0,12.0,33.0,42.0,23.0,10.0,...,12.0,26.0,29.0,24.0,19.0,6.0,6.0,23.714286,16.293956,17.428571
1,2,1400,60,15,14.0,10.0,26.0,27.0,21.0,-4.0,...,17.0,25.0,23.0,14.0,10.0,3.0,2.0,12.0,-28.428571,13.428571
2,3,1400,60,20,10.0,14.0,14.0,25.0,11.0,3.0,...,12.0,28.0,21.0,22.0,12.0,16.0,8.0,11.857143,-8.428571,17.0
3,4,1400,70,10,10.0,36.0,22.0,11.0,14.0,23.0,...,20.0,24.0,29.0,18.0,11.0,18.0,3.0,17.714286,8.857143,17.571429
4,5,1400,70,15,6.0,14.0,8.0,7.0,13.0,12.0,...,14.0,4.0,8.0,11.0,9.0,4.0,6.0,10.0,26.0,8.0
5,6,1400,70,20,15.0,11.0,-13.0,-1.0,10.0,-20.0,...,11.0,12.0,13.0,4.0,7.0,2.0,12.0,0.285714,-38.142857,8.714286
6,7,1400,80,10,63.0,45.0,24.0,31.0,19.0,46.0,...,12.0,15.0,17.0,20.0,12.0,6.0,-1.0,32.857143,-37.309973,11.571429
7,8,1400,80,15,14.0,11.0,13.0,31.0,5.0,8.0,...,18.0,15.0,14.0,7.0,14.0,16.0,5.0,13.142857,-21.142857,12.714286
8,9,1400,80,20,26.0,40.0,32.0,29.0,32.0,13.0,...,17.0,21.0,18.0,24.0,13.0,4.0,-5.0,25.714286,17.142857,13.142857
9,10,1500,60,10,23.0,30.0,22.0,20.0,14.0,18.0,...,11.0,18.0,9.0,10.0,6.0,-2.0,1.0,20.571429,-35.571429,7.571429


In [9]:
# Write the merged wide-format table to an Excel file. The relative file name is
# resolved against the current working directory; the row index is not written.
data.to_excel(excel_writer="S01_residual_stress_wide_format.xlsx", index=False)