# Wide Format Conversion


In [1]:
import os
import pickle
import re
from pathlib import Path
import pandas as pd
from tsfresh import extract_features

In [2]:
# Target column (the value to predict). It is passed as `values=` to the first
# `pivot_table` call below, so it must name a column of the loaded stress table.
VALUE_TO_PREDICT: str = "sigma_x_post"

In [3]:
# Locate the preprocessed stress-data folder relative to the working directory:
# go two levels up from the cwd, then into P02_data/T04_preprocess.
CURRENT_DIR = Path.cwd()
PARENT_DIR = CURRENT_DIR.parent.parent
STRESS_DATA_DIR = PARENT_DIR.joinpath("P02_data", "T04_preprocess")
print(STRESS_DATA_DIR)

c:\Users\admin\Coding\research\weld-ml\src\P02_data\T04_preprocess


In [4]:
# Read the stress workbook from the preprocessing folder into a DataFrame
# (long format: one row per sample / section / location) and display it.
stress_filepath = STRESS_DATA_DIR.joinpath("S10_center_location_7_mean.xlsx")
_data = pd.read_excel(io=stress_filepath)
_data

Unnamed: 0,sample_no,section,location,R,W,D,sigma_x_post,diff_sigma_x
0,1,Center,7,1400,60,10,5.666667,5.666667
1,2,Center,7,1400,60,15,2.333333,2.333333
2,3,Center,7,1400,60,20,8.666667,8.666667
3,4,Center,7,1400,70,10,7.666667,7.666667
4,5,Center,7,1400,70,15,5.666667,5.666667
5,6,Center,7,1400,70,20,9.666667,9.666667
6,7,Center,7,1400,80,10,-3.0,-3.0
7,8,Center,7,1400,80,15,4.0,4.0
8,9,Center,7,1400,80,20,-2.0,-2.0
9,10,Center,7,1500,60,10,-0.333333,-0.333333


In [5]:
# Reshape long -> wide: one row per (sample_no, R, W, D) and one column per
# (section, location) pair holding the VALUE_TO_PREDICT values. The result has
# two-level column labels; `reset_index` turns the row keys back into columns.
data1 = (
    _data
    .pivot_table(
        values=VALUE_TO_PREDICT,
        index=["sample_no", "R", "W", "D"],
        columns=["section", "location"],
    )
    .reset_index()
)
data1

section,sample_no,R,W,D,Center
location,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,7
0,1,1400,60,10,5.666667
1,2,1400,60,15,2.333333
2,3,1400,60,20,8.666667
3,4,1400,70,10,7.666667
4,5,1400,70,15,5.666667
5,6,1400,70,20,9.666667
6,7,1400,80,10,-3.0
7,8,1400,80,15,4.0
8,9,1400,80,20,-2.0
9,10,1500,60,10,-0.333333


In [6]:
# Build single-level column names from data1's (section, location) labels:
#   * key columns ("sample_no", "R", "W", "D") keep their plain name;
#   * every other column becomes "<section>_L<location>", e.g. "Center_L7".
# Labels that are already plain strings are passed through unchanged, so
# re-running this cell after `data1.columns` has been flattened is harmless
# (previously the strings were indexed character by character, yielding
# garbage names such as "s_La").
cols = [
    col if isinstance(col, str)
    else col[0] if col[0] in ("sample_no", "R", "W", "D")
    else f"{col[0]}_L{col[1]}"
    for col in data1.columns.to_flat_index()
]
print(cols)

['sample_no', 'R', 'W', 'D', 'Center_L7']


In [7]:
# Replace data1's two-level (section, location) column labels with the
# flattened names held in `cols`, then display the result.
# NOTE: this assigns on the existing `data1` object, so the frame displayed by
# the pivot cell above no longer reflects the kernel state once this has run.
data1.columns = cols
data1

Unnamed: 0,sample_no,R,W,D,Center_L7
0,1,1400,60,10,5.666667
1,2,1400,60,15,2.333333
2,3,1400,60,20,8.666667
3,4,1400,70,10,7.666667
4,5,1400,70,15,5.666667
5,6,1400,70,20,9.666667
6,7,1400,80,10,-3.0
7,8,1400,80,15,4.0
8,9,1400,80,20,-2.0
9,10,1500,60,10,-0.333333


In [8]:
# Per-sample mean of "diff_sigma_x" for each section: one row per sample_no,
# one column per section.
# NOTE(review): this pivots the hard-coded "diff_sigma_x" column rather than
# VALUE_TO_PREDICT -- confirm that is intentional.
data2 = (
    _data
    .pivot_table(
        values="diff_sigma_x",
        index=["sample_no"],
        columns=["section"],
        aggfunc="mean",
    )
    .reset_index()
)

# Tag every section column with a "_mean" suffix; the join key keeps its name.
data2.columns = [
    name if name == "sample_no" else name + "_mean"
    for name in data2.columns
]
data2

Unnamed: 0,sample_no,Center_mean
0,1,5.666667
1,2,2.333333
2,3,8.666667
3,4,7.666667
4,5,5.666667
5,6,9.666667
6,7,-3.0
7,8,4.0
8,9,-2.0
9,10,-0.333333


In [9]:
# Attach the per-section means to the wide table. Left join on sample_no, so
# every row of data1 is kept even if data2 has no matching sample.
data = data1.merge(
    right=data2,
    how="left",
    on="sample_no",
)
data

Unnamed: 0,sample_no,R,W,D,Center_L7,Center_mean
0,1,1400,60,10,5.666667,5.666667
1,2,1400,60,15,2.333333,2.333333
2,3,1400,60,20,8.666667,8.666667
3,4,1400,70,10,7.666667,7.666667
4,5,1400,70,15,5.666667,5.666667
5,6,1400,70,20,9.666667,9.666667
6,7,1400,80,10,-3.0,-3.0
7,8,1400,80,15,4.0,4.0
8,9,1400,80,20,-2.0,-2.0
9,10,1500,60,10,-0.333333,-0.333333


In [11]:
# Persist the merged wide-format table next to the notebook (path is relative
# to the current working directory); the row index is not written.
data.to_excel(
    excel_writer="S01_residual_stress_wide_format.xlsx",
    index=False,
)