In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
import os
from pathlib import Path

In [2]:
# Locations are derived from the kernel's working directory: the stress
# spreadsheets are read from the sibling folder "T02_stress".
CURRENT_DIR = Path.cwd()
PARENT_DIR = CURRENT_DIR.parent
DATA_DIR = PARENT_DIR.joinpath("T02_stress")
# One line per path, so a wrong launch directory is easy to spot.
print(CURRENT_DIR, DATA_DIR, sep="\n")

c:\Users\admin\Coding\research\weld-ml\src\P02_data\T04_preprocess
c:\Users\admin\Coding\research\weld-ml\src\P02_data\T02_stress


In [3]:
# Both workbooks get the same column renaming, so the mapping is written once.
post_weld_columns = {
    "Sigma(x)": "sigma_x_post",
    "FWHM": "FWHM_post",
    "idx_excel": "idx_excel_post",
}

filepath_first = os.path.join(DATA_DIR, "S02_residual_stress_post_weld_first.xlsx")
filepath_second = os.path.join(DATA_DIR, "S02_residual_stress_post_weld_second.xlsx")

first = pd.read_excel(filepath_first)
first = first.rename(columns=post_weld_columns)

second = pd.read_excel(filepath_second)
second = second.rename(columns=post_weld_columns)

In [4]:
# Tag every row with the workbook it came from so the two tables can be
# told apart once they are stacked together.
first = first.assign(measurement="first")
second = second.assign(measurement="second")

In [5]:
# Preview the table loaded from the "first" workbook (bare expression -> rich display).
first

Unnamed: 0,idx_excel_post,section,sample_no,location,R,W,D,sigma_x_post,FWHM_post,measurement
0,2,AA6061,1,1,1400,60,10,32,2.62,first
1,3,Center,1,1,1400,60,10,12,2.64,first
2,4,AA5052,1,1,1400,60,10,13,2.55,first
3,5,AA6061,1,2,1400,60,10,28,2.71,first
4,6,Center,1,2,1400,60,10,26,2.56,first
...,...,...,...,...,...,...,...,...,...,...
1129,1131,Center,54,6,1600,80,20,6,2.43,first
1130,1132,AA5052,54,6,1600,80,20,-3,2.47,first
1131,1133,AA6061,54,7,1600,80,20,21,2.67,first
1132,1134,Center,54,7,1600,80,20,4,2.42,first


In [6]:
# Preview the table loaded from the "second" workbook (bare expression -> rich display).
second

Unnamed: 0,idx_excel_post,section,sample_no,location,R,W,D,sigma_x_post,FWHM_post,measurement
0,29,Center,1,7,1400,60,10,7,2.58,second
1,83,Center,1,7,1400,60,10,4,2.55,second
2,34,Center,2,7,1400,60,15,1,2.49,second
3,88,Center,2,7,1400,60,15,4,2.48,second
4,31,Center,3,7,1400,60,20,8,2.57,second
...,...,...,...,...,...,...,...,...,...,...
103,73,Center,52,7,1600,80,10,3,2.44,second
104,5,Center,53,7,1600,80,15,5,2.43,second
105,59,Center,53,7,1600,80,15,5,2.45,second
106,3,Center,54,7,1600,80,20,2,2.42,second


In [7]:
# Boolean mask: rows of `first` whose section is "Center" and whose location is 7.
filt_center = first["section"].eq("Center") & first["location"].eq(7)
first_filt = first.loc[filt_center]
display(first_filt)

Unnamed: 0,idx_excel_post,section,sample_no,location,R,W,D,sigma_x_post,FWHM_post,measurement
19,21,Center,1,7,1400,60,10,6,2.51,first
40,42,Center,2,7,1400,60,15,2,2.48,first
61,63,Center,3,7,1400,60,20,8,2.54,first
82,84,Center,4,7,1400,70,10,3,2.58,first
103,105,Center,5,7,1400,70,15,6,2.43,first
124,126,Center,6,7,1400,70,20,12,2.46,first
145,147,Center,7,7,1400,80,10,-1,2.47,first
166,168,Center,8,7,1400,80,15,5,2.47,first
187,189,Center,9,7,1400,80,20,-5,2.46,first
208,210,Center,10,7,1500,60,10,1,2.43,first


In [8]:
# Stack the filtered "first" rows on top of the "second" rows and renumber
# the index from zero. Row order matters: "first" rows come before "second".
stacked_frames = (first_filt, second)
dfm1 = pd.concat(stacked_frames, axis=0, ignore_index=True)
dfm1

Unnamed: 0,idx_excel_post,section,sample_no,location,R,W,D,sigma_x_post,FWHM_post,measurement
0,21,Center,1,7,1400,60,10,6,2.51,first
1,42,Center,2,7,1400,60,15,2,2.48,first
2,63,Center,3,7,1400,60,20,8,2.54,first
3,84,Center,4,7,1400,70,10,3,2.58,first
4,105,Center,5,7,1400,70,15,6,2.43,first
...,...,...,...,...,...,...,...,...,...,...
157,73,Center,52,7,1600,80,10,3,2.44,second
158,5,Center,53,7,1600,80,15,5,2.43,second
159,59,Center,53,7,1600,80,15,5,2.45,second
160,3,Center,54,7,1600,80,20,2,2.42,second


In [9]:
def checkGroup(grp):
    """Validate the rows that belong to a single ``sample_no`` group.

    Meant to be passed to ``DataFrameGroupBy.apply``; the error messages read
    ``grp.name``, which pandas sets to the group key in that setting.

    Parameters
    ----------
    grp : pandas.DataFrame
        Rows of one group. Must provide the columns ``measurement``, ``R``,
        ``W`` and ``D``.

    Returns
    -------
    None

    Raises
    ------
    AssertionError
        If the group does not have exactly three rows, if it does not hold
        exactly one ``"first"`` and two ``"second"`` measurements, or if any
        of ``R``, ``W``, ``D`` takes more than one value inside the group.
    """
    # There must be 3 entries per sample_no
    assert len(grp) == 3, f"Sample_no {grp.name} does not have 3 entries"

    # There must be one 'first' measurement and two 'second' measurements.
    # Label counts are compared instead of positions, so a valid group is
    # accepted regardless of the order in which its rows arrive.
    label_counts = grp["measurement"].value_counts().to_dict()
    assert label_counts == {"first": 1, "second": 2}, (
        f"Sample_no {grp.name} does not have the correct measurements"
    )

    # There must be only one unique value for R, W, and D
    for col in ["R", "W", "D"]:
        unique_values = grp[col].unique()
        assert len(unique_values) == 1, (
            f"Sample_no {grp.name} has multiple unique values for {col}"
        )


# Run checkGroup once per sample_no group. Only its assertions matter, so the
# result is bound to `_` and never used; include_groups=False keeps the
# grouping column out of the frame handed to checkGroup.
_ = dfm1.groupby(["sample_no"]).apply(checkGroup, include_groups=False)

In [10]:
# Collapse each sample_no to a single row: the descriptive columns take the
# value of the group's first row, sigma_x_post is averaged over the group.
dfm2 = (
    dfm1.groupby(["sample_no"])
    .agg(
        section=("section", "first"),
        location=("location", "first"),
        R=("R", "first"),
        W=("W", "first"),
        D=("D", "first"),
        sigma_x_post=("sigma_x_post", "mean"),
    )
    .reset_index()  # turn sample_no back into an ordinary column
)
dfm2

Unnamed: 0,sample_no,section,location,R,W,D,sigma_x_post
0,1,Center,7,1400,60,10,5.666667
1,2,Center,7,1400,60,15,2.333333
2,3,Center,7,1400,60,20,8.666667
3,4,Center,7,1400,70,10,7.666667
4,5,Center,7,1400,70,15,5.666667
5,6,Center,7,1400,70,20,9.666667
6,7,Center,7,1400,80,10,-3.0
7,8,Center,7,1400,80,15,4.0
8,9,Center,7,1400,80,20,-2.0
9,10,Center,7,1500,60,10,-0.333333


In [11]:
# NOTE(review): despite its name, diff_sigma_x is a plain copy of sigma_x_post;
# nothing is subtracted here. Confirm that this is the intended definition.
dfm1 = dfm1.assign(diff_sigma_x=dfm1["sigma_x_post"])
dfm2 = dfm2.assign(diff_sigma_x=dfm2["sigma_x_post"])

In [12]:
# Write both tables into CURRENT_DIR; index=False leaves the row index out of
# the spreadsheets.
dfm1.to_excel(
    CURRENT_DIR.joinpath("S10_center_location_7_individual.xlsx"),
    index=False,
)
dfm2.to_excel(
    CURRENT_DIR.joinpath("S10_center_location_7_mean.xlsx"),
    index=False,
)