In [1]:
from pathlib import Path

import pandas as pd

# Input workbook and output CSV locations. Both are relative paths, so they
# resolve against the directory the notebook kernel is running in.
excel_path = "data/raw/DataSheet_ETH_250902.xlsx"   # <- adjust to your path
csv_out_path = "data/processed/ethanol_spray_tidy.csv"  # target of the final to_csv call

In [2]:
# -----------------------------------------------------------
# 1. Static conditions: get injection temperature per case
# -----------------------------------------------------------
exp_cond = pd.read_excel(excel_path, sheet_name="Exp. Conditions")

# Keep only the rows whose "Critical conditions" cell is a case label
# ("ETH-..."). Casting to str first lets NaN / numeric cells simply fail
# the prefix test instead of raising.
mask_case = exp_cond["Critical conditions"].astype(str).str.startswith("ETH-")

# Give the two columns of interest tidy names. "Unnamed: 9" is the header
# pandas assigned to the column that is used here as the injection temperature.
static_cases = exp_cond.loc[mask_case].rename(
    columns={"Critical conditions": "case_id", "Unnamed: 9": "injection_temperature_K"}
)

# Slim lookup table: one injection temperature per case label.
static_cases_slim = static_cases.loc[:, ["case_id", "injection_temperature_K"]].copy()

In [3]:
# -----------------------------------------------------------
# 2. Time-varying conditions: "Exp. Conditions in Time"
#    Build (case_id, time_ms, chamber_pressure, chamber_temp, etc.)
# -----------------------------------------------------------
time_raw = pd.read_excel(excel_path, sheet_name="Exp. Conditions in Time")

# Layout as read by pandas: a parameter-group header is written once and is
# followed by "Unnamed: N" columns; the first data row carries the case label
# of each column; every remaining row is one time sample.
labels_row = time_raw.iloc[0]          # row with ETH-01..ETH-06 labels

# Coerce every column (time included) to numeric; unparsable cells become NaN.
# Rows without a usable time stamp are dropped: pandas matches NaN join keys
# against each other, so keeping them would multiply rows in any later merge
# on (case_id, time_ms).
time_data = (
    time_raw.iloc[1:]
    .apply(pd.to_numeric, errors="coerce")
    .dropna(subset=["Time(ms)"])
    .reset_index(drop=True)
)

# Map each column to its parameter group (propagate last non-Unnamed header)
param_group = {}
current_param = None
for col in time_raw.columns:
    if col == "Time(ms)":
        param_group[col] = None
        continue
    # str() keeps this working if a header cell was read as a non-string.
    if not str(col).startswith("Unnamed"):
        current_param = col
    param_group[col] = current_param

# Map parameter group header -> tidy name
param_name_map = {
    "Chamber pressure (bar)": "chamber_pressure_bar",
    "Chamber temperature (K)": "chamber_temperature_K",
    "Injection pressure (bar)": "injection_pressure_bar",
    "Density (kg/m3)": "density_kg_m3",
    "Viscosity (Pas)": "viscosity_Pa_s",
}

# Case IDs from labels row (unique, sorted); reused by later cells.
case_ids = sorted(
    {str(v) for v in labels_row.values if isinstance(v, str) and v.startswith("ETH-")}
)
if not case_ids:
    # Fail with a message that names the cause instead of pd.concat's
    # generic "No objects to concatenate".
    raise ValueError('No "ETH-" case labels found in sheet "Exp. Conditions in Time"')

# One frame per case: the shared time axis plus every recognised parameter
# column whose label matches that case. Columns belonging to groups that are
# not in param_name_map are ignored.
time_records = []
for case in case_ids:
    case_columns = {
        "case_id": case,
        "time_ms": time_data["Time(ms)"].to_numpy(),
    }
    for col in time_data.columns:
        if col == "Time(ms)" or labels_row[col] != case:
            continue
        param_col_name = param_name_map.get(param_group[col])
        if param_col_name is not None:
            case_columns[param_col_name] = time_data[col].to_numpy()
    time_records.append(pd.DataFrame(case_columns))

time_long = pd.concat(time_records, axis=0, ignore_index=True)


In [4]:
# -----------------------------------------------------------
# 3. Helper to build long tables for spray angle / penetration sheets
# -----------------------------------------------------------
def build_meas_long(sheet_name, value_type):
    """
    Build a long (tidy) table for one measurement type from one Excel sheet.

    The sheet is read with the layout this notebook uses throughout: a
    "Time(ms)" column, group headers (e.g. "Angle (deg)") written once and
    followed by "Unnamed: N" columns, and a first data row holding the case
    label of each column.

    Relies on the notebook-level names ``excel_path`` (workbook to read) and
    ``case_ids`` (cases to extract); both must be defined before calling.

    Parameters
    ----------
    sheet_name : str
        Excel sheet name.
    value_type : str
        One of 'angle_shadow', 'angle_mie', 'pen_shadow_mm', 'pen_shadow_ld',
        'pen_mie_mm', 'pen_mie_ld'. Angle types yield a raw and a smoothed
        column; penetration types yield a single column.

    Returns
    -------
    pandas.DataFrame
        Columns ``case_id`` and ``time_ms`` (always present) followed by the
        value column(s) for ``value_type``. Value columns that contain no
        data at all are omitted. Rows whose time cannot be parsed are dropped.

    Raises
    ------
    ValueError
        If ``value_type`` is not supported (raised before the workbook is read).
    """
    # Sheet group header -> tidy output column for the requested value_type.
    # Resolved first so that a bad value_type fails before any file I/O.
    if value_type in ("angle_shadow", "angle_mie"):
        header_to_column = {
            "Angle (deg)": value_type + "_deg",
            "Smoothed angle (deg)": value_type + "_smoothed_deg",
        }
    elif value_type in ("pen_shadow_mm", "pen_mie_mm"):
        header_to_column = {
            "Penetration (mm)": value_type.replace("pen_", "penetration_"),
        }
    elif value_type in ("pen_shadow_ld", "pen_mie_ld"):
        header_to_column = {
            "Penetration (L/D)": value_type.replace("pen_", "penetration_"),
        }
    else:
        raise ValueError("Unknown value_type")

    df_raw = pd.read_excel(excel_path, sheet_name=sheet_name)
    labels = df_raw.iloc[0]  # first data row: case label per column
    df = df_raw.iloc[1:].reset_index(drop=True)
    df["Time(ms)"] = pd.to_numeric(df["Time(ms)"], errors="coerce")
    # Rows without a usable time stamp cannot be joined meaningfully, and
    # pandas matches NaN merge keys against each other, which would multiply
    # rows in a later merge on (case_id, time_ms).
    df = df.dropna(subset=["Time(ms)"]).reset_index(drop=True)

    # Group headers (raw vs smoothed, mm vs L/D): propagate the last
    # non-"Unnamed" header to the columns that follow it.
    group_header = {}
    current = None
    for col in df_raw.columns:
        if col == "Time(ms)":
            group_header[col] = None
            continue
        # str() keeps this working if a header cell was read as a non-string.
        if not str(col).startswith("Unnamed"):
            current = col
        group_header[col] = current

    key_columns = ("case_id", "time_ms")
    records = []
    for case in case_ids:
        case_columns = {
            "case_id": case,
            "time_ms": df["Time(ms)"].to_numpy(),
        }
        for col in df.columns:
            if col == "Time(ms)" or labels[col] != case:
                continue
            out_name = header_to_column.get(group_header[col])
            if out_name is not None:
                case_columns[out_name] = pd.to_numeric(df[col], errors="coerce").to_numpy()
        records.append(pd.DataFrame(case_columns))

    out = pd.concat(records, axis=0, ignore_index=True)
    # Drop value columns that hold no data at all (separators etc.), but never
    # the key columns: callers merge on them, so they must always be present.
    empty_value_columns = [
        c for c in out.columns if c not in key_columns and out[c].isna().all()
    ]
    return out.drop(columns=empty_value_columns)

# Spray angle: each call yields a raw and a smoothed angle column.
angle_shadow_long = build_meas_long(sheet_name="Spray Angle (Shadow)", value_type="angle_shadow")
angle_mie_long = build_meas_long(sheet_name="Spray Angle (Mie)", value_type="angle_mie")

# Spray penetration: each sheet is read twice, once per unit (mm and L/D).
pen_shadow_mm_long = build_meas_long(sheet_name="Spray Penetration (Shadow)", value_type="pen_shadow_mm")
pen_shadow_ld_long = build_meas_long(sheet_name="Spray Penetration (Shadow)", value_type="pen_shadow_ld")
pen_mie_mm_long = build_meas_long(sheet_name="Spray Penetration (Mie)", value_type="pen_mie_mm")
pen_mie_ld_long = build_meas_long(sheet_name="Spray Penetration (Mie)", value_type="pen_mie_ld")


In [5]:
# -----------------------------------------------------------
# 4. Merge everything into one tidy table
# -----------------------------------------------------------
# Start from the time-resolved conditions and attach the per-case (static)
# injection temperature. merge() returns a new frame, so time_long is untouched.
merged = time_long.merge(static_cases_slim, on="case_id", how="left")

# Left-join every spray angle / penetration table on (case_id, time_ms), so
# the row set stays that of time_long and unmatched samples become NaN.
measurement_tables = (
    angle_shadow_long,
    angle_mie_long,
    pen_shadow_mm_long,
    pen_shadow_ld_long,
    pen_mie_mm_long,
    pen_mie_ld_long,
)
for meas_table in measurement_tables:
    merged = merged.merge(meas_table, how="left", on=["case_id", "time_ms"])

# Coerce everything except the case label to numeric (unparsable -> NaN),
# then discard columns that ended up with no data at all.
for col in merged.columns:
    if col == "case_id":
        continue
    merged[col] = pd.to_numeric(merged[col], errors="coerce")

merged = merged.dropna(axis=1, how="all")

# to_numeric may return an integer dtype for whole-number values; force float
# so the injection temperature column has a consistent type.
if "injection_temperature_K" in merged.columns:
    merged["injection_temperature_K"] = merged["injection_temperature_K"].astype(float)


In [6]:
# -----------------------------------------------------------
# 5. Save to CSV
# -----------------------------------------------------------
# Create the output folder first: DataFrame.to_csv does not create missing
# parent directories, so on a fresh checkout the write would fail otherwise.
Path(csv_out_path).parent.mkdir(parents=True, exist_ok=True)
merged.to_csv(csv_out_path, index=False)

# Short confirmation: where the file went, its size, and a preview.
print("Saved tidy CSV to:", csv_out_path)
print("Shape:", merged.shape)
print(merged.head())

Saved tidy CSV to: data/processed/ethanol_spray_tidy.csv
Shape: (726, 16)
  case_id  time_ms  chamber_pressure_bar  chamber_temperature_K  \
0  ETH-01    0.000              55.03180             192.029519   
1  ETH-01    0.025              55.00570             192.015831   
2  ETH-01    0.050              55.00810             191.988228   
3  ETH-01    0.075              55.01635             192.081988   
4  ETH-01    0.100              55.01250             191.988000   

   injection_pressure_bar  density_kg_m3  viscosity_Pa_s  \
0               98.864550     810.720228        0.001879   
1               98.874062     810.718262        0.001879   
2               98.907356     810.718443        0.001879   
3               98.855037     810.719065        0.001879   
4               98.878819     810.718775        0.001879   

   injection_temperature_K  angle_shadow_deg  angle_shadow_smoothed_deg  \
0                    273.0               NaN                        NaN   
1           