In [None]:
from pathlib import Path

def parse_cwru_filename(path):
    stem = Path(path).stem  # 예: "1730_B_14_DE48"
    parts = stem.split("_")

    rpm = int(parts[0])  # 1730

    if parts[1] == "Normal":
        fault_present = 0
        fault_family = "Normal"
        fault_raw = "Normal"
        diameter = None
        or_pos = None
        sensor = None
    else:
        fault_present = 1
        fault_raw = parts[1]      # "B", "IR", "OR@6", ...
        sensor = parts[-1]        # "DE12", "DE48", "FE"
        diameter = int(parts[2])  # 7, 14, 21, 28

        if fault_raw.startswith("OR@"):
            fault_family = "OR"
            or_pos = fault_raw.split("@")[1]  # "3", "6", "12"
        else:
            fault_family = fault_raw         # "B" or "IR"
            or_pos = None

    return {
        "rpm": rpm,
        "fault_present": fault_present,
        "fault_family": fault_family,  # Normal / B / IR / OR
        "fault_raw": fault_raw,        # Normal / B / IR / OR@3/6/12
        "diameter": diameter,
        "or_pos": or_pos,
        "sensor": sensor,              # DE12 / DE48 / FE / None
    }


In [10]:
file = "/ws/pdm_2025/datasets/cwru_bearing/CWRU_Bearing_NumPy-main/Data/1730 RPM/1730_B_7_DE12.npz"
out = parse_cwru_filename(file)
out

{'rpm': 1730,
 'fault_present': 1,
 'fault_family': 'B',
 'fault_raw': 'B',
 'diameter': 7,
 'or_pos': None,
 'sensor': 'DE12'}

In [None]:
import numpy as np

print(file)
data = np.load(file)
data.files


/ws/pdm_2025/datasets/cwru_bearing/CWRU_Bearing_NumPy-main/Data/1730 RPM/1730_B_7_DE12.npz


['DE', 'FE', 'BA']

In [None]:
x_de = data["DE"]
x_de.shape

(121556, 1)

In [20]:
from pathlib import Path
import numpy as np
import pandas as pd

base_dir = Path("/ws/pdm_2025/datasets/cwru_bearing/CWRU_Bearing_NumPy-main/Data")
rpm_dir = base_dir / "1730 RPM"

records = []
for path in sorted(rpm_dir.glob("*.npz")):
    info = parse_cwru_filename(path)
    info["path"] = str(path)

    data = np.load(path)

    # 각 채널별 시계열 길이/개수를 기록
    for key in data.files:  # 보통 'DE', 'FE', 'BA' 중 일부
        arr = data[key]
        if arr.ndim == 1:
            n_series = 1
            length = arr.shape[0]
        else:
            n_series = arr.shape[0]
            length = arr.shape[1]

        info[f"n_series_{key}"] = n_series
        info[f"length_{key}"] = length

    records.append(info)

df = pd.DataFrame(records)


In [21]:
print(df.head())
print(df["fault_family"].value_counts())
print(df["fault_raw"].value_counts())
print(df[["fault_family", "diameter", "sensor"]].drop_duplicates())

print(df[["length_DE", "length_FE"]].describe())



    rpm  fault_present fault_family fault_raw  diameter or_pos sensor  \
0  1730              1            B         B      14.0   None   DE12   
1  1730              1            B         B      14.0   None   DE48   
2  1730              1            B         B      14.0   None     FE   
3  1730              1            B         B      21.0   None   DE12   
4  1730              1            B         B      21.0   None   DE48   

                                                path  n_series_DE  length_DE  \
0  /ws/pdm_2025/datasets/cwru_bearing/CWRU_Bearin...       122136          1   
1  /ws/pdm_2025/datasets/cwru_bearing/CWRU_Bearin...       486804          1   
2  /ws/pdm_2025/datasets/cwru_bearing/CWRU_Bearin...       121168          1   
3  /ws/pdm_2025/datasets/cwru_bearing/CWRU_Bearin...       122136          1   
4  /ws/pdm_2025/datasets/cwru_bearing/CWRU_Bearin...       486804          1   

   n_series_FE  length_FE  n_series_BA  length_BA  
0     122136.0        1.0   