# converting signals into images, bin files(.npz)

このノートブックではディレクトリを指定して、その中にある生波形データ(.mat)を、一括して画像(.png)及びその値（.npz）に変換し保存するという処理を行っています。

## シミュレーションデータ変換  
 シミュレーションで生成した`.mat`のファイルを統一形式である`.npz`に変換します。以下は、フォルダを指定するとその配下のファイルをすべて一括で変換し、`/processed`ディレクトリに保存するという処理を行うものです。

In [1]:
from src import mat2npz_sim,npz2png
import os
import glob

# Notebook-wide settings for the simulation conversion cell.

# Root of the simulation data tree, and the folder passed to npz2png as
# save_path further down.
base_data_dir = "/home/smatsubara/documents/airlift/data/simulation"
visualize_dir = "/home/smatsubara/documents/airlift/data/visualize"

# Simulation case to convert; kept in one variable so it can be switched in a
# single place.
case_name = "case20"
def convert_all_simulation_mat_to_npz(case_name, base_data_dir):
    """
    Convert all simulation .mat files in the specified case directory to .npz format.

    Input files are taken from ``<base_data_dir>/rawsignal/<case_name>/data``.
    Each one is handed to ``mat2npz_sim`` together with the case's
    ``config.json`` and the output directory
    ``<base_data_dir>/processed/<case_name>``. A copy of ``config.json`` is
    stored in the output directory for reference.

    Parameters
    ----------
    case_name : str
        The name of the simulation case (e.g., "case5").
    base_data_dir : str
        The base directory where simulation data is stored.

    Raises
    ------
    FileNotFoundError
        If ``rawsignal/<case_name>/config.json`` does not exist. The check
        runs before anything is created, so no empty output directory is
        left behind.
    """
    import shutil

    case_raw_dir = os.path.join(base_data_dir, "rawsignal", case_name)
    mat_dir = os.path.join(case_raw_dir, "data")
    config_path = os.path.join(case_raw_dir, "config.json")
    output_dir = os.path.join(base_data_dir, "processed", case_name)

    # Validate the input before touching the output tree.
    if not os.path.isfile(config_path):
        raise FileNotFoundError(f"config.json not found: {config_path}")

    # Create the output directory exactly once; report it only when it is new.
    newly_created = not os.path.exists(output_dir)
    os.makedirs(output_dir, exist_ok=True)
    if newly_created:
        print(f"Created output directory: {output_dir}")

    # Keep a copy of the configuration next to the converted files.
    config_copy_path = os.path.join(output_dir, "config.json")
    shutil.copy2(config_path, config_copy_path)
    print(f"Copied config.json to: {config_copy_path}")

    # glob returns matches in arbitrary order; sort so that every run
    # processes the files in the same sequence.
    mat_files_list = sorted(glob.glob(os.path.join(mat_dir, "*.mat")))
    if not mat_files_list:
        print(f"No .mat files found in: {mat_dir}")
    for mat_file in mat_files_list:
        print(f"Processing: {mat_file}")
        mat2npz_sim(mat_file, config_path, output_dir)

# Example invocation: convert every .mat file of the selected case.
convert_all_simulation_mat_to_npz(case_name, base_data_dir)

# Render one processed file twice: first with full=False, then with full=True.
# NOTE(review): this path is hard-coded to case9 while case_name above is
# "case20" — confirm that inspecting a different case is intended.
sample_npz_path = "/home/smatsubara/documents/airlift/data/simulation/processed/case9/solid_liquid_reflector3_processed.npz"
for render_full in (False, True):
    npz2png(file_path=sample_npz_path, save_path=visualize_dir, full=render_full, pulse_index=0)

Copied config.json to: /home/smatsubara/documents/airlift/data/simulation/processed/case20/config.json
Processing: /home/smatsubara/documents/airlift/data/simulation/rawsignal/case20/data/solid_liquid_reflector1.mat
<KeysViewHDF5 ['#refs#', '#subsystem#', 'kgrid', 'sensor_data']>
['Nt', 'Nx', 'Ny', 'Nz', 'dim', 'dt', 'dx', 'dxudxn', 'dxudxn_sgx', 'dy', 'dyudyn', 'dyudyn_sgy', 'dz', 'dzudzn', 'dzudzn_sgz', 'k', 'k_max', 'kx_max', 'kx_vec', 'ky_max', 'ky_vec', 'kz_max', 'kz_vec', 'nonuniform', 'xn_vec', 'xn_vec_sgx', 'yn_vec', 'yn_vec_sgy', 'zn_vec', 'zn_vec_sgz']
999999999.9999999
keys: ['#refs#', '#subsystem#', 'kgrid', 'sensor_data']
['#refs#', '#subsystem#', 'kgrid', 'sensor_data']
(50000,)
Processed data and metadata saved to: /home/smatsubara/documents/airlift/data/simulation/processed/case20/solid_liquid_reflector1_processed.npz
Processing: /home/smatsubara/documents/airlift/data/simulation/rawsignal/case20/data/solid_liquid_reflector2.mat
<KeysViewHDF5 ['#refs#', '#subsystem#', '

## 機械学習用データセット生成（シミュレーション）
　次に、変換した`.npz`のファイルに対応する目標変数となる値を`/config.json`を使って計算し、データセットとなる`x_train.npy`,`t_train.npy`を作成していきます。これらのIDの紐づけが狂うとすべての計算の意味がなくなってしまうので、最大限注意してください。  
また、実機への展開をスムーズにするために、最大値を用いてスケーリングしていることに注意して下さい。その他順序付けなど筆者は細心の注意を払って実装していますが、もし誤りがあればご指摘いただけると幸いです。


In [2]:
from src import calculate_gvf_and_signal,npz2png,process_case_and_return_dataset
import numpy as np
import math
import json
import glob
import os
#units are all mm

# NOTE(review): x_list / t_list are not referenced anywhere in the visible
# cells; they are kept only so existing notebook state is unchanged.
x_list = []
t_list = []

# Directory that holds one sub-directory of processed files per case.
processed_base_dir = "/home/smatsubara/documents/airlift/data/simulation/processed"
# Directory that receives x_train.npy and t_train.npy.
output_path = "/home/smatsubara/documents/airlift/data/simulation/dataset"

# Collect all case directories (e.g., case5, case6, ...). sorted() orders the
# names lexicographically (case10 comes before case2); the order is
# deterministic, and x and t are appended in the same loop iteration, so both
# arrays follow the same case order.
case_dirs = sorted(
    d
    for d in os.listdir(processed_base_dir)
    if d.startswith("case") and os.path.isdir(os.path.join(processed_base_dir, d))
)
if not case_dirs:
    # Without this guard np.concatenate fails later with a far less helpful
    # "need at least one array to concatenate" message.
    raise ValueError(f"No case directories found in: {processed_base_dir}")

x_train_list = []
t_train_list = []

for case_name in case_dirs:
    base_dir = os.path.join(processed_base_dir, case_name)
    print(f"Processing {case_name} in {base_dir}")
    x_tmp, t_tmp = process_case_and_return_dataset(case_name, base_dir)
    print(f"x_tmp shape: {x_tmp.shape}, t_tmp shape: {t_tmp.shape}")
    x_train_list.append(x_tmp)
    t_train_list.append(t_tmp)
print("list done")

# Concatenate all cases into single arrays along the first axis.
x_train = np.concatenate(x_train_list, axis=0)
t_train = np.concatenate(t_train_list, axis=0)

# Scale the inputs by their global maximum. A zero or non-finite maximum
# would fill the saved dataset with NaN/inf/zeros, so stop instead of writing
# a corrupted file.
x_max = np.max(x_train)
if not np.isfinite(x_max) or x_max == 0:
    raise ValueError(f"Cannot scale x_train by its maximum value: {x_max}")
x_train = x_train / x_max

print("Final x_train shape:", x_train.shape)
print("Final t_train shape:", t_train.shape)
print(np.max(x_train), np.min(x_train))

# np.save does not create missing directories; make sure the target exists
# so the results of the loop above are not lost.
os.makedirs(output_path, exist_ok=True)
np.save(os.path.join(output_path, "x_train.npy"), x_train)
np.save(os.path.join(output_path, "t_train.npy"), t_train)
#print(np.max(x_train))
#print(t_train)
#print(x_train)
# Render one processed simulation file for visual inspection, first with
# full=True and then with full=False.
# NOTE(review): the file is hard-coded to case4 and is independent of the
# cases processed above — confirm this is the intended sample.
npz_file_path = "/home/smatsubara/documents/airlift/data/simulation/processed/case4/solid_liquid7_processed.npz"
output_folder_path = "/home/smatsubara/documents/airlift/data/visualize"

# Keyword options shared by both renderings.
render_options = {
    "channel_index": 0,
    "start_time": 0.0,
    "end_time": None,
    "pulse_index": 0,
}
for render_full in (True, False):
    npz2png(npz_file_path, output_folder_path, full=render_full, **render_options)


Processing case10 in /home/smatsubara/documents/airlift/data/simulation/processed/case10
['/home/smatsubara/documents/airlift/data/simulation/processed/case10/solid_liquid_reflector10_processed.npz', '/home/smatsubara/documents/airlift/data/simulation/processed/case10/solid_liquid_reflector1_processed.npz', '/home/smatsubara/documents/airlift/data/simulation/processed/case10/solid_liquid_reflector2_processed.npz', '/home/smatsubara/documents/airlift/data/simulation/processed/case10/solid_liquid_reflector3_processed.npz', '/home/smatsubara/documents/airlift/data/simulation/processed/case10/solid_liquid_reflector4_processed.npz', '/home/smatsubara/documents/airlift/data/simulation/processed/case10/solid_liquid_reflector5_processed.npz', '/home/smatsubara/documents/airlift/data/simulation/processed/case10/solid_liquid_reflector6_processed.npz', '/home/smatsubara/documents/airlift/data/simulation/processed/case10/solid_liquid_reflector7_processed.npz', '/home/smatsubara/documents/airlift/d

## 実機データ変換  
実機データの`.mat`のファイルも、先ほどと同様に統一形式である`.npz`に変換します。同じく、`experiments/processed`に保存されるようにしています。

In [2]:
from src import mat2npz_exp,npz2png
import os

import glob

# Locations of the raw experiment recordings, the converted .npz files and
# the folder passed to npz2png as save_path.
rawsignal_dir = "/home/smatsubara/documents/airlift/data/experiments/rawsignal"
output_dir = "/home/smatsubara/documents/airlift/data/experiments/processed"
visualize_dir = "/home/smatsubara/documents/airlift/data/visualize"

# All .mat files in the rawsignal directory.
# NOTE(review): this list is not used below in this cell; only the single
# file in path_tmp is converted.
mat_files = glob.glob(os.path.join(rawsignal_dir, "*.mat"))

# One recording and the processed file that is rendered afterwards.
path_tmp = "/home/smatsubara/documents/airlift/data/experiments/rawsignal/P20241018-1530.mat"
processed_path_tmp ="/home/smatsubara/documents/airlift/data/experiments/processed/P20241018-1530_processed.npz"

# Keyword arguments for mat2npz_exp.
# NOTE(review): the original comment on start_time says the initial part of
# the signal is often unstable and therefore excluded, yet start_time is 0
# here (the batch cell later in the notebook uses 0.1) — confirm which is
# intended.
exp_conversion_options = {
    "file_path": path_tmp,
    "output_dir": output_dir,
    "start_time": 0,
    "duration": 5.0,
    "amplitude_threshold": 2,
    "window_width": 0.1e-3,
    "signal_key": "TDX1",
}
mat2npz_exp(**exp_conversion_options)

# Render the first pulse (pulse_index=0) of the converted file.
npz2png(file_path=processed_path_tmp, save_path=visualize_dir, full=False, pulse_index=0)


Loading data...
Loading successful
Using device: cuda
Number of detected triggers: (14988,)
arranged_pulses.shape: (14988, 5208, 4)
convert_exp finished
max: inf
processed_data.shape: (14000, 2500, 4)
max: inf
processed_data[0,:,0].shape: (2500,)
max: 1.0998042821884155
argmax: 329
maxes argmax: 3,max: 3.4028234663852886e+38
(14000, 1, 4) 0.04892368 3.4028235e+38
scaled: ((14000, 2500, 4), -3.4028235e+38, 3.4028235e+38)
max_value: 3.4028234663852886e+38
['__header__', '__version__', '__globals__', 'Tstart', 'Tinterval', 'ExtraSamples', 'RequestedLength', 'Length', 'Version', 'TDX1', 'TDX2', 'TDX3', 'TDX1_enlarged']
signal points: (2500,)
Processed data and metadata saved to: /home/smatsubara/documents/airlift/data/experiments/processed/P20241018-1530_processed.npz
shape is 3
/home/smatsubara/documents/airlift/data/visualize/P20241018-1530_processed_0pulse.png


## 機械学習用データセット生成(実機)  
次に、変換した`.npz`のファイルに対応する目標変数となる値を`/target_variables.csv`を使って参照し、データセット`x_test.npy` `t_test.npy`を作成していきます。


In [39]:
import polars as pl

# Read the CSV with the Shift_JIS encoding passed below so that the Japanese
# column names are not garbled.
# NOTE(review): the file name is spelled "target_valiables.csv" here, while
# the notebook text refers to "target_variables.csv" — confirm the real name.
target_variables = pl.read_csv(
    '/home/smatsubara/documents/airlift/data/experiments/target_valiables.csv',
    encoding="SHIFT_JIS"
)
print(target_variables.head())

# Drop an already existing "NAME" column so that the one built below is the
# only one. (The previous guard tested for "IDXX" but dropped "NAME", so the
# condition and the action referred to different columns.)
if "NAME" in target_variables.columns:
    target_variables = target_variables.drop("NAME")

# Build the "NAME" column from the first (date) and second (time) columns.
date_col = target_variables.columns[0]
time_col = target_variables.columns[1]
target_variables = target_variables.with_columns(
    (
        pl.lit("P")
        + pl.col(date_col).cast(pl.Utf8)
        + "-"
        # The time is read as an integer, so a value such as 930 would lose
        # its leading zero; pad to four digits.
        # NOTE(review): assumes recordings are named P<YYYYMMDD>-<HHMM>, as in
        # "P20241018-1530" — confirm for times before 10:00.
        + pl.col(time_col).cast(pl.Utf8).str.zfill(4)
    ).alias("NAME")
)

# Columns to display: everything after the date/time columns, with "NAME"
# listed once at the end.
cols_to_show = [col for col in target_variables.columns[2:] if col != "NAME"] + ["NAME"]

# Show the result.
print(target_variables.select(cols_to_show))
# os is imported here as well so that this cell does not depend on another
# cell having been executed first.
import glob
import os

from src import mat2npz_exp

# Raw experiment recordings and the directory passed to mat2npz_exp as
# output_dir.
rawsignal_dir = "/home/smatsubara/documents/airlift/data/experiments/rawsignal"
output_dir = "/home/smatsubara/documents/airlift/data/experiments/processed"

# glob returns matches in arbitrary order; sort so that every run processes
# the files in the same sequence.
mat_files = sorted(glob.glob(os.path.join(rawsignal_dir, "*.mat")))
if not mat_files:
    print(f"No .mat files found in: {rawsignal_dir}")

# Run mat2npz_exp for each .mat file
for file_path in mat_files:
    print(f"Processing {file_path}")
    mat2npz_exp(
        file_path=file_path,
        output_dir=output_dir,
        start_time=0.1,  # skip the beginning: the initial signal is often unstable
        duration=5.0,
        amplitude_threshold=2,
        window_width=0.1e-3,
        signal_key="TDX1"
    )


shape: (5, 9)
┌──────────┬──────┬────────────┬────────────┬───┬────────────┬────────────┬────────────┬───────────┐
│ 日付     ┆ 時分 ┆ 固相見かけ ┆ 気相見かけ ┆ … ┆ 固相体積率 ┆ 気相体積率 ┆ 液相体積率 ┆ ガラス球  │
│ ---      ┆ ---  ┆ 流速       ┆ 流速       ┆   ┆ ---        ┆ ---        ┆ ---        ┆ 直径      │
│ i64      ┆ i64  ┆ ---        ┆ ---        ┆   ┆ f64        ┆ f64        ┆ f64        ┆ ---       │
│          ┆      ┆ f64        ┆ f64        ┆   ┆            ┆            ┆            ┆ str       │
╞══════════╪══════╪════════════╪════════════╪═══╪════════════╪════════════╪════════════╪═══════════╡
│ 20240726 ┆ 1022 ┆ 0.0        ┆ 31.922755  ┆ … ┆ 0.0        ┆ 0.749158   ┆ 0.250842   ┆ -         │
│ 20240726 ┆ 1055 ┆ 0.0        ┆ 32.685636  ┆ … ┆ 0.0        ┆ 0.745521   ┆ 0.254479   ┆ -         │
│ 20240726 ┆ 1113 ┆ 0.0        ┆ 32.048131  ┆ … ┆ 0.0        ┆ 0.7546132  ┆ 0.2453868  ┆ -         │
│ 20240726 ┆ 1122 ┆ 0.0        ┆ 31.968982  ┆ … ┆ 0.0        ┆ 0.742794   ┆ 0.257206   ┆ -         │
│ 20240726 ┆ 13