# Converting signals into images and binary files (.npz)

このノートブックではディレクトリを指定して、その中にある生波形データ(.mat)を、一括して画像(.png)及びその値（.npz）に変換し保存するという処理を行っています。

## シミュレーションデータ変換  
シミュレーションで生成した`.mat`ファイルを統一形式である`.npz`に変換します。以下のコードは、ケース名を指定するとその配下の`.mat`ファイルをすべて一括で変換し、`config.json`のコピーとともに`processed/<case_name>`ディレクトリに保存します。

In [12]:
from src import mat2npz_sim
import os
import glob

# Simulation case to convert. The name selects the rawsignal/<case_name>
# input sub-directory and the processed/<case_name> output sub-directory.
case_name = "case4"

# Root directory of the simulation data; the rawsignal/ and processed/
# trees are both resolved relative to it.
base_data_dir = "/home/smatsubara/documents/airlift/data/sandbox/simulation"

def convert_all_simulation_mat_to_npz(case_name, base_data_dir):
    """
    Convert all simulation .mat files of one case to .npz format.

    Every ``*.mat`` file found in ``<base_data_dir>/rawsignal/<case_name>/data``
    is passed to ``mat2npz_sim`` together with the case's ``config.json`` and
    the output directory ``<base_data_dir>/processed/<case_name>``. A copy of
    ``config.json`` is placed in the output directory for reference.

    Parameters
    ----------
    case_name : str
        The name of the simulation case (e.g., "case5").
    base_data_dir : str
        The base directory where simulation data is stored.

    Raises
    ------
    FileNotFoundError
        If the case's ``config.json`` does not exist. The check runs before
        the output directory is created, so a mistyped case name does not
        leave an empty ``processed/<case_name>`` directory behind.
    """
    import shutil

    # Input directory with the raw simulation signals
    mat_dir = os.path.join(base_data_dir, f"rawsignal/{case_name}/data")
    # Configuration file shared by every signal of this case
    config_path = os.path.join(base_data_dir, f"rawsignal/{case_name}/config.json")
    # Destination for the converted files
    output_dir = os.path.join(base_data_dir, f"processed/{case_name}")

    if not os.path.exists(config_path):
        raise FileNotFoundError(f"config.json not found: {config_path}")

    # Create the output directory once and report only when it is new
    newly_created = not os.path.exists(output_dir)
    os.makedirs(output_dir, exist_ok=True)
    if newly_created:
        print(f"Created output directory: {output_dir}")

    # Keep a copy of the configuration next to the converted data
    config_copy_path = os.path.join(output_dir, "config.json")
    shutil.copy2(config_path, config_copy_path)
    print(f"Copied config.json to: {config_copy_path}")

    # glob returns files in arbitrary order; sort for reproducible runs
    mat_files_list = sorted(glob.glob(os.path.join(mat_dir, "*.mat")))
    if not mat_files_list:
        print(f"No .mat files found in: {mat_dir}")
        return

    for mat_file in mat_files_list:
        print(f"Processing: {mat_file}")
        mat2npz_sim(mat_file, config_path, output_dir)

# Example call: convert every .mat file of the case selected above
convert_all_simulation_mat_to_npz(case_name, base_data_dir)




Copied config.json to: /home/smatsubara/documents/airlift/data/sandbox/simulation/processed/case4/config.json
Processing: /home/smatsubara/documents/airlift/data/sandbox/simulation/rawsignal/case4/data/solid_liquid2.mat
<KeysViewHDF5 ['#refs#', '#subsystem#', 'kgrid', 'sensor_data']>
['Nt', 'Nx', 'Ny', 'Nz', 'dim', 'dt', 'dx', 'dxudxn', 'dxudxn_sgx', 'dy', 'dyudyn', 'dyudyn_sgy', 'dz', 'dzudzn', 'dzudzn_sgz', 'k', 'k_max', 'kx_max', 'kx_vec', 'ky_max', 'ky_vec', 'kz_max', 'kz_vec', 'nonuniform', 'xn_vec', 'xn_vec_sgx', 'yn_vec', 'yn_vec_sgy', 'zn_vec', 'zn_vec_sgz']
999999999.9999999
keys: ['#refs#', '#subsystem#', 'kgrid', 'sensor_data']
['#refs#', '#subsystem#', 'kgrid', 'sensor_data']
(100001,)
Processed data and metadata saved to: /home/smatsubara/documents/airlift/data/sandbox/simulation/processed/case4/solid_liquid2_processed.npz
Processing: /home/smatsubara/documents/airlift/data/sandbox/simulation/rawsignal/case4/data/solid_liquid1.mat
<KeysViewHDF5 ['#refs#', '#subsystem#', 'k

## 機械学習用データセット生成（シミュレーション）
次に、変換した`.npz`ファイルに対応する目標変数の値を`config.json`を使って計算し、データセットとなる`x_train.npy`、`t_train.npy`を作成していきます。入力と目標変数のIDの紐づけが狂うとすべての計算の意味がなくなってしまうので、最大限注意してください。筆者は細心の注意を払って実装していますが、もし誤りがあればご指摘いただけると幸いです。


In [3]:
from src.utils import calculate_gvf_and_signal
import numpy as np
import math
import json
import glob
import os
# Units are all mm.

# Example paths for a single case (case6) and a single processed signal.
# NOTE(review): config_path, npz_path, x_list and t_list are not read by any
# code visible in this cell; the function defined below builds its own local
# equivalents. Confirm that no other cell relies on them before removing.
config_path = "/home/smatsubara/documents/airlift/data/sandbox/simulation/rawsignal/case6/config.json"
npz_path = "/home/smatsubara/documents/airlift/data/sandbox/simulation/processed/case6/solid_liquid_reflector1_processed.npz"
# Per-file inputs and targets are meant to be collected in lists and
# converted to numpy arrays once at the end.

x_list = []
t_list = []

# Process all .npz files in the specified directory with the single config.json


def process_case_and_return_dataset(case_name, base_dir):
    """
    Build the machine-learning arrays for one processed case directory.

    Each ``*.npz`` file directly under ``base_dir`` is handed, together with
    ``<base_dir>/config.json``, to ``calculate_gvf_and_signal``. The first
    element of every returned pair is collected as an input and the second
    as its target. Files are visited in lexicographically sorted path order,
    so entry ``i`` of both returned arrays originates from the same file.

    Parameters
    ----------
    case_name : str
        The name of the case (e.g., "case6"). Kept for the caller's
        signature; the body does not read it.
    base_dir : str
        Directory holding the case's ``.npz`` files and its ``config.json``.

    Returns
    -------
    x_train : np.ndarray
        Stacked first elements returned by ``calculate_gvf_and_signal``
        (the input signals, going by the helper's name).
    t_train : np.ndarray
        Stacked second elements (the target GVF values, going by the
        helper's name).
    """
    import glob
    import os

    import numpy as np

    cfg_file = os.path.join(base_dir, "config.json")

    sample_paths = glob.glob(os.path.join(base_dir, "*.npz"))
    sample_paths.sort()
    print(sample_paths)

    # One (input, target) pair per file, in the same order as sample_paths
    pairs = [calculate_gvf_and_signal(cfg_file, path) for path in sample_paths]

    x_train = np.array([signal for signal, _ in pairs])
    t_train = np.array([target for _, target in pairs])
    return x_train, t_train

# Process all cases in the processed directory and aggregate x_train and t_train

import glob
import os
import numpy as np

# Root directory that holds one sub-directory per processed case
processed_base_dir = "/home/smatsubara/documents/airlift/data/sandbox/simulation/processed"
# NOTE(review): output_path is defined but nothing in this cell writes to it;
# confirm whether x_train / t_train are meant to be saved there.
output_path = "/home/smatsubara/documents/airlift/data/sandbox/simulation/dataset"

# Collect the case directories (names starting with "case") in sorted order
case_dirs = sorted(
    d
    for d in os.listdir(processed_base_dir)
    if d.startswith("case") and os.path.isdir(os.path.join(processed_base_dir, d))
)

x_train_list = []
t_train_list = []

for case_name in case_dirs:
    base_dir = os.path.join(processed_base_dir, case_name)
    print(f"Processing {case_name} in {base_dir}")
    x_tmp, t_tmp = process_case_and_return_dataset(case_name, base_dir)
    print(f"x_tmp shape: {x_tmp.shape}, t_tmp shape: {t_tmp.shape}")
    # A case directory without .npz files yields empty 1-D arrays, whose rank
    # would not match the arrays of the other cases in np.concatenate.
    if x_tmp.shape[0] == 0:
        print(f"Skipping {case_name}: no samples found")
        continue
    x_train_list.append(x_tmp)
    t_train_list.append(t_tmp)

# Fail with a clear message instead of NumPy's generic concatenate error
if not x_train_list:
    raise ValueError(f"No samples found under: {processed_base_dir}")

# Concatenate all cases into single arrays; inputs and targets are appended
# together above, so their rows stay aligned.
x_train = np.concatenate(x_train_list, axis=0)
t_train = np.concatenate(t_train_list, axis=0)

print("Final x_train shape:", x_train.shape)
print("Final t_train shape:", t_train.shape)

#print(t_train)


Processing case2 in /home/smatsubara/documents/airlift/data/sandbox/simulation/processed/case2
['/home/smatsubara/documents/airlift/data/sandbox/simulation/processed/case2/solid_liquid_reflector10_processed.npz', '/home/smatsubara/documents/airlift/data/sandbox/simulation/processed/case2/solid_liquid_reflector1_processed.npz', '/home/smatsubara/documents/airlift/data/sandbox/simulation/processed/case2/solid_liquid_reflector2_processed.npz', '/home/smatsubara/documents/airlift/data/sandbox/simulation/processed/case2/solid_liquid_reflector3_processed.npz', '/home/smatsubara/documents/airlift/data/sandbox/simulation/processed/case2/solid_liquid_reflector4_processed.npz', '/home/smatsubara/documents/airlift/data/sandbox/simulation/processed/case2/solid_liquid_reflector5_processed.npz', '/home/smatsubara/documents/airlift/data/sandbox/simulation/processed/case2/solid_liquid_reflector6_processed.npz', '/home/smatsubara/documents/airlift/data/sandbox/simulation/processed/case2/solid_liquid_re

## 実機データ変換  
実機データの`.mat`のファイルも、先ほどと同様に統一形式である`.npz`に変換します。同じく、`experiments/processed`に保存されるようにしています。

In [2]:
from src import mat2npz_exp

## 機械学習用データセット生成(実機)  
次に、変換した`.npz`ファイルに対応する目標変数の値を`target_variables.csv`から参照し、データセット`x_test.npy`、`t_test.npy`を作成していきます。実装に誤りがあればご指摘ください。
