In [1]:
import os
import sys
import pywt
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import scipy.ndimage as ndimage
sys.path.append(os.path.abspath(os.path.join(r"../../Seismic-wave/")))
from tqdm import tqdm
from utils.data_utils import split_3_channel, test_train_split

In [2]:
# Locations of the preprocessed training features and their labels.
feature_path = "../data/processed/balance_16k/preprocessing/x_train.csv"
label_path = "../data/processed/balance_16k/preprocessing/y_train.csv"

# Read the label table first, then the feature table; the two reads are independent.
df_label = pd.read_csv(label_path)
df = pd.read_csv(feature_path)

# Attach column 'S' of the label table to the feature table under the name "labels".
# pandas aligns this assignment on the row index of the two frames.
df["labels"] = df_label["S"]

# `train` refers to the same DataFrame object as `df` (no copy is made).
train = df

# split_3_channel is a project helper (utils.data_utils); it returns a
# (features, labels) pair that is unpacked here.
train_features, train_labels = split_3_channel(train)

In [3]:
def znorm(data):
    """Z-normalise ``data`` using its global mean and standard deviation.

    The mean and standard deviation are taken over all elements of ``data``.
    A constant 0.001 is added to the standard deviation before dividing, so a
    constant input does not trigger a division by zero.

    Returns ``(data - mean) / (std + 0.001)``.
    """
    centered = data - np.mean(data)
    spread = np.std(data) + 0.001
    return centered / spread


def min_max(data):
    """Linearly rescale ``data`` so its global minimum maps to 0 and its maximum to 1.

    The minimum and maximum are taken over all elements of ``data``.

    Note: when every element of ``data`` is equal, the range ``max - min`` is
    zero and the division is 0/0 (NaN for NumPy float input). No guard is
    applied here.
    """
    lowest = np.min(data)
    value_range = np.max(data) - lowest
    return (data - lowest) / value_range

def apply_cwt_and_save_images(data, scales, waveletname, labels, output_folder):
    """Save one CWT scalogram PNG per sample, grouped into per-label folders.

    For each sample ``i`` the three components ``data[i][0]``, ``data[i][1]``
    and ``data[i][2]`` (called BHZ, BHN and BHE in this notebook) are
    z-normalised with ``znorm`` and transformed with ``pywt.cwt``. The
    magnitudes of the coefficients are stacked along a last axis of size 3,
    min-max scaled jointly over all three channels with ``min_max``, and
    written to ``<output_folder>/<label>/<i>.png``.

    Parameters
    ----------
    data : array-like
        Must expose ``data.shape[0]`` (number of samples) and be indexable as
        ``data[i][c]`` for ``c`` in 0, 1, 2. The three components of a sample
        must have the same length so their transforms can be stacked.
    scales : array-like
        Scales passed straight to ``pywt.cwt``.
    waveletname : str
        Wavelet name passed straight to ``pywt.cwt``.
    labels : indexable
        ``labels[i]`` names the sub-folder for sample ``i``. It is converted
        with ``str()`` so non-string labels (e.g. integers) are accepted.
    output_folder : str
        Root directory of the output; label sub-folders are created on demand.

    Returns
    -------
    None
        The function is used for its side effect of writing PNG files.

    Notes
    -----
    A sample whose scaled image contains NaN or Inf (for example when every
    coefficient magnitude is identical, so ``min_max`` divides by zero) is
    reported with ``print`` and skipped; no file or folder is created for it.
    """
    num_samples = data.shape[0]

    for i in tqdm(range(num_samples), desc="Processing samples"):
        # Channel order 0, 1, 2 corresponds to BHZ, BHN, BHE.
        channel_magnitudes = []
        for channel in range(3):
            signal = znorm(data[i][channel])
            coefficients, _ = pywt.cwt(signal, scales, waveletname)
            channel_magnitudes.append(np.abs(coefficients))

        # Shape (n_scales, n_times, 3); kept as float64 like the zero-initialised
        # buffer this replaces.
        composite_image = np.stack(channel_magnitudes, axis=-1).astype(
            np.float64, copy=False
        )

        # Normalize the composite image between 0 and 1 (jointly over channels).
        composite_image = min_max(composite_image)

        # Check for NaN or Inf values and skip if found.
        if not np.isfinite(composite_image).all():
            print(f"Error: NaN or Inf values detected in composite_image for sample {i}. Skipping this sample.")
            continue

        # Construct the output file path based on the label. str() keeps
        # os.path.join from raising TypeError on non-string labels.
        label_folder = os.path.join(output_folder, str(labels[i]))
        os.makedirs(label_folder, exist_ok=True)
        filepath = os.path.join(label_folder, f"{i}.png")

        # Plot and save the image. The array is (M, N, 3), which imshow treats
        # as RGB; a colormap would be ignored for such input, so none is passed.
        fig, ax = plt.subplots(figsize=(10, 6))
        try:
            ax.imshow(composite_image, aspect="auto")
            ax.axis("off")
            fig.savefig(filepath, format="png", bbox_inches="tight", pad_inches=0)
        finally:
            # Always release the figure, even if drawing or saving fails, so a
            # long run cannot accumulate open figures.
            plt.close(fig)
        


In [4]:
# Wavelet passed to pywt.cwt. NOTE(review): in PyWavelets' "cmorB-C" naming this is
# presumably a complex Morlet with bandwidth 1.5 and centre frequency 1.0 — confirm
# against the pywt documentation.
waveletname = 'cmor1.5-1.0'
# Alternative kept for experimentation:
# waveletname = 'mexh'

# 100 geometrically spaced scales from 1 to 3024.
scales = np.geomspace(1, 3024, num=100)

# Not referenced by the call below.
num_columns_per_channel = 2401

# Root folder for the generated images (one sub-folder per label).
output_folder = "../spectrogram_images/"

# Long-running: the recorded run below took about 5 h 23 min for 16303 samples.
apply_cwt_and_save_images(
    data=train_features,
    scales=scales,
    waveletname=waveletname,
    labels=train_labels,
    output_folder=output_folder,
)

Processing samples: 100%|██████████| 16303/16303 [5:22:48<00:00,  1.19s/it]  
