In [1]:
import numpy as np
import pywt
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.model_selection import train_test_split
import os
import librosa

In [2]:
# Feature and label CSVs sit side by side in the same preprocessing directory.
preprocessing_dir = "../data/processed/balance_16k/preprocessing"
train_features_path = f"{preprocessing_dir}/x_train.csv"
labels_path = f"{preprocessing_dir}/y_train.csv"

In [3]:
# Read only the first 100 rows of each file; neither CSV has a header row.
csv_read_options = {"header": None, "nrows": 100}
df_train = pd.read_csv(train_features_path, **csv_read_options)
df_labels = pd.read_csv(labels_path, **csv_read_options)

In [4]:
# Relabel the columns of both frames with string positions: "0", "1", "2", ...
df_train.columns = list(map(str, range(df_train.shape[1])))
df_labels.columns = list(map(str, range(df_labels.shape[1])))

In [5]:
def apply_cwt_to_single_row(row, num_columns_per_channel, scales, waveletname='morl'):
    """Compute the continuous wavelet transform of one three-channel record.

    The row is read as three equal-length channels stored back to back:
    BHZ first, then BHN, then BHE, each ``num_columns_per_channel`` values long.

    Parameters
    ----------
    row : array-like
        Flattened sample holding exactly ``3 * num_columns_per_channel`` values.
    num_columns_per_channel : int
        Number of values belonging to each channel.
    scales : array-like
        Scales forwarded unchanged to ``pywt.cwt``.
    waveletname : str, optional
        Wavelet name forwarded to ``pywt.cwt`` (default ``'morl'``).

    Returns
    -------
    numpy.ndarray
        The three coefficient arrays stacked along a new leading axis, in
        BHZ, BHN, BHE order.

    Raises
    ------
    ValueError
        If ``row`` does not contain exactly ``3 * num_columns_per_channel`` values.
    """
    row = np.asarray(row)
    expected_length = 3 * num_columns_per_channel

    # Fail early with a readable message: a short or long row would otherwise
    # give the last channel a different length and only blow up inside np.stack.
    actual_length = row.shape[0] if row.ndim else 0
    if actual_length != expected_length:
        raise ValueError(
            f"row has {actual_length} values; expected {expected_length} "
            f"(3 channels x {num_columns_per_channel} columns)"
        )

    channels = (
        row[:num_columns_per_channel],                               # BHZ
        row[num_columns_per_channel:2 * num_columns_per_channel],    # BHN
        row[2 * num_columns_per_channel:expected_length],            # BHE
    )

    # pywt.cwt returns a (coefficients, frequencies) pair; only the coefficients are kept.
    coefficients = [pywt.cwt(channel, scales, waveletname)[0] for channel in channels]

    return np.stack(coefficients, axis=0)

In [6]:
def apply_cwt_to_all_rows(data, num_columns_per_channel, scales, waveletname='morl'):
    """Apply ``apply_cwt_to_single_row`` to every row of a DataFrame.

    Parameters
    ----------
    data : pandas.DataFrame
        One flattened three-channel sample per row.
    num_columns_per_channel : int
        Number of columns belonging to each channel.
    scales : array-like
        Scales forwarded to the per-row transform.
    waveletname : str, optional
        Wavelet name forwarded to the per-row transform (default ``'morl'``).

    Returns
    -------
    numpy.ndarray
        Per-row results stacked along a new leading axis, in row order. For a
        frame with no rows, an empty array of shape
        ``(0, 3, number of scales, num_columns_per_channel)`` is returned.
    """
    cwt_results = [
        apply_cwt_to_single_row(row.to_numpy(), num_columns_per_channel, scales, waveletname)
        for _, row in data.iterrows()
    ]

    if not cwt_results:
        # np.array([]) would be 1-D, so code that reads .shape[2] downstream
        # would fail; keep the 4-D layout even when there is nothing to transform.
        return np.empty((0, 3, np.size(scales), num_columns_per_channel))

    return np.array(cwt_results)

In [7]:
# Put the label column(s) to the right of the feature columns; rows are matched on the index.
df_combined = pd.concat((df_train, df_labels), axis="columns")

In [8]:
# Keep the feature column names as they are and name the trailing column 'label'.
columns = [*df_train.columns, 'label']
df_combined.columns = columns

In [9]:
# Draw two rows per class, re-seeding with 42 for every class. Each group is
# sampled explicitly instead of through groupby(...).apply: letting apply
# operate on the grouping column is deprecated in pandas, and once that
# behaviour is removed the 'label' column would disappear from the result.
# Groups are visited in sorted label order and the index is renumbered from 0.
df_sampled = pd.concat(
    [
        class_rows.sample(n=2, random_state=42)
        for _, class_rows in df_combined.groupby('label')
    ],
    ignore_index=True,
)


  df_sampled = df_combined.groupby('label').apply(lambda x: x.sample(n=2, random_state=42)).reset_index(drop=True)


In [10]:
# The final column is the class label; every column before it is feature data.
label_position = df_sampled.shape[1] - 1
df_train_sampled = df_sampled.iloc[:, :label_position]
df_labels_sampled = df_sampled.iloc[:, label_position]

In [11]:
# Transform settings: integer scales 1 through 127, and 2401 columns per channel.
num_columns_per_channel = 2401
scales = np.arange(1, 128)

cwt_transformed_data = apply_cwt_to_all_rows(
    data=df_train_sampled,
    num_columns_per_channel=num_columns_per_channel,
    scales=scales,
)

In [12]:
# Axes are (sample, channel, scale, time).
np.shape(cwt_transformed_data)

(12, 3, 127, 2401)

In [13]:
# Coefficients of the first sample's first channel (BHZ).
cwt_transformed_data[0, 0]

array([[  -4.16213318, -111.89046461,    4.15088227, ...,   -4.1449966 ,
          37.35571541, -111.34510575],
       [  22.10545811,  167.81235449, -195.3181616 , ...,   77.3953873 ,
        -222.30537981,  202.61569699],
       [  11.98499199,  309.26120317,  -11.22226595, ..., -141.09156323,
        -154.82002669,  296.67226009],
       ...,
       [ 221.23594736,  416.07957812,  453.85187515, ...,  652.24850387,
         462.75369766,  460.44321372],
       [ 223.85324664,  411.85427967,  441.59628818, ...,  660.69711179,
         465.48571359,  478.53186719],
       [ 262.13733308,  450.0848452 ,  477.96878044, ...,  676.45204283,
         469.5232531 ,  474.23206018]])

In [14]:
def save_spectrogram_images_from_cwt(cwt_data, labels, output_folder):
    """Render each sample's CWT coefficients as an RGB image and save it as a PNG.

    The absolute values of channels 0, 1 and 2 (called BHZ, BHN and BHE here)
    become the red, green and blue planes of the image. Every image is divided
    by its own maximum before plotting.

    Parameters
    ----------
    cwt_data : numpy.ndarray
        Coefficients indexed as (sample, channel, scale, time), with at least
        three channels.
    labels : sequence or pandas.Series
        One label per sample, in the same order as ``cwt_data``. Labels are
        looked up by position.
    output_folder : str
        Directory that receives the files; it is created if it does not exist.
        Files are named ``spectrogram_class_{label}_{i}.png``.

    Notes
    -----
    A sample whose image contains NaN/Inf values, or whose maximum is zero, is
    reported on stdout and skipped; no file is written for it.
    """
    num_samples = cwt_data.shape[0]
    num_scales = cwt_data.shape[2]

    # Plain `labels[i]` on a pandas Series is an index-label lookup, which picks
    # the wrong row (or raises KeyError) when the index is not 0..n-1. Use the
    # positional accessor when there is one; lists and arrays are unaffected.
    labels_by_position = labels.iloc if hasattr(labels, "iloc") else labels

    # savefig does not create missing directories.
    os.makedirs(output_folder, exist_ok=True)

    for i in range(num_samples):
        sample_data = cwt_data[i]
        label = labels_by_position[i]

        BHZ = np.array(sample_data[0])
        BHN = np.array(sample_data[1])
        BHE = np.array(sample_data[2])

        print(f"Sample {i}, BHZ shape: {BHZ.shape}, BHN shape: {BHN.shape}, BHE shape: {BHE.shape}")

        print(f"Data types: BHZ: {BHZ.dtype}, BHN: {BHN.dtype}, BHE: {BHE.dtype}")

        assert BHZ.shape == BHN.shape == BHE.shape, f"Shapes don't match: BHZ: {BHZ.shape}, BHN: {BHN.shape}, BHE: {BHE.shape}"

        # One colour plane per channel, holding the coefficient magnitudes.
        composite_image = np.zeros((num_scales, BHZ.shape[1], 3))
        composite_image[:, :, 0] = np.abs(BHZ)
        composite_image[:, :, 1] = np.abs(BHN)
        composite_image[:, :, 2] = np.abs(BHE)

        if np.isnan(composite_image).any() or np.isinf(composite_image).any():
            print(f"Error: NaN or Inf values detected in composite_image for sample {i}. Skipping this sample.")
            continue

        # Scale into [0, 1]; an all-zero image cannot be scaled and is skipped.
        max_value = composite_image.max()
        if max_value == 0:
            print(f"Warning: Max value is zero for sample {i}. Skipping this sample.")
            continue
        composite_image /= max_value

        print(f"Composite image dtype: {composite_image.dtype}")
        print(f"Composite image max value: {composite_image.max()}")
        print(f"Composite image min value: {composite_image.min()}")
        print(f"Composite image type: {type(composite_image)}")
        print(f"Composite image sample: {composite_image[0, 0, :]}")

        print(f"Composite image shape for sample {i}: {composite_image.shape}, dtype: {composite_image.dtype}")

        filename = f"spectrogram_class_{label}_{i}.png"
        filepath = os.path.join(output_folder, filename)

        # Plot on an explicit figure so it can be closed even if saving fails;
        # otherwise open figures pile up across the loop.
        fig, ax = plt.subplots(figsize=(10, 6))
        try:
            ax.imshow(composite_image, aspect='auto')
            ax.set_title(f'Spectrogram Image (Class {label})')
            ax.set_xlabel('Time')
            ax.set_ylabel('Scales')
            fig.savefig(filepath, format='png')
        finally:
            plt.close(fig)

        print(f"Saved: {filepath}")

# Example usage: write one PNG per sampled record into the spectrogram folder.
output_folder = "../spectrogram_images/"
save_spectrogram_images_from_cwt(
    cwt_data=cwt_transformed_data,
    labels=df_labels_sampled,
    output_folder=output_folder,
)

Sample 0, BHZ shape: (127, 2401), BHN shape: (127, 2401), BHE shape: (127, 2401)
Data types: BHZ: float64, BHN: float64, BHE: float64
Composite image dtype: float64
Composite image max value: 1.0
Composite image min value: 3.9970062844788614e-08
Composite image type: <class 'numpy.ndarray'>
Composite image sample: [0.00138548 0.00189641 0.00374148]
Composite image shape for sample 0: (127, 2401, 3), dtype: float64
Saved: ../spectrogram_images/spectrogram_class_Lg_0.png
Sample 1, BHZ shape: (127, 2401), BHN shape: (127, 2401), BHE shape: (127, 2401)
Data types: BHZ: float64, BHN: float64, BHE: float64
Composite image dtype: float64
Composite image max value: 1.0
Composite image min value: 1.3762598042846505e-07
Composite image type: <class 'numpy.ndarray'>
Composite image sample: [0.00149304 0.00220448 0.00133666]
Composite image shape for sample 1: (127, 2401, 3), dtype: float64
Saved: ../spectrogram_images/spectrogram_class_Lg_1.png
Sample 2, BHZ shape: (127, 2401), BHN shape: (127, 2