In [9]:
import numpy as np
import pywt
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.model_selection import train_test_split
import os
import librosa

In [10]:
train_features_path = "/nfs/student/r/rajeshupadhayaya/Projects/Seismic-wave/data/processed/balance_16k/preprocessing/x_train.csv"
labels_path = "/nfs/student/r/rajeshupadhayaya/Projects/Seismic-wave/data/processed/balance_16k/preprocessing/y_train.csv"

In [11]:
df_train = pd.read_csv(train_features_path, header=None)
df_labels = pd.read_csv(labels_path, header=None)

In [12]:
df_train.columns = [str(i) for i in range(df_train.shape[1])]
df_labels.columns = [str(i) for i in range(df_labels.shape[1])]

In [13]:
def apply_cwt_to_single_row(row, num_columns_per_channel, scales, waveletname='morl'):
    BHZ = row[:num_columns_per_channel]         
    BHN = row[num_columns_per_channel:2*num_columns_per_channel] 
    BHE = row[2*num_columns_per_channel:]       
    
    cwt_BHZ, _ = pywt.cwt(BHZ, scales, waveletname)
    cwt_BHN, _ = pywt.cwt(BHN, scales, waveletname)
    cwt_BHE, _ = pywt.cwt(BHE, scales, waveletname)
    
    cwt_combined = np.stack([cwt_BHZ, cwt_BHN, cwt_BHE], axis=0)
    
    return cwt_combined

In [14]:
def apply_cwt_to_all_rows(data, num_columns_per_channel, scales, waveletname='morl'):
    cwt_results = []
    
    for _, row in data.iterrows():
        row_array = row.to_numpy()
        cwt_result = apply_cwt_to_single_row(row_array, num_columns_per_channel, scales, waveletname)
        cwt_results.append(cwt_result)
    
    return np.array(cwt_results)

In [15]:
df_combined = pd.concat([df_train, df_labels], axis=1)

In [16]:
df_combined.columns = list(df_train.columns) + ['label']

In [17]:
df_sampled = df_combined.groupby('label').apply(lambda x: x.sample(n=10, random_state=42)).reset_index(drop=True)


  df_sampled = df_combined.groupby('label').apply(lambda x: x.sample(n=10, random_state=42)).reset_index(drop=True)


In [18]:
df_train_sampled = df_sampled.iloc[:, :-1]  
df_labels_sampled = df_sampled.iloc[:, -1] 

In [19]:
scales = np.arange(1, 128) 
num_columns_per_channel = 2401

cwt_transformed_data = apply_cwt_to_all_rows(df_train_sampled, num_columns_per_channel, scales)

In [20]:
cwt_transformed_data.shape

(60, 3, 127, 2401)

In [21]:
cwt_transformed_data[0][0]

array([[  97.77381405, -309.12341512, -282.33865496, ...,   77.9998463 ,
        -108.11050301,   95.66010079],
       [-141.03548916,  280.64084973, -132.2923018 , ..., -126.29114925,
         360.59552374, -314.40158656],
       [-511.86081345,  549.72844969,  532.92275601, ...,  468.99138509,
         436.98278377, -498.69872383],
       ...,
       [ 307.05088438,  595.39731765,  525.55823286, ..., 8820.11128522,
        8932.03833584, 8632.88465607],
       [ 640.92391957,  920.55135951,  857.22045906, ..., 9436.82242524,
        9263.90776331, 8699.96636672],
       [ 589.62475005,  776.4940746 ,  667.6072744 , ..., 9819.17941255,
        9871.51485784, 9538.0194507 ]])

In [39]:
def save_spectrogram_images_from_cwt(cwt_data, labels, output_folder):
    num_samples = cwt_data.shape[0]
    num_scales = cwt_data.shape[2]

    for i in range(num_samples):
        sample_data = cwt_data[i]
        label = labels[i]
        
        BHZ = np.array(sample_data[0])
        BHN = np.array(sample_data[1])
        BHE = np.array(sample_data[2]) 
        
        print(f"Sample {i}, BHZ shape: {BHZ.shape}, BHN shape: {BHN.shape}, BHE shape: {BHE.shape}")

        print(f"Data types: BHZ: {BHZ.dtype}, BHN: {BHN.dtype}, BHE: {BHE.dtype}")
        
        assert BHZ.shape == BHN.shape == BHE.shape, f"Shapes don't match: BHZ: {BHZ.shape}, BHN: {BHN.shape}, BHE: {BHE.shape}"

        composite_image = np.zeros((num_scales, BHZ.shape[1], 3))
        composite_image[:, :, 0] = np.abs(BHZ)
        composite_image[:, :, 1] = np.abs(BHN)
        composite_image[:, :, 2] = np.abs(BHE)

        if np.isnan(composite_image).any() or np.isinf(composite_image).any():
            print(f"Error: NaN or Inf values detected in composite_image for sample {i}. Skipping this sample.")
            continue

        max_value = composite_image.max()
        if max_value != 0:
            composite_image /= max_value
        else:
            print(f"Warning: Max value is zero for sample {i}. Skipping normalization.")
            continue
        
        print(f"Composite image dtype: {composite_image.dtype}")
        print(f"Composite image max value: {composite_image.max()}")
        print(f"Composite image min value: {composite_image.min()}")
        print(f"Composite image type: {type(composite_image)}")
        print(f"Composite image sample: {composite_image[0, 0, :]}")

        print(f"Composite image shape for sample {i}: {composite_image.shape}, dtype: {composite_image.dtype}")
        
        filename = f"spectrogram_class_{label}_{i}.png"
        filepath = os.path.join(output_folder, filename)
        
        # Plot the spectrogram
        plt.figure(figsize=(10, 6))
        plt.imshow(composite_image, aspect='auto')
        plt.title(f'Spectrogram Image (Class {label})')
        plt.xlabel('Time')
        plt.ylabel('Scales')
        # plt.colorbar(label='Amplitude')  # Commented for RGB images
        
        # Save the image
        plt.savefig(filepath, format='png')  # Ensure png format
        plt.close()  # Close the plot to avoid memory issues

        print(f"Saved: {filepath}")

# Example usage
output_folder = "/nfs/student/r/rajeshupadhayaya/Projects/Seismic-wave/data/processed/balance_16k/spectrogram_images"
save_spectrogram_images_from_cwt(cwt_transformed_data, df_labels_sampled, output_folder)

Sample 0, BHZ shape: (127, 2401), BHN shape: (127, 2401), BHE shape: (127, 2401)
Data types: BHZ: float64, BHN: float64, BHE: float64
Composite image shape for sample 0: (127, 2401, 3), dtype: float32


ValueError: object __array__ method not producing an array

Error in callback <function _draw_all_if_interactive at 0x7fccea6859e0> (for post_execute), with arguments args (),kwargs {}:


ValueError: object __array__ method not producing an array

In [None]:
df_train, df_test, train_labels, test_labels = train_test_split(cwt_transformed_data, df_labels, test_size=0.1, random_state=42, shuffle=True)