In [175]:
import h5py
import numpy as np
import matplotlib.pyplot as plt
from scipy.signal import spectrogram
from tensorflow.keras.preprocessing import image
from sklearn.preprocessing import MinMaxScaler
import cv2
from tensorflow.keras.applications.resnet50 import ResNet50, preprocess_input
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
import pandas as pd
import requests
import gdown




# Load data and generate spectrogram functions

In [157]:
def load_strain_data(file_path):
    with h5py.File(file_path, "r") as h:
        strain = h["strain"]["Strain"][:]
    return strain


def generate_spectrogram(hdf5_file):
    strain_data = load_strain_data(hdf5_file)
    frequency, time, spectrogram_array = spectrogram(strain_data, fs=4096)

    scaler = MinMaxScaler()
    s_scaled = scaler.fit_transform(spectrogram_array)

    return s_scaled

In [177]:
folder_url = 'https://drive.google.com/drive/u/0/folders/1_ZEDaANx7ywRjHQm7A6EZZTGSc-0Jj_u'
gdown.download_folder(folder_url, quiet=False)


Retrieving folder contents


Processing file 1AiNI-WMFfs25HIoFvC7Ez-JGPMZzEqK4 Event-1.hdf5
Processing file 1-NFzkDmm6CsPmswhVUN07jDc2dxxOAHh Event-2.hdf5
Processing file 1ML-fxbVkYEDQRzt-gmNVwKr7-AcQZ_qU Event-3.hdf5
Processing file 1NiDcVime-jUXnw8qNwjJdMXIHk9Vmfsu Event-4.hdf5
Processing file 1COlAVH3aYftW7ggE7zMdhNlLJrswccXi Event-5.hdf5
Processing file 1Ck0-_s_tjQhvb11Y7sSXU-cZnfBZNPxe Event-6.hdf5
Processing file 1339oOnVcEgXEAgW2T9RP9xrhoJAtYWK- Event-7.hdf5
Processing file 1jEDNJa6Rsc0jnfpdBr-VM6DDHd9at0OD Event-8.hdf5
Processing file 12wOpDgr7vuCf0_3S34texYOcNIL83G0J Event-9.hdf5
Processing file 1NcNp_MO4ZkzhlU9UxAO4WqtQeJljDOyg Event-10.hdf5
Processing file 1ATmmepRC35UrqKKuZel_twhD7J0yJ7dV Non-Event-1.hdf5
Processing file 1-b3onqNQ-m-cEpuR9t46F265ag2QZB4b Non-Event-2.hdf5
Processing file 1dYNGFNbf21DvJijw93lR5zBEQYuN4tgW Non-Event-3.hdf5
Processing file 1NjUDoTAw6YVaoBQ1cmEqtUK1kY38fcqu Non-Event-4.hdf5
Processing file 119gD90jIxa_tFOyJIf-Uo-XHrB1-dKKC Non-Event-5.hdf5
Processing file 1EhBbTptbJz7hFPmVz

Retrieving folder contents completed
Building directory structure
Building directory structure completed
Downloading...
From (original): https://drive.google.com/uc?id=1AiNI-WMFfs25HIoFvC7Ez-JGPMZzEqK4
From (redirected): https://drive.google.com/uc?id=1AiNI-WMFfs25HIoFvC7Ez-JGPMZzEqK4&confirm=t&uuid=f0b9b59d-3836-46b0-b61b-f095f2ab813e
To: C:\Users\erict\Documents\projects\gwave_classify\raw_strain_data\Event-1.hdf5
 99%|█████████▊| 128M/130M [00:13<00:00, 9.19MB/s] 

KeyboardInterrupt: 

In [179]:
hdf5_dataset = pd.read_csv('/data/raw/D_files.csv')
# hdf5_dataset = pd.read_csv(r"C:\Users\erict\Documents\projects\gwave_classify\D_files.csv")
hdf5_dataset['Filename'] = r"C:\Users\erict\Documents\projects\gwave_classify\raw_strain_data\\" + hdf5_dataset['Filename'] 
hdf5_dataset['Category'] = hdf5_dataset['Category'].replace({'Event': 1, 'Non-Event': 0})

FileNotFoundError: [Errno 2] No such file or directory: '/data/raw/D_files.csv'

# Image feature extraction model 

In [159]:
model = ResNet50(weights='imagenet', include_top=False, pooling='avg')

def spectrogram_feature_extraction(spectrogram_array):
    resized_spectrogram = cv2.resize(spectrogram_array, (224, 224))
    final_spectrogram = np.stack([resized_spectrogram] * 3, axis=-1)
    print(final_spectrogram.shape)
    img_array = final_spectrogram[None, ...]
    img_array = preprocess_input(img_array)

    features = model.predict(img_array)
    return features

In [160]:
features_list = []
for hdf5_file in hdf5_dataset['Filename']:
    spec_data = generate_spectrogram(hdf5_file)
    features = spectrogram_feature_extraction(spec_data).flatten()
    features_list.append(features)


(224, 224, 3)
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1s/step
(224, 224, 3)
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 61ms/step
(224, 224, 3)
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 64ms/step
(224, 224, 3)
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 65ms/step
(224, 224, 3)
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 62ms/step
(224, 224, 3)
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 65ms/step
(224, 224, 3)
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 61ms/step

  return xp.asarray(numpy.nanmin(X, axis=axis))
  return xp.asarray(numpy.nanmax(X, axis=axis))


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 61ms/step
(224, 224, 3)
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 54ms/step
(224, 224, 3)
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 64ms/step
(224, 224, 3)
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 61ms/step
(224, 224, 3)
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 61ms/step
(224, 224, 3)
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 55ms/step
(224, 224, 3)
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 59ms/step

  return xp.asarray(numpy.nanmin(X, axis=axis))
  return xp.asarray(numpy.nanmax(X, axis=axis))


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 60ms/step
(224, 224, 3)
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 57ms/step

  return xp.asarray(numpy.nanmin(X, axis=axis))
  return xp.asarray(numpy.nanmax(X, axis=axis))


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 58ms/step
(224, 224, 3)
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 53ms/step
(224, 224, 3)
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 60ms/step


  return xp.asarray(numpy.nanmin(X, axis=axis))
  return xp.asarray(numpy.nanmax(X, axis=axis))


(224, 224, 3)
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 66ms/step
(224, 224, 3)
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 57ms/step
(224, 224, 3)
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 63ms/step
(224, 224, 3)
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 67ms/step


# Random forest classifier

In [169]:
# Not nearly enough datapoints to get anything accurate. Files are too large and it would take too long to extract features.
# Only 20 datapoints used. Need more for training and testing the model

X = np.array(features_list)
y = hdf5_dataset['Category'].values


X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.5, random_state=42)

classifier = RandomForestClassifier()
classifier.fit(X_train, y_train)

y_pred = classifier.predict(X_test)

accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy * 100:.2f}%")

Accuracy: 60.00%
