In [2]:
import os
import matplotlib.pyplot as plt
import numpy as np
import soundfile as sf
from tqdm import tqdm   
import tensorflow as tf

from data_processing.read_rml import get_attrubuts

import sys
sys.path.append("/var/data/apnea/src/vggish")

import vggish_input, vggish_params, vggish_slim
from vggish.vggish_slim import define_vggish_slim, load_vggish_slim_checkpoint

In [2]:
APNEA_DIR = '/var/data/apnea/mic_dataset_spec/1/'
NO_APNEA_DIR = '/var/data/apnea/mic_dataset_spec/0/'

checkpoint_path = "/var/data/apnea/src/vggish/vggish_model.ckpt"  
pca_params_path = "/var/data/apnea/src/vggish/vggish_pca_params.npz"

# Датасет

In [3]:
# with tf.Graph().as_default(), tf.compat.v1.Session() as sess:
#     pool4_output = define_vggish_slim()

#     checkpoint_path = "/var/data/apnea/src/vggish/vggish_model.ckpt"
#     load_vggish_slim_checkpoint(sess, checkpoint_path)

#     for op in sess.graph.get_operations():
#         print(op.name)

In [3]:
def create_dataset(apnea_dir, no_apnea_dir):
    apnea_files = os.listdir(apnea_dir)
    no_apnea_files = os.listdir(no_apnea_dir)[:1500]
    spectograms = []
    labels = []

    for apnea_file in apnea_files:
        apnea_data = np.load(apnea_dir + apnea_file, allow_pickle=True).item()
        
        spectograms.append(apnea_data['spectograms'])
        
        labels.append(apnea_data['label'])

    for no_apnea_file in no_apnea_files:
        no_apnea_data = np.load(no_apnea_dir + no_apnea_file, allow_pickle=True).item()
        
        spectograms.append(no_apnea_data['spectograms'])
        
        labels.append(no_apnea_data['label'])
       

    return np.array(spectograms), np.array(labels)
        


In [4]:
X, y = create_dataset(APNEA_DIR, NO_APNEA_DIR)

In [5]:
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

print("Форма X_train:", X_train.shape)  #(len, 17, 12288)
print("Форма y_train:", y_train.shape) #(len, )

Форма X_train: (2367, 17, 12288)
Форма y_train: (2367,)


# Модель и обучение без оптимизации


In [None]:
from tensorflow.keras import layers, models

def create_model(input_shape):
    model = models.Sequential()

    # Входной слой
    model.add(layers.Input(shape=input_shape))  # input_shape = (17, 12288)

    # Применяем Dense к каждой спектрограмме
    model.add(layers.TimeDistributed(layers.Dense(256, activation='relu')))
    model.add(layers.TimeDistributed(layers.Dense(128, activation='relu')))

    # BiLSTM слой для обработки временной последовательности
    model.add(layers.Bidirectional(layers.LSTM(15, return_sequences=False)))

    # Dropout для регуляризации
    model.add(layers.Dropout(0.5))

    # Полносвязный слой
    model.add(layers.Dense(64, activation='relu'))

    # Выходной слой с одним нейроном и сигмоидной активацией
    model.add(layers.Dense(1, activation='sigmoid'))  # Один выход: вероятность класса 1

    return model

# Пример использования
input_shape = (17, 12288)  # Форма одного элемента датасета
model = create_model(input_shape)
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
time_distributed (TimeDistri (None, 17, 256)           3145984   
_________________________________________________________________
time_distributed_1 (TimeDist (None, 17, 128)           32896     
_________________________________________________________________
bidirectional (Bidirectional (None, 256)               263168    
_________________________________________________________________
dropout (Dropout)            (None, 256)               0         
_________________________________________________________________
dense_2 (Dense)              (None, 64)                16448     
_________________________________________________________________
dense_3 (Dense)              (None, 1)                 65        
Total params: 3,458,561
Trainable params: 3,458,561
Non-trainable params: 0
______________________________________________

In [None]:
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.metrics import Precision, Recall

early_stopping = EarlyStopping(
    monitor='val_loss',  
    patience=5,       
    restore_best_weights=True 
)

model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy', Precision(name='precision'), Recall(name='recall')]
)

history = model.fit(
    X_train, y_train,
    epochs=50,
    batch_size=64,
    validation_data=(X_test, y_test),
    callbacks=[early_stopping]
)

# Модель и обучение с оптимизацией

In [None]:
import numpy as np
from tensorflow.keras import layers, models
from sklearn.model_selection import train_test_split
from sklearn.metrics import precision_score, recall_score, f1_score
from skopt import gp_minimize
from skopt.space import Integer, Categorical
from skopt.utils import use_named_args

from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.metrics import Precision, Recall


space = [
    Categorical([5, 7, 8, 10, 13, 15, 20, 25, 32, 48, 64, 100, 128, 156, 180, 256], name="hidden_units"),  # Количество скрытых блоков BiLSTM
    Categorical([0.2, 0.3, 0.4, 0.5], name="dropout_rate")  # Dropout
]

def create_model(input_shape, hidden_units, dropout_rate):
    model = models.Sequential()

    model.add(layers.Input(shape=input_shape))  # input_shape = (17, 12288)

    model.add(layers.TimeDistributed(layers.Dense(256, activation='relu')))
    model.add(layers.TimeDistributed(layers.Dense(128, activation='relu')))

    model.add(layers.Bidirectional(layers.LSTM(hidden_units, return_sequences=False)))

    model.add(layers.Dropout(dropout_rate))

    model.add(layers.Dense(64, activation='relu'))

    model.add(layers.Dense(1, activation='sigmoid'))

    return model

@use_named_args(space)
def objective(hidden_units, dropout_rate):
    print(f"Тестируем: hidden_units={hidden_units}, dropout_rate={dropout_rate}")
    
    model = create_model(X_train.shape[1:], hidden_units, dropout_rate)

    early_stopping = EarlyStopping(
        monitor='val_loss',  
        patience=5,       
        restore_best_weights=True 
    )

    model.compile(
        optimizer='adam',
        loss='binary_crossentropy',
        metrics=['accuracy', Precision(name='precision'), Recall(name='recall')]
    )

    model.fit(
        X_train, y_train,
        epochs=50,
        batch_size=64,
        validation_data=(X_test, y_test),
        callbacks=[early_stopping],
        verbose=0
    )
    
    y_pred = (model.predict(X_test) > 0.5).astype(int)
    
    precision = precision_score(y_test, y_pred)
    recall = recall_score(y_test, y_pred)
    f1 = f1_score(y_test, y_pred)
    
    print(f"Precision: {precision}, Recall: {recall}, F1-Score: {f1}")
    print('-'*50)
    print()
    
    return -f1

result = gp_minimize(objective, space, n_calls=64, random_state=42)

print("Лучшие параметры:")
print(f"hidden_units: {result.x[0]}")
print(f"dropout_rate: {result.x[1]}")

Лучшие параметры:

Тестируем: hidden_units=5, dropout_rate=0.5
Precision: 0.9244604316546763, Recall: 0.8624161073825504, F1-Score: 0.892361111111111

Тестируем: hidden_units=7, dropout_rate=0.4
Precision: 0.9280575539568345, Recall: 0.8657718120805369, F1-Score: 0.8958333333333333

In [17]:
from skopt.space import Categorical
from skopt.utils import use_named_args

# Определяем пространство гиперпараметров
search_space = [
    Categorical([1, 3, 5, 6, 8, 10, 11, 13, 15, 17], name='hidden_units_bilstm'),  # Нейроны BiLSTM
    Categorical([0.2, 0.3, 0.4, 0.5], name='dropout_rate')  # Dropout
]


In [9]:
# Извлечение лучших параметров
best_hidden_units_bilstm, best_dropout_rate = result.x

# Создание и обучение модели с лучшими параметрами
best_model = create_model(
    input_shape=(17, 12288),
    hidden_units=best_hidden_units_bilstm,
    dropout_rate=best_dropout_rate
)
best_model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy', Precision(name='precision'), Recall(name='recall')]
)
best_model.fit(
    X_train, y_train,
    validation_data=(X_test, y_test),
    epochs=30,
    batch_size=64,
    verbose=1
)


Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30

KeyboardInterrupt: 

# Тест

In [29]:
predictions = best_model.predict(X_test)

In [32]:
from sklearn.metrics import recall_score, precision_score, accuracy_score

bin_predictions = (predictions > 0.3).astype(int)

recall = recall_score(y_test, bin_predictions)
precision = precision_score(y_test, bin_predictions)
accuracy = accuracy_score(y_test, bin_predictions)

print("Recall:", recall)
print("Precision:", precision)
print("Accuracy:", accuracy)

Recall: 0.8523489932885906
Precision: 0.8523489932885906
Accuracy: 0.8513513513513513
