In [6]:
import os, numpy as np, scipy as sp, scipy.io, scipy.io.wavfile
import torchaudio
import torch
from torch.nn import ZeroPad2d
import librosa
from tqdm.notebook import tqdm

from Const import *
from helper_code import *
from audio_util import AudioUtil

In [None]:
import matplotlib.pyplot as plt
from pylab import rcParams

# Default size for every figure drawn in this notebook: width 25, height 15
# (matplotlib interprets figure.figsize in inches).
rcParams.update({'figure.figsize': (25, 15)})

In [9]:
from torchvggish import vggish, vggish_input

# Initialise model and download weights.
# The model is only used to extract embeddings in this notebook, so it is put
# into inference mode right away (eval() returns the module itself).
embedding_model = vggish().eval()

In [10]:
# Build one flat VGGish feature vector per patient.
#
# For every patient, each recording is split into cardiac cycles, every cycle is
# normalised, padded to MAX_DURATION and embedded with VGGish. The embeddings of
# one recording are stacked, truncated/zero-padded to EMBEDDING_ROWS rows and
# flattened into one row of a (len(recording_locations), 128 * EMBEDDING_ROWS)
# buffer, which is itself flattened into the patient's feature vector.
#
# Result: `recording_cycles_embeddings` is a 2-D tensor with one row per patient,
# or None when no patient files are found.

# Root folder of the recordings. Set the CIRCOR_DATA_FOLDER environment variable
# to point somewhere else without editing the notebook; the original path is the
# fallback.
data_folder = os.environ.get(
    "CIRCOR_DATA_FOLDER",
    "C:/Users/lumin/Desktop/Work/20212/Data/circor-heart-sound/final/test",
)
recording_locations = ['AV', 'MV', 'PV', 'TV', 'PhC']
patient_files = find_patient_files(data_folder)

# Collected per-patient vectors; stacked once at the end instead of growing a
# tensor with vstack on every iteration.
per_patient_embeddings = []

# Inference only: without no_grad every stored embedding would keep its
# autograd graph alive for the whole run.
with torch.no_grad():
    for patient in tqdm(patient_files):
        current_patient_data = load_patient_data(patient)
        current_recordings_paths, current_recordings = load_recordings(
            data_folder, current_patient_data, get_paths=True, preprocess=False
        )

        # Rows that never get filled (skipped recordings) stay all-zero.
        cur_recording_cycles_embeddings = torch.zeros((len(recording_locations), 128 * EMBEDDING_ROWS))

        # NOTE(review): rows are indexed by the recording's position `i`, not by
        # its auscultation location in `recording_locations`. A patient with more
        # recordings than len(recording_locations) would raise an IndexError, and
        # patients with missing locations are not aligned with each other.
        # Confirm whether rows should be keyed by location instead.
        for i in range(len(current_recordings_paths)):
            segmentation_file_path = AudioUtil.get_segmentation_file(current_recordings_paths[i])
            cardiac_states = AudioUtil.get_cardiac_states(segmentation_file_path)

            # No segmentation available for this recording: leave its row as zeros.
            if cardiac_states is None:
                continue

            cardiac_cycles = AudioUtil.split_cardiac_cycles(current_recordings[i], cardiac_states)

            cycle_embeddings = []
            for cycle in cardiac_cycles:
                cur_cycle = AudioUtil.audio_norm(cycle)
                cur_cycle = AudioUtil.pad_signal(cur_cycle, MAX_DURATION)

                example = vggish_input.waveform_to_examples(data=cur_cycle, sample_rate=NEW_SAMPLING_RATE)
                cycle_embeddings.append(embedding_model(example))

            # A recording that yields no cycles previously crashed on
            # `None.size()`; treat it like a recording without segmentation.
            if not cycle_embeddings:
                continue

            loc_embeddings = torch.vstack(cycle_embeddings)

            # Bring the embeddings to exactly (EMBEDDING_ROWS, 128): drop surplus
            # rows (what the negative padding did implicitly before), then
            # zero-pad at the bottom if there are too few.
            loc_embeddings = loc_embeddings[:EMBEDDING_ROWS]
            cur_no_rows = loc_embeddings.size()[0]
            pad = ZeroPad2d((0, 0, 0, EMBEDDING_ROWS - cur_no_rows))
            loc_embeddings = pad(loc_embeddings)

            cur_recording_cycles_embeddings[i] = torch.flatten(loc_embeddings)

        per_patient_embeddings.append(torch.flatten(cur_recording_cycles_embeddings))

# Always 2-D (patients x features), including the single-patient case, which
# previously produced a 1-D tensor.
recording_cycles_embeddings = torch.stack(per_patient_embeddings) if per_patient_embeddings else None

  0%|          | 0/199 [00:00<?, ?it/s]

In [8]:
import torchvggish

# List every name exposed at the top level of the torchvggish package.
exported_names = dir(torchvggish)
print(exported_names)

['ADAM_EPSILON', 'AUDIO_EMBEDDING_FEATURE_NAME', 'EMBEDDING_SIZE', 'EXAMPLE_HOP_SECONDS', 'EXAMPLE_WINDOW_SECONDS', 'INIT_STDDEV', 'INPUT_OP_NAME', 'INPUT_TENSOR_NAME', 'LEARNING_RATE', 'LOG_OFFSET', 'MEL_MAX_HZ', 'MEL_MIN_HZ', 'NUM_BANDS', 'NUM_FRAMES', 'NUM_MEL_BINS', 'OUTPUT_OP_NAME', 'OUTPUT_TENSOR_NAME', 'PCA_EIGEN_VECTORS_NAME', 'PCA_MEANS_NAME', 'PCA_PARAMS', 'Postprocessor', 'QUANTIZE_MAX_VAL', 'QUANTIZE_MIN_VAL', 'SAMPLE_RATE', 'STFT_HOP_LENGTH_SECONDS', 'STFT_WINDOW_LENGTH_SECONDS', 'VGG', 'VGGISH_WEIGHTS', '__builtins__', '__cached__', '__doc__', '__file__', '__loader__', '__name__', '__package__', '__path__', '__spec__', 'hub', 'make_layers', 'mel_features', 'name', 'nn', 'np', 'resampy', 'sf', 'torch', 'torchvggish', 'vggish', 'vggish_input', 'vggish_params', 'waveform_to_examples', 'wavfile_to_examples']


In [3]:
# Sanity check: embed one whole recording straight from its .wav file.
# Alternative sample recording:
# example_file = "C:/Users/lumin/Desktop/Work/20212/Data/circor-heart-sound/final/test/50149_MV.wav"
example_file = "C:/Users/lumin/Desktop/Work/20212/Data/circor-heart-sound/final/test/13918_MV.wav"
example = vggish_input.wavfile_to_examples(example_file)

# Inference only, so skip building the autograd graph; calling the module
# (rather than .forward) is the standard PyTorch entry point.
with torch.no_grad():
    embeddings = embedding_model(example)

In [4]:
# Show the dimensions of the embedding tensor computed for the example file.
print(embeddings.shape)

torch.Size([29, 128])
