In [1]:
from matlab import engine, double
import io

eng = engine.start_matlab()
out = io.StringIO()
err = io.StringIO()

### Load Logistic-Regression HSMM vars

In [6]:
hsmm_vars = eng.load('trained_logreg_hsmm_variables.mat')

B_matrix = hsmm_vars['logistic_regression_B_matrix']
pi_vector = hsmm_vars['pi_vector']
total_obs_distribution = hsmm_vars['total_obs_distribution']

Fs = 1000.0

### Run Segmentation on test audio data

In [3]:
# Load and resample test audio data
import sys
sys.path.append('C:/Users/lumin/Desktop/Work/20212/source-code/python-classifier-2022')
from helper_code import *

test_data_folder = 'C:/Users/lumin/Desktop/Work/20212/Data/circor-heart-sound/final/test/'

In [4]:
from os.path import join, dirname, basename
import numpy as np
from tqdm.notebook import tqdm


def run_segmentation_algo(folder_path, matlab_engine):
    """
    Run segmentation algorithm on every audio file in folder and save them in a .tsv file
    """
    patient_files = find_patient_files(folder_path)

    for current_patient in tqdm(patient_files, desc='Patients', position=0):
        current_patient_data = load_patient_data(current_patient)
        rec_files, current_recordings = load_recordings(folder_path, current_patient_data, get_name=True)

        for i in tqdm(range(len(current_recordings)), desc='Recordings', position=1):
            current_audio_data = double(np.vstack(current_recordings[i]))
            ret = matlab_engine.runSpringerSegmentationAlgorithm(current_audio_data, Fs, B_matrix, pi_vector, total_obs_distribution, nargout=1)
            
            recording_name = basename(rec_files[i])
            seg_file = join(dirname(rec_files[i]), f"seg_{recording_name.replace('wav', 'tsv')}")
            
            process_segmentation_result(ret, seg_file)

        break

def process_segmentation_result(seg_array, output_file):
    SAMPLING_RATE = 1000.0
    
    current_state = None
    state_start = None

    print(f'Writing to {output_file}')
    with open(output_file, 'w+') as f:
        for sample_no, state in enumerate(seg_array):
            state = int(float(str(state).replace('[','').replace(']', '')))
            if state != current_state: # If a state change is detected
                print(f'{current_state} - {state} - {sample_no}')
                if current_state is not None: # If this is not first sample
                    start_t = state_start / SAMPLING_RATE

                    state_end = float(sample_no) - 1
                    end_t = state_end / SAMPLING_RATE

                    # to_write = f'{start_t}\t{end_t}\t{current_state}\n'
                    # print(to_write)
                    f.write(f'{start_t}\t{end_t}\t{current_state}\n') # Write state info to output file

                # register new current state
                current_state = state
                state_start = sample_no

        f.close()


In [7]:
run_segmentation_algo(test_data_folder, eng)

Patients:   0%|          | 0/199 [00:00<?, ?it/s]

Recordings:   0%|          | 0/4 [00:00<?, ?it/s]

Writing to C:/Users/lumin/Desktop/Work/20212/Data/circor-heart-sound/final/test\seg_13918_AV.tsv
None - 0 - 0
0 - 1 - 20
1 - 2 - 160
2 - 3 - 280
3 - 4 - 380
4 - 1 - 640
1 - 2 - 760
2 - 3 - 860
3 - 4 - 940
4 - 1 - 1200
1 - 2 - 1320
2 - 3 - 1420
3 - 4 - 1520
4 - 1 - 1780
1 - 2 - 1900
2 - 3 - 2000
3 - 4 - 2100
4 - 1 - 2360
1 - 2 - 2480
2 - 3 - 2580
3 - 4 - 2680
4 - 1 - 2940
1 - 2 - 3060
2 - 3 - 3140
3 - 4 - 3240
4 - 1 - 3500
1 - 2 - 3620
2 - 3 - 3720
3 - 4 - 3820
4 - 1 - 4080
1 - 2 - 4200
2 - 3 - 4300
3 - 4 - 4400
4 - 1 - 4660
1 - 2 - 4780
2 - 3 - 4880
3 - 4 - 4980
4 - 1 - 5240
1 - 2 - 5360
2 - 3 - 5460
3 - 4 - 5540
4 - 1 - 5800
1 - 2 - 5920
2 - 3 - 6020
3 - 4 - 6120
4 - 1 - 6380
1 - 2 - 6500
2 - 3 - 6600
3 - 4 - 6700
4 - 1 - 6960
1 - 2 - 7080
2 - 3 - 7180
3 - 4 - 7280
4 - 1 - 7520
1 - 2 - 7640
2 - 3 - 7740
3 - 4 - 7840
4 - 1 - 8120
1 - 2 - 8240
2 - 3 - 8320
3 - 4 - 8420
4 - 1 - 8660
1 - 2 - 8800
2 - 3 - 8900
3 - 4 - 9000
4 - 1 - 9240
1 - 2 - 9360
2 - 3 - 9460
3 - 4 - 9600
4 - 1 - 9900
1 