In [None]:
import os.path
import logging
import pytest
import math
import matplotlib.pyplot as plt
from hq.hdf.io import lifeq_hdf_store
from hq.hdf.names.on_device import MetricPpgGreen

import pandas as pd
import numpy as np
from scipy import fftpack
import ipympl

from lq.features.feature_extraction_tools import get_ppg_points, get_apg_points

import lq.features.five_features_extraction as fivefeat
from lq.features.ppg_signal_process import (
    find_start_stop_of_signal,
    subtract_fitted_line_from_pulse,
    filter_outlier_rr_and_pulses_from_signal,
)

In [None]:
# Identifier of the user whose recording is analysed; it is used to build the
# HDF5 file name and to label the saved spectrum plot.
user_id = 'ef36c4fb-18ba-477c-b1c1-68adff06b670'

In [None]:
# Logger used by the helper function(s) defined in this notebook.
logger = logging.getLogger(__name__)
# Parent of the current working directory (abspath('') resolves to the cwd).
BASE_DIR = os.path.dirname(os.path.abspath(''))
# Location of this user's recording: <BASE_DIR>/data/MMI/<user_id>.h5
test_file_path = os.path.join(BASE_DIR, "data", "MMI", f"{user_id}.h5")

In [None]:
# Expected spacing between consecutive samples and its tolerance, in the same
# integer units as the PPG index (presumably nanoseconds, i.e. 40 ms per
# sample at 25 Hz — TODO confirm).
EXPECTED_TIME_DIFF = 40_000_000
ALLOWED_DELTA = 1_000_000
# Shortest usable segment duration, in seconds (per the name).
MINIMUM_ALLOWED_DURATION_SEC = 25

# Lowest accepted PWF confidence value; the scale is not shown in this notebook.
MINIMUM_PWF_CONF = 20

In [None]:
def split_dataframe_on_timestamps(df, max_diff_secs=10, max_len_ppg_event_secs=600):
    """Split a timestamp-indexed PPG frame into contiguous segments.

    A new segment starts wherever the gap between two consecutive index values
    exceeds ``max_diff_secs * 40000000`` index units. Segments with more than
    ``max_len_ppg_event_secs * 25`` rows are dropped with a warning.

    NOTE(review): 40000000 looks like one 25 Hz sample period in nanoseconds,
    which would make the gap threshold ``max_diff_secs`` *sample periods*
    (0.4 s by default) rather than seconds. Behaviour is kept as-is; confirm
    the intended unit.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with a numeric timestamp index (assumed sorted — not checked here).
    max_diff_secs : int or float
        Multiplier of the 40000000-unit sample period that defines a gap.
    max_len_ppg_event_secs : int or float
        Longest segment to keep, converted to rows at 25 rows per second.

    Returns
    -------
    list of pandas.DataFrame
        Kept segments in index order, each carrying an extra ``counter``
        column. Empty when no segment qualifies or ``df`` has no rows.
    """
    sample_period = 40000000  # index units between two consecutive samples
    samples_per_sec = 25

    # An empty frame would make the length-1 seed below mismatch the index.
    if len(df) == 0:
        logger.warning("No appropriate PPG data available for PWF calculations.")
        return []

    # Seed with one sample period so the first row never opens a new segment.
    t_diffs = np.append([sample_period], np.diff(df.index))
    df = df.assign(counter=np.cumsum(t_diffs > max_diff_secs * sample_period))

    max_rows = max_len_ppg_event_secs * samples_per_sec
    dfs = []
    for _, segment in df.groupby("counter"):
        if segment.empty:
            continue
        if len(segment) <= max_rows:
            dfs.append(segment)
        else:
            # Logger.warn is a deprecated alias; use Logger.warning.
            logger.warning("Max duration of PPG event exceeded.")

    # Log warning if no data available
    if not dfs:
        logger.warning("No appropriate PPG data available for PWF calculations.")

    return dfs

In [None]:
# Open the user's HDF5 store.
h5repo = lifeq_hdf_store.LifeQHDFStore(test_file_path)

# Look up the table registered for the green-PPG metric type and load it.
table_name_ppg_green = h5repo.find_table_name_by_type(MetricPpgGreen.table_type)
data_ppg_green = h5repo.get_table_by_name(table_name_ppg_green)

## 25 Hz pulses before any preprocessing

In [None]:
# Name the single value column, drop repeated timestamps (keeping the first
# occurrence) and order by time. The chained, non-inplace sort avoids mutating
# a boolean-indexed slice.
data_ppg_green.columns = ["ppg_green"]
data_ppg_green = (
    data_ppg_green[~data_ppg_green.index.duplicated(keep="first")]
    .sort_index()
)

# Remember which rows held finite samples before any filling.
data_pwf_valid = np.isfinite(data_ppg_green["ppg_green"].values)
# ffill()/bfill() replace the deprecated fillna(method="pad") form.
data_ppg_green = data_ppg_green.ffill().bfill()
# Keep only the rows that were finite originally.
data_ppg_green = data_ppg_green[data_pwf_valid]

# Cut the cleaned recording into contiguous segments.
pwf_dfs = split_dataframe_on_timestamps(data_ppg_green)

In [None]:
# Number of segments returned by split_dataframe_on_timestamps.
len(pwf_dfs)

In [None]:
# Pick one segment to analyse. The index 100 is hard-coded, so this raises
# IndexError when fewer than 101 segments were found.
pwf_series = pwf_dfs[100]["ppg_green"]

# Plain 1-D array of the samples for the signal-processing calls below.
signal = pwf_series.values.flatten()
# Index span divided by 1e9 — presumably nanoseconds to seconds; TODO confirm index units.
signal_duration = (pwf_series.index[-1] - pwf_series.index[0]) / 1000000000

In [None]:
# First index value of the chosen segment; used to label the saved spectrum plot.
start_time = pwf_series.index[0]

In [None]:
# Display the segment's first index value.
start_time

In [None]:
# Quick look at the selected segment before any filtering.
plt.plot(signal)
plt.show()

In [None]:
# Sampling rate of the segment in Hz.
f_s = 25

# Discrete Fourier transform of the segment and the matching frequency axis:
# fftfreq yields cycles/sample, scaled here to Hz.
X = fftpack.fft(signal)
freqs = f_s * fftpack.fftfreq(len(signal))

# Stem plot of the magnitude spectrum, zoomed to +/-5 Hz.
fig, ax = plt.subplots()
ax.stem(freqs, np.absolute(X))
ax.set(
    title=f'frequency domain_{start_time}\n_{user_id}',
    xlabel='Frequency in Hertz [Hz]',
    ylabel='Frequency Domain (Spectrum) Magnitude',
    xlim=(-5, 5),
    ylim=(-5, 5e5),
)
fig.tight_layout()
# Written to the current working directory.
fig.savefig(f'frequency domain_fs{f_s}_{start_time}_{user_id}.png', dpi=150)

### Butter filter

In [None]:
import scipy.signal as sps

In [None]:
%matplotlib notebook
sos = sps.butter(4, 0.5, 'hp', fs=25, output='sos')
filtered = sps.sosfilt(sos, signal)

fig, (ax1, ax2) = plt.subplots(2, 1, sharex=True)
ax1.plot(signal)
ax1.set_title('MMI signal with 25Hz sampling rate')

ax2.plot(signal)
ax2.plot(filtered)
ax2.set_title('after 7 Hz butter low-pass filter')
# ax2.axis([-1, 751, np.min(signal), np.max(signal)])
plt.show()

### Chebyshev type II filter

In [None]:
%matplotlib inline
# sos_cheby2_order4 = sps.cheby2(4, 20, 0.28, 'lp', output='sos')
sos_cheby2_order4 = sps.cheby2(4, 20, 0.325, 'lp', output='sos')
filtered_cheb2_order4 = sps.sosfilt(sos_cheby2_order4, signal)

fig, (ax1, ax2) = plt.subplots(2, 1, sharex=True)
ax1.plot(-signal)
ax1.set_title('MMI signal with 25Hz sampling rate')

# ax2.plot(-signal, label='raw signal')
ax2.plot(-filtered_cheb2_order4, label='cheb2(m=4, Rs=20, cutoff=15Hz)', color='red')
# ax2.plot(filtered, label='butter(m=6, cutoff=15Hz)')
ax2.set_title('15 Chebyshev2 lowpass filter with Rs=20, m=4')
ax2.axis([200, 300, np.min(-signal), np.max(-signal)])
# ax2.legend(bbox_to_anchor=(1, 0.5))
plt.show()

In [None]:
# Filter order (n = 4) and stop-band attenuation (Rs = 20) follow Dr. Elgendi's paper.
# The paper does not state a cutoff frequency; 15 Hz is assumed there:
#   Wn = Fc / Fs * 2 = 15 / 1000 * 2 = 0.03
# With Fs = 25 Hz, a 7 Hz cutoff would be 7 / 25 * 2 = 0.56.
#
# Earlier variants that were tried:
# sos_cheby2_order4 = sps.cheby2(4, 18, 7, 'lp', fs=25, output='sos')
# wn = [i*0.07 for i in range(1, 10)]
# wn = [i*0.025 for i in range(1, 30)]
wn = [0.3, 0.32]

# One figure per candidate Wn: raw vs. filtered, both inverted, samples 200-300.
# The filter results of the last iteration stay bound after the loop.
for i, cutoff in enumerate(wn):
    sos_cheby2_order4 = sps.cheby2(4, 20, cutoff, 'lp', output='sos')
    filtered_cheb2_order4 = sps.sosfilt(sos_cheby2_order4, signal)

    fig, ax = plt.subplots()
    ax.plot(-signal, label='raw signal')
    ax.plot(-filtered_cheb2_order4, label='cheb2')
    ax.set_title(f'Chebyshev2 lowpass filter with m=4, Rs=20, wn={cutoff}')
    ax.set_xlim(200, 300)
    ax.set_ylim((-signal).min(), (-signal).max())
    ax.legend(bbox_to_anchor=(1, 0.5))
#     plt.savefig(f'Cheby2_lowpass_m=4_Rs=20_wn={wn[i]}_{start_time}_5b75df30-f453-4bd8-bdf4-df6332f21685.png', dpi=150)

It seems that the 30-second segment at 25 Hz is not improved significantly by the filter.

In [None]:
# Sample count of the most recently computed Chebyshev-filtered signal.
len(filtered_cheb2_order4)

## 250Hz pulse after subtraction 

In [None]:
from lq.features.ppg_signal_process import (
    detrend_and_denoise_signal_with_filters,
    peak_detection_for_pwf,
    process_pulse,
    subtract_fitted_line_from_pulse,
    find_start_stop_of_signal
)

In [None]:
# Parameters for cutting the PPG signal into individual pulses.
# The stray trailing comma used to make this the tuple ([],) instead of an empty list.
peak_indices = []
# Upsampling factor applied to the full signal (25 Hz * 10 = 250 Hz).
resample_size_on_full_signal_factor = 10
# Extra samples kept on each side of a pulse, counted at 25 Hz.
buffer_size_in_samples_25hz = 3
# Sampling rate of the recorded signal in Hz.
sampling_frequency = 25

Raw signal resampled to 250 Hz

In [None]:
# Detrend/denoise the 25 Hz segment with the library filters.
detrend_signal = detrend_and_denoise_signal_with_filters(signal, sampling_frequency)

# Locate peaks on the detrended signal (indices refer to 25 Hz samples).
peak_indices = peak_detection_for_pwf(detrend_signal, sampling_frequency)

In [None]:
# Upsample the detrended signal by the resampling factor.
resample_signal = sps.resample(detrend_signal, len(detrend_signal) * resample_size_on_full_signal_factor)
# Scale peak positions into the resampled index space.
# NOTE(review): this rebinds peak_indices to a scaled copy of itself, so
# re-running this cell alone scales the indices again.
peak_indices = peak_indices * resample_size_on_full_signal_factor

In [None]:
# Each pulse runs from one detected peak to the next one.
pulse_start = peak_indices[:-1]
pulse_stop = peak_indices[1:]

In [None]:
# Widen every pulse window by the buffer, converted from 25 Hz samples to
# resampled samples.
pulse_start = (
    pulse_start - buffer_size_in_samples_25hz * resample_size_on_full_signal_factor
)
pulse_stop = pulse_stop + buffer_size_in_samples_25hz * resample_size_on_full_signal_factor

# With fewer than two peaks there are no windows to clamp.
if len(pulse_start) > 0:
    # The first window must not start before the signal.
    pulse_start[0] = np.max((pulse_start[0], 0))
    # The indices live in the resampled domain, so clamp against the resampled
    # length. Clamping against len(signal) (the 25 Hz length) pulled the last
    # stop far below its start and produced an empty final pulse.
    pulse_stop[-1] = np.min((pulse_stop[-1], len(resample_signal)))

In [None]:
# Inspect the buffered window starts.
pulse_start

In [None]:
# Inspect the buffered window stops.
pulse_stop

In [None]:
# One inverted slice of the resampled signal per (start, stop) window.
all_pulses = [
    -resample_signal[window_start:window_stop]
    for window_start, window_stop in zip(pulse_start, pulse_stop)
]

Filtered signal resampled to 250 Hz

In [None]:
# Same pulse-extraction steps as for the raw signal, applied to the
# Chebyshev-filtered signal (whatever filtered_cheb2_order4 was bound to last).
detrend_filtered_signal = detrend_and_denoise_signal_with_filters(filtered_cheb2_order4, sampling_frequency)

# Peaks are detected independently here, so their count may differ from the raw run.
peak_indices = peak_detection_for_pwf(detrend_filtered_signal, sampling_frequency)

# Upsample and move the peak positions into the resampled index space.
resample_filterd_signal = sps.resample(detrend_filtered_signal, len(detrend_filtered_signal) * resample_size_on_full_signal_factor)
peak_indices = peak_indices * resample_size_on_full_signal_factor

# Each pulse runs from one peak to the next, widened by the buffer on both sides.
pulse_start = peak_indices[:-1]
pulse_stop = peak_indices[1:]

pulse_start = (
    pulse_start - buffer_size_in_samples_25hz * resample_size_on_full_signal_factor
)
pulse_stop = pulse_stop + buffer_size_in_samples_25hz * resample_size_on_full_signal_factor

# With fewer than two peaks there are no windows to clamp.
if len(pulse_start) > 0:
    pulse_start[0] = np.max((pulse_start[0], 0))
    # Clamp against the resampled length. len(signal) is the 25 Hz length and
    # would truncate the final window to an empty slice.
    pulse_stop[-1] = np.min((pulse_stop[-1], len(resample_filterd_signal)))

all_filtered_pulses = [
    -resample_filterd_signal[tmp_start:tmp_stop] for tmp_start, tmp_stop in zip(pulse_start, pulse_stop)
]

In [None]:
# Inspect the buffered window starts computed for the filtered signal.
pulse_start

In [None]:
# Inspect the buffered window stops computed for the filtered signal.
pulse_stop

In [None]:
%matplotlib notebook
# Overlay the inverted detrended signal and the inverted Chebyshev-filtered signal.
# NOTE(review): the second trace is filtered_cheb2_order4 (filtered, not
# detrended), not detrend_filtered_signal — confirm this comparison is intended.
plt.plot(-detrend_signal)
plt.plot(-filtered_cheb2_order4)
plt.show()

In [None]:
# Peak detection ran separately on the raw and the filtered signal, so the two
# pulse lists may differ in length. Indexing both by range(len(all_pulses))
# raised IndexError in that case.
if len(all_pulses) != len(all_filtered_pulses):
    logger.warning(
        "Raw and filtered pulse counts differ (%d vs %d); plotting the common prefix only.",
        len(all_pulses),
        len(all_filtered_pulses),
    )

# Later cells read `pulse` after this loop, so it must stay bound to the last
# processed raw pulse.
for i, (pulse, filtered_pulse) in enumerate(zip(all_pulses, all_filtered_pulses)):
    # Find the pulse's start/stop, subtract the line fitted between them
    # (per the helper's name), and trim the pulse to that span.
    start_index, stop_index = find_start_stop_of_signal(pulse)
    pulse = subtract_fitted_line_from_pulse(pulse, start_index, stop_index)
    pulse = pulse[start_index : stop_index + 1]

    # Same treatment for the matching filtered pulse.
    start_index, stop_index = find_start_stop_of_signal(filtered_pulse)
    filtered_pulse = subtract_fitted_line_from_pulse(filtered_pulse, start_index, stop_index)
    filtered_pulse = filtered_pulse[start_index : stop_index + 1]

    fig, ax = plt.subplots(nrows=1, ncols=1)
    ax.plot(pulse)
    ax.plot(filtered_pulse)
    ax.set_title(f'{i}th pulse')
    plt.show()

### Chebyshev type II filter

In [None]:
# Chebyshev type II low-pass applied to the last pulse left by the loop above.
order, Rs, Fc = 4, 20, 7

# With fs given, Fc is in Hz; the equivalent normalised value is
# wn = 7 / 250 * 2 = 0.056
sos = sps.cheby2(order, Rs, Fc, 'lp', fs=250, output='sos')
filtered_cheb2 = sps.sosfilt(sos, pulse)

cheb2_label = f'cheb2(m={order}, Rs={Rs}, cutoff={Fc}Hz)'

fig, (ax1, ax2) = plt.subplots(2, 1, sharex=True)

# Top panel: the unfiltered pulse on its own.
ax1.plot(pulse)
ax1.set_title('MMI signal with 250Hz sampling rate')

# Bottom panel: unfiltered vs. filtered, limited to the first ~250 samples.
ax2.plot(pulse, label='raw signal')
ax2.plot(filtered_cheb2, label=cheb2_label)
ax2.set_title(cheb2_label)
ax2.set_xlim(-1, 251)
ax2.set_ylim(np.min(pulse), np.max(pulse))
ax2.legend(bbox_to_anchor=(1, 0.5))
plt.show()

In [None]:
# Here we use the same order and Rs with Dr. Elgendi's paper 
# N = 4, Rs = 20, 
# cutoff frequency is not in paper, but we assume is 15 Hz. 
# Wn = Fc/Fs = 15 / 1000 * 2 = 0.03
# here, Fs = 25 Hz, 7/25 * 2 = 0.56, 
%matplotlib inline

# sos_cheby2_order4 = sps.cheby2(4, 18, 7, 'lp', fs=25, output='sos')
# wn = [i*0.07 for i in range(1, 10)]
wn = [i*0.0025 for i in range(1, 30)]

for i in range(len(wn)):
    sos_cheby2_order4 = sps.cheby2(4, 20, wn[i], 'lp', output='sos')
    filtered_cheb2_order4 = sps.sosfilt(sos_cheby2_order4, pulse)
    fig, ax = plt.subplots(nrows=1, ncols=1)
    ax.plot(pulse, label='raw signal')
    ax.plot(filtered_cheb2_order4, label='cheb2')
    # ax2.plot(filtered, label='butter(m=6, cutoff=15Hz)')
    ax.set_title(f'Chebyshev2 lowpass filter with m=4, Rs=20, wn={wn[i]}')
    ax.axis([-1, 251, np.min(pulse), np.max(pulse)])
    ax.legend(bbox_to_anchor=(1, 0.5))
    plt.show()