In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [3]:
import os
import numpy as np
import pandas as pd
import scipy.signal as signal
from scipy.stats import skew, kurtosis
from xgboost import XGBRegressor
from sklearn.multioutput import MultiOutputRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import mean_absolute_error, r2_score
from sklearn.ensemble import AdaBoostRegressor

import cv2

def preprocess_ppg(ppg_signal, fs=125):
    b, a = signal.cheby1(4, 0.5, 10 / (0.5 * fs))
    filtered_signal = signal.filtfilt(b, a, ppg_signal)
    dc_component = signal.medfilt(filtered_signal, kernel_size=201)
    dc_free_signal = filtered_signal - dc_component
    return dc_free_signal, dc_component

def detect_key_points(ppg_signal, fs=125):
    b, a = signal.cheby1(4, 0.5, 10 / (0.5 * fs))
    filtered_signal = signal.filtfilt(b, a, ppg_signal)
    smoothed_signal = signal.savgol_filter(filtered_signal, window_length=31, polyorder=3)
    peaks, _ = signal.find_peaks(smoothed_signal, distance=50)
    systolic_peaks = peaks[smoothed_signal[peaks].argsort()][-len(peaks)//10:]
    return systolic_peaks

def extract_features(ppg_signal, systolic_peaks, dc_component, fs=125, num_pulses=5):
    features = []
    heart_rate = 60 * fs / np.mean(np.diff(systolic_peaks))
    features.append(heart_rate)
    mnpv = np.ptp(ppg_signal) / np.mean(dc_component)
    features.append(mnpv)
    # features.append(skew(ppg_signal))
    # features.append(kurtosis(ppg_signal))
    # features.append(np.std(ppg_signal))
    # features.append(np.var(ppg_signal))
    # features.append(np.mean(ppg_signal))

    for i in range(1, min(len(systolic_peaks), num_pulses + 1)):
        pulse = ppg_signal[systolic_peaks[i-1]:systolic_peaks[i]]
        area = np.trapz(pulse)
        features.append(area)

    while len(features) < num_pulses + 7:
        features.append(0)

    return features

def load_dataset(directory):
    ppg_signals = []
    sbp_values = []
    dbp_values = []
    for subdir, _, files in os.walk(directory):
        for file in files:
            if file.endswith('.csv'):
                filepath = os.path.join(subdir, file)
                data = pd.read_csv(filepath)
                data.columns = data.columns.str.strip()
                data = data.dropna()
                data['SBP'] = data['SBP'].astype(float)
                data['DBP'] = data['DBP'].astype(float)
                if not data.empty:
                    ppg_signals.append(data['PPG'].values)
                    sbp_values.append(data['SBP'].values[0])
                    dbp_values.append(data['DBP'].values[0])
    return ppg_signals, sbp_values, dbp_values

def evaluate_model(model, X, y):
    y_pred = model.predict(X)
    sbp_mae = mean_absolute_error(y[:, 0], y_pred[:, 0])
    dbp_mae = mean_absolute_error(y[:, 1], y_pred[:, 1])

    sbp_correct = np.abs(y[:, 0] - y_pred[:, 0]) < 10
    dbp_correct = np.abs(y[:, 1] - y_pred[:, 1]) < 10

    sbp_accuracy = np.mean(sbp_correct)
    dbp_accuracy = np.mean(dbp_correct)

    sbp_r2 = r2_score(y[:, 0], y_pred[:, 0])
    dbp_r2 = r2_score(y[:, 1], y_pred[:, 1])

    return sbp_mae, dbp_mae, sbp_accuracy, dbp_accuracy, sbp_r2, dbp_r2

def train_combined_model(ppg_signals, sbp_values, dbp_values):
    X = []
    y = []
    for ppg_signal, sbp, dbp in zip(ppg_signals, sbp_values, dbp_values):
        dc_free_signal, dc_component = preprocess_ppg(ppg_signal)
        systolic_peaks = detect_key_points(dc_free_signal)
        features = extract_features(dc_free_signal, systolic_peaks, dc_component)
        X.append(features)
        y.append([sbp, dbp])

    X = np.array(X)
    y = np.array(y)

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    models = {
       # 'XGBoost': MultiOutputRegressor(XGBRegressor(objective='reg:squarederror')),
        'RandomForest': MultiOutputRegressor(RandomForestRegressor()),
       'AdaBoost': MultiOutputRegressor(AdaBoostRegressor()),
    }

    param_grids = {
        'XGBoost': {
            'estimator__n_estimators': [100, 200, 300],
            'estimator__learning_rate': [0.01, 0.05, 0.1],
            'estimator__max_depth': [3, 5, 7],
            'estimator__min_child_weight': [1, 3, 5],
            'estimator__subsample': [0.8, 0.9, 1.0],
            'estimator__colsample_bytree': [0.8, 0.9, 1.0]
        },
        'RandomForest': {
            'estimator__n_estimators': [100, 200, 300],
            'estimator__max_depth': [None, 10, 20, 30],
            'estimator__min_samples_split': [2, 5, 10],
            'estimator__min_samples_leaf': [1, 2, 4]
        },
        'AdaBoost': {
            'estimator__n_estimators': [50, 100, 150],
            'estimator__learning_rate': [0.01, 0.1, 1.0]
        }
    }

    best_models = {}
    results = {}

    for name, model in models.items():
        print(f"Training {name} model...")
        grid_search = GridSearchCV(model, param_grids[name], cv=5, scoring='neg_mean_absolute_error', n_jobs=-1)
        grid_search.fit(X_train, y_train)
        best_model = grid_search.best_estimator_
        best_models[name] = best_model

        # Evaluate on training set
        train_sbp_mae, train_dbp_mae, train_sbp_acc, train_dbp_acc, train_sbp_r2, train_dbp_r2 = evaluate_model(best_model, X_train, y_train)
        print(f"{name} - Training Results:")
        print(f"  Systolic BP: MAE = {train_sbp_mae:.2f} mmHg, Accuracy = {train_sbp_acc*100:.2f}%, R2 = {train_sbp_r2:.4f}")
        print(f"  Diastolic BP: MAE = {train_dbp_mae:.2f} mmHg, Accuracy = {train_dbp_acc*100:.2f}%, R2 = {train_dbp_r2:.4f}")

        # Evaluate on test set
        test_sbp_mae, test_dbp_mae, test_sbp_acc, test_dbp_acc, test_sbp_r2, test_dbp_r2 = evaluate_model(best_model, X_test, y_test)
        print(f"{name} - Test Results:")
        print(f"  Systolic BP: MAE = {test_sbp_mae:.2f} mmHg, Accuracy = {test_sbp_acc*100:.2f}%, R2 = {test_sbp_r2:.4f}")
        print(f"  Diastolic BP: MAE = {test_dbp_mae:.2f} mmHg, Accuracy = {test_dbp_acc*100:.2f}%, R2 = {test_dbp_r2:.4f}")

        results[name] = {
            'train': {'sbp_mae': train_sbp_mae, 'dbp_mae': train_dbp_mae, 'sbp_acc': train_sbp_acc, 'dbp_acc': train_dbp_acc, 'sbp_r2': train_sbp_r2, 'dbp_r2': train_dbp_r2},
            'test': {'sbp_mae': test_sbp_mae, 'dbp_mae': test_dbp_mae, 'sbp_acc': test_sbp_acc, 'dbp_acc': test_dbp_acc, 'sbp_r2': test_sbp_r2, 'dbp_r2': test_dbp_r2}
        }

    return best_models, results

def video_to_frames(video_path):
    cap = cv2.VideoCapture(video_path)
    frames = []
    while True:
        ret, frame = cap.read()
        if not ret:
            break
        frames.append(cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY))
    cap.release()
    return np.array(frames)

def frames_to_ppg(frames, fs=125):
    if frames.size == 0:
        return np.array([])
    ppg_signal = np.mean(frames, axis=(1, 2))
    return ppg_signal

def predict_blood_pressure(video_path, best_models):
    frames = video_to_frames(video_path)
    ppg_signal = frames_to_ppg(frames)
    if len(ppg_signal) == 0:
        print(f"Warning: No PPG signal extracted from {video_path}. Skipping...")
        return
    dc_free_signal, dc_component = preprocess_ppg(ppg_signal)
    systolic_peaks = detect_key_points(dc_free_signal)
    features = extract_features(dc_free_signal, systolic_peaks, dc_component)

    for name, model in best_models.items():
        bp_prediction = model.predict([features])
        print(f'{name} - Predicted Systolic BP for {video_path}: {bp_prediction[0][0]:.2f} mmHg')
        print(f'{name} - Predicted Diastolic BP for {video_path}: {bp_prediction[0][1]:.2f} mmHg')

def main():
    dataset_directory = '/content/drive/MyDrive/Queensland_signals'
    ppg_signals, sbp_values, dbp_values = load_dataset(dataset_directory)
    best_models, results = train_combined_model(ppg_signals, sbp_values, dbp_values)

    video_path = input("Enter the video file path: ")
    predict_blood_pressure(video_path, best_models)

if __name__ == "__main__":
    main()

Training RandomForest model...
RandomForest - Training Results:
  Systolic BP: MAE = 6.23 mmHg, Accuracy = 86.49%, R2 = 0.5746
  Diastolic BP: MAE = 7.87 mmHg, Accuracy = 72.97%, R2 = 0.5088
RandomForest - Test Results:
  Systolic BP: MAE = 12.62 mmHg, Accuracy = 60.00%, R2 = -0.2148
  Diastolic BP: MAE = 11.35 mmHg, Accuracy = 40.00%, R2 = -0.5004
Training AdaBoost model...
AdaBoost - Training Results:
  Systolic BP: MAE = 3.83 mmHg, Accuracy = 91.89%, R2 = 0.8308
  Diastolic BP: MAE = 4.87 mmHg, Accuracy = 89.19%, R2 = 0.8181
AdaBoost - Test Results:
  Systolic BP: MAE = 12.69 mmHg, Accuracy = 70.00%, R2 = -0.2248
  Diastolic BP: MAE = 12.12 mmHg, Accuracy = 40.00%, R2 = -0.6546
Enter the video file path: /content/drive/MyDrive/BP_videos /Udanthika_1.MOV
RandomForest - Predicted Systolic BP for /content/drive/MyDrive/BP_videos /Udanthika_1.MOV: 111.51 mmHg
RandomForest - Predicted Diastolic BP for /content/drive/MyDrive/BP_videos /Udanthika_1.MOV: 66.92 mmHg
AdaBoost - Predicted Syst

In [None]:
import os
import numpy as np
import pandas as pd
import scipy.signal as signal
from scipy.stats import skew, kurtosis
from xgboost import XGBRegressor
from sklearn.multioutput import MultiOutputRegressor
from sklearn.ensemble import RandomForestRegressor, AdaBoostRegressor
from sklearn.linear_model import Ridge
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import mean_absolute_error, r2_score
import cv2

def preprocess_ppg(ppg_signal, fs=125):
    b, a = signal.cheby1(4, 0.5, 10 / (0.5 * fs))
    filtered_signal = signal.filtfilt(b, a, ppg_signal)
    dc_component = signal.medfilt(filtered_signal, kernel_size=201)
    dc_free_signal = filtered_signal - dc_component
    return dc_free_signal, dc_component

def detect_key_points(ppg_signal, fs=125):
    b, a = signal.cheby1(4, 0.5, 10 / (0.5 * fs))
    filtered_signal = signal.filtfilt(b, a, ppg_signal)
    smoothed_signal = signal.savgol_filter(filtered_signal, window_length=31, polyorder=3)
    peaks, _ = signal.find_peaks(smoothed_signal, distance=50)
    systolic_peaks = peaks[smoothed_signal[peaks].argsort()][-len(peaks)//10:]
    return systolic_peaks

def extract_features(ppg_signal, systolic_peaks, dc_component, fs=125, num_pulses=5):
    features = []
    heart_rate = 60 * fs / np.mean(np.diff(systolic_peaks))
    features.append(heart_rate)
    mnpv = np.ptp(ppg_signal) / np.mean(dc_component)
    features.append(mnpv)
    features.append(skew(ppg_signal))
    features.append(kurtosis(ppg_signal))
    features.append(np.std(ppg_signal))
    features.append(np.var(ppg_signal))
    features.append(np.mean(ppg_signal))

    for i in range(1, min(len(systolic_peaks), num_pulses + 1)):
        pulse = ppg_signal[systolic_peaks[i-1]:systolic_peaks[i]]
        area = np.trapz(pulse)
        features.append(area)

    while len(features) < num_pulses + 7:
        features.append(0)

    return features

def load_dataset(directory):
    ppg_signals = []
    sbp_values = []
    dbp_values = []
    for subdir, _, files in os.walk(directory):
        for file in files:
            if file.endswith('.csv'):
                filepath = os.path.join(subdir, file)
                data = pd.read_csv(filepath)
                data.columns = data.columns.str.strip()
                data = data.dropna()
                data['SBP'] = data['SBP'].astype(float)
                data['DBP'] = data['DBP'].astype(float)
                if not data.empty:
                    ppg_signals.append(data['PPG'].values)
                    sbp_values.append(data['SBP'].values[0])
                    dbp_values.append(data['DBP'].values[0])
    return ppg_signals, sbp_values, dbp_values

def evaluate_model(model, X, y):
    y_pred = model.predict(X)
    sbp_mae = mean_absolute_error(y[:, 0], y_pred[:, 0])
    dbp_mae = mean_absolute_error(y[:, 1], y_pred[:, 1])

    sbp_correct = np.abs(y[:, 0] - y_pred[:, 0]) < 10
    dbp_correct = np.abs(y[:, 1] - y_pred[:, 1]) < 10

    sbp_accuracy = np.mean(sbp_correct)
    dbp_accuracy = np.mean(dbp_correct)

    sbp_r2 = r2_score(y[:, 0], y_pred[:, 0])
    dbp_r2 = r2_score(y[:, 1], y_pred[:, 1])

    return sbp_mae, dbp_mae, sbp_accuracy, dbp_accuracy, sbp_r2, dbp_r2

def train_combined_model(ppg_signals, sbp_values, dbp_values):
    X = []
    y = []
    for ppg_signal, sbp, dbp in zip(ppg_signals, sbp_values, dbp_values):
        dc_free_signal, dc_component = preprocess_ppg(ppg_signal)
        systolic_peaks = detect_key_points(dc_free_signal)
        features = extract_features(dc_free_signal, systolic_peaks, dc_component)
        X.append(features)
        y.append([sbp, dbp])

    X = np.array(X)
    y = np.array(y)

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    models = {
        'XGBoost': MultiOutputRegressor(XGBRegressor(objective='reg:squarederror')),
        'RandomForest': MultiOutputRegressor(RandomForestRegressor()),
        'AdaBoost': MultiOutputRegressor(AdaBoostRegressor()),
    }

    param_grids = {
        'XGBoost': {
            'estimator__n_estimators': [100, 200, 300],
            'estimator__learning_rate': [0.01, 0.05, 0.1],
            'estimator__max_depth': [3, 5, 7],
            'estimator__min_child_weight': [1, 3, 5],
            'estimator__subsample': [0.8, 0.9, 1.0],
            'estimator__colsample_bytree': [0.8, 0.9, 1.0]
        },
        'RandomForest': {
            'estimator__n_estimators': [100, 200, 300],
            'estimator__max_depth': [None, 10, 20, 30],
            'estimator__min_samples_split': [2, 5, 10],
            'estimator__min_samples_leaf': [1, 2, 4]
        },
        'AdaBoost': {
            'estimator__n_estimators': [50, 100, 150],
            'estimator__learning_rate': [0.01, 0.1, 1.0]
        }
    }

    best_models = {}
    results = {}

    for name, model in models.items():
        print(f"Training {name} model...")
        grid_search = GridSearchCV(model, param_grids[name], cv=5, scoring='neg_mean_absolute_error', n_jobs=-1)
        grid_search.fit(X_train, y_train)
        best_model = grid_search.best_estimator_
        best_models[name] = best_model

        # Evaluate on training set
        train_sbp_mae, train_dbp_mae, train_sbp_acc, train_dbp_acc, train_sbp_r2, train_dbp_r2 = evaluate_model(best_model, X_train, y_train)
        print(f"{name} - Training Results:")
        print(f"  Systolic BP: MAE = {train_sbp_mae:.2f} mmHg, Accuracy = {train_sbp_acc*100:.2f}%, R2 = {train_sbp_r2:.4f}")
        print(f"  Diastolic BP: MAE = {train_dbp_mae:.2f} mmHg, Accuracy = {train_dbp_acc*100:.2f}%, R2 = {train_dbp_r2:.4f}")

        # Evaluate on test set
        test_sbp_mae, test_dbp_mae, test_sbp_acc, test_dbp_acc, test_sbp_r2, test_dbp_r2 = evaluate_model(best_model, X_test, y_test)
        print(f"{name} - Test Results:")
        print(f"  Systolic BP: MAE = {test_sbp_mae:.2f} mmHg, Accuracy = {test_sbp_acc*100:.2f}%, R2 = {test_sbp_r2:.4f}")
        print(f"  Diastolic BP: MAE = {test_dbp_mae:.2f} mmHg, Accuracy = {test_dbp_acc*100:.2f}%, R2 = {test_dbp_r2:.4f}")

        results[name] = {
            'train': {'sbp_mae': train_sbp_mae, 'dbp_mae': train_dbp_mae, 'sbp_acc': train_sbp_acc, 'dbp_acc': train_dbp_acc, 'sbp_r2': train_sbp_r2, 'dbp_r2': train_dbp_r2},
            'test': {'sbp_mae': test_sbp_mae, 'dbp_mae': test_dbp_mae, 'sbp_acc': test_sbp_acc, 'dbp_acc': test_dbp_acc, 'sbp_r2': test_sbp_r2, 'dbp_r2': test_dbp_r2}
        }

    return best_models, results

dataset_directory = '/content/drive/MyDrive/Queensland_signals'
ppg_signals, sbp_values, dbp_values = load_dataset(dataset_directory)
best_models, results = train_combined_model(ppg_signals, sbp_values, dbp_values)



Training XGBoost model...
XGBoost - Training Results:
  Systolic BP: MAE = 5.23 mmHg, Accuracy = 83.78%, R2 = 0.6491
  Diastolic BP: MAE = 6.59 mmHg, Accuracy = 81.08%, R2 = 0.6601
XGBoost - Test Results:
  Systolic BP: MAE = 11.57 mmHg, Accuracy = 70.00%, R2 = -0.1990
  Diastolic BP: MAE = 9.17 mmHg, Accuracy = 60.00%, R2 = -0.0831
Training RandomForest model...
RandomForest - Training Results:
  Systolic BP: MAE = 5.92 mmHg, Accuracy = 81.08%, R2 = 0.5885
  Diastolic BP: MAE = 6.35 mmHg, Accuracy = 83.78%, R2 = 0.6383
RandomForest - Test Results:
  Systolic BP: MAE = 11.35 mmHg, Accuracy = 70.00%, R2 = -0.1756
  Diastolic BP: MAE = 8.11 mmHg, Accuracy = 60.00%, R2 = 0.2327
Training AdaBoost model...
AdaBoost - Training Results:
  Systolic BP: MAE = 3.41 mmHg, Accuracy = 100.00%, R2 = 0.8831
  Diastolic BP: MAE = 2.87 mmHg, Accuracy = 94.59%, R2 = 0.9304
AdaBoost - Test Results:
  Systolic BP: MAE = 10.90 mmHg, Accuracy = 70.00%, R2 = -0.1676
  Diastolic BP: MAE = 10.68 mmHg, Accuracy

In [None]:
def video_to_frames(video_path):
    cap = cv2.VideoCapture(video_path)
    frames = []
    while True:
        ret, frame = cap.read()
        if not ret:
            break
        frames.append(cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY))
    cap.release()
    return np.array(frames)

def frames_to_ppg(frames, fs=125):
    if frames.size == 0:
        return np.array([])
    ppg_signal = np.mean(frames, axis=(1, 2))
    return ppg_signal

def predict_blood_pressure(video_path, best_models):
    frames = video_to_frames(video_path)
    ppg_signal = frames_to_ppg(frames)
    if len(ppg_signal) == 0:
        print(f"Warning: No PPG signal extracted from {video_path}. Skipping...")
        return
    dc_free_signal, dc_component = preprocess_ppg(ppg_signal)
    systolic_peaks = detect_key_points(dc_free_signal)
    features = extract_features(dc_free_signal, systolic_peaks, dc_component)

    for name, model in best_models.items():
        bp_prediction = model.predict([features])
        print(f'{name} - Predicted Systolic BP for {video_path}: {bp_prediction[0][0]:.2f} mmHg')
        print(f'{name} - Predicted Diastolic BP for {video_path}: {bp_prediction[0][1]:.2f} mmHg')

video_path = input("Enter the video file path: ")
predict_blood_pressure(video_path, best_models)



Enter the video file path: /content/drive/MyDrive/S032_V.MOV
XGBoost - Predicted Systolic BP for /content/drive/MyDrive/S032_V.MOV: 112.08 mmHg
XGBoost - Predicted Diastolic BP for /content/drive/MyDrive/S032_V.MOV: 54.87 mmHg
RandomForest - Predicted Systolic BP for /content/drive/MyDrive/S032_V.MOV: 110.27 mmHg
RandomForest - Predicted Diastolic BP for /content/drive/MyDrive/S032_V.MOV: 61.98 mmHg
AdaBoost - Predicted Systolic BP for /content/drive/MyDrive/S032_V.MOV: 116.08 mmHg
AdaBoost - Predicted Diastolic BP for /content/drive/MyDrive/S032_V.MOV: 66.40 mmHg
