In [None]:
from scipy import stats
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from keras.models import load_model

In [2]:
# Data load

def Result_calculator(file_path):
    with open(file_path, 'r') as file:
        lines = file.readlines()
        SBP_val = float(lines[1][12:-1])
        DBP_val = float(lines[2][12:-1])
        SBP_PR_val = float(lines[3][-4:-1])
        DBP_PR_val = float(lines[4][-4:-1])
        
        return [SBP_val, DBP_val, SBP_PR_val, DBP_PR_val]

# 각 환자별 평균 계산 - attempt 없애기
def np_to_df(res_data):
    res_by_sub = np.empty((0,5))
    sub_list = np.unique(res_data[:,0])

    # Check every data is processed
    if sub_list.shape[0] == 50:
        if res_data.shape[0] != 150:
            print("Exp is not done yet, only: ", len(sub_list))

    for sub in sub_list:
        temp_sub_list = np.where(res_data[:,0]==sub)
        temp_res_val = res_data[temp_sub_list, 3:]
        temp_mean = np.mean(temp_res_val, axis = 1)
        temp_mean = np.insert(temp_mean, 0, sub)
        res_by_sub = np.insert(res_by_sub, res_by_sub.shape[0], temp_mean, axis = 0)
    
    res_dict = {"Subject":res_by_sub[:,0].astype(int), "SBP MAE":res_by_sub[:,1], "DBP MAE":res_by_sub[:,2], 
                   "SBP PR":res_by_sub[:,3], "DBP PR":res_by_sub[:,4]}
    res_pd = pd.DataFrame(res_dict)
    res_pd = res_pd.set_index('Subject')

    return res_pd

def res_reader(directory_path):
    # From model folders to Dataframe
    
    res_50 = np.empty((0,7), dtype = float) #axis = 3: Sub num, data_num, attempt, SBP_MAE, DBP_MAE, SBP_PR, DBP_PR
    res_100 = np.empty((0,7), dtype = float)
    res_360 = np.empty((0,7), dtype = float)
    res_720 = np.empty((0,7), dtype = float)
    res_1800 = np.empty((0,7), dtype = float)
    
    sub_res = np.empty((0,7), dtype = float)

    for root, dirs, files in os.walk(directory_path):
        for file in files:
            file_path = os.path.join(root, file)
            
            if file.endswith(".txt"):
                directory, filename = os.path.split(file_path)
                directory, attempt = os.path.split(directory)
                if int(attempt) < 4:
                    directory, data_num = os.path.split(directory)
                    directory, sub_num = os.path.split(directory)
                    result = Result_calculator(file_path)
                    temp_res = [sub_num, data_num, attempt]
                    temp_res = np.append(temp_res, result)
                    sub_res = np.insert(sub_res, sub_res.shape[0], temp_res, axis = 0)
    
    # 하나의 넘파이 배열 안에 모든 데이터 넣기 - attempt 별 데이터 모두 들어감
    for data in sub_res:
        if int(data[1]) == 50:
            res_50 = np.insert(res_50, res_50.shape[0], data, axis = 0)
        if int(data[1]) == 100:
            res_100 = np.insert(res_100, res_100.shape[0], data, axis = 0)
        if int(data[1]) == 360:
            res_360 = np.insert(res_360, res_360.shape[0], data, axis = 0)
        if int(data[1]) == 720:
            res_720 = np.insert(res_720, res_720.shape[0], data, axis = 0)
        if int(data[1]) == 1800:
            res_1800 = np.insert(res_1800, res_1800.shape[0], data, axis = 0)
    
    # 각 환자별 평균 계산 - attempt 없애기
    res_50_df = np_to_df(res_50)
    res_100_df = np_to_df(res_100)
    res_360_df = np_to_df(res_360)
    res_720_df = np_to_df(res_720)
    res_1800_df = np_to_df(res_1800)

    return res_50_df, res_100_df, res_360_df, res_720_df, res_1800_df

def find_files(folder_path, kind):
    file_root_list = []
    for (path, dir, files) in os.walk(folder_path):
        for filename in files:
            if filename.endswith(kind):
                file_root = os.path.join(path, filename)
                file_root_list.append(file_root)
    return file_root_list

## T-test

In [44]:
def pred_mae(sub_list):
    for sub in sub_list:
        a = os.path.dirname(sub)
        b = os.path.dirname(a)
        tl_data_number = int(os.path.split(b)[1])
        
        if tl_data_number == 50:
            
            model_path = find_files(a, ".h5")
            model = load_model(model_path[0])
            data = np.load(sub)
            x_test, y_test = data['x_test'], data['y_test']
    
            y_pred = model.predict(x_test, verbose = 0)
            mae = y_test - y_pred
        else:
            continue
        break
    return mae

# T-test
dir_path_1 = '/home/yckim/research/nibp_ppg/ex_3/model/transfer/m2_to_m1/'
dir_path_2 = '/home/yckim/research/nibp_ppg/ex_3/model/transfer/v2_to_m1/'
sub_list_1 = find_files(dir_path_1, "used_data_1.npz")
sub_list_2 = find_files(dir_path_2, "used_data_1.npz")

sample1 = pred_mae(sub_list_1)
sample2 = pred_mae(sub_list_2)

t_statistic, p_value = stats.ttest_ind(sample1, sample2)

# Print the results
print("T-statistic:", t_statistic)
print("P-value:", p_value)

# Interpret the results
alpha = 0.05
if p_value[0] < alpha:
    print("Reject the null hypothesis: There is a significant difference between the means of the two samples.")
else:
    print("Fail to reject the null hypothesis: There is no significant difference between the means of the two samples.")

T-statistic: [0.34592913 2.69756306]
P-value: [0.72944658 0.00706655]
Fail to reject the null hypothesis: There is no significant difference between the means of the two samples.


# Subject-calibration centring metric (SDS) -> Groupwise analysis
### For the result of the paper: Analyze the dissimilarity

In [6]:
# SDS metric calculation
dataset_path = input("Type dataset folder: ")
file_list = os.listdir(dataset_path)
bp_val = np.empty((0,2,0))
all_bp_val = np.empty((3240,2,0))
for file_name in file_list:
    if file_name.endswith('.npz'):
        bp_val = np.load(dataset_path+"//"+file_name)['y'][:3240,:]
        all_bp_val = np.insert(all_bp_val, all_bp_val.shape[2], bp_val,axis = 2)

s_val = all_bp_val - all_bp_val[0,:,:]
s_mean = np.sum(np.sum(s_val, axis=0, keepdims = True),axis=2, keepdims=True)/162000
s_diff = (s_val-s_mean)**2
sds_val = np.sqrt(np.sum(np.sum(s_diff, axis=0, keepdims = True),axis=2, keepdims=True)/161999)
sds_val

Type dataset folder:  /home/yckim/research/nibp_ppg/ex_3/data/m2/


array([[[20.74962849],
        [10.16338094]]])

m1: array([[[22.36624238], [10.74361466]]])
m2: array([[[20.74962849], [10.16338094]]])
v1: array([[[26.81278678], [15.68518243]]])
v2: array([[[25.015235  ], [14.88755577]]])