Create part of the standardized dataframe from the PSG data

In [1]:
import pandas as pd
import re

# Starting from i8f_mc_gpif_psg.csv, the COHORT was derived via the subject2treatment mapping together with the adsl.csv demographic data.
# See watchpat_analysis_dbi_sample_ipynb for details on how ./psg_standardized_df.csv was created.

# Load the standardized PSG frame and discard every column that isn't used for analysis
unused_columns = ['Unnamed: 0', 'NPSGDT', 'NPSGEDT', 'NPSGTM', 'NPSGETM']
df = pd.read_csv("./data/psg_standardized_df.csv").drop(columns=unused_columns)

# Function to extract visit number
def get_visit_number(visit):
    """
    Converts a visit label to its numeric visit number.

    Parameters
    ----------
    visit : str
        The visit label, e.g. 'Visit7' or 'Screening'. Matching is
        case-insensitive, so 'VISIT7' and 'SCREENING' are accepted as well.

    Returns
    ----------
    int or None
        The number following 'Visit', 0 for 'Screening', or None when the
        label is not a string or matches neither form.
    """
    # Missing labels (NaN/None) are not strings; re.match would raise TypeError on them
    if not isinstance(visit, str):
        return None

    match = re.match(r'^Visit(\d+)$', visit, re.IGNORECASE)
    if match:
        return int(match.group(1))

    # Compare case-insensitively so 'SCREENING' is treated like 'Screening',
    # consistent with the case-insensitive 'Visit<N>' pattern above
    if visit.lower() == 'screening':
        return 0

    return None

# Function to determine the severity label for AHI values
def ahi2SeverityLabel(x):
    """
    Converts an AHI value to a severity label.

    Parameters
    ----------
    x : float
        The AHI value. May be missing (NaN, None or pd.NA).

    Returns
    ----------
    str or pd.NA
        'No' for x < 5, 'Mild' for 5 <= x < 15, 'Moderate' for 15 <= x < 30,
        'Severe' for x >= 30, and pd.NA when the AHI value is missing.
    """
    # Every comparison against NaN is False, so without this guard a missing
    # AHI would fall through all branches and be labelled 'Severe'
    if pd.isna(x):
        return pd.NA

    if x < 5:
        return 'No'
    elif x < 15:
        return 'Mild'
    elif x < 30:
        return 'Moderate'
    else:
        return 'Severe'

# Endpoint columns of the PSG frame that are unpivoted into long format
# (non-numeric and already accounted columns are left out)
digital_endpoints = [
    'TIB', 'SOL', 'REML', 'TST', 'LPS', 'WASO', 'SE',
    'STN1', 'STN1P', 'STN2', 'STN2P', 'SWS', 'SWSP', 'REM', 'REMP',
    'AHI', 'CMP_AH4',
    'TST_SpO290', 'TST_SpO285', 'TST_SpO280', 'SpO2_ODI3', 'SpO2_ODI4',
]

# Accumulates one record per (endpoint, PSG row) pair
new_data = []

# Outer loop over endpoints, inner loop over PSG rows: records are grouped by endpoint
for endpoint in digital_endpoints:
    # Only AHI gets a severity label; every other endpoint gets a pd.NA placeholder
    has_severity = endpoint == 'AHI'
    for _, record in df.iterrows():
        value = record[endpoint]
        new_data.append({
            'VISIT': record['VISIT'],
            'USUBJID': record['USUBJID'],
            'digital_EP': endpoint,
            'digital_EP_value': value,
            'digital_EP_severity_category': ahi2SeverityLabel(value) if has_severity else pd.NA,
            'COHORT': record['COHORT'],
            'DEVICE': record['DEVICE'],
        })

new_df = pd.DataFrame(new_data)

Create part of the standardized dataframe from the WatchPAT data

In [2]:
# Load the WatchPAT frame and keep only the columns needed here; pAHI is renamed
# to AHI so that it is stored under the 'AHI' endpoint name
df1 = pd.read_csv("./data/watchPAT_standardized_df.csv")
df1 = df1[['VISIT', 'USUBJID', 'pAHI', 'COHORT', 'DEVICE']].rename(columns={'pAHI': 'AHI'})
digital_endpoints = ['AHI']

# Collect the WatchPAT records in their own list. Appending to the `new_data`
# list filled by the PSG cell would put every PSG record into new_df1 as well,
# and those records would then appear twice once new_df and new_df1 are
# concatenated (and again on every re-run of this cell).
watchpat_data = []

for endpoint in digital_endpoints:
    for idx, row in df1.iterrows():
        # Determine severity category based on endpoint
        if endpoint == 'AHI':
            severity_category = ahi2SeverityLabel(row[endpoint])
        else:
            severity_category = pd.NA

        new_row = {
            'VISIT': row['VISIT'],
            'USUBJID': row['USUBJID'],
            'digital_EP': endpoint,
            'digital_EP_value': row[endpoint],
            'digital_EP_severity_category': severity_category,
            'COHORT': row['COHORT'],
            'DEVICE': row['DEVICE']
        }
        watchpat_data.append(new_row)

# Passing the column list keeps the frame well-formed (and 'VISIT' present)
# even when the WatchPAT file contains no rows
new_df1 = pd.DataFrame(
    watchpat_data,
    columns=['VISIT', 'USUBJID', 'digital_EP', 'digital_EP_value',
             'digital_EP_severity_category', 'COHORT', 'DEVICE'],
)

# Rewrite the upper-case visit labels into their capitalized form in a single pass
new_df1['VISIT'] = new_df1['VISIT'].replace({
    'SCREENING': 'Screening',
    'VISIT7': 'Visit7',
    'VISIT11': 'Visit11',
})

Merge the two dataframes to get the full standardized analysis ready dataframe

In [3]:
# Stack the PSG-derived and WatchPAT-derived long frames and renumber the index from 0.
# `df` is rebound to the merged frame as well, replacing whatever it referred to before.
df = df_merged = pd.concat(objs=[new_df, new_df1], ignore_index=True)

# index=False is not passed, so the row index is written as an unnamed leading column
df_merged.to_csv("./data/standardized_analysis_ready_df.csv")