In [1]:
import pandas as pd
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import NoSuchElementException, TimeoutException
import time
import re

In [12]:
# Load the patient data from the Excel workbook into a DataFrame
file_path = 'MEESSI_AHF_Test_Data.xlsx' # Example file name - replace it with the path to your own workbook
df = pd.read_excel(file_path)


In [None]:
# Path to the ChromeDriver executable; it is passed to the Selenium Service when the browser is started
chromedriver_path = '/path/to/chromedriver' # Placeholder - replace it with the location of ChromeDriver on your machine

In [None]:
# Preview the first rows of the data imported into the DataFrame
df.head()

Unnamed: 0,Patient ID,Barthel index,SBP,Age,NTprobnp,K,NYHA_class_IV,Troponin,RR,Low output,Spo2RA,ACS,LVH,Cr
0,1,Unknown,160,78,,3.2,Yes,Positive,28,1,96,1,1,1.2
1,2,60,130,82,7000.0,4.0,No,Normal,24,0,89,0,0,2.0
2,3,80,120,85,15000.0,5.0,No,Unknown,30,0,85,1,1,1.8
3,4,Unknown,150,76,,3.8,Yes,Positive,22,1,92,0,1,1.1
4,5,90,100,90,25000.0,5.6,Yes,Normal,26,1,83,1,0,2.5


In [14]:
# Lookup table from a result-image filename (e.g. "A3.jpg") to its quintile/decile category.
# Only the digit in the filename decides the category; every prefix letter A-H
# gets the same six labels.
risk_labels_by_number = (
    "Low risk (1st quintile)",
    "Low risk (2nd quintile)",
    "Intermediate risk (3rd quintile)",
    "Intermediate risk (4th quintile)",
    "High risk (9th decile)",
    "Very high risk (10th decile)",
)

image_to_quintile_decile = {}
for letter in "ABCDEFGH":
    # Numbers start at 1, so label position 0 belongs to "<letter>1.jpg", and so on.
    image_to_quintile_decile.update(
        (f"{letter}{number}.jpg", label)
        for number, label in enumerate(risk_labels_by_number, start=1)
    )

In [None]:
# Score every patient in `df` with the online MEESSI-AHF calculator and store
# the extracted results in the 'MEESSI', 'Risk Group' and 'Quintile/Decile' columns.

CALCULATOR_URL = 'https://meessi-ahf.risk.score-calculator-ica-semes.portalsemes.org/calc.html'


def select_option_ids(row):
    """Return the calculator element IDs to click for one patient, in form order.

    Args:
        row: One patient record (a DataFrame row or any mapping) with the columns
            'Barthel index', 'SBP', 'Age', 'NTprobnp', 'K', 'NYHA_class_IV',
            'Troponin', 'RR', 'Low output', 'Spo2RA', 'ACS', 'LVH' and 'Cr'.

    Returns:
        list[str]: The IDs of the form options matching the patient's values.

    Raises:
        ValueError: If SBP, Age, K, RR, Spo2RA or Cr is missing. A missing value
            fails every comparison and would otherwise silently select the last
            (most extreme) category and produce a wrong score.
    """

    def required(column):
        # Fetch a value that must be present for the score to be meaningful.
        value = row[column]
        if pd.isna(value):
            raise ValueError(f"missing value in column '{column}'")
        return value

    option_ids = []

    # Barthel index: an empty cell is treated like the explicit 'Unknown' marker,
    # in the same way a missing NT-proBNP selects its Unknown option below.
    barthel_index = row['Barthel index']
    if pd.isna(barthel_index) or barthel_index == 'Unknown':
        option_ids.append("BIA5")  # Unknown option
    elif barthel_index >= 75:
        option_ids.append("BIA1")
    elif 50 <= barthel_index < 75:
        option_ids.append("BIA2")
    elif 25 <= barthel_index < 50:
        option_ids.append("BIA3")
    else:
        option_ids.append("BIA4")

    # Systolic BP (SBP)
    sbp = required('SBP')
    if sbp >= 155:
        option_ids.append("SBP1")
    elif sbp >= 140:
        option_ids.append("SBP2")
    elif sbp >= 125:
        option_ids.append("SBP3")
    elif sbp >= 110:
        option_ids.append("SBP4")
    elif sbp >= 95:
        option_ids.append("SBP5")
    else:
        option_ids.append("SBP6")

    # Age
    age = required('Age')
    if age < 75:
        option_ids.append("Age1")
    elif age < 80:
        option_ids.append("Age2")
    elif age < 85:
        option_ids.append("Age3")
    elif age < 90:
        option_ids.append("Age4")
    else:
        option_ids.append("Age5")

    # NT-proBNP (a missing value selects the calculator's Unknown option)
    nt_probnp = row['NTprobnp']
    if pd.isna(nt_probnp):
        option_ids.append("NT5")  # Unknown
    elif nt_probnp < 8000:
        option_ids.append("NT1")
    elif nt_probnp < 16000:
        option_ids.append("NT2")
    elif nt_probnp < 24000:
        option_ids.append("NT3")
    else:
        option_ids.append("NT4")

    # Potassium (K)
    potassium = required('K')
    if potassium < 3.5:
        option_ids.append("Pot1")
    elif potassium <= 4.9:
        option_ids.append("Pot2")
    elif potassium <= 5.5:
        option_ids.append("Pot3")
    else:
        option_ids.append("Pot4")

    # NYHA class IV
    option_ids.append(f"NYHA_{'Yes' if row['NYHA_class_IV'] == 'Yes' else 'No'}")

    # Troponin level (anything other than Normal/Positive selects Unknown)
    troponin = row['Troponin']
    if troponin == 'Normal':
        option_ids.append("PTL1")  # Select Normal
    elif troponin == 'Positive':
        option_ids.append("PTL2")  # Select Positive
    else:
        option_ids.append("PTL3")  # Select Unknown

    # Respiratory rate (RR)
    rr = required('RR')
    if rr < 25:
        option_ids.append("RR1")
    elif rr < 30:
        option_ids.append("RR2")
    else:
        option_ids.append("RR3")

    # Low output symptoms
    option_ids.append(f"LOS_{'Yes' if row['Low output'] == 1 else 'No'}")

    # Oxygen saturation (Spo2RA)
    spo2 = required('Spo2RA')
    if spo2 >= 95:
        option_ids.append("OS1")
    elif spo2 >= 90:
        option_ids.append("OS2")
    elif spo2 >= 84:
        option_ids.append("OS3")
    else:
        option_ids.append("OS4")

    # Episode associated with ACS
    option_ids.append(f"ACS_{'Yes' if row['ACS'] == 1 else 'No'}")

    # Hypertrophy at ECG (LVH)
    option_ids.append(f"ECG_{'Yes' if row['LVH'] == 1 else 'No'}")

    # Creatinine (Cr)
    cr = required('Cr')
    if cr < 1.5:
        option_ids.append("CRE1")
    elif cr <= 2.4:
        option_ids.append("CRE2")
    else:
        option_ids.append("CRE3")

    return option_ids


# Set up the ChromeDriver service
service = Service(executable_path=chromedriver_path)
driver = webdriver.Chrome(service=service)

try:
    for index, row in df.iterrows():
        # Work out every form choice first, so an incomplete record is skipped
        # before the page is loaded and no misleading score is recorded for it.
        try:
            option_ids = select_option_ids(row)
        except ValueError as e:
            print(f"Skipping index {index}: {e}")
            continue

        # Load the calculator page (a fresh load also resets the form)
        driver.get(CALCULATOR_URL)

        # Fill in the form
        for option_id in option_ids:
            driver.find_element(By.ID, option_id).click()

        # Click the Calculate button
        driver.find_element(By.ID, "calcBtn").click()

        # Wait for the result modal to appear
        try:
            result_modal = WebDriverWait(driver, 10).until(
                EC.visibility_of_element_located((By.CLASS_NAME, "modal-body"))
            )
            result_text = result_modal.text
            print(f"Extracted modal text for index {index}: {result_text}")  # Debug print

            # Initialize variables to capture extracted information
            risk_percentage = None
            risk_group = None
            quintile_decile = None

            # Extract the risk percentage and risk group from the text
            for line in result_text.splitlines():
                if "mortality risk" in line:
                    risk_percentage = line.split(" ")[-1]  # Last word is the percentage
                    print(f"Extracted mortality risk: {risk_percentage}")  # Debug print
                elif "risk group" in line:
                    # Use regex to capture only the risk group level (e.g., VERY HIGH, HIGH)
                    match = re.search(r"in\s+(.+?)\s+risk group", line, re.IGNORECASE)
                    if match:
                        risk_group = match.group(1).strip().upper()
                    print(f"Extracted risk group: {risk_group}")  # Debug print

            # Locate the image element within modal-img div and map it
            try:
                modal_img_div = WebDriverWait(driver, 10).until(
                    EC.visibility_of_element_located((By.CLASS_NAME, "modal-img"))
                )
                image_element = modal_img_div.find_element(By.TAG_NAME, "img")
                image_src = image_element.get_attribute("src")
                image_filename = image_src.split("/")[-1]
                print(f"Extracted image filename: {image_filename}")  # Debug print

                # Map the filename to the quintile/decile
                quintile_decile = image_to_quintile_decile.get(image_filename, "Unknown")

            except (NoSuchElementException, TimeoutException) as e:
                print(f"Could not locate a relevant image for index {index}: {e}")
                quintile_decile = "Image not found"

            # Store the extracted information in the DataFrame
            df.at[index, 'MEESSI'] = risk_percentage
            df.at[index, 'Risk Group'] = risk_group
            df.at[index, 'Quintile/Decile'] = quintile_decile

        except Exception as e:
            print(f"Error retrieving result for index {index}: {e}")

        # Close the result modal. This is best-effort: if the modal never appeared
        # the click fails, and the next iteration reloads the page anyway, so one
        # failed patient must not abort the rest of the batch.
        try:
            driver.find_element(By.ID, "closeBtn").click()
        except Exception as e:
            print(f"Could not close the result modal for index {index}: {e}")
        time.sleep(1)  # Short delay to avoid overwhelming the server; shorten it as you see fit
finally:
    # Always close the WebDriver, even when an error interrupts the loop,
    # so no browser or ChromeDriver process is left running.
    driver.quit()

Extracted modal text for index 0: Barthel index at admission unknown
NT-proBNP unknown
This patient's predicted 30-day mortality risk is 35.269%
This patient is in VERY HIGH risk group
Extracted mortality risk: 35.269%
Extracted risk group: VERY HIGH
Extracted image filename: F6.jpg
Extracted modal text for index 1: This patient's predicted 30-day mortality risk is 4.047%
This patient is in INTERMEDIATE risk group
Extracted mortality risk: 4.047%
Extracted risk group: INTERMEDIATE
Extracted image filename: A3.jpg
Extracted modal text for index 2: Positive Troponine Level unknown
This patient's predicted 30-day mortality risk is 41.905%
This patient is in VERY HIGH risk group
Extracted mortality risk: 41.905%
Extracted risk group: VERY HIGH
Extracted image filename: C6.jpg
Extracted modal text for index 3: Barthel index at admission unknown
NT-proBNP unknown
This patient's predicted 30-day mortality risk is 17.752%
This patient is in HIGH risk group
Extracted mortality risk: 17.752%
Ext

In [None]:
# Preview the first 10 rows, which now include the columns filled in from the calculator results
df.head(10)

Unnamed: 0,Patient ID,Barthel index,SBP,Age,NTprobnp,K,NYHA_class_IV,Troponin,RR,Low output,Spo2RA,ACS,LVH,Cr,MEESSI,Risk Group,Quintile/Decile
0,1,Unknown,160,78,,3.2,Yes,Positive,28,1,96,1,1,1.2,35.269%,VERY HIGH,Very high risk (10th decile)
1,2,60,130,82,7000.0,4.0,No,Normal,24,0,89,0,0,2.0,4.047%,INTERMEDIATE,Intermediate risk (3rd quintile)
2,3,80,120,85,15000.0,5.0,No,Unknown,30,0,85,1,1,1.8,41.905%,VERY HIGH,Very high risk (10th decile)
3,4,Unknown,150,76,,3.8,Yes,Positive,22,1,92,0,1,1.1,17.752%,HIGH,High risk (9th decile)
4,5,90,100,90,25000.0,5.6,Yes,Normal,26,1,83,1,0,2.5,71.992%,VERY HIGH,Very high risk (10th decile)


In [None]:
# Write the DataFrame, including the extracted results, to the Excel file 'MEESSI_with_scores.xlsx'
# (index=False leaves the DataFrame's row index out of the saved sheet)
df.to_excel('MEESSI_with_scores.xlsx', index=False)