In [1]:
# Importing necessary libraries
import pyperclip
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.common.exceptions import NoSuchElementException, TimeoutException
import pandas as pd
import time
import re

In [2]:
# Debug: Log each field's status
def log_and_fill(field, value, field_name):
    """Clear an input element, type a value into it, and print the outcome.

    Args:
        field: Object exposing ``clear()`` and ``send_keys(value)``.
        value: Text to type into the field.
        field_name: Human-readable label used in the printed message.

    Returns:
        None. Any exception raised while clearing or typing is caught and
        reported with ``print`` instead of being propagated.
    """
    try:
        field.clear()
        field.send_keys(value)
        status = f"{field_name} filled with: {value}"
    except Exception as exc:
        status = f"Error filling {field_name}: {exc}"
    print(status)

In [3]:
# Function to extract results from clipboard content
def extract_results(content):
    """Parse the score, percentage, and trailing text from copied calculator output.

    Args:
        content: Text to search (the clipboard content).

    Returns:
        A tuple ``(ehmrg, percent, text)`` of strings; each element is ``None``
        when its pattern is not found.
        * ``ehmrg``   - the signed integer/decimal between "RESULT SUMMARY:" and "points".
        * ``percent`` - the first integer/decimal that is followed by a ``%`` sign.
        * ``text``    - what follows that ``%`` sign: whitespace (including line
          breaks) is skipped, then the rest of that line is taken and stripped.
    """
    score_pattern = r"RESULT SUMMARY:\s*([-+]?\d+\.?\d*)\s*points"
    # A single pattern captures both the number before "%" and the text after it
    percent_pattern = r"(\d+\.?\d*)\s*%\s*(.*)"

    score_hit = re.search(score_pattern, content)
    percent_hit = re.search(percent_pattern, content)

    ehmrg = None if score_hit is None else score_hit.group(1)

    if percent_hit is None:
        percent = None
        text = None
    else:
        percent = percent_hit.group(1)
        text = percent_hit.group(2).strip()

    return ehmrg, percent, text

In [4]:
# Read the input workbook into the DataFrame used by the rest of the notebook
file_path = 'EHMRG_test_data.xlsx'  # Update with your data file path
df = pd.read_excel(io=file_path)

In [5]:
# Preview the first 10 rows of the loaded data
df.head(n=10)

Unnamed: 0,Age,SBP,HR,Spo2RA,Cr,K,Transport by EMS,Troponin Positive,Active cancer,On metolazone
0,65,120,85,95,1.1,4.2,1,1,0,1
1,72,110,90,88,0.9,4.6,0,1,1,0
2,50,130,78,90,1.2,4.1,1,0,0,1
3,80,140,100,85,1.3,3.9,1,0,1,0
4,45,125,95,92,1.0,4.3,0,1,0,1
5,60,135,87,96,1.4,4.7,1,1,1,0
6,55,118,92,87,1.1,4.5,1,0,1,1
7,68,122,89,89,1.3,4.4,0,1,0,1
8,70,132,84,91,0.8,4.8,1,0,1,0
9,62,128,80,93,1.0,4.0,0,1,0,1


In [None]:
# Specify the path to the ChromeDriver executable
chromedriver_path = '/path/to/chromedriver'  # Update this path

# Set up the ChromeDriver service and start a Chrome session through it
service = Service(executable_path=chromedriver_path)
driver = webdriver.Chrome(service=service)

# Open the EHMRG calculator website
url = "https://www.mdcalc.com/calc/1755/emergency-heart-failure-mortality-risk-grade-ehmrg"
driver.get(url)

# Ask the user to log in. This cell only prints the reminder and does not block,
# so run the following cells only after the manual login is complete.
print("Please log in manually to the MDCalc website. Then come back and continue the script.")

Please log in manually to the MDCalc website. Then come back and continue the script.


In [7]:
# Shared explicit wait for the later cells: element lookups get up to 15 seconds
wait = WebDriverWait(driver, timeout=15)

In [8]:
# Reload the calculator page once the manual login is done
driver.refresh()

In [9]:
# Run every row of df through the calculator form and store the copied result.
# Uses names defined in earlier cells: df, driver, wait, log_and_fill, extract_results.
# For each row: fill the text inputs, pick the potassium range, set the Yes/No
# toggles, click the copy-to-clipboard button, parse the clipboard text, and
# write EHMRG / Percent / Text back into df. A failure is recorded in the
# "Error" column and the loop continues with the next row.
CLIPBOARD_TIMEOUT_S = 5.0  # Longest time to wait for the copy button to fill the clipboard
CLIPBOARD_POLL_S = 0.1     # Pause between clipboard checks

for idx, row in df.iterrows():
    try:
        print(f"\nProcessing row {idx}...")

        # Filling text fields
        print("=== Filling text fields ===")
        fields = [
            ("age", row["Age"], "Age"),
            ("systolic_bp", row["SBP"], "Systolic BP"),
            ("heart_rate", row["HR"], "Heart Rate"),
            # Apply the adjustment for Spo2RA: values below 40 are entered as 40
            ("o2_sat", max(row["Spo2RA"], 40), "Oxygen Saturation"),
            ("creatinine", row["Cr"], "Creatinine"),
        ]
        for field_name, value, field_display in fields:
            print(f"Attempting to fill {field_display} with value {value}...")
            field = wait.until(EC.element_to_be_clickable((By.NAME, field_name)))
            driver.execute_script("arguments[0].scrollIntoView(true);", field)
            log_and_fill(field, str(value), field_display)

        # Handling Potassium range
        print("=== Handling Potassium range ===")
        potassium = row["K"]
        potassium_id = None
        if potassium < 4:
            potassium_id = "potassium-<4"
        elif potassium > 4.5:
            potassium_id = "potassium-> 4.5"  # Note the space here
        else:
            print("Potassium is 4-4.5. Skipping click as it's already default.")

        if potassium_id:
            print(f"Checking presence of Potassium element with ID: {potassium_id}...")
            potassium_element = wait.until(
                EC.presence_of_element_located(
                    (By.XPATH, f"//input[@id='{potassium_id}']/..")  # Select the label parent
                )
            )
            is_selected = potassium_element.get_attribute("class")
            if "calc_btn-selected" in is_selected:
                print(f"Potassium range {potassium_id} is already selected. Skipping click.")
            else:
                print(f"Clicking Potassium element with ID: {potassium_id}...")
                driver.execute_script("arguments[0].click();", potassium_element)
                print(f"Potassium range {potassium_id} selected.")

        # Handling Yes/No fields
        print("=== Handling Yes/No fields ===")
        # All four flags use the same "== 1" test, so a missing (NaN) value is
        # treated as "No" for every field rather than as "Yes" for troponin only.
        yes_no_fields = [
            ("ems", "Yes" if row["Transport by EMS"] == 1 else "No", "EMS"),
            ("troponin", "Yes" if row["Troponin Positive"] == 1 else "No", "Troponin"),
            ("cancer", "Yes" if row["Active cancer"] == 1 else "No", "Active Cancer"),
            ("metolazone", "Yes" if row["On metolazone"] == 1 else "No", "Metolazone"),
        ]
        for field_prefix, value, field_display in yes_no_fields:
            if value == "No":
                print(f"{field_display} is No (default). Skipping click.")
                continue
            element_id = f"{field_prefix}-{value}"
            print(f"Attempting to set {field_display} to {value} using ID: {element_id}...")
            element = wait.until(EC.presence_of_element_located((By.ID, element_id)))
            is_selected = element.get_attribute("class")
            if "selected" in is_selected:
                print(f"{field_display} {value} is already selected. Skipping click.")
            else:
                print(f"Clicking {field_display} with ID: {element_id}...")
                driver.execute_script("arguments[0].click();", element)
                print(f"{field_display} set to {value}.")

        # Handling clipboard results
        print("=== Handling clipboard results ===")
        clipboard_button = wait.until(EC.presence_of_element_located((By.CLASS_NAME, "calc_copy-clipboard-button__fJsVs")))
        print("Clipboard button found. Clicking...")
        # Empty the clipboard first so a result left over from the previous row
        # can never be read back as this row's result.
        pyperclip.copy("")
        driver.execute_script("arguments[0].click();", clipboard_button)
        print("Clipboard button clicked.")

        # Poll until the clipboard has content instead of relying on a fixed sleep
        deadline = time.monotonic() + CLIPBOARD_TIMEOUT_S
        clipboard_content = pyperclip.paste()
        while not clipboard_content.strip() and time.monotonic() < deadline:
            time.sleep(CLIPBOARD_POLL_S)
            clipboard_content = pyperclip.paste()
        if not clipboard_content.strip():
            raise TimeoutException(
                f"Clipboard stayed empty for {CLIPBOARD_TIMEOUT_S} seconds after clicking the copy button."
            )
        print(f"Clipboard Content for row {idx}:\n{clipboard_content}")

        # Extract results and update DataFrame
        ehmrg, percent, text = extract_results(clipboard_content)
        df.at[idx, "EHMRG"] = ehmrg
        df.at[idx, "Percent"] = percent
        df.at[idx, "Text"] = text

        # Refresh the page for the next input
        print("Refreshing the page for the next input...")
        driver.refresh()
        time.sleep(0.1)

    except Exception as e:
        print(f"Error processing row {idx}: {e}")
        # Record the error first so it is kept even if the debug dump below fails
        df.at[idx, "Error"] = str(e)

        # Best-effort dump of the page for debugging; must not abort the loop
        try:
            debug_file = f"debug_row_{idx}.html"
            with open(debug_file, "w", encoding="utf-8") as f:
                f.write(driver.page_source)
            print(f"Saved page source for row {idx} to {debug_file}.")
        except Exception as dump_error:
            print(f"Could not save page source for row {idx}: {dump_error}")

        # Reset the form so selections made for the failed row do not carry
        # over into the next row (fields equal to "No" are never clicked).
        try:
            driver.refresh()
            time.sleep(0.1)
        except Exception as refresh_error:
            print(f"Could not refresh the page after row {idx}: {refresh_error}")


Processing row 0...
=== Filling text fields ===
Attempting to fill Age with value 65.0...
Age filled with: 65.0
Attempting to fill Systolic BP with value 120.0...
Systolic BP filled with: 120.0
Attempting to fill Heart Rate with value 85.0...
Heart Rate filled with: 85.0
Attempting to fill Oxygen Saturation with value 95.0...
Oxygen Saturation filled with: 95.0
Attempting to fill Creatinine with value 1.1...
Creatinine filled with: 1.1
=== Handling Potassium range ===
Potassium is 4-4.5. Skipping click as it's already default.
=== Handling Yes/No fields ===
Attempting to set EMS to Yes using ID: ems-Yes...
Clicking EMS with ID: ems-Yes...
EMS set to Yes.
Attempting to set Troponin to Yes using ID: troponin-Yes...
Clicking Troponin with ID: troponin-Yes...
Troponin set to Yes.
Active Cancer is No (default). Skipping click.
Attempting to set Metolazone to Yes using ID: metolazone-Yes...
Clicking Metolazone with ID: metolazone-Yes...
Metolazone set to Yes.
=== Handling clipboard results 

In [10]:
# Display the DataFrame to inspect the result columns (EHMRG, Percent, Text)
# written by the processing loop
df

Unnamed: 0,Age,SBP,HR,Spo2RA,Cr,K,Transport by EMS,Troponin Positive,Active cancer,On metolazone,EHMRG,Percent,Text
0,65,120,85,95,1.1,4.2,1,1,0,1,125.0,8.2,Highest Risk of 7-day mortality. Consider admi...
1,72,110,90,88,0.9,4.6,0,1,1,0,113.0,8.2,Highest Risk of 7-day mortality. Consider admi...
2,50,130,78,90,1.2,4.1,1,0,0,1,113.0,8.2,Highest Risk of 7-day mortality. Consider admi...
3,80,140,100,85,1.3,3.9,1,0,1,0,98.0,8.2,Highest Risk of 7-day mortality. Consider admi...
4,45,125,95,92,1.0,4.3,0,1,0,1,28.0,2.0,Intermediate Risk of 7-day mortality.
5,60,135,87,96,1.4,4.7,1,1,1,0,123.0,8.2,Highest Risk of 7-day mortality. Consider admi...
6,55,118,92,87,1.1,4.5,1,0,1,1,109.0,8.2,Highest Risk of 7-day mortality. Consider admi...
7,68,122,89,89,1.3,4.4,0,1,0,1,83.0,3.5,High Risk of 7-day mortality.
8,70,132,84,91,0.8,4.8,1,0,1,0,73.0,3.5,High Risk of 7-day mortality.
9,62,128,80,93,1.0,4.0,0,1,0,1,44.0,2.0,Intermediate Risk of 7-day mortality.


In [None]:
# Re-run rows whose EHMRG score equals the score of the row directly before it.
# Identical consecutive scores may mean the clipboard still held the previous
# row's result when it was read, so the later row of each such pair is
# recalculated on a freshly reloaded page. Genuinely equal scores are simply
# recomputed. Uses names from earlier cells: df, driver, wait, log_and_fill,
# extract_results.
CLIPBOARD_TIMEOUT_S = 5.0  # Longest time to wait for the copy button to fill the clipboard
CLIPBOARD_POLL_S = 0.1     # Pause between clipboard checks

print("\n=== Checking for consecutive duplicate EHMRG scores ===")
duplicates_found = False

# Walk actual index labels so the check does not depend on a 0..n-1 index
row_labels = list(df.index)
for prev_label, label in zip(row_labels, row_labels[1:]):
    if df.loc[prev_label, "EHMRG"] == df.loc[label, "EHMRG"]:
        duplicates_found = True
        print(f"Duplicate EHMRG score found between row {prev_label} and row {label}: {df.loc[prev_label, 'EHMRG']}")
        print(f"Recalculating row {label}...")

        try:
            # Refresh the page so the form starts from its defaults
            driver.refresh()
            time.sleep(0.5)

            # Repeat the calculation process for the later row of the pair
            row = df.loc[label]

            # Refill the text fields
            fields = [
                ("age", row["Age"], "Age"),
                ("systolic_bp", row["SBP"], "Systolic BP"),
                ("heart_rate", row["HR"], "Heart Rate"),
                # Values below 40 are entered as 40
                ("o2_sat", max(row["Spo2RA"], 40), "Oxygen Saturation"),
                ("creatinine", row["Cr"], "Creatinine"),
            ]
            for field_name, value, field_display in fields:
                field = wait.until(EC.element_to_be_clickable((By.NAME, field_name)))
                driver.execute_script("arguments[0].scrollIntoView(true);", field)
                log_and_fill(field, str(value), field_display)

            # Handle Potassium range for recalculation (4-4.5 needs no click)
            potassium = row["K"]
            potassium_id = None
            if potassium < 4:
                potassium_id = "potassium-<4"
            elif potassium > 4.5:
                potassium_id = "potassium-> 4.5"

            if potassium_id:
                potassium_element = wait.until(
                    EC.presence_of_element_located(
                        (By.XPATH, f"//input[@id='{potassium_id}']/..")
                    )
                )
                driver.execute_script("arguments[0].click();", potassium_element)

            # Handle Yes/No fields for recalculation; all four flags use the
            # same "== 1" test so a missing (NaN) value never counts as "Yes".
            yes_no_fields = [
                ("ems", "Yes" if row["Transport by EMS"] == 1 else "No", "EMS"),
                ("troponin", "Yes" if row["Troponin Positive"] == 1 else "No", "Troponin"),
                ("cancer", "Yes" if row["Active cancer"] == 1 else "No", "Active Cancer"),
                ("metolazone", "Yes" if row["On metolazone"] == 1 else "No", "Metolazone"),
            ]
            for field_prefix, value, field_display in yes_no_fields:
                if value == "Yes":
                    element_id = f"{field_prefix}-{value}"
                    element = wait.until(EC.presence_of_element_located((By.ID, element_id)))
                    driver.execute_script("arguments[0].click();", element)

            # Re-extract clipboard results for recalculated row
            clipboard_button = wait.until(
                EC.presence_of_element_located((By.CLASS_NAME, "calc_copy-clipboard-button__fJsVs"))
            )
            # Empty the clipboard before copying, then poll until it has content,
            # so a stale result cannot be read back a second time.
            pyperclip.copy("")
            driver.execute_script("arguments[0].click();", clipboard_button)
            deadline = time.monotonic() + CLIPBOARD_TIMEOUT_S
            clipboard_content = pyperclip.paste()
            while not clipboard_content.strip() and time.monotonic() < deadline:
                time.sleep(CLIPBOARD_POLL_S)
                clipboard_content = pyperclip.paste()
            if not clipboard_content.strip():
                raise TimeoutException(
                    f"Clipboard stayed empty for {CLIPBOARD_TIMEOUT_S} seconds after clicking the copy button."
                )

            ehmrg, percent, text = extract_results(clipboard_content)
            df.at[label, "EHMRG"] = ehmrg
            df.at[label, "Percent"] = percent
            df.at[label, "Text"] = text

            print(f"Row {label} recalculated successfully.")

        except Exception as e:
            print(f"Error recalculating row {label}: {e}")
            df.at[label, "Error"] = str(e)

if not duplicates_found:
    print("No duplicate EHMRG scores found.")


=== Checking for consecutive duplicate EHMRG scores ===
Duplicate EHMRG score found between row 1 and row 2: 113.0
Recalculating row 2...
Age filled with: 50
Systolic BP filled with: 130
Heart Rate filled with: 78
Oxygen Saturation filled with: 90
Creatinine filled with: 1.2
Row 1 recalculated successfully.


In [14]:
# Write the DataFrame, including the result columns, to an Excel file without the index
output_file = "EHMRG_results.xlsx"
df.to_excel(excel_writer=output_file, index=False)
print(f"Results saved to {output_file}.")

Results saved to EHMRG_results.xlsx.
