In [None]:
'''
parseSVG_V01
Version Discription: Retrieve information from SVG files
Last Update: 20240612
'''

In [1]:
import os
import pandas as pd
import xml.etree.ElementTree as ET



In [2]:
def extract_patient_data(svg_file_path):
    """
    Extracts patient data from a given SVG file.
    Includes error handling to ensure the script continues even if parsing fails.

    Parameters:
    - svg_file_path: Path to the SVG file.

    Returns:
    A tuple containing (patientid, date, heartrate, print, qrsdur, qtcb, qrsfrontaxis), 
    with values as None if parsing fails.
    """
    try:
        # Parse the SVG file
        tree = ET.parse(svg_file_path)
        root = tree.getroot()

        # Define SVG namespace if needed
        namespaces = {'svg': 'http://www.w3.org/2000/svg'}

        # Function to find text content by element ID
        def find_text_by_id(root, element_id):
            for element in root.findall(".//*[@id='" + element_id + "']", namespaces=namespaces):
                return element.text
            return None

        # Extract patient data
        patientid = find_text_by_id(root, 'patientid')
        age = find_text_by_id(root, 'age')
        sex = find_text_by_id(root, 'sex')
        date = find_text_by_id(root, 'date')
        time = find_text_by_id(root, 'time')
        heartrate = find_text_by_id(root, 'heartrate')
        print_value = find_text_by_id(root, 'print')
        qrsdur = find_text_by_id(root, 'qrsdur')
        qtcb = find_text_by_id(root, 'qtcb')
        qrsfrontaxis = find_text_by_id(root, 'qrsfrontaxis')

        return patientid, age, sex, date, time, heartrate, print_value, qrsdur, qtcb, qrsfrontaxis
    except Exception as e:
        print(f"Error processing file {svg_file_path}: {e}")
        return None, None, None, None, None, None, None, None, None, None

In [5]:
def process_svg_files(folder_path):
    # Create an empty DataFrame
    data = []

    # Loop through each SVG file in the folder
    for filename in os.listdir(folder_path):
        if filename.endswith(".svg"):
            file_path = os.path.join(folder_path, filename)
            patient_data = extract_patient_data(file_path)
            data.append((filename, *patient_data))

    # Convert the data to a DataFrame
    columns = ['filename', 'patientid', 'age', 'sex', 'date', 'time', 'heartrate', 'print', 'qrsdur', 'qtcb', 'qrsfrontaxis']
    df = pd.DataFrame(data, columns=columns)

    return df

In [6]:
folder_path = 'normal_svg'
df = process_svg_files(folder_path)
df

Unnamed: 0,filename,patientid,age,sex,date,time,heartrate,print,qrsdur,qtcb,qrsfrontaxis
0,PageWriterTouchECG202210244129476.svg,24583117,\n,\n,2022/10/2,下午 12:25:54,58,156,80,393,43
1,PageWriterTouchECG2022102514446462.svg,49010802,5 yrs,男性,2022/10/25,上午 09:43:23,73,122,84,409,68
2,PageWriterTouchECG2022101413822802.svg,13261557,\n,\n,2022/10/14,上午 07:37:20,75,168,109,453,76
3,PageWriterTouchECG2022102712919522.svg,12326102,\n,\n,2022/10/27,上午 07:56:19,81,128,101,450,42
4,PageWriterTouchECG2022101813956161.svg,27442520,\n,\n,2022/10/18,上午 08:31:05,79,155,87,442,80
...,...,...,...,...,...,...,...,...,...,...,...
195,PageWriterTouchECG202210201150492.svg,25366794,\n,\n,2022/10/20,上午 08:21:12,72,169,96,433,76
196,PageWriterTouchECG2022102512425911.svg,49034880,\n,\n,2022/10/25,上午 08:47:47,69,166,86,437,77
197,PageWriterTouchECG202210244120661.svg,41438986,\n,\n,2022/9/28,上午 10:57:25,79,148,80,418,-3
198,PageWriterTouchECG2022101712232831.svg,7018000,\n,\n,2022/10/17,上午 08:35:55,76,155,96,449,-5


In [7]:
# # Save the DataFrame to an Excel file
# df.to_excel('normal_svg.xlsx', index=False)

In [14]:
ICH_Data = pd.read_csv('ICH_Data.csv')
ICH_Data

Unnamed: 0,去識別化編號,年月,心電圖有無,姓名,病歷編號,缺血,TIA,出血,無中風,年齡,...,檢傷SBP,檢傷DBP,檢傷HR,"rhythm(SR=0, Afib=1, aFLUTTER=2, pacing=3, junctional=4, others=5)","Bunddle branch block(non=0, RBBB=1, LBBB=2, LAFB=3, LPFB=4, other=5)",ventricular rate,PR interval,QRS duration,QTC,qrs axis
0,60,21-Nov,1,陳俊安,47886351,,,1,,50,...,241.0,115.0,91.0,0,0,94,204.0,80,451,49
1,75,21-Nov,1,郭若瑟,38983221,,,1,,70,...,216.0,96.0,78.0,0,0,76,184.0,90,459,36
2,92,21-Oct,1,沈金環,45622160,,,1,,58,...,239.0,114.0,79.0,0,0,64,172.0,84,430,42
3,100,21-Oct,1,薛家欣,36227619,,,1,,39,...,247.0,154.0,91.0,0,0,81,184.0,92,479,22
4,150,21-Aug,1,林世澤,19792859,,,1,,56,...,140.0,97.0,93.0,0,0,98,196.0,94,470,30
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
142,1393,18-Aug,1,陳博宗,44893814,,,1,,47,...,169.0,96.0,123.0,0,0,114,164.0,84,491,48
143,1396,18-Aug,1,謝東宏,15484205,,,1,,34,...,212.0,125.0,101.0,0,0,98,168.0,104,486,37
144,1410,18-Jul,1,潘美玉,1653356,,,1,,76,...,,,,0,0,117,160.0,84,480,31
145,1423,18-Jul,1,謝吳甚,4095005,,,1,,93,...,112.0,56.0,49.0,3,0,50,182.0,136,424,-80


In [None]:
# ICH_Data.to_excel('ICH_Data.xlsx', index=False)