In [1]:
import pandas as pd
import re

### Parse markdown

In [2]:
def parse_markdown_to_dataframe(filepath):
    """Parse a markdown file with numbered headings into a clause table.

    A line that (after stripping surrounding whitespace) starts with one or
    more ``#`` followed by a dotted clause number -- e.g. ``# 4.1 Purpose`` or
    ``### 14.4.5.2.1`` -- opens a new clause. Every following non-empty,
    non-heading line is collected as that clause's text, joined with single
    spaces, until the next numbered heading.

    Headings without a clause number (e.g. ``# Foreword``) do not open a
    clause and the heading line itself is dropped; body lines after such a
    heading stay with the clause that is currently open. Text that appears
    before the first numbered heading is discarded.

    Args:
        filepath: Path of the UTF-8 encoded markdown file to read.

    Returns:
        pandas.DataFrame with the columns "Number", "Heading" and "Clause",
        one row per numbered heading in file order. "Heading" is an empty
        string when the heading line carries only a number.
    """
    # One or more '#', optional whitespace, a clause number that starts with a
    # digit (so "# .NET" is not mistaken for a clause), then the heading text.
    heading_pattern = re.compile(r'#+\s*(\d[\d.]*)\s*(.*)')

    data = []
    current_number = None
    current_heading = ""
    current_clause = []

    with open(filepath, 'r', encoding='utf-8') as file:
        for raw_line in file:
            line = raw_line.strip()
            if not line:
                continue

            if line.startswith("#"):
                match = heading_pattern.match(line)
                if match:
                    # Flush the previous clause only when a new numbered
                    # clause really starts; flushing on every '#' line would
                    # emit the same clause twice around unnumbered headings.
                    if current_number is not None:
                        data.append((current_number, current_heading, " ".join(current_clause).strip()))
                    current_number = match.group(1)
                    current_heading = match.group(2).strip()
                    current_clause = []
                # Unnumbered heading: ignore the heading line itself.
                continue

            current_clause.append(line)

    # The last open clause has no following heading to trigger its flush.
    if current_number is not None:
        data.append((current_number, current_heading, " ".join(current_clause).strip()))

    return pd.DataFrame(data, columns=["Number", "Heading", "Clause"])

In [3]:
import os
from pathlib import Path
# Resolve the input file relative to the parent of the current working
# directory: <parent>/data/ISO26262-8.md
src_dir = Path.cwd().parent
data_dir_path = src_dir / "data"
md_path = data_dir_path / "ISO26262-8.md"
print(md_path)

C:\Projects\Technion\FuSa\src\data\ISO26262-8.md


In [4]:
# Parse the markdown file at md_path into a DataFrame of (Number, Heading, Clause) rows.
df = parse_markdown_to_dataframe(md_path)

In [5]:
# Preview up to the first 100 parsed rows.
df.head(100)

Unnamed: 0,Number,Heading,Clause
0,1,Scope,This document is intended to be applied to saf...
1,2,Normative references,The following documents are referred to in the...
2,3,Terms and definitions,"For the purposes of this document, the terms, ..."
3,4,Requirements for compliance,
4,4.1,Purpose,This clause describes how: - a) to achieve com...
...,...,...,...
95,14.4.5.2,Target values for proven in use,NOTE When any ASIL is not yet assigned to the ...
96,14.4.5.2.1,,The rationale for the calculation of the evalu...
97,14.4.5.2.2,,The evaluation period of the candidate shall r...
98,14.4.5.2.3,,The observation period of each specimen with t...


In [52]:
# Number of rows in the parsed frame.
len(df)

139

### Save dataframe

In [56]:
# Destination CSV for the parsed clauses, inside the data directory.
csv_path = data_dir_path / "ISO26262-8_df.csv"
print(csv_path)

C:\Projects\Technion\FuSa\src\data\ISO26262-8_df.csv


In [54]:
# Write the frame to csv_path. With to_csv's default index=True the row index
# is written too, as an unnamed first column.
# NOTE(review): clause numbers such as "4.1" may be re-read as floats by
# read_csv unless the reader forces a string dtype -- confirm with consumers.
df.to_csv(csv_path)