# DOX to RST Converter

This notebook parses `.dox` files in the `docs/cxx` directory and generates one `.rst` file per documented class, covering the class description, its attributes, and its inputs. This converts the C++ documentation from Doxygen format to Sphinx-compatible reStructuredText.

## Import Required Libraries

Import libraries such as os, re, and any XML or text parsing libraries like xml.etree.ElementTree or BeautifulSoup.

In [None]:
import os
import re
import sys
import glob
from bs4 import BeautifulSoup
from pathlib import Path
import xml.etree.ElementTree as ET

# Check if BeautifulSoup is installed, install if not
try:
    from bs4 import BeautifulSoup
except ImportError:
    print("BeautifulSoup not found. Installing...")
    !pip install beautifulsoup4
    from bs4 import BeautifulSoup

## Define Directory Paths

Set the path for the docs/cxx directory and the output directory for the .rst files.

In [None]:
# Repository root; the source and output locations are derived from it
base_dir = Path("/workspaces/AnalysisG")

# Location of the Doxygen (.dox) sources
dox_dir = base_dir.joinpath("docs", "cxx")

# Location where the generated .rst pages are written
rst_output_dir = base_dir.joinpath("docs", "sphinx", "classes")

# Make sure the output location exists; an already existing directory is fine
os.makedirs(rst_output_dir, exist_ok=True)

print(f"DOX files directory: {dox_dir}")
print(f"RST output directory: {rst_output_dir}")

## Parse .dox Files

Recursively find all .dox files under the directory, define a helper that reads a file's content, and preview the first file found.

In [None]:
def find_dox_files(directory):
    """Find all .dox files in the given directory and subdirectories.

    Args:
        directory: Root directory to search (string or path-like).

    Returns:
        A sorted list of ``Path`` objects, one per regular file whose name
        ends in ``.dox``. The list is empty when nothing matches.
    """
    # Sorting makes the processing order reproducible instead of depending on
    # filesystem enumeration order. Directories that merely happen to be
    # named "*.dox" are skipped because they cannot be read as text.
    return sorted(
        path for path in Path(directory).glob("**/*.dox") if path.is_file()
    )

# Recursively collect the .dox sources under dox_dir and report how many were found
dox_files = find_dox_files(dox_dir)
print(f"Found {len(dox_files)} .dox files")

# Read a .dox file as UTF-8 text
def read_dox_file(file_path):
    """Return the text of ``file_path``, or None when it cannot be read.

    Any exception raised while opening or reading is reported with print()
    and turned into a None result instead of propagating.
    """
    try:
        with open(file_path, encoding='utf-8') as handle:
            text = handle.read()
    except Exception as err:
        print(f"Error reading file {file_path}: {err}")
        return None
    return text

# Peek at the first discovered file to get a feel for the .dox structure
if dox_files:
    sample_file = dox_files[0]
    print(f"Sample file: {sample_file}")
    sample_content = read_dox_file(sample_file)
    if sample_content:
        # Only show the leading part so the cell output stays short
        print(f"First 500 characters of content:\n{sample_content[:500]}")
    else:
        print(f"First 500 characters of content:\n{'No content'}")

## Extract Class Information

Use parsing logic to extract class names, attributes, and inputs from the .dox file content.

In [None]:
# One "\li \c <name> <description>" entry. The terminator is a lookahead so
# the next entry's "\li" is left in place for the following match; consuming
# it would make finditer skip every second entry. The \b keeps commands that
# merely start with "\li" (such as "\link" or "\line") from ending a description.
_LIST_ITEM_RE = re.compile(
    r'\\li\s+\\c\s+([A-Za-z0-9_]+)(.*?)(?=\\li\b|\Z)', re.DOTALL
)


def _extract_section_items(dox_content, section_suffix):
    """Return the ``\\li \\c`` entries of the section named ``*_<section_suffix>``."""
    section_match = re.search(
        r'\\section\s+[A-Za-z0-9_]+_' + re.escape(section_suffix)
        + r'\s+(.*?)(?:\\section|\\subsection|\Z)',
        dox_content,
        re.DOTALL,
    )
    if not section_match:
        return []
    return [
        {'name': item.group(1), 'description': item.group(2).strip()}
        for item in _LIST_ITEM_RE.finditer(section_match.group(1))
    ]


def extract_class_info(dox_content):
    """
    Extract class name, attributes, and inputs from a .dox file content.

    Args:
        dox_content: Full text of a .dox file. Empty text or None yields None.

    Returns a dictionary with:
    - class_name: The name following the first ``\\class`` command, or None
      when the text contains no such command
    - class_description: The text between that name and the first
      ``\\section`` command (or the end of the text), stripped
    - attributes: List of {'name', 'description'} dictionaries, one per
      ``\\li \\c <name>`` entry of the first ``\\section <id>_attributes``
    - inputs: Same structure, taken from the first ``\\section <id>_inputs``
    """
    if not dox_content:
        return None

    class_info = {
        'class_name': None,
        'class_description': "",
        'attributes': [],
        'inputs': []
    }

    # Class name: the token following the first \class command
    class_match = re.search(r'\\class\s+([A-Za-z0-9_:]+)', dox_content)
    if class_match:
        class_info['class_name'] = class_match.group(1)

    # Class description: everything after the class name up to the first
    # \section command or the end of the text
    desc_match = re.search(
        r'\\class\s+[A-Za-z0-9_:]+\s+(.*?)(?:\\section|\Z)',
        dox_content,
        re.DOTALL,
    )
    if desc_match:
        class_info['class_description'] = desc_match.group(1).strip()

    # Attributes and inputs share the same list layout, so one helper parses both
    class_info['attributes'] = _extract_section_items(dox_content, 'attributes')
    class_info['inputs'] = _extract_section_items(dox_content, 'inputs')

    return class_info

# Try the extraction on the sample file and summarise what was found
if dox_files and sample_content:
    class_info = extract_class_info(sample_content)
    print(
        "Extracted class information:",
        f"Class name: {class_info['class_name']}",
        f"Class description: {class_info['class_description'][:100]}...",
        f"Attributes: {len(class_info['attributes'])}",
        f"Inputs: {len(class_info['inputs'])}",
        sep="\n",
    )

## Generate .rst Files

Create .rst files for each class, formatting the extracted information into a structured .rst format.

In [None]:
def _indent_directive_body(text, prefix="   "):
    """Indent every non-blank line of ``text`` so it stays inside a directive body."""
    # Each line is stripped first: source indentation carried over from the
    # .dox file would otherwise change how the RST parser reads the paragraph.
    return "\n".join(
        prefix + line.strip() if line.strip() else ""
        for line in str(text).splitlines()
    )


def generate_rst_content(class_info):
    """Generate RST content from the extracted class information.

    Args:
        class_info: Dictionary with the keys 'class_name',
            'class_description', 'attributes' and 'inputs'; the last two are
            lists of {'name', 'description'} dictionaries.

    Returns:
        The RST document as a single string, or None when ``class_info`` is
        empty or has no class name.
    """
    if not class_info or not class_info['class_name']:
        return None

    class_name = str(class_info['class_name'])

    # Title: the class name underlined with '=' of matching length
    rst_content = [class_name, "=" * len(class_name), ""]

    # Optional description paragraph
    if class_info['class_description']:
        rst_content.extend([class_info['class_description'], ""])

    # Both sections have the same layout; only the heading and directive differ.
    # NOTE(review): `input` does not look like a built-in Sphinx directive --
    # confirm that a project extension registers it.
    sections = (
        ("Attributes", "attribute", class_info['attributes']),
        ("Inputs", "input", class_info['inputs']),
    )
    for title, directive, entries in sections:
        if not entries:
            continue
        rst_content.extend([title, "-" * len(title), ""])
        for entry in entries:
            rst_content.extend([
                f".. {directive}:: {entry['name']}",
                "",
                # Indent all lines, not just the first, so multi-line
                # descriptions remain part of the directive body
                _indent_directive_body(entry['description']),
                "",
            ])

    # Join all lines with line breaks
    return "\n".join(rst_content)

def save_rst_file(class_name, rst_content, output_dir):
    """Write ``rst_content`` to ``<output_dir>/<unqualified class name>.rst``.

    Returns True when the file was written. Returns False when the class name
    or the content is empty, or when writing fails (the error is printed).
    """
    if not (class_name and rst_content):
        return False

    # The file is named after the last '::'-separated component only
    unqualified = class_name.split('::')[-1]
    target = os.path.join(output_dir, f"{unqualified}.rst")

    try:
        with open(target, 'w', encoding='utf-8') as handle:
            handle.write(rst_content)
    except Exception as err:
        print(f"Error saving RST file for {class_name}: {err}")
        return False
    return True

# Convert every .dox file and tally the outcomes
def process_all_dox_files(dox_files, output_dir):
    """Convert each .dox file into an .rst file inside ``output_dir``.

    Every file goes through read -> extract -> generate -> save; the first
    step that fails is reported with print() and the file counts as a failure.

    Returns:
        A ``(success_count, failure_count)`` tuple.
    """
    def convert_one(source_path):
        # Returns True only when all four steps succeed for this file
        text = read_dox_file(source_path)
        if not text:
            print("  Failed to read content")
            return False

        info = extract_class_info(text)
        if not (info and info['class_name']):
            print("  Failed to extract class information")
            return False

        rendered = generate_rst_content(info)
        if not rendered:
            print("  Failed to generate RST content")
            return False

        if not save_rst_file(info['class_name'], rendered, output_dir):
            print("  Failed to save RST file")
            return False

        print(f"  Successfully created RST file for {info['class_name']}")
        return True

    outcomes = []
    for dox_file in dox_files:
        print(f"Processing {dox_file}...")
        outcomes.append(convert_one(dox_file))

    success_count = sum(outcomes)
    return success_count, len(outcomes) - success_count

# Render the sample class first and show the beginning of the result
if dox_files and sample_content and class_info:
    rst_content = generate_rst_content(class_info)
    print("\nGenerated RST content sample:")
    if rst_content:
        print(rst_content[:300] + "...")
    else:
        print("Failed to generate RST content")

In [None]:
# Convert everything that was discovered
print("\nProcessing all .dox files...")
success, failure = process_all_dox_files(dox_files, rst_output_dir)
print(f"\nProcessing complete. Successfully processed {success} files, {failure} failures.")

# Show what ended up in the output directory
print("\nGenerated .rst files:")
rst_files = [*Path(rst_output_dir).glob("*.rst")]
for rst_file in rst_files[:10]:  # cap the listing at ten names
    print(f"- {rst_file.name}")

# Anything beyond the first ten is only counted, not listed
if rst_files[10:]:
    print(f"... and {len(rst_files) - 10} more files")