diff --git a/V2er/Sources/RichView/Converters/HTMLToMarkdownConverter.swift b/V2er/Sources/RichView/Converters/HTMLToMarkdownConverter.swift
index 8f4852c..67239a5 100644
--- a/V2er/Sources/RichView/Converters/HTMLToMarkdownConverter.swift
+++ b/V2er/Sources/RichView/Converters/HTMLToMarkdownConverter.swift
@@ -173,8 +173,117 @@ public class HTMLToMarkdownConverter {
case "hr":
result += "\n---\n"
+ // Table support
+ case "table":
+ result += try convertTable(childElement)
+
+ case "thead", "tbody", "tfoot":
+ // These are handled by table, but if encountered alone, process children
+ result += try convertElement(childElement)
+
+ case "tr", "th", "td":
+ // These should be handled by table, but if encountered alone, process children
+ result += try convertElement(childElement)
+
+ // Strikethrough
+ case "del", "s", "strike":
+ let content = try convertElement(childElement)
+ result += "~~\(content)~~"
+
+ // Underline - no standard markdown, preserve as HTML for custom renderer
+ case "u", "ins":
+ let content = try convertElement(childElement)
+ result += "\(content)"
+
+ // Superscript/subscript - preserve as HTML for custom renderer
+ case "sup":
+ let content = try convertElement(childElement)
+ result += "\(content)"
+
+ case "sub":
+ let content = try convertElement(childElement)
+ result += "\(content)"
+
+ // Mark/highlight - render with markers
+ case "mark":
+ let content = try convertElement(childElement)
+ result += "==\(content)=="
+
+ // Definition list
+ case "dl":
+ result += try convertDefinitionList(childElement)
+
+ case "dt":
+ let content = try convertElement(childElement)
+ result += "\n**\(content)**\n"
+
+ case "dd":
+ let content = try convertElement(childElement)
+ result += ": \(content)\n"
+
+ // Abbreviation - just show the text with title
+ case "abbr":
+ let content = try convertElement(childElement)
+ if let title = try? childElement.attr("title"), !title.isEmpty {
+ result += "\(content) (\(title))"
+ } else {
+ result += content
+ }
+
+ // Citation
+ case "cite":
+ let content = try convertElement(childElement)
+ result += "*\(content)*"
+
+ // Keyboard input
+ case "kbd":
+ let content = try convertElement(childElement)
+ result += "`\(content)`"
+
+ // Sample output
+ case "samp":
+ let content = try convertElement(childElement)
+ result += "`\(content)`"
+
+ // Variable
+ case "var":
+ let content = try convertElement(childElement)
+ result += "*\(content)*"
+
+ // Small text
+ case "small":
+ let content = try convertElement(childElement)
+ result += content
+
+ // Figure and figcaption
+ case "figure":
+ result += try convertElement(childElement)
+
+ case "figcaption":
+ let content = try convertElement(childElement)
+ result += "\n*\(content)*\n"
+
+ // Address
+ case "address":
+ let content = try convertElement(childElement)
+ result += "\n*\(content)*\n"
+
+ // Time - just show the text
+ case "time":
+ let content = try convertElement(childElement)
+ result += content
+
+ // Details/summary - collapsible sections
+ case "details":
+ result += try convertElement(childElement)
+
+ case "summary":
+ let content = try convertElement(childElement)
+ result += "\n**\(content)**\n"
+
// Container elements - just process children
- case "div", "span", "body", "html":
+ case "div", "span", "body", "html", "article", "section", "nav", "aside",
+ "header", "footer", "main", "caption":
result += try convertElement(childElement)
default:
@@ -212,6 +321,86 @@ public class HTMLToMarkdownConverter {
return result
}
+ /// Convert an HTML `<table>` element to a Markdown pipe table.
+ ///
+ /// The first collected row is treated as the header and is followed by a
+ /// `| --- |` separator line. Rows shorter than the widest row are padded
+ /// with empty cells. Only rows that belong to this table are emitted as
+ /// rows; rows of a table nested inside a cell stay inside that cell's
+ /// converted content instead of being duplicated at the outer level.
+ ///
+ /// - Parameter element: The table element to convert.
+ /// - Returns: The Markdown table surrounded by blank lines, or an empty
+ ///   string when the table contains no `th`/`td` cells.
+ /// - Throws: Any error raised while selecting rows or converting a cell.
+ private func convertTable(_ element: Element) throws -> String {
+     /// Whether the nearest enclosing `<table>` of `row` is `element` itself.
+     func belongsToThisTable(_ row: Element) -> Bool {
+         var ancestor = row.parent()
+         while let current = ancestor {
+             if current.tagName().lowercased() == "table" {
+                 return current === element
+             }
+             ancestor = current.parent()
+         }
+         return false
+     }
+
+     var rows: [[String]] = []
+
+     // `select("tr")` matches every descendant row (thead, tbody, tfoot and
+     // nested tables alike), so rows owned by a nested table are filtered out.
+     let allRows = try element.select("tr")
+
+     for row in allRows where belongsToThisTable(row) {
+         var cells: [String] = []
+
+         for cell in row.children() {
+             let tagName = cell.tagName().lowercased()
+             guard tagName == "th" || tagName == "td" else { continue }
+
+             // A table row must stay on one line, and a literal pipe would
+             // otherwise be read as a cell boundary.
+             let content = try convertElement(cell)
+                 .replacingOccurrences(of: "\n", with: " ")
+                 .replacingOccurrences(of: "|", with: "\\|")
+                 .trimmingCharacters(in: .whitespaces)
+             cells.append(content)
+         }
+
+         if !cells.isEmpty {
+             rows.append(cells)
+         }
+     }
+
+     guard let columnCount = rows.map(\.count).max(), columnCount > 0 else {
+         return ""
+     }
+
+     // Pad short rows so every line has the same number of cells.
+     var lines = rows.map { row -> String in
+         let padded = row + Array(repeating: "", count: columnCount - row.count)
+         return "| " + padded.joined(separator: " | ") + " |"
+     }
+
+     // The separator goes directly under the first (header) row.
+     let separator = Array(repeating: "---", count: columnCount)
+     lines.insert("| " + separator.joined(separator: " | ") + " |", at: 1)
+
+     return "\n" + lines.joined(separator: "\n") + "\n\n"
+ }
+
+ /// Convert a `<dl>` element to Markdown.
+ ///
+ /// Each `dt` child becomes a bold line of its own, each `dd` child becomes
+ /// a `: `-prefixed line, and any other child is appended as converted.
+ ///
+ /// - Parameter element: The definition list element.
+ /// - Returns: The converted list wrapped in a leading and trailing newline.
+ /// - Throws: Any error raised while converting a child element.
+ private func convertDefinitionList(_ element: Element) throws -> String {
+     let pieces = try element.children().map { entry -> String in
+         let kind = entry.tagName().lowercased()
+         let body = try convertElement(entry)
+
+         if kind == "dt" {
+             return "\n**\(body)**\n"
+         }
+         if kind == "dd" {
+             return ": \(body)\n"
+         }
+         return body
+     }
+
+     return "\n" + pieces.joined() + "\n"
+ }
+
/// Escape special Markdown characters
private func escapeMarkdown(_ text: String) -> String {
// Only escape characters that would cause markdown parsing issues
diff --git a/V2er/Sources/RichView/Renderers/MarkdownRenderer.swift b/V2er/Sources/RichView/Renderers/MarkdownRenderer.swift
index 1d99ac4..d405aee 100644
--- a/V2er/Sources/RichView/Renderers/MarkdownRenderer.swift
+++ b/V2er/Sources/RichView/Renderers/MarkdownRenderer.swift
@@ -80,6 +80,12 @@ public class MarkdownRenderer {
} else if line.starts(with: "---") {
// Horizontal rule
attributedString.append(AttributedString("—————————————\n"))
+ } else if line.starts(with: "|") && line.hasSuffix("|") {
+ // Markdown table
+ let (tableBlock, linesConsumed) = extractTableBlock(lines, startIndex: index)
+ attributedString.append(renderTable(tableBlock))
+ index += linesConsumed
+ continue
} else {
// Regular paragraph with inline formatting
attributedString.append(renderInlineMarkdown(line))
@@ -296,6 +302,46 @@ public class MarkdownRenderer {
continue
}
+ // Check for strikethrough
+ if let strikeMatch = currentText.firstMatch(of: /~~(.+?)~~/) {
+ // Add text before strikethrough
+ let beforeRange = currentText.startIndex.. ([[String]], Int) {
+ var rows: [[String]] = []
+ var index = startIndex
+
+ while index < lines.count {
+ let line = lines[index]
+
+ // Check if line is a table row
+ guard line.starts(with: "|") && line.hasSuffix("|") else {
+ break
+ }
+
+ // Skip separator row (| --- | --- | or with colons for alignment)
+ if line.range(of: #"^\|\s*(:?-+:?)\s*(\|\s*(:?-+:?)\s*)*\|$"#, options: .regularExpression) != nil {
+ index += 1
+ continue
+ }
+
+ // Parse cells
+ let cells = line
+ .trimmingCharacters(in: CharacterSet(charactersIn: "|"))
+ .components(separatedBy: "|")
+ .map { $0.trimmingCharacters(in: .whitespaces) }
+
+ if !cells.isEmpty {
+ rows.append(cells)
+ }
+
+ index += 1
+ }
+
+ return (rows, index - startIndex)
+ }
+
+ /// Render parsed table rows as an attributed string.
+ ///
+ /// Cells of a row are joined with a grey " │ " divider and each row ends
+ /// with a newline. The first row is given a semibold font, and when more
+ /// rows follow it a grey rule line is drawn beneath it.
+ ///
+ /// - Parameter rows: Table rows, each a list of Markdown cell strings.
+ /// - Returns: The rendered table wrapped in a leading and trailing newline,
+ ///   or an empty attributed string when there is no cell to render.
+ private func renderTable(_ rows: [[String]]) -> AttributedString {
+     // Nothing to draw when there are no rows or every row is empty.
+     guard rows.contains(where: { !$0.isEmpty }) else { return AttributedString() }
+
+     var result = AttributedString("\n")
+
+     for (rowIndex, row) in rows.enumerated() {
+         let isHeader = rowIndex == 0
+
+         for (cellIndex, cell) in row.enumerated() {
+             var cellText = renderInlineMarkdown(cell)
+
+             // The first row acts as the table header.
+             if isHeader {
+                 cellText.font = .system(size: stylesheet.body.fontSize, weight: .semibold)
+             }
+             result.append(cellText)
+
+             // Divider between cells, but not after the last one.
+             if cellIndex < row.count - 1 {
+                 var separator = AttributedString(" │ ")
+                 separator.foregroundColor = Color.gray.opacity(0.5)
+                 result.append(separator)
+             }
+         }
+
+         result.append(AttributedString("\n"))
+
+         // Rule under the header, only when body rows follow.
+         if isHeader && rows.count > 1 {
+             var separatorLine = AttributedString(String(repeating: "─", count: 40) + "\n")
+             separatorLine.foregroundColor = Color.gray.opacity(0.3)
+             result.append(separatorLine)
+         }
+     }
+
+     result.append(AttributedString("\n"))
+     return result
+ }
}
diff --git a/V2erTests/RichView/HTMLToMarkdownConverterTests.swift b/V2erTests/RichView/HTMLToMarkdownConverterTests.swift
index ac37b8e..b75393c 100644
--- a/V2erTests/RichView/HTMLToMarkdownConverterTests.swift
+++ b/V2erTests/RichView/HTMLToMarkdownConverterTests.swift
@@ -233,6 +233,226 @@ class HTMLToMarkdownConverterTests: XCTestCase {
XCTAssertTrue(markdown.contains("\\#"))
}
+ // MARK: - Table Tests
+
+ /// A header row plus one data row becomes a pipe table with a `---` separator.
+ func testBasicTableConversion() throws {
+     let html = """
+     <table>
+         <tr><th>Header 1</th><th>Header 2</th></tr>
+         <tr><td>Cell 1</td><td>Cell 2</td></tr>
+     </table>
+     """
+     let markdown = try converter.convert(html)
+     XCTAssertTrue(markdown.contains("| Header 1 | Header 2 |"))
+     XCTAssertTrue(markdown.contains("| --- | --- |"))
+     XCTAssertTrue(markdown.contains("| Cell 1 | Cell 2 |"))
+ }
+
+ /// Rows wrapped in `<thead>` and `<tbody>` are both picked up by the table conversion.
+ func testTableWithTheadTbody() throws {
+     let html = """
+     <table>
+         <thead>
+             <tr><th>Name</th><th>Value</th></tr>
+         </thead>
+         <tbody>
+             <tr><td>Item</td><td>100</td></tr>
+         </tbody>
+     </table>
+     """
+     let markdown = try converter.convert(html)
+     XCTAssertTrue(markdown.contains("| Name | Value |"))
+     XCTAssertTrue(markdown.contains("| Item | 100 |"))
+ }
+
+ /// Non-ASCII cell text from every row of a multi-row table survives conversion.
+ func testTableWithMultipleRows() throws {
+     let html = """
+     <table>
+         <tr><th>功能模块</th><th>详细说明</th></tr>
+         <tr><td>多种格式</td><td>EPUB/MOBI/AZW3</td></tr>
+         <tr><td>数据同步</td><td>多端覆盖</td></tr>
+     </table>
+     """
+     let markdown = try converter.convert(html)
+     XCTAssertTrue(markdown.contains("功能模块"))
+     XCTAssertTrue(markdown.contains("多种格式"))
+     XCTAssertTrue(markdown.contains("数据同步"))
+ }
+
+ /// A literal `|` inside a cell is escaped so it is not read as a cell boundary.
+ func testTableWithPipeInContent() throws {
+     let html = """
+     <table>
+         <tr><th>Option</th><th>Description</th></tr>
+         <tr><td>A | B</td><td>Choose A or B</td></tr>
+     </table>
+     """
+     let markdown = try converter.convert(html)
+     // Pipes should be escaped in cell content
+     XCTAssertTrue(markdown.contains("A \\| B"))
+ }
+
+ // MARK: - Strikethrough Tests
+
+ /// `<del>` content is wrapped in `~~`.
+ func testDelTagConversion() throws {
+     let html = "<del>Deleted text</del>"
+     let markdown = try converter.convert(html)
+     XCTAssertTrue(markdown.contains("~~Deleted text~~"))
+ }
+
+ /// `<s>` content is wrapped in `~~`.
+ func testSTagConversion() throws {
+     let html = "<s>Strikethrough text</s>"
+     let markdown = try converter.convert(html)
+     XCTAssertTrue(markdown.contains("~~Strikethrough text~~"))
+ }
+
+ /// The legacy `<strike>` tag is wrapped in `~~` like `<del>` and `<s>`.
+ func testStrikeTagConversion() throws {
+     let html = "<strike>Old strike tag</strike>"
+     let markdown = try converter.convert(html)
+     XCTAssertTrue(markdown.contains("~~Old strike tag~~"))
+ }
+
+ // MARK: - Underline Tests
+
+ /// Text inside `<u>` is kept in the converted output.
+ func testUnderlineTagConversion() throws {
+     let html = "<u>Underlined text</u>"
+     let markdown = try converter.convert(html)
+     XCTAssertTrue(markdown.contains("Underlined text"))
+ }
+
+ /// Text inside `<ins>` is kept in the converted output.
+ func testInsTagConversion() throws {
+     let html = "<ins>Inserted text</ins>"
+     let markdown = try converter.convert(html)
+     XCTAssertTrue(markdown.contains("Inserted text"))
+ }
+
+ // MARK: - Superscript/Subscript Tests
+
+ /// Text inside `<sup>` is kept in the converted output.
+ func testSuperscriptConversion() throws {
+     let html = "x<sup>2</sup>"
+     let markdown = try converter.convert(html)
+     XCTAssertTrue(markdown.contains("2"))
+ }
+
+ /// Text inside `<sub>` is kept in the converted output.
+ func testSubscriptConversion() throws {
+     let html = "H<sub>2</sub>O"
+     let markdown = try converter.convert(html)
+     XCTAssertTrue(markdown.contains("2"))
+ }
+
+ // MARK: - Mark/Highlight Tests
+
+ /// `<mark>` content is wrapped in `==` highlight markers.
+ func testMarkTagConversion() throws {
+     let html = "<mark>Highlighted text</mark>"
+     let markdown = try converter.convert(html)
+     XCTAssertTrue(markdown.contains("==Highlighted text=="))
+ }
+
+ // MARK: - Definition List Tests
+
+ /// A `<dl>` renders its term in bold and its definition with a `: ` prefix.
+ func testDefinitionListConversion() throws {
+     let html = """
+     <dl>
+         <dt>Term</dt>
+         <dd>Definition of the term</dd>
+     </dl>
+     """
+     let markdown = try converter.convert(html)
+     XCTAssertTrue(markdown.contains("**Term**"))
+     XCTAssertTrue(markdown.contains(": Definition of the term"))
+ }
+
+ // MARK: - Semantic Element Tests
+
+ /// An `<abbr>` with a `title` shows both the abbreviation and its expansion.
+ func testAbbreviationWithTitle() throws {
+     let html = "<abbr title=\"HyperText Markup Language\">HTML</abbr>"
+     let markdown = try converter.convert(html)
+     XCTAssertTrue(markdown.contains("HTML"))
+     XCTAssertTrue(markdown.contains("HyperText Markup Language"))
+ }
+
+ /// `<cite>` content is emphasised with single asterisks.
+ func testCiteTagConversion() throws {
+     let html = "<cite>Book Title</cite>"
+     let markdown = try converter.convert(html)
+     XCTAssertTrue(markdown.contains("*Book Title*"))
+ }
+
+ /// Each `<kbd>` element becomes its own inline code span.
+ func testKbdTagConversion() throws {
+     let html = "Press <kbd>Ctrl</kbd>+<kbd>C</kbd>"
+     let markdown = try converter.convert(html)
+     XCTAssertTrue(markdown.contains("`Ctrl`"))
+     XCTAssertTrue(markdown.contains("`C`"))
+ }
+
+ /// `<samp>` content becomes an inline code span.
+ func testSampTagConversion() throws {
+     let html = "<samp>Error: File not found</samp>"
+     let markdown = try converter.convert(html)
+     XCTAssertTrue(markdown.contains("`Error: File not found`"))
+ }
+
+ /// `<var>` content is emphasised with single asterisks.
+ func testVarTagConversion() throws {
+     let html = "The variable <var>x</var> is used"
+     let markdown = try converter.convert(html)
+     XCTAssertTrue(markdown.contains("*x*"))
+ }
+
+ /// A `<figure>` keeps its image and renders the caption in italics.
+ func testFigcaptionConversion() throws {
+     let html = """
+     <figure>
+         <img src="https://example.com/image.png" alt="Figure">
+         <figcaption>Image caption</figcaption>
+     </figure>
+     """
+     let markdown = try converter.convert(html)
+     // NOTE(review): assumes images are emitted as `![alt](src)` — confirm
+     // against the converter's `img` handling.
+     XCTAssertTrue(markdown.contains("![Figure]"))
+     XCTAssertTrue(markdown.contains("*Image caption*"))
+ }
+
+ /// `<address>` content is emphasised with single asterisks.
+ func testAddressTagConversion() throws {
+     let html = "<address>Contact us at example@email.com</address>"
+     let markdown = try converter.convert(html)
+     XCTAssertTrue(markdown.contains("*Contact us at example@email.com*"))
+ }
+
+ /// The visible text of a `<time>` element is kept in the converted output.
+ func testTimeTagConversion() throws {
+     let html = "<time datetime=\"2024-01-01\">January 1, 2024</time>"
+     let markdown = try converter.convert(html)
+     XCTAssertTrue(markdown.contains("January 1, 2024"))
+ }
+
+ /// A `<details>` block shows its `<summary>` in bold followed by the hidden content.
+ func testSummaryTagConversion() throws {
+     let html = """
+     <details>
+         <summary>Click to expand</summary>
+         <p>Hidden content</p>
+     </details>
+     """
+     let markdown = try converter.convert(html)
+     XCTAssertTrue(markdown.contains("**Click to expand**"))
+     XCTAssertTrue(markdown.contains("Hidden content"))
+ }
+
+ // MARK: - Container Element Tests
+
+ /// `<article>` is treated as a plain container whose children are converted.
+ func testArticleContainerProcessing() throws {
+     let html = "<article><p>Article content</p></article>"
+     let markdown = try converter.convert(html)
+     XCTAssertTrue(markdown.contains("Article content"))
+ }
+
+ /// `<section>` is treated as a plain container whose children are converted.
+ func testSectionContainerProcessing() throws {
+     let html = "<section><p>Section content</p></section>"
+     let markdown = try converter.convert(html)
+     XCTAssertTrue(markdown.contains("Section content"))
+ }
+
+ /// Links nested inside `<nav>` are still converted to Markdown links.
+ func testNavContainerProcessing() throws {
+     let html = "<nav><a href=\"https://example.com\">Link</a></nav>"
+     let markdown = try converter.convert(html)
+     XCTAssertTrue(markdown.contains("[Link]"))
+ }
+
+ /// `<header>` and `<footer>` are treated as plain containers.
+ func testHeaderFooterProcessing() throws {
+     let html = "<header>Header</header><footer>Footer</footer>"
+     let markdown = try converter.convert(html)
+     XCTAssertTrue(markdown.contains("Header"))
+     XCTAssertTrue(markdown.contains("Footer"))
+ }
+
// MARK: - Performance Tests
func testPerformanceLargeHTML() throws {
@@ -242,4 +462,16 @@ class HTMLToMarkdownConverterTests: XCTestCase {
_ = try? converter.convert(repeatedHTML)
}
}
+
+ /// Measures conversion of a three-column table with one header row and 50 body rows.
+ func testPerformanceComplexTable() throws {
+     var tableHTML = "<table><tr><th>Header 1</th><th>Header 2</th><th>Header 3</th></tr>"
+     for i in 1...50 {
+         tableHTML += "<tr><td>Row \(i) Col 1</td><td>Row \(i) Col 2</td><td>Row \(i) Col 3</td></tr>"
+     }
+     tableHTML += "</table>"
+
+     measure {
+         _ = try? converter.convert(tableHTML)
+     }
+ }
}
\ No newline at end of file