Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
191 changes: 190 additions & 1 deletion V2er/Sources/RichView/Converters/HTMLToMarkdownConverter.swift
Original file line number Diff line number Diff line change
Expand Up @@ -173,8 +173,117 @@ public class HTMLToMarkdownConverter {
case "hr":
result += "\n---\n"

// Table support
case "table":
result += try convertTable(childElement)

case "thead", "tbody", "tfoot":
// These are handled by table, but if encountered alone, process children
result += try convertElement(childElement)

case "tr", "th", "td":
// These should be handled by table, but if encountered alone, process children
result += try convertElement(childElement)

// Strikethrough
case "del", "s", "strike":
let content = try convertElement(childElement)
result += "~~\(content)~~"

// Underline - no standard markdown, preserve as HTML for custom renderer
case "u", "ins":
let content = try convertElement(childElement)
result += "<u>\(content)</u>"

// Superscript/subscript - preserve as HTML for custom renderer
case "sup":
let content = try convertElement(childElement)
result += "<sup>\(content)</sup>"

case "sub":
let content = try convertElement(childElement)
result += "<sub>\(content)</sub>"

// Mark/highlight - render with markers
case "mark":
let content = try convertElement(childElement)
result += "==\(content)=="

// Definition list
case "dl":
result += try convertDefinitionList(childElement)

case "dt":
let content = try convertElement(childElement)
result += "\n**\(content)**\n"

case "dd":
let content = try convertElement(childElement)
result += ": \(content)\n"

// Abbreviation - just show the text with title
case "abbr":
let content = try convertElement(childElement)
if let title = try? childElement.attr("title"), !title.isEmpty {
result += "\(content) (\(title))"
} else {
result += content
}

// Citation
case "cite":
let content = try convertElement(childElement)
result += "*\(content)*"

// Keyboard input
case "kbd":
let content = try convertElement(childElement)
result += "`\(content)`"

// Sample output
case "samp":
let content = try convertElement(childElement)
result += "`\(content)`"

// Variable
case "var":
let content = try convertElement(childElement)
result += "*\(content)*"

// Small text
case "small":
let content = try convertElement(childElement)
result += content

// Figure and figcaption
case "figure":
result += try convertElement(childElement)

case "figcaption":
let content = try convertElement(childElement)
result += "\n*\(content)*\n"

// Address
case "address":
let content = try convertElement(childElement)
result += "\n*\(content)*\n"

// Time - just show the text
case "time":
let content = try convertElement(childElement)
result += content

// Details/summary - collapsible sections
case "details":
result += try convertElement(childElement)

case "summary":
let content = try convertElement(childElement)
result += "\n**\(content)**\n"

// Container elements - just process children
case "div", "span", "body", "html":
case "div", "span", "body", "html", "article", "section", "nav", "aside",
"header", "footer", "main", "caption":
result += try convertElement(childElement)

default:
Expand Down Expand Up @@ -212,6 +321,86 @@ public class HTMLToMarkdownConverter {
return result
}

/// Convert a table element to a Markdown pipe table.
///
/// Collects the `th`/`td` children of every `tr` matched under `element`, pads
/// short rows with empty cells so every row has the same number of columns, and
/// emits one `| a | b |` line per row. A `| --- | --- |` separator line is written
/// after the first collected row, so that row always acts as the header whether
/// or not it contains `th` cells.
///
/// - Parameter element: The table element to convert.
/// - Returns: The Markdown table surrounded by newlines, or an empty string when
///   no `th`/`td` cells are found.
/// - Throws: Errors raised by `select` or `convertElement`.
private func convertTable(_ element: Element) throws -> String {
var result = "\n"
var rows: [[String]] = []

// Get all rows from thead and tbody
// NOTE(review): `select("tr")` presumably also matches rows of tables nested
// inside a cell — confirm against SwiftSoup selector semantics.
let allRows = try element.select("tr")

for row in allRows {
var cells: [String] = []

// Get th and td cells (direct children of the row only; other tags are ignored)
for cell in row.children() {
let tagName = cell.tagName().lowercased()
if tagName == "th" || tagName == "td" {
// A table row must stay on a single line, so newlines in the converted
// cell content are flattened to spaces.
let content = try convertElement(cell)
.replacingOccurrences(of: "\n", with: " ")
Copy link

Copilot AI Dec 1, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Table cells containing pipe characters (|) are not escaped, which will break the markdown table structure. For example, a cell containing "option A | option B" would be split into multiple columns.

Consider escaping pipe characters in cell content before building the table:

let content = try convertElement(cell)
    .replacingOccurrences(of: "\n", with: " ")
    .replacingOccurrences(of: "|", with: "\\|")  // Escape pipes
    .trimmingCharacters(in: .whitespaces)
Suggested change
.replacingOccurrences(of: "\n", with: " ")
.replacingOccurrences(of: "\n", with: " ")
.replacingOccurrences(of: "|", with: "\\|") // Escape pipes for Markdown tables

Copilot uses AI. Check for mistakes.
.replacingOccurrences(of: "|", with: "\\|") // Escape pipes for Markdown tables
.trimmingCharacters(in: .whitespaces)
cells.append(content)
}
}

// Rows without any th/td cells contribute nothing to the table
if !cells.isEmpty {
rows.append(cells)
}
}

guard !rows.isEmpty else { return "" }

// The widest row determines the column count of the whole table
let columnCount = rows.map { $0.count }.max() ?? 0
guard columnCount > 0 else { return "" }

// Normalize rows to have the same column count (pad on the right with empty cells)
let normalizedRows = rows.map { row -> [String] in
var normalized = row
while normalized.count < columnCount {
normalized.append("")
}
return normalized
}

// Build markdown table
for (index, row) in normalizedRows.enumerated() {
result += "| " + row.joined(separator: " | ") + " |\n"

// Add separator after header row
if index == 0 {
let separator = Array(repeating: "---", count: columnCount)
result += "| " + separator.joined(separator: " | ") + " |\n"
}
}

result += "\n"
return result
}

/// Convert a definition list (`<dl>`) to Markdown.
///
/// Each `dt` child becomes a bold line of its own, each `dd` child becomes a
/// `: `-prefixed line, and any other child contributes its converted content
/// unchanged. The output starts and ends with a newline.
///
/// - Parameter element: The definition-list element whose children are converted.
/// - Returns: The Markdown text for the list.
/// - Throws: Errors raised by `convertElement`.
private func convertDefinitionList(_ element: Element) throws -> String {
    var pieces: [String] = ["\n"]

    for entry in element.children() {
        let tag = entry.tagName().lowercased()
        let rendered = try convertElement(entry)

        if tag == "dt" {
            // Term: bold, on its own line
            pieces.append("\n**\(rendered)**\n")
        } else if tag == "dd" {
            // Description: colon-prefixed line
            pieces.append(": \(rendered)\n")
        } else {
            // Anything else passes through as converted
            pieces.append(rendered)
        }
    }

    pieces.append("\n")
    return pieces.joined()
}

/// Escape special Markdown characters
private func escapeMarkdown(_ text: String) -> String {
// Only escape characters that would cause markdown parsing issues
Expand Down
136 changes: 136 additions & 0 deletions V2er/Sources/RichView/Renderers/MarkdownRenderer.swift
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,12 @@ public class MarkdownRenderer {
} else if line.starts(with: "---") {
// Horizontal rule
attributedString.append(AttributedString("—————————————\n"))
} else if line.starts(with: "|") && line.hasSuffix("|") {
// Markdown table
let (tableBlock, linesConsumed) = extractTableBlock(lines, startIndex: index)
attributedString.append(renderTable(tableBlock))
index += linesConsumed
continue
} else {
// Regular paragraph with inline formatting
attributedString.append(renderInlineMarkdown(line))
Expand Down Expand Up @@ -296,6 +302,46 @@ public class MarkdownRenderer {
continue
}

// Check for strikethrough
if let strikeMatch = currentText.firstMatch(of: /~~(.+?)~~/) {
// Add text before strikethrough
let beforeRange = currentText.startIndex..<strikeMatch.range.lowerBound
if !beforeRange.isEmpty {
result.append(renderPlainText(String(currentText[beforeRange])))
}

// Add strikethrough text
var strikeText = AttributedString(String(strikeMatch.1))
strikeText.font = .system(size: stylesheet.body.fontSize)
strikeText.foregroundColor = stylesheet.body.color.uiColor
strikeText.strikethroughStyle = .single
result.append(strikeText)

// Continue with remaining text
currentText = String(currentText[strikeMatch.range.upperBound...])
continue
}

// Check for highlight/mark
if let highlightMatch = currentText.firstMatch(of: /==(.+?)==/) {
// Add text before highlight
let beforeRange = currentText.startIndex..<highlightMatch.range.lowerBound
if !beforeRange.isEmpty {
result.append(renderPlainText(String(currentText[beforeRange])))
}

// Add highlighted text
var highlightText = AttributedString(String(highlightMatch.1))
highlightText.font = .system(size: stylesheet.body.fontSize)
highlightText.foregroundColor = stylesheet.body.color.uiColor
highlightText.backgroundColor = Color.yellow.opacity(0.3)
result.append(highlightText)

// Continue with remaining text
currentText = String(currentText[highlightMatch.range.upperBound...])
continue
}

// No more special elements, add remaining text
result.append(renderPlainText(currentText))
break
Expand All @@ -322,4 +368,94 @@ public class MarkdownRenderer {
let content = String(match.2)
return (number, content)
}

// MARK: - Table Rendering

/// Extract a contiguous Markdown table block from `lines`.
///
/// Starting at `startIndex`, consumes consecutive lines that both begin and end
/// with `|`. Separator rows (`| --- | :---: |` and similar) are consumed but
/// produce no row. Every other line is split into cells on *unescaped* pipes;
/// an escaped pipe (`\|`) is kept inside its cell and unescaped to a literal `|`.
/// Each cell is trimmed of surrounding whitespace.
///
/// - Parameters:
///   - lines: All lines of the document being rendered.
///   - startIndex: Index of the first line to examine.
/// - Returns: The parsed rows (one `[String]` of cells per non-separator line)
///   and the number of lines consumed, separator rows included.
private func extractTableBlock(_ lines: [String], startIndex: Int) -> ([[String]], Int) {
    var rows: [[String]] = []
    var index = startIndex

    while index < lines.count {
        let line = lines[index]

        // The table block ends at the first line that is not a table row
        guard line.starts(with: "|") && line.hasSuffix("|") else {
            break
        }

        // Skip separator row (| --- | --- | or with colons for alignment)
        if line.range(of: #"^\|\s*(:?-+:?)\s*(\|\s*(:?-+:?)\s*)*\|$"#, options: .regularExpression) != nil {
            index += 1
            continue
        }

        // Drop exactly one delimiter from each end. Trimming a whole run of
        // pipes would silently swallow empty edge cells such as `|| a |`.
        let inner = line.dropFirst().dropLast()

        // Split on unescaped pipes only, so `\|` stays inside its cell.
        var cells: [String] = []
        var current = ""
        for character in inner {
            switch character {
            case "|" where current.hasSuffix("\\"):
                // Escaped pipe: replace the backslash with a literal pipe
                current.removeLast()
                current.append("|")
            case "|":
                cells.append(current)
                current = ""
            default:
                current.append(character)
            }
        }
        cells.append(current)

        rows.append(cells.map { $0.trimmingCharacters(in: .whitespaces) })

        index += 1
    }

    return (rows, index - startIndex)
}

/// Render a parsed Markdown table as an attributed string.
///
/// Each row is written on its own line with its cells joined by a dimmed ` │ `
/// separator. Cells are rendered through `renderInlineMarkdown`; cells of the
/// first row are then given a semibold body-size font to mark them as the
/// header. When the table has more than one row, a dimmed rule of 40 `─`
/// characters follows the header row. The output is wrapped in leading and
/// trailing newlines.
///
/// - Parameter rows: Table rows, each an array of cell strings; the first row is
///   treated as the header.
/// - Returns: The rendered table, or an empty `AttributedString` when `rows` is
///   empty or contains no cells at all.
private func renderTable(_ rows: [[String]]) -> AttributedString {
    // Nothing to draw when there are no rows or every row is empty
    guard rows.contains(where: { !$0.isEmpty }) else { return AttributedString() }

    var result = AttributedString("\n")

    for (rowIndex, row) in rows.enumerated() {
        let isHeader = rowIndex == 0

        for (cellIndex, cell) in row.enumerated() {
            var cellText = renderInlineMarkdown(cell)

            // Apply header style for first row
            if isHeader {
                cellText.font = .system(size: stylesheet.body.fontSize, weight: .semibold)
            }

            result.append(cellText)

            // Add separator between cells, but not after the last one
            if cellIndex < row.count - 1 {
                var separator = AttributedString(" │ ")
                separator.foregroundColor = Color.gray.opacity(0.5)
                result.append(separator)
            }
        }

        result.append(AttributedString("\n"))

        // Add separator line after header, only when body rows follow
        if isHeader && rows.count > 1 {
            var separatorLine = AttributedString(String(repeating: "─", count: 40) + "\n")
            separatorLine.foregroundColor = Color.gray.opacity(0.3)
            result.append(separatorLine)
        }
    }

    result.append(AttributedString("\n"))
    return result
}
}
Loading
Loading