Skip to content

Commit

Permalink
encoding.xml: update parser logic to skip BOM before prolog (#19858)
Browse files Browse the repository at this point in the history
  • Loading branch information
hungrybluedev committed Nov 13, 2023
1 parent e0207b6 commit 5f08d45
Show file tree
Hide file tree
Showing 5 changed files with 51 additions and 2 deletions.
19 changes: 17 additions & 2 deletions vlib/encoding/xml/parser.v
Expand Up @@ -18,6 +18,9 @@ const (
double_dash = '--'.bytes()
c_tag = '[C'.bytes()
data_chars = 'DATA'.bytes()

byte_order_marking_first = u8(0xEF)
byte_order_marking_bytes = [u8(0xBB), 0xBF]
)

// Helper types to assist in parsing
Expand Down Expand Up @@ -296,18 +299,30 @@ fn parse_doctype(mut reader io.Reader) !DocumentType {
}

fn parse_prolog(mut reader io.Reader) !(Prolog, u8) {
// Trim trailing whitespace
// Skip leading whitespace and an optional UTF-8 BOM before the first `<`; any other byte is an error
mut local_buf := [u8(0)]
mut ch := next_char(mut reader, mut local_buf)!
for {
match ch {
` `, `\t`, `\n` {
` `, `\t`, `\r`, `\n` {
ch = next_char(mut reader, mut local_buf)!
continue
}
`<` {
break
}
xml.byte_order_marking_first {
// UTF-8 BOM
mut bom_buf := [u8(0), 0]
if reader.read(mut bom_buf)! != 2 {
return error('Invalid UTF-8 BOM.')
}
if bom_buf != xml.byte_order_marking_bytes {
return error('Invalid UTF-8 BOM.')
}
ch = next_char(mut reader, mut local_buf)!
continue
}
else {
return error('Expecting a prolog or root node starting with "<".')
}
Expand Down
17 changes: 17 additions & 0 deletions vlib/encoding/xml/test/local/20_bom_file/bom_test.v
@@ -0,0 +1,17 @@
module main

import os
import encoding.xml

// test_valid_parsing loads the `workbook.bin` fixture that sits next to this
// test file and checks that the parsed document exposes exactly one `sheet` element.
fn test_valid_parsing() {
	// The fixture is stored with a .bin extension so that the BOM is not
	// stripped from the XML file.
	fixture_dir := os.dir(@FILE)
	fixture_path := os.join_path(fixture_dir, 'workbook.bin')

	// A parse failure fails the test right away; exit(1) also satisfies the
	// `or` block, which must not fall through without a value.
	document := xml.XMLDocument.from_file(fixture_path) or {
		assert false, 'Failed to parse workbook.bin'
		exit(1)
	}

	// The workbook fixture declares a single <sheet> entry.
	sheets := document.get_elements_by_tag('sheet')
	assert sheets.len == 1, 'Expected 1 sheet, got ${sheets.len}'
}
17 changes: 17 additions & 0 deletions vlib/encoding/xml/test/local/20_bom_file/workbook.bin
@@ -0,0 +1,17 @@
<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<workbook xmlns="http://schemas.openxmlformats.org/spreadsheetml/2006/main" xmlns:r="http://schemas.openxmlformats.org/officeDocument/2006/relationships" xmlns:mc="http://schemas.openxmlformats.org/markup-compatibility/2006" mc:Ignorable="x15" xmlns:x15="http://schemas.microsoft.com/office/spreadsheetml/2010/11/main">
<fileVersion appName="xl" lastEdited="6" lowestEdited="6" rupBuild="14420"/>
<workbookPr defaultThemeVersion="164011"/>
<bookViews>
<workbookView xWindow="0" yWindow="0" windowWidth="22260" windowHeight="12645"/>
</bookViews>
<sheets>
<sheet name="Sheet1" sheetId="1" r:id="rId1"/>
</sheets>
<calcPr calcId="162913"/>
<extLst>
<ext uri="{140A7094-0E35-4892-8432-C4D2E57EDEB5}" xmlns:x15="http://schemas.microsoft.com/office/spreadsheetml/2010/11/main">
<x15:workbookPr chartTrackingRefBase="1"/>
</ext>
</extLst>
</workbook>

0 comments on commit 5f08d45

Please sign in to comment.