Skip to content

Commit

Permalink
fix: avoid panics in ParseNode()
Browse files Browse the repository at this point in the history
ParseNode(node, nil) should not panic even if node does not contain a
valid absolute base path in its <head>, since the document can still
contain usable microformats with absolute paths or no paths at all.

ParseNode(nil, base) should return nil instead of panicking, because why
panic?
  • Loading branch information
nekr0z committed Jan 31, 2023
1 parent ffbbdae commit 3f6d1e0
Show file tree
Hide file tree
Showing 2 changed files with 32 additions and 3 deletions.
15 changes: 12 additions & 3 deletions microformats.go
Original file line number Diff line number Diff line change
Expand Up @@ -108,23 +108,32 @@ type parser struct {
}

// Parse the microformats found in the HTML document read from r. baseURL is
// the URL this document was retrieved from and is used to resolve any
// relative URLs.
// the URL this document was retrieved from and is used to expand any
// relative URLs. If baseURL is nil and the base URL is not referenced in the
// document, relative URLs are not expanded.
func Parse(r io.Reader, baseURL *url.URL) *Data {
	// The parse error is deliberately discarded. ParseNode returns nil when
	// it is handed a nil node, so a failed parse presumably surfaces to the
	// caller as a nil *Data (assumes html.Parse yields a nil node on error;
	// TODO confirm).
	root, _ := html.Parse(r)
	return ParseNode(root, baseURL)
}

// ParseNode parses the microformats found in doc. baseURL is the URL this
// document was retrieved from and is used to resolve any relative URLs.
// document was retrieved from and is used to expand any relative URLs. If
// baseURL is nil and the base URL is not referenced in the document,
// relative URLs are not expanded.
func ParseNode(doc *html.Node, baseURL *url.URL) *Data {
if doc == nil { // makes no sense to go further
return nil
}
p := new(parser)
p.curData = &Data{
Items: make([]*Microformat, 0),
Rels: make(map[string][]string),
RelURLs: make(map[string]*RelURL),
}
p.base = baseURL
if p.base == nil { // can make sense if base can be inferred from contents
p.base = &url.URL{}
}
p.baseFound = false
p.root = doc
p.walk(doc)
Expand Down
20 changes: 20 additions & 0 deletions microformats_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -561,3 +561,23 @@ func Test_GetFirstPropValue(t *testing.T) {
}
}
}

// Test_ParseNodeNil feeds ParseNode a nil base URL for documents that carry
// an absolute <base>, a relative <base>, or no <base> element at all.
func Test_ParseNodeNil(t *testing.T) {
	type testCase struct {
		name string
		html string
	}
	cases := []testCase{
		{name: "absolute", html: "<html><head><base href=\"https://example.com\"></head></html>"},
		{name: "relative", html: "<html><head><base href=\"./something\"></head></html>"},
		// NOTE(review): the original comment claims parseNode yields nil for
		// this input; confirm against the parseNode helper.
		{name: "none", html: "<html><head></head></html>"},
	}
	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			node, err := parseNode(tc.html)
			if err != nil {
				t.Fatalf("Error parsing HTML: %v", err)
			}
			// The return value is ignored: the call merely must not panic.
			ParseNode(node, nil)
		})
	}
}

0 comments on commit 3f6d1e0

Please sign in to comment.