Skip to content

Commit

Permalink
move HTML normalization into the parser itself
Browse files Browse the repository at this point in the history
these serializing rules are actually specified by the microformats
parsing spec, so they do belong in the parser, not just in the tests.
  • Loading branch information
willnorris committed Aug 21, 2018
1 parent 4fc1a06 commit 8202255
Show file tree
Hide file tree
Showing 2 changed files with 6 additions and 5 deletions.
6 changes: 6 additions & 0 deletions microformats.go
Original file line number Diff line number Diff line change
Expand Up @@ -357,6 +357,12 @@ func (p *parser) walk(node *html.Node) {
html.Render(&buf, c)
}
htmlbody = strings.TrimSpace(buf.String())

// HTML spec: Serializing HTML Fragments algorithm does not include
// a trailing slash, so remove it. Nor should apostrophes be
// encoded, which golang.org/x/net/html is doing.
htmlbody = strings.Replace(htmlbody, `/>`, `>`, -1)
htmlbody = strings.Replace(htmlbody, `&#39;`, `'`, -1)
case "dt":
if value == nil {
value = getDateTimeValue(node)
Expand Down
5 changes: 0 additions & 5 deletions testsuite_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -124,8 +124,6 @@ func runTest(t *testing.T, test string) {
if err != nil {
t.Fatalf("error reading file %q: %v", test+".json", err)
}
// normalize self-closing HTML tags to match what net/html produces
expectedJSON = bytes.Replace(expectedJSON, []byte(" />"), []byte("/>"), -1)

want := make(map[string]interface{})
err = json.Unmarshal(expectedJSON, &want)
Expand All @@ -134,9 +132,6 @@ func runTest(t *testing.T, test string) {
}

outputJSON, _ := json.Marshal(data)
// reverse golang.org/x/net/html's escaping of apostrophes
outputJSON = bytes.Replace(outputJSON, []byte(`\u0026#39;`), []byte("'"), -1)

got := make(map[string]interface{})
err = json.Unmarshal(outputJSON, &got)
if err != nil {
Expand Down

0 comments on commit 8202255

Please sign in to comment.