Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 11 additions & 0 deletions benchmarks/benchmark_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -30,4 +30,15 @@ func TestFetchPostCovers(t *testing.T){
t.Log(node.GetInnerText())
}
t.Log(time.Since(tim).Seconds())
}

// toNodeTree fetches url with an HTTP GET and decodes the response body
// into a node tree.
//
// It returns nil when the request fails or when the response status is
// not 200 OK. Otherwise it returns whatever node GoHtml.Decode produced.
func toNodeTree(url string) *GoHtml.Node {
	res, err := http.Get(url)
	if err != nil {
		return nil
	}
	// Register the close before inspecting the status so the body is
	// released on every path, including non-200 responses.
	defer res.Body.Close()

	if res.StatusCode != http.StatusOK {
		return nil
	}

	// The second result of Decode is deliberately discarded: this helper
	// only reports the decoded node to its caller.
	rootNode, _ := GoHtml.Decode(res.Body)
	return rootNode
}
3 changes: 2 additions & 1 deletion parser_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -52,5 +52,6 @@ func ExampleDecode() {
title = titleNode.GetInnerText()
}
fmt.Println(title)
//Output: User Profile
//Output:
//User Profile
}
9 changes: 3 additions & 6 deletions querying.go
Original file line number Diff line number Diff line change
Expand Up @@ -101,14 +101,13 @@ func (node *Node) GetElementsById(idName string) NodeList {
}

/*
QuerySearch search returns a iterator that traverse through the node tree from given node and passes nodes that matches the given selector.
*/
QuerySearch returns an iterator that traverses the node tree starting from the given node and yields the nodes that match the given selector.
*/
func QuerySearch(node *Node, selector string) iter.Seq[*Node] {
traverser := NewTraverser(node)
return func(yield func(node *Node) bool) {
selectorTokens := TokenizeSelectorsAndCombinators(selector)
iter := traverser.Walkthrough
for node := range iter {
for node := range traverser.Walkthrough {
if matchFromRightMostSelectors(node, selectorTokens) && !yield(node) {
return
}
Expand All @@ -128,7 +127,6 @@ func matchFromRightMostSelectors(node *Node, selectorTokens []CombinatorEl) bool
return node != nil
}


// QuerySelector returns the first node that matches with the selector from the node.
func (node *Node) QuerySelector(selector string) *Node {
iter := QuerySearch(node, selector)
Expand All @@ -148,4 +146,3 @@ func (node *Node) QuerySelectorAll(selector string) NodeList {
}
return nodeList
}

20 changes: 11 additions & 9 deletions selectors.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ package GoHtml

import (
"strings"

"golang.org/x/net/html"
)

Expand All @@ -13,8 +14,8 @@ const (
Tag
)

//Selector struct represents a single css selector
//Ex: .my-class, #video, div
// Selector struct represents a single css selector
// Ex: .my-class, #video, div
type Selector struct {
selector string
selectorName string
Expand Down Expand Up @@ -42,8 +43,8 @@ func matchNode(node *Node, basicSelectorName string, basicSelectorType BasicSele
return false
}

//NewSelector takes a single css selector and returns a Selector struct.
//Selector string should be only of basic selector.
// NewSelector takes a single css selector and returns a Selector struct.
// The selector string must contain only a single basic selector.
func NewSelector(selector string) Selector {
selector = strings.TrimSpace(html.EscapeString(selector))
selectorStruct := Selector{}
Expand All @@ -60,7 +61,7 @@ func NewSelector(selector string) Selector {
selectorStruct.selectorType = Tag
}

selectorStruct.selector = strings.ToLower(selector)
//selectorStruct.selector = strings.ToLower(selector)
if selectorStruct.selectorType != Tag {
selectorStruct.selectorName = selector[1:]
} else {
Expand All @@ -80,26 +81,26 @@ const (
NoneCombinator
)

//CombinatorEl is used to represent selectors that are around a combinator.
// CombinatorEl is used to represent selectors that are around a combinator.
type CombinatorEl struct {
	// Type is the combinator associated with the two selectors.
	Type Combinator
	// Selector1 and Selector2 are the selectors around the combinator.
	Selector1, Selector2 Selector
}

//This takes a selector or combinators and selectors and then returns a slice of CombinatorEl.
// TokenizeSelectorsAndCombinators takes a selector string, optionally containing combinators, and returns a slice of CombinatorEl.
func TokenizeSelectorsAndCombinators(selector string) []CombinatorEl {
iter := func(yield func(string) bool) {
currentStr := ""
for _, char := range selector {
switch char {
case ' ', '>', '+', '~':
if !yield(currentStr) || !yield(string(char)){
if !yield(currentStr) || !yield(string(char)) {
return
}
currentStr = ""
default:
currentStr+=string(char)
currentStr += string(char)
}
}
yield(currentStr)
Expand Down Expand Up @@ -167,6 +168,7 @@ func (ce *CombinatorEl) getDescended(node *Node) *Node {
if matchNode(parentNode, ce.Selector1.selectorName, ce.Selector1.selectorType) {
return parentNode
}

parentNode = parentNode.GetParent()
}
return nil
Expand Down