Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
25 commits
Select commit Hold shift + click to select a range
ef59a25
Add closest
udan-jayanith Aug 16, 2025
8c107e4
Add QuerySelector and it need to be tested furthermore.
udan-jayanith Aug 16, 2025
dfd0e60
pop function bug fixed
udan-jayanith Aug 16, 2025
a3d00d4
Bug fixed and add QuerySearch. QuerySearch still have to be tested btw
udan-jayanith Aug 16, 2025
3a4713a
tokanizer CurrentNode to GetCurrentNode to match with the library schema
udan-jayanith Aug 16, 2025
771f70c
Fixed deprecated function use error in the parser.go
udan-jayanith Aug 16, 2025
ee66653
Add QuerySelectorAll and redocumented the library. QuerySearch, Query…
udan-jayanith Aug 16, 2025
39f9bbc
Updated the readme.md
udan-jayanith Aug 16, 2025
fa601d3
Fixed extra tabs in readme.md example go code section.
udan-jayanith Aug 16, 2025
447b637
Updated benchmark_test.go
udan-jayanith Aug 17, 2025
bdef2bb
Made GetParent fast. GetParent need be tested manually. GetParent and…
udan-jayanith Aug 17, 2025
423ceea
Bug fixed
udan-jayanith Aug 17, 2025
caaab5f
Removed QueryAll and Query methods
udan-jayanith Aug 18, 2025
1a79a08
Half finished selectors
udan-jayanith Aug 22, 2025
985b84d
Changed future changelog
udan-jayanith Aug 17, 2025
c16f5fc
Add new selectors. Need manual testing. Closest is temparorly comment…
udan-jayanith Aug 23, 2025
5b58d2e
Fixed selectos does not ignore html tag case
udan-jayanith Aug 23, 2025
3286bce
Fixed a bug in selectors and combinators tokenizer
udan-jayanith Aug 25, 2025
56bb8fb
Bug fixes
udan-jayanith Aug 28, 2025
48ebd58
Added Closest again.
udan-jayanith Aug 28, 2025
5b55acd
Refactored the Encode function
udan-jayanith Aug 28, 2025
f764a27
Bug fixses in test file
udan-jayanith Aug 29, 2025
599645e
Updated the documentation and add example codes
udan-jayanith Aug 29, 2025
62755a7
Documented the Tokenizer
udan-jayanith Aug 29, 2025
bc870b7
Updated the README.md
udan-jayanith Aug 29, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .vscode/settings.json
Original file line number Diff line number Diff line change
@@ -1,11 +1,14 @@
{
"cSpell.words": [
"arraystack",
"autoplay",
"Combinators",
"DOCTYPEDTD",
"emirpasic",
"gohtml",
"Kottue",
"linkedliststack",
"println",
"yosssi"
]
}
3 changes: 1 addition & 2 deletions FUTURE-CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,2 +1 @@
## v0.0.3
- Closest

18 changes: 5 additions & 13 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -26,35 +26,27 @@ import (
- Querying

## Example

Here's an example of fetching a website, parsing it, and then using the querying methods.

```go
res, err := http.Get("https://www.metalsucks.net/")
if err != nil {
t.Fatal(err)
}
defer res.Body.Close()

//Parses the given html reader and then returns the root node and an error.
node, err := GoHtml.Decode(res.Body)
if err != nil {
t.Fatal(err)
}

nodeList := node.GetElementsByClassName("post-title")
iter := nodeList.IterNodeList()
for node := range iter{
print(node.GetInnerText())
nodeList := node.QuerySelectorAll(".left-content article .post-title")
for node := range nodeList.IterNodeList(){
println(node.GetInnerText())
}
```

## Changelog

Changes, bug fixes and new features in this version.
- add: Tokenizer
- add: NodeTreeBuilder
- renamed: QuerySelector to Query
- renamed: QuerySelectorAll to QueryAll

## Documentation

Full [documentation](https://pkg.go.dev/github.com/udan-jayanith/GoHTML) is available on [pkg.go.dev](https://pkg.go.dev/).
Expand Down
6 changes: 4 additions & 2 deletions benchmarks/benchmark_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,9 @@ import(
"net/http"
"time"
)

/*
Adapted from [GoQuery example](https://github.com/PuerkitoBio/goquery?tab=readme-ov-file#examples)
*/
func TestFetchPostCovers(t *testing.T){
res, err := http.Get("https://www.metalsucks.net/")
if err != nil {
Expand All @@ -20,7 +22,7 @@ func TestFetchPostCovers(t *testing.T){
t.Fatal(err)
}

nodeList := node.QueryAll(".sm-feat .clearfix article")
nodeList := node.QuerySelectorAll(".left-content article .post-title")
t.Log("Got ", nodeList.Len(), " post titles.")
iter := nodeList.IterNodeList()
for node := range iter{
Expand Down
2 changes: 1 addition & 1 deletion classList.go
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@ func (classList ClassList) Encode() string {
return classes
}

// EncodeTo encode className for the node.
// EncodeTo encodes classNames for the node.
// If node is nil EncodeTo does nothing.
func (classList ClassList) EncodeTo(node *Node){
if node == nil {
Expand Down
44 changes: 38 additions & 6 deletions classList_test.go
Original file line number Diff line number Diff line change
@@ -1,31 +1,63 @@
package GoHtml_test

import(
import (
"fmt"
"testing"
"github.com/udan-jayanith/GoHTML"

GoHtml "github.com/udan-jayanith/GoHTML"
)

func TestClasses(t *testing.T){
func TestClasses(t *testing.T) {
node := GoHtml.CreateNode("div")
node.SetAttribute("class", "div-container main")

classList := GoHtml.NewClassList()
classList.DecodeFrom(node)
if !classList.Contains("main"){
if !classList.Contains("main") {
t.Fatal("")
return
}
classList.DeleteClass("main")
if classList.Contains("main"){
if classList.Contains("main") {
t.Fatal("")
return
}

classList.AppendClass("main-div")
if !classList.Contains("main-div"){
if !classList.Contains("main-div") {
t.Fatal("")
return
}

classList.EncodeTo(node)
}

// ExampleClassList_Contains loads the classes of a node into a ClassList and
// checks whether individual class names are present.
func ExampleClassList_Contains() {
	// Build a div carrying the classes video-container and main-contents.
	container := GoHtml.CreateNode("div")
	container.SetAttribute("class", "video-container main-contents")

	// Copy the div's classes into a fresh class list.
	classes := GoHtml.NewClassList()
	classes.DecodeFrom(container)

	// Report whether each candidate class exists in the class list.
	for _, name := range []string{"container", "video-container"} {
		fmt.Println(classes.Contains(name))
	}

	//Output:
	//false
	//true
}

// ExampleClassList_Encode builds a ClassList by hand and prints its encoded form.
func ExampleClassList_Encode() {
	classes := GoHtml.NewClassList()

	// Populate the class list one class at a time.
	for _, name := range []string{"container", "warper", "main-content"} {
		classes.AppendClass(name)
	}

	// Prints something like "warper container main-content"; the order of the
	// classes in the result is not guaranteed.
	encoded := classes.Encode()
	fmt.Println(encoded)
}
2 changes: 1 addition & 1 deletion node-list.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ import (
"iter"
)

//NodeList can store nodes by appended order.
// NodeList stores nodes in the order they were appended; iterate over them by invoking the IterNodeList method.
type NodeList struct {
list *list.List
currentEl *list.Element
Expand Down
18 changes: 18 additions & 0 deletions node-list_test.go
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
package GoHtml_test

import (
"fmt"
"os"
"testing"

Expand All @@ -13,6 +14,7 @@ func TestIterNodeList1(t *testing.T) {
t.Fatal(err)
return
}
defer file.Close()

node, err := GoHtml.Decode(file)
if err != nil {
Expand All @@ -39,4 +41,20 @@ func TestIterNodeList2(t *testing.T){
for node := range iter{
t.Log(node)
}
}

// ExampleNodeList appends a few nodes to a NodeList and prints their tag names
// while iterating over the list.
func ExampleNodeList() {
	nodes := GoHtml.NewNodeList()
	for _, tag := range []string{"br", "hr", "div"} {
		nodes.Append(GoHtml.CreateNode(tag))
	}

	for item := range nodes.IterNodeList() {
		fmt.Println(item.GetTagName())
	}
	//Output:
	//br
	//hr
	//div
}
27 changes: 24 additions & 3 deletions node-tree.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ package GoHtml

import (
"strings"

"golang.org/x/net/html"
)

Expand Down Expand Up @@ -38,7 +39,7 @@ func (node *Node) SetPreviousNode(previousNode *Node) {
node.previousNode = previousNode
}

// GetChildNode returns the first child elements of this node.
// GetChildNode returns the first child node of this node.
func (node *Node) GetChildNode() *Node {
return node.childNode
}
Expand Down Expand Up @@ -75,7 +76,7 @@ func (node *Node) GetAttribute(attributeName string) (string, bool) {
// RemoveAttribute deletes the named attribute from the node.
// The name is lower-cased and stripped of surrounding whitespace before the lookup.
func (node *Node) RemoveAttribute(attributeName string) {
	key := strings.ToLower(attributeName)
	key = strings.TrimSpace(key)
	delete(node.attributes, key)
}

// IterateAttributes calls callback for every attribute of the node, passing the attribute and its value.
Expand Down Expand Up @@ -114,19 +115,21 @@ func (node *Node) AppendChild(childNode *Node) {

lastNode := node.GetChildNode().GetLastNode()
childNode.SetPreviousNode(lastNode)
childNode.setParentNode(lastNode.GetParent())
lastNode.SetNextNode(childNode)
}

// Append links newNode after the last node of the chain that node belongs to.
// newNode gets that last node as its previous node and is given the same parent.
func (node *Node) Append(newNode *Node) {
	tail := node.GetLastNode()
	newNode.SetPreviousNode(tail)
	newNode.setParentNode(tail.GetParent())
	tail.SetNextNode(newNode)
}

// GetParent returns a pointer to the parent node.
func (node *Node) GetParent() *Node {
return node.GetFirstNode().getParentNode()
return node.parentNode
}

// GetLastNode returns the last node in the node branch.
Expand Down Expand Up @@ -203,3 +206,21 @@ func (node *Node) RemoveNode() {
func (node *Node) IsTextNode() bool {
	// A node counts as a text node when it has no tag name.
	tagName := node.GetTagName()
	return tagName == ""
}

// Closest starts at node and walks toward the root until it finds a node that matches the selector, then returns that node.
// The receiver itself is tested first. While the current node has a previous node the traverser steps back with Previous;
// once there is no previous node it jumps to the parent. Closest returns nil when the walk runs out of nodes without a match.
// Adapted from the MDN Element: closest() method, https://developer.mozilla.org/en-US/docs/Web/API/Element/closest
func (node *Node) Closest(selector string) *Node {
	traverser := NewTraverser(node)
	selectors := TokenizeSelectorsAndCombinators(selector)

	for {
		current := traverser.GetCurrentNode()
		switch {
		case current == nil:
			// Ran past the root without finding a match.
			return nil
		case matchFromRightMostSelectors(current, selectors):
			return current
		case current.GetPreviousNode() == nil:
			// No previous node left on this level; continue from the parent.
			traverser.SetCurrentNodeTo(current.GetParent())
		default:
			traverser.Previous()
		}
	}
}
20 changes: 20 additions & 0 deletions node-tree_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -112,4 +112,24 @@ func TestRemoveNode(t *testing.T){

//p.RemoveNode()
//t.Log(GoHtml.NodeTreeToHTML(article))
}

// TestClosest checks that Closest, called on the element with class
// "ordered-item", resolves the selector "img+.ordered-list" to an ol element.
func TestClosest(t *testing.T) {
	root, err := testFile4NodeTree()
	if err != nil {
		t.Fatal(err)
	}

	item := root.GetElementByClassName("ordered-item")
	if item == nil {
		t.Fatal("Node is nil.")
	}

	match := item.Closest("img+.ordered-list")
	switch {
	case match == nil:
		t.Fatal("Node is nil")
	case match.GetTagName() != "ol":
		t.Fatal("Unexpected element.")
	}
}
8 changes: 3 additions & 5 deletions parser.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,24 +3,23 @@ package GoHtml
import (
"io"
"strings"

"golang.org/x/net/html"
)


// Decode reads HTML from r and builds a node-tree, then returns the root node and a nil error.
func Decode(r io.Reader) (*Node, error) {
t := NewTokenizer(r)
nodeTreeBuilder := NewNodeTreeBuilder()
for {
tt := t.Advanced()
if tt == html.ErrorToken{
if tt == html.ErrorToken {
break
}

nodeTreeBuilder.WriteNodeTree(t.CurrentNode(), tt)
nodeTreeBuilder.WriteNodeTree(t.GetCurrentNode(), tt)
}
return nodeTreeBuilder.GetRootNode(), nil

}

// HTMLToNodeTree returns the given html code as a node-tree. If an error occurs, it is a SyntaxError.
Expand All @@ -30,4 +29,3 @@ func HTMLToNodeTree(html string) (*Node, error) {
return node, err
}


30 changes: 30 additions & 0 deletions parser_test.go
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
package GoHtml_test

import (
"fmt"
"os"
"strings"
"testing"
Expand All @@ -14,6 +15,7 @@ func TestDecode(t *testing.T) {
t.Fatal(err)
return
}
defer file.Close()

node, err := GoHtml.Decode(file)
if err != nil {
Expand All @@ -24,3 +26,31 @@ func TestDecode(t *testing.T) {
var builder strings.Builder
GoHtml.Encode(&builder, node)
}

// ExampleDecode parses a small HTML document from a reader and prints the
// inner text of its title element.
func ExampleDecode() {
	r := strings.NewReader(`
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>User Profile</title>
</head>
<body>
<h1 class="username">Udan</h1>
<p class="email">udanjayanith@gmail.com</p>
<p>Joined: 01/08/2024</p>
</body>
</html>
`)

	rootNode, err := GoHtml.Decode(r)
	if err != nil {
		// Report the failure instead of silently querying a tree that could not be built.
		fmt.Println(err)
		return
	}

	// Guard against a missing title element before reading its text.
	title := ""
	if titleNode := rootNode.QuerySelector("title"); titleNode != nil {
		title = titleNode.GetInnerText()
	}
	fmt.Println(title)
	//Output: User Profile
}
Loading