feat: add heading tokenizer (#1723)
boojack committed May 23, 2023
1 parent 616b8b0 commit fa53a25
Showing 5 changed files with 191 additions and 50 deletions.
53 changes: 32 additions & 21 deletions plugin/gomark/parser/heading.go
@@ -1,41 +1,52 @@
package parser

import (
	"github.com/usememos/memos/plugin/gomark/parser/tokenizer"
)

type HeadingTokenizer struct {
	Level         int
	ContentTokens []*tokenizer.Token
}

func NewHeadingTokenizer() *HeadingTokenizer {
	return &HeadingTokenizer{}
}

func (*HeadingTokenizer) Trigger() []byte {
	return []byte{'#'}
}

func (*HeadingTokenizer) Match(tokens []*tokenizer.Token) *HeadingTokenizer {
	// Count the leading run of '#' tokens; the count becomes the heading level.
	cursor := 0
	for _, token := range tokens {
		if token.Type == tokenizer.Hash {
			cursor++
		} else {
			break
		}
	}
	if len(tokens) <= cursor+1 {
		return nil
	}
	// The hashes must be followed by a space, and only levels 1 through 6
	// count as headings.
	if tokens[cursor].Type != tokenizer.Space {
		return nil
	}
	level := cursor
	if level == 0 || level > 6 {
		return nil
	}

	// Everything after the space, up to the end of the line, is the
	// heading's content.
	cursor++
	contentTokens := []*tokenizer.Token{}
	for _, token := range tokens[cursor:] {
		if token.Type == tokenizer.Newline {
			break
		}
		contentTokens = append(contentTokens, token)
	}
	if len(contentTokens) == 0 {
		return nil
	}

	return &HeadingTokenizer{
		Level:         level,
		ContentTokens: contentTokens,
	}
}
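
For orientation, the new API is two-step: run the raw text through the tokenizer, then ask the heading matcher for a result, exactly as the new tests below do. A minimal sketch (the surrounding function and the fmt import are illustrative, not part of the commit):

	tokens := tokenizer.Tokenize("## Hello World!")
	if heading := NewHeadingTokenizer().Match(tokens); heading != nil {
		// Prints: level 2, 3 content tokens ("Hello", " ", "World!")
		fmt.Printf("level %d, %d content tokens\n", heading.Level, len(heading.ContentTokens))
	}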
94 changes: 94 additions & 0 deletions plugin/gomark/parser/heading_test.go
@@ -1 +1,95 @@
package parser

import (
	"testing"

	"github.com/stretchr/testify/require"
	"github.com/usememos/memos/plugin/gomark/parser/tokenizer"
)

func TestHeadingParser(t *testing.T) {
	tests := []struct {
		text    string
		heading *HeadingTokenizer
	}{
		{
			text:    "*Hello world!",
			heading: nil,
		},
		{
			text: "## Hello World!",
			heading: &HeadingTokenizer{
				Level: 2,
				ContentTokens: []*tokenizer.Token{
					{
						Type:  tokenizer.Text,
						Value: "Hello",
					},
					{
						Type:  tokenizer.Space,
						Value: " ",
					},
					{
						Type:  tokenizer.Text,
						Value: "World!",
					},
				},
			},
		},
		{
			// Only the leading run of hashes sets the level; a later "#"
			// is ordinary content.
			text: "# # Hello World",
			heading: &HeadingTokenizer{
				Level: 1,
				ContentTokens: []*tokenizer.Token{
					{
						Type:  tokenizer.Hash,
						Value: "#",
					},
					{
						Type:  tokenizer.Space,
						Value: " ",
					},
					{
						Type:  tokenizer.Text,
						Value: "Hello",
					},
					{
						Type:  tokenizer.Space,
						Value: " ",
					},
					{
						Type:  tokenizer.Text,
						Value: "World",
					},
				},
			},
		},
		{
			// A leading space disqualifies the line as a heading.
			text:    " # 123123 Hello World!",
			heading: nil,
		},
		{
			// Note the trailing space after "123"; matching stops at the
			// newline, so only the first line becomes heading content.
			text: `# 123 
Hello World!`,
			heading: &HeadingTokenizer{
				Level: 1,
				ContentTokens: []*tokenizer.Token{
					{
						Type:  tokenizer.Text,
						Value: "123",
					},
					{
						Type:  tokenizer.Space,
						Value: " ",
					},
				},
			},
		},
	}

	for _, test := range tests {
		tokens := tokenizer.Tokenize(test.text)
		headingTokenizer := NewHeadingTokenizer()
		require.Equal(t, test.heading, headingTokenizer.Match(tokens))
	}
}
27 changes: 0 additions & 27 deletions plugin/gomark/parser/tokenizer/token.go

This file was deleted.

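Its contents were evidently folded into tokenizer.go below, where the Token struct and NewToken constructor are now defined.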
30 changes: 29 additions & 1 deletion plugin/gomark/parser/tokenizer/tokenizer.go
@@ -1,13 +1,41 @@
package tokenizer

type TokenType = string

const (
	Underline TokenType = "_"
	Star      TokenType = "*"
	Hash      TokenType = "#"
	Newline   TokenType = "\n"
	Space     TokenType = " "
)

const (
	Text TokenType = ""
)

type Token struct {
	Type  TokenType
	Value string
}

func NewToken(tp, text string) *Token {
	return &Token{
		Type:  tp,
		Value: text,
	}
}

func Tokenize(text string) []*Token {
	tokens := []*Token{}
	for _, c := range text {
		switch c {
		case '_':
			tokens = append(tokens, NewToken(Underline, "_"))
		case '*':
			tokens = append(tokens, NewToken(Star, "*"))
		case '#':
			tokens = append(tokens, NewToken(Hash, "#"))
		case '\n':
			tokens = append(tokens, NewToken(Newline, "\n"))
		case ' ':
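			// The diff view is collapsed from here down. Inferred from the
			// tokenizer tests below, the remaining cases presumably read as
			// follows (a sketch, not the verbatim commit):
			tokens = append(tokens, NewToken(Space, " "))
		default:
			// Consecutive plain runes collapse into a single Text token,
			// so "hello" is one token, not five.
			var lastToken *Token
			if len(tokens) > 0 {
				lastToken = tokens[len(tokens)-1]
			}
			if lastToken == nil || lastToken.Type != Text {
				tokens = append(tokens, NewToken(Text, string(c)))
			} else {
				lastToken.Value += string(c)
			}
		}
	}
	return tokens
}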
37 changes: 36 additions & 1 deletion plugin/gomark/parser/tokenizer/tokenizer_test.go
@@ -32,9 +32,44 @@ func TestTokenize(t *testing.T) {
				},
			},
		},
		{
			// Note the trailing space after "hello" and the leading space
			// before "world".
			text: `# hello 
 world`,
			tokens: []*Token{
				{
					Type:  Hash,
					Value: "#",
				},
				{
					Type:  Space,
					Value: " ",
				},
				{
					Type:  Text,
					Value: "hello",
				},
				{
					Type:  Space,
					Value: " ",
				},
				{
					Type:  Newline,
					Value: "\n",
				},
				{
					Type:  Space,
					Value: " ",
				},
				{
					Type:  Text,
					Value: "world",
				},
			},
		},
	}

	for _, test := range tests {
		result := Tokenize(test.text)
		require.Equal(t, test.tokens, result)
	}
}
