Skip to content

Commit

Permalink
Move stripLinebreak to a struct field in the HTML parser
Browse files (browse the repository at this point in the history)
  • Loading branch information
tulir committed Apr 8, 2019
1 parent 887e2e2 commit 927be9b
Showing 1 changed file with 31 additions and 27 deletions.
58 changes: 31 additions & 27 deletions ui/messages/parser/htmlparser.go
Expand Up @@ -39,6 +39,8 @@ var matrixToURL = regexp.MustCompile("^(?:https?://)?(?:www\\.)?matrix\\.to/#/([

type htmlParser struct {
room *rooms.Room

keepLinebreak bool
}

func AdjustStyleBold(style tcell.Style) tcell.Style {
Expand Down Expand Up @@ -78,8 +80,8 @@ func (parser *htmlParser) getAttribute(node *html.Node, attribute string) string
return ""
}

func (parser *htmlParser) listToEntity(node *html.Node, stripLinebreak bool) messages.HTMLEntity {
children := parser.nodeToEntities(node.FirstChild, stripLinebreak)
func (parser *htmlParser) listToEntity(node *html.Node) messages.HTMLEntity {
children := parser.nodeToEntities(node.FirstChild)
ordered := node.Data == "ol"
start := 1
if ordered {
Expand All @@ -100,10 +102,10 @@ func (parser *htmlParser) listToEntity(node *html.Node, stripLinebreak bool) mes
return messages.NewListEntity(ordered, start, listItems)
}

func (parser *htmlParser) basicFormatToEntity(node *html.Node, stripLinebreak bool) messages.HTMLEntity {
func (parser *htmlParser) basicFormatToEntity(node *html.Node) messages.HTMLEntity {
entity := &messages.BaseHTMLEntity{
Tag: node.Data,
Children: parser.nodeToEntities(node.FirstChild, stripLinebreak),
Children: parser.nodeToEntities(node.FirstChild),
}
switch node.Data {
case "b", "strong":
Expand Down Expand Up @@ -149,24 +151,24 @@ func (parser *htmlParser) parseColor(node *html.Node, mainName, altName string)
return tcell.NewRGBColor(int32(r), int32(g), int32(b)), true
}

func (parser *htmlParser) headerToEntity(node *html.Node, stripLinebreak bool) messages.HTMLEntity {
func (parser *htmlParser) headerToEntity(node *html.Node) messages.HTMLEntity {
length := int(node.Data[1] - '0')
prefix := strings.Repeat("#", length) + " "
return (&messages.BaseHTMLEntity{
Tag: node.Data,
Text: prefix,
Children: parser.nodeToEntities(node.FirstChild, stripLinebreak),
Children: parser.nodeToEntities(node.FirstChild),
}).AdjustStyle(AdjustStyleBold)
}

func (parser *htmlParser) blockquoteToEntity(node *html.Node, stripLinebreak bool) messages.HTMLEntity {
return messages.NewBlockquoteEntity(parser.nodeToEntities(node.FirstChild, stripLinebreak))
func (parser *htmlParser) blockquoteToEntity(node *html.Node) messages.HTMLEntity {
return messages.NewBlockquoteEntity(parser.nodeToEntities(node.FirstChild))
}

func (parser *htmlParser) linkToEntity(node *html.Node, stripLinebreak bool) messages.HTMLEntity {
func (parser *htmlParser) linkToEntity(node *html.Node) messages.HTMLEntity {
entity := &messages.BaseHTMLEntity{
Tag: "a",
Children: parser.nodeToEntities(node.FirstChild, stripLinebreak),
Children: parser.nodeToEntities(node.FirstChild),
}
href := parser.getAttribute(node, "href")
if len(href) == 0 {
Expand Down Expand Up @@ -263,68 +265,70 @@ func (parser *htmlParser) codeblockToEntity(node *html.Node) messages.HTMLEntity
}
}
}
parser.keepLinebreak = true
text := (&messages.BaseHTMLEntity{
Children: parser.nodeToEntities(node.FirstChild, false),
Children: parser.nodeToEntities(node.FirstChild),
}).PlainText()
parser.keepLinebreak = false
return parser.syntaxHighlight(text, lang)
}

func (parser *htmlParser) tagNodeToEntity(node *html.Node, stripLinebreak bool) messages.HTMLEntity {
func (parser *htmlParser) tagNodeToEntity(node *html.Node) messages.HTMLEntity {
switch node.Data {
case "blockquote":
return parser.blockquoteToEntity(node, stripLinebreak)
return parser.blockquoteToEntity(node)
case "ol", "ul":
return parser.listToEntity(node, stripLinebreak)
return parser.listToEntity(node)
case "h1", "h2", "h3", "h4", "h5", "h6":
return parser.headerToEntity(node, stripLinebreak)
return parser.headerToEntity(node)
case "br":
return messages.NewBreakEntity()
case "b", "strong", "i", "em", "s", "del", "u", "ins", "font":
return parser.basicFormatToEntity(node, stripLinebreak)
return parser.basicFormatToEntity(node)
case "a":
return parser.linkToEntity(node, stripLinebreak)
return parser.linkToEntity(node)
case "img":
return parser.imageToEntity(node)
case "pre":
return parser.codeblockToEntity(node)
default:
return &messages.BaseHTMLEntity{
Tag: node.Data,
Children: parser.nodeToEntities(node.FirstChild, stripLinebreak),
Children: parser.nodeToEntities(node.FirstChild),
Block: parser.isBlockTag(node.Data),
}
}
}

func (parser *htmlParser) singleNodeToEntity(node *html.Node, stripLinebreak bool) messages.HTMLEntity {
func (parser *htmlParser) singleNodeToEntity(node *html.Node) messages.HTMLEntity {
switch node.Type {
case html.TextNode:
if stripLinebreak {
if !parser.keepLinebreak {
node.Data = strings.ReplaceAll(node.Data, "\n", "")
}
return &messages.BaseHTMLEntity{
Tag: "text",
Text: node.Data,
}
case html.ElementNode:
return parser.tagNodeToEntity(node, stripLinebreak)
return parser.tagNodeToEntity(node)
case html.DocumentNode:
if node.FirstChild.Data == "html" && node.FirstChild.NextSibling == nil {
return parser.singleNodeToEntity(node.FirstChild, stripLinebreak)
return parser.singleNodeToEntity(node.FirstChild)
}
return &messages.BaseHTMLEntity{
Tag: "html",
Children: parser.nodeToEntities(node.FirstChild, stripLinebreak),
Children: parser.nodeToEntities(node.FirstChild),
Block: true,
}
default:
return nil
}
}

func (parser *htmlParser) nodeToEntities(node *html.Node, stripLinebreak bool) (entities []messages.HTMLEntity) {
func (parser *htmlParser) nodeToEntities(node *html.Node) (entities []messages.HTMLEntity) {
for ; node != nil; node = node.NextSibling {
if entity := parser.singleNodeToEntity(node, stripLinebreak); entity != nil {
if entity := parser.singleNodeToEntity(node); entity != nil {
entities = append(entities, entity)
}
}
Expand All @@ -344,15 +348,15 @@ func (parser *htmlParser) isBlockTag(tag string) bool {

func (parser *htmlParser) Parse(htmlData string) messages.HTMLEntity {
node, _ := html.Parse(strings.NewReader(htmlData))
return parser.singleNodeToEntity(node, true)
return parser.singleNodeToEntity(node)
}

// ParseHTMLMessage parses an HTML-formatted Matrix event into an HTMLEntity.
func ParseHTMLMessage(room *rooms.Room, evt *mautrix.Event, senderDisplayname string) messages.HTMLEntity {
htmlData := evt.Content.FormattedBody
htmlData = strings.Replace(htmlData, "\t", " ", -1)

parser := htmlParser{room}
parser := htmlParser{room: room}
root := parser.Parse(htmlData)
root.(*messages.BaseHTMLEntity).Block = false

Expand Down

0 comments on commit 927be9b

Please sign in to comment.