Skip to content

Commit

Permalink
trim telegram audio captions to 1024 symbols
Browse files Browse the repository at this point in the history
  • Loading branch information
paskal committed Mar 24, 2022
1 parent 0afcaa9 commit d011c9a
Show file tree
Hide file tree
Showing 3 changed files with 33 additions and 20 deletions.
37 changes: 20 additions & 17 deletions app/proc/telegram.go
Expand Up @@ -75,7 +75,7 @@ func (client TelegramClient) Send(channelID string, item feed.Item) (err error)
func (client TelegramClient) sendText(channelID string, item feed.Item) (*tb.Message, error) {
message, err := client.Bot.Send(
recipient{chatID: channelID},
client.getMessageHTML(item, true),
client.getMessageHTML(item, true, false),
tb.ModeHTML,
tb.NoPreview,
)
Expand All @@ -97,7 +97,7 @@ func (client TelegramClient) sendAudio(channelID string, item feed.Item) (*tb.Me
File: tb.FromReader(&httpBodyCopy),
FileName: item.GetFilename(),
MIME: "audio/mpeg",
Caption: client.getMessageHTML(item, false),
Caption: client.getMessageHTML(item, false, true),
Title: item.Title,
Performer: item.Author,
Duration: client.duration(tee),
Expand All @@ -122,32 +122,35 @@ func (client TelegramClient) tagLinkOnlySupport(htmlText string) string {
}

// getMessageHTML generates HTML message from provided feed.Item
func (client TelegramClient) getMessageHTML(item feed.Item, withMp3Link bool) string {
description := string(item.Description)

description = strings.TrimPrefix(description, "<![CDATA[")
description = strings.TrimSuffix(description, "]]>")

// apparently bluemonday doesn't remove escaped HTML tags
description = client.tagLinkOnlySupport(html.UnescapeString(description))
description = strings.TrimSpace(description)
messageHTML := description

func (client TelegramClient) getMessageHTML(item feed.Item, withMp3Link, trimCaption bool) string {
var header, footer string
title := strings.TrimSpace(item.Title)
if title != "" {
switch {
case item.Link == "":
messageHTML = fmt.Sprintf("%s\n\n", title) + messageHTML
header = fmt.Sprintf("%s\n\n", title)
case item.Link != "":
messageHTML = fmt.Sprintf("<a href=%q>%s</a>\n\n", item.Link, title) + messageHTML
header = fmt.Sprintf("<a href=%q>%s</a>\n\n", item.Link, title)
}
}

if withMp3Link {
messageHTML += fmt.Sprintf("\n\n%s", item.Enclosure.URL)
footer += fmt.Sprintf("\n\n%s", item.Enclosure.URL)
}

description := string(item.Description)
description = strings.TrimPrefix(description, "<![CDATA[")
description = strings.TrimSuffix(description, "]]>")
// apparently bluemonday doesn't remove escaped HTML tags
description = client.tagLinkOnlySupport(html.UnescapeString(description))
description = strings.TrimSpace(description)

// Telegram limits captions to 1024 symbols, see https://limits.tginfo.me/en
if trimCaption && len(header+description+footer) > 1024 {
description = CleanText(description, 1020-len(header+footer))
}

return messageHTML
return header + description + footer
}

// duration scans MP3 file from provided io.Reader and returns its duration in seconds, ignoring possible errors
Expand Down
14 changes: 12 additions & 2 deletions app/proc/telegram_test.go
Expand Up @@ -2,7 +2,9 @@ package proc

import (
"bytes"
"html/template"
"strconv"
"strings"
"testing"
"testing/iotest"
"time"
Expand Down Expand Up @@ -126,12 +128,20 @@ func TestFormattedMessage(t *testing.T) {
i := i
tc := tc
t.Run(strconv.Itoa(i), func(t *testing.T) {
htmlMessage := client.getMessageHTML(tc.item, false)
htmlMessage := client.getMessageHTML(tc.item, false, false)
assert.Equal(t, tc.expectedHTML, htmlMessage)
})
}
}

func TestTruncatedMessage(t *testing.T) {
client := TelegramClient{}
htmlMessage := client.getMessageHTML(feed.Item{Title: "title", Enclosure: feed.Enclosure{URL: "https://example.com/some.mp3"}, Description: template.HTML(strings.Repeat("test", 1000))}, true, true) //nolint:gosec
assert.True(t, strings.HasPrefix(htmlMessage, "title\n\n"))
assert.True(t, strings.HasSuffix(htmlMessage, "\n\nhttps://example.com/some.mp3"))
assert.LessOrEqual(t, len(htmlMessage), 1024)
}

func TestGetMessageHTML(t *testing.T) {
item := feed.Item{
Title: "\tPodcast\n\t",
Expand All @@ -145,7 +155,7 @@ func TestGetMessageHTML(t *testing.T) {
expected := "<a href=\"https://example.com/xyz\">Podcast</a>\n\nNews <a href=\"/test\">Podcast Link</a>\n\nhttps://example.com"

client := TelegramClient{}
msg := client.getMessageHTML(item, true)
msg := client.getMessageHTML(item, true, false)
assert.Equal(t, expected, msg)
}

Expand Down
2 changes: 1 addition & 1 deletion app/proc/twitter.go
Expand Up @@ -47,7 +47,7 @@ func (t *TwitterClient) Send(item feed.Item) error {
return nil
}

// CleanText removes html tags and shrinks result
// CleanText removes HTML tags and shrinks the result to max symbols, adding 4 symbols on top
func CleanText(inp string, max int) string {
res := striphtmltags.StripTags(inp)
if len([]rune(res)) > max {
Expand Down

0 comments on commit d011c9a

Please sign in to comment.