Skip to content

Commit

Permalink
Support create rich text
Browse files Browse the repository at this point in the history
  • Loading branch information
web-flow committed Jun 30, 2021
1 parent f4099c6 commit ffc9648
Show file tree
Hide file tree
Showing 4 changed files with 202 additions and 38 deletions.
3 changes: 2 additions & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ module github.com/wabarc/telegra.ph
go 1.15

require (
github.com/PuerkitoBio/goquery v1.7.0
github.com/chromedp/cdproto v0.0.0-20210610012203-ae0add727b87 // indirect
github.com/cixtor/readability v1.0.0
github.com/google/uuid v1.2.0 // indirect
Expand All @@ -12,6 +13,6 @@ require (
github.com/wabarc/imgbb v1.0.0
github.com/wabarc/logger v0.0.0-20210417045349-d0d82e8e99ee
github.com/wabarc/screenshot v1.1.3-0.20210613000512-b98688415e94
golang.org/x/net v0.0.0-20210614182718-04defd469f4e // indirect
golang.org/x/net v0.0.0-20210614182718-04defd469f4e
golang.org/x/sys v0.0.0-20210611083646-a4fc73990273 // indirect
)
10 changes: 10 additions & 0 deletions go.sum
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@
github.com/PuerkitoBio/goquery v1.7.0 h1:O5SP3b9JWqMSVMG69zMfj577zwkSNpxrFf7ybS74eiw=
github.com/PuerkitoBio/goquery v1.7.0/go.mod h1:GsLWisAFVj4WgDibEWF4pvYnkVQBpKBKeU+7zCJoLcc=
github.com/andybalholm/cascadia v1.1.0 h1:BuuO6sSfQNFRu1LppgbD25Hr2vLYW25JvxHs5zzsLTo=
github.com/andybalholm/cascadia v1.1.0/go.mod h1:GsXiBklL0woXo1j/WYWtSYYC4ouU9PqHO0sqidkEA4Y=
github.com/chromedp/cdproto v0.0.0-20210526005521-9e51b9051fd0/go.mod h1:At5TxYYdxkbQL0TSefRjhLE3Q0lgvqKKMSFUglJ7i1U=
github.com/chromedp/cdproto v0.0.0-20210610012203-ae0add727b87 h1:cwQmsgb/AXeaToM+wiwNrui8E5hzKVHQL/gUtCNVqaI=
github.com/chromedp/cdproto v0.0.0-20210610012203-ae0add727b87/go.mod h1:At5TxYYdxkbQL0TSefRjhLE3Q0lgvqKKMSFUglJ7i1U=
Expand Down Expand Up @@ -37,8 +41,12 @@ github.com/wabarc/logger v0.0.0-20210417045349-d0d82e8e99ee h1:MMIp++7eem2CI1jIY
github.com/wabarc/logger v0.0.0-20210417045349-d0d82e8e99ee/go.mod h1:4uYr9fnQaQoDk1ttTzLnSB3lZm3i/vrJwN8EZIB2YuI=
github.com/wabarc/screenshot v1.1.3-0.20210613000512-b98688415e94 h1:bcs0QyucWsEymCZ8u7NOyymqzVd4ZSkqgfSdBXbmON0=
github.com/wabarc/screenshot v1.1.3-0.20210613000512-b98688415e94/go.mod h1:Qmk4IOGVkBBG5tdZfOzKyRlj75yXUwhM2ciHL6VQpJo=
golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
golang.org/x/net v0.0.0-20180218175443-cbe0f9307d01/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
golang.org/x/net v0.0.0-20200202094626-16171245cfb2/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
golang.org/x/net v0.0.0-20210614182718-04defd469f4e h1:XpT3nA5TvE525Ne3hInMh6+GETgn27Zfm9dxsThnX2Q=
golang.org/x/net v0.0.0-20210614182718-04defd469f4e/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y=
golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20201207223542-d4d67f95c62d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20210423082822-04245dca01da/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
Expand All @@ -47,6 +55,8 @@ golang.org/x/sys v0.0.0-20210601080250-7ecdf8ef093b/go.mod h1:oPkhp1MJrh7nUepCBc
golang.org/x/sys v0.0.0-20210611083646-a4fc73990273 h1:faDu4veV+8pcThn4fewv6TVlNCezafGoC1gM/mxQLbQ=
golang.org/x/sys v0.0.0-20210611083646-a4fc73990273/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
golang.org/x/text v0.3.6 h1:aRYxNxv6iGQlyVaZmk6ZgYEDa+Jg18DxebPSrd6bg1M=
golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
Expand Down
225 changes: 189 additions & 36 deletions ph.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,22 +10,28 @@ import (
"fmt"
"image"
"image/png"
"io"
"io/ioutil"
"net"
"net/http"
"net/url"
"os"
"path/filepath"
"strconv"
"strings"
"sync"
"time"

"github.com/PuerkitoBio/goquery"
"github.com/cixtor/readability"
"github.com/kallydev/telegraph-go"
"github.com/oliamb/cutter"
"github.com/wabarc/helper"
"github.com/wabarc/imgbb"
"github.com/wabarc/logger"
"github.com/wabarc/screenshot"
"golang.org/x/net/html"
"golang.org/x/net/html/charset"
)

type subject struct {
Expand All @@ -34,8 +40,11 @@ type subject struct {
}

type Archiver struct {
Author string
Shots []screenshot.Screenshots
sync.RWMutex

Author string
Shots []screenshot.Screenshots
Articles map[string]readability.Article

client *telegraph.Client
subject subject
Expand Down Expand Up @@ -152,20 +161,29 @@ func (arc *Archiver) Wayback(links []string) (map[string]string, error) {
return
}

article, err := readability.New().Parse(bytes.NewReader(shot.HTML), shot.URL)
arc.RLock()
article := arc.Articles[shot.URL]
arc.RUnlock()
if article.Content != "" {
logger.Debug("[telegraph] found content on Archiver.Articles")
goto post
}

article, err = readability.New().Parse(bytes.NewReader(shot.HTML), shot.URL)
if err != nil {
logger.Error("[telegraph] parse html failed: %v", err)
return
goto post
}
if article.TextContent == "" {
if article.Content == "" {
logger.Info("[telegraph] text content empty")
return
}
if strings.TrimSpace(shot.Title) == "" {
shot.Title = "Missing Title"
}

post:
arc.subject = subject{title: []rune(shot.Title), source: shot.URL}
arc.post(article.TextContent, file.Name(), ch)
arc.post(article.Content, file.Name(), ch)
// Replace posted result in the map
collect[shot.URL] = <-ch
}(shot)
Expand Down Expand Up @@ -203,40 +221,61 @@ func (arc *Archiver) post(content, imgpath string, ch chan<- string) {
}

nodes := []telegraph.Node{}
// nodes = append(nodes, "source: ")
// nodes = append(nodes, telegraph.NodeElement{
// Tag: "a",
// Attrs: map[string]string{
// "href": arc.subject.source,
// "target": "_blank",
// },
// Children: []telegraph.Node{arc.subject.source},
// })
nodes = append(nodes, "screenshots: ")
for i, path := range paths {
nodes = append(nodes, telegraph.NodeElement{
Tag: "a",
Attrs: map[string]string{
"href": path,
"target": "_blank",
if content == "" {
for _, path := range paths {
nodes = append(nodes, telegraph.NodeElement{
Tag: "img",
Attrs: map[string]string{
"src": path,
"alt": "",
},
})
}
nodes = []telegraph.Node{
telegraph.NodeElement{
Tag: "p",
Children: nodes,
},
Children: []telegraph.Node{strconv.Itoa(i + 1)},
})
}
} else {
nodes = append(nodes, "screenshots: ")
for i, path := range paths {
nodes = append(nodes, telegraph.NodeElement{
Tag: "a",
Attrs: map[string]string{
"href": path,
"target": "_blank",
},
Children: []telegraph.Node{strconv.Itoa(i + 1)},
})
}
nodes = []telegraph.Node{
telegraph.NodeElement{
Tag: "em",
Children: nodes,
},
telegraph.NodeElement{
Tag: "br",
},
}
}
nodes = []telegraph.Node{
telegraph.NodeElement{
Tag: "em",
Children: nodes,
},
telegraph.NodeElement{
Tag: "br",
},
telegraph.NodeElement{

body, er := charset.NewReader(strings.NewReader(content), "utf-8")
if er != nil || body == nil {
logger.Error("[telegraph] convert charset failed: %v", er)
goto create
}

// TODO: improve handling of nodes larger than 64 KB
logger.Debug("[telegraph] content: %#v", content)
if doc, err := goquery.NewDocumentFromReader(body); err == nil {
nodes = append(nodes, telegraph.NodeElement{
Tag: "p",
Children: []telegraph.Node{content},
},
Children: castNodes(traverseNodes(doc.Contents(), arc.client)),
})
}

create:
var pat bool
var err error
var page *telegraph.Page
Expand Down Expand Up @@ -289,6 +328,7 @@ func (arc *Archiver) newClient() (*telegraph.Client, error) {
func upload(filename string) (paths []string, err error) {
url, err := imgbb.NewImgBB(nil, "").Upload(filename)
if err != nil {
logger.Error("[telegraph] upload image to imgbb failed: %v", err)
return paths, err
}

Expand Down Expand Up @@ -392,3 +432,116 @@ func (arc *Archiver) ByRemote(addr string) *Archiver {

return arc
}

// copied from: https://github.com/meinside/telegraph-go/blob/8b212a807f0302374ab467d61011e9aa5d26fbd1/methods.go#L402
//
// traverseNodes walks every node of selections and converts it into a
// telegraph.Node:
//
//   - text nodes are appended as their raw string data;
//   - element nodes become a telegraph.NodeElement carrying the element's
//     attributes and, as children, the result of recursively traversing the
//     element's contents;
//   - every other node type is skipped.
//
// Each non-empty "src" attribute is handed to uploadImage; when that returns a
// non-empty URL it replaces the attribute value, otherwise the original value
// is kept. Elements with a namespace get the tag "<namespace>.<name>".
func traverseNodes(selections *goquery.Selection, client *telegraph.Client) (nodes []telegraph.Node) {
	selections.Each(func(_ int, sel *goquery.Selection) {
		for _, n := range sel.Nodes {
			if n.Type == html.TextNode {
				nodes = append(nodes, n.Data)
				continue
			}
			if n.Type != html.ElementNode {
				continue
			}

			attributes := map[string]string{}
			for _, a := range n.Attr {
				value := a.Val
				// Upload image to telegra.ph
				if a.Key == "src" && value != "" {
					if uploaded := uploadImage(client, value); uploaded != "" {
						value = uploaded
					}
				}
				attributes[a.Key] = value
			}

			name := n.Data
			if n.Namespace != "" {
				name = fmt.Sprintf("%s.%s", n.Namespace, n.Data)
			}

			nodes = append(nodes, telegraph.NodeElement{
				Tag:      name,
				Attrs:    attributes,
				Children: traverseNodes(sel.Contents(), client),
			})
		}
	})

	return nodes
}

// castNodes returns a copy of nodes that keeps only the entries whose dynamic
// type is telegraph.NodeElement or string, in their original order. Any other
// entry is reported through logger.Error and left out of the result.
func castNodes(nodes []telegraph.Node) (castNodes []telegraph.Node) {
	for _, node := range nodes {
		switch value := node.(type) {
		case telegraph.NodeElement:
			castNodes = append(castNodes, value)
		case string:
			castNodes = append(castNodes, value)
		default:
			logger.Error("param casting error: %#v", node)
		}
	}

	return castNodes
}

// tempFileExt returns the extension of name (including the leading dot) when
// it is short and purely ASCII alphanumeric, and "" otherwise, so that only
// harmless suffixes are ever used in a temporary file name.
func tempFileExt(name string) string {
	ext := filepath.Ext(name)
	if len(ext) < 2 || len(ext) > 10 {
		return ""
	}
	for _, r := range ext[1:] {
		switch {
		case r >= 'a' && r <= 'z', r >= 'A' && r <= 'Z', r >= '0' && r <= '9':
		default:
			return ""
		}
	}
	return ext
}

// download fetches the resource at u and stores it in a newly created,
// uniquely named file inside the system temporary directory.
//
// The file name is "telegraph-<random><ext>", where <ext> is the extension of
// the URL path when it is a plain alphanumeric one. The name is never derived
// from the URL's base name, so URLs ending in "/" (or with an empty path)
// work, and concurrent downloads of equally named resources cannot overwrite
// each other.
//
// It returns the path of the written file. On any failure (nil URL, file
// creation, request error, non-2xx response, copy or close error) the
// partially written file is removed and an empty path is returned together
// with the error. On success the caller owns the file and is responsible for
// removing it.
func download(u *url.URL) (path string, err error) {
	// Upper bound for the whole request, including reading the body.
	const timeout = 60 * time.Second

	if u == nil {
		return "", fmt.Errorf("download: nil url")
	}

	file, err := ioutil.TempFile("", "telegraph-*"+tempFileExt(u.Path))
	if err != nil {
		return "", err
	}
	defer func() {
		// A failed Close can mean lost data, so surface it unless an
		// earlier error is already being returned.
		if cerr := file.Close(); cerr != nil && err == nil {
			err = cerr
		}
		if err != nil {
			// Best effort: do not leave partial downloads behind.
			_ = os.Remove(file.Name())
			path = ""
		}
	}()

	client := &http.Client{Timeout: timeout}
	resp, err := client.Get(u.String())
	if err != nil {
		return "", err
	}
	defer resp.Body.Close()

	// Without this check an HTML error page would be stored as the image.
	if resp.StatusCode < 200 || resp.StatusCode > 299 {
		return "", fmt.Errorf("download %s: unexpected status %s", u, resp.Status)
	}

	if _, err = io.Copy(file, resp.Body); err != nil {
		return "", err
	}

	return file.Name(), nil
}

// uploadImage downloads the image referenced by s to a local file, uploads
// that file through client.Upload and returns the first uploaded path with the
// original address appended as the query parameter "orig".
//
// The original address is query-escaped so that characters such as '&', '#'
// or '?' inside s cannot break or truncate the resulting URL. The local file
// is removed once the upload attempt has finished.
//
// An empty string is returned, and the failure is logged, when s cannot be
// parsed, the download fails, or the upload fails or yields no path.
func uploadImage(client *telegraph.Client, s string) (newurl string) {
	u, err := url.Parse(s)
	if err != nil {
		logger.Error("[telegraph] parse url failed: %v", err)
		return ""
	}

	path, err := download(u)
	if err != nil {
		logger.Error("[telegraph] download image failed: %v", err)
		return ""
	}
	// The local copy is only needed for the upload below; removal is best
	// effort, so its error is deliberately ignored.
	defer func() { _ = os.Remove(path) }()
	logger.Debug("[telegraph] downloaded image path: %s", path)

	paths, err := client.Upload([]string{path})
	if err != nil || len(paths) == 0 {
		logger.Error("[telegraph] upload image failed: %v", err)
		return ""
	}
	newurl = paths[0] + "?orig=" + url.QueryEscape(s)
	logger.Debug("[telegraph] new uri: %s", newurl)

	return newurl
}
2 changes: 1 addition & 1 deletion ph_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -252,7 +252,7 @@ func TestWaybackWithShots(t *testing.T) {

resp, err := http.Get(r)
if err != nil {
t.Error(err)
t.Fatal(err)
}
defer resp.Body.Close()

Expand Down

0 comments on commit ffc9648

Please sign in to comment.