/
book.go
100 lines (84 loc) · 2.24 KB
/
book.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
package gobook
import (
"regexp"
"strings"
"github.com/zhuomouren/gohelpers/gonet"
"github.com/zhuomouren/gohelpers"
)
// Writing-progress values stored in Book.Status. The numbers are spelled out
// because Book.Status is serialized as a plain integer.
const (
	// StatusSerial marks a book that is not (known to be) finished.
	StatusSerial = 0
	// StatusFinished marks a book whose writing is complete.
	StatusFinished = 1
)
// Book holds the metadata of a single novel as extracted from a web page.
//
// GetBookByOGP fills Name, Author, ReadURL, Cover, Category, Summary and
// Status; URL and ChapterCount are not populated by any function in this file.
type Book struct {
	// URL is serialized as "url".
	URL string `json:"url"`
	// Name is the book title (og:novel:book_name, falling back to og:title).
	Name string `json:"name"`
	// Author is taken from og:novel:author.
	Author string `json:"author"`
	// Cover is the cover image address taken from og:image.
	Cover string `json:"cover"`
	// Category is taken from og:novel:category.
	Category string `json:"category"`
	// Summary is taken from og:description.
	Summary string `json:"summary"`
	// ChapterCount is serialized as "chapter_count".
	ChapterCount int `json:"chapter_count"`
	// ReadURL is the address taken from og:novel:read_url.
	ReadURL string `json:"read_url"`
	// Status is StatusSerial or StatusFinished.
	Status int `json:"status"`
}
// GetBook issues a GET request for url through gonet, reads the response as a
// string and hands it to GetBookByOGP.
//
// It returns nil when the request reports an error or when GetBookByOGP
// returns nil for the fetched page.
func GetBook(url string) *Book {
	request := gonet.NewRequest().GET(url)

	page, err := request.String()
	if err != nil {
		return nil
	}

	return GetBookByOGP(url, page)
}
// GetBookByOGP builds a Book from the Open Graph <meta> tags found in html.
//
// url is the address the page was loaded from; it is used as the base when the
// read URL or cover address is passed to gohelpers.URL.AbsoluteURL.
//
// The book name comes from og:novel:book_name, falling back to og:title. The
// function returns nil when the name, the author (og:novel:author) or the read
// URL (og:novel:read_url) ends up empty.
//
// NOTE(review): Book.URL and Book.ChapterCount are left at their zero values
// here — confirm that callers fill them in.
func GetBookByOGP(url, html string) *Book {
	const headEnd = "</head>"

	// Restrict the search to the <head> section, but only when the closing
	// tag occurs exactly once; otherwise the whole document is searched.
	if strings.Count(html, headEnd) == 1 {
		html = html[:strings.Index(html, headEnd)]
	}

	title := GetOpenGraphProtocol("og:novel:book_name", html)
	if title == "" {
		title = GetOpenGraphProtocol("og:title", html)
	}
	writer := GetOpenGraphProtocol("og:novel:author", html)

	readLink := GetOpenGraphProtocol("og:novel:read_url", html)
	if !gohelpers.URL.HasPrefix(readLink) {
		// Presumably resolves a relative link against the page URL; the
		// error is dropped and only the returned string is used.
		readLink, _ = gohelpers.URL.AbsoluteURL(readLink, url)
	}

	// All three required fields must be present.
	if title == "" || writer == "" || readLink == "" {
		return nil
	}

	image := GetOpenGraphProtocol("og:image", html)
	// Covers should already be absolute URLs, so this branch is not expected
	// to run in practice.
	if !gohelpers.URL.HasPrefix(image) {
		image, _ = gohelpers.URL.AbsoluteURL(image, url)
	}

	// og:novel:status describes the writing progress; a value containing "完"
	// is treated as finished, anything else as still being serialized.
	progress := StatusSerial
	if strings.Contains(GetOpenGraphProtocol("og:novel:status", html), "完") {
		progress = StatusFinished
	}

	return &Book{
		Name:     title,
		Author:   writer,
		ReadURL:  readLink,
		Cover:    image,
		Category: GetOpenGraphProtocol("og:novel:category", html),
		Summary:  GetOpenGraphProtocol("og:description", html),
		Status:   progress,
	}
}
// GetOpenGraphProtocol returns the whitespace-trimmed content attribute of the
// first <meta> tag in html whose property (or name) attribute equals tag, for
// example "og:title". Matching is case-insensitive. It returns "" when no such
// tag is found.
//
// Only tags written with the identifying attribute first and content directly
// after it are recognised: <meta property="og:title" content="...">. Either
// quote style may be used for each attribute.
func GetOpenGraphProtocol(tag, html string) string {
	// QuoteMeta keeps regexp metacharacters inside tag literal, so a tag can
	// neither change the meaning of the pattern nor make it fail to compile.
	//
	// Go's regexp engine has no backreferences, so the double-quoted and
	// single-quoted forms of content are separate alternatives. Each value
	// runs up to its own closing quote, which means text containing ';', '|'
	// or the other quote character is returned whole instead of truncated.
	re, err := regexp.Compile(
		`(?i)<meta\s+(?:property|name)=["']` + regexp.QuoteMeta(tag) +
			`["']\s*content=(?:"([^"]*)"|'([^']*)')`)
	if err != nil {
		return ""
	}

	m := re.FindStringSubmatch(html)
	if m == nil {
		return ""
	}

	// At most one of the two capture groups takes part in a match; the other
	// one is the empty string.
	return strings.TrimSpace(m[1] + m[2])
}