-
-
Notifications
You must be signed in to change notification settings - Fork 369
/
comment.go
190 lines (170 loc) · 6.68 KB
/
comment.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
package store
import (
"fmt"
"html/template"
"regexp"
"strings"
"time"
"github.com/microcosm-cc/bluemonday"
)
// Comment represents a single comment with an optional reference to its parent.
// Text is the field meant for rendering (it is cleaned by Sanitize), while Orig keeps
// the original comment text and is never sanitized.
type Comment struct {
ID string `json:"id" bson:"_id"` // cleared by PrepareUntrusted, so untrusted input can't define it
ParentID string `json:"pid"` // reference to the parent comment
Text string `json:"text"` // rendered text, cleaned by Sanitize
Orig string `json:"orig,omitempty"` // important: never render this as HTML! It's not sanitized.
User User `json:"user"` // user info; name, id, picture and ip are replaced/cleared on hard delete
Locator Locator `json:"locator"` // site and url of the post
Score int `json:"score"` // reset to 0 by PrepareUntrusted and SetDeleted
Votes map[string]bool `json:"votes,omitempty"` // presumably keyed by user id with the vote direction as value - TODO confirm
VotedIPs map[string]VotedIPInfo `json:"voted_ips,omitempty"` // voted ips (hashes) with TS
Vote int `json:"vote"` // vote for the current user, -1/1/0.
Controversy float64 `json:"controversy,omitempty"` // reset to 0 by PrepareUntrusted and SetDeleted
Timestamp time.Time `json:"time" bson:"time"` // reset by PrepareUntrusted to force auto-gen
Edit *Edit `json:"edit,omitempty" bson:"edit,omitempty"` // pointer to have empty default in json response
Pin bool `json:"pin,omitempty" bson:"pin,omitempty"` // can't be set from untrusted input, dropped by SetDeleted
Deleted bool `json:"delete,omitempty" bson:"delete"` // set by SetDeleted
Imported bool `json:"imported,omitempty" bson:"imported"` // can't be set from untrusted input
PostTitle string `json:"title,omitempty" bson:"title"` // cleaned with SanitizeText by Sanitize
}
// Locator keeps site and url of the post a comment is attached to
type Locator struct {
SiteID string `json:"site,omitempty" bson:"site"`
URL string `json:"url"` // cleaned with SanitizeAsURL when the comment is sanitized
}
// Edit is the edit indication of a comment. Comment keeps it as a pointer, nil means no edit info.
type Edit struct {
Timestamp time.Time `json:"time" bson:"time"` // presumably the time of the edit - TODO confirm
Summary string `json:"summary"` // presumably a short description of the edit - TODO confirm
}
// PostInfo holds summary for given post url
type PostInfo struct {
URL string `json:"url,omitempty"` // can be attached to site-wide comments but won't be set then
Count int `json:"count"` // presumably the number of comments for the post - TODO confirm
ReadOnly bool `json:"read_only,omitempty" bson:"read_only,omitempty"` // can be attached to site-wide comments but won't be set then
FirstTS time.Time `json:"first_time,omitempty" bson:"first_time,omitempty"` // presumably the time of the first comment - TODO confirm
LastTS time.Time `json:"last_time,omitempty" bson:"last_time,omitempty"` // presumably the time of the last comment - TODO confirm
}
// BlockedUser holds id, name and ts for blocked user
type BlockedUser struct {
ID string `json:"id"`
Name string `json:"name"`
Until time.Time `json:"time"` // serialized as "time"; presumably the moment the block ends - TODO confirm
}
// VotedIPInfo keeps timestamp and voting value (direction). Used as VotedIPs value.
// Fields have no tags, so they are serialized under their Go names.
type VotedIPInfo struct {
Timestamp time.Time
Value bool // voting direction; presumably true means upvote - TODO confirm
}
// DeleteMode selects how much of the comment info SetDeleted erases
type DeleteMode int

// Supported delete modes
const (
	// SoftDelete wipes the comment's content but leaves the user info in place
	SoftDelete DeleteMode = iota
	// HardDelete wipes the content and also overwrites the user info
	HardDelete
)
const (
	// shortURLLen is the maximum length for URL text shortening.
	shortURLLen = 48
	// snippetLen is the snippet size Comment.Snippet falls back to when called with a non-positive limit.
	snippetLen = 200
)
// PrepareUntrusted pre-processes a comment received from an untrusted source: every
// auto-generated field and everything a user is not supposed to provide is reset.
// Text, Orig, ParentID, User, Locator, Vote and PostTitle are left untouched.
func (c *Comment) PrepareUntrusted() {
	// id and time are never taken from the user, zero values force auto-gen
	c.ID, c.Timestamp = "", time.Time{}

	// a fresh comment starts with no votes and no rating
	c.Votes, c.VotedIPs = map[string]bool{}, map[string]VotedIPInfo{}
	c.Score, c.Controversy = 0, 0

	// edit info and state flags can't be set by the submitter
	c.Edit = nil
	c.Pin, c.Deleted, c.Imported = false, false, false
}
// SetDeleted wipes the comment's content and switches it to the deleted state.
// Both modes clear text, original text, score, controversy, votes, edit info and pin.
// HardDelete additionally overwrites user name and id with "deleted" and drops picture and ip.
func (c *Comment) SetDeleted(mode DeleteMode) {
	// content
	c.Text, c.Orig = "", ""
	c.Edit = nil

	// votes and rating
	c.Votes, c.VotedIPs = make(map[string]bool), map[string]VotedIPInfo{}
	c.Score, c.Controversy = 0, 0

	// state flags
	c.Deleted, c.Pin = true, false

	if mode != HardDelete {
		return
	}

	// hard mode only: erase user info too, other user fields stay as they are
	c.User.ID, c.User.Name = "deleted", "deleted"
	c.User.Picture, c.User.IP = "", ""
}
// Attribute value matchers used by Comment.Sanitize. They are compiled once at package level
// instead of on every Sanitize call.
var (
	// reChromaPreClass accepts only the "chroma" class, allowed on <pre>
	reChromaPreClass = regexp.MustCompile("^chroma$")

	// reTweetBlockquoteClass accepts only the "twitter-tweet" class, allowed on <blockquote>;
	// special case for embedding the quotes from Twitter
	reTweetBlockquoteClass = regexp.MustCompile("^twitter-tweet$")

	// reChromaSpanClass is the list of <span> tag classes which could be produced by chroma code renderer
	// source: https://github.com/alecthomas/chroma/blob/c263f6f/types.go#L209-L306
	reChromaSpanClass = regexp.MustCompile("^(bg|chroma|line|ln|lnt|hl|lntable|lntd|lnlinks|cl|w|err|x|k|kc" +
		"|kd|kn|kp|kr|kt|n|na|nb|bp|nc|no|nd|ni|ne|nf|fm|py|nl|nn|nx|nt|nv|vc|vg" +
		"|vi|vm|l|ld|s|sa|sb|sc|dl|sd|s2|se|sh|si|sx|sr|s1|ss|m|mb|mf|mh|mi|il" +
		"|mo|o|ow|p|c|ch|cm|cp|cpf|c1|cs|g|gd|ge|gr|gh|gi|go|gp|gs|gu|gt|gl)$")

	// reImgLoading accepts the "lazy" and "eager" values of the loading attribute, allowed on <img>
	reImgLoading = regexp.MustCompile("^(lazy|eager)$")
)

// Sanitize cleans dangerous html/js from the comment, in place.
//
// Text goes through bluemonday's UGC policy extended with the class and loading attributes
// listed above. User.ID is HTML-escaped, User.Name and PostTitle are cleaned with SanitizeText,
// User.Picture and Locator.URL with SanitizeAsURL.
//
// Comment.Orig which is used to store the original comment text is not sanitized
// as we expect to never render it as HTML and render Comment.Text instead.
func (c *Comment) Sanitize() {
	p := bluemonday.UGCPolicy()
	p.AllowAttrs("class").Matching(reChromaPreClass).OnElements("pre")
	p.AllowAttrs("class").Matching(reTweetBlockquoteClass).OnElements("blockquote")
	p.AllowAttrs("class").Matching(reChromaSpanClass).OnElements("span")
	p.AllowAttrs("loading").Matching(reImgLoading).OnElements("img")

	c.Text = p.Sanitize(c.Text)
	c.User.ID = template.HTMLEscapeString(c.User.ID)
	c.User.Name = c.SanitizeText(c.User.Name)
	c.User.Picture = c.SanitizeAsURL(c.User.Picture)
	c.Locator.URL = c.SanitizeAsURL(c.Locator.URL)
	c.PostTitle = c.SanitizeText(c.PostTitle)
}
// Snippet returns a short single-line excerpt of the comment's text.
//
// Newlines are replaced with spaces first. Text of up to limit runes is returned as is.
// Longer text is cut to limit runes and then trimmed back to the last space inside the cut,
// so a word is not split in the middle, and " ..." is appended. If the cut part has no space
// at all (a single long word), the hard cut is kept and "..." is appended without a space.
//
// A non-positive limit falls back to snippetLen. Length is counted in runes, not bytes,
// so multi-byte characters are never split.
func (c *Comment) Snippet(limit int) string {
	if limit <= 0 {
		limit = snippetLen
	}

	cleanText := strings.Replace(c.Text, "\n", " ", -1)
	runes := []rune(cleanText)
	if len(runes) <= limit {
		// text fits, including the exact-length case: nothing is cut, so no ellipsis
		return cleanText
	}

	snippet := runes[:limit]
	// go back in snippet and find the last space
	for i := len(snippet) - 1; i >= 0; i-- {
		if snippet[i] == ' ' {
			snippet = snippet[:i]
			break
		}
	}

	// Don't add a space if comment is just a one single word which has been truncated.
	if len(snippet) == limit {
		return string(snippet) + "..."
	}
	return string(snippet) + " ..."
}
// reHref matches an opening <a> tag and captures the value of its double-quoted href attribute.
// SanitizeAsURL uses it to pull the url back out of the sanitized anchor.
var reHref = regexp.MustCompile(`<a\s+(?:[^>]*?\s+)?href="([^"]*)"`)
// SanitizeAsURL drops dangerous code from a url.
// The input is placed into the href of an anchor tag so that bluemonday's UGC policy treats it
// as a link, and the href value is then extracted from the sanitized result.
// An empty string is returned if no href can be found after sanitizing.
func (c *Comment) SanitizeAsURL(inp string) string {
	anchor := fmt.Sprintf(`<a href=%q>`, inp)
	sanitized := bluemonday.UGCPolicy().Sanitize(anchor)

	groups := reHref.FindStringSubmatch(sanitized)
	if len(groups) <= 1 {
		// no href left in the sanitized anchor
		return ""
	}
	return groups[1]
}
func (c *Comment) escapeHTMLWithSome(inp string) string {
res := template.HTMLEscapeString(inp)
res = strings.Replace(res, "&", "&", -1)
res = strings.Replace(res, """, "\"", -1)
res = strings.Replace(res, "'", "'", -1)
return res
}
// SanitizeText cleans an arbitrary input string meant to be shown as plain text.
// All HTML tags are removed with bluemonday's strict policy, the result is passed through
// escapeHTMLWithSome and leading/trailing whitespace is trimmed.
func (c *Comment) SanitizeText(inp string) string {
	stripped := bluemonday.StrictPolicy().Sanitize(inp)
	escaped := c.escapeHTMLWithSome(stripped)
	return strings.TrimSpace(escaped)
}