-
Notifications
You must be signed in to change notification settings - Fork 18
/
styledStringSplitter.go
278 lines (224 loc) · 6.64 KB
/
styledStringSplitter.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
package textstyles
import (
"fmt"
"strings"
"unicode/utf8"
log "github.com/sirupsen/logrus"
"github.com/walles/moar/m/linenumbers"
"github.com/walles/moar/twin"
)
// esc is the ASCII escape character (0x1b). The splitter treats it as the
// start of an escape sequence.
const esc = '\x1b'
// styledStringSplitter walks through one input string rune by rune, interprets
// the escape sequences it understands, and reports the text in between as a
// series of (string, style) parts through callback.
type styledStringSplitter struct {
// The string being split
input string
// Only used for prefixing debug log messages, may be nil
lineNumber *linenumbers.LineNumber
// Byte offset into input of the rune that the next nextChar() call will decode
nextByteIndex int
// Byte offset into input of the rune most recently returned by nextChar(),
// or at/after len(input) if nextChar() hit the end of the input
previousByteIndex int
// Text collected so far for the part currently being built
inProgressString strings.Builder
// Style of the part currently being built
inProgressStyle twin.Style
// Handed to and replaced by the result of rawUpdateStyle() on every "m"
// sequence; presumably a scratch buffer kept around for reuse (confirm
// against rawUpdateStyle)
numbersBuffer []uint
// Set to the style in effect when a clear-to-end-of-line sequence (ESC[K or
// ESC[0K) is seen, returned by styledStringsFromString()
trailer twin.Style
// Called once for each finished, non-empty part
callback func(str string, style twin.Style)
}
// Splits s into runs of identically styled text and calls callback once per
// run. lineNumber is only used for debug logging and may be nil.
//
// Returns the style of the line's trailer, which is twin.StyleDefault unless
// the line contained a clear-to-end-of-line sequence.
func styledStringsFromString(s string, lineNumber *linenumbers.LineNumber, callback func(string, twin.Style)) twin.Style {
	if strings.ContainsRune(s, '\x1b') {
		splitter := styledStringSplitter{
			callback:   callback,
			lineNumber: lineNumber,
			input:      s,
		}
		splitter.run()

		return splitter.trailer
	}

	// No escape characters at all, so the whole string is one default-styled
	// part. This shortcut makes BenchmarkPlainTextSearch() perform a lot
	// better.
	callback(s, twin.StyleDefault)
	return twin.StyleDefault
}
// Decodes and returns the next rune of the input and advances past it, or
// returns -1 without advancing if the whole input has been consumed.
//
// In both cases previousByteIndex is updated to where this call started
// reading, which is what lastChar() relies on.
func (s *styledStringSplitter) nextChar() rune {
	s.previousByteIndex = s.nextByteIndex
	if s.nextByteIndex >= len(s.input) {
		return -1
	}

	decoded, width := utf8.DecodeRuneInString(s.input[s.previousByteIndex:])
	s.nextByteIndex += width
	return decoded
}
// Returns whatever the last call to nextChar() returned, by decoding the rune
// at previousByteIndex again. Returns -1 if that call hit the end of the input.
func (s *styledStringSplitter) lastChar() rune {
	if s.previousByteIndex < len(s.input) {
		decoded, _ := utf8.DecodeRuneInString(s.input[s.previousByteIndex:])
		return decoded
	}

	return -1
}
// Main loop: consumes the whole input, passing ordinary runes to handleRune()
// and escape sequences to handleEscape(), then reports the final part.
//
// An escape sequence that handleEscape() rejects is logged at debug level and
// then emitted as plain text.
func (s *styledStringSplitter) run() {
	for current := s.nextChar(); current != -1; {
		if current != esc {
			s.handleRune(current)
			current = s.nextChar()
			continue
		}

		sequenceStart := s.previousByteIndex
		err := s.handleEscape()
		if err == nil {
			// Sequence fully consumed, carry on with whatever follows it
			current = s.nextChar()
			continue
		}

		var prefix string
		if s.lineNumber != nil {
			prefix = fmt.Sprintf("Line %s: ", s.lineNumber.Format())
		}
		rejected := strings.ReplaceAll(s.input[sequenceStart:s.nextByteIndex], "\x1b", "ESC")
		log.Debug(prefix, "<", rejected, ">: ", err)

		// handleEscape() stopped at a character it did not expect. Everything
		// from the ESC up to, but not including, that character is treated
		// as plain runes.
		for _, plain := range s.input[sequenceStart:s.previousByteIndex] {
			s.handleRune(plain)
		}

		// Re-process the offending character from the top of the loop; it
		// might be the start of another escape sequence, or the end of the
		// input.
		current = s.lastChar()
	}

	s.finalizeCurrentPart()
}
// Appends one plain (non-escape-sequence) rune to the part being built.
func (s *styledStringSplitter) handleRune(char rune) {
	// The result is discarded on purpose: strings.Builder documents that
	// WriteRune always returns a nil error.
	_, _ = s.inProgressString.WriteRune(char)
}
// Called right after an ESC has been read. Reads the character following the
// ESC and, if it starts a CSI ("ESC[") or OSC ("ESC]") sequence, consumes and
// handles the rest of that sequence.
//
// Returns an error if the sequence is not one we handle, or if the line ends
// right after the ESC. On error, lastChar() is the character that caused the
// problem (-1 at end of line); run() uses that to decide where to resume.
func (s *styledStringSplitter) handleEscape() error {
	char := s.nextChar()
	switch char {
	case '[', ']':
		// Got the start of a CSI or an OSC sequence
		return s.consumeControlSequence(char)

	case -1:
		// Formatting -1 with %c would put a U+FFFD replacement character in
		// the message, so report end-of-line explicitly instead.
		return fmt.Errorf("Line ended right after ESC")

	default:
		return fmt.Errorf("Unhandled Fe sequence ESC%c", char)
	}
}
// Reads the remainder of a control sequence whose introducer ("ESC[" or
// "ESC]", as told by charAfterEsc) has already been consumed, then hands the
// result over for interpretation.
//
// Returns an error if the line ends before the sequence does, or if the
// sequence is not one we handle.
func (s *styledStringSplitter) consumeControlSequence(charAfterEsc rune) error {
	// Points to right after the introducer
	bodyStart := s.nextByteIndex
	isOsc := charAfterEsc == ']'

	for {
		switch char := s.nextChar(); {
		case char == -1:
			return fmt.Errorf("Line ended in the middle of a control sequence")

		case char < 0x30 || char > 0x3f:
			// Outside the parameter byte range, so this character ends the
			// sequence. Range from here:
			// https://en.wikipedia.org/wiki/ANSI_escape_code#CSI_(Control_Sequence_Introducer)_sequences
			return s.handleCompleteControlSequence(charAfterEsc, s.input[bodyStart:s.nextByteIndex])

		case isOsc && s.input[bodyStart:s.nextByteIndex] == "8;;":
			// Special case: we have read exactly "ESC]8;;", here comes the URL
			return s.handleURL()
		}

		// Parameter byte, sequence still in progress
	}
}
// Interprets a control sequence that has been read up to and including its
// final character. charAfterEsc tells CSI ('[') from OSC (']'), and sequence
// is everything after the introducer: if the whole CSI sequence is ESC[33m,
// sequence should be just "33m".
//
// OSC sequences are delegated to handleOsc(). For CSI, clear-to-end-of-line
// ("K" / "0K") records the trailer style and "m" sequences update the current
// style. Anything else is reported as an error.
func (s *styledStringSplitter) handleCompleteControlSequence(charAfterEsc rune, sequence string) error {
	switch charAfterEsc {
	case ']':
		return s.handleOsc(sequence)
	case '[':
		// CSI, handled below
	default:
		return fmt.Errorf("Unexpected charAfterEsc: %c", charAfterEsc)
	}

	switch sequence {
	case "K", "0K":
		// Clear to end of line
		s.trailer = s.inProgressStyle
		return nil
	}

	finalByte := sequence[len(sequence)-1]
	if finalByte != 'm' {
		return fmt.Errorf("Unhandled CSI type %q", finalByte)
	}

	updatedStyle, numbers, err := rawUpdateStyle(s.inProgressStyle, sequence, s.numbersBuffer)
	// Keep the returned buffer whether or not parsing succeeded
	s.numbersBuffer = numbers
	if err != nil {
		return err
	}

	s.startNewPart(updatedStyle)
	return nil
}
// Handles an OSC sequence, where sequence is what followed "ESC]" up to and
// including the first non-parameter character.
//
// The only sequences accepted here are the ESC]133;X prompt hints, which are
// consumed together with their terminator (BEL or ESC \) and otherwise
// ignored. Everything else is reported as an error.
func (s *styledStringSplitter) handleOsc(sequence string) error {
	isPromptHint := len(sequence) == len("133;A") && strings.HasPrefix(sequence, "133;")
	if !isPromptHint {
		return fmt.Errorf("Unhandled OSC sequence")
	}

	// Got ESC]133;X, where "X" could be anything. These are prompt hints, and
	// rendering those makes no sense. We should just ignore them:
	// https://gitlab.freedesktop.org/Per_Bothner/specifications/blob/master/proposals/semantic-prompts.md
	switch s.nextChar() {
	case '\x07':
		return nil

	case esc:
		if s.nextChar() != '\\' {
			return fmt.Errorf("Expected ESC \\ after ESC]133;X, got %q", s.lastChar())
		}
		return nil
	}

	// Prompt hint without a terminator we recognize
	return fmt.Errorf("Unhandled OSC sequence")
}
// We just got ESC]8;; and should now read the URL. URLs end with ASCII 7 BEL
// or ESC \.
//
// On success, a new part is started whose style is the current one with the
// hyperlink set to the URL just read. Returns an error if the line ends
// before the URL does, if the URL contains a character outside of validChars,
// or if an ESC inside it is followed by anything but a backslash.
func (s *styledStringSplitter) handleURL() error {
	// Valid URL characters.
	// Ref: https://stackoverflow.com/a/1547940/473672
	const validChars = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-._~:/?#[]@!$&'()*+,;="

	// Points to right after "ESC]8;;"
	urlStart := s.nextByteIndex

	// Ends the URL just before its terminator, which is the last
	// terminatorLength bytes consumed so far, and switches to a hyperlinked
	// style.
	finishURL := func(terminatorLength int) {
		url := s.input[urlStart : s.nextByteIndex-terminatorLength]
		s.startNewPart(s.inProgressStyle.WithHyperlink(&url))
	}

	for {
		char := s.nextChar()

		switch {
		case char == -1:
			return fmt.Errorf("Line ended in the middle of a URL")

		case char == '\x07':
			// BEL, end of URL
			finishURL(1)
			return nil

		case char == esc:
			// Only ESC \ is acceptable here
			switch afterEsc := s.nextChar(); afterEsc {
			case -1:
				return fmt.Errorf("Line ended in the middle of a URL")
			case '\\':
				// End of URL
				finishURL(2)
				return nil
			default:
				return fmt.Errorf("Expected ESC \\ but got ESC %q", afterEsc)
			}

		case !strings.ContainsRune(validChars, char):
			return fmt.Errorf("Invalid URL character: %q", char)
		}

		// It's a valid URL char, keep going
	}
}
// Switches to the given style. If it differs from the current one, the text
// collected so far is reported through the callback and a new, empty part is
// started. If it is the same style, the current part just keeps growing.
func (s *styledStringSplitter) startNewPart(style twin.Style) {
	if style != s.inProgressStyle {
		s.finalizeCurrentPart()
		s.inProgressString.Reset()
		s.inProgressStyle = style
	}
}
// Reports the text collected so far, together with its style, through the
// callback. Empty parts are not reported. The collected text is not cleared
// here; that is up to the caller.
func (s *styledStringSplitter) finalizeCurrentPart() {
	if s.inProgressString.Len() > 0 {
		s.callback(s.inProgressString.String(), s.inProgressStyle)
	}
}