Skip to content

Commit bfe0a78

Browse files
authored
builtin,strconv: fix and optimize utf8 and formatting functions (#9874)
1 parent 3c8d2bb commit bfe0a78

File tree

4 files changed

+37
-16
lines changed

4 files changed

+37
-16
lines changed

vlib/builtin/utf8.v

Lines changed: 9 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -103,16 +103,12 @@ fn utf8_len(c byte) int {
103103
}
104104

105105
// Calculate string length in number of codepoints
106-
fn utf8_str_len(s string) int {
106+
pub fn utf8_str_len(s string) int {
107107
mut l := 0
108-
for i := 0; i < s.len; i++ {
108+
mut i := 0
109+
for i < s.len {
109110
l++
110-
c := unsafe { s.str[i] }
111-
if (c & (1 << 7)) != 0 {
112-
for t := byte(1 << 6); (c & t) != 0; t >>= 1 {
113-
i++
114-
}
115-
}
111+
i += ((0xe5000000 >> ((unsafe { s.str[i] } >> 3) & 0x1e)) & 3) + 1
116112
}
117113
return l
118114
}
@@ -124,17 +120,16 @@ pub fn utf8_str_visible_length(s string) int {
124120
mut l := 0
125121
mut ul := 1
126122
for i := 0; i < s.len; i += ul {
127-
ul = 1
128123
c := unsafe { s.str[i] }
129-
if (c & (1 << 7)) != 0 {
130-
for t := byte(1 << 6); (c & t) != 0; t >>= 1 {
131-
ul++
132-
}
133-
}
124+
ul = ((0xe5000000 >> ((unsafe { s.str[i] } >> 3) & 0x1e)) & 3) + 1
134125
if i + ul > s.len { // incomplete UTF-8 sequence
135126
return l
136127
}
137128
l++
129+
// avoid the match if not needed
130+
if ul == 1 {
131+
continue
132+
}
138133
// recognize combining characters and wide characters
139134
match ul {
140135
2 {

vlib/strconv/f32_str.v

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -94,6 +94,14 @@ pub fn (d Dec32) get_string_32(neg bool, i_n_digit int, i_pad_digit int) string
9494
x++
9595
}
9696

97+
// no decimal digits needed, end here
98+
if i_n_digit == 0 {
99+
unsafe {
100+
buf[i]=0
101+
return tos(byteptr(&buf[0]), i)
102+
}
103+
}
104+
97105
if out_len >= 1 {
98106
buf[y - x] = `.`
99107
x++

vlib/strconv/f64_str.v

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -110,6 +110,14 @@ fn (d Dec64) get_string_64(neg bool, i_n_digit int, i_pad_digit int) string {
110110
x++
111111
}
112112

113+
// no decimal digits needed, end here
114+
if i_n_digit == 0 {
115+
unsafe {
116+
buf[i]=0
117+
return tos(byteptr(&buf[0]), i)
118+
}
119+
}
120+
113121
if out_len >= 1 {
114122
buf[y - x] = `.`
115123
x++

vlib/strconv/format.v

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@ enum Char_parse_state {
2929
reset_params
3030
}
3131

32-
enum Align_text {
32+
pub enum Align_text {
3333
right = 0
3434
left
3535
center
@@ -176,6 +176,12 @@ pub fn f64_to_str_lnd(f f64, dec_digit int) string {
176176
i++
177177
}
178178
}
179+
180+
// no more digits needed, stop here
181+
if dec_digit <= 0 {
182+
return unsafe { tos(res.data, dot_res_sp) }
183+
}
184+
179185
//println("r_i-d_pos: ${r_i - d_pos}")
180186
if dot_res_sp >= 0 {
181187
if (r_i - dot_res_sp) > dec_digit {
@@ -204,6 +210,7 @@ pub fn f64_to_str_lnd(f f64, dec_digit int) string {
204210
205211
*/
206212
pub struct BF_param {
213+
pub mut:
207214
pad_ch byte = byte(` `) // padding char
208215
len0 int = -1 // default length for the whole number or string
209216
len1 int = 6 // number of decimal digits, if needed
@@ -214,7 +221,10 @@ pub struct BF_param {
214221
}
215222

216223
pub fn format_str(s string, p BF_param) string {
217-
dif := p.len0 - s.len
224+
if p.len0 <= 0 {
225+
return s
226+
}
227+
dif := p.len0 - utf8_str_visible_length(s)
218228
if dif <= 0 {
219229
return s
220230
}

0 commit comments

Comments
 (0)