@@ -538,6 +538,7 @@ const (
538
538
max_latin_1 = rune (0x00ff ) // '\u00FF' // `ÿ`
539
539
)
540
540
541
+ // Represents all Unicode code points in Unicode category L (letters).
541
542
const letter_table = RangeTable{
542
543
r16 : [
543
544
Range16 {0x0041 , 0x005a , 1 },
@@ -1132,6 +1133,7 @@ const letter_table = RangeTable{
1132
1133
latin_offset: 6
1133
1134
}
1134
1135
1136
+ // Represents all Unicode code points with the White_Space property (category Z plus control characters such as U+0009..U+000D).
1135
1137
const white_space_table = RangeTable{
1136
1138
r16 : [
1137
1139
Range16 {0x0009 , 0x000d , 1 },
@@ -1146,6 +1148,146 @@ const white_space_table = RangeTable{
1146
1148
latin_offset: 2
1147
1149
}
1148
1150
1151
+ // number_table lists the code point ranges in Unicode category N (numbers), split into 16-bit (r16) and 32-bit (r32) ranges.
1152
+ const number_table = RangeTable{
1153
+ r16 : [ // ranges whose bounds fit in 16 bits
1154
+ Range16 {0x0030 , 0x0039 , 1 },
1155
+ Range16 {0x00b2 , 0x00b3 , 1 },
1156
+ Range16 {0x00b9 , 0x00bc , 3 }, // third field is presumably the stride, i.e. only 0x00b9 and 0x00bc -- confirm against Range16
1157
+ Range16 {0x00bd , 0x00be , 1 },
1158
+ Range16 {0x0660 , 0x0669 , 1 },
1159
+ Range16 {0x06f0 , 0x06f9 , 1 },
1160
+ Range16 {0x07c0 , 0x07c9 , 1 },
1161
+ Range16 {0x0966 , 0x096f , 1 },
1162
+ Range16 {0x09e6 , 0x09ef , 1 },
1163
+ Range16 {0x09f4 , 0x09f9 , 1 },
1164
+ Range16 {0x0a66 , 0x0a6f , 1 },
1165
+ Range16 {0x0ae6 , 0x0aef , 1 },
1166
+ Range16 {0x0b66 , 0x0b6f , 1 },
1167
+ Range16 {0x0b72 , 0x0b77 , 1 },
1168
+ Range16 {0x0be6 , 0x0bf2 , 1 },
1169
+ Range16 {0x0c66 , 0x0c6f , 1 },
1170
+ Range16 {0x0c78 , 0x0c7e , 1 },
1171
+ Range16 {0x0ce6 , 0x0cef , 1 },
1172
+ Range16 {0x0d58 , 0x0d5e , 1 },
1173
+ Range16 {0x0d66 , 0x0d78 , 1 },
1174
+ Range16 {0x0de6 , 0x0def , 1 },
1175
+ Range16 {0x0e50 , 0x0e59 , 1 },
1176
+ Range16 {0x0ed0 , 0x0ed9 , 1 },
1177
+ Range16 {0x0f20 , 0x0f33 , 1 },
1178
+ Range16 {0x1040 , 0x1049 , 1 },
1179
+ Range16 {0x1090 , 0x1099 , 1 },
1180
+ Range16 {0x1369 , 0x137c , 1 },
1181
+ Range16 {0x16ee , 0x16f0 , 1 },
1182
+ Range16 {0x17e0 , 0x17e9 , 1 },
1183
+ Range16 {0x17f0 , 0x17f9 , 1 },
1184
+ Range16 {0x1810 , 0x1819 , 1 },
1185
+ Range16 {0x1946 , 0x194f , 1 },
1186
+ Range16 {0x19d0 , 0x19da , 1 },
1187
+ Range16 {0x1a80 , 0x1a89 , 1 },
1188
+ Range16 {0x1a90 , 0x1a99 , 1 },
1189
+ Range16 {0x1b50 , 0x1b59 , 1 },
1190
+ Range16 {0x1bb0 , 0x1bb9 , 1 },
1191
+ Range16 {0x1c40 , 0x1c49 , 1 },
1192
+ Range16 {0x1c50 , 0x1c59 , 1 },
1193
+ Range16 {0x2070 , 0x2074 , 4 },
1194
+ Range16 {0x2075 , 0x2079 , 1 },
1195
+ Range16 {0x2080 , 0x2089 , 1 },
1196
+ Range16 {0x2150 , 0x2182 , 1 },
1197
+ Range16 {0x2185 , 0x2189 , 1 },
1198
+ Range16 {0x2460 , 0x249b , 1 },
1199
+ Range16 {0x24ea , 0x24ff , 1 },
1200
+ Range16 {0x2776 , 0x2793 , 1 },
1201
+ Range16 {0x2cfd , 0x3007 , 778 }, // 0x3007 - 0x2cfd == 778, so presumably only these two code points
1202
+ Range16 {0x3021 , 0x3029 , 1 },
1203
+ Range16 {0x3038 , 0x303a , 1 },
1204
+ Range16 {0x3192 , 0x3195 , 1 },
1205
+ Range16 {0x3220 , 0x3229 , 1 },
1206
+ Range16 {0x3248 , 0x324f , 1 },
1207
+ Range16 {0x3251 , 0x325f , 1 },
1208
+ Range16 {0x3280 , 0x3289 , 1 },
1209
+ Range16 {0x32b1 , 0x32bf , 1 },
1210
+ Range16 {0xa620 , 0xa629 , 1 },
1211
+ Range16 {0xa6e6 , 0xa6ef , 1 },
1212
+ Range16 {0xa830 , 0xa835 , 1 },
1213
+ Range16 {0xa8d0 , 0xa8d9 , 1 },
1214
+ Range16 {0xa900 , 0xa909 , 1 },
1215
+ Range16 {0xa9d0 , 0xa9d9 , 1 },
1216
+ Range16 {0xa9f0 , 0xa9f9 , 1 },
1217
+ Range16 {0xaa50 , 0xaa59 , 1 },
1218
+ Range16 {0xabf0 , 0xabf9 , 1 },
1219
+ Range16 {0xff10 , 0xff19 , 1 },
1220
+ ]
1221
+ r32 : [ // ranges above 0xffff
1222
+ Range32 {0x10107 , 0x10133 , 1 },
1223
+ Range32 {0x10140 , 0x10178 , 1 },
1224
+ Range32 {0x1018a , 0x1018b , 1 },
1225
+ Range32 {0x102e1 , 0x102fb , 1 },
1226
+ Range32 {0x10320 , 0x10323 , 1 },
1227
+ Range32 {0x10341 , 0x1034a , 9 },
1228
+ Range32 {0x103d1 , 0x103d5 , 1 },
1229
+ Range32 {0x104a0 , 0x104a9 , 1 },
1230
+ Range32 {0x10858 , 0x1085f , 1 },
1231
+ Range32 {0x10879 , 0x1087f , 1 },
1232
+ Range32 {0x108a7 , 0x108af , 1 },
1233
+ Range32 {0x108fb , 0x108ff , 1 },
1234
+ Range32 {0x10916 , 0x1091b , 1 },
1235
+ Range32 {0x109bc , 0x109bd , 1 },
1236
+ Range32 {0x109c0 , 0x109cf , 1 },
1237
+ Range32 {0x109d2 , 0x109ff , 1 },
1238
+ Range32 {0x10a40 , 0x10a48 , 1 },
1239
+ Range32 {0x10a7d , 0x10a7e , 1 },
1240
+ Range32 {0x10a9d , 0x10a9f , 1 },
1241
+ Range32 {0x10aeb , 0x10aef , 1 },
1242
+ Range32 {0x10b58 , 0x10b5f , 1 },
1243
+ Range32 {0x10b78 , 0x10b7f , 1 },
1244
+ Range32 {0x10ba9 , 0x10baf , 1 },
1245
+ Range32 {0x10cfa , 0x10cff , 1 },
1246
+ Range32 {0x10d30 , 0x10d39 , 1 },
1247
+ Range32 {0x10e60 , 0x10e7e , 1 },
1248
+ Range32 {0x10f1d , 0x10f26 , 1 },
1249
+ Range32 {0x10f51 , 0x10f54 , 1 },
1250
+ Range32 {0x10fc5 , 0x10fcb , 1 },
1251
+ Range32 {0x11052 , 0x1106f , 1 },
1252
+ Range32 {0x110f0 , 0x110f9 , 1 },
1253
+ Range32 {0x11136 , 0x1113f , 1 },
1254
+ Range32 {0x111d0 , 0x111d9 , 1 },
1255
+ Range32 {0x111e1 , 0x111f4 , 1 },
1256
+ Range32 {0x112f0 , 0x112f9 , 1 },
1257
+ Range32 {0x11450 , 0x11459 , 1 },
1258
+ Range32 {0x114d0 , 0x114d9 , 1 },
1259
+ Range32 {0x11650 , 0x11659 , 1 },
1260
+ Range32 {0x116c0 , 0x116c9 , 1 },
1261
+ Range32 {0x11730 , 0x1173b , 1 },
1262
+ Range32 {0x118e0 , 0x118f2 , 1 },
1263
+ Range32 {0x11950 , 0x11959 , 1 },
1264
+ Range32 {0x11c50 , 0x11c6c , 1 },
1265
+ Range32 {0x11d50 , 0x11d59 , 1 },
1266
+ Range32 {0x11da0 , 0x11da9 , 1 },
1267
+ Range32 {0x11fc0 , 0x11fd4 , 1 },
1268
+ Range32 {0x12400 , 0x1246e , 1 },
1269
+ Range32 {0x16a60 , 0x16a69 , 1 },
1270
+ Range32 {0x16b50 , 0x16b59 , 1 },
1271
+ Range32 {0x16b5b , 0x16b61 , 1 },
1272
+ Range32 {0x16e80 , 0x16e96 , 1 },
1273
+ Range32 {0x1d2e0 , 0x1d2f3 , 1 },
1274
+ Range32 {0x1d360 , 0x1d378 , 1 },
1275
+ Range32 {0x1d7ce , 0x1d7ff , 1 },
1276
+ Range32 {0x1e140 , 0x1e149 , 1 },
1277
+ Range32 {0x1e2f0 , 0x1e2f9 , 1 },
1278
+ Range32 {0x1e8c7 , 0x1e8cf , 1 },
1279
+ Range32 {0x1e950 , 0x1e959 , 1 },
1280
+ Range32 {0x1ec71 , 0x1ecab , 1 },
1281
+ Range32 {0x1ecad , 0x1ecaf , 1 },
1282
+ Range32 {0x1ecb1 , 0x1ecb4 , 1 },
1283
+ Range32 {0x1ed01 , 0x1ed2d , 1 },
1284
+ Range32 {0x1ed2f , 0x1ed3d , 1 },
1285
+ Range32 {0x1f100 , 0x1f10c , 1 },
1286
+ Range32 {0x1fbf0 , 0x1fbf9 , 1 },
1287
+ ]
1288
+ latin_offset: 4 // the first 4 r16 entries are the ones ending at or below max_latin_1 (0x00ff)
1289
+ }
1290
+
1149
1291
struct RangeTable {
1150
1292
pub :
1151
1293
r16 []Range16
@@ -1167,6 +1309,7 @@ pub:
1167
1309
stride u32
1168
1310
}
1169
1311
1312
+ // Tests whether rune `r` is in the given range table.
1170
1313
fn is_excluding_latin (table & RangeTable, r rune ) bool {
1171
1314
r16 := & table.r16
1172
1315
off := table.latin_offset
0 commit comments