@@ -635,3 +635,75 @@ fn test_compatibility_layer() {
635635 assert false , 'match_str should return none when no match is found'
636636 }
637637}
638+
// test_hex_escapes checks the two hexadecimal escape forms accepted in
// patterns, \xHH and \XHHHH, and that malformed escapes are rejected when
// the pattern is compiled.
fn test_hex_escapes() {
	// Each row is [pattern, subject, expected match text].
	// \xHH takes exactly two hex digits.
	two_digit_cases := [
		[r'\x41', 'ABC', 'A'], // 0x41 = 'A'
		[r'\x61', 'abc', 'a'], // 0x61 = 'a'
		[r'\x41+', 'AAAB', 'AAA'], // a quantifier applies to the escaped char
		[r'\x20\x41', ' A test', ' A'], // space followed by 'A'
	]
	for row in two_digit_cases {
		tst_find(row[0], row[1], row[2])
	}

	// \XHHHH takes exactly four hex digits naming a Unicode codepoint.
	four_digit_cases := [
		[r'\X0041', 'ABC', 'A'], // U+0041 = 'A'
		[r'\X0061', 'abc', 'a'], // U+0061 = 'a'
		[r'\X03B1', 'αβγ', 'α'], // U+03B1 = 'α'
	]
	for row in four_digit_cases {
		tst_find(row[0], row[1], row[2])
	}

	// Several escapes in a row spell out a literal word: "Hello".
	tst_find(r'\x48\x65\x6C\x6C\x6F', 'Hello World', 'Hello')

	// Malformed escapes must produce a compile error:
	// one digit only, non-hex digits, and three digits only.
	for malformed in [r'\x4', r'\xGG', r'\X004'] {
		tst_compile_error(malformed)
	}
}
659+
// test_duplicate_named_groups checks that reusing a group name inside one
// pattern is a compile error, while distinct names compile and can be
// looked up individually.
fn test_duplicate_named_groups() {
	// The name `id` appears twice, so compilation must fail.
	tst_compile_error(r'(?P<id>\d+)-(?P<id>\w+)')

	// Two different names are accepted.
	re := pcre.compile(r'(?P<a>\d+)-(?P<b>\w+)') or {
		assert false, 'Should compile: ${err} '
		return
	}
	hit := re.find('12-abc') or {
		assert false, 'Should match'
		return
	}

	// Each named group yields its own capture.
	group_a := re.group_by_name(hit, 'a')
	assert group_a == '12'
	group_b := re.group_by_name(hit, 'b')
	assert group_b == 'abc'
}
675+
// test_invalid_quantifier_ranges checks that a {min,max} quantifier with
// min > max is rejected, and that the degenerate range {0,0} is accepted.
fn test_invalid_quantifier_ranges() {
	// A lower bound above the upper bound is a compile error.
	for inverted in [r'a{3,1}', r'a{5,2}'] {
		tst_compile_error(inverted)
	}

	// {0,0} must compile; `a{0,0}b` is then expected to find 'b' in 'b'.
	re := pcre.compile(r'a{0,0}b') or {
		assert false, 'Should compile: ${err} '
		return
	}
	hit := re.find('b') or {
		assert false, 'Should match'
		return
	}
	assert hit.text == 'b'
}
692+
// test_find_all_utf8_safety checks find_all with patterns that can match the
// empty string on subjects containing multi-byte UTF-8 characters.
fn test_find_all_utf8_safety() {
	// 'é' is 2 bytes (0xC3 0xA9), so byte offset 2 of the subject lies inside
	// a rune. The byte view is built once, outside the loop.
	subject := 'aé'
	subject_bytes := subject.bytes()

	r := pcre.compile(r'x*') or { panic(err) }
	matches := r.find_all(subject)

	// Every match must start on a rune boundary. Only `start` is verified
	// here; a start equal to the subject length (end of input) has no byte to
	// inspect and is skipped.
	for m in matches {
		if m.start < subject_bytes.len {
			// UTF-8 continuation bytes have the bit pattern 10xxxxxx.
			assert (subject_bytes[m.start] & 0xC0) != 0x80, 'Misaligned match start at ${m.start} '
		}
	}

	// find_all must also terminate and return at least one result on a
	// subject that begins with an emoji.
	r2 := pcre.compile(r'y*') or { panic(err) }
	matches2 := r2.find_all('😀!')
	assert matches2.len > 0
}
0 commit comments