Skip to content

Commit 736c567

Browse files
committed Aug 7, 2024
Strings::matchAll(): added option 'lazy'
1 parent 31f4684 commit 736c567

File tree

4 files changed

+127
-9
lines changed

4 files changed

+127
-9
lines changed
 

‎src/Utils/Strings.php

+26-6
Original file line number | Diff line number | Diff line change
@@ -589,6 +589,7 @@ public static function match(
589589
/**
590590
* Searches the string for all occurrences matching the regular expression and
591591
* returns an array of arrays containing the found expression and each subexpression.
592+
* @return ($lazy is true ? \Generator<int, array> : array[])
592593
*/
593594
public static function matchAll(
594595
string $subject,
@@ -599,21 +600,41 @@ public static function matchAll(
599600
bool $unmatchedAsNull = false,
600601
bool $patternOrder = false,
601602
bool $utf8 = false,
602-
): array
603+
bool $lazy = false,
604+
): array|\Generator
603605
{
604-
$flags = is_int($captureOffset) // back compatibility
605-
? $captureOffset
606-
: ($captureOffset ? PREG_OFFSET_CAPTURE : 0) | ($unmatchedAsNull ? PREG_UNMATCHED_AS_NULL : 0) | ($patternOrder ? PREG_PATTERN_ORDER : 0);
607-
608606
if ($utf8) {
609607
$offset = strlen(self::substring($subject, 0, $offset));
610608
$pattern .= 'u';
611609
}
612610

611+
if ($lazy) {
612+
$flags = PREG_OFFSET_CAPTURE | ($unmatchedAsNull ? PREG_UNMATCHED_AS_NULL : 0);
613+
return (function () use ($utf8, $captureOffset, $flags, $subject, $pattern, $offset) {
614+
$counter = 0;
615+
while (
616+
$offset <= strlen($subject) - ($counter ? 1 : 0)
617+
&& self::pcre('preg_match', [$pattern, $subject, &$m, $flags, $offset])
618+
) {
619+
$offset = $m[0][1] + max(1, strlen($m[0][0]));
620+
if (!$captureOffset) {
621+
$m = array_map(fn($item) => $item[0], $m);
622+
} elseif ($utf8) {
623+
$m = self::bytesToChars($subject, [$m])[0];
624+
}
625+
yield $counter++ => $m;
626+
}
627+
})();
628+
}
629+
613630
if ($offset > strlen($subject)) {
614631
return [];
615632
}
616633

634+
$flags = is_int($captureOffset) // back compatibility
635+
? $captureOffset
636+
: ($captureOffset ? PREG_OFFSET_CAPTURE : 0) | ($unmatchedAsNull ? PREG_UNMATCHED_AS_NULL : 0) | ($patternOrder ? PREG_PATTERN_ORDER : 0);
637+
617638
self::pcre('preg_match_all', [
618639
$pattern, $subject, &$m,
619640
($flags & PREG_PATTERN_ORDER) ? $flags : ($flags | PREG_SET_ORDER),
@@ -622,7 +643,6 @@ public static function matchAll(
622643
return $utf8 && $captureOffset
623644
? self::bytesToChars($subject, $m)
624645
: $m;
625-
626646
}
627647

628648

‎tests/Utils/Strings.match().phpt

+3-2
Original file line number | Diff line number | Diff line change
@@ -41,5 +41,6 @@ Assert::same([['k', 7]], Strings::match('žluťoučký kůň', '#[e-l]+#u', capt
4141

4242

4343
// right edge
44-
Assert::null(Strings::match('hello world!', '', offset: 50));
45-
Assert::null(Strings::match('', '', offset: 1));
44+
Assert::same([''], Strings::match('he', '#(?<=e)#', offset: 2));
45+
Assert::same(null, Strings::match('he', '#(?<=x)#', offset: 2));
46+
Assert::same(null, Strings::match('he', '##', offset: 3));

‎tests/Utils/Strings.matchAll().phpt

+9-1
Original file line number | Diff line number | Diff line change
@@ -16,6 +16,12 @@ require __DIR__ . '/../bootstrap.php';
1616
Assert::same([], Strings::matchAll('hello world!', '#([E-L])+#'));
1717

1818

19+
// sentinel
20+
Assert::same([
21+
[''], [''], [''],
22+
], Strings::matchAll('he', '##'));
23+
24+
1925
// capturing
2026
Assert::same([
2127
['hell', 'l'],
@@ -81,4 +87,6 @@ Assert::same([['e', null]], Strings::matchAll('hello world!', '#e(x)*#', unmatch
8187

8288

8389
// right edge
84-
Assert::same([], Strings::matchAll('hello world!', '', offset: 50));
90+
Assert::same([['']], Strings::matchAll('he', '#(?<=e)#', offset: 2));
91+
Assert::same([], Strings::matchAll('he', '#(?<=x)#', offset: 2));
92+
Assert::same([], Strings::matchAll('he', '##', offset: 3));
+89
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,89 @@
1+
<?php
2+
3+
/**
4+
* Test: Nette\Utils\Strings::matchAll()
5+
*/
6+
7+
declare(strict_types=1);
8+
9+
use Nette\Utils\Strings;
10+
use Tester\Assert;
11+
12+
require __DIR__ . '/../bootstrap.php';
13+
14+
15+
// not matched
16+
Assert::type(Generator::class, Strings::matchAll('hello world!', '#([E-L])+#', lazy: true));
17+
Assert::same(0, iterator_count(Strings::matchAll('hello world!', '#([E-L])+#', lazy: true)));
18+
19+
20+
// sentinel
21+
Assert::same(
22+
[['h'], ['e']],
23+
iterator_to_array(Strings::matchAll('he', '#.#', lazy: true)),
24+
);
25+
26+
Assert::same(
27+
[[''], ['']],
28+
iterator_to_array(Strings::matchAll('he', '##', lazy: true)),
29+
);
30+
31+
32+
// right edge
33+
Assert::same(
34+
[['']],
35+
iterator_to_array(Strings::matchAll('he', '#(?<=e)#', offset: 2, lazy: true)),
36+
);
37+
38+
Assert::same(
39+
[],
40+
iterator_to_array(Strings::matchAll('he', '#(?<=x)#', offset: 2, lazy: true)),
41+
);
42+
43+
Assert::same(
44+
[],
45+
iterator_to_array(Strings::matchAll('he', '##', offset: 3, lazy: true)),
46+
);
47+
48+
49+
// capturing
50+
Assert::same([
51+
['hell', 'l'],
52+
['l', 'l'],
53+
], iterator_to_array(Strings::matchAll('hello world!', '#([e-l])+#', lazy: true)));
54+
55+
Assert::same([
56+
['hell'],
57+
['l'],
58+
], iterator_to_array(Strings::matchAll('hello world!', '#[e-l]+#', lazy: true)));
59+
60+
61+
// options
62+
Assert::same([
63+
[['lu', 2], ['l', 2], ['u', 3]],
64+
[['ou', 6], ['o', 6], ['u', 7]],
65+
[['k', 10], ['k', 10], ['', 11]],
66+
[['k', 14], ['k', 14], ['', 15]],
67+
], iterator_to_array(Strings::matchAll('žluťoučký kůň!', '#([a-z])([a-z]*)#u', captureOffset: true, lazy: true)));
68+
69+
Assert::same([
70+
[['lu', 1], ['l', 1], ['u', 2]],
71+
[['ou', 4], ['o', 4], ['u', 5]],
72+
[['k', 7], ['k', 7], ['', 8]],
73+
[['k', 10], ['k', 10], ['', 11]],
74+
], iterator_to_array(Strings::matchAll('žluťoučký kůň!', '#([a-z])([a-z]*)#u', captureOffset: true, utf8: true, lazy: true)));
75+
76+
Assert::same(
77+
[['l'], ['k'], ['k']],
78+
iterator_to_array(Strings::matchAll('žluťoučký kůň', '#[e-l]+#u', offset: 2, lazy: true)),
79+
);
80+
81+
Assert::same(
82+
[['k'], ['k']],
83+
iterator_to_array(Strings::matchAll('žluťoučký kůň', '#[e-l]+#u', offset: 2, utf8: true, lazy: true)),
84+
);
85+
86+
Assert::same(
87+
[['e', null]],
88+
iterator_to_array(Strings::matchAll('hello world!', '#e(x)*#', unmatchedAsNull: true, lazy: true)),
89+
);

0 commit comments

Comments
 (0)
Failed to load comments.