Skip to content

Commit

Permalink
Merge pull request #66 from veewee/reader-improvements
Browse files Browse the repository at this point in the history
Add reader MatchingNode results and a signal to stop reading
  • Loading branch information
veewee committed Jan 14, 2024
2 parents 143c565 + 54bb7ec commit 1770824
Show file tree
Hide file tree
Showing 10 changed files with 252 additions and 23 deletions.
21 changes: 16 additions & 5 deletions docs/reader.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,21 +7,31 @@ As a result, the reader provides a generator of XML strings that match your matc
## Example

```php
use VeeWee\Xml\Dom\Document;
use VeeWee\Xml\Dom\Configurator;
use VeeWee\Xml\Reader\Reader;
use VeeWee\Xml\Reader\Signal;
use VeeWee\Xml\Reader\Matcher;

$reader = Reader::fromXmlFile('large-data.xml');
$provider = $reader->provide(
Matcher\all(
$matcher = Matcher\all(
Matcher\node_name('item'),
Matcher\node_attribute('locale', 'nl-BE')
)
),
// Optionally, you can provide a signal to stop reading at a given point:
$signal = new Signal()
);

foreach ($provider as $nlItem) {
$dom = Document::fromXmlString($nlItem);
// Do something with it
$xml = $nlItem->xml();
$dom = $nlItem->intoDocument(Configurator\canonicalize());
$decoded = $nlItem->decode(Configurator\canonicalize());
$matched = $nlItem->matches($matcher);
$sequence = $nlItem->nodeSequence();

// If you have loaded sufficient items, you can stop reading the XML file:
$signal->stop();
}
```

Expand Down Expand Up @@ -54,7 +64,8 @@ The reader will keep only small parts of the XML in memory by reading the XML st
When the reader detects the first `breakfast_menu` element, it will ask the provided matchers if you are interested in this tag.
A matcher is a function that returns `true` when interested or `false` when it is not interested in this element.
When the matcher returns `true`, the reader will read the complete outer XML of the current tag and `yield` this matching XML to your logic.
This means that the memory-safety of YOUR reader is based on the part inside the XML you are interested in:
This XML is wrapped in a `MatchingNode` which also contains the `NodeSequence` and some handy shortcut functions to e.g. convert the XML into a DOM Document.
Do note that the memory-safety of YOUR reader depends on the part of the XML you are interested in:
If you only match on the root node, it will yield the complete XML and therefore won't be memory-safe.

After deciding if you are interested in the previous tag, it jumps over to the next tag: `breakfast_menu > food[position() = 1 AND @soldOut=false AND @bestSeller = true]` and asks the matcher if you are interested in this.
Expand Down
65 changes: 65 additions & 0 deletions src/Xml/Reader/MatchingNode.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
<?php
declare(strict_types=1);

namespace VeeWee\Xml\Reader;

use DOMDocument;
use VeeWee\Xml\Dom\Document;
use VeeWee\Xml\Encoding\Exception\EncodingException;
use VeeWee\Xml\Exception\RuntimeException;
use VeeWee\Xml\Reader\Node\NodeSequence;
use function VeeWee\Xml\Encoding\xml_decode;

/**
 * Immutable pairing of an XML string with the NodeSequence it belongs to.
 *
 * Besides exposing both values, it offers shortcuts to load the XML into a
 * DOM Document, decode it into an array, or run a matcher against the sequence.
 */
final class MatchingNode
{
    /**
     * @param non-empty-string $xml
     */
    public function __construct(
        private readonly string $xml,
        private readonly NodeSequence $nodeSequence
    ) {
    }

    /**
     * The XML string this node was constructed with.
     *
     * @return non-empty-string
     */
    public function xml(): string
    {
        $xml = $this->xml;

        return $xml;
    }

    /**
     * The node sequence this node was constructed with.
     */
    public function nodeSequence(): NodeSequence
    {
        $sequence = $this->nodeSequence;

        return $sequence;
    }

    /**
     * Loads the XML into a Document, forwarding the given configurators as-is.
     *
     * @param list<callable(DOMDocument): DOMDocument> $configurators
     *
     * @throws RuntimeException
     */
    public function intoDocument(callable ... $configurators): Document
    {
        $document = Document::fromXmlString($this->xml, ...$configurators);

        return $document;
    }

    /**
     * Decodes the XML through xml_decode(), forwarding the given configurators as-is.
     *
     * @param list<callable(DOMDocument): DOMDocument> $configurators
     *
     * @throws RuntimeException
     * @throws EncodingException
     */
    public function decode(callable ... $configurators): array
    {
        $decoded = xml_decode($this->xml, ...$configurators);

        return $decoded;
    }

    /**
     * Invokes the matcher with this node's sequence and returns its verdict.
     *
     * @param callable(NodeSequence): bool $matcher
     */
    public function matches(callable $matcher): bool
    {
        $isMatch = $matcher($this->nodeSequence);

        return $isMatch;
    }
}
20 changes: 14 additions & 6 deletions src/Xml/Reader/Reader.php
Original file line number Diff line number Diff line change
Expand Up @@ -60,18 +60,25 @@ public static function fromXmlString(string $xml, callable ... $configurators):
/**
* @param callable(NodeSequence): bool $matcher
*
* @return Generator<string>
* @return Generator<MatchingNode>
*
* @throws RuntimeException
*/
public function provide(callable $matcher): Generator
public function provide(callable $matcher, ?Signal $signal = null): Generator
{
$signal ??= new Signal();
$reader = ($this->factory)();
$pointer = Pointer::create();

yield from stop_on_first_issue(
static fn (): bool => $reader->read(),
static function () use ($reader, $pointer, $matcher) : ?string {
static function () use ($reader, $signal): bool {
if($signal->stopRequested()) {
return !$reader->close();
}

return $reader->read();
},
static function () use ($reader, $pointer, $matcher) : ?MatchingNode {
if ($reader->nodeType === XMLReader::END_ELEMENT) {
$pointer->leaveElement();

Expand All @@ -93,13 +100,14 @@ static function () use ($reader): array {
);

$pointer->enterElement($element);
$result = $matcher($pointer->getNodeSequence()) ? $reader->readOuterXml() : null;
$outerXml = $matcher($pointer->getNodeSequence()) ? $reader->readOuterXml() : null;
$match = $outerXml ? new MatchingNode($outerXml, $pointer->getNodeSequence()) : null;

if ($isEmptyElement) {
$pointer->leaveElement();
}

return $result;
return $match;
}

return null;
Expand Down
19 changes: 19 additions & 0 deletions src/Xml/Reader/Signal.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
<?php
declare(strict_types=1);

namespace VeeWee\Xml\Reader;

/**
 * One-way stop flag.
 *
 * The flag starts out unset. Calling stop() sets it, and nothing in this
 * class clears it again.
 */
final class Signal
{
    /** Whether stop() has been called on this instance. */
    private bool $halted = false;

    /**
     * Raises the stop flag. Calling it more than once has no further effect.
     */
    public function stop(): void
    {
        $this->halted = true;
    }

    /**
     * Tells whether stop() has been called.
     */
    public function stopRequested(): bool
    {
        return $this->halted;
    }
}
8 changes: 5 additions & 3 deletions tests/Xml/Reader/Configurator/SubstituteEntitiesTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,9 @@
namespace VeeWee\Tests\Xml\Reader\Configurator;

use PHPUnit\Framework\TestCase;
use VeeWee\Xml\Reader\MatchingNode;
use VeeWee\Xml\Reader\Reader;
use function Psl\Vec\map;
use function VeeWee\Xml\Reader\Configurator\substitute_entities;
use function VeeWee\Xml\Reader\Matcher\node_name;

Expand All @@ -21,11 +23,11 @@ public function test_it_can_substitute_entities(): void
[
'<user>my entity value</user>',
],
[...$iterator]
map($iterator, static fn (MatchingNode $match): string => $match->xml())
);
}


public function test_it_can_skip_substituting_entities(): void
{
$xml = $this->buildXml();
Expand All @@ -36,7 +38,7 @@ public function test_it_can_skip_substituting_entities(): void
[
'<user>&entity;</user>',
],
[...$iterator]
map($iterator, static fn (MatchingNode $match): string => $match->xml())
);
}

Expand Down
14 changes: 8 additions & 6 deletions tests/Xml/Reader/Configurator/XsdSchemaTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -7,16 +7,18 @@
use PHPUnit\Framework\TestCase;
use VeeWee\Tests\Xml\Helper\FillFileTrait;
use VeeWee\Xml\Exception\RuntimeException;
use VeeWee\Xml\Reader\MatchingNode;
use VeeWee\Xml\Reader\Reader;
use XMLReader;
use function Psl\Vec\map;
use function VeeWee\Xml\Reader\Configurator\xsd_schema;
use function VeeWee\Xml\Reader\Matcher\node_name;

final class XsdSchemaTest extends TestCase
{
use FillFileTrait;


public function test_it_can_iterate_if_the_schema_matches(): void
{
[$xsdFile, $xsdHandle] = $this->createXsdFile();
Expand All @@ -37,13 +39,13 @@ public function test_it_can_iterate_if_the_schema_matches(): void
'<user>Bos</user>',
'<user>Mos</user>'
],
[...$iterator]
map($iterator, static fn (MatchingNode $match): string => $match->xml())
);

fclose($xsdHandle);
}


public function test_it_triggers_an_error_on_invalid_schema(): void
{
[$xsdFile, $xsdHandle] = $this->createXsdFile();
Expand All @@ -65,7 +67,7 @@ public function test_it_triggers_an_error_on_invalid_schema(): void
fclose($xsdHandle);
}


public function test_it_triggers_an_error_if_schema_file_does_not_exist(): void
{
$xml = '<root />';
Expand All @@ -80,7 +82,7 @@ public function test_it_triggers_an_error_if_schema_file_does_not_exist(): void
fclose($xsdHandle);
}


public function test_it_can_not_set_a_schema_if_the_reader_started_reading(): void
{
[$xsdFile, $xsdHandle] = $this->createXsdFile();
Expand All @@ -93,7 +95,7 @@ public function test_it_can_not_set_a_schema_if_the_reader_started_reading(): vo
fclose($xsdHandle);
}


public function test_it_can_not_set_a_schema_if_the_schema_is_invalid(): void
{
[$xsdFile, $xsdHandle] = $this->fillFile('invalid schema');
Expand Down
4 changes: 3 additions & 1 deletion tests/Xml/Reader/Matcher/AbstractMatcherTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,10 @@
use Closure;
use Generator;
use PHPUnit\Framework\TestCase;
use VeeWee\Xml\Reader\MatchingNode;
use VeeWee\Xml\Reader\Node\NodeSequence;
use VeeWee\Xml\Reader\Reader;
use function Psl\Vec\map;

abstract class AbstractMatcherTest extends TestCase
{
Expand All @@ -23,7 +25,7 @@ abstract public static function provideMatcherCases(): Generator;
public function test_real_xml_cases(Closure $matcher, string $xml, array $expected)
{
$reader = Reader::fromXmlString($xml);
$actual = [...$reader->provide($matcher)];
$actual = map($reader->provide($matcher), static fn (MatchingNode $match): string => $match->xml());

static::assertSame($actual, $expected);
}
Expand Down
73 changes: 73 additions & 0 deletions tests/Xml/Reader/MatchingNodeTest.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
<?php
declare(strict_types=1);

namespace VeeWee\Tests\Xml\Reader;

use PHPUnit\Framework\TestCase;
use VeeWee\Xml\Reader\MatchingNode;
use VeeWee\Xml\Reader\Node\ElementNode;
use VeeWee\Xml\Reader\Node\NodeSequence;
use function Psl\Fun\identity;
use function VeeWee\Xml\Dom\Locator\document_element;
use function VeeWee\Xml\Dom\Mapper\xml_string;
use function VeeWee\Xml\Reader\Matcher\element_name;

/**
 * Exercises the accessors and shortcut methods of MatchingNode against a
 * minimal `<hello/>` fixture.
 */
final class MatchingNodeTest extends TestCase
{
    /**
     * The constructor arguments are handed back untouched by the accessors.
     */
    public function test_it_is_a_matching_node(): void
    {
        $xml = '<hello/>';
        $sequence = $this->helloSequence();
        $match = new MatchingNode($xml, $sequence);

        static::assertSame($xml, $match->xml());
        static::assertSame($sequence, $match->nodeSequence());
    }

    /**
     * A matcher is evaluated against the node's own sequence.
     */
    public function test_it_can_match(): void
    {
        $match = $this->helloMatch();

        static::assertTrue($match->matches(element_name('hello')));
        static::assertFalse($match->matches(element_name('world')));
    }

    /**
     * The XML round-trips through a DOM document.
     */
    public function test_it_can_transform_into_a_dom_document(): void
    {
        $xml = '<hello/>';
        $match = new MatchingNode($xml, $this->helloSequence());

        $document = $match->intoDocument(identity());
        $rootElement = $document->map(document_element());

        static::assertSame($xml, xml_string()($rootElement));
    }

    /**
     * The XML can be decoded into an array.
     */
    public function test_it_can_decode_the_xml(): void
    {
        $match = $this->helloMatch();

        $decoded = $match->decode(identity());

        static::assertSame(['hello' => ''], $decoded);
    }

    /**
     * Builds the `<hello/>` fixture node shared by the tests.
     */
    private function helloMatch(): MatchingNode
    {
        return new MatchingNode('<hello/>', $this->helloSequence());
    }

    /**
     * Builds a fresh sequence holding a single `hello` element node.
     */
    private function helloSequence(): NodeSequence
    {
        return new NodeSequence(
            new ElementNode(1, 'hello', 'hello', '', '', [])
        );
    }
}
Loading

0 comments on commit 1770824

Please sign in to comment.