From 1d0f5bd5ccd5bdd4e2fcc123c500cc1136a73ebd Mon Sep 17 00:00:00 2001 From: Marius Hoch Date: Sat, 14 Apr 2018 15:30:33 +0200 Subject: [PATCH] Introduce EntityRetrievingClosestReferencedEntityIdLookup Part of https://phabricator.wikimedia.org/T179155 --- ...rievingClosestReferencedEntityIdLookup.php | 249 +++++++++ .../MaxReferenceDepthExhaustedException.php | 51 ++ ...ferencedEntityVisitsExhaustedException.php | 52 ++ ...ingClosestReferencedEntityIdLookupTest.php | 477 ++++++++++++++++++ ...axReferenceDepthExhaustedExceptionTest.php | 61 +++ ...ncedEntityVisitsExhaustedExceptionTest.php | 61 +++ 6 files changed, 951 insertions(+) create mode 100644 src/Lookup/EntityRetrievingClosestReferencedEntityIdLookup.php create mode 100644 src/Lookup/MaxReferenceDepthExhaustedException.php create mode 100644 src/Lookup/MaxReferencedEntityVisitsExhaustedException.php create mode 100644 tests/unit/Lookup/EntityRetrievingClosestReferencedEntityIdLookupTest.php create mode 100644 tests/unit/Lookup/MaxReferenceDepthExhaustedExceptionTest.php create mode 100644 tests/unit/Lookup/MaxReferencedEntityVisitsExhaustedExceptionTest.php diff --git a/src/Lookup/EntityRetrievingClosestReferencedEntityIdLookup.php b/src/Lookup/EntityRetrievingClosestReferencedEntityIdLookup.php new file mode 100644 index 00000000..64a92f4c --- /dev/null +++ b/src/Lookup/EntityRetrievingClosestReferencedEntityIdLookup.php @@ -0,0 +1,249 @@ + true) of already visited entities. + * + * @var bool[] + */ + private $alreadyVisited = []; + + /** + * @param EntityLookup $entityLookup + * @param EntityPrefetcher $entityPrefetcher + * @param int $maxDepth Maximum search depth: Maximum number of intermediate entities to search through. + * For example if 0 is given, only the entities immediately referenced will be found. + * If this limit gets exhausted, a MaxReferenceDepthExhaustedException is thrown. + * @param int $maxEntityVisits Maximum number of entities to retrieve during a lookup. + * If this limit gets exhausted, a MaxReferencedEntityVisitsExhaustedException is thrown. + */ + public function __construct( + EntityLookup $entityLookup, + EntityPrefetcher $entityPrefetcher, + $maxDepth, + $maxEntityVisits + ) { + $this->entityLookup = $entityLookup; + $this->entityPrefetcher = $entityPrefetcher; + $this->maxDepth = $maxDepth; + $this->maxEntityVisits = $maxEntityVisits; + } + + /** + * Get the closest entity (out of $toIds), from a given entity. The starting entity, and + * the target entities are (potentially indirectly, via intermediate entities) linked by + * statements with the given property ID, pointing from the starting entity to one of the + * target entities. + * + * @since 3.10 + * + * @param EntityId $fromId + * @param PropertyId $propertyId + * @param EntityId[] $toIds + * + * @return EntityId|null Returns null in case none of the target entities are referenced. + * @throws ReferencedEntityIdLookupException + */ + public function getReferencedEntityId( EntityId $fromId, PropertyId $propertyId, array $toIds ) { + if ( !$toIds ) { + return null; + } + + $this->alreadyVisited = []; + + $steps = $this->maxDepth + 1; // Add one as checking $fromId already is a step + $toVisit = [ $fromId ]; + + while ( $steps-- ) { + $this->entityPrefetcher->prefetch( $toVisit ); + $toVisitNext = []; + + foreach ( $toVisit as $curId ) { + $result = $this->processEntityById( $curId, $fromId, $propertyId, $toIds, $toVisitNext ); + if ( $result ) { + return $result; + } + } + // Remove already visited entities + $toVisit = array_unique( + array_diff( $toVisitNext, array_keys( $this->alreadyVisited ) ) + ); + + if ( !$toVisit ) { + return null; + } + } + + // Exhausted the max. depth without finding anything. + throw new MaxReferenceDepthExhaustedException( + $fromId, + $propertyId, + $toIds, + $this->maxDepth + ); + } + + /** + * Find out whether an entity (directly) references one of the target ids. + * + * @param EntityId $id Id of the entity to process + * @param EntityId $fromId Id this lookup started from + * @param PropertyId $propertyId + * @param EntityId[] $toIds + * @param EntityId[] &$toVisit List of entities that still need to be checked + * @return EntityId|null Target id the entity refers to, null if none. + */ + private function processEntityById( + EntityId $id, + EntityId $fromId, + PropertyId $propertyId, + array $toIds, + array &$toVisit + ) { + $entity = $this->getEntity( $id, $fromId, $propertyId, $toIds ); + if ( !$entity ) { + return null; + } + + $mainSnaks = $this->getMainSnaks( $entity, $propertyId ); + + foreach ( $mainSnaks as $mainSnak ) { + $result = $this->processSnak( $mainSnak, $toVisit, $toIds ); + if ( $result ) { + return $result; + } + } + + return null; + } + + /** + * @param EntityId $id Id of the entity to get + * @param EntityId $fromId Id this lookup started from + * @param PropertyId $propertyId + * @param EntityId[] $toIds + * + * @return StatementListProvider|null Null if not applicable. + */ + private function getEntity( EntityId $id, EntityId $fromId, PropertyId $propertyId, array $toIds ) { + if ( isset( $this->alreadyVisited[$id->getSerialization()] ) ) { + trigger_error( + 'Entity ' . $id->getSerialization() . ' already visited.', + E_USER_WARNING + ); + + return null; + } + + $this->alreadyVisited[$id->getSerialization()] = true; + + if ( count( $this->alreadyVisited ) > $this->maxEntityVisits ) { + throw new MaxReferencedEntityVisitsExhaustedException( + $fromId, + $propertyId, + $toIds, + $this->maxEntityVisits + ); + } + + try { + $entity = $this->entityLookup->getEntity( $id ); + } catch ( EntityLookupException $ex ) { + throw new ReferencedEntityIdLookupException( $fromId, $propertyId, $toIds, null, $ex ); + } + + if ( !( $entity instanceof StatementListProvider ) ) { + return null; + } + + return $entity; + } + + /** + * Decide whether a single Snak is pointing to one of the target ids. + * + * @param Snak $snak + * @param EntityId[] &$toVisit List of entities that still need to be checked + * @param EntityId[] $toIds + * @return EntityId|null Target id the Snak refers to, null if none. + */ + private function processSnak( Snak $snak, array &$toVisit, array $toIds ) { + if ( ! ( $snak instanceof PropertyValueSnak ) ) { + return null; + } + $dataValue = $snak->getDataValue(); + if ( ! ( $dataValue instanceof EntityIdValue ) ) { + return null; + } + + $entityId = $dataValue->getEntityId(); + if ( in_array( $entityId, $toIds, false ) ) { + return $entityId; + } + + $toVisit[] = $entityId; + + return null; + } + + /** + * @param StatementListProvider $statementListProvider + * @param PropertyId $propertyId + * @return Snak[] + */ + private function getMainSnaks( + StatementListProvider $statementListProvider, + PropertyId $propertyId + ) { + return $statementListProvider + ->getStatements() + ->getByPropertyId( $propertyId ) + ->getBestStatements() + ->getMainSnaks(); + } + +} diff --git a/src/Lookup/MaxReferenceDepthExhaustedException.php b/src/Lookup/MaxReferenceDepthExhaustedException.php new file mode 100644 index 00000000..af4c7df9 --- /dev/null +++ b/src/Lookup/MaxReferenceDepthExhaustedException.php @@ -0,0 +1,51 @@ +maxDepth = $maxDepth; + $message = $message ?: 'Referenced entity id lookup failed: Maximum depth of ' . $maxDepth . ' exhausted.'; + + parent::__construct( $fromId, $propertyId, $toIds, $message, $previous ); + } + + /** + * @return int + */ + public function getMaxDepth() { + return $this->maxDepth; + } + +} diff --git a/src/Lookup/MaxReferencedEntityVisitsExhaustedException.php b/src/Lookup/MaxReferencedEntityVisitsExhaustedException.php new file mode 100644 index 00000000..f7e146b3 --- /dev/null +++ b/src/Lookup/MaxReferencedEntityVisitsExhaustedException.php @@ -0,0 +1,52 @@ +maxEntityVisits = $maxEntityVisits; + $message = $message ?: 'Referenced entity id lookup failed: Maximum number of entity visits (' . + $maxEntityVisits . ') exhausted.'; + + parent::__construct( $fromId, $propertyId, $toIds, $message, $previous ); + } + + /** + * @return int + */ + public function getMaxEntityVisits() { + return $this->maxEntityVisits; + } + +} diff --git a/tests/unit/Lookup/EntityRetrievingClosestReferencedEntityIdLookupTest.php b/tests/unit/Lookup/EntityRetrievingClosestReferencedEntityIdLookupTest.php new file mode 100644 index 00000000..c5015a54 --- /dev/null +++ b/tests/unit/Lookup/EntityRetrievingClosestReferencedEntityIdLookupTest.php @@ -0,0 +1,477 @@ +getMock( EntityLookup::class ); + + $entityLookupMock->expects( + $expectedNumberOfGetEntityCalls === null ? $this->any() : $this->exactly( $expectedNumberOfGetEntityCalls ) + ) + ->method( 'getEntity' ) + ->willReturnCallback( function ( EntityId $entityId ) use ( $entityLookup ) { + return $entityLookup->getEntity( $entityId ); + } ); + + return $entityLookupMock; + } + + /** + * @param int $expectedPrefetches + * @return EntityPrefetcher + */ + private function newEntityPrefetcher( $expectedPrefetches ) { + $entityPrefetcher = $this->getMock( EntityPrefetcher::class ); + $entityPrefetcher->expects( $this->exactly( $expectedPrefetches ) ) + ->method( 'prefetch' ) + ->with( $this->isType( 'array' ) ); + + return $entityPrefetcher; + } + + /** + * @param PropertyId $via + * @param EntityId[] $to + * + * @return StatementList + */ + private function newReferencingStatementList( PropertyId $via, array $to ) { + $statementList = new StatementList(); + + foreach ( $to as $toId ) { + $value = new EntityIdValue( $toId ); + $mainSnak = new PropertyValueSnak( $via, $value ); + $statementList->addStatement( new Statement( $mainSnak ) ); + } + + return $statementList; + } + + /** + * @return EntityLookup + */ + private function newReferencingEntityStructure() { + // This returns the following entity structure (all entities linked by P599) + // Q1 -> Q5 -> Q599 -> Q1234 + // \ \ + // \ -- Q12 -> Q404 + // --- Q90 -> Q3 + // Note: Q404 doesn't exist + + $pSubclassOf = new PropertyId( 'P599' ); + $q1 = new ItemId( 'Q1' ); + $q5 = new ItemId( 'Q5' ); + $q599 = new ItemId( 'Q599' ); + $q12 = new ItemId( 'Q12' ); + $q404 = new ItemId( 'Q404' ); + $q1234 = new ItemId( 'Q1234' ); + $q90 = new ItemId( 'Q90' ); + $q3 = new ItemId( 'Q3' ); + + $lookup = new InMemoryEntityLookup(); + + $lookup->addEntity( + new Item( $q1, null, null, $this->newReferencingStatementList( $pSubclassOf, [ $q5, $q90 ] ) ) + ); + $lookup->addEntity( + new Item( $q5, null, null, $this->newReferencingStatementList( $pSubclassOf, [ $q599 ] ) ) + ); + $lookup->addEntity( + new Item( $q599, null, null, $this->newReferencingStatementList( $pSubclassOf, [ $q12, $q1234 ] ) ) + ); + $lookup->addEntity( + new Item( $q12, null, null, $this->newReferencingStatementList( $pSubclassOf, [ $q404 ] ) ) + ); + $lookup->addEntity( + new Item( $q90, null, null, $this->newReferencingStatementList( $pSubclassOf, [ $q3 ] ) ) + ); + $lookup->addEntity( new Item( $q1234, null, null, null ) ); + $lookup->addEntity( new Item( $q3, null, null, null ) ); + + return $lookup; + } + + /** + * @return EntityLookup + */ + private function newCircularReferencingEntityStructure() { + // This returns the following entity structure (all entities linked by P599) + // Q1 -> Q5 -> Q1 -> Q5 -> … + // \ \ + // --- Q90 --- Q90 + + $pSubclassOf = new PropertyId( 'P599' ); + $q1 = new ItemId( 'Q1' ); + $q5 = new ItemId( 'Q5' ); + $q90 = new ItemId( 'Q90' ); + + $lookup = new InMemoryEntityLookup(); + + $lookup->addEntity( + new Item( $q1, null, null, $this->newReferencingStatementList( $pSubclassOf, [ $q5, $q90 ] ) ) + ); + $lookup->addEntity( + new Item( $q5, null, null, $this->newReferencingStatementList( $pSubclassOf, [ $q1 ] ) ) + ); + $lookup->addEntity( + new Item( $q90, null, null, null ) + ); + + return $lookup; + } + + /** + * @SuppressWarnings(PHPMD.ExcessiveMethodLength) + */ + public function provideGetReferencedEntityIdNoError() { + $pSubclassOf = new PropertyId( 'P599' ); + $q1 = new ItemId( 'Q1' ); + $q3 = new ItemId( 'Q3' ); + $q5 = new ItemId( 'Q5' ); + $q12 = new ItemId( 'Q12' ); + $q403 = new ItemId( 'Q403' ); + $q404 = new ItemId( 'Q404' ); + $referencingEntityStructureLookup = $this->newReferencingEntityStructure(); + $circularReferencingEntityStructure = $this->newCircularReferencingEntityStructure(); + + return [ + 'empty list of target ids' => [ + null, + 0, + 0, + $referencingEntityStructureLookup, + $q1, + $pSubclassOf, + [] + ], + 'no such statement' => [ + null, + 1, + 0, + $referencingEntityStructureLookup, + $q1, + new PropertyId( 'P12345' ), + [ $q5 ] + ], + 'from id does not exist' => [ + null, + 1, + 0, + $referencingEntityStructureLookup, + $q404, + $pSubclassOf, + [ $q5 ] + ], + 'directly referenced entity #1' => [ + $q5, + 1, + 0, + $referencingEntityStructureLookup, + $q1, + $pSubclassOf, + [ $q5 ] + ], + 'directly referenced entity #2' => [ + $q1, + 1, + 0, + $circularReferencingEntityStructure, + $q5, + $pSubclassOf, + [ $q12, $q403, $q1, $q404 ] + ], + 'directly referenced entity, two target ids' => [ + $q5, + 1, + 0, + $referencingEntityStructureLookup, + $q1, + $pSubclassOf, + [ $q5, $q404 ] + ], + 'indirectly referenced entity #1' => [ + $q3, + 3, + 1, + $referencingEntityStructureLookup, + $q1, + $pSubclassOf, + [ $q3 ] + ], + 'indirectly referenced entity #2' => [ + $q12, + 4, + 2, + $referencingEntityStructureLookup, + $q1, + $pSubclassOf, + [ $q12 ] + ], + 'indirectly referenced entity, multiple target ids' => [ + $q12, + 4, + 2, + $referencingEntityStructureLookup, + $q1, + $pSubclassOf, + [ $q12, $q403, $q404 ] + ], + 'indirectly referenced entity, multiple target ids' => [ + $q12, + 4, + 2, + $referencingEntityStructureLookup, + $q1, + $pSubclassOf, + [ $q12, $q403, $q404 ] + ], + 'circular reference detection' => [ + null, + 3, + 1, + $circularReferencingEntityStructure, + $q1, + $pSubclassOf, + [ $q403, $q404 ] + ], + ]; + } + + /** + * @dataProvider provideGetReferencedEntityIdNoError + */ + public function testGetReferencedEntityIdNoError( + EntityId $expectedToId = null, + $maxEntityVisits, + $maxDepth, + EntityLookup $entityLookup, + EntityId $fromId, + PropertyId $propertyId, + array $toIds + ) { + // Number of prefetching operations to expect (Note: We call getReferencedEntityId twice) + $expectedNumberOfPrefetches = $maxEntityVisits ? ( $maxDepth + 1 ) * 2 : 0; + + $lookup = new EntityRetrievingClosestReferencedEntityIdLookup( + $this->restrictEntityLookup( $entityLookup, $maxEntityVisits * 2 ), + $this->newEntityPrefetcher( $expectedNumberOfPrefetches ), + $maxDepth, + $maxEntityVisits + ); + $result = $lookup->getReferencedEntityId( $fromId, $propertyId, $toIds ); + + $this->assertEquals( $expectedToId, $result ); + + // Run again to see if the maxDepth/visitedEntityRelated state is properly resetted + $this->assertEquals( + $expectedToId, + $lookup->getReferencedEntityId( $fromId, $propertyId, $toIds ) + ); + } + + public function provideGetReferencedEntityIdMaxDepthExceeded() { + $cases = $this->provideGetReferencedEntityIdNoError(); + + foreach ( $cases as $caseName => $case ) { + if ( end( $case ) === [] ) { + // In case we search for nothing, the max depth can't ever be exceeded + continue; + } + + // Remove expected to id + array_shift( $case ); + // Reduce max depth by 1 + $case[1]--; + + yield $caseName => $case; + } + } + + /** + * @dataProvider provideGetReferencedEntityIdMaxDepthExceeded + */ + public function testGetReferencedEntityIdMaxDepthExceeded( + $maxEntityVisits, + $maxDepth, + EntityLookup $entityLookup, + EntityId $fromId, + PropertyId $propertyId, + array $toIds + ) { + $lookup = new EntityRetrievingClosestReferencedEntityIdLookup( + $this->restrictEntityLookup( $entityLookup ), + new NullEntityPrefetcher(), + $maxDepth, + $maxEntityVisits + ); + + try { + $lookup->getReferencedEntityId( $fromId, $propertyId, $toIds ); + } catch ( MaxReferenceDepthExhaustedException $exception ) { + $this->assertSame( $maxDepth, $exception->getMaxDepth() ); + + return; + } + $this->fail( 'No expection thrown!' ); + } + + public function provideGetReferencedEntityIdMaxEntityVisitsExceeded() { + $cases = $this->provideGetReferencedEntityIdNoError(); + + foreach ( $cases as $caseName => $case ) { + if ( end( $case ) === [] ) { + // In case we search for nothing, no entity will ever be loaded + continue; + } + + // Remove expected to id + array_shift( $case ); + // Reduce max entity visits by 1 + $case[0]--; + + yield $caseName => $case; + } + } + + /** + * @dataProvider provideGetReferencedEntityIdMaxEntityVisitsExceeded + */ + public function testGetReferencedEntityIdMaxEntityVisitsExceeded( + $maxEntityVisits, + $maxDepth, + EntityLookup $entityLookup, + EntityId $fromId, + PropertyId $propertyId, + array $toIds + ) { + $lookup = new EntityRetrievingClosestReferencedEntityIdLookup( + $this->restrictEntityLookup( $entityLookup, $maxEntityVisits ), + new NullEntityPrefetcher(), + $maxDepth, + $maxEntityVisits + ); + + try { + $lookup->getReferencedEntityId( $fromId, $propertyId, $toIds ); + } catch ( MaxReferencedEntityVisitsExhaustedException $exception ) { + $this->assertSame( $maxEntityVisits, $exception->getMaxEntityVisits() ); + + return; + } + $this->fail( 'No expection thrown!' ); + } + + public function provideGetReferencedEntityIdTestInvalidSnak() { + $q42 = new ItemId( 'Q42' ); + $p1 = new PropertyId( 'P1' ); + $p2 = new PropertyId( 'P2' ); + $statementList = new StatementList(); + + $statementList->addStatement( + new Statement( new PropertyNoValueSnak( $p1 ) ) + ); + + $statementList->addStatement( + new Statement( new PropertyValueSnak( $p2, new StringValue( '12' ) ) ) + ); + + $entityLookup = new InMemoryEntityLookup(); + $entityLookup->addEntity( new Item( $q42, null, null, $statementList ) ); + + return [ + 'no value snak' => [ + $entityLookup, + $q42, + $p1, + [ $q42 ] + ], + 'wrong datatype' => [ + $entityLookup, + $q42, + $p2, + [ $q42 ] + ], + ]; + } + + /** + * @dataProvider provideGetReferencedEntityIdTestInvalidSnak + */ + public function testGetReferencedEntityIdTestInvalidSnak( + EntityLookup $entityLookup, + EntityId $fromId, + PropertyId $propertyId, + array $toIds + ) { + $lookup = new EntityRetrievingClosestReferencedEntityIdLookup( + $this->restrictEntityLookup( $entityLookup, 1 ), + new NullEntityPrefetcher(), + 0, + 1 + ); + + $this->assertNull( + $lookup->getReferencedEntityId( $fromId, $propertyId, $toIds ) + ); + } + + public function testGetReferencedEntityIdEntityLookupException() { + $q2013 = new ItemId( 'Q2013' ); + + $entityLookupException = new EntityLookupException( $q2013 ); + $entityLookup = new InMemoryEntityLookup(); + $entityLookup->addException( $entityLookupException ); + + $lookup = new EntityRetrievingClosestReferencedEntityIdLookup( + $entityLookup, + new NullEntityPrefetcher(), + 50, + 50 + ); + + try { + $lookup->getReferencedEntityId( $q2013, new PropertyId( 'P31' ), [ new ItemId( 'Q154187' ) ] ); + } catch ( ReferencedEntityIdLookupException $exception ) { + $this->assertInstanceOf( EntityLookupException::class, $exception->getPrevious() ); + + return; + } + $this->fail( 'No expection thrown!' ); + } + +} diff --git a/tests/unit/Lookup/MaxReferenceDepthExhaustedExceptionTest.php b/tests/unit/Lookup/MaxReferenceDepthExhaustedExceptionTest.php new file mode 100644 index 00000000..cf5eb273 --- /dev/null +++ b/tests/unit/Lookup/MaxReferenceDepthExhaustedExceptionTest.php @@ -0,0 +1,61 @@ +assertSame( 44, $exception->getMaxDepth() ); + $this->assertSame( + 'Referenced entity id lookup failed: Maximum depth of 44 exhausted.', + $exception->getMessage() + ); + $this->assertSame( 0, $exception->getCode() ); + $this->assertNull( $exception->getPrevious() ); + } + + public function testConstructorWithAllArguments() { + $entityId = new ItemId( 'Q1' ); + $propertyId = new PropertyId( 'P12' ); + $toIds = [ + new ItemId( 'Q5' ), + new ItemId( 'Q2013' ) + ]; + $previous = new Exception( 'previous' ); + + $exception = new MaxReferenceDepthExhaustedException( + $entityId, + $propertyId, + $toIds, + 123, + 'blah blah', + $previous + ); + + $this->assertSame( 123, $exception->getMaxDepth() ); + $this->assertSame( 'blah blah', $exception->getMessage() ); + $this->assertSame( 0, $exception->getCode() ); + $this->assertSame( $previous, $exception->getPrevious() ); + } + +} diff --git a/tests/unit/Lookup/MaxReferencedEntityVisitsExhaustedExceptionTest.php b/tests/unit/Lookup/MaxReferencedEntityVisitsExhaustedExceptionTest.php new file mode 100644 index 00000000..a0cf059f --- /dev/null +++ b/tests/unit/Lookup/MaxReferencedEntityVisitsExhaustedExceptionTest.php @@ -0,0 +1,61 @@ +assertSame( 44, $exception->getMaxEntityVisits() ); + $this->assertSame( + 'Referenced entity id lookup failed: Maximum number of entity visits (44) exhausted.', + $exception->getMessage() + ); + $this->assertSame( 0, $exception->getCode() ); + $this->assertNull( $exception->getPrevious() ); + } + + public function testConstructorWithAllArguments() { + $entityId = new ItemId( 'Q1' ); + $propertyId = new PropertyId( 'P12' ); + $toIds = [ + new ItemId( 'Q5' ), + new ItemId( 'Q2013' ) + ]; + $previous = new Exception( 'previous' ); + + $exception = new MaxReferencedEntityVisitsExhaustedException( + $entityId, + $propertyId, + $toIds, + 123, + 'blah blah', + $previous + ); + + $this->assertSame( 123, $exception->getMaxEntityVisits() ); + $this->assertSame( 'blah blah', $exception->getMessage() ); + $this->assertSame( 0, $exception->getCode() ); + $this->assertSame( $previous, $exception->getPrevious() ); + } + +}