Skip to content

Commit

Permalink
Merge pull request #195 from wmde/EntityRetrievingClosestReferencedEn…
Browse files Browse the repository at this point in the history
…tityIdLookup

Introduce EntityRetrievingClosestReferencedEntityIdLookup
  • Loading branch information
JonasKress committed Apr 24, 2018
2 parents 379b273 + 1d0f5bd commit 5b49bb4
Show file tree
Hide file tree
Showing 6 changed files with 951 additions and 0 deletions.
249 changes: 249 additions & 0 deletions src/Lookup/EntityRetrievingClosestReferencedEntityIdLookup.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,249 @@
<?php

namespace Wikibase\DataModel\Services\Lookup;

use Wikibase\DataModel\Entity\EntityId;
use Wikibase\DataModel\Entity\EntityIdValue;
use Wikibase\DataModel\Entity\PropertyId;
use Wikibase\DataModel\Services\Entity\EntityPrefetcher;
use Wikibase\DataModel\Snak\PropertyValueSnak;
use Wikibase\DataModel\Snak\Snak;
use Wikibase\DataModel\Statement\StatementListProvider;

/**
 * Service for getting the closest entity (out of a specified set),
 * from a given starting entity. The starting entity, and the target entities
 * are (potentially indirectly, via intermediate entities) linked by statements
 * with a given property ID, pointing from the starting entity to one of the
 * target entities.
 *
 * The search proceeds level by level: first the starting entity is inspected,
 * then all entities it references, then all entities those reference, and so on.
 * The first target id encountered is returned.
 *
 * @since 3.10
 *
 * @license GPL-2.0-or-later
 * @author Marius Hoch
 */
class EntityRetrievingClosestReferencedEntityIdLookup implements ReferencedEntityIdLookup {

	/**
	 * @var EntityLookup Used to load each entity that gets inspected.
	 */
	private $entityLookup;

	/**
	 * @var EntityPrefetcher Told about every batch of entity ids right before they are loaded.
	 */
	private $entityPrefetcher;

	/**
	 * @var int Maximum search depth: Maximum number of intermediate entities to search through.
	 * For example 0 means that only the entities immediately referenced will be found.
	 */
	private $maxDepth;

	/**
	 * @var int Maximum number of entities to retrieve.
	 */
	private $maxEntityVisits;

	/**
	 * Map (entity id serialization => true) of already visited entities.
	 * Reset at the start of every getReferencedEntityId() call.
	 *
	 * @var bool[]
	 */
	private $alreadyVisited = [];

	/**
	 * @param EntityLookup $entityLookup
	 * @param EntityPrefetcher $entityPrefetcher
	 * @param int $maxDepth Maximum search depth: Maximum number of intermediate entities to search through.
	 *  For example if 0 is given, only the entities immediately referenced will be found.
	 *  If this limit gets exhausted, a MaxReferenceDepthExhaustedException is thrown.
	 *  A negative value leaves no step for inspecting the starting entity, so every lookup
	 *  with a non-empty target list throws MaxReferenceDepthExhaustedException.
	 * @param int $maxEntityVisits Maximum number of entities to retrieve during a lookup.
	 *  If this limit gets exhausted, a MaxReferencedEntityVisitsExhaustedException is thrown.
	 */
	public function __construct(
		EntityLookup $entityLookup,
		EntityPrefetcher $entityPrefetcher,
		$maxDepth,
		$maxEntityVisits
	) {
		$this->entityLookup = $entityLookup;
		$this->entityPrefetcher = $entityPrefetcher;
		$this->maxDepth = $maxDepth;
		$this->maxEntityVisits = $maxEntityVisits;
	}

	/**
	 * Get the closest entity (out of $toIds), from a given entity. The starting entity, and
	 * the target entities are (potentially indirectly, via intermediate entities) linked by
	 * statements with the given property ID, pointing from the starting entity to one of the
	 * target entities.
	 *
	 * @since 3.10
	 *
	 * @param EntityId $fromId
	 * @param PropertyId $propertyId
	 * @param EntityId[] $toIds
	 *
	 * @return EntityId|null Returns null in case none of the target entities are referenced
	 *  (or $toIds is empty).
	 * @throws MaxReferenceDepthExhaustedException If unvisited entities remain after
	 *  the maximum depth has been searched.
	 * @throws MaxReferencedEntityVisitsExhaustedException If more than the maximum number
	 *  of entities would have to be visited.
	 * @throws ReferencedEntityIdLookupException If loading an entity fails.
	 */
	public function getReferencedEntityId( EntityId $fromId, PropertyId $propertyId, array $toIds ) {
		if ( !$toIds ) {
			return null;
		}

		// Visited state is per lookup, not per instance lifetime.
		$this->alreadyVisited = [];

		$steps = $this->maxDepth + 1; // Add one as checking $fromId already is a step
		$toVisit = [ $fromId ];

		// Compare against 0 explicitly: a plain truthiness check would treat a negative
		// step count as "keep going" and thereby disable the depth limit altogether.
		while ( $steps-- > 0 ) {
			$this->entityPrefetcher->prefetch( $toVisit );
			$toVisitNext = [];

			foreach ( $toVisit as $curId ) {
				$result = $this->processEntityById( $curId, $fromId, $propertyId, $toIds, $toVisitNext );
				if ( $result ) {
					return $result;
				}
			}

			// Remove already visited entities and duplicates. Both array_diff and
			// array_unique compare the string representation of the ids.
			$toVisit = array_unique(
				array_diff( $toVisitNext, array_keys( $this->alreadyVisited ) )
			);

			if ( !$toVisit ) {
				// Nothing left to follow: no target is reachable.
				return null;
			}
		}

		// Exhausted the max. depth without finding anything.
		throw new MaxReferenceDepthExhaustedException(
			$fromId,
			$propertyId,
			$toIds,
			$this->maxDepth
		);
	}

	/**
	 * Find out whether an entity (directly) references one of the target ids.
	 * Referenced entity ids that are not targets are appended to $toVisit.
	 *
	 * @param EntityId $id Id of the entity to process
	 * @param EntityId $fromId Id this lookup started from
	 * @param PropertyId $propertyId
	 * @param EntityId[] $toIds
	 * @param EntityId[] &$toVisit List of entities that still need to be checked
	 *
	 * @return EntityId|null Target id the entity refers to, null if none.
	 * @throws ReferencedEntityIdLookupException
	 */
	private function processEntityById(
		EntityId $id,
		EntityId $fromId,
		PropertyId $propertyId,
		array $toIds,
		array &$toVisit
	) {
		$entity = $this->getEntity( $id, $fromId, $propertyId, $toIds );
		if ( !$entity ) {
			return null;
		}

		$mainSnaks = $this->getMainSnaks( $entity, $propertyId );

		foreach ( $mainSnaks as $mainSnak ) {
			$result = $this->processSnak( $mainSnak, $toVisit, $toIds );
			if ( $result ) {
				return $result;
			}
		}

		return null;
	}

	/**
	 * Load an entity, recording the visit and enforcing the visit limit.
	 *
	 * @param EntityId $id Id of the entity to get
	 * @param EntityId $fromId Id this lookup started from
	 * @param PropertyId $propertyId
	 * @param EntityId[] $toIds
	 *
	 * @return StatementListProvider|null Null if not applicable, i.e. the entity was already
	 *  visited or what the lookup returned is not a StatementListProvider.
	 * @throws MaxReferencedEntityVisitsExhaustedException If this visit exceeds the limit.
	 * @throws ReferencedEntityIdLookupException If the entity lookup throws an EntityLookupException.
	 */
	private function getEntity( EntityId $id, EntityId $fromId, PropertyId $propertyId, array $toIds ) {
		if ( isset( $this->alreadyVisited[$id->getSerialization()] ) ) {
			// The caller filters out visited ids, so getting here indicates a problem;
			// warn and skip the entity rather than processing it twice.
			trigger_error(
				'Entity ' . $id->getSerialization() . ' already visited.',
				E_USER_WARNING
			);

			return null;
		}

		// Record the visit before checking the limit, so the limit counts this entity too.
		$this->alreadyVisited[$id->getSerialization()] = true;

		if ( count( $this->alreadyVisited ) > $this->maxEntityVisits ) {
			throw new MaxReferencedEntityVisitsExhaustedException(
				$fromId,
				$propertyId,
				$toIds,
				$this->maxEntityVisits
			);
		}

		try {
			$entity = $this->entityLookup->getEntity( $id );
		} catch ( EntityLookupException $ex ) {
			// Re-throw as the exception type this service documents, keeping the cause.
			throw new ReferencedEntityIdLookupException( $fromId, $propertyId, $toIds, null, $ex );
		}

		if ( !( $entity instanceof StatementListProvider ) ) {
			return null;
		}

		return $entity;
	}

	/**
	 * Decide whether a single Snak is pointing to one of the target ids.
	 * If it points to a non-target entity, that entity id is appended to $toVisit.
	 *
	 * @param Snak $snak
	 * @param EntityId[] &$toVisit List of entities that still need to be checked
	 * @param EntityId[] $toIds
	 *
	 * @return EntityId|null Target id the Snak refers to, null if none.
	 */
	private function processSnak( Snak $snak, array &$toVisit, array $toIds ) {
		if ( !( $snak instanceof PropertyValueSnak ) ) {
			return null;
		}

		$dataValue = $snak->getDataValue();
		if ( !( $dataValue instanceof EntityIdValue ) ) {
			return null;
		}

		$entityId = $dataValue->getEntityId();
		// Deliberately non-strict: the ids are distinct object instances, so they have to be
		// compared by value (==) rather than by identity (===).
		if ( in_array( $entityId, $toIds, false ) ) {
			return $entityId;
		}

		$toVisit[] = $entityId;

		return null;
	}

	/**
	 * Collect the main snaks of the best statements the given provider has for a property.
	 *
	 * @param StatementListProvider $statementListProvider
	 * @param PropertyId $propertyId
	 *
	 * @return Snak[]
	 */
	private function getMainSnaks(
		StatementListProvider $statementListProvider,
		PropertyId $propertyId
	) {
		return $statementListProvider
			->getStatements()
			->getByPropertyId( $propertyId )
			->getBestStatements()
			->getMainSnaks();
	}

}
51 changes: 51 additions & 0 deletions src/Lookup/MaxReferenceDepthExhaustedException.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
<?php

namespace Wikibase\DataModel\Services\Lookup;

use Exception;
use Wikibase\DataModel\Entity\EntityId;
use Wikibase\DataModel\Entity\PropertyId;

/**
 * Referenced entity id lookup failure caused by running out of search depth.
 * Carries the depth limit that was exhausted.
 *
 * @since 3.10
 *
 * @license GPL-2.0-or-later
 * @author Marius Hoch
 */
class MaxReferenceDepthExhaustedException extends ReferencedEntityIdLookupException {

	/**
	 * @var int The depth limit that got exhausted.
	 */
	private $maxDepth;

	/**
	 * @param EntityId $fromId
	 * @param PropertyId $propertyId
	 * @param EntityId[] $toIds
	 * @param int $maxDepth
	 * @param string|null $message Custom message; when empty, a default message
	 *  mentioning $maxDepth is used.
	 * @param Exception|null $previous
	 */
	public function __construct(
		EntityId $fromId,
		PropertyId $propertyId,
		array $toIds,
		$maxDepth,
		$message = null,
		Exception $previous = null
	) {
		// Any falsy message (null, empty string, ...) is replaced by the default.
		if ( !$message ) {
			$message = 'Referenced entity id lookup failed: Maximum depth of ' . $maxDepth . ' exhausted.';
		}

		$this->maxDepth = $maxDepth;

		parent::__construct( $fromId, $propertyId, $toIds, $message, $previous );
	}

	/**
	 * @return int The depth limit passed to the constructor.
	 */
	public function getMaxDepth() {
		return $this->maxDepth;
	}

}
52 changes: 52 additions & 0 deletions src/Lookup/MaxReferencedEntityVisitsExhaustedException.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
<?php

namespace Wikibase\DataModel\Services\Lookup;

use Exception;
use Wikibase\DataModel\Entity\EntityId;
use Wikibase\DataModel\Entity\PropertyId;

/**
 * Referenced entity id lookup failure caused by running out of allowed entity visits.
 * Carries the visit limit that was exhausted.
 *
 * @since 3.10
 *
 * @license GPL-2.0-or-later
 * @author Marius Hoch
 */
class MaxReferencedEntityVisitsExhaustedException extends ReferencedEntityIdLookupException {

	/**
	 * @var int The entity visit limit that got exhausted.
	 */
	private $maxEntityVisits;

	/**
	 * @param EntityId $fromId
	 * @param PropertyId $propertyId
	 * @param EntityId[] $toIds
	 * @param int $maxEntityVisits
	 * @param string|null $message Custom message; when empty, a default message
	 *  mentioning $maxEntityVisits is used.
	 * @param Exception|null $previous
	 */
	public function __construct(
		EntityId $fromId,
		PropertyId $propertyId,
		array $toIds,
		$maxEntityVisits,
		$message = null,
		Exception $previous = null
	) {
		// Any falsy message (null, empty string, ...) is replaced by the default.
		if ( !$message ) {
			$message = 'Referenced entity id lookup failed: Maximum number of entity visits (' .
				$maxEntityVisits . ') exhausted.';
		}

		$this->maxEntityVisits = $maxEntityVisits;

		parent::__construct( $fromId, $propertyId, $toIds, $message, $previous );
	}

	/**
	 * @return int The visit limit passed to the constructor.
	 */
	public function getMaxEntityVisits() {
		return $this->maxEntityVisits;
	}

}
Loading

0 comments on commit 5b49bb4

Please sign in to comment.