Skip to content

Commit

Permalink
Native ImageMap implementation
Browse files Browse the repository at this point in the history
This is just a first pass that leaves quite a few FIXMEs.

The "imagemap_desc_types" can be localized, which isn't supported here
yet.

The legacy extension does ad hoc link parsing, so wikitext syntax in
link content is not interpreted.  This implementation instead takes the
text content of the resulting DOM, which is a known difference from the
legacy behaviour.

For now, this implementation punts on adding the magnifying / info
links to avoid the div soup.  For media thumbs in general, Parsoid uses
a css class to add them.  Perhaps a similar approach can be taken here.

The map gets added after the image but inside the figure, which is
different to how the legacy parser does it.  Do browsers mind?

The implementation leaves the images as inline, if they were written
that way, whereas the legacy parser may force block unnecessarily.

Some new tests are added to capture various edge and error cases, since
the test suite was woefully lacking.

Bug: T94793
Depends-On: I6ff81a01207e2734090c626b177e5f4d10bb6d61
Change-Id: Ia91a10d178fc3b3b9d22a1d49210bde82e981ee1
  • Loading branch information
arlolra authored and jenkins-bot committed Nov 20, 2020
1 parent 90935bd commit f554b15
Show file tree
Hide file tree
Showing 6 changed files with 479 additions and 1 deletion.
2 changes: 2 additions & 0 deletions src/Config/SiteConfig.php
Expand Up @@ -16,6 +16,7 @@
use Wikimedia\Parsoid\Ext\ExtensionModule;
use Wikimedia\Parsoid\Ext\ExtensionTagHandler;
use Wikimedia\Parsoid\Ext\Gallery\Gallery;
use Wikimedia\Parsoid\Ext\ImageMap\ImageMap;
use Wikimedia\Parsoid\Ext\JSON\JSON;
use Wikimedia\Parsoid\Ext\LST\LST;
use Wikimedia\Parsoid\Ext\Nowiki\Nowiki;
Expand Down Expand Up @@ -71,6 +72,7 @@ abstract class SiteConfig {
LST::class,
Poem::class,
Translate::class,
ImageMap::class,
];

/**
Expand Down
338 changes: 338 additions & 0 deletions src/Ext/ImageMap/ImageMap.php
@@ -0,0 +1,338 @@
<?php
declare( strict_types = 1 );

namespace Wikimedia\Parsoid\Ext\ImageMap;

use DOMDocumentFragment;
use Wikimedia\Parsoid\Ext\DOMDataUtils;
use Wikimedia\Parsoid\Ext\DOMUtils;
use Wikimedia\Parsoid\Ext\ExtensionError;
use Wikimedia\Parsoid\Ext\ExtensionModule;
use Wikimedia\Parsoid\Ext\ExtensionTagHandler;
use Wikimedia\Parsoid\Ext\ParsoidExtensionAPI;
use Wikimedia\Parsoid\Utils\DOMCompat;

/**
* This is an adaptation of the existing ImageMap extension of the legacy
* parser.
*
* Syntax:
* <imagemap>
* Image:Foo.jpg | 100px | picture of a foo
*
* rect 0 0 50 50 [[Foo type A]]
* circle 50 50 20 [[Foo type B]]
*
* desc bottom-left
* </imagemap>
*
* Coordinates are relative to the source image, not the thumbnail.
*/

class ImageMap extends ExtensionTagHandler implements ExtensionModule {

	// Description ("desc") positions. The values match the index of the
	// corresponding name in the canonical desc type list used by sourceToDom().
	private const TOP_RIGHT = 0;
	private const BOTTOM_RIGHT = 1;
	private const BOTTOM_LEFT = 2;
	private const TOP_LEFT = 3;
	private const NONE = 4;

	/** @inheritDoc */
	public function getConfig(): array {
		return [
			'name' => 'ImageMap',
			'tags' => [
				[
					'name' => 'imagemap',
					'handler' => self::class,
				]
			]
		];
	}

	/**
	 * Convert the body of an <imagemap> tag to a DOM fragment.
	 *
	 * The first non-blank, non-comment line is the image specification; it is
	 * rendered via $extApi->renderMedia(). Each following line is either a
	 * "desc" directive or a shape line (default / rect / circle / poly)
	 * ending in a link. Every non-default shape becomes an <area> inside a
	 * <map> element which is inserted into the thumb after the image wrapper;
	 * a "default" shape turns the image wrapper into a link.
	 *
	 * @inheritDoc
	 * @throws ExtensionError If the image or any line of the map is invalid
	 */
	public function sourceToDom(
		ParsoidExtensionAPI $extApi, string $src, array $extArgs
	): DOMDocumentFragment {
		$domFragment = $extApi->getTopLevelDoc()->createDocumentFragment();

		$thumb = null;
		$anchor = null;
		$imageNode = null;
		$mapHTML = null;

		// Define canonical desc types to allow i18n of 'imagemap_desc_types'
		$descTypesCanonical = 'top-right, bottom-right, bottom-left, top-left, none';
		// NOTE: $descType is currently only validated; nothing below consumes
		// it because the magnify / info link is not rendered yet.
		$descType = self::BOTTOM_RIGHT;

		$scale = 1;
		$lineNum = 0;
		$first = true;
		$defaultLinkAttribs = null;

		$nextOffset = $extApi->extTag->getOffsets()->innerStart();

		$lines = explode( "\n", $src );

		foreach ( $lines as $line ) {
			++$lineNum;

			// Track the offset of the first non-whitespace character of
			// this line, counted from the start offset of the tag's content
			$offset = $nextOffset;
			$nextOffset = $offset + strlen( $line ) + 1; // For the nl
			$offset += strlen( $line ) - strlen( ltrim( $line ) );

			$line = trim( $line );

			// Skip blank lines and comments
			if ( $line === '' || $line[0] === '#' ) {
				continue;
			}

			if ( $first ) {
				$first = false;

				// The first line should have an image specification on it
				// Extract it and render the HTML
				$bits = explode( '|', $line, 2 );
				if ( count( $bits ) === 1 ) {
					$image = $bits[0];
					$options = '';
				} else {
					[ $image, $options ] = $bits;
					$options = '|' . $options;
				}

				// The options (including the leading pipe) start right
				// after the image name
				$imageOpts = [
					[ $options, $offset + strlen( $image ) ],
				];

				$thumb = $extApi->renderMedia( $image, $imageOpts, $error );
				if ( !$thumb ) {
					throw new ExtensionError( $error );
				}

				$anchor = $thumb->firstChild;
				$imageNode = $anchor ? $anchor->firstChild : null;

				// Could be a span (or missing altogether); either way there
				// is no image to attach a map to
				if ( !$imageNode || $imageNode->nodeName !== 'img' ) {
					throw new ExtensionError( 'imagemap_invalid_image' );
				}
				DOMUtils::assertElt( $imageNode );

				// Add the linear dimensions to avoid inaccuracy in the scale
				// factor when one is much larger than the other
				// (sx+sy)/(x+y) = s

				$thumbWidth = (int)$imageNode->getAttribute( 'width' );
				$thumbHeight = (int)$imageNode->getAttribute( 'height' );
				$imageWidth = (int)$imageNode->getAttribute( 'data-file-width' );
				$imageHeight = (int)$imageNode->getAttribute( 'data-file-height' );

				$denominator = $imageWidth + $imageHeight;
				$numerator = $thumbWidth + $thumbHeight;
				if ( $denominator <= 0 || $numerator <= 0 ) {
					throw new ExtensionError( 'imagemap_invalid_image' );
				}
				$scale = $numerator / $denominator;
				continue;
			}

			// Handle desc spec
			$cmd = strtok( $line, " \t" );
			if ( $cmd === 'desc' ) {
				$typesText = $descTypesCanonical;
				// FIXME: Support this ...
				// $typesText = wfMessage( 'imagemap_desc_types' )->inContentLanguage()->text();
				// if ( $descTypesCanonical != $typesText ) {
				// 	// i18n desc types exists
				// 	$typesText = $descTypesCanonical . ', ' . $typesText;
				// }
				$types = array_map( 'trim', explode( ',', $typesText ) );
				// strtok() returns false when nothing follows "desc"; trim()
				// would throw a TypeError on that under strict_types, so
				// fall back to the empty string and report an invalid desc.
				$type = trim( strtok( '' ) ?: '' );
				$descType = array_search( $type, $types, true );
				if ( $descType === false ) {
					throw new ExtensionError( 'imagemap_invalid_desc', $typesText );
				}
				if ( $descType > 4 ) {
					// A localized descType is used. Subtract 5 to reach the canonical desc type.
					$descType -= 5;
				}
				continue;
			}

			// Find the link

			$link = trim( strstr( $line, '[' ) ?: '' );
			if ( !$link ) {
				throw new ExtensionError( 'imagemap_no_link', $lineNum );
			}

			// FIXME: Omits DSR offsets, which will be more relevant when VE
			// supports HTML editing of maps.

			$linkFragment = $extApi->wikitextToDOM(
				$link,
				[
					'parseOpts' => [
						'extTag' => 'imagemap',
						'context' => 'inline',
					],
					// Create new frame, because $link doesn't literally
					// appear on the page, it has been hand-crafted here
					'processInNewFrame' => true
				],
				true // sol
			);
			$a = DOMCompat::querySelector( $linkFragment, 'a' );
			if ( $a === null ) {
				// Meh, might be for other reasons
				throw new ExtensionError( 'imagemap_invalid_title', $lineNum );
			}
			DOMUtils::assertElt( $a );

			$href = $a->getAttribute( 'href' );
			$externLink = (bool)preg_match( "/^mw:ExtLink/", $a->getAttribute( 'rel' ) );
			$alt = '';

			// "stx" may not be set on the data-parsoid of every link, so
			// don't read it unguarded
			$hasContent = $externLink ||
				( DOMDataUtils::getDataParsoid( $a )->stx ?? null ) === 'piped';

			if ( $hasContent ) {
				// FIXME: The legacy extension does ad hoc link parsing, which
				// results in link content not interpreting wikitext syntax.
				// Here we produce a known difference by just taking the text
				// content of the resulting dom.
				// See the test, "Link with wikitext syntax in content"
				$alt = trim( $a->textContent );
			}

			// Everything before the link is the shape specification
			$shapeSpec = substr( $line, 0, -strlen( $link ) );

			// Tokenize shape spec
			// NOTE: tokenizeCoords() continues the strtok() scan started
			// here, so nothing else may call strtok() in between.
			$shape = strtok( $shapeSpec, " \t" );
			switch ( $shape ) {
				case 'default':
					$coords = [];
					break;
				case 'rect':
					$coords = self::tokenizeCoords( $lineNum, 4 );
					break;
				case 'circle':
					$coords = self::tokenizeCoords( $lineNum, 3 );
					break;
				case 'poly':
					$coords = self::tokenizeCoords( $lineNum, 1, true );
					if ( count( $coords ) % 2 !== 0 ) {
						throw new ExtensionError( 'imagemap_poly_odd', $lineNum );
					}
					break;
				default:
					throw new ExtensionError( 'imagemap_unrecognised_shape', $lineNum );
			}

			// Scale the coords using the size of the source image
			foreach ( $coords as $i => $c ) {
				$coords[$i] = (int)round( $c * $scale );
			}

			// Construct the area tag
			$attribs = [ 'href' => $href ];
			if ( $externLink ) {
				$attribs['class'] = 'plainlinks';
				// FIXME: T186241
				// if ( $wgNoFollowLinks ) {
				// 	$attribs['rel'] = 'nofollow';
				// }
			}
			if ( $shape !== 'default' ) {
				$attribs['shape'] = $shape;
			}
			if ( $coords ) {
				$attribs['coords'] = implode( ',', $coords );
			}
			if ( $alt !== '' ) {
				if ( $shape !== 'default' ) {
					$attribs['alt'] = $alt;
				}
				$attribs['title'] = $alt;
			}
			if ( $shape === 'default' ) {
				// Applied to the image wrapper once all lines are processed
				$defaultLinkAttribs = $attribs;
			} else {
				if ( $mapHTML === null ) {
					$mapHTML = $domFragment->ownerDocument->createElement( 'map' );
				}
				$area = $domFragment->ownerDocument->createElement( 'area' );
				foreach ( $attribs as $key => $val ) {
					$area->setAttribute( $key, $val );
				}
				$mapHTML->appendChild( $area );
			}
		}

		// No image specification line was ever seen
		if ( $first ) {
			throw new ExtensionError( 'imagemap_no_image' );
		}

		if ( $mapHTML !== null ) {
			// Construct the map

			// Add a hash of the map HTML to avoid breaking cached HTML fragments that are
			// later joined together on the one page (T18471).
			// The only way these hashes can clash is if the map is identical, in which
			// case it wouldn't matter that the "wrong" map was used.
			$mapName = 'ImageMap_' . substr( md5( DOMCompat::getInnerHTML( $mapHTML ) ), 0, 16 );
			$mapHTML->setAttribute( 'name', $mapName );

			// Alter the image tag
			$imageNode->setAttribute( 'usemap', "#$mapName" );

			// Place the map right after the image's wrapper, inside the thumb
			$thumb->insertBefore( $mapHTML, $imageNode->parentNode->nextSibling );
		}

		// Determine whether a "magnify" link is present
		// FIXME: Find a CSS way of achieving this

		// Swap the original wrapper for either a link built from the
		// "default" shape or, when there is none, a plain span
		if ( $defaultLinkAttribs ) {
			$defaultAnchor = $domFragment->ownerDocument->createElement( 'a' );
			foreach ( $defaultLinkAttribs as $name => $value ) {
				$defaultAnchor->setAttribute( $name, $value );
			}
		} else {
			$defaultAnchor = $domFragment->ownerDocument->createElement( 'span' );
		}
		$defaultAnchor->appendChild( $imageNode );
		$thumb->replaceChild( $defaultAnchor, $anchor );

		$domFragment->appendChild( $thumb );
		return $domFragment;
	}

	/**
	 * Read the remaining whitespace-separated tokens of the current strtok()
	 * scan as coordinates.
	 *
	 * This relies on the caller having started the scan with
	 * strtok( $string, ... ) immediately beforehand.
	 *
	 * @param int $lineNum Line number, for error reporting
	 * @param int $minCount Minimum token count
	 * @param bool $allowNegative Whether negative coordinates are accepted
	 * @return array Array of coordinates, as the numeric strings that were read
	 * @throws ExtensionError If a token is not numeric, is larger than 1e9,
	 *   is negative while $allowNegative is false, or if fewer than $minCount
	 *   tokens are found
	 */
	private static function tokenizeCoords(
		int $lineNum, int $minCount = 0, bool $allowNegative = false
	): array {
		$coords = [];
		$coord = strtok( " \t" );
		while ( $coord !== false ) {
			if ( !is_numeric( $coord ) || $coord > 1e9 || ( !$allowNegative && $coord < 0 ) ) {
				throw new ExtensionError( 'imagemap_invalid_coord', $lineNum );
			}
			$coords[] = $coord;
			$coord = strtok( " \t" );
		}
		if ( count( $coords ) < $minCount ) {
			// TODO: Should this also check there aren't too many coords?
			throw new ExtensionError( 'imagemap_missing_coord', $lineNum );
		}
		return $coords;
	}

}
1 change: 0 additions & 1 deletion src/Mocks/MockSiteConfig.php
Expand Up @@ -335,7 +335,6 @@ protected function getNonNativeExtensionTags(): array {
'hiero' => true,
'charinsert' => true,
'inputbox' => true,
'imagemap' => true,
'source' => true,
'syntaxhighlight' => true,
'section' => true,
Expand Down
1 change: 1 addition & 0 deletions tests/parser/imageMapParserTests-knownFailures.json
@@ -0,0 +1 @@
{}

0 comments on commit f554b15

Please sign in to comment.