Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Migrate Parsoid implementation of Poem extension to src/Parsoid
Further commits will be necessary to complete the migration, but this merge commit imports all of the existing history of the Poem extension. It was generated using the following command on a checkout of Parsoid: git filter-repo --path src/Ext/Poem --path lib/ext/Poem \ --tag-rename '':'parsoid-' \ --path-rename src/Ext/Poem:src/Parsoid \ --path-rename lib/ext/Poem:src/Parsoid And then, in the Poem repository: git remote add parsoid ../path/to/parsoid/checkout git merge parsoid/master --allow-unrelated-histories Bug: T358054 Change-Id: Ic035e53d93bc41c5d5b26e5e8245ab859aba4266
- Loading branch information
Showing
2 changed files
with
208 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,133 @@ | ||
<?php | ||
declare( strict_types = 1 ); | ||
|
||
namespace Wikimedia\Parsoid\Ext\Poem; | ||
|
||
use Wikimedia\Parsoid\DOM\DocumentFragment; | ||
use Wikimedia\Parsoid\Ext\ExtensionModule; | ||
use Wikimedia\Parsoid\Ext\ExtensionTagHandler; | ||
use Wikimedia\Parsoid\Ext\ParsoidExtensionAPI; | ||
use Wikimedia\Parsoid\Ext\PHPUtils; | ||
use Wikimedia\Parsoid\Utils\DOMCompat; | ||
|
||
/**
 * Parsoid handler for the <poem> extension tag.
 *
 * Registers the tag (together with the PoemProcessor DOM processor) and
 * converts the tag's wikitext body to DOM after munging it so that the
 * poem's line structure and indentation are kept.
 */
class Poem extends ExtensionTagHandler implements ExtensionModule {

	/** @inheritDoc */
	public function getConfig(): array {
		return [
			'name' => 'Poem',
			'domProcessors' => [
				PoemProcessor::class,
			],
			'tags' => [
				[
					'name' => 'poem',
					'handler' => self::class,
					'options' => [
						'outputHasCoreMwDomSpecMarkup' => true
					],
				]
			]
		];
	}

	public function __construct( ?ParsoidExtensionAPI $extApi = null ) {
		/* @phan-suppress-previous-line PhanEmptyPublicMethod */
		/* The dom post processor doesn't need to use $extApi, so ignore it */
	}

	/**
	 * Convert the body of a <poem> tag to a DOM fragment.
	 *
	 * The wikitext in <poem>...</poem> is transformed before parsing:
	 * 1. Strip leading & trailing newlines
	 * 2. Suppress indent-pre by replacing a leading space with &nbsp;
	 * 3. Replace leading colons with <span class='...' style='...'>...</span>
	 * 4. Add <br/> for newlines except (a) in nowikis (b) after ----
	 *
	 * @inheritDoc
	 */
	public function sourceToDom(
		ParsoidExtensionAPI $extApi, string $content, array $extArgs
	): DocumentFragment {
		if ( strlen( $content ) > 0 ) {
			// 1. above
			$content = PHPUtils::stripPrefix( $content, "\n" );
			$content = PHPUtils::stripSuffix( $content, "\n" );

			// 2. above
			// The entity is spelled out so the replacement is visible in the
			// source and cannot degrade into an ordinary space.
			$content = preg_replace( '/^ /m', '&nbsp;', $content );

			// 3. above
			$lines = explode( "\n", $content );
			foreach ( $lines as $idx => $line ) {
				$lines[$idx] = self::indentLine( $extApi, $line );
			}
			$content = implode( "\n", $lines );

			// 4. above
			$content = self::addLineBreaks( $content );
		}

		// Prepend 'poem' to an existing 'class' argument; if the updater
		// reports nothing was updated, add a fresh 'class' argument instead.
		$value = $extApi->findAndUpdateArg( $extArgs, 'class', static function ( string $value ) {
			return strlen( $value ) ? "poem {$value}" : 'poem';
		} );

		if ( !$value ) {
			$extApi->addNewArg( $extArgs, 'class', 'poem' );
		}

		return $extApi->extTagToDOM( $extArgs, $content, [
				'wrapperTag' => 'div',
				'parseOpts' => [ 'extTag' => 'poem' ],
				// Create new frame, because $content doesn't literally appear in
				// the parent frame's sourceText (our copy has been munged)
				'processInNewFrame' => true,
				// We've shifted the content around quite a bit when we preprocessed
				// it. In the future if we wanted to enable selser inside the <poem>
				// body we should create a proper offset map and then apply it to the
				// result after the parse, like we do in the Gallery extension.
				// But for now, since we don't selser the contents, just strip the
				// DSR info so it doesn't cause problems/confusion with unicode
				// offset conversion (and so it's clear you can't selser what we're
				// currently emitting).
				'clearDSROffsets' => true
			]
		);
	}

	/**
	 * Wrap a line that starts with colons in an indenting <span>.
	 *
	 * A line is wrapped only when it has at least one leading colon AND some
	 * content after the colons; any other line is returned unchanged.
	 *
	 * @param ParsoidExtensionAPI $extApi Used to obtain a document for building the span
	 * @param string $line A single line of the poem's wikitext
	 * @return string The line, possibly wrapped in span markup
	 */
	private static function indentLine( ParsoidExtensionAPI $extApi, string $line ): string {
		$colons = strspn( $line, ':' );
		if ( $colons === 0 || $colons === strlen( $line ) ) {
			return $line;
		}

		$domFragment = $extApi->htmlToDom( '' );
		$doc = $domFragment->ownerDocument;
		$span = $doc->createElement( 'span' );
		$span->setAttribute( 'class', 'mw-poem-indented' );
		$span->setAttribute( 'style', 'display: inline-block; margin-inline-start: ' . $colons . 'em;' );

		// $line isn't an HTML text node, it's wikitext that will be passed to
		// extTagToDOM. So serialize the empty span, drop its 7-character
		// closing tag ("</span>"), and splice the raw wikitext in by hand.
		return substr( DOMCompat::getOuterHTML( $span ), 0, -7 ) .
			ltrim( $line, ':' ) . '</span>';
	}

	/**
	 * Append <br/> to newlines outside of <nowiki> fragments.
	 *
	 * Newlines inside nowikis are left alone here and handled in a
	 * post-processing pass: if <br/>s are added here, Parsoid will escape
	 * them to plaintext. Newlines directly after a ---- line are also left
	 * without a <br/>.
	 *
	 * @param string $content Poem wikitext
	 * @return string Poem wikitext with <br/>s inserted
	 */
	private static function addLineBreaks( string $content ): string {
		// With PREG_SPLIT_DELIM_CAPTURE the captured <nowiki> fragments land
		// at the odd indexes; the text between them is at the even indexes.
		$pieces = preg_split( '/(<nowiki>[\s\S]*?<\/nowiki>)/', $content,
			-1, PREG_SPLIT_DELIM_CAPTURE );

		foreach ( $pieces as $idx => $piece ) {
			if ( $idx % 2 === 1 ) {
				continue;
			}

			// This is a hack that exploits the fact that </poem>
			// cannot show up in the extension's content: the newline after
			// a ---- line is temporarily swapped for </poem> so the <br/>
			// pass skips it, and is then restored.
			$piece = preg_replace( '/(^----+)\n/m', '$1</poem>', $piece );
			$piece = preg_replace( '/\n/m', "<br/>\n", $piece );
			$pieces[$idx] = preg_replace( '/^(-+)<\/poem>/m', "\$1\n", $piece );
		}

		return implode( '', $pieces );
	}
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,75 @@ | ||
<?php | ||
declare( strict_types = 1 ); | ||
|
||
namespace Wikimedia\Parsoid\Ext\Poem; | ||
|
||
use Wikimedia\Parsoid\DOM\Element; | ||
use Wikimedia\Parsoid\DOM\Node; | ||
use Wikimedia\Parsoid\DOM\Text; | ||
use Wikimedia\Parsoid\Ext\DOMProcessor; | ||
use Wikimedia\Parsoid\Ext\DOMUtils; | ||
use Wikimedia\Parsoid\Ext\ParsoidExtensionAPI; | ||
|
||
/**
 * DOM processor for <poem> output: inside every element typed
 * mw:Extension/poem, newlines found in mw:Nowiki elements are turned
 * into <br/> elements.
 */
class PoemProcessor extends DOMProcessor {

	/**
	 * Walk the children of $node; poem extension elements get their nowikis
	 * processed, any other element is descended into recursively.
	 *
	 * @inheritDoc
	 */
	public function wtPostprocess(
		ParsoidExtensionAPI $extApi, Node $node, array $options
	): void {
		for ( $child = $node->firstChild; $child; $child = $child->nextSibling ) {
			if ( !( $child instanceof Element ) ) {
				continue;
			}

			if ( DOMUtils::hasTypeOf( $child, 'mw:Extension/poem' ) ) {
				// Replace newlines found in <nowiki> fragment with <br/>s
				$this->processNowikis( $child );
				continue;
			}

			$this->wtPostprocess( $extApi, $child, $options );
		}
	}

	/**
	 * Recursively locate mw:Nowiki elements below $node and split their
	 * text children on newlines, inserting a <br/> at each split point.
	 *
	 * @param Element $node Subtree root to scan
	 */
	private function processNowikis( Element $node ): void {
		$doc = $node->ownerDocument;

		for ( $child = $node->firstChild; $child; $child = $child->nextSibling ) {
			if ( !( $child instanceof Element ) ) {
				continue;
			}

			if ( !DOMUtils::hasTypeOf( $child, 'mw:Nowiki' ) ) {
				$this->processNowikis( $child );
				continue;
			}

			// Replace the nowiki's text node with a combination
			// of content and <br/>s. Non-text children (for example
			// entities that are still entity-wrapped) are left untouched.
			$inner = $child->firstChild;
			while ( $inner ) {
				// Remember the successor first: $inner is removed below.
				$following = $inner->nextSibling;

				if ( $inner instanceof Text ) {
					$segments = preg_split( '/\n/', $inner->nodeValue );
					$lastIdx = count( $segments ) - 1;

					foreach ( $segments as $idx => $segment ) {
						// Every segment after the first keeps its newline,
						// so it sits right behind the preceding <br/>.
						$prefix = $idx === 0 ? '' : "\n";
						$child->insertBefore( $doc->createTextNode( $prefix . $segment ), $inner );

						if ( $idx < $lastIdx ) {
							$child->insertBefore( $doc->createElement( 'br' ), $inner );
						}
					}

					$child->removeChild( $inner );
				}

				$inner = $following;
			}
		}
	}
}