Skip to content

Commit

Permalink
Migrate Parsoid implementation of Poem extension to src/Parsoid
Browse files Browse the repository at this point in the history
Further commits will be necessary to complete the migration, but
this merge commit imports all of the existing history of the Poem
extension.  It was generated using the following command on a checkout
of Parsoid:

  git filter-repo --path src/Ext/Poem --path lib/ext/Poem \
    --tag-rename '':'parsoid-' \
    --path-rename src/Ext/Poem:src/Parsoid \
    --path-rename lib/ext/Poem:src/Parsoid

And then, in the Poem repository:

  git remote add parsoid ../path/to/parsoid/checkout
  git merge parsoid/master --allow-unrelated-histories

Bug: T358054
Change-Id: Ic035e53d93bc41c5d5b26e5e8245ab859aba4266
  • Loading branch information
arlolra committed Feb 26, 2024
2 parents c9a230a + d1383ca commit 5ce5108
Show file tree
Hide file tree
Showing 2 changed files with 208 additions and 0 deletions.
133 changes: 133 additions & 0 deletions src/Parsoid/Poem.php
@@ -0,0 +1,133 @@
<?php
declare( strict_types = 1 );

namespace Wikimedia\Parsoid\Ext\Poem;

use Wikimedia\Parsoid\DOM\DocumentFragment;
use Wikimedia\Parsoid\Ext\ExtensionModule;
use Wikimedia\Parsoid\Ext\ExtensionTagHandler;
use Wikimedia\Parsoid\Ext\ParsoidExtensionAPI;
use Wikimedia\Parsoid\Ext\PHPUtils;
use Wikimedia\Parsoid\Utils\DOMCompat;

/**
 * Parsoid handler for the <poem> extension tag.
 *
 * The tag body is rewritten as wikitext (indentation spans, explicit line
 * breaks) and then handed back to Parsoid to be parsed inside a wrapper <div>.
 */
class Poem extends ExtensionTagHandler implements ExtensionModule {

	/** @inheritDoc */
	public function getConfig(): array {
		// Registers the <poem> tag with this class as its handler, plus the
		// DOM processor that finishes newline handling inside <nowiki>.
		return [
			'name' => 'Poem',
			'domProcessors' => [
				PoemProcessor::class,
			],
			'tags' => [
				[
					'name' => 'poem',
					'handler' => self::class,
					'options' => [
						'outputHasCoreMwDomSpecMarkup' => true
					],
				]
			]
		];
	}

	/**
	 * @param ParsoidExtensionAPI|null $extApi Accepted but unused.
	 */
	public function __construct( ?ParsoidExtensionAPI $extApi = null ) {
		/* @phan-suppress-previous-line PhanEmptyPublicMethod */
		/* The dom post processor doesn't need to use $extApi, so ignore it */
	}

	/**
	 * Transform wikitext found in <poem>...</poem> and parse the result.
	 *
	 * 1. Strip leading & trailing newlines
	 * 2. Suppress indent-pre by replacing leading space with &nbsp;
	 * 3. Replace colons with <span class='...' style='...'>...</span>
	 * 4. Add <br/> for newlines except (a) in nowikis (b) after ----
	 *
	 * @inheritDoc
	 */
	public function sourceToDom(
		ParsoidExtensionAPI $extApi, string $content, array $extArgs
	): DocumentFragment {
		if ( $content !== '' ) {
			// 1. above
			$content = PHPUtils::stripPrefix( $content, "\n" );
			$content = PHPUtils::stripSuffix( $content, "\n" );

			// 2. above
			$content = preg_replace( '/^ /m', '&nbsp;', $content );

			// 3. above
			$content = self::wrapIndentedLines( $extApi, $content );

			// 4. above
			$content = self::insertLineBreaks( $content );
		}

		// Add the 'poem' class to the 'class' attribute, or if not found, add it
		$updated = $extApi->findAndUpdateArg(
			$extArgs,
			'class',
			static function ( string $value ) {
				return strlen( $value ) ? "poem {$value}" : 'poem';
			}
		);
		if ( !$updated ) {
			$extApi->addNewArg( $extArgs, 'class', 'poem' );
		}

		return $extApi->extTagToDOM( $extArgs, $content, [
				'wrapperTag' => 'div',
				'parseOpts' => [ 'extTag' => 'poem' ],
				// Create new frame, because $content doesn't literally appear in
				// the parent frame's sourceText (our copy has been munged)
				'processInNewFrame' => true,
				// We've shifted the content around quite a bit when we preprocessed
				// it. In the future if we wanted to enable selser inside the <poem>
				// body we should create a proper offset map and then apply it to the
				// result after the parse, like we do in the Gallery extension.
				// But for now, since we don't selser the contents, just strip the
				// DSR info so it doesn't cause problems/confusion with unicode
				// offset conversion (and so it's clear you can't selser what we're
				// currently emitting).
				'clearDSROffsets' => true
			]
		);
	}

	/**
	 * Wrap every line that starts with one or more colons (and has further
	 * content after them) in an indenting <span>, one em per colon.
	 *
	 * @param ParsoidExtensionAPI $extApi Used to obtain a document for building the span
	 * @param string $content Poem body, lines separated by "\n"
	 * @return string The body with indented lines wrapped
	 */
	private static function wrapIndentedLines(
		ParsoidExtensionAPI $extApi, string $content
	): string {
		$closeTag = '</span>';
		// Looked up lazily and only once: every span can be created from the
		// same document, so there is no need to build a fragment per line.
		$doc = null;

		$lines = explode( "\n", $content );
		foreach ( $lines as $idx => $line ) {
			$depth = strspn( $line, ':' );
			// Leave lines without leading colons, and lines made up solely
			// of colons, untouched.
			if ( $depth === 0 || $depth === strlen( $line ) ) {
				continue;
			}

			if ( $doc === null ) {
				$doc = $extApi->htmlToDom( '' )->ownerDocument;
			}
			$span = $doc->createElement( 'span' );
			$span->setAttribute( 'class', 'mw-poem-indented' );
			$span->setAttribute( 'style', 'display: inline-block; margin-inline-start: ' . $depth . 'em;' );

			// $line isn't an HTML text node, it's wikitext that will be passed
			// to extTagToDOM, so serialize the empty span, drop its closing tag,
			// and splice the raw wikitext in between.
			$openTag = substr( DOMCompat::getOuterHTML( $span ), 0, -strlen( $closeTag ) );
			$lines[$idx] = $openTag . substr( $line, $depth ) . $closeTag;
		}

		return implode( "\n", $lines );
	}

	/**
	 * Append <br/> to every newline outside <nowiki>..</nowiki> fragments,
	 * except for the newline that directly follows a ---- line.
	 *
	 * Newlines inside nowikis are handled in a post-processing pass: if
	 * <br/>s are added here, Parsoid will escape them to plaintext.
	 *
	 * @param string $content Poem body
	 * @return string The body with <br/> markers inserted
	 */
	private static function insertLineBreaks( string $content ): string {
		// With PREG_SPLIT_DELIM_CAPTURE the captured nowiki fragments land on
		// the odd indexes; the text between them is on the even indexes.
		$parts = preg_split(
			'/(<nowiki>[\s\S]*?<\/nowiki>)/', $content, -1, PREG_SPLIT_DELIM_CAPTURE
		);

		foreach ( $parts as $idx => $part ) {
			if ( $idx % 2 === 1 ) {
				continue;
			}

			// This is a hack that exploits the fact that </poem>
			// cannot show up in the extension's content: the newline after
			// a ---- line is temporarily swapped for </poem> so that it is
			// not given a <br/>, and is then restored.
			$part = preg_replace( '/(^----+)\n/m', '$1</poem>', $part );
			$part = str_replace( "\n", "<br/>\n", $part );
			$parts[$idx] = preg_replace( '/^(-+)<\/poem>/m', "\$1\n", $part );
		}

		return implode( '', $parts );
	}
}
75 changes: 75 additions & 0 deletions src/Parsoid/PoemProcessor.php
@@ -0,0 +1,75 @@
<?php
declare( strict_types = 1 );

namespace Wikimedia\Parsoid\Ext\Poem;

use Wikimedia\Parsoid\DOM\Element;
use Wikimedia\Parsoid\DOM\Node;
use Wikimedia\Parsoid\DOM\Text;
use Wikimedia\Parsoid\Ext\DOMProcessor;
use Wikimedia\Parsoid\Ext\DOMUtils;
use Wikimedia\Parsoid\Ext\ParsoidExtensionAPI;

/**
 * DOM pass that turns newlines inside <nowiki> content of a poem into <br/>
 * elements.
 */
class PoemProcessor extends DOMProcessor {

	/**
	 * Walk the children of $node; for each element typed mw:Extension/poem
	 * rewrite its nowiki content, otherwise recurse into the element.
	 *
	 * @inheritDoc
	 */
	public function wtPostprocess(
		ParsoidExtensionAPI $extApi, Node $node, array $options
	): void {
		for ( $child = $node->firstChild; $child !== null; $child = $child->nextSibling ) {
			if ( !( $child instanceof Element ) ) {
				continue;
			}
			if ( DOMUtils::hasTypeOf( $child, 'mw:Extension/poem' ) ) {
				// Replace newlines found in <nowiki> fragment with <br/>s
				$this->processNowikis( $child );
			} else {
				$this->wtPostprocess( $extApi, $child, $options );
			}
		}
	}

	/**
	 * Recursively find elements typed mw:Nowiki below $node and split their
	 * direct text children on newlines, inserting a <br/> at each break.
	 *
	 * @param Element $node Subtree root to scan
	 */
	private function processNowikis( Element $node ): void {
		$doc = $node->ownerDocument;

		for ( $child = $node->firstChild; $child !== null; $child = $child->nextSibling ) {
			if ( !( $child instanceof Element ) ) {
				continue;
			}

			if ( !DOMUtils::hasTypeOf( $child, 'mw:Nowiki' ) ) {
				$this->processNowikis( $child );
				continue;
			}

			// Replace the nowiki's text node with a combination
			// of content and <br/>s. Take care to deal with
			// entities that are still entity-wrapped (!!): only direct
			// Text children are rewritten, element children are left alone.
			$inner = $child->firstChild;
			while ( $inner !== null ) {
				// Grab the successor first; $inner may be removed below.
				$following = $inner->nextSibling;

				if ( $inner instanceof Text ) {
					$lines = explode( "\n", $inner->nodeValue ?? '' );
					$lastIdx = count( $lines ) - 1;

					// A text node without any newline needs no <br/>, so it
					// is left in place instead of being cloned and replaced.
					if ( $lastIdx > 0 ) {
						foreach ( $lines as $idx => $line ) {
							// Every piece after the first keeps its leading
							// newline, placed after the preceding <br/>.
							$prefix = $idx === 0 ? '' : "\n";
							$child->insertBefore( $doc->createTextNode( $prefix . $line ), $inner );
							if ( $idx < $lastIdx ) {
								$child->insertBefore( $doc->createElement( 'br' ), $inner );
							}
						}
						$child->removeChild( $inner );
					}
				}

				$inner = $following;
			}
		}
	}
}

0 comments on commit 5ce5108

Please sign in to comment.