<?php
/*
 * NOTE(review): this text arrived as a patch that adds two new files,
 * src/Parsoid/Poem.php and src/Parsoid/PoemProcessor.php, flattened by an
 * HTML-to-text pass. Everything between each "<?php" and the next ">" was
 * lost (namespace, use statements, class declarations, the getConfig() and
 * wtPostprocess() signatures), as were tag-like pieces of string literals
 * and comments. The header parts below are reconstructed from the names the
 * surviving code uses; confirm them against the repository. The two classes
 * belong in their own files as named in the banners.
 */
declare( strict_types = 1 );

// NOTE(review): namespace inferred from the src/Parsoid/ path -- confirm.
namespace MediaWiki\Extension\Poem\Parsoid;

use Wikimedia\Parsoid\DOM\DocumentFragment;
use Wikimedia\Parsoid\DOM\Element;
use Wikimedia\Parsoid\DOM\Node;
use Wikimedia\Parsoid\DOM\Text;
use Wikimedia\Parsoid\Ext\DOMProcessor;
use Wikimedia\Parsoid\Ext\DOMUtils;
use Wikimedia\Parsoid\Ext\ExtensionModule;
use Wikimedia\Parsoid\Ext\ExtensionTagHandler;
use Wikimedia\Parsoid\Ext\ParsoidExtensionAPI;
use Wikimedia\Parsoid\Ext\PHPUtils;
use Wikimedia\Parsoid\Utils\DOMCompat;

// ---------------------------------------------------------------------------
// src/Parsoid/Poem.php
// ---------------------------------------------------------------------------

/**
 * Parsoid handler for the <poem> extension tag.
 *
 * Pre-processes the tag's wikitext (indentation, line breaks) and hands the
 * result back to Parsoid to be parsed inside a wrapping <div class="poem">.
 *
 * NOTE(review): the parent class and interface are reconstructed (the module
 * config registers self::class as the tag handler) -- confirm.
 */
class Poem extends ExtensionTagHandler implements ExtensionModule {

	/** Closing tag that gets cut off the serialized indent span and re-appended. */
	private const SPAN_CLOSE = '</span>';

	/**
	 * Module registration: one <poem> tag handled by this class, plus the
	 * DOM processor that fixes up newlines inside nowikis.
	 *
	 * @inheritDoc
	 */
	public function getConfig(): array {
		return [
			'name' => 'Poem',
			'domProcessors' => [
				PoemProcessor::class,
			],
			'tags' => [
				[
					'name' => 'poem',
					'handler' => self::class,
					'options' => [
						'outputHasCoreMwDomSpecMarkup' => true
					],
				]
			]
		];
	}

	public function __construct( ?ParsoidExtensionAPI $extApi = null ) {
		/* @phan-suppress-previous-line PhanEmptyPublicMethod */
		/* The dom post processor doesn't need to use $extApi, so ignore it */
	}

	/**
	 * Convert the body of a <poem> tag to DOM.
	 *
	 * @inheritDoc
	 * @param ParsoidExtensionAPI $extApi
	 * @param string $content Raw wikitext found between the <poem> tags
	 * @param array $extArgs Tag attributes; a 'poem' class is added to them
	 * @return DocumentFragment
	 */
	public function sourceToDom(
		ParsoidExtensionAPI $extApi, string $content, array $extArgs
	): DocumentFragment {
		/*
		 * Transform wikitext found in <poem>...</poem>
		 * 1. Strip leading & trailing newlines
		 * 2. Suppress indent-pre by replacing leading space with &nbsp;
		 * 3. Replace colons with <span class='...' style='...'>...</span>
		 * 4. Add <br/> for newlines except (a) in nowikis (b) after ----
		 */
		if ( $content !== '' ) {
			// 1. above
			$content = PHPUtils::stripPrefix( $content, "\n" );
			$content = PHPUtils::stripSuffix( $content, "\n" );

			// 2. above
			// The replacement has to be the entity: swapping the space for
			// another whitespace character would not suppress anything.
			$content = preg_replace( '/^ /m', '&nbsp;', $content );

			// 3. above
			$content = self::indentColonLines( $extApi, $content );

			// 4. above
			$content = self::addLineBreaks( $content );
		}

		// Add the 'poem' class to the 'class' attribute, or if not found, add it
		$updated = $extApi->findAndUpdateArg(
			$extArgs, 'class',
			static function ( string $value ) {
				return $value !== '' ? "poem {$value}" : 'poem';
			}
		);
		if ( !$updated ) {
			$extApi->addNewArg( $extArgs, 'class', 'poem' );
		}

		return $extApi->extTagToDOM( $extArgs, $content, [
				'wrapperTag' => 'div',
				'parseOpts' => [ 'extTag' => 'poem' ],
				// Create new frame, because $content doesn't literally appear in
				// the parent frame's sourceText (our copy has been munged)
				'processInNewFrame' => true,
				// We've shifted the content around quite a bit when we preprocessed
				// it. In the future if we wanted to enable selser inside the <poem>
				// body we should create a proper offset map and then apply it to the
				// result after the parse, like we do in the Gallery extension.
				// But for now, since we don't selser the contents, just strip the
				// DSR info so it doesn't cause problems/confusion with unicode
				// offset conversion (and so it's clear you can't selser what we're
				// currently emitting).
				'clearDSROffsets' => true
			]
		);
	}

	/**
	 * Wrap every line that starts with colons in an indenting <span>.
	 *
	 * A line consisting only of colons is left alone. The number of leading
	 * colons becomes the margin in em.
	 *
	 * @param ParsoidExtensionAPI $extApi
	 * @param string $content
	 * @return string
	 */
	private static function indentColonLines(
		ParsoidExtensionAPI $extApi, string $content
	): string {
		// Only needed to create elements; looked up once, on first use.
		$doc = null;
		$lines = explode( "\n", $content );
		foreach ( $lines as $idx => $line ) {
			$colons = strspn( $line, ':' );
			if ( $colons === 0 || $colons === strlen( $line ) ) {
				continue;
			}
			if ( $doc === null ) {
				$doc = $extApi->htmlToDom( '' )->ownerDocument;
			}
			$span = $doc->createElement( 'span' );
			$span->setAttribute( 'class', 'mw-poem-indented' );
			$span->setAttribute(
				'style',
				'display: inline-block; margin-inline-start: ' . $colons . 'em;'
			);
			// $line isn't an HTML text node, it's wikitext that will be passed
			// to extTagToDOM. So serialize the empty span, cut off its closing
			// tag, and splice the wikitext in before re-closing it.
			$openTag = substr(
				DOMCompat::getOuterHTML( $span ), 0, -strlen( self::SPAN_CLOSE )
			);
			$lines[$idx] = $openTag . substr( $line, $colons ) . self::SPAN_CLOSE;
		}
		return implode( "\n", $lines );
	}

	/**
	 * Append <br/> to every newline outside <nowiki>..</nowiki> fragments,
	 * except for the newline that directly follows a ---- rule.
	 *
	 * Newlines inside nowikis are handled in a post-processing pass: if
	 * <br/>s were added there now, Parsoid would escape them to plaintext.
	 *
	 * @param string $content
	 * @return string
	 */
	private static function addLineBreaks( string $content ): string {
		// With the delimiter captured, odd-indexed pieces are the nowiki
		// fragments and even-indexed pieces are the text between them.
		$pieces = preg_split(
			'/(<nowiki>[\s\S]*?<\/nowiki>)/', $content, -1, PREG_SPLIT_DELIM_CAPTURE
		);
		$out = '';
		foreach ( $pieces as $i => $piece ) {
			if ( $i % 2 === 0 ) {
				// This is a hack that exploits the fact that </poem>
				// cannot show up in the extension's content: mark the
				// newline after a rule, add the breaks, then unmark.
				$piece = preg_replace( '/(^----+)\n/m', '$1</poem>', $piece );
				$piece = preg_replace( '/\n/m', "<br/>\n", $piece );
				$piece = preg_replace( '/^(-+)<\/poem>/m', "\$1\n", $piece );
			}
			$out .= $piece;
		}
		return $out;
	}
}

// ---------------------------------------------------------------------------
// src/Parsoid/PoemProcessor.php
// ---------------------------------------------------------------------------

/**
 * DOM pass that turns newlines inside nowikis of poem output into <br>s.
 *
 * NOTE(review): the parent class is reconstructed (the class is registered
 * under 'domProcessors' in the module config) -- confirm.
 */
class PoemProcessor extends DOMProcessor {

	/**
	 * Walk the tree and process every element typed mw:Extension/poem.
	 *
	 * NOTE(review): the signature was lost; parameter order is taken from the
	 * recursive call, the type hints are assumed to match the parent class.
	 *
	 * @inheritDoc
	 */
	public function wtPostprocess(
		ParsoidExtensionAPI $extApi, Node $node, array $options
	): void {
		for ( $c = $node->firstChild; $c; $c = $c->nextSibling ) {
			if ( !( $c instanceof Element ) ) {
				continue;
			}
			if ( DOMUtils::hasTypeOf( $c, 'mw:Extension/poem' ) ) {
				// Replace newlines found in <nowiki> fragment with <br/>s
				$this->processNowikis( $c );
			} else {
				$this->wtPostprocess( $extApi, $c, $options );
			}
		}
	}

	/**
	 * Recursively find mw:Nowiki elements under $node and split their text
	 * nodes at newlines, inserting a <br> at each split.
	 *
	 * @param Element $node
	 */
	private function processNowikis( Element $node ): void {
		$doc = $node->ownerDocument;
		for ( $c = $node->firstChild; $c; $c = $c->nextSibling ) {
			if ( !( $c instanceof Element ) ) {
				continue;
			}
			if ( !DOMUtils::hasTypeOf( $c, 'mw:Nowiki' ) ) {
				$this->processNowikis( $c );
				continue;
			}

			// Replace the nowiki's text node with a combination
			// of content and <br/>s. Take care to deal with
			// entities that are still entity-wrapped (!!).
			// Only Text children are rewritten; element children stay as-is.
			$cc = $c->firstChild;
			while ( $cc ) {
				// Saved up front because $cc is removed below.
				$next = $cc->nextSibling;
				if ( $cc instanceof Text ) {
					$pieces = explode( "\n", $cc->nodeValue ?? '' );
					$last = count( $pieces ) - 1;
					foreach ( $pieces as $i => $piece ) {
						// Every piece after the first keeps its newline, so
						// the text reads "<br>\n..." like the rest of the poem.
						$prefix = $i > 0 ? "\n" : '';
						$c->insertBefore( $doc->createTextNode( $prefix . $piece ), $cc );
						if ( $i < $last ) {
							$c->insertBefore( $doc->createElement( 'br' ), $cc );
						}
					}
					$c->removeChild( $cc );
				}
				$cc = $next;
			}
		}
	}
}