Skip to content

Commit

Permalink
Merge pull request #510 from wikimedia/transcluded-refs
Browse files Browse the repository at this point in the history
Fix issue with cleaning transcluded reference links
  • Loading branch information
samwilson committed Mar 20, 2024
2 parents 50fa545 + 3ef1fba commit 86dc32b
Show file tree
Hide file tree
Showing 2 changed files with 25 additions and 1 deletion.
5 changes: 4 additions & 1 deletion src/PageParser.php
Expand Up @@ -442,8 +442,11 @@ private function cleanRedLinks() {

private function cleanReferenceLinks() {
$links = $this->xPath->query(
'//*[@typeof="mw:Extension/ref"]/a | //a[@rel="mw:referencedBy"]'
'//*[contains(@typeof,"mw:Extension/ref")]/a | //a[@rel="mw:referencedBy"]'
);
if ( !$links ) {
return;
}
foreach ( $links as $link ) {
$href = $link->getAttribute( 'href' );
$pos = strpos( $href, '#' );
Expand Down
21 changes: 21 additions & 0 deletions tests/Book/PageParserTest.php
Expand Up @@ -233,4 +233,25 @@ public function provideGetPicturesList(): array {
],
];
}

/**
 * Check that reference links in the parsed page content come out as expected.
 *
 * Builds a PageParser from the given HTML fragment, serialises the result of
 * getContent( false ) to XML, and asserts that the expected anchor markup
 * appears somewhere in that output.
 *
 * @dataProvider provideCleanReferenceLinks
 * @param string $html HTML fragment handed to the parser.
 * @param string $expected Substring that must be present in the serialised output.
 */
public function testCleanReferenceLinks( string $html, string $expected ): void {
	$parser = new PageParser( Util::buildDOMDocumentFromHtml( $html ) );
	// Serialise first so the assertion line states only what is being checked.
	$xml = $parser->getContent( false )->saveXML();
	$this->assertStringContainsString( $expected, $xml );
}

/**
 * Data sets for testCleanReferenceLinks().
 *
 * Each case is a pair: the input HTML, followed by a substring that is
 * expected to appear in the serialised output.
 *
 * @return string[][]
 */
public function provideCleanReferenceLinks(): array {
	return [
		// Neither the <sup> nor the <a> carries a typeof/rel marker, so the
		// href is expected to be left exactly as written.
		'no links to clean' => [
			'<sup><a href="./Page#fn"><span>[1]</span></a></sup>',
			'<a href="./Page#fn">',
		],
		// typeof holds two space-separated values ("mw:Transclusion" and
		// "mw:Extension/ref"); the href is expected to be reduced to its fragment.
		'ref that is also transcluded' => [
			'<sup class="mw-ref reference" id="cite_ref-1" rel="dc:references" typeof="mw:Transclusion mw:Extension/ref"><a href="./Page#cite_note-1"><span class="mw-reflink-text">[1]</span></a></sup>',
			'<a href="#cite_note-1">',
		],
	];
}
}

0 comments on commit 86dc32b

Please sign in to comment.