Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 2 additions & 7 deletions .travis.yml
100755 → 100644
Original file line number Diff line number Diff line change
@@ -1,15 +1,10 @@
language: php

php:
- 5.6
- 7.0
- 7.1
- 7.2

install:
- composer install

script:
- vendor/bin/phpcs -s
- vendor/bin/parallel-lint . --exclude vendor
- vendor/bin/phpdoc -q --template=checkstyle
- (! grep -B 1 error docs/api/checkstyle.xml)
- composer test
17 changes: 13 additions & 4 deletions composer.json
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
"name": "wikisource/api",
"description": "A PHP API to Wikisources (all languages).",
"type": "library",
"license": "GPL-2.0+",
"license": "GPL-2.0-or-later",
"support": {
"irc": "irc://irc.freenode.org/wikisource",
"issues": "https://phabricator.wikimedia.org/tag/wikisource-api/",
Expand All @@ -19,19 +19,28 @@
}
},
"require": {
"ext-json": "*",
"ext-simplexml": "*",
"psr/cache": "^1.0",
"psr/log": "^1.0",
"dflydev/dot-access-data": "^1.0",
"addwiki/mediawiki-api": "^0.7",
"symfony/dom-crawler": "^3.1"
"symfony/dom-crawler": "^4.2"
},
"require-dev": {
"mediawiki/minus-x": "^0.3",
"jakub-onderka/php-parallel-lint": "^0.9",
"mediawiki/mediawiki-codesniffer": "^13.0",
"phpunit/phpunit": "^5.6",
"phpdocumentor/phpdocumentor": "^2.9",
"tedivm/stash": "^0.14",
"monolog/monolog": "^1.21",
"eloquent/asplode": "^2.2"
},
"scripts": {
"test": [
"composer validate",
"parallel-lint . --exclude vendor",
"minus-x check .",
"phpcs -s"
]
}
}
40 changes: 38 additions & 2 deletions src/IndexPage.php
Original file line number Diff line number Diff line change
Expand Up @@ -178,15 +178,51 @@ protected function getHtmlCrawler() {
* Get a list of all pages: their numbers, labels, statuses, and URLs. Currently doing this in a
* pretty clunky way that probably makes quite a few assumptions based on English Wikisource.
* This method sends a request to Wikisource.
*
* @param bool $onlyExisting Only return info about pages that exist. This does not return the
* pages' label (it is set to the page number).
* @return array[] Array of arrays with keys 'num', 'label', 'status', 'url'; when $onlyExisting
* is true the keys are 'label', 'num', 'url', 'quality', 'title'.
*/
public function getPageList() {
public function getPageList( $onlyExisting = false ) {
$pagelist = [];

// Use the API to get a list of all existing pages.
if ( $onlyExisting ) {
$req = new FluentRequest();
$req->setAction( 'query' );
$indexNsLocalName = $this->wikisource
->getNamespaceLocalName( Wikisource::NS_NAME_INDEX );
$title = substr( $this->getTitle(), strlen( $indexNsLocalName ) + 1 );
$ns = $this->wikisource->getNamespaceId( Wikisource::NS_NAME_PAGE );
$reqParams = [
'prop' => 'proofread',
'generator' => 'prefixsearch',
'gpssearch' => $title,
'gpsnamespace' => $ns,
'gpslimit' => 500,
];
$req->addParams( $reqParams );
$res = $this->wikisource->sendApiRequest( $req, 'query.pages' );
foreach ( $res as $page ) {
$subpage = substr( $page['title'], strrpos( $page['title'], '/' ) + 1 );
// @TODO The label can not currently be retrieved from the API.
$pagelist['page-'.$subpage] = [
'label' => $subpage,
'num' => $subpage,
'url' => 'https://'.$this->getWikisource()->getDomainName().'/wiki/'.$page['title'],
'quality' => $page['proofread']['quality'],
'title' => $page['title'],
];
}
return $pagelist;
}

// If we need non-existing pages as well, we have to scrape the HTML. :-(
preg_match( '/(.*wikisource.org)/', $this->pageInfo['canonicalurl'], $matches );
$baseUrl = isset( $matches[1] ) ? $matches[1] : false;

$pageCrawler = $this->getHtmlCrawler();
$pagelistAnchors = $pageCrawler->filterXPath( "//div[contains(@class, 'index-pagelist')]//a" );
$pagelist = [];
foreach ( $pagelistAnchors as $pageLink ) {
// Get page URL (which is relative, starting with /w/index.php) and page number.
$anchorHref = $pageLink->getAttribute( 'href' );
Expand Down
48 changes: 39 additions & 9 deletions src/Wikisource.php
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,9 @@ class Wikisource {
/** The canonical name of the 'Index' namespace. */
const NS_NAME_INDEX = 'Index';

/** The canonical name of the 'Page' namespace. */
const NS_NAME_PAGE = 'Page';

/** @var WikisourceApi The parent API object. */
protected $api;

Expand Down Expand Up @@ -126,25 +129,52 @@ public function getIndexPageFromUrl( $url ) {
* @return int|false The namespace ID, or false if it can't be found.
*/
public function getNamespaceId( $namespaceName ) {
	// Walk the namespace list and stop at the first entry whose canonical
	// name is identical to the requested one.
	foreach ( $this->getNamespaces() as $namespaceInfo ) {
		// Entries without a canonical name can never match.
		if ( !isset( $namespaceInfo['canonical'] )
			|| $namespaceInfo['canonical'] !== $namespaceName
		) {
			continue;
		}
		return $namespaceInfo['id'];
	}

	// No namespace carries that canonical name.
	return false;
}

/**
* Get the local name for a single namespace.
*
* @param string $namespaceName The name of the namespace to get.
* @return string|null The local name, or null if no namespace with that canonical name is found.
*/
public function getNamespaceLocalName( $namespaceName ) {
	if ( $namespaceName !== '' ) {
		foreach ( $this->getNamespaces() as $namespaceInfo ) {
			// Only entries that have both a canonical name and a local
			// name (stored under the '*' key) are candidates.
			if ( !isset( $namespaceInfo['canonical'], $namespaceInfo['*'] ) ) {
				continue;
			}
			if ( $namespaceInfo['canonical'] === $namespaceName ) {
				return $namespaceInfo['*'];
			}
		}

		// Nothing matched: the result is null.
		return null;
	}

	// An empty name maps to an empty local name without consulting the
	// namespace data.
	return '';
}

/**
* Get information about the namespaces on this Wikisource.
*
* @return array
*/
public function getNamespaces() {
	// Namespace data is cached per language code.
	$cacheKey = 'namespaces'.$this->getLanguageCode();
	$namespaces = $this->getWikisoureApi()->cacheGet( $cacheKey );

	// A cached value of false is treated as a cache miss.
	if ( $namespaces !== false ) {
		$this->logger->debug( "Using cached namespace data for ".$this->getLanguageCode() );
	} else {
		$this->logger->debug( "Requesting namespace data for ".$this->getLanguageCode() );
		// Ask the API's siteinfo module for the namespace list, and store
		// the result so later calls can skip the request.
		$request = FluentRequest::factory()
			->setAction( 'query' )
			->setParam( 'meta', 'siteinfo' )
			->setParam( 'siprop', 'namespaces' );
		$namespaces = $this->sendApiRequest( $request, 'query.namespaces' );
		$this->getWikisoureApi()->cacheSet( $cacheKey, $namespaces );
	}

	// Looking up a single namespace is left to the callers; this method
	// always returns the complete list.
	return $namespaces;
}

/**
Expand Down