Skip to content

Commit

Permalink
Generation of DjVu from JP2 or PDF files
Browse files Browse the repository at this point in the history
Add a CLI system for processing the generation of DjVu files from
Internet Archive JP2 and PDF files.

Refs: #14 #13 #20
  • Loading branch information
samwilson committed Feb 6, 2017
1 parent 5ce25fe commit 467ac5c
Show file tree
Hide file tree
Showing 16 changed files with 748 additions and 106 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -3,4 +3,5 @@ temp/*
vendor/*
.idea/*
web/.htaccess
/jobqueue

12 changes: 12 additions & 0 deletions .travis.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
language: php
php:
- '5.5'
- '5.6'
- '7.0'
- hhvm
sudo: false
env:
global:
- COMPOSER_DISABLE_XDEBUG_WARN=1
install: composer install
script: composer test
4 changes: 3 additions & 1 deletion composer.json
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,9 @@
"symfony/twig-bridge": "~3.1",
"addwiki/mediawiki-api-base": "~2.2",
"firebase/php-jwt": "~4.0",
"monolog/monolog": "~1.20"
"monolog/monolog": "~1.20",
"symfony/console": "^3.2",
"pastuhov/php-exec-command": "^1.1"
},
"require-dev": {
"mediawiki/mediawiki-codesniffer": "~0.7"
Expand Down
120 changes: 118 additions & 2 deletions composer.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 0 additions & 1 deletion config_example.ini
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
consumerKey=
consumerSecret=
tempDirectory=temp
debug=false
11 changes: 11 additions & 0 deletions ia-upload
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
#!/usr/bin/env php
<?php

require __DIR__ . '/vendor/autoload.php';

use IaUpload\Commands\JobsCommand;
use Symfony\Component\Console\Application;

// Build the console application, register the job-queue command, then let
// Symfony Console parse the command line and dispatch to it.
$console = new Application();
$jobsCommand = new JobsCommand();
$console->add( $jobsCommand );
$console->run();
121 changes: 121 additions & 0 deletions src/IaUpload/Commands/JobsCommand.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,121 @@
<?php

namespace IaUpload\Commands;

use Exception;
use IaUpload\CommonsClient;
use IaUpload\OAuth\MediaWikiOAuth;
use IaUpload\OAuth\Token\AccessToken;
use IaUpload\OAuth\Token\ConsumerToken;
use IaUpload\OAuthController;
use Monolog\Handler\ErrorLogHandler;
use Monolog\Handler\StreamHandler;
use Monolog\Logger;
use RecursiveDirectoryIterator;
use RecursiveIteratorIterator;
use Symfony\Component\Console\Command\Command;
use Symfony\Component\Console\Input\InputInterface;
use Symfony\Component\Console\Output\OutputInterface;

/**
 * Console command that works through the DjVu conversion job queue.
 *
 * Every job is a sub-directory of `jobqueue/` holding a `job.json` description. For each
 * job that is not already locked, the command builds a DjVu file and uploads it to Commons.
 */
class JobsCommand extends Command {

	/**
	 * Set the command's name and description.
	 */
	protected function configure() {
		$this->setName( 'jobs' )->setDescription( 'Runs DjVu conversion jobs' );
	}

	/**
	 * Process every unlocked job in the job queue directory.
	 *
	 * A job directory is deleted once its file has been uploaded. If a job fails, the error is
	 * written to that job's log and rethrown; this aborts the run and leaves the job's lock file
	 * in place, so the job is skipped by later runs.
	 *
	 * @param InputInterface $input An InputInterface instance
	 * @param OutputInterface $output An OutputInterface instance
	 * @return int 0 if everything went fine
	 * @throws Exception If a job cannot be read, the user cannot upload, the required DjvuMaker
	 * class cannot be loaded, or the conversion or upload fails.
	 */
	protected function execute( InputInterface $input, OutputInterface $output ) {
		$jobsDir = __DIR__ . '/../../../jobqueue';
		// glob() returns false on error; treat that the same as "no jobs".
		$jobs = glob( $jobsDir . '/*/job.json' ) ?: [];
		foreach ( $jobs as $jobFile ) {
			$jobDir = dirname( $jobFile );

			// Skip if this job is locked; otherwise lock this job.
			if ( !$this->acquireLock( $jobDir . '/lock' ) ) {
				continue;
			}

			// Set up the log first so that every failure below ends up in the job's log.txt.
			$log = new Logger( 'LOG' );
			$log->pushHandler( new ErrorLogHandler() );
			$log->pushHandler( new StreamHandler( $jobDir . '/log.txt' ) );

			try {
				$this->runJob( $jobFile, $log );
			} catch ( Exception $e ) {
				$log->critical( $e->getMessage() );
				throw $e;
			}
			$this->deleteDirectory( $jobDir );
		}
		return 0;
	}

	/**
	 * Try to take the lock for a job by creating its lock file.
	 *
	 * @param string $lockFile Path of the lock file to create.
	 * @return bool True if this process created the lock file, false if the job is already
	 * locked or the lock file could not be created.
	 */
	private function acquireLock( $lockFile ) {
		if ( file_exists( $lockFile ) ) {
			return false;
		}
		// Mode 'x' fails if the file already exists, so two runners that both passed the check
		// above cannot both claim the job. The warning raised by the loser is expected.
		$handle = @fopen( $lockFile, 'x' );
		if ( $handle === false ) {
			return false;
		}
		fclose( $handle );
		return true;
	}

	/**
	 * Convert one job's source files to DjVu and upload the result to Commons.
	 *
	 * @param string $jobFile Path of the job's job.json file.
	 * @param Logger $log Logger for this job.
	 * @throws Exception If the job file cannot be read, the user cannot upload, or the
	 * DjvuMaker class for the job's file source does not exist.
	 */
	private function runJob( $jobFile, Logger $log ) {
		$jobJson = file_get_contents( $jobFile );
		if ( $jobJson === false ) {
			throw new Exception( "Unable to read job file $jobFile" );
		}
		$jobInfo = \GuzzleHttp\json_decode( $jobJson );

		// Make sure we can upload, before doing anything else.
		$mediawikiClient = $this->getMediawikiClient( $jobInfo->userAccessToken );
		$commonsClient = new CommonsClient( $mediawikiClient, $log );
		if ( !$commonsClient->canUpload() ) {
			throw new Exception( "Unable to upload to Commons" );
		}

		// Load the DjvuMaker class, e.g. a file source of "jp2" maps to Jp2DjvuMaker.
		$classType = ucfirst( strtolower( $jobInfo->fileSource ) );
		$fileSourceClass = '\\IaUpload\\DjvuMakers\\' . $classType . 'DjvuMaker';
		if ( !class_exists( $fileSourceClass ) ) {
			throw new Exception( "Unable to load class $fileSourceClass" );
		}

		// Generate the DjVu.
		$log->info( "Creating DjVu for $jobInfo->iaId from $classType" );
		$jobClient = new $fileSourceClass( $jobInfo->iaId, $log );
		$localDjvu = $jobClient->createLocalDjvu();

		// Upload to Commons.
		$log->info( "Uploading $localDjvu to Commons $jobInfo->commonsName" );
		$commonsClient->upload(
			$jobInfo->commonsName,
			$localDjvu,
			$jobInfo->description,
			'Imported from Internet Archive by the [[wikitech:Tool:IA Upload|IA Upload tool]] job queue'
		);
	}

	/**
	 * Recursively delete a directory and everything in it.
	 *
	 * Symbolic links are removed themselves; their targets are never touched.
	 *
	 * @param string $dir Path of the directory to delete.
	 */
	protected function deleteDirectory( $dir ) {
		$files = new RecursiveIteratorIterator(
			new RecursiveDirectoryIterator( $dir, RecursiveDirectoryIterator::SKIP_DOTS ),
			RecursiveIteratorIterator::CHILD_FIRST
		);
		foreach ( $files as $file ) {
			// Use the path inside $dir rather than the resolved real path: for a symlink the
			// real path is the link's target (or false when the link is broken).
			$path = $file->getPathname();
			if ( $file->isDir() && !$file->isLink() ) {
				rmdir( $path );
			} else {
				unlink( $path );
			}
		}
		rmdir( $dir );
	}

	/**
	 * Build a MediaWiki HTTP client that acts with the given user's OAuth access token.
	 *
	 * The OAuth consumer credentials are read from config.ini in the project root.
	 *
	 * @param object $accessTokenDetails The user's access token, with `key` and `secret` properties.
	 * @return \GuzzleHttp\Client
	 * @throws Exception If config.ini cannot be parsed or lacks the consumer credentials.
	 */
	protected function getMediawikiClient( $accessTokenDetails ) {
		// @TODO This shouldn't be here.
		$configFile = __DIR__ . '/../../../config.ini';
		$config = parse_ini_file( $configFile );
		if ( $config === false || !isset( $config['consumerKey'], $config['consumerSecret'] ) ) {
			throw new Exception( "Unable to read OAuth consumer credentials from $configFile" );
		}
		$token = new ConsumerToken( $config['consumerKey'], $config['consumerSecret'] );
		$oAuth = new MediaWikiOAuth( OAuthController::OAUTH_URL, $token );
		$accessToken = new AccessToken( $accessTokenDetails->key, $accessTokenDetails->secret );
		return $oAuth->buildMediawikiClientFromToken( $accessToken );
	}
}
16 changes: 14 additions & 2 deletions src/IaUpload/CommonsClient.php
Original file line number Diff line number Diff line change
Expand Up @@ -29,11 +29,23 @@ class CommonsClient {
private $mediawikiApi;

/**
 * @param Client $oauthClient HTTP client stored on this object and also passed to the
 * MediawikiApi instance for Commons.
 * @param LoggerInterface $logger Logger handed to the MediawikiApi instance.
 */
public function __construct( Client $oauthClient, LoggerInterface $logger ) {
	$this->client = $oauthClient;
	$this->mediawikiApi = new MediawikiApi( 'https://commons.wikimedia.org/w/api.php', $oauthClient );
	$this->mediawikiApi->setLogger( $logger );
}

/**
 * Can the current user upload files?
 *
 * Queries the API for the current user's rights and looks for the 'upload' right.
 *
 * @return bool True if the rights list contains 'upload'; false otherwise, including when
 * the response carries no rights list.
 */
public function canUpload() {
	$result = $this->mediawikiApi->getRequest( new SimpleRequest( 'query', [
		'meta' => 'userinfo',
		'uiprop' => 'rights'
	] ) );
	// Guard against a response without the expected structure rather than passing a
	// missing value to in_array().
	if ( !isset( $result['query']['userinfo']['rights'] ) ) {
		return false;
	}
	$rights = $result['query']['userinfo']['rights'];
	return is_array( $rights ) && in_array( 'upload', $rights, true );
}

/**
* Returns if a given page exists
*
Expand Down Expand Up @@ -87,7 +99,7 @@ public function upload( $fileName, $filePath, $text, $comment ) {
* @return string
*/
public function normalizePageTitle( $title ) {
	// Trim first so that surrounding whitespace is dropped instead of being turned into
	// leading or trailing underscores; then every space, tab and newline becomes '_'.
	return str_replace( [ ' ', "\t", "\n" ], '_', trim( $title ) );
}

/**
Expand Down
Loading

0 comments on commit 467ac5c

Please sign in to comment.