Skip to content

Commit

Permalink
Merge branch
Browse files Browse the repository at this point in the history
Bug fixed with credits
  • Loading branch information
Tpt committed Jan 21, 2012
2 parents 364cadc + f3fd61b commit 5a35132
Show file tree
Hide file tree
Showing 5 changed files with 327 additions and 10 deletions.
121 changes: 118 additions & 3 deletions book/BookProvider.php
Original file line number Diff line number Diff line change
Expand Up @@ -10,14 +10,20 @@
*/
class BookProvider {
protected $api = null;
protected $curl_async = null;
protected $withPictures = true;
protected $creditPages = null;
protected $creditImages = null;

/**
 * Build a provider bound to an API client.
 *
 * @param Api $api client stored on the instance and used for the wiki requests
 * @param bool $withPictures stored on the instance; get() only starts the
 *        picture credit request when this flag is true
 */
public function __construct(Api $api, $withPictures = true) {
    $this->api = $api;
    // Shared asynchronous curl helper used for page, picture and credit requests.
    $this->curl_async = new CurlAsync();
    $this->withPictures = $withPictures;
    // Filled by the finishCredit()/finishCreditImage() callbacks.
    $this->creditPages = array();
    $this->creditImages = array();
}

/**
Expand All @@ -31,6 +37,7 @@ public function get($title, $isMetadata = false) {
$doc = $this->getDocument($title);
$parser = new PageParser($doc);
$book = new Book();
$book->credits_html = '';
$book->title = $title;
$book->lang = $this->api->lang;
$book->type = $parser->getMetadata('ws-type');
Expand Down Expand Up @@ -62,6 +69,7 @@ public function get($title, $isMetadata = false) {
$pictures = array_merge($pictures, $parser->getPicturesList());
}
$chapters = $parser->getChaptersList($title);
$key_credit = $this->startCredit($book, $chapters);
$chapters = $this->getPages($chapters);
foreach($chapters as $chapter) {
$parser = new PageParser($chapter->content);
Expand All @@ -70,8 +78,21 @@ public function get($title, $isMetadata = false) {
$pictures = array_merge($pictures, $parser->getPicturesList());
}
}

if ($this->withPictures && count($pictures)) {
$keyCreditImage = $this->startCreditImage($book, $pictures);
}

$this->curl_async->waitForKey($key_credit);

$book->chapters = $chapters;
$pictures = $this->getPicturesData($pictures);
if ($this->withPictures && count($pictures)) {
$this->curl_async->waitForKey($keyCreditImage);
}

$credit_html = $this->mergeCredit();

}
$book->pictures = $pictures;
return $book;
Expand Down Expand Up @@ -99,7 +120,7 @@ protected function getPages($pages) {
foreach($pages as $id => $page) {
$titles[$id] = $page->title;
}
$data = $this->api->getPages($titles);
$data = $this->api->getPagesAsync($this->curl_async, $titles);
foreach($pages as $id => $page) {
$document = new DOMDocument('1.0', 'UTF-8');
$document->loadXML($data[$id]);
Expand All @@ -118,7 +139,7 @@ protected function getPicturesData($pictures) {
foreach($pictures as $id => $picture) {
$urls[$id] = $picture->url;
}
$data = $this->api->getMulti($urls);
$data = $this->api->getImagesAsync($this->curl_async, $urls);
foreach($pictures as $id => $picture) {
$picture->content = $data[$id];
$picture->mimetype = getMimeType($picture->content);
Expand Down Expand Up @@ -172,8 +193,102 @@ public function getCover($cover) {
}
return $picture;
}

/**
 * Queue the asynchronous request that fetches contributor credits for the
 * chapters of a book. finishCredit() is registered as the completion callback.
 *
 * @param Book $book the book; its lang and scan properties are sent as parameters
 * @param array $chapters pages whose title property is sent, joined with '|'
 * @return mixed the key returned by CurlAsync::addRequest(), to be passed to waitForKey()
 */
protected function startCredit($book, $chapters) {
    $titles = array();
    foreach ($chapters as $chapter) {
        $titles[] = $chapter->title;
    }

    $query = array(
        'lang' => $book->lang,
        'format' => 'php',
        'book' => $book->scan,
        'page' => implode('|', $titles)
    );
    $onFinish = array($this, 'finishCredit');

    return $this->curl_async->addRequest('http://toolserver.org/~phe/cgi-bin/credits', $query, $onFinish);
}

/**
 * Completion callback for the request queued by startCredit().
 *
 * On success the decoded credits replace $this->creditPages. On an HTTP
 * failure or an undecodable body the problem is logged and the previously
 * stored value is kept, so later merging still works on an array.
 *
 * @param array $data request result; the keys read here are 'http_code',
 *        'curl_errno', 'curl_result' and 'content'
 */
public function finishCredit($data) {
    if ($data['http_code'] != 200) {
        error_log('getCredit() fail:' .
            'http code: ' . $data['http_code'] .
            ', curl errno: ' . $data['curl_errno'] .
            ', curl_result:' . $data['curl_result']);
        return;
    }

    // NOTE(review): unserialize() on a remote response can instantiate
    // arbitrary classes; consider asking the service for a JSON format instead.
    $credits = unserialize($data['content']);
    if (is_array($credits)) {
        $this->creditPages = $credits;
    } else {
        // unserialize() returns false on malformed input; do not overwrite
        // the array with it.
        error_log('getCredit() fail: unable to decode the credits response');
    }
}

/**
 * Queue the asynchronous request that fetches contributor credits for the
 * pictures of a book. finishCreditImage() is registered as the completion callback.
 *
 * @param Book $book the book; its lang property is sent as a parameter
 * @param array $pictures pictures whose title property is sent, joined with '|'
 * @return mixed the key returned by CurlAsync::addRequest(), to be passed to waitForKey()
 */
protected function startCreditImage($book, $pictures) {
    $titles = array();
    foreach ($pictures as $picture) {
        $titles[] = $picture->title;
    }

    $query = array(
        'lang' => $book->lang,
        'format' => 'php',
        'image' => implode('|', $titles)
    );
    $onFinish = array($this, 'finishCreditImage');

    return $this->curl_async->addRequest('http://toolserver.org/~phe/cgi-bin/credits', $query, $onFinish);
}

/**
 * Completion callback for the request queued by startCreditImage().
 *
 * On success the decoded credits replace $this->creditImages. On an HTTP
 * failure or an undecodable body the problem is logged and the previously
 * stored value is kept, so later merging still works on an array.
 *
 * @param array $data request result; the keys read here are 'http_code',
 *        'curl_errno', 'curl_result' and 'content'
 */
public function finishCreditImage($data) {
    if ($data['http_code'] != 200) {
        error_log('getCreditImage() fail:' .
            'http code: ' . $data['http_code'] .
            ', curl errno: ' . $data['curl_errno'] .
            ', curl_result:' . $data['curl_result']);
        return;
    }

    // NOTE(review): unserialize() on a remote response can instantiate
    // arbitrary classes; consider asking the service for a JSON format instead.
    $credits = unserialize($data['content']);
    if (is_array($credits)) {
        $this->creditImages = $credits;
    } else {
        // unserialize() returns false on malformed input; do not overwrite
        // the array with it.
        error_log('getCreditImage() fail: unable to decode the credits response');
    }
}

/**
 * Merge the credits collected for pages and for images, then render them
 * as an HTML list fragment.
 *
 * Each entry is keyed by contributor name and holds a 'count' and a list
 * of 'flags'. Image credits are added onto the page credits: counts are
 * summed and flags are united without duplicates. The merged entries are
 * ordered with cmp_credit().
 *
 * @return string a "<ul>...</ul>" fragment with one "<li>" per contributor;
 *         the caller is responsible for storing it on the book
 */
protected function mergeCredit() {
    // Either property may hold a non-array if a credit response could not
    // be decoded; treat that as "no credits".
    $credit = is_array($this->creditPages) ? $this->creditPages : array();
    $imageCredit = is_array($this->creditImages) ? $this->creditImages : array();

    foreach ($imageCredit as $name => $values) {
        if (!isset($credit[$name])) {
            $credit[$name] = array('count' => 0, 'flags' => array());
        }
        $credit[$name]['count'] += $values['count'];
        foreach ($values['flags'] as $flag) {
            if (!in_array($flag, $credit[$name]['flags'], true)) {
                $credit[$name]['flags'][] = $flag;
            }
        }
    }

    uasort($credit, 'cmp_credit');

    $html = "<ul>\n";
    foreach ($credit as $name => $value) {
        // Names come from a remote service: escape them so that characters
        // such as & or < cannot break the generated markup.
        $html .= "\t<li>" . htmlspecialchars((string) $name, ENT_QUOTES, 'UTF-8') . "</li>\n";
    }
    $html .= "</ul>\n";

    return $html;
}
}

/*
* cmp_credit: compares the credits of two users.
*
* Sort callback: an entry flagged 'bot' is ordered after one that is not;
* when both agree on that flag, the entry with the higher 'count' comes first.
*/
function cmp_credit($a, $b) {
    $aIsBot = in_array('bot', $a['flags']);
    $bIsBot = in_array('bot', $b['flags']);
    if ($aIsBot == $bIsBot) {
        return $b['count'] - $a['count'];
    }
    return $aIsBot - $bIsBot;
}

/**
* page parser
Expand Down
9 changes: 5 additions & 4 deletions book/formats/Epub2Generator.php
Original file line number Diff line number Diff line change
Expand Up @@ -45,14 +45,15 @@ public function create(Book $book) {
$css = $this->getCssWikisource($book->lang);
$this->i18n = $this->getI18n($book->lang);
setLocale(LC_TIME, $book->lang . '_' . strtoupper($book->lang));
$wsUrl = wikisourceUrl($book->lang, $book->title);
if($css != '')
$this->withCss = true;
$this->encodeTitles($book);
$this->clean($book);
$zip = new ZipCreator();
$zip->addContentFile('mimetype', 'application/epub+zip', null, false); //the mimetype must be first and uncompressed
$zip->addContentFile('META-INF/container.xml', $this->getXmlContainer());
$zip->addContentFile('OPS/content.opf', $this->getOpfContent($book));
$zip->addContentFile('OPS/content.opf', $this->getOpfContent($book, $wsUrl));
$zip->addContentFile('OPS/toc.ncx', $this->getNcxToc($book));
if($book->cover != '')
$zip->addContentFile('OPS/cover.xhtml', $this->getXhtmlCover($book));
Expand Down Expand Up @@ -82,14 +83,14 @@ protected function getXmlContainer() {
return $content;
}

protected function getOpfContent(Book $book) {
protected function getOpfContent(Book $book, $wsUrl) {
$content = '<?xml version="1.0" encoding="UTF-8" ?>
<package xmlns="http://www.idpf.org/2007/opf" unique-identifier="uid" version="2.0">
<metadata xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:opf="http://www.idpf.org/2007/opf" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:dcterms="http://purl.org/dc/terms/">
<dc:identifier id="uid" opf:scheme="URI">' . wikisourceUrl($book->lang, $book->title) . '</dc:identifier>
<dc:identifier id="uid" opf:scheme="URI">' . $wsUrl . '</dc:identifier>
<dc:language xsi:type="dcterms:RFC4646">' . $book->lang . '</dc:language>
<dc:title>' . $book->name . '</dc:title>
<dc:source>' . wikisourceUrl($book->lang, $book->title) . '</dc:source>
<dc:source>' . $wsUrl . '</dc:source>
<dc:date opf:event="ops-publication">' . date(DATE_W3C) . '</dc:date>
<dc:rights>http://creativecommons.org/licenses/by-sa/3.0/</dc:rights>
<dc:rights>http://www.gnu.org/copyleft/fdl.html</dc:rights>
Expand Down
1 change: 1 addition & 0 deletions book/init.php
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
include($basePath . '/utils/utils.php');
include($basePath . '/utils/HttpException.php');
include($basePath . '/utils/Api.php');
include($basePath . '/utils/CurlAsync.php');
include($basePath . '/utils/ZipCreator.php');
include($basePath . '/book/Generator.php');
include($basePath . '/book/Picture.php');
Expand Down
72 changes: 69 additions & 3 deletions utils/Api.php
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,72 @@ public function completeQuery($params) {
return $data;
}

/**
 * Queue an asynchronous request for the rendered content of one page.
 * wrapPage() is registered as the completion callback and receives $id
 * and $responses as extra arguments.
 *
 * @param object $curl_async the multi curl async object doing the request
 * @param string $title the title of the page
 * @param mixed $id the index under which the result is stored in $responses
 * @param array $responses array filled (by reference) when the request ends
 * @return mixed the key returned by addRequest()
 */
public function getPageAsync($curl_async, $title, $id, &$responses) {
    $pageUrl = $this->lang . '.wikisource.org/w/index.php?action=render&title=' . urlencode($title);
    $onFinish = array($this, 'wrapPage');
    // $responses must stay a reference so the callback writes into the caller's array.
    $extraArgs = array($id, &$responses);
    return $curl_async->addRequest($pageUrl, null, $onFinish, $extraArgs);
}

/*
* Callback run when a request started by getPageAsync() ends: wraps the
* received fragment in a complete XHTML document and stores it in
* $responses[$id]. Throws an HttpException when the HTTP code is not 200.
*/
public function wrapPage($data, $id, &$responses) {
    $status = $data['http_code'];
    if ($status != 200) {
        throw new HttpException("HTTP error", $status);
    }
    $documentStart = '<?xml version="1.0" encoding="UTF-8" ?><!DOCTYPE html><html xmlns="http://www.w3.org/1999/xhtml" xml:lang="' . $this->lang . '"><head><meta content="application/xhtml+xml;charset=UTF-8" http-equiv="content-type"/><title></title></head><body>';
    $documentEnd = '</body></html>';
    $responses[$id] = $documentStart . $data['content'] . $documentEnd;
}

/**
 * Fetch several pages through the asynchronous curl helper: every request
 * is queued first, then each one is waited for.
 *
 * @param object $curl_async the multi curl async object doing the requests
 * @param array $titles page titles; their keys are passed on as response ids
 * @return array the array handed to the request callbacks to collect results
 */
public function getPagesAsync($curl_async, $titles) {
    $responses = array();
    $pending = array();
    foreach ($titles as $id => $title) {
        $pending[] = $this->getPageAsync($curl_async, $title, $id, $responses);
    }
    foreach ($pending as $requestKey) {
        $curl_async->waitForKey($requestKey);
    }
    return $responses;
}

/**
 * Queue an asynchronous request for one image. endImage() is registered
 * as the completion callback and receives $id and $responses as extra arguments.
 *
 * @param object $curl_async the multi curl async object doing the request
 * @param string $url the url of the image
 * @param mixed $id the index under which the result is stored in $responses
 * @param array $responses array filled (by reference) when the request ends
 * @return mixed the key returned by addRequest()
 */
public function getImageAsync($curl_async, $url, $id, &$responses) {
    $onFinish = array($this, 'endImage');
    // $responses must stay a reference so the callback writes into the caller's array.
    $extraArgs = array($id, &$responses);
    return $curl_async->addRequest($url, null, $onFinish, $extraArgs);
}

/*
* Callback run when a request started by getImageAsync() ends: stores the
* received body, unchanged, in $responses[$id]. Throws an HttpException
* when the HTTP code is not 200.
*/
public function endImage($data, $id, &$responses) {
    $status = $data['http_code'];
    if ($status != 200) {
        throw new HttpException("HTTP error", $status);
    }
    $responses[$id] = $data['content'];
}

/**
 * Fetch several images through the asynchronous curl helper: every request
 * is queued first, then each one is waited for.
 *
 * @param object $curl_async the multi curl async object doing the requests
 * @param array $urls image urls; their keys are passed on as response ids
 * @return array the array handed to the request callbacks to collect results
 */
public function getImagesAsync($curl_async, $urls) {
    $responses = array();
    $pending = array();
    foreach ($urls as $id => $url) {
        $pending[] = $this->getImageAsync($curl_async, $url, $id, $responses);
    }
    foreach ($pending as $requestKey) {
        $curl_async->waitForKey($requestKey);
    }
    return $responses;
}

/**
* @var $title the title of the page
* @return the content of a page
Expand Down Expand Up @@ -93,7 +159,7 @@ public function getPages($titles) {
* @return the file content
*/
public function get($url) {
$ch = $this->getCurl($url);
$ch = Api::getCurl($url);
$response = curl_exec($ch);
if(curl_errno($ch)) {
throw new HttpException(curl_error($ch), curl_errno($ch));
Expand All @@ -113,7 +179,7 @@ public function getMulti($urls) {
$mh = curl_multi_init();
$curl_array = array();
foreach($urls as $id => $url) {
$curl_array[$id] = $this->getCurl($url);
$curl_array[$id] = Api::getCurl($url);
curl_multi_add_handle($mh, $curl_array[$id]);
}
$running = null;
Expand All @@ -134,7 +200,7 @@ public function getMulti($urls) {
* @var $url the url
* @return curl
*/
protected function getCurl($url) {
static function getCurl($url) {
$ch = curl_init($url);
curl_setopt($ch, CURLOPT_USERAGENT, Api::USER_AGENT);
curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
Expand Down

0 comments on commit 5a35132

Please sign in to comment.