/
SaturdayMorningBreakfastCereal.php
86 lines (73 loc) · 2.25 KB
/
SaturdayMorningBreakfastCereal.php
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
<?php
namespace PhlyComic\ComicSource;
use PhlyComic\Comic;
use SimpleXMLElement;
use Zend\Dom\Query as DomQuery;
/**
* The SMBC feed provides a link to the **page** containing the comic,
* but not a link to the comic image itself.
*
* This class fetches the feed, loops through entries for the first comic,
* pulls the page in that entry's link element, and then scrapes the page for
* the comic image URL.
*/
class SaturdayMorningBreakfastCereal extends AbstractRssSource
{
    /**
     * Comics handled by this source: short name => human-readable title.
     *
     * @var array
     */
    protected static $comics = array(
        'smbc' => 'Saturday Morning Breakfast Cereal',
    );

    /**
     * Base URL that relative image paths scraped from a comic page are
     * resolved against.
     *
     * @var string
     */
    protected $comicBase = 'http://www.smbc-comics.com/';

    /**
     * Short name of the comic; matches the key used in self::$comics.
     *
     * @var string
     */
    protected $comicShortName = 'smbc';

    /**
     * CSS selector handed to Zend\Dom\Query to locate the comic image element
     * on a comic page.
     *
     * @var string
     */
    protected $domQuery = 'img#comic';

    /**
     * URL of the RSS feed.
     *
     * NOTE(review): not read in this class; presumably consumed by
     * AbstractRssSource to fetch the feed -- confirm in the parent class.
     *
     * @var string
     */
    protected $feedUrl = 'http://www.smbc-comics.com/rss.php';

    /**
     * Walk the feed items and return data for the first one that yields a result.
     *
     * For each item, the linked page is scraped via getImageFromLink(). The
     * first item producing either a Comic instance or a non-empty image URL
     * ends the loop.
     *
     * NOTE(review): if registerError() returns a Comic (which the instanceof
     * check below suggests), a failure on the first item is returned right
     * away and later items are never tried -- confirm that this is intended.
     *
     * @param  SimpleXMLElement $feed Parsed RSS document (expects channel/item/link)
     * @return Comic|array|false A Comic as returned by getImageFromLink(), an
     *                           array with keys "daily" (page URL) and "image"
     *                           (image URL), or false when no item yielded anything
     */
    protected function getDataFromFeed(SimpleXMLElement $feed)
    {
        foreach ($feed->channel->item as $latest) {
            // Feed generators sometimes pad <link> with whitespace/newlines,
            // which would make the URL unusable for fetching.
            $link  = trim((string) $latest->link);
            $image = $this->getImageFromLink($link);

            if ($image instanceof Comic) {
                return $image;
            }

            if ($image) {
                return array(
                    'daily' => $link,
                    'image' => $image,
                );
            }
        }

        return false;
    }

    /**
     * Fetch a comic page and extract the URL of the comic image from it.
     *
     * @param  string $url URL of the page that contains the comic
     * @return mixed  The fully-qualified image URL (string) on success;
     *                otherwise whatever registerError() returns (presumably a
     *                Comic carrying the error -- defined in a parent class,
     *                not visible here)
     */
    protected function getImageFromLink($url)
    {
        // An empty path makes file_get_contents() emit a warning (and throw a
        // ValueError on PHP 8), so report it without attempting any I/O.
        if ((string) $url === '') {
            return $this->registerError(sprintf(
                'Comic at "%s" is unreachable',
                $url
            ));
        }

        $page = file_get_contents($url);
        if (!$page) {
            return $this->registerError(sprintf(
                'Comic at "%s" is unreachable',
                $url
            ));
        }

        $dom = new DomQuery();
        $dom->setDocumentHtml($page);
        $r = $dom->execute($this->domQuery);
        if (!$r->count()) {
            // The page was fetched; it is the image element that is missing.
            return $this->registerError(sprintf(
                'Unable to find comic image element in "%s"',
                $url
            ));
        }

        // Use the first matched element that carries a non-empty src.
        $imgUrl = false;
        foreach ($r as $node) {
            if (!$node->hasAttribute('src')) {
                continue;
            }

            $src = trim($node->getAttribute('src'));
            if ($src !== '') {
                $imgUrl = $src;
                break;
            }
        }

        if ($imgUrl === false) {
            return $this->registerError(sprintf(
                'Unable to find image source in "%s"',
                $url
            ));
        }

        return $this->resolveImageUrl($imgUrl);
    }

    /**
     * Turn an image "src" value into a fully-qualified URL.
     *
     * Absolute URLs are returned unchanged, protocol-relative URLs get the
     * scheme of $comicBase, and everything else is appended to $comicBase
     * with exactly one slash between the two parts.
     *
     * @param  string $src Non-empty src attribute value
     * @return string
     */
    private function resolveImageUrl($src)
    {
        // Protocol-relative: "//host/path"
        if (strpos($src, '//') === 0) {
            $scheme = parse_url($this->comicBase, PHP_URL_SCHEME);
            return ($scheme ? $scheme : 'http') . ':' . $src;
        }

        // Already absolute: "scheme://..."
        if (preg_match('#^[a-z][a-z0-9+.\-]*://#i', $src)) {
            return $src;
        }

        // Relative or root-relative path: avoid a doubled or missing slash.
        return rtrim($this->comicBase, '/') . '/' . ltrim($src, '/');
    }
}