diff --git a/composer.json b/composer.json
index 14c95eb..1ac5873 100644
--- a/composer.json
+++ b/composer.json
@@ -11,6 +11,7 @@
],
"require": {
"php": "^7.1",
+ "ext-json": "*",
"php-http/client-implementation": "~1",
"php-http/message-factory": "^1.0.2",
"php-http/httplug": "^1.1",
diff --git a/src/Scrapers/CoolProxyScraper.php b/src/Scrapers/CoolProxyScraper.php
index 6170c7c..0ae45f2 100644
--- a/src/Scrapers/CoolProxyScraper.php
+++ b/src/Scrapers/CoolProxyScraper.php
@@ -1,10 +1,14 @@
-getPage($page);
- } catch (HttpClientException $e) {
- break;
- }
- $page++;
- } while ($page <= static::MAX_PAGES_COUNT);
- }
-
- /**
- * @param int $page
- * @return \Generator
- * @throws \Vantoozz\ProxyScraper\Exceptions\HttpClientException
- * @throws \RuntimeException if the CssSelector Component is not available
- */
- private function getPage(int $page): \Generator
+ public function get(): Generator
{
+ // Fetches the proxy list with a single HTTP request and yields one Proxy per usable entry.
+ // NOTE(review): sprintf() is called without arguments, so it only matters if JSON_URL
+ // contains format specifiers - confirm against the constant before simplifying.
+ try {
+ $json = $this->httpClient->get(sprintf(static::JSON_URL));
+ } catch (HttpClientException $e) {
+ // Re-throw as the scraper-level exception, keeping the client exception as "previous".
+ throw new ScraperException($e->getMessage(), $e->getCode(), $e);
+ }
- $html = $this->httpClient->get(sprintf(static::PAGE_URL, $page));
+ $data = json_decode($json, true);
+ // Ask the JSON extension whether decoding failed instead of testing truthiness:
+ // valid payloads such as "[]", "0" or "null" decode to falsy values without any error,
+ // which previously produced the misleading "Cannot parse json: No error".
+ if (JSON_ERROR_NONE !== json_last_error()) {
+ throw new ScraperException('Cannot parse json: ' . json_last_error_msg());
+ }
- $rows = (new Dom($html))->filter('table tr');
+ // Well-formed JSON that is not a non-empty array carries no proxy entries.
+ if (!is_array($data) || [] === $data) {
+ throw new ScraperException('No data');
+ }
- foreach ($rows as $row) {
+ foreach ($data as $item) {
+ // A non-array entry is replaced by an empty item, which makeProxy() rejects for lacking an IP.
+ if (!is_array($item)) {
+ $item = [];
+ }
try {
- yield $this->makeProxy(new Dom($row));
- } catch (\Throwable $e) {
+ yield $this->makeProxy($item);
+ } catch (Throwable $e) {
+ // Best effort: a single unusable entry must not abort the rest of the list.
continue;
}
}
}
+
/**
- * @param Dom $row
+ * @param array $item
* @return Proxy
- * @throws \Throwable
+ * @throws Throwable
*/
- private function makeProxy(Dom $row): Proxy
+ private function makeProxy(array $item): Proxy
{
- $ipv4 = base64_decode(str_rot13(explode('"', $row->filter('td')->eq(0)->text())[1]));
-
- $port = (int)$row->filter('td')->eq(1)->text();
+ if (!isset($item['ip'])) {
+ throw new InvalidArgumentException('No IP given');
+ }
+ if (!isset($item['port'])) {
+ throw new InvalidArgumentException('No port given');
+ }
- $proxy = new Proxy(new Ipv4($ipv4), new Port($port));
+ $proxy = new Proxy(new Ipv4($item['ip']), new Port((int)$item['port']));
$proxy->addMetric(new Metric(Metrics::SOURCE, static::class));
return $proxy;
diff --git a/tests/unit/Scrapers/CoolProxyScraperTest.php b/tests/unit/Scrapers/CoolProxyScraperTest.php
index 773f1c6..ad391c3 100644
--- a/tests/unit/Scrapers/CoolProxyScraperTest.php
+++ b/tests/unit/Scrapers/CoolProxyScraperTest.php
@@ -3,11 +3,13 @@
namespace Vantoozz\ProxyScraper\UnitTests\Scrapers;
use PHPUnit\Framework\TestCase;
+use PHPUnit_Framework_MockObject_MockObject;
use Vantoozz\ProxyScraper\Enums\Metrics;
use Vantoozz\ProxyScraper\Exceptions\HttpClientException;
use Vantoozz\ProxyScraper\HttpClient\HttpClientInterface;
use Vantoozz\ProxyScraper\Proxy;
use Vantoozz\ProxyScraper\Scrapers\CoolProxyScraper;
+use Vantoozz\ProxyScraper\Scrapers\HideMyIpScraper;
/**
* Class CoolProxyScraperTest
@@ -17,8 +19,10 @@ final class CoolProxyScraperTest extends TestCase
{
/**
* @test
+ * @expectedException \Vantoozz\ProxyScraper\Exceptions\ScraperException
+ * @expectedExceptionMessage error message
*/
- public function it_stops_on_http_client_error(): void
+ public function it_throws_an_exception_on_http_client_error(): void
{
/** @var HttpClientInterface|\PHPUnit_Framework_MockObject_MockObject $httpClient */
$httpClient = $this->createMock(HttpClientInterface::class);
@@ -28,7 +32,7 @@ public function it_stops_on_http_client_error(): void
->willThrowException(new HttpClientException('error message'));
$scraper = new CoolProxyScraper($httpClient);
- static::assertNull($scraper->get()->current());
+ $scraper->get()->current();
}
/**
@@ -36,12 +40,12 @@ public function it_stops_on_http_client_error(): void
*/
public function it_returns_source_metric(): void
{
- /** @var HttpClientInterface|\PHPUnit_Framework_MockObject_MockObject $httpClient */
+ /** @var HttpClientInterface|PHPUnit_Framework_MockObject_MockObject $httpClient */
$httpClient = $this->createMock(HttpClientInterface::class);
$httpClient
->expects(static::once())
->method('get')
- ->willReturn('
');
+ ->willReturn('[{"ip":"177.43.57.48","port":2222},{"ip":"206.189.220.8","port":80}]');
$scraper = new CoolProxyScraper($httpClient);
$proxy = $scraper->get()->current();
@@ -57,12 +61,12 @@ public function it_returns_source_metric(): void
*/
public function it_returns_a_proxy(): void
{
- /** @var HttpClientInterface|\PHPUnit_Framework_MockObject_MockObject $httpClient */
+ /** @var HttpClientInterface|PHPUnit_Framework_MockObject_MockObject $httpClient */
$httpClient = $this->createMock(HttpClientInterface::class);
$httpClient
->expects(static::once())
->method('get')
- ->willReturn('');
+ ->willReturn('[{"ip":"177.43.57.48","port":2222},{"ip":"206.189.220.8","port":80}]');
$scraper = new CoolProxyScraper($httpClient);
$proxy = $scraper->get()->current();
@@ -74,35 +78,87 @@ public function it_returns_a_proxy(): void
/**
+ * An entry without an "ip" key is skipped, so the generator yields nothing.
+ *
* @test
*/
- public function it_fetches_no_more_than_100_pages(): void
+ public function it_skips_rows_with_no_ip(): void
{
- /** @var HttpClientInterface|\PHPUnit_Framework_MockObject_MockObject $httpClient */
+ /** @var HttpClientInterface|PHPUnit_Framework_MockObject_MockObject $httpClient */
$httpClient = $this->createMock(HttpClientInterface::class);
+ // The scraper issues exactly one request for the JSON list, so pin the call count.
$httpClient
- ->expects(static::atLeastOnce())
+ ->expects(static::once())
->method('get')
- ->willReturn('');
+ ->willReturn('[{"port":2222}]');
$scraper = new CoolProxyScraper($httpClient);
- $proxies = iterator_to_array($scraper->get(), false);
- static::assertCount(100, $proxies);
+ // current() returns null when the generator finishes without yielding anything.
+ static::assertNull($scraper->get()->current());
}
/**
+ * A list entry that is not an array (here a bare number) is skipped, so the generator yields nothing.
+ *
* @test
*/
- public function it_skips_bad_rows(): void
+ public function it_skips_non_array_rows(): void
{
- /** @var HttpClientInterface|\PHPUnit_Framework_MockObject_MockObject $httpClient */
+ /** @var HttpClientInterface|PHPUnit_Framework_MockObject_MockObject $httpClient */
$httpClient = $this->createMock(HttpClientInterface::class);
+ // The scraper issues exactly one request for the JSON list, so pin the call count.
$httpClient
- ->expects(static::atLeastOnce())
+ ->expects(static::once())
->method('get')
- ->willReturn('');
+ ->willReturn('[123]');
$scraper = new CoolProxyScraper($httpClient);
+ // current() returns null when the generator finishes without yielding anything.
static::assertNull($scraper->get()->current());
}
+
+ /**
+ * An entry without a "port" key is skipped, so the generator yields nothing.
+ *
+ * @test
+ */
+ public function it_skips_rows_with_no_port(): void
+ {
+ /** @var HttpClientInterface|PHPUnit_Framework_MockObject_MockObject $httpClient */
+ $httpClient = $this->createMock(HttpClientInterface::class);
+ // The scraper issues exactly one request for the JSON list, so pin the call count.
+ $httpClient
+ ->expects(static::once())
+ ->method('get')
+ ->willReturn('[{"ip":"177.43.57.48"}]');
+
+ $scraper = new CoolProxyScraper($httpClient);
+
+ // current() returns null when the generator finishes without yielding anything.
+ static::assertNull($scraper->get()->current());
+ }
+
+ /**
+ * A response body that is not valid JSON must surface as a ScraperException
+ * carrying the JSON extension's error message.
+ *
+ * @test
+ * @expectedException \Vantoozz\ProxyScraper\Exceptions\ScraperException
+ * @expectedExceptionMessage Cannot parse json: Syntax error
+ */
+ public function it_throws_an_exception_if_bad_json_got(): void
+ {
+ /** @var HttpClientInterface|PHPUnit_Framework_MockObject_MockObject $client */
+ $client = $this->createMock(HttpClientInterface::class);
+ $client->expects(static::once())->method('get')->willReturn('var json = dcvsdjh');
+
+ // The generator body only runs once it is advanced; current() triggers the request and parsing.
+ (new CoolProxyScraper($client))->get()->current();
+ }
+
+ /**
+ * Well-formed JSON that does not decode to an array (here a bare number)
+ * must surface as a ScraperException with the "No data" message.
+ *
+ * @test
+ * @expectedException \Vantoozz\ProxyScraper\Exceptions\ScraperException
+ * @expectedExceptionMessage No data
+ */
+ public function it_throws_an_exception_if_no_data_got(): void
+ {
+ /** @var HttpClientInterface|PHPUnit_Framework_MockObject_MockObject $client */
+ $client = $this->createMock(HttpClientInterface::class);
+ $client->expects(static::once())->method('get')->willReturn('123');
+
+ // The generator body only runs once it is advanced; current() triggers the request and parsing.
+ (new CoolProxyScraper($client))->get()->current();
+ }
}