diff --git a/src/Exceptions/AppraiserException.php b/src/Exceptions/AppraiserException.php index 6cccbb7..645f37c 100644 --- a/src/Exceptions/AppraiserException.php +++ b/src/Exceptions/AppraiserException.php @@ -6,6 +6,6 @@ * Class AppraiserException * @package Vantoozz\ProxyScraper\Exceptions */ -class AppraiserException extends ProxyScraperException +final class AppraiserException extends ProxyScraperException { } diff --git a/src/Exceptions/HttpClientException.php b/src/Exceptions/HttpClientException.php index f501592..1c80bdd 100644 --- a/src/Exceptions/HttpClientException.php +++ b/src/Exceptions/HttpClientException.php @@ -6,6 +6,6 @@ * Class HttpClientException * @package Vantoozz\ProxyScraper\Exceptions */ -class HttpClientException extends ProxyScraperException +final class HttpClientException extends ProxyScraperException { } diff --git a/src/Exceptions/InvalidArgumentException.php b/src/Exceptions/InvalidArgumentException.php index 7048b8b..1b748f1 100644 --- a/src/Exceptions/InvalidArgumentException.php +++ b/src/Exceptions/InvalidArgumentException.php @@ -6,6 +6,6 @@ * Class InvalidArgumentException * @package Vantoozz\ProxyScraper\Exceptions */ -class InvalidArgumentException extends ProxyScraperException +final class InvalidArgumentException extends ProxyScraperException { } diff --git a/src/Exceptions/ProxyScraperException.php b/src/Exceptions/ProxyScraperException.php index 1f7673d..a063ae7 100644 --- a/src/Exceptions/ProxyScraperException.php +++ b/src/Exceptions/ProxyScraperException.php @@ -6,6 +6,6 @@ * Class ProxyScraperException * @package Vantoozz\ProxyScraper\Exceptions */ -class ProxyScraperException extends \Exception +abstract class ProxyScraperException extends \Exception { } diff --git a/src/Exceptions/RuntimeException.php b/src/Exceptions/RuntimeException.php index a51b208..e15a2c1 100644 --- a/src/Exceptions/RuntimeException.php +++ b/src/Exceptions/RuntimeException.php @@ -6,6 +6,6 @@ * Class RuntimeException * @package Vantoozz\ProxyScraper\Exceptions */ -class RuntimeException extends ProxyScraperException +final class RuntimeException extends ProxyScraperException { } diff --git a/src/Exceptions/ScraperException.php b/src/Exceptions/ScraperException.php index dc053c6..def80cb 100644 --- a/src/Exceptions/ScraperException.php +++ b/src/Exceptions/ScraperException.php @@ -6,6 +6,6 @@ * Class ScraperException * @package Vantoozz\ProxyScraper\Exceptions */ -class ScraperException extends ProxyScraperException +final class ScraperException extends ProxyScraperException { } diff --git a/src/Exceptions/ValidationException.php b/src/Exceptions/ValidationException.php index ed401cb..d33a7aa 100644 --- a/src/Exceptions/ValidationException.php +++ b/src/Exceptions/ValidationException.php @@ -6,6 +6,6 @@ * Class ValidationException * @package Vantoozz\ProxyScraper\Exceptions */ -class ValidationException extends ProxyScraperException +final class ValidationException extends ProxyScraperException { } diff --git a/src/Scrapers/PrimeSpeedScraper.php b/src/Scrapers/PrimeSpeedScraper.php new file mode 100644 index 0000000..5b5953b --- /dev/null +++ b/src/Scrapers/PrimeSpeedScraper.php @@ -0,0 +1,70 @@ +httpClient = $httpClient; + } + + /** + * @return \Generator|Proxy[] + * @throws \Vantoozz\ProxyScraper\Exceptions\ScraperException + */ + public function get(): \Generator + { + try { + $html = $this->httpClient->get(static::URL); + } catch (HttpClientException $e) { + throw new ScraperException($e->getMessage(), $e->getCode(), $e); + } + + $list = $this->extractList($html); + + yield from (new TextScraper($list))->get(); + } + + /** + * @param string $html + * @return string + * @throws \Vantoozz\ProxyScraper\Exceptions\ScraperException + */ + private function extractList(string $html): string + { + $expectedPartsCount = 2; + + $parts = explode("<proxy_server_name> : <proxy_port_number>\n\n0.0.0.0:80\n", $html); + if ($expectedPartsCount !== count($parts)) { + throw new ScraperException('Unexpected markup'); + } + + $parts = explode("\n\n\n\n", $parts[1]); + if ($expectedPartsCount !== count($parts)) { + throw new ScraperException('Unexpected markup'); + } + + return $parts[0]; + } +} diff --git a/src/Scrapers/ProxyDbScraper.php b/src/Scrapers/ProxyDbScraper.php index 2840fb6..6377c2c 100644 --- a/src/Scrapers/ProxyDbScraper.php +++ b/src/Scrapers/ProxyDbScraper.php @@ -65,7 +65,7 @@ private function getPage(int $offset, int $pageSize): \Generator } if (!(new Text($html))->isHtml()) { - throw new ScraperException($html); + throw new ScraperException('Unexpected markup'); } $rows = (new Dom($html))->filter('table tbody tr'); diff --git a/tests/integration/Scrapers/PrimeSpeedScraperTest.php b/tests/integration/Scrapers/PrimeSpeedScraperTest.php new file mode 100644 index 0000000..26a382f --- /dev/null +++ b/tests/integration/Scrapers/PrimeSpeedScraperTest.php @@ -0,0 +1,25 @@ +httpClient()); + + $proxies = iterator_to_array($scrapper->get()); + $this->assertGreaterThanOrEqual(100, count($proxies)); + } +} diff --git a/tests/systemTests.php b/tests/systemTests.php index f45ecfd..1b099ca 100644 --- a/tests/systemTests.php +++ b/tests/systemTests.php @@ -31,6 +31,7 @@ Scrapers\FreeProxyListScraper::class, Scrapers\HideMyIpScraper::class, Scrapers\MultiproxyScraper::class, + Scrapers\PrimeSpeedScraper::class, Scrapers\ProxyDbScraper::class, Scrapers\SocksProxyScraper::class, Scrapers\SpysMeScraper::class, diff --git a/tests/unit/Scrapers/PrimeSpeedScraperTest.php b/tests/unit/Scrapers/PrimeSpeedScraperTest.php new file mode 100644 index 0000000..59d6150 --- /dev/null +++ b/tests/unit/Scrapers/PrimeSpeedScraperTest.php @@ -0,0 +1,128 @@ +createMock(HttpClientInterface::class); + $httpClient + ->expects(static::once()) + ->method('get') + ->willThrowException(new HttpClientException('error message')); + + $scraper = new PrimeSpeedScraper($httpClient); + $scraper->get()->current(); + } + + /** + * @test + */ + public function it_returns_a_proxy(): void + { + + $html = << +format: +<proxy_server_name> : <proxy_port_number> + +0.0.0.0:80 +222.111.222.111:8118 +222.111.222.122:8118 + + + + + +HTML; + + + /** @var HttpClientInterface|\PHPUnit_Framework_MockObject_MockObject $httpClient */ + $httpClient = $this->createMock(HttpClientInterface::class); + $httpClient + ->expects(static::once()) + ->method('get') + ->willReturn($html); + + $scraper = new PrimeSpeedScraper($httpClient); + $proxies = iterator_to_array($scraper->get(), false); + + $this->assertInstanceOf(Proxy::class, $proxies[0]); + $this->assertSame('222.111.222.111:8118', (string)$proxies[0]); + } + + + /** + * @test + * @expectedException \Vantoozz\ProxyScraper\Exceptions\ScraperException + * @expectedExceptionMessage Unexpected markup + */ + public function it_throws_an_exception_on_unexpected_markup(): void + { + $html = << + +HTML; + + + /** @var HttpClientInterface|\PHPUnit_Framework_MockObject_MockObject $httpClient */ + $httpClient = $this->createMock(HttpClientInterface::class); + $httpClient + ->expects(static::once()) + ->method('get') + ->willReturn($html); + + $scraper = new PrimeSpeedScraper($httpClient); + $scraper->get()->current(); + } + + + /** + * @test + * @expectedException \Vantoozz\ProxyScraper\Exceptions\ScraperException + * @expectedExceptionMessage Unexpected markup + */ + public function it_throws_more_exceptions_on_unexpected_markup(): void + { + $html = << +format: +<proxy_server_name> : <proxy_port_number> + +0.0.0.0:80 +222.111.222.111:8118 +222.111.222.122:8118 + +HTML; + + + /** @var HttpClientInterface|\PHPUnit_Framework_MockObject_MockObject $httpClient */ + $httpClient = $this->createMock(HttpClientInterface::class); + $httpClient + ->expects(static::once()) + ->method('get') + ->willReturn($html); + + $scraper = new PrimeSpeedScraper($httpClient); + $scraper->get()->current(); + } + +} diff --git a/tests/unit/Scrapers/ProxyDbScraperTest.php b/tests/unit/Scrapers/ProxyDbScraperTest.php index 3ff38cc..b74e7ca 100644 --- a/tests/unit/Scrapers/ProxyDbScraperTest.php +++ b/tests/unit/Scrapers/ProxyDbScraperTest.php @@ -34,7 +34,7 @@ public function it_throws_an_exception_on_http_client_error(): void /** * @test * @expectedException \Vantoozz\ProxyScraper\Exceptions\ScraperException - * @expectedExceptionMessage some text + * @expectedExceptionMessage Unexpected markup */ public function it_throws_an_exception_on_non_html_response(): void {