Skip to content

Commit

Permalink
+ prime speed scraper
Browse files Browse the repository at this point in the history
  • Loading branch information
vantoozz committed Nov 3, 2017
1 parent 78fbbff commit fe4df16
Show file tree
Hide file tree
Showing 13 changed files with 233 additions and 9 deletions.
2 changes: 1 addition & 1 deletion src/Exceptions/AppraiserException.php
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,6 @@
* Class AppraiserException
* @package Vantoozz\ProxyScraper\Exceptions
*/
class AppraiserException extends ProxyScraperException
final class AppraiserException extends ProxyScraperException
{
}
2 changes: 1 addition & 1 deletion src/Exceptions/HttpClientException.php
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,6 @@
* Class HttpClientException
* @package Vantoozz\ProxyScraper\Exceptions
*/
class HttpClientException extends ProxyScraperException
final class HttpClientException extends ProxyScraperException
{
}
2 changes: 1 addition & 1 deletion src/Exceptions/InvalidArgumentException.php
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,6 @@
* Class InvalidArgumentException
* @package Vantoozz\ProxyScraper\Exceptions
*/
class InvalidArgumentException extends ProxyScraperException
final class InvalidArgumentException extends ProxyScraperException
{
}
2 changes: 1 addition & 1 deletion src/Exceptions/ProxyScraperException.php
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,6 @@
* Class ProxyScraperException
* @package Vantoozz\ProxyScraper\Exceptions
*/
class ProxyScraperException extends \Exception
abstract class ProxyScraperException extends \Exception
{
}
2 changes: 1 addition & 1 deletion src/Exceptions/RuntimeException.php
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,6 @@
* Class RuntimeException
* @package Vantoozz\ProxyScraper\Exceptions
*/
class RuntimeException extends ProxyScraperException
final class RuntimeException extends ProxyScraperException
{
}
2 changes: 1 addition & 1 deletion src/Exceptions/ScraperException.php
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,6 @@
* Class ScraperException
* @package Vantoozz\ProxyScraper\Exceptions
*/
class ScraperException extends ProxyScraperException
final class ScraperException extends ProxyScraperException
{
}
2 changes: 1 addition & 1 deletion src/Exceptions/ValidationException.php
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,6 @@
* Class ValidationException
* @package Vantoozz\ProxyScraper\Exceptions
*/
class ValidationException extends ProxyScraperException
final class ValidationException extends ProxyScraperException
{
}
70 changes: 70 additions & 0 deletions src/Scrapers/PrimeSpeedScraper.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
<?php declare(strict_types = 1);

namespace Vantoozz\ProxyScraper\Scrapers;

use Vantoozz\ProxyScraper\Exceptions\HttpClientException;
use Vantoozz\ProxyScraper\Exceptions\ScraperException;
use Vantoozz\ProxyScraper\HttpClient\HttpClientInterface;
use Vantoozz\ProxyScraper\Proxy;

/**
 * Class PrimeSpeedScraper
 *
 * Fetches the prime-speed.ru proxy list page, cuts the plain-text list out of
 * the surrounding markup and delegates the parsing of that list to TextScraper.
 *
 * @package Vantoozz\ProxyScraper\Scrapers
 */
final class PrimeSpeedScraper implements ScraperInterface
{
    private const URL = 'http://www.prime-speed.ru/proxy/free-proxy-list/all-working-proxies.php';

    /**
     * Text that immediately precedes the proxy list: the format legend followed
     * by the 0.0.0.0:80 sample entry.
     */
    private const LIST_START = "&lt;proxy_server_name&gt; : &lt;proxy_port_number&gt;\n\n0.0.0.0:80\n";

    /**
     * Text that immediately follows the proxy list.
     */
    private const LIST_END = "\n\n\n\n</pre>";

    /**
     * @var HttpClientInterface
     */
    private $httpClient;

    /**
     * PrimeSpeedScraper constructor.
     * @param HttpClientInterface $httpClient client used to download the list page
     */
    public function __construct(HttpClientInterface $httpClient)
    {
        $this->httpClient = $httpClient;
    }

    /**
     * Yields whatever TextScraper produces for the list found on the page.
     *
     * This method is a generator, so nothing is requested (and no exception is
     * thrown) until the first element is pulled from it.
     *
     * @return \Generator|Proxy[]
     * @throws \Vantoozz\ProxyScraper\Exceptions\ScraperException on an HTTP client failure or unexpected markup
     */
    public function get(): \Generator
    {
        try {
            // The class is final and the constant is private, so late static binding is not needed.
            $html = $this->httpClient->get(self::URL);
        } catch (HttpClientException $e) {
            // Re-thrown as the scraper-level exception; the original one is kept as "previous".
            throw new ScraperException($e->getMessage(), $e->getCode(), $e);
        }

        yield from (new TextScraper($this->extractList($html)))->get();
    }

    /**
     * Returns the text located between LIST_START and LIST_END.
     *
     * @param string $html
     * @return string
     * @throws \Vantoozz\ProxyScraper\Exceptions\ScraperException if either marker does not occur exactly once
     */
    private function extractList(string $html): string
    {
        $afterStart = $this->splitInTwo(self::LIST_START, $html)[1];

        return $this->splitInTwo(self::LIST_END, $afterStart)[0];
    }

    /**
     * Splits the subject around a delimiter that must occur exactly once.
     *
     * @param string $delimiter
     * @param string $subject
     * @return string[] the part before and the part after the delimiter
     * @throws \Vantoozz\ProxyScraper\Exceptions\ScraperException if the split does not give exactly two parts
     */
    private function splitInTwo(string $delimiter, string $subject): array
    {
        $parts = explode($delimiter, $subject);
        if (2 !== count($parts)) {
            throw new ScraperException('Unexpected markup');
        }

        return $parts;
    }
}
2 changes: 1 addition & 1 deletion src/Scrapers/ProxyDbScraper.php
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ private function getPage(int $offset, int $pageSize): \Generator
}

if (!(new Text($html))->isHtml()) {
throw new ScraperException($html);
throw new ScraperException('Unexpected markup');
}

$rows = (new Dom($html))->filter('table tbody tr');
Expand Down
25 changes: 25 additions & 0 deletions tests/integration/Scrapers/PrimeSpeedScraperTest.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
<?php declare(strict_types = 1);

namespace Vantoozz\ProxyScraper\IntegrationTests\Scrapers;

use Vantoozz\ProxyScraper\IntegrationTests\IntegrationTest;
use Vantoozz\ProxyScraper\Scrapers\PrimeSpeedScraper;
use Vantoozz\ProxyScraper\Scrapers\UsProxyScraper;

/**
 * Class PrimeSpeedScraperTest
 *
 * Runs PrimeSpeedScraper with the HTTP client supplied by IntegrationTest
 * (presumably a real client hitting the live site - confirm in IntegrationTest).
 *
 * @package Vantoozz\ProxyScraper\IntegrationTests\Scrapers
 */
final class PrimeSpeedScraperTest extends IntegrationTest
{
    /**
     * The scraper is expected to return at least 100 proxies.
     *
     * @test
     */
    public function it_works(): void
    {
        $scraper = new PrimeSpeedScraper($this->httpClient());

        // Keys are dropped on purpose: the scraper delegates with "yield from",
        // and preserved generator keys may collide and overwrite each other,
        // which would make the count below too small.
        $proxies = iterator_to_array($scraper->get(), false);
        $this->assertGreaterThanOrEqual(100, count($proxies));
    }
}
1 change: 1 addition & 0 deletions tests/systemTests.php
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@
Scrapers\FreeProxyListScraper::class,
Scrapers\HideMyIpScraper::class,
Scrapers\MultiproxyScraper::class,
Scrapers\PrimeSpeedScraper::class,
Scrapers\ProxyDbScraper::class,
Scrapers\SocksProxyScraper::class,
Scrapers\SpysMeScraper::class,
Expand Down
128 changes: 128 additions & 0 deletions tests/unit/Scrapers/PrimeSpeedScraperTest.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,128 @@
<?php declare(strict_types = 1);

namespace Vantoozz\ProxyScraper\UnitTests\Scrapers;

use PHPUnit\Framework\TestCase;
use Vantoozz\ProxyScraper\Exceptions\HttpClientException;
use Vantoozz\ProxyScraper\HttpClient\HttpClientInterface;
use Vantoozz\ProxyScraper\Proxy;
use Vantoozz\ProxyScraper\Scrapers\FoxToolsScraper;
use Vantoozz\ProxyScraper\Scrapers\PrimeSpeedScraper;

/**
 * Class PrimeSpeedScraperTest
 *
 * Unit tests for PrimeSpeedScraper with a mocked HTTP client.
 *
 * The HTML fixtures are whitespace-sensitive: the scraper looks for the format
 * legend followed by an empty line and the "0.0.0.0:80" sample entry, and for
 * three empty lines in front of the closing "</pre>" tag.
 *
 * @package Vantoozz\ProxyScraper\UnitTests\Scrapers
 */
final class PrimeSpeedScraperTest extends TestCase
{
    /**
     * A failure of the HTTP client must surface as a ScraperException carrying the same message.
     *
     * @test
     * @expectedException \Vantoozz\ProxyScraper\Exceptions\ScraperException
     * @expectedExceptionMessage error message
     */
    public function it_throws_an_exception_on_http_client_error(): void
    {
        /** @var HttpClientInterface|\PHPUnit_Framework_MockObject_MockObject $httpClient */
        $httpClient = $this->createMock(HttpClientInterface::class);
        $httpClient
            ->expects(static::once())
            ->method('get')
            ->willThrowException(new HttpClientException('error message'));

        $scraper = new PrimeSpeedScraper($httpClient);
        // get() is a generator: the request is only made once an element is pulled.
        $scraper->get()->current();
    }

    /**
     * A well-formed page yields Proxy objects, starting with the entry after the sample line.
     *
     * @test
     */
    public function it_returns_a_proxy(): void
    {
        $html = <<<HTML
<pre>
format:
&lt;proxy_server_name&gt; : &lt;proxy_port_number&gt;

0.0.0.0:80
222.111.222.111:8118
222.111.222.122:8118



</pre>
HTML;

        $proxies = iterator_to_array($this->scraperReturning($html)->get(), false);

        $this->assertInstanceOf(Proxy::class, $proxies[0]);
        $this->assertSame('222.111.222.111:8118', (string)$proxies[0]);
    }

    /**
     * A page without the list header is rejected.
     *
     * @test
     * @expectedException \Vantoozz\ProxyScraper\Exceptions\ScraperException
     * @expectedExceptionMessage Unexpected markup
     */
    public function it_throws_an_exception_on_unexpected_markup(): void
    {
        $html = <<<HTML
<pre>
</pre>
HTML;

        $this->scraperReturning($html)->get()->current();
    }

    /**
     * A page with a valid list header but without the closing part is rejected.
     *
     * @test
     * @expectedException \Vantoozz\ProxyScraper\Exceptions\ScraperException
     * @expectedExceptionMessage Unexpected markup
     */
    public function it_throws_more_exceptions_on_unexpected_markup(): void
    {
        // The header matches, so the first split succeeds and the check
        // for the closing "</pre>" part is the one that fails.
        $html = <<<HTML
<pre>
format:
&lt;proxy_server_name&gt; : &lt;proxy_port_number&gt;

0.0.0.0:80
222.111.222.111:8118
222.111.222.122:8118
HTML;

        $this->scraperReturning($html)->get()->current();
    }

    /**
     * Builds a scraper whose HTTP client returns the given body exactly once.
     *
     * @param string $html response body handed to the scraper
     * @return PrimeSpeedScraper
     */
    private function scraperReturning(string $html): PrimeSpeedScraper
    {
        /** @var HttpClientInterface|\PHPUnit_Framework_MockObject_MockObject $httpClient */
        $httpClient = $this->createMock(HttpClientInterface::class);
        $httpClient
            ->expects(static::once())
            ->method('get')
            ->willReturn($html);

        return new PrimeSpeedScraper($httpClient);
    }
}
2 changes: 1 addition & 1 deletion tests/unit/Scrapers/ProxyDbScraperTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ public function it_throws_an_exception_on_http_client_error(): void
/**
* @test
* @expectedException \Vantoozz\ProxyScraper\Exceptions\ScraperException
* @expectedExceptionMessage some text
* @expectedExceptionMessage Unexpected markup
*/
public function it_throws_an_exception_on_non_html_response(): void
{
Expand Down

0 comments on commit fe4df16

Please sign in to comment.