-
Notifications
You must be signed in to change notification settings - Fork 17
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
13 changed files
with
233 additions
and
9 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,70 @@ | ||
<?php declare(strict_types = 1); | ||
|
||
namespace Vantoozz\ProxyScraper\Scrapers; | ||
|
||
use Vantoozz\ProxyScraper\Exceptions\HttpClientException; | ||
use Vantoozz\ProxyScraper\Exceptions\ScraperException; | ||
use Vantoozz\ProxyScraper\HttpClient\HttpClientInterface; | ||
use Vantoozz\ProxyScraper\Proxy; | ||
|
||
/**
 * Scraper for the prime-speed.ru "all working proxies" page.
 *
 * The page is downloaded through the injected HTTP client, the plain-text
 * proxy list is cut out of the surrounding markup, and the resulting text
 * is delegated to TextScraper.
 *
 * @package Vantoozz\ProxyScraper\Scrapers
 */
final class PrimeSpeedScraper implements ScraperInterface
{
    private const URL = 'http://www.prime-speed.ru/proxy/free-proxy-list/all-working-proxies.php';

    /**
     * Markup that must appear exactly once immediately before the proxy list
     * (the format description followed by a placeholder entry).
     */
    private const LIST_HEADER = "<proxy_server_name> : <proxy_port_number>\n\n0.0.0.0:80\n";

    /**
     * Markup that must appear exactly once immediately after the proxy list.
     */
    private const LIST_FOOTER = "\n\n\n\n</pre>";

    /**
     * @var HttpClientInterface
     */
    private $httpClient;

    /**
     * PrimeSpeedScraper constructor.
     * @param HttpClientInterface $httpClient client used to download the page
     */
    public function __construct(HttpClientInterface $httpClient)
    {
        $this->httpClient = $httpClient;
    }

    /**
     * Downloads the page and yields every proxy produced by TextScraper
     * from the extracted list.
     *
     * This is a generator: nothing is requested until iteration starts.
     *
     * @return \Generator|Proxy[]
     * @throws \Vantoozz\ProxyScraper\Exceptions\ScraperException when the HTTP client fails
     *                                                            or the markup is not recognised
     */
    public function get(): \Generator
    {
        try {
            // self:: rather than static:: — the class is final and the constant is private,
            // so late static binding can never resolve to anything else.
            $html = $this->httpClient->get(self::URL);
        } catch (HttpClientException $e) {
            // Re-throw as the scraper-level exception, keeping the original as "previous".
            throw new ScraperException($e->getMessage(), $e->getCode(), $e);
        }

        $list = $this->extractList($html);

        yield from (new TextScraper($list))->get();
    }

    /**
     * Returns the text located between the list header and the list footer.
     *
     * @param string $html
     * @return string
     * @throws \Vantoozz\ProxyScraper\Exceptions\ScraperException
     */
    private function extractList(string $html): string
    {
        $afterHeader = $this->splitOnce(self::LIST_HEADER, $html)[1];

        return $this->splitOnce(self::LIST_FOOTER, $afterHeader)[0];
    }

    /**
     * Splits $subject on $delimiter and insists that the delimiter occurs exactly once.
     *
     * @param string $delimiter
     * @param string $subject
     * @return string[] two elements: the text before and the text after the delimiter
     * @throws \Vantoozz\ProxyScraper\Exceptions\ScraperException
     */
    private function splitOnce(string $delimiter, string $subject): array
    {
        $parts = explode($delimiter, $subject);

        // Zero occurrences give one part, two or more give three or more parts.
        if (2 !== count($parts)) {
            throw new ScraperException('Unexpected markup');
        }

        return $parts;
    }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,25 @@ | ||
<?php declare(strict_types = 1); | ||
|
||
namespace Vantoozz\ProxyScraper\IntegrationTests\Scrapers; | ||
|
||
use Vantoozz\ProxyScraper\IntegrationTests\IntegrationTest; | ||
use Vantoozz\ProxyScraper\Scrapers\PrimeSpeedScraper; | ||
use Vantoozz\ProxyScraper\Scrapers\UsProxyScraper; | ||
|
||
/**
 * Integration test for PrimeSpeedScraper.
 *
 * @package Vantoozz\ProxyScraper\IntegrationTests\Scrapers
 */
final class PrimeSpeedScraperTest extends IntegrationTest
{
    /**
     * Runs the scraper with the HTTP client supplied by the base test case
     * and expects it to produce at least one hundred proxies.
     *
     * @test
     */
    public function it_works(): void
    {
        $scraper = new PrimeSpeedScraper($this->httpClient());

        $found = count(iterator_to_array($scraper->get()));

        $this->assertGreaterThanOrEqual(100, $found);
    }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,128 @@ | ||
<?php declare(strict_types = 1); | ||
|
||
namespace Vantoozz\ProxyScraper\UnitTests\Scrapers; | ||
|
||
use PHPUnit\Framework\TestCase; | ||
use Vantoozz\ProxyScraper\Exceptions\HttpClientException; | ||
use Vantoozz\ProxyScraper\HttpClient\HttpClientInterface; | ||
use Vantoozz\ProxyScraper\Proxy; | ||
use Vantoozz\ProxyScraper\Scrapers\FoxToolsScraper; | ||
use Vantoozz\ProxyScraper\Scrapers\PrimeSpeedScraper; | ||
|
||
/**
 * Unit tests for PrimeSpeedScraper, run against a mocked HTTP client.
 *
 * The HTML fixtures must match the scraper's delimiters exactly: one blank
 * line between the format line and the "0.0.0.0:80" placeholder, and three
 * blank lines between the last proxy and the closing "</pre>".
 *
 * @package Vantoozz\ProxyScraper\UnitTests\Scrapers
 */
final class PrimeSpeedScraperTest extends TestCase
{
    /**
     * A failure of the HTTP client surfaces as a ScraperException with the same message.
     *
     * @test
     * @expectedException \Vantoozz\ProxyScraper\Exceptions\ScraperException
     * @expectedExceptionMessage error message
     */
    public function it_throws_an_exception_on_http_client_error(): void
    {
        /** @var HttpClientInterface|\PHPUnit_Framework_MockObject_MockObject $httpClient */
        $httpClient = $this->createMock(HttpClientInterface::class);
        $httpClient
            ->expects(self::once())
            ->method('get')
            ->willThrowException(new HttpClientException('error message'));

        $scraper = new PrimeSpeedScraper($httpClient);
        // The scraper is a generator; current() forces it to start executing.
        $scraper->get()->current();
    }

    /**
     * Well-formed markup yields the listed proxies, the placeholder entry excluded.
     *
     * @test
     */
    public function it_returns_a_proxy(): void
    {
        $html = <<<HTML
<pre>
format:
<proxy_server_name> : <proxy_port_number>

0.0.0.0:80
222.111.222.111:8118
222.111.222.122:8118



</pre>
HTML;

        $scraper = $this->scraperReturning($html);
        $proxies = iterator_to_array($scraper->get(), false);

        $this->assertInstanceOf(Proxy::class, $proxies[0]);
        $this->assertSame('222.111.222.111:8118', (string)$proxies[0]);
    }

    /**
     * Markup without the list header is rejected.
     *
     * @test
     * @expectedException \Vantoozz\ProxyScraper\Exceptions\ScraperException
     * @expectedExceptionMessage Unexpected markup
     */
    public function it_throws_an_exception_on_unexpected_markup(): void
    {
        $html = <<<HTML
<pre>
</pre>
HTML;

        $this->scraperReturning($html)->get()->current();
    }

    /**
     * Markup with a valid list header but no list footer is rejected as well.
     *
     * @test
     * @expectedException \Vantoozz\ProxyScraper\Exceptions\ScraperException
     * @expectedExceptionMessage Unexpected markup
     */
    public function it_throws_more_exceptions_on_unexpected_markup(): void
    {
        $html = <<<HTML
<pre>
format:
<proxy_server_name> : <proxy_port_number>

0.0.0.0:80
222.111.222.111:8118
222.111.222.122:8118
HTML;

        $this->scraperReturning($html)->get()->current();
    }

    /**
     * Builds a scraper whose HTTP client must be called exactly once and answers with $html.
     *
     * @param string $html response body returned by the mocked client
     * @return PrimeSpeedScraper
     */
    private function scraperReturning(string $html): PrimeSpeedScraper
    {
        /** @var HttpClientInterface|\PHPUnit_Framework_MockObject_MockObject $httpClient */
        $httpClient = $this->createMock(HttpClientInterface::class);
        $httpClient
            ->expects(self::once())
            ->method('get')
            ->willReturn($html);

        return new PrimeSpeedScraper($httpClient);
    }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters