Skip to content

Commit

Permalink
[BUGFIX] Add additional headers for checking external links
Browse files Browse the repository at this point in the history
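This addresses some problems that were reported when checking
external links by sending a default set of HTTP request headers
(User-Agent, Accept, Accept-Language, Accept-Encoding) with each request.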

Resolves: #86918
Releases: master, 9.5
Change-Id: I8e84791a4d140e3cec40c012d54077fa84f48aa0
Reviewed-on: https://review.typo3.org/c/Packages/TYPO3.CMS/+/61829
Tested-by: Sascha Rademacher <sascha.rademacher+typo3@gmail.com>
Tested-by: TYPO3com <noreply@typo3.com>
Tested-by: Julian Geils <j_geils@web.de>
Tested-by: Jörg Bösche <typo3@joergboesche.de>
Tested-by: Steffen Frese <steffenf14@gmail.com>
Tested-by: Tobi Kretschmann <tobi@tobishome.de>
Reviewed-by: Sascha Rademacher <sascha.rademacher+typo3@gmail.com>
Reviewed-by: Julian Geils <j_geils@web.de>
Reviewed-by: Jörg Bösche <typo3@joergboesche.de>
Reviewed-by: Steffen Frese <steffenf14@gmail.com>
Reviewed-by: Tobi Kretschmann <tobi@tobishome.de>
  • Loading branch information
sypets authored and d3pendent committed Oct 7, 2019
1 parent 5a32b99 commit 5870350
Show file tree
Hide file tree
Showing 2 changed files with 55 additions and 18 deletions.
28 changes: 17 additions & 11 deletions typo3/sysext/linkvalidator/Classes/Linktype/ExternalLinktype.php
Expand Up @@ -27,31 +27,36 @@ class ExternalLinktype extends AbstractLinktype
/**
* Cached list of the URLs, which were already checked for the current processing
*
* @var array $urlReports
* @var array
*/
protected $urlReports = [];

/**
* Cached list of all error parameters of the URLs, which were already checked for the current processing
*
* @var array $urlErrorParams
* @var array
*/
protected $urlErrorParams = [];

/**
* List of headers to be used for matching an URL for the current processing
* List of HTTP request headers to use for checking a URL
*
* @var array $additionalHeaders
* @var array
*/
protected $additionalHeaders = [];
protected $headers = [
'User-Agent' => 'TYPO3 linkvalidator',
'Accept' => '*/*',
'Accept-Language' => '*',
'Accept-Encoding' => '*',
];

/**
* @var RequestFactory
*/
protected $requestFactory;

/**
* @var array $this->errorParams
* @var array
*/
protected $errorParams = [];

Expand All @@ -71,14 +76,16 @@ public function __construct(RequestFactory $requestFactory = null)
*/
public function checkLink($origUrl, $softRefEntry, $reference)
{
$isValidUrl = false;
// use URL from cache, if available
if (isset($this->urlReports[$origUrl])) {
$this->setErrorParams($this->urlErrorParams[$origUrl]);
return $this->urlReports[$origUrl];
}
$options = [
'cookies' => GeneralUtility::makeInstance(CookieJar::class),
'allow_redirects' => ['strict' => true]
'allow_redirects' => ['strict' => true],
'headers' => $this->headers
];
$url = $this->preprocessUrl($origUrl);
if (!empty($url)) {
Expand Down Expand Up @@ -108,13 +115,12 @@ protected function requestUrl(string $url, string $method, array $options): bool
$isValidUrl = false;
try {
$response = $this->requestFactory->request($url, $method, $options);
if ($response->getStatusCode() < 300) {
$isValidUrl = true;
} else {
if ($response->getStatusCode() >= 300) {
$this->errorParams['errorType'] = $response->getStatusCode();
$this->errorParams['message'] = $this->getErrorMessage($this->errorParams);
} else {
$isValidUrl = true;
}
$isValidUrl = true;
} catch (\GuzzleHttp\Exception\TooManyRedirectsException $e) {
// redirect loop or too many redirects
// todo: change errorType to 'redirect' (breaking change)
Expand Down
Expand Up @@ -15,13 +15,14 @@
* The TYPO3 project - inspiring people to share!
*/

use GuzzleHttp\Cookie\CookieJar;
use GuzzleHttp\Exception\ClientException;
use GuzzleHttp\Psr7\Response;
use Prophecy\Argument;
use Prophecy\Prophecy\ObjectProphecy;
use TYPO3\CMS\Core\Http\RequestFactory;

use TYPO3\CMS\Core\Localization\LanguageService;
use TYPO3\CMS\Core\Utility\GeneralUtility;
use TYPO3\CMS\Linkvalidator\Linktype\ExternalLinktype;
use TYPO3\TestingFramework\Core\Unit\UnitTestCase;

Expand Down Expand Up @@ -56,12 +57,16 @@ public function checkLinkWithExternalUrlNotFoundReturnsFalse()
$exceptionProphecy->getResponse()
->willReturn($responseProphecy->reveal());

$url = 'https://example.org/~not-existing-url';
$options = $this->getRequestHeaderOptions();
$requestFactoryProphecy = $this->prophesize(RequestFactory::class);
$requestFactoryProphecy->request(Argument::any(), Argument::any(), Argument::any())
$requestFactoryProphecy->request($url, 'HEAD', $options)
->willThrow($exceptionProphecy->reveal());
$subject = new ExternalLinktype($requestFactoryProphecy->reveal());

$url = 'https://example.org/~not-existing-URL';
$optionsSecondTryWithGET = array_merge_recursive($options, ['headers' => ['Range' => 'bytes=0-4048']]);
$requestFactoryProphecy->request($url, 'GET', $optionsSecondTryWithGET)
->willThrow($exceptionProphecy->reveal());
$subject = new ExternalLinktype($requestFactoryProphecy->reveal());

$result = $subject->checkLink($url, null, null);

Expand All @@ -82,16 +87,42 @@ public function checkLinkWithExternalUrlNotFoundResultsNotFoundErrorType()
$exceptionProphecy->getResponse()
->willReturn($responseProphecy->reveal());

$options = $this->getRequestHeaderOptions();

$url = 'https://example.org/~not-existing-url';
$requestFactoryProphecy = $this->prophesize(RequestFactory::class);
$requestFactoryProphecy->request(Argument::any(), Argument::any(), Argument::any())
$requestFactoryProphecy->request($url, 'HEAD', $options)
->willThrow($exceptionProphecy->reveal());
$optionsSecondTryWithGET = array_merge_recursive($options, ['headers' => ['Range' => 'bytes=0-4048']]);
$requestFactoryProphecy->request($url, 'GET', $optionsSecondTryWithGET)
->willThrow($exceptionProphecy->reveal());
$subject = new ExternalLinktype($requestFactoryProphecy->reveal());

$url = 'https://example.org/~not-existing-URL';

$subject->checkLink($url, null, null);
$result = $subject->getErrorParams()['errorType'];

self::assertSame(404, $result);
}

/**
 * Creates a revealed CookieJar test double and hands it to
 * GeneralUtility::addInstance() under CookieJar::class.
 *
 * NOTE(review): presumably this makes the next
 * GeneralUtility::makeInstance(CookieJar::class) call return this very
 * object, so the option arrays compare equal - confirm against the
 * GeneralUtility documentation.
 *
 * @return CookieJar the revealed double that was registered
 */
private function getCookieJarProphecy(): CookieJar
{
    $revealedJar = $this->prophesize(CookieJar::class)->reveal();
    GeneralUtility::addInstance(CookieJar::class, $revealedJar);

    return $revealedJar;
}

/**
 * Builds the request options array that the tests in this class pass as
 * the expected third argument of RequestFactory::request().
 *
 * Each call creates and registers a fresh CookieJar double through
 * getCookieJarProphecy().
 *
 * @return array options with the keys 'cookies', 'allow_redirects' and 'headers'
 */
private function getRequestHeaderOptions(): array
{
    $requestOptions = [];

    // Key order is kept as cookies, allow_redirects, headers.
    $requestOptions['cookies'] = $this->getCookieJarProphecy();
    $requestOptions['allow_redirects'] = ['strict' => true];
    $requestOptions['headers'] = [
        'User-Agent' => 'TYPO3 linkvalidator',
        'Accept' => '*/*',
        'Accept-Language' => '*',
        'Accept-Encoding' => '*',
    ];

    return $requestOptions;
}
}

0 comments on commit 5870350

Please sign in to comment.