Skip to content

Commit

Permalink
Merge pull request #386 from wikimedia/rate-limiter
Browse files Browse the repository at this point in the history
Add rate limiter to prevent bots and crawlers from pounding the site
  • Loading branch information
samwilson committed Nov 3, 2021
2 parents bb4bb07 + 9c43c8a commit 75f6dcd
Show file tree
Hide file tree
Showing 6 changed files with 127 additions and 2 deletions.
4 changes: 3 additions & 1 deletion .env
Original file line number Diff line number Diff line change
Expand Up @@ -62,4 +62,6 @@ REPLICAS_PORT_S6=4716
REPLICAS_PORT_S7=4717
REPLICAS_PORT_S8=4718
REPLICAS_USERNAME="username"
REPLICAS_PASSWORD="password123"
REPLICAS_PASSWORD="password123"
# Rate limiting of book-export requests (see App\EventSubscriber\RateLimitSubscriber).
# APP_RATE_LIMIT is the maximum number of requests allowed per X-Forwarded-For value;
# APP_RATE_DURATION is how many minutes a request count is kept after the last counted request.
# Set both to 0 to disable rate limiting.
APP_RATE_LIMIT=0
APP_RATE_DURATION=0
5 changes: 5 additions & 0 deletions config/services.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -65,5 +65,10 @@ services:
arguments:
$projectDir: '%kernel.project_dir%'

# Throttles book-export requests: $rateLimit is the maximum number of requests,
# $rateDuration the number of minutes the request count is remembered.
App\EventSubscriber\RateLimitSubscriber:
arguments:
$rateLimit: '%env(int:APP_RATE_LIMIT)%'
$rateDuration: '%env(int:APP_RATE_DURATION)%'

# add more service definitions when explicit configuration is needed
# please note that last definitions always *replace* previous ones
7 changes: 6 additions & 1 deletion config/services_test.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -60,13 +60,18 @@ services:
App\Generator\ConvertGenerator:
arguments:
$timeout: '%env(default::int:APP_TIMEOUT)%'

App\Repository\CreditRepository:
public: true

App\FileCache:
arguments:
$projectDir: '%kernel.project_dir%'

# Throttles book-export requests: $rateLimit is the maximum number of requests,
# $rateDuration the number of minutes the request count is remembered.
App\EventSubscriber\RateLimitSubscriber:
arguments:
$rateLimit: '%env(int:APP_RATE_LIMIT)%'
$rateDuration: '%env(int:APP_RATE_DURATION)%'

# add more service definitions when explicit configuration is needed
# please note that last definitions always *replace* previous ones
1 change: 1 addition & 0 deletions i18n/en.json
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@
"format-pdf-letter": "PDF - US letter size",
"format-rtf": "RTF",
"format-txt": "Plain text",
"exceeded-rate-limitation": "You are making too many requests during a short period of time. Please wait $1 {{PLURAL:$1|minute|minutes}} before reloading this tool.",
"exception-invalid-format": "\"$1\" is not a valid format. Valid formats are: $2",
"font-field-label": "Font",
"no-font-option": "None (use device default)",
Expand Down
1 change: 1 addition & 0 deletions i18n/qqq.json
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,7 @@
"error-page-issue": "Call to action text.\n\n$1 - HTML link, with text from error-page-issue-link below.",
"error-page-issue-link": "Link text to use in the link in error-page-issue above.",
"onwikiconfig-failure": "Error message displayed if the on-wiki configuration fetching failed.\n\n* $1 - the URL of the config JSON page on Wikisource.",
"exceeded-rate-limitation": "Error shown when too many requests are made in a short period of time. $1 is the number of minutes the client must wait to reload the page.",
"exception-fetching-credits": "Error message displayed when credits information could not be retrieved.",
"exception-book-conversion": "Error message displayed when a book could not be converted to another format.",
"epub-title-page": "Name of the title page in exported books (used in the table of contents).",
Expand Down
111 changes: 111 additions & 0 deletions src/EventSubscriber/RateLimitSubscriber.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,111 @@
<?php
declare( strict_types=1 );

namespace App\EventSubscriber;

use DateInterval;
use Krinkle\Intuition\Intuition;
use Psr\Cache\CacheItemPoolInterface;
use Symfony\Component\EventDispatcher\EventSubscriberInterface;
use Symfony\Component\HttpKernel\Event\ControllerEvent;
use Symfony\Component\HttpKernel\Exception\TooManyRequestsHttpException;
use Symfony\Component\HttpKernel\KernelEvents;

/**
 * Throttles book-export requests so that bots and crawlers cannot pound the site.
 *
 * Requests are counted per distinct X-Forwarded-For header value in the
 * injected cache pool. Once a client has made more than $rateLimit export
 * requests without a quiet period of $rateDuration minutes, further export
 * requests are rejected with an HTTP 429 response.
 */
class RateLimitSubscriber implements EventSubscriberInterface {

	/** @var Intuition Used to localise the "too many requests" message. */
	protected $intuition;

	/** @var CacheItemPoolInterface Stores the per-client request counters. */
	protected $cache;

	/** @var int Maximum number of export requests allowed; non-positive disables limiting. */
	protected $rateLimit;

	/** @var int Minutes a counter is kept after the last counted request; non-positive disables limiting. */
	protected $rateDuration;

	/**
	 * @param Intuition $intuition
	 * @param CacheItemPoolInterface $cache
	 * @param int $rateLimit Maximum number of export requests per client.
	 * @param int $rateDuration Lifetime of a client's counter, in minutes.
	 */
	public function __construct(
		Intuition $intuition,
		CacheItemPoolInterface $cache,
		int $rateLimit,
		int $rateDuration
	) {
		$this->intuition = $intuition;
		$this->cache = $cache;
		$this->rateLimit = $rateLimit;
		$this->rateDuration = $rateDuration;
	}

	/**
	 * Register our interest in the kernel.controller event.
	 * @return string[]
	 */
	public static function getSubscribedEvents(): array {
		return [
			KernelEvents::CONTROLLER => 'onKernelController',
		];
	}

	/**
	 * Check if the current user has exceeded the configured usage limitations.
	 *
	 * Only requests that are routed to an action named 'home' and carry a
	 * non-empty 'page' parameter (i.e. book exports) are counted.
	 *
	 * @param ControllerEvent $event
	 * @throws TooManyRequestsHttpException If the client is over its limit.
	 */
	public function onKernelController( ControllerEvent $event ): void {
		// Rate limiting is only meaningful when both settings are positive.
		// Checking each value separately (rather than their sum) avoids
		// blocking every request when only the limit is 0, and avoids building
		// an invalid or already-expired DateInterval when the duration is <= 0.
		if ( $this->rateLimit <= 0 || $this->rateDuration <= 0 ) {
			return;
		}

		$controller = $event->getController();
		$action = null;

		// when a controller class defines multiple action methods, the controller
		// is returned as [$controllerInstance, 'methodName']
		if ( is_array( $controller ) ) {
			[ , $action ] = $controller;
		}

		// Abort if we're not exporting a book.
		$request = $event->getRequest();
		if ( $action !== 'home' || !$request->get( 'page' ) ) {
			return;
		}

		// NOTE(review): X-Forwarded-For is a request header; this presumably relies
		// on a trusted proxy setting it — confirm for the deployment environment.
		$xff = $request->headers->get( 'x-forwarded-for', '' );
		if ( $xff === '' ) {
			// Happens in local environments, or outside of Cloud Services.
			return;
		}

		$cacheKey = "ratelimit.session." . md5( $xff );
		$cacheItem = $this->cache->getItem( $cacheKey );

		// Increment the value already in cache, or start with 1.
		$count = $cacheItem->isHit() ? (int)$cacheItem->get() + 1 : 1;

		// Check if limit has been exceeded, and if so, throw an error.
		// A rejected request is not saved, so it does not extend the counter's lifetime.
		if ( $count > $this->rateLimit ) {
			$this->denyAccess();
		}

		// Reset the clock on every accepted request.
		$cacheItem->set( $count )
			->expiresAfter( new DateInterval( 'PT' . $this->rateDuration . 'M' ) );
		$this->cache->save( $cacheItem );
	}

	/**
	 * Throw exception for denied access due to spider crawl or hitting usage limits.
	 *
	 * The exception is constructed with the configured duration converted to
	 * seconds as its first argument, and the localised message as its second.
	 *
	 * @throws TooManyRequestsHttpException Always.
	 */
	private function denyAccess(): void {
		// @phan-suppress-previous-line PhanPluginNeverReturnMethod
		$message = $this->intuition->msg( 'exceeded-rate-limitation', [
			'variables' => [ $this->rateDuration ]
		] );

		throw new TooManyRequestsHttpException( $this->rateDuration * 60, $message );
	}
}

0 comments on commit 75f6dcd

Please sign in to comment.