From 4d0935ba5e6900d87e8420ac9d7de89e5440f24d Mon Sep 17 00:00:00 2001 From: epriestley Date: Tue, 8 Apr 2014 18:36:21 -0700 Subject: [PATCH] Rate limit requests by IP Summary: Fixes T3923. On `secure.phabricator.com`, we occasionally get slowed to a crawl when someone runs a security scanner against us, or 5 search bots decide to simultaneously index every line of every file in Diffusion. Every time a user makes a request, give their IP address some points. If they get too many points in 5 minutes, start blocking their requests automatically for a while. We give fewer points for logged in requests. We could further refine this (more points for a 404, more points for a really slow page, etc.) but let's start simply. Also, provide a mechanism for configuring this, and configuring the LB environment stuff at the same time (this comes up rarely, but we don't have a good answer right now). Test Plan: Used `ab` and reloading over and over again to hit rate limits. Read documentation. Reviewers: btrahan Reviewed By: btrahan Subscribers: chad, epriestley Maniphest Tasks: T3923 Differential Revision: https://secure.phabricator.com/D8713 --- .gitignore | 3 + .../configuration/configuration_guide.diviner | 3 + .../configuring_preamble.diviner | 114 ++++++++ support/PhabricatorStartup.php | 252 ++++++++++++++++++ webroot/index.php | 27 +- 5 files changed, 398 insertions(+), 1 deletion(-) create mode 100644 src/docs/user/configuration/configuring_preamble.diviner diff --git a/.gitignore b/.gitignore index b3c0a8afdd..a84df0e9b2 100644 --- a/.gitignore +++ b/.gitignore @@ -33,6 +33,9 @@ # User-accessible hook for adhoc debugging scripts /support/debug.php +# User-accessible hook for adhoc startup code +/support/preamble.php + # Users can link binaries here /support/bin/* diff --git a/src/docs/user/configuration/configuration_guide.diviner b/src/docs/user/configuration/configuration_guide.diviner index 6666dd0ca2..bd0aa922cb 100644 --- 
a/src/docs/user/configuration/configuration_guide.diviner +++ b/src/docs/user/configuration/configuration_guide.diviner @@ -197,6 +197,9 @@ Continue by: @{article:Configuring Accounts and Registration}; or - understanding advanced configuration topics with @{article:Configuration User Guide: Advanced Configuration}; or + - configuring a preamble script to set up the environment properly behind a + load balancer, or adjust rate limiting with + @{article:Configuring a Preamble Script}; or - configuring where uploaded files and attachments will be stored with @{article:Configuring File Storage}; or - configuring Phabricator so it can send mail with diff --git a/src/docs/user/configuration/configuring_preamble.diviner b/src/docs/user/configuration/configuring_preamble.diviner new file mode 100644 index 0000000000..a8710598bf --- /dev/null +++ b/src/docs/user/configuration/configuring_preamble.diviner @@ -0,0 +1,114 @@ +@title Configuring a Preamble Script +@group config + +Adjust environmental settings (SSL, remote IP, rate limiting) using a preamble +script. + += Overview = + +If Phabricator is deployed in an environment where HTTP headers behave oddly +(usually, because it is behind a load balancer), it may not be able to detect +some environmental features (like the client's IP, or the presence of SSL) +correctly. + +You can use a special preamble script to make arbitrary adjustments to the +environment and some parts of Phabricator's configuration in order to fix these +problems and set up the environment which Phabricator expects. + +NOTE: This is an advanced feature. Most installs should not need to configure +a preamble script. + += Creating a Preamble Script = + +To create a preamble script, write a file to: + + phabricator/support/preamble.php + +(This file is in Phabricator's `.gitignore`, so you do not need to worry about +colliding with `git` or interacting with updates.) + +This file should be a valid PHP script. 
If you aren't very familiar with PHP, +you can check for syntax errors with `php -l`: + + phabricator/ $ php -l support/preamble.php + No syntax errors detected in support/preamble.php + +If present, this script will be executed at the very beginning of each web +request, allowing you to adjust the environment. For common adjustments and +examples, see the next sections. + += Adjusting Client IPs = + +If your install is behind a load balancer, Phabricator may incorrectly detect +all requests as originating from the load balancer, rather than from the correct +client IPs. If this is the case and some other header (like `X-Forwarded-For`) +is known to be trustworthy, you can overwrite the `REMOTE_ADDR` setting so +Phabricator can figure out the client IP correctly: + +``` +name=Overwrite REMOTE_ADDR with X-Forwarded-For + (self::$maximumRate * self::getRateLimitBucketCount())) { + // Give the user some bonus points for getting rate limited. This keeps + // bad actors who keep slamming the 429 page locked out completely, + // instead of letting them get a burst of requests through every minute + // after a bucket expires. + self::addRateLimitScore($user_identity, 50); + self::didRateLimit($user_identity); + } + } + + + /** + * Add points to the rate limit score for some user. + * + * If users have earned more than 1000 points per minute across all the + * buckets they'll be locked out of the application, so awarding 1 point per + * request roughly corresponds to allowing 1000 requests per minute, while + * awarding 50 points roughly corresponds to allowing 20 requests per minute. + * + * @param string Some key which identifies the user making the request. + * @param float The cost for this request; more points pushes them toward + * the limit faster. 
+ * @return void + * @task ratelimit + */ + public static function addRateLimitScore($user_identity, $score) { + if (!self::canRateLimit()) { + return; + } + + $current = self::getRateLimitBucket(); + + // There's a bit of a race here, if a second process reads the bucket before + // this one writes it, but it's fine if we occasionally fail to record a + // user's score. If they're making requests fast enough to hit rate + // limiting, we'll get them soon. + + $bucket_key = self::getRateLimitBucketKey($current); + $bucket = apc_fetch($bucket_key); + if (!is_array($bucket)) { + $bucket = array(); + } + + if (empty($bucket[$user_identity])) { + $bucket[$user_identity] = 0; + } + + $bucket[$user_identity] += $score; + apc_store($bucket_key, $bucket); + } + + + /** + * Determine if rate limiting is available. + * + * Rate limiting is disabled when no maximum rate is set. It also depends on + * APC, and isn't available unless `apc_fetch()` exists. + * + * @return bool True if rate limiting is available. + * @task ratelimit + */ + private static function canRateLimit() { + if (!self::$maximumRate) { + return false; + } + + if (!function_exists('apc_fetch')) { + return false; + } + + return true; + } + + + /** + * Get the current bucket for storing rate limit scores. + * + * @return int The current bucket (Unix time in whole minutes). + * @task ratelimit + */ + private static function getRateLimitBucket() { + return (int)(time() / 60); + } + + + /** + * Get the total number of rate limit buckets to retain. + * + * @return int Total number of rate limit buckets to retain. + * @task ratelimit + */ + private static function getRateLimitBucketCount() { + return 5; + } + + + /** + * Get the APC key for a given bucket. + * + * @param int Bucket to get the key for. + * @return string APC key for the bucket. 
+ * @task ratelimit + */ + private static function getRateLimitBucketKey($bucket) { + return 'rate:bucket:'.$bucket; + } + + + /** + * Get the APC key for the smallest stored bucket. 
+ * + * @return string APC key for the smallest stored bucket. + * @task ratelimit + */ + private static function getRateLimitMinKey() { + return 'rate:min'; + } + + + /** + * Get a user's current rate limit score, expiring stale buckets as needed. + * + * @param string Unique key identifying the user. + * @return float The user's current score. + * @task ratelimit + */ + private static function getRateLimitScore($user_identity) { + $min_key = self::getRateLimitMinKey(); + + // Identify the oldest bucket stored in APC. + $cur = self::getRateLimitBucket(); + $min = apc_fetch($min_key); + + // If we don't have any buckets stored yet, store the current bucket as + // the oldest bucket. + if (!$min) { + apc_store($min_key, $cur); + $min = $cur; + } + + // Destroy any buckets that are older than the minimum bucket we're keeping + // track of. Under load this normally shouldn't do anything, but will clean + // up an old bucket once per minute. + $count = self::getRateLimitBucketCount(); + for ($cursor = $min; $cursor < ($cur - $count); $cursor++) { + apc_delete(self::getRateLimitBucketKey($cursor)); + apc_store($min_key, $cursor + 1); + } + + // Sum the user's scores over the current bucket and up to $count older ones. + $score = 0; + for (; $cursor <= $cur; $cursor++) { + $bucket = apc_fetch(self::getRateLimitBucketKey($cursor)); + if (isset($bucket[$user_identity])) { + $score += $bucket[$user_identity]; + } + } + + return $score; + } + + + /** + * Emit an HTTP 429 "Too Many Requests" response (indicating that the user + * has exceeded application rate limits) and exit. + * + * @return exit This method **does not return**. + * @task ratelimit + */ + private static function didRateLimit() { + $message = + "TOO MANY REQUESTS\n". + "You are issuing too many requests too quickly.\n". + "To adjust limits, see \"Configuring a Preamble Script\" in the ". 
+ "documentation."; + + header( + 'Content-Type: text/plain; charset=utf-8', + $replace = true, + $http_error = 429); + + echo $message; + + exit(1); + } + } diff --git a/webroot/index.php b/webroot/index.php index 7072727e7f..0adaf7322f 100644 --- a/webroot/index.php +++ b/webroot/index.php @@ -1,6 +1,14 @@ getUser() && $request->getUser()->getPHID()) { + $score = $score / 5; + } + + PhabricatorStartup::addRateLimitScore($user_ip, $score); + } + } catch (Exception $ex) { PhabricatorStartup::didEncounterFatalException( 'Core Exception',