diff --git a/src/aphront/configuration/AphrontApplicationConfiguration.php b/src/aphront/configuration/AphrontApplicationConfiguration.php index b48ea3be13..dc32faedec 100644 --- a/src/aphront/configuration/AphrontApplicationConfiguration.php +++ b/src/aphront/configuration/AphrontApplicationConfiguration.php @@ -345,6 +345,18 @@ abstract class AphrontApplicationConfiguration extends Phobject { if ($site->shouldRequireHTTPS()) { if (!$request->isHTTPS()) { + + // Don't redirect intracluster requests: doing so drops headers and + // parameters, imposes a performance penalty, and indicates a + // misconfiguration. + if ($request->isProxiedClusterRequest()) { + throw new AphrontMalformedRequestException( + pht('HTTPS Required'), + pht( + 'This request reached a site which requires HTTPS, but the '. + 'request is not marked as HTTPS.')); + } + $https_uri = $request->getRequestURI(); $https_uri->setDomain($request->getHost()); $https_uri->setProtocol('https'); diff --git a/src/aphront/site/PhabricatorSite.php b/src/aphront/site/PhabricatorSite.php index 866ca178ca..95e0a538fa 100644 --- a/src/aphront/site/PhabricatorSite.php +++ b/src/aphront/site/PhabricatorSite.php @@ -3,6 +3,15 @@ abstract class PhabricatorSite extends AphrontSite { public function shouldRequireHTTPS() { + // If this is an intracluster request, it's okay for it to use HTTP even + // if the site otherwise requires HTTPS. It is common to terminate SSL at + // a load balancer and use plain HTTP from then on, and administrators are + // usually not concerned about attackers observing traffic within a + // datacenter. + if (PhabricatorEnv::isClusterRemoteAddress()) { + return false; + } + return PhabricatorEnv::getEnvConfig('security.require-https'); } diff --git a/src/applications/config/option/PhabricatorClusterConfigOptions.php b/src/applications/config/option/PhabricatorClusterConfigOptions.php index bed97b15a5..bcf498c32b 100644 --- a/src/applications/config/option/PhabricatorClusterConfigOptions.php +++ b/src/applications/config/option/PhabricatorClusterConfigOptions.php @@ -34,25 +34,32 @@ EOTEXT PhabricatorEnv::getDoclink('Cluster: Databases'), pht('Cluster: Databases'))); + + $intro_href = PhabricatorEnv::getDoclink('Clustering Introduction'); + $intro_name = pht('Clustering Introduction'); + return array( $this->newOption('cluster.addresses', 'list', array()) ->setLocked(true) ->setSummary(pht('Address ranges of cluster hosts.')) ->setDescription( pht( - 'To allow Phabricator nodes to communicate with other nodes '. - 'in the cluster, provide an address whitelist of hosts that '. - 'are part of the cluster.'. + 'Define a Phabricator cluster by providing a whitelist of host '. + 'addresses that are part of the cluster.'. "\n\n". - 'Hosts on this whitelist are permitted to use special cluster '. - 'mechanisms to authenticate requests. By default, these '. - 'mechanisms are disabled.'. + 'Hosts on this whitelist have special powers. These hosts are '. + 'permitted to bend security rules, and misconfiguring this list '. + 'can make your install less secure. For more information, '. + 'see **[[ %s | %s ]]**.'. "\n\n". 'Define a list of CIDR blocks which whitelist all hosts in the '. - 'cluster. See the examples below for details.', + 'cluster and no additional hosts. See the examples below for '. + 'details.'. "\n\n". 'When cluster addresses are defined, Phabricator hosts will also '. - 'reject requests to interfaces which are not whitelisted.')) + 'reject requests to interfaces which are not whitelisted.', + $intro_href, + $intro_name)) ->addExample( array( '23.24.25.80/32', diff --git a/src/applications/config/option/PhabricatorSecurityConfigOptions.php b/src/applications/config/option/PhabricatorSecurityConfigOptions.php index 8c9907d736..311c2d30b6 100644 --- a/src/applications/config/option/PhabricatorSecurityConfigOptions.php +++ b/src/applications/config/option/PhabricatorSecurityConfigOptions.php @@ -80,22 +80,25 @@ final class PhabricatorSecurityConfigOptions pht( "If the web server responds to both HTTP and HTTPS requests but ". "you want users to connect with only HTTPS, you can set this ". - "to true to make Phabricator redirect HTTP requests to HTTPS.\n\n". - + "to `true` to make Phabricator redirect HTTP requests to HTTPS.". + "\n\n". "Normally, you should just configure your server not to accept ". "HTTP traffic, but this setting may be useful if you originally ". "used HTTP and have now switched to HTTPS but don't want to ". "break old links, or if your webserver sits behind a load ". "balancer which terminates HTTPS connections and you can not ". - "reasonably configure more granular behavior there.\n\n". - + "reasonably configure more granular behavior there.". + "\n\n". "IMPORTANT: Phabricator determines if a request is HTTPS or not ". "by examining the PHP `%s` variable. If you run ". "Apache/mod_php this will probably be set correctly for you ". "automatically, but if you run Phabricator as CGI/FCGI (e.g., ". "through nginx or lighttpd), you need to configure your web ". "server so that it passes the value correctly based on the ". - "connection type.", + "connection type.". + "\n\n". + "If you configure Phabricator in cluster mode, note that this ". + "setting is ignored by intracluster requests.", "\$_SERVER['HTTPS']")) ->setBoolOptions( array( diff --git a/src/docs/user/cluster/cluster.diviner b/src/docs/user/cluster/cluster.diviner index 24f4261329..93800c81bb 100644 --- a/src/docs/user/cluster/cluster.diviner +++ b/src/docs/user/cluster/cluster.diviner @@ -30,6 +30,80 @@ For additional guidance on setting up a cluster, see "Overlaying Services" and "Cluster Recipes" at the bottom of this document. +Preparing for Clustering +======================== + +To begin deploying Phabricator in cluster mode, set up `cluster.addresses` +in your configuration. + +This option should contain a list of network addess blocks which are considered +to be part of the cluster. Hosts in this list are allowed to bend (or even +break) some of the security and policy rules when they make requests to other +hosts in the cluster, so this list should be as small as possible. See "Cluster +Whitelist Security" below for discussion. + +If you are deploying hardware in EC2, a reasonable approach is to launch a +dedicated Phabricator VPC, whitelist the whole VPC as a Phabricator cluster, +and then deploy only Phabricator services into that VPC. + +If you have additional auxiliary hosts which run builds and tests via Drydock, +you should //not// include them in the cluster address definition. For more +detailed discussion of the Drydock security model, see @{Drydock User Guide: +Security}. + +Most other clustering features will not work until you define a cluster by +configuring `cluster.addresses`. + + +Cluster Whitelist Security +======================== + +When you configure `cluster.addresses`, you should keep the list of trusted +cluster hosts as small as possible. Hosts on this list gain additional +capabilities, including these: + +**Trusted HTTP Headers**: Normally, Phabricator distrusts the load balancer +HTTP headers `X-Forwarded-For` and `X-Forwarded-Proto` because they may be +client-controlled and can be set to arbitrary values by an attacker if no load +balancer is deployed. In particular, clients can set `X-Forwarded-For` to any +value and spoof traffic from arbitrary remotes. + +These headers are trusted when they are received from a host on the cluster +address whitelist. This allows requests from cluster loadbalancers to be +interpreted correctly by default without requiring additional custom code or +configuration. + +**Intracluster HTTP**: Requests from cluster hosts are not required to use +HTTPS, even if `security.require-https` is enabled, because it is common to +terminate HTTPS on load balancers and use plain HTTP for requests within a +cluster. + +**Special Authentication Mechanisms**: Cluster hosts are allowed to connect to +other cluster hosts with "root credentials", and to impersonate any user +account. + +The use of root credentials is required because the daemons must be able to +bypass policies in order to function properly: they need to send mail about +private conversations and import commits in private repositories. + +The ability to impersonate users is required because SSH nodes must receive, +interpret, modify, and forward SSH traffic. They can not use the original +credentials to do this because SSH authentication is asymmetric and they do not +have the user's private key. Instead, they use root credentials and impersonate +the user within the cluster. + +These mechanisms are still authenticated (and use asymmetric keys, like SSH +does), so access to a host in the cluster address block does not mean that an +attacker can immediately compromise the cluster. However, an overbroad cluster +address whitelist may give an attacker who gains some access additional tools +to escalate access. + +Note that if an attacker gains access to an actual cluster host, these extra +powers are largely moot. Most cluster hosts must be able to connect to the +master database to function properly, so the attacker will just do that and +freely read or modify whatever data they want. + + Cluster: Databases ================= @@ -39,7 +113,7 @@ most important service to make redundant if your focus is on availability and disaster recovery. Configuring replicas allows Phabricator to run in read-only mode if you lose -the master, and to quickly promote the replica as a replacement. +the master and to quickly promote the replica as a replacement. For details, see @{article:Cluster: Databases}. diff --git a/src/docs/user/cluster/cluster_databases.diviner b/src/docs/user/cluster/cluster_databases.diviner index 653b521ee0..03c9619c97 100644 --- a/src/docs/user/cluster/cluster_databases.diviner +++ b/src/docs/user/cluster/cluster_databases.diviner @@ -301,6 +301,18 @@ it says to do: TODO: Make `bin/storage dump` replica-aware. See T10758. +With recent versions of MySQL, it is also possible to configure a //delayed// +replica which intentionally lags behind the master (say, by 12 hours). In the +event of a bad mutation, this could give you a larger window of time to +recognize the issue and recover the lost data from the delayed replica (which +might be quick) without needing to restore backups (which might be very slow). + +Delayed replication is outside the scope of this document, but may be worth +considering as an additional data security step on top of backup snapshots +depending on your resources and needs. If you configure a delayed replica, do +not add it to the `cluster.databases` configuration: Phabricator should never +send traffic to it, and does not need to know about it. + Next Steps ========== diff --git a/src/docs/user/cluster/cluster_repositories.diviner b/src/docs/user/cluster/cluster_repositories.diviner index d9e859fd42..c2750fbd78 100644 --- a/src/docs/user/cluster/cluster_repositories.diviner +++ b/src/docs/user/cluster/cluster_repositories.diviner @@ -58,6 +58,32 @@ Before responding to a write, replicas obtain a global lock, perform the same version check and fetch if necessary, then allow the write to continue. +HTTP vs HTTPS +============= + +Intracluster requests (from the daemons to repository servers, or from +webservers to repository servers) are permitted to use HTTP, even if you have +set `security.require-https` in your configuration. + +It is common to terminate SSL at a load balancer and use plain HTTP beyond +that, and the `security.require-https` feature is primarily focused on making +client browser behavior more convenient for users, so it does not apply to +intracluster traffic. + +Using HTTP within the cluster leaves you vulnerable to attackers who can +observe traffic within a datacenter, or observe traffic between datacenters. +This is normally very difficult, but within reach for state-level adversaries +like the NSA. + +If you are concerned about these attackers, you can terminate HTTPS on +repository hosts and bind to them with the "https" protocol. Just be aware that +the `security.require-https` setting won't prevent you from making +configuration mistakes, as it doesn't cover intracluster traffic. + +Other mitigations are possible, but securing a network against the NSA and +similar agents of other rogue nations is beyond the scope of this document. + + Backups ======