From 326d5bf8004ece51fbb44146766f5ddf765f1159 Mon Sep 17 00:00:00 2001 From: epriestley Date: Mon, 21 Nov 2016 07:06:30 -0800 Subject: [PATCH] Detect replicating masters and fatal (also, warn on nonreplicating replicas) Summary: Ref T10759. Check master/replica status during startup. After D16903, this also means that we check this status after a database comes back online after being unreachable. If a master is replicating, fatal (since this can do a million kinds of bad things). If a replica is not replicating, warn (this just means the replica is behind so some data is at risk). Also: if your masters were actually configured properly (mine weren't until this change detected it), we would throw away patches as we applied them, so they would only apply to the //first// master. Instead, properly apply all migration patches to all masters. Test Plan: - Started Phabricator with a replicating master, got a fatal. - Stopped replication on a replica, got a warning. - With two non-replicating masters, upgraded storage. Reviewers: chad Reviewed By: chad Maniphest Tasks: T10759 Differential Revision: https://secure.phabricator.com/D16904 --- .../check/PhabricatorDatabaseSetupCheck.php | 44 ++++++++++++++++++- .../cluster/PhabricatorDatabaseRef.php | 9 ++-- .../PhabricatorStorageManagementWorkflow.php | 16 ++++--- 3 files changed, 56 insertions(+), 13 deletions(-) diff --git a/src/applications/config/check/PhabricatorDatabaseSetupCheck.php b/src/applications/config/check/PhabricatorDatabaseSetupCheck.php index b5a188c142..7dbc00c9b2 100644 --- a/src/applications/config/check/PhabricatorDatabaseSetupCheck.php +++ b/src/applications/config/check/PhabricatorDatabaseSetupCheck.php @@ -43,7 +43,7 @@ final class PhabricatorDatabaseSetupCheck extends PhabricatorSetupCheck { $port)); } - $refs = PhabricatorDatabaseRef::getActiveDatabaseRefs(); + $refs = PhabricatorDatabaseRef::queryAll(); $refs = mpull($refs, null, 'getRefKey'); // Test if we can connect to each database first. If we can not connect @@ -164,5 +164,47 @@ final class PhabricatorDatabaseSetupCheck extends PhabricatorSetupCheck { return true; } + + // NOTE: It's possible that replication is broken but we have not been + // granted permission to "SHOW SLAVE STATUS" so we can't figure it out. + // We allow this kind of configuration and survive these checks, trusting + // that operations knows what they're doing. This issue is shown on the + // "Database Servers" console. + + switch ($ref->getReplicaStatus()) { + case PhabricatorDatabaseRef::REPLICATION_MASTER_REPLICA: + $message = pht( + 'Database host "%s" is configured as a master, but is replicating '. + 'another host. This is dangerous and can mangle or destroy data. '. + 'Only replicas should be replicating. Stop replication on the '. + 'host or reconfigure Phabricator.', + $ref->getRefKey()); + + $this->newIssue('db.master.replicating') + ->setName(pht('Replicating Master')) + ->setIsFatal(true) + ->setMessage($message); + + return true; + case PhabricatorDatabaseRef::REPLICATION_REPLICA_NONE: + case PhabricatorDatabaseRef::REPLICATION_NOT_REPLICATING: + if (!$ref->getIsMaster()) { + $message = pht( + 'Database replica "%s" is listed as a replica, but is not '. + 'currently replicating. You are vulnerable to data loss if '. + 'the master fails.', + $ref->getRefKey()); + + // This isn't a fatal because it can normally only put data at risk, + // not actually do anything destructive or unrecoverable. + + $this->newIssue('db.replica.not-replicating') + ->setName(pht('Nonreplicating Replica')) + ->setMessage($message); + } + break; + } + + } } diff --git a/src/infrastructure/cluster/PhabricatorDatabaseRef.php b/src/infrastructure/cluster/PhabricatorDatabaseRef.php index 6b1b7f3612..22f405ad91 100644 --- a/src/infrastructure/cluster/PhabricatorDatabaseRef.php +++ b/src/infrastructure/cluster/PhabricatorDatabaseRef.php @@ -308,13 +308,12 @@ final class PhabricatorDatabaseRef } public static function queryAll() { - $refs = self::newRefs(); + $refs = self::getActiveDatabaseRefs(); + return self::queryRefs($refs); + } + private static function queryRefs(array $refs) { foreach ($refs as $ref) { - if ($ref->getDisabled()) { - continue; - } - $conn = $ref->newManagementConnection(); $t_start = microtime(true); diff --git a/src/infrastructure/storage/management/workflow/PhabricatorStorageManagementWorkflow.php b/src/infrastructure/storage/management/workflow/PhabricatorStorageManagementWorkflow.php index 2404918b56..5fb117eb51 100644 --- a/src/infrastructure/storage/management/workflow/PhabricatorStorageManagementWorkflow.php +++ b/src/infrastructure/storage/management/workflow/PhabricatorStorageManagementWorkflow.php @@ -842,6 +842,8 @@ abstract class PhabricatorStorageManagementWorkflow $no_quickstart, $init_only) { + $patches = $this->patches; + $applied = $api->getAppliedPatches(); if ($applied === null) { if ($this->dryRun) { @@ -864,7 +866,7 @@ abstract class PhabricatorStorageManagementWorkflow // adjustment phase. $this->didInitialize = true; - $legacy = $api->getLegacyPatches($this->patches); + $legacy = $api->getLegacyPatches($patches); if ($legacy || $no_quickstart || $init_only) { // If we have legacy patches, we can't quickstart. @@ -921,14 +923,14 @@ abstract class PhabricatorStorageManagementWorkflow while (true) { $applied_something = false; - foreach ($this->patches as $key => $patch) { + foreach ($patches as $key => $patch) { if (isset($applied[$key])) { - unset($this->patches[$key]); + unset($patches[$key]); continue; } if ($apply_only && $apply_only != $key) { - unset($this->patches[$key]); + unset($patches[$key]); continue; } @@ -968,17 +970,17 @@ abstract class PhabricatorStorageManagementWorkflow } } - unset($this->patches[$key]); + unset($patches[$key]); $applied[$key] = true; } if (!$applied_something) { - if (count($this->patches)) { + if (count($patches)) { throw new Exception( pht( 'Some patches could not be applied to "%s": %s', $api->getRef()->getRefKey(), - implode(', ', array_keys($this->patches)))); + implode(', ', array_keys($patches)))); } else if (!$this->dryRun && !$apply_only) { echo pht( 'Storage is up to date on "%s". Use "%s" for details.',