1
0
Fork 0
mirror of https://we.phorge.it/source/phorge.git synced 2024-11-19 05:12:41 +01:00

Detect replicating masters and fatal (also, warn on nonreplicating replicas)

Summary:
Ref T10759. Check master/replica status during startup.

After D16903, this also means that we check this status after a database comes back online after being unreachable.

If a master is replicating, fatal (since this can do a million kinds of bad things).

If a replica is not replicating, warn (this just means the replica is behind so some data is at risk).

Also: if your masters were actually configured properly (mine weren't until this change detected it), we would throw away patches as we applied them, so they would only apply to the //first// master. Instead, properly apply all migration patches to all masters.

Test Plan:
  - Started Phabricator with a replicating master, got a fatal.
  - Stopped replication on a replica, got a warning.
  - With two non-replicating masters, upgraded storage.

Reviewers: chad

Reviewed By: chad

Maniphest Tasks: T10759

Differential Revision: https://secure.phabricator.com/D16904
This commit is contained in:
epriestley 2016-11-21 07:06:30 -08:00
parent bc4187d709
commit 326d5bf800
3 changed files with 56 additions and 13 deletions

View file

@ -43,7 +43,7 @@ final class PhabricatorDatabaseSetupCheck extends PhabricatorSetupCheck {
$port)); $port));
} }
$refs = PhabricatorDatabaseRef::getActiveDatabaseRefs(); $refs = PhabricatorDatabaseRef::queryAll();
$refs = mpull($refs, null, 'getRefKey'); $refs = mpull($refs, null, 'getRefKey');
// Test if we can connect to each database first. If we can not connect // Test if we can connect to each database first. If we can not connect
@ -164,5 +164,47 @@ final class PhabricatorDatabaseSetupCheck extends PhabricatorSetupCheck {
return true; return true;
} }
// NOTE: It's possible that replication is broken but we have not been
// granted permission to "SHOW SLAVE STATUS" so we can't figure it out.
// We allow this kind of configuration and survive these checks, trusting
// that operations knows what they're doing. This issue is shown on the
// "Database Servers" console.
switch ($ref->getReplicaStatus()) {
case PhabricatorDatabaseRef::REPLICATION_MASTER_REPLICA:
$message = pht(
'Database host "%s" is configured as a master, but is replicating '.
'another host. This is dangerous and can mangle or destroy data. '.
'Only replicas should be replicating. Stop replication on the '.
'host or reconfigure Phabricator.',
$ref->getRefKey());
$this->newIssue('db.master.replicating')
->setName(pht('Replicating Master'))
->setIsFatal(true)
->setMessage($message);
return true;
case PhabricatorDatabaseRef::REPLICATION_REPLICA_NONE:
case PhabricatorDatabaseRef::REPLICATION_NOT_REPLICATING:
if (!$ref->getIsMaster()) {
$message = pht(
'Database replica "%s" is listed as a replica, but is not '.
'currently replicating. You are vulnerable to data loss if '.
'the master fails.',
$ref->getRefKey());
// This isn't a fatal because it can normally only put data at risk,
// not actually do anything destructive or unrecoverable.
$this->newIssue('db.replica.not-replicating')
->setName(pht('Nonreplicating Replica'))
->setMessage($message);
}
break;
}
} }
} }

View file

@ -308,13 +308,12 @@ final class PhabricatorDatabaseRef
} }
public static function queryAll() { public static function queryAll() {
$refs = self::newRefs(); $refs = self::getActiveDatabaseRefs();
return self::queryRefs($refs);
}
private static function queryRefs(array $refs) {
foreach ($refs as $ref) { foreach ($refs as $ref) {
if ($ref->getDisabled()) {
continue;
}
$conn = $ref->newManagementConnection(); $conn = $ref->newManagementConnection();
$t_start = microtime(true); $t_start = microtime(true);

View file

@ -842,6 +842,8 @@ abstract class PhabricatorStorageManagementWorkflow
$no_quickstart, $no_quickstart,
$init_only) { $init_only) {
$patches = $this->patches;
$applied = $api->getAppliedPatches(); $applied = $api->getAppliedPatches();
if ($applied === null) { if ($applied === null) {
if ($this->dryRun) { if ($this->dryRun) {
@ -864,7 +866,7 @@ abstract class PhabricatorStorageManagementWorkflow
// adjustment phase. // adjustment phase.
$this->didInitialize = true; $this->didInitialize = true;
$legacy = $api->getLegacyPatches($this->patches); $legacy = $api->getLegacyPatches($patches);
if ($legacy || $no_quickstart || $init_only) { if ($legacy || $no_quickstart || $init_only) {
// If we have legacy patches, we can't quickstart. // If we have legacy patches, we can't quickstart.
@ -921,14 +923,14 @@ abstract class PhabricatorStorageManagementWorkflow
while (true) { while (true) {
$applied_something = false; $applied_something = false;
foreach ($this->patches as $key => $patch) { foreach ($patches as $key => $patch) {
if (isset($applied[$key])) { if (isset($applied[$key])) {
unset($this->patches[$key]); unset($patches[$key]);
continue; continue;
} }
if ($apply_only && $apply_only != $key) { if ($apply_only && $apply_only != $key) {
unset($this->patches[$key]); unset($patches[$key]);
continue; continue;
} }
@ -968,17 +970,17 @@ abstract class PhabricatorStorageManagementWorkflow
} }
} }
unset($this->patches[$key]); unset($patches[$key]);
$applied[$key] = true; $applied[$key] = true;
} }
if (!$applied_something) { if (!$applied_something) {
if (count($this->patches)) { if (count($patches)) {
throw new Exception( throw new Exception(
pht( pht(
'Some patches could not be applied to "%s": %s', 'Some patches could not be applied to "%s": %s',
$api->getRef()->getRefKey(), $api->getRef()->getRefKey(),
implode(', ', array_keys($this->patches)))); implode(', ', array_keys($patches))));
} else if (!$this->dryRun && !$apply_only) { } else if (!$this->dryRun && !$apply_only) {
echo pht( echo pht(
'Storage is up to date on "%s". Use "%s" for details.', 'Storage is up to date on "%s". Use "%s" for details.',