From 02e1440ef2605312940620b5e96764b282fb48cd Mon Sep 17 00:00:00 2001
From: epriestley
Date: Wed, 4 Oct 2017 09:10:18 -0700
Subject: [PATCH] Dump tables one at a time, rather than all at once

Summary:
Ref T13000. This allows us to be more selective about which tables we dump
data for, to reduce the size of backups and exports. The immediate goal is to
make large `ngrams` tables more manageable in the cluster, but this generally
makes all backups and exports faster and easier.

Here, tables are dumped one at a time (an illustrative before/after of the
generated commands appears after the diff). A followup change will sometimes
add the `--no-data` flag, to skip dumping readthrough caches and (optionally)
rebuildable indexes.

Test Plan: Compared a dump from `master` and from this branch, found them to
be essentially identical. The new dump has a little more header information
in each section. Verified each contains the same number of `CREATE TABLE`
statements.

Reviewers: amckinley

Reviewed By: amckinley

Maniphest Tasks: T13000

Differential Revision: https://secure.phabricator.com/D18679
---
 ...abricatorStorageManagementDumpWorkflow.php | 120 +++++++++++-------
 1 file changed, 76 insertions(+), 44 deletions(-)

diff --git a/src/infrastructure/storage/management/workflow/PhabricatorStorageManagementDumpWorkflow.php b/src/infrastructure/storage/management/workflow/PhabricatorStorageManagementDumpWorkflow.php
index 4dc5c64042..f491133c67 100644
--- a/src/infrastructure/storage/management/workflow/PhabricatorStorageManagementDumpWorkflow.php
+++ b/src/infrastructure/storage/management/workflow/PhabricatorStorageManagementDumpWorkflow.php
@@ -62,7 +62,24 @@ final class PhabricatorStorageManagementDumpWorkflow
       return 1;
     }
 
-    $databases = $api->getDatabaseList($patches, true);
+    $ref = $api->getRef();
+    $ref_key = $ref->getRefKey();
+
+    $schemata_map = id(new PhabricatorConfigSchemaQuery())
+      ->setAPIs(array($api))
+      ->setRefs(array($ref))
+      ->loadActualSchemata();
+    $schemata = $schemata_map[$ref_key];
+
+    $targets = array();
+    foreach ($schemata->getDatabases() as $database_name => $database) {
+      foreach ($database->getTables() as $table_name => $table) {
+        $targets[] = array(
+          'database' => $database_name,
+          'table' => $table_name,
+        );
+      }
+    }
 
     list($host, $port) = $this->getBareHostAndPort($api->getHost());
 
@@ -126,35 +143,42 @@ final class PhabricatorStorageManagementDumpWorkflow
       $argv[] = $port;
     }
 
-    $argv[] = '--databases';
-    foreach ($databases as $database) {
-      $argv[] = $database;
+    $commands = array();
+    foreach ($targets as $target) {
+      $target_argv = $argv;
+
+      if ($has_password) {
+        $command = csprintf(
+          'mysqldump -p%P %Ls -- %R %R',
+          $password,
+          $target_argv,
+          $target['database'],
+          $target['table']);
+      } else {
+        $command = csprintf(
+          'mysqldump %Ls -- %R %R',
+          $target_argv,
+          $target['database'],
+          $target['table']);
+      }
+
+      $commands[] = $command;
     }
 
-    if ($has_password) {
-      $command = csprintf('mysqldump -p%P %Ls', $password, $argv);
-    } else {
-      $command = csprintf('mysqldump %Ls', $argv);
-    }
-
     // Decrease the CPU priority of this process so it doesn't contend with
     // other more important things.
     if (function_exists('proc_nice')) {
       proc_nice(19);
     }
 
-
-    // If we aren't writing to a file, just passthru the command.
-    if ($output_file === null) {
-      return phutil_passthru('%C', $command);
-    }
-
     // If we are writing to a file, stream the command output to disk. This
     // mode makes sure the whole command fails if there's an error (commonly,
     // a full disk). See T6996 for discussion.
 
-    if ($is_compress) {
+    if ($output_file === null) {
+      $file = null;
+    } else if ($is_compress) {
       $file = gzopen($output_file, 'wb1');
     } else {
       $file = fopen($output_file, 'wb');
     }
@@ -167,41 +191,47 @@ final class PhabricatorStorageManagementDumpWorkflow
           $file));
     }
 
-    $future = new ExecFuture('%C', $command);
-
     try {
-      $iterator = id(new FutureIterator(array($future)))
-        ->setUpdateInterval(0.100);
-      foreach ($iterator as $ready) {
-        list($stdout, $stderr) = $future->read();
-        $future->discardBuffers();
+      foreach ($commands as $command) {
+        $future = new ExecFuture('%C', $command);
 
-        if (strlen($stderr)) {
-          fwrite(STDERR, $stderr);
-        }
+        $iterator = id(new FutureIterator(array($future)))
+          ->setUpdateInterval(0.100);
+        foreach ($iterator as $ready) {
+          list($stdout, $stderr) = $future->read();
+          $future->discardBuffers();
 
-        if (strlen($stdout)) {
-          if ($is_compress) {
-            $ok = gzwrite($file, $stdout);
-          } else {
-            $ok = fwrite($file, $stdout);
+          if (strlen($stderr)) {
+            fwrite(STDERR, $stderr);
           }
 
-          if ($ok !== strlen($stdout)) {
-            throw new Exception(
-              pht(
-                'Failed to write %d byte(s) to file "%s".',
-                new PhutilNumber(strlen($stdout)),
-                $output_file));
-          }
-        }
+          if (strlen($stdout)) {
+            if (!$file) {
+              $ok = fwrite(STDOUT, $stdout);
+            } else if ($is_compress) {
+              $ok = gzwrite($file, $stdout);
+            } else {
+              $ok = fwrite($file, $stdout);
+            }
 
-        if ($ready !== null) {
-          $ready->resolvex();
+            if ($ok !== strlen($stdout)) {
+              throw new Exception(
+                pht(
+                  'Failed to write %d byte(s) to file "%s".',
+                  new PhutilNumber(strlen($stdout)),
+                  $output_file));
+            }
+          }
+
+          if ($ready !== null) {
+            $ready->resolvex();
+          }
         }
       }
 
-      if ($is_compress) {
+      if (!$file) {
+        $ok = true;
+      } else if ($is_compress) {
         $ok = gzclose($file);
       } else {
         $ok = fclose($file);
@@ -218,7 +248,9 @@ final class PhabricatorStorageManagementDumpWorkflow
       // we don't leave any confusing artifacts laying around.
 
       try {
-        Filesystem::remove($output_file);
+        if ($file !== null) {
+          Filesystem::remove($output_file);
+        }
       } catch (Exception $ex) {
         // Ignore any errors we hit.
       }
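
For context on what the change above produces at runtime, the lines below are a rough sketch only: the database and table names are hypothetical, and the connection arguments (user, host, port, password) assembled earlier in the workflow are elided. Before this patch, the workflow ran a single mysqldump covering every database:

  mysqldump <connection args> --databases phabricator_differential phabricator_maniphest ...

After this patch, it runs one mysqldump per table, using the `-- <database> <table>` form from the new `csprintf('mysqldump %Ls -- %R %R', ...)` call, and streams each command's output into the same dump file (or to stdout when no output file is given):

  mysqldump <connection args> -- phabricator_differential differential_revision
  mysqldump <connection args> -- phabricator_differential differential_diff
  mysqldump <connection args> -- phabricator_maniphest maniphest_task

Per the Test Plan, one quick sanity check on uncompressed dumps is comparing table counts between the old and new output, for example with `grep -c 'CREATE TABLE' old.sql new.sql`.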