From b5f23b023e3c3a055e1d446a3a7a60341a3590cc Mon Sep 17 00:00:00 2001 From: epriestley Date: Mon, 16 Apr 2018 06:38:14 -0700 Subject: [PATCH] Add an "--auto" flag to "bin/differential migrate-hunk" Summary: Depends on D19370. See T13124. See PHI549. The particular install in PHI549 migrated a large amount of data via the fallback hunk migration script, which does not compress hunks. Add a mode to `bin/differential migrate-hunk` that amounts to "compress all the hunks which would benefit from compression". Test Plan: Ran `bin/differential migrate-hunk` with `--auto`, `--all`, `--to`, `--id`, and `--dry-run` in various mixtures. Forced a bunch of hunks to raw ("byte") format, saw it cleanly upgrade them to compressed ("gzde") format. Reviewers: amckinley Reviewed By: amckinley Differential Revision: https://secure.phabricator.com/D19371 --- ...ricatorDifferentialMigrateHunkWorkflow.php | 116 +++++++++++++++--- .../differential/storage/DifferentialHunk.php | 22 ++-- 2 files changed, 114 insertions(+), 24 deletions(-) diff --git a/src/applications/differential/management/PhabricatorDifferentialMigrateHunkWorkflow.php b/src/applications/differential/management/PhabricatorDifferentialMigrateHunkWorkflow.php index 18afd3f359..0db158e171 100644 --- a/src/applications/differential/management/PhabricatorDifferentialMigrateHunkWorkflow.php +++ b/src/applications/differential/management/PhabricatorDifferentialMigrateHunkWorkflow.php @@ -26,10 +26,20 @@ final class PhabricatorDifferentialMigrateHunkWorkflow 'name' => 'all', 'help' => pht('Migrate all hunks.'), ), + array( + 'name' => 'auto', + 'help' => pht('Select storage format automatically.'), + ), + array( + 'name' => 'dry-run', + 'help' => pht('Show planned writes but do not perform them.'), + ), )); } public function execute(PhutilArgumentParser $args) { + $is_dry_run = $args->getArg('dry-run'); + $id = $args->getArg('id'); $is_all = $args->getArg('all'); @@ -45,14 +55,34 @@ final class PhabricatorDifferentialMigrateHunkWorkflow 'with "--all".')); } + $is_auto = $args->getArg('auto'); $storage = $args->getArg('to'); - switch ($storage) { - case DifferentialHunk::DATATYPE_TEXT: - case DifferentialHunk::DATATYPE_FILE: - break; - default: + if ($is_auto && $storage) { + throw new PhutilArgumentUsageException( + pht( + 'Options "--to" (to choose a specific storage format) and "--auto" '. + '(to select a storage format automatically) are mutually '. + 'exclusive.')); + } else if (!$is_auto && !$storage) { + throw new PhutilArgumentUsageException( + pht( + 'Use "--to" to choose a storage format, or "--auto" to select a '. + 'format automatically.')); + } + + $types = array( + DifferentialHunk::DATATYPE_TEXT, + DifferentialHunk::DATATYPE_FILE, + ); + $types = array_fuse($types); + if (strlen($storage)) { + if (!isset($types[$storage])) { throw new PhutilArgumentUsageException( - pht('Specify a hunk storage engine with --to.')); + pht( + 'Storage type "%s" is unknown. Supported types are: %s.', + $storage, + implode(', ', array_keys($types)))); + } } if ($id) { @@ -64,7 +94,7 @@ final class PhabricatorDifferentialMigrateHunkWorkflow foreach ($hunks as $hunk) { try { - $this->migrateHunk($hunk, $storage); + $this->migrateHunk($hunk, $storage, $is_auto, $is_dry_run); } catch (Exception $ex) { // If we're migrating a single hunk, just throw the exception. If // we're migrating multiple hunks, warn but continue. @@ -96,31 +126,85 @@ final class PhabricatorDifferentialMigrateHunkWorkflow return $hunk; } - private function migrateHunk(DifferentialHunk $hunk, $format) { + private function migrateHunk( + DifferentialHunk $hunk, + $type, + $is_auto, + $is_dry_run) { + + $old_type = $hunk->getDataType(); + + if ($is_auto) { + // By default, we're just going to keep hunks in the same storage + // engine. In the future, we could perhaps select large hunks stored in + // text engine and move them into file storage. + $new_type = $old_type; + } else { + $new_type = $type; + } + + // Figure out if the storage format (e.g., plain text vs compressed) + // would change if we wrote this hunk anew today. + $old_format = $hunk->getDataFormat(); + $new_format = $hunk->getAutomaticDataFormat(); + + $same_type = ($old_type === $new_type); + $same_format = ($old_format === $new_format); + + // If we aren't going to change the storage engine and aren't going to + // change the storage format, just bail out. + if ($same_type && $same_format) { + $this->logInfo( + pht('SKIP'), + pht( + 'Hunk %d is already stored in the preferred engine ("%s") '. + 'with the preferred format ("%s").', + $hunk->getID(), + $new_type, + $new_format)); + return; + } + + if ($is_dry_run) { + $this->logOkay( + pht('DRY RUN'), + pht( + 'Hunk %d would be rewritten (storage: "%s" -> "%s"; '. + 'format: "%s" -> "%s").', + $hunk->getID(), + $old_type, + $new_type, + $old_format, + $new_format)); + return; + } + $old_data = $hunk->getChanges(); - switch ($format) { + switch ($new_type) { case DifferentialHunk::DATATYPE_TEXT: $hunk->saveAsText(); - $this->logOkay( - pht('TEXT'), - pht('Converted hunk to text storage.')); break; case DifferentialHunk::DATATYPE_FILE: $hunk->saveAsFile(); - $this->logOkay( - pht('FILE'), - pht('Converted hunk to file storage.')); break; } + $this->logOkay( + pht('MIGRATE'), + pht( + 'Converted hunk %d to "%s" storage (with format "%s").', + $hunk->getID(), + $new_type, + $hunk->getDataFormat())); + $hunk = $this->loadHunk($hunk->getID()); $new_data = $hunk->getChanges(); if ($old_data !== $new_data) { throw new Exception( pht( - 'Integrity check failed: new file data differs fom old data!')); + 'Integrity check failed: new file data differs from old data!')); } } diff --git a/src/applications/differential/storage/DifferentialHunk.php b/src/applications/differential/storage/DifferentialHunk.php index 3defb3566b..6bfb38b789 100644 --- a/src/applications/differential/storage/DifferentialHunk.php +++ b/src/applications/differential/storage/DifferentialHunk.php @@ -290,14 +290,24 @@ final class DifferentialHunk return array(self::DATAFORMAT_RAW, $data); } + public function getAutomaticDataFormat() { + // If the hunk is already stored deflated, just keep it deflated. This is + // mostly a performance improvement for "bin/differential migrate-hunk" so + // that we don't have to recompress all the stored hunks when looking for + // stray uncompressed hunks. + if ($this->dataFormat === self::DATAFORMAT_DEFLATED) { + return self::DATAFORMAT_DEFLATED; + } + + list($format) = $this->formatDataForStorage($this->getRawData()); + + return $format; + } + public function saveAsText() { $old_type = $this->getDataType(); $old_data = $this->getData(); - if ($old_type == self::DATATYPE_TEXT) { - return $this; - } - $raw_data = $this->getRawData(); $this->setDataType(self::DATATYPE_TEXT); @@ -317,10 +327,6 @@ final class DifferentialHunk $old_type = $this->getDataType(); $old_data = $this->getData(); - if ($old_type == self::DATATYPE_FILE) { - return $this; - } - $raw_data = $this->getRawData(); list($format, $data) = $this->formatDataForStorage($raw_data);