From 917da084176c6bf89c1e738c6140c9a86dfc6a47 Mon Sep 17 00:00:00 2001 From: epriestley Date: Wed, 29 Oct 2014 15:49:29 -0700 Subject: [PATCH] Fix various MySQL version issues with new charset stuff Summary: Ref T1191. Notable stuff: - Adds `--disable-utf8mb4` to `bin/storage` to make it easier to test what things will (approximately) do on old MySQL. This isn't 100% perfect but should catch all the major stuff. It basically makes us pretend the server is an old server. - Require utf8mb4 to dump a quickstart. - Fix some issues with quickstart generation, notably special casing the FULLTEXT handling. - Add an `--unsafe` flag to `bin/storage adjust` to let it truncate data to fix schemata. - Fix some old patches which don't work if the default table charset is utf8mb4. Test Plan: - Dumped a quickstart. - Loaded the quickstart with utf8mb4. - Loaded the quickstart with `--disable-utf8mb4` (verified that we get binary columns, etc). - Adjusted schema with `--disable-utf8mb4` (got a long adjustment with binary columns, some truncation stuff with weird edge case test data). - Adjusted schema with `--disable-utf8mb4 --unsafe` (got truncations and clean adjust). - Adjusted schema back without `--disable-utf8mb4` (got a long adjustment with utf8mb4 columns, some invalid data on truncated utf8). - Adjusted schema without `--disable-utf8mb4`, but with `--unsafe` (got truncations on the invalid data). Reviewers: btrahan Reviewed By: btrahan Subscribers: epriestley Maniphest Tasks: T1191 Differential Revision: https://secure.phabricator.com/D10757 --- resources/celerity/map.php | 52 +++++++++--------- resources/sql/patches/000.project.sql | 2 +- resources/sql/patches/002.oauth.sql | 4 +- resources/sql/patches/004.daemonrepos.sql | 2 +- resources/sql/patches/010.herald.sql | 2 +- resources/sql/patches/011.badcommit.sql | 2 +- resources/sql/patches/018.owners.sql | 2 +- resources/sql/patches/019.arcprojects.sql | 2 +- resources/sql/patches/035.proxyimage.sql | 2 +- resources/sql/patches/040.transform.sql | 4 +- .../patches/068.maniphestauxiliarystorage.sql | 2 +- scripts/sql/manage_storage.php | 8 +++ .../schema/PhabricatorConfigSchemaQuery.php | 15 +++-- .../storage/PhabricatorUserSchemaSpec.php | 2 +- .../PhabricatorStorageManagementAPI.php | 55 ++++++++++++++++--- ...ricatorStorageManagementAdjustWorkflow.php | 33 ++++++++++- ...torStorageManagementQuickstartWorkflow.php | 23 ++++++++ 17 files changed, 153 insertions(+), 59 deletions(-) diff --git a/resources/celerity/map.php b/resources/celerity/map.php index ceea55acbb..3cdef77967 100644 --- a/resources/celerity/map.php +++ b/resources/celerity/map.php @@ -855,10 +855,6 @@ return array( 'javelin-workflow', 'phabricator-draggable-list', ), - '7319e029' => array( - 'javelin-behavior', - 'javelin-dom', - ), '06e05112' => array( 'javelin-behavior', 'javelin-stratcom', @@ -1276,6 +1272,10 @@ return array( 'phabricator-phtize', 'changeset-view-manager', ), + '7319e029' => array( + 'javelin-behavior', + 'javelin-dom', + ), '76b9fc3e' => array( 'javelin-behavior', 'javelin-stratcom', @@ -1336,28 +1336,6 @@ return array( 'javelin-behavior-device', 'phabricator-title', ), - 42126667 => array( - 'javelin-behavior', - 'javelin-dom', - 'javelin-request', - ), - 48086888 => array( - 'javelin-behavior', - 'javelin-dom', - 'javelin-workflow', - ), - 60479091 => array( - 'phabricator-busy', - 'javelin-behavior', - ), - 82439934 => array( - 'javelin-behavior', - 'javelin-dom', - 'javelin-util', - 'javelin-stratcom', - 'javelin-workflow', - 'phabricator-draggable-list', - ), '7e41274a' => array( 'javelin-install', ), @@ -1962,6 +1940,28 @@ return array( 'phabricator-prefab', 'javelin-json', ), + 42126667 => array( + 'javelin-behavior', + 'javelin-dom', + 'javelin-request', + ), + 48086888 => array( + 'javelin-behavior', + 'javelin-dom', + 'javelin-workflow', + ), + 60479091 => array( + 'phabricator-busy', + 'javelin-behavior', + ), + 82439934 => array( + 'javelin-behavior', + 'javelin-dom', + 'javelin-util', + 'javelin-stratcom', + 'javelin-workflow', + 'phabricator-draggable-list', + ), ), 'packages' => array( 'core.pkg.css' => array( diff --git a/resources/sql/patches/000.project.sql b/resources/sql/patches/000.project.sql index 62221ce871..794eb2737f 100644 --- a/resources/sql/patches/000.project.sql +++ b/resources/sql/patches/000.project.sql @@ -1,6 +1,6 @@ create table {$NAMESPACE}_project.project ( id int unsigned not null auto_increment primary key, - name varchar(255) not null, + name varchar(255) COLLATE `binary` not null, unique key (name), phid varchar(64) binary not null, authorPHID varchar(64) binary not null, diff --git a/resources/sql/patches/002.oauth.sql b/resources/sql/patches/002.oauth.sql index 4f68261188..24cce576a0 100644 --- a/resources/sql/patches/002.oauth.sql +++ b/resources/sql/patches/002.oauth.sql @@ -1,8 +1,8 @@ create table {$NAMESPACE}_user.user_oauthinfo ( id int unsigned not null auto_increment primary key, userID int unsigned not null, - oauthProvider varchar(255) not null, - oauthUID varchar(255) not null, + oauthProvider varchar(255) COLLATE `binary` not null, + oauthUID varchar(255) COLLATE `binary` not null, unique key (userID, oauthProvider), unique key (oauthProvider, oauthUID), dateCreated int unsigned not null, diff --git a/resources/sql/patches/004.daemonrepos.sql b/resources/sql/patches/004.daemonrepos.sql index e58aac7233..9e46210ca3 100644 --- a/resources/sql/patches/004.daemonrepos.sql +++ b/resources/sql/patches/004.daemonrepos.sql @@ -23,6 +23,6 @@ create table {$NAMESPACE}_timeline.timeline_eventdata ( ); create table {$NAMESPACE}_timeline.timeline_cursor ( - name varchar(255) not null primary key, + name varchar(255) COLLATE `binary` not null primary key, position int unsigned not null ); diff --git a/resources/sql/patches/010.herald.sql b/resources/sql/patches/010.herald.sql index 57f86acc03..1a28ce262e 100644 --- a/resources/sql/patches/010.herald.sql +++ b/resources/sql/patches/010.herald.sql @@ -7,7 +7,7 @@ CREATE TABLE {$NAMESPACE}_herald.herald_action ( CREATE TABLE {$NAMESPACE}_herald.herald_rule ( id int unsigned not null auto_increment primary key, - name varchar(255) not null, + name varchar(255) COLLATE `binary` not null, authorPHID varchar(64) binary not null, contentType varchar(255) not null, mustMatchAll bool not null, diff --git a/resources/sql/patches/011.badcommit.sql b/resources/sql/patches/011.badcommit.sql index 58dbc45993..a9c0853126 100644 --- a/resources/sql/patches/011.badcommit.sql +++ b/resources/sql/patches/011.badcommit.sql @@ -1,4 +1,4 @@ CREATE TABLE {$NAMESPACE}_repository.repository_badcommit ( - fullCommitName varchar(255) binary not null primary key, + fullCommitName varchar(255) COLLATE `binary` not null primary key, description longblob not null ); diff --git a/resources/sql/patches/018.owners.sql b/resources/sql/patches/018.owners.sql index c30eb010c5..de7dde64d0 100644 --- a/resources/sql/patches/018.owners.sql +++ b/resources/sql/patches/018.owners.sql @@ -2,7 +2,7 @@ CREATE TABLE {$NAMESPACE}_owners.owners_package ( id int unsigned not null auto_increment primary key, phid varchar(64) binary not null, unique key(phid), - name varchar(255) not null, + name varchar(255) COLLATE `binary` not null, unique key(name), description text not null, primaryOwnerPHID varchar(64) binary diff --git a/resources/sql/patches/019.arcprojects.sql b/resources/sql/patches/019.arcprojects.sql index e6e0405268..ac3ab8828a 100644 --- a/resources/sql/patches/019.arcprojects.sql +++ b/resources/sql/patches/019.arcprojects.sql @@ -2,7 +2,7 @@ CREATE TABLE {$NAMESPACE}_repository.repository_arcanistproject ( id int unsigned not null auto_increment primary key, phid varchar(64) binary not null, unique key(phid), - name varchar(255) not null, + name varchar(255) COLLATE `binary` not null, unique key (name), repositoryID int unsigned ); diff --git a/resources/sql/patches/035.proxyimage.sql b/resources/sql/patches/035.proxyimage.sql index 78564aa43d..425506bcf4 100644 --- a/resources/sql/patches/035.proxyimage.sql +++ b/resources/sql/patches/035.proxyimage.sql @@ -1,6 +1,6 @@ CREATE TABLE {$NAMESPACE}_file.file_proxyimage ( id int unsigned not null primary key auto_increment, - uri varchar(255) binary not null, + uri varchar(255) COLLATE `binary` not null, unique key(uri), filePHID varchar(64) binary not null ) ENGINE=InnoDB; diff --git a/resources/sql/patches/040.transform.sql b/resources/sql/patches/040.transform.sql index 0f35801d43..bb47b810a1 100644 --- a/resources/sql/patches/040.transform.sql +++ b/resources/sql/patches/040.transform.sql @@ -1,7 +1,7 @@ CREATE TABLE {$NAMESPACE}_file.file_transformedfile ( id INT UNSIGNED NOT NULL AUTO_INCREMENT PRIMARY KEY, - originalPHID varchar(64) BINARY NOT NULL, - transform varchar(255) BINARY NOT NULL, + originalPHID varchar(64) COLLATE `binary` NOT NULL, + transform varchar(255) COLLATE `binary` NOT NULL, unique key (originalPHID, transform), transformedPHID varchar(64) BINARY NOT NULL, key (transformedPHID), diff --git a/resources/sql/patches/068.maniphestauxiliarystorage.sql b/resources/sql/patches/068.maniphestauxiliarystorage.sql index bd7576b186..17c94f9516 100644 --- a/resources/sql/patches/068.maniphestauxiliarystorage.sql +++ b/resources/sql/patches/068.maniphestauxiliarystorage.sql @@ -1,7 +1,7 @@ create table {$NAMESPACE}_maniphest.maniphest_taskauxiliarystorage (id int unsigned not null auto_increment primary key, taskPHID varchar(64) binary not null, - name varchar(255) not null, + name varchar(255) COLLATE `binary` not null, value varchar(255) not null, unique key (taskPHID,name), dateCreated int unsigned not null, diff --git a/scripts/sql/manage_storage.php b/scripts/sql/manage_storage.php index 0b0b6da9b1..f4d8d0f947 100755 --- a/scripts/sql/manage_storage.php +++ b/scripts/sql/manage_storage.php @@ -64,6 +64,13 @@ try { 'help' => 'Do not actually change anything, just show what would be '. 'changed.', ), + array( + 'name' => 'disable-utf8mb4', + 'help' => pht( + 'Disable utf8mb4, even if the database supports it. This is an '. + 'advanced feature used for testing changes to Phabricator; you '. + 'should not normally use this flag.'), + ) )); } catch (PhutilArgumentUsageException $ex) { $args->printUsageException($ex); @@ -126,6 +133,7 @@ $api->setHost($default_host); $api->setPort($default_port); $api->setPassword($password); $api->setNamespace($args->getArg('namespace')); +$api->setDisableUTF8MB4($args->getArg('disable-utf8mb4')); try { queryfx( diff --git a/src/applications/config/schema/PhabricatorConfigSchemaQuery.php b/src/applications/config/schema/PhabricatorConfigSchemaQuery.php index 60292b4a74..7a11d64684 100644 --- a/src/applications/config/schema/PhabricatorConfigSchemaQuery.php +++ b/src/applications/config/schema/PhabricatorConfigSchemaQuery.php @@ -153,11 +153,7 @@ final class PhabricatorConfigSchemaQuery extends Phobject { public function loadExpectedSchema() { $databases = $this->getDatabaseNames(); - - $api = $this->getAPI(); - - $charset_info = $api->getCharsetInfo(); - list($charset, $collate_text, $collate_sort) = $charset_info; + $info = $this->getAPI()->getCharsetInfo(); $specs = id(new PhutilSymbolLoader()) ->setAncestorClass('PhabricatorConfigSchemaSpec') @@ -166,9 +162,12 @@ final class PhabricatorConfigSchemaQuery extends Phobject { $server_schema = new PhabricatorConfigServerSchema(); foreach ($specs as $spec) { $spec - ->setUTF8Charset($charset) - ->setUTF8BinaryCollation($collate_text) - ->setUTF8SortingCollation($collate_sort) + ->setUTF8Charset( + $info[PhabricatorStorageManagementAPI::CHARSET_DEFAULT]) + ->setUTF8BinaryCollation( + $info[PhabricatorStorageManagementAPI::COLLATE_TEXT]) + ->setUTF8SortingCollation( + $info[PhabricatorStorageManagementAPI::COLLATE_SORT]) ->setServer($server_schema) ->buildSchemata($server_schema); } diff --git a/src/applications/people/storage/PhabricatorUserSchemaSpec.php b/src/applications/people/storage/PhabricatorUserSchemaSpec.php index fa01b8c210..4d9bab1b4f 100644 --- a/src/applications/people/storage/PhabricatorUserSchemaSpec.php +++ b/src/applications/people/storage/PhabricatorUserSchemaSpec.php @@ -9,7 +9,7 @@ final class PhabricatorUserSchemaSpec extends PhabricatorConfigSchemaSpec { id(new PhabricatorUser())->getApplicationName(), PhabricatorUser::NAMETOKEN_TABLE, array( - 'token' => 'text255', + 'token' => 'sort255', 'userID' => 'id', ), array( diff --git a/src/infrastructure/storage/management/PhabricatorStorageManagementAPI.php b/src/infrastructure/storage/management/PhabricatorStorageManagementAPI.php index 70ee920169..16d4c5674d 100644 --- a/src/infrastructure/storage/management/PhabricatorStorageManagementAPI.php +++ b/src/infrastructure/storage/management/PhabricatorStorageManagementAPI.php @@ -8,6 +8,22 @@ final class PhabricatorStorageManagementAPI { private $password; private $namespace; private $conns = array(); + private $disableUTF8MB4; + + const CHARSET_DEFAULT = 'CHARSET'; + const CHARSET_FULLTEXT = 'CHARSET_FULLTEXT'; + const COLLATE_TEXT = 'COLLATE_TEXT'; + const COLLATE_SORT = 'COLLATE_SORT'; + const COLLATE_FULLTEXT = 'COLLATE_FULLTEXT'; + + public function setDisableUTF8MB4($disable_utf8_mb4) { + $this->disableUTF8MB4 = $disable_utf8_mb4; + return $this; + } + + public function getDisableUTF8MB4() { + return $this->disableUTF8MB4; + } public function setNamespace($namespace) { $this->namespace = $namespace; @@ -110,13 +126,12 @@ final class PhabricatorStorageManagementAPI { public function createDatabase($fragment) { $info = $this->getCharsetInfo(); - list($charset, $collate_text, $collate_sort) = $info; queryfx( $this->getConn(null), 'CREATE DATABASE IF NOT EXISTS %T COLLATE %T', $this->getDatabaseName($fragment), - $collate_text); + $info[self::COLLATE_TEXT]); } public function createTable($fragment, $table, array $cols) { @@ -187,15 +202,17 @@ final class PhabricatorStorageManagementAPI { $conn = $this->getConn(null); $charset_info = $this->getCharsetInfo(); - list($charset, $collate_text, $collate_sort) = $charset_info; + foreach ($charset_info as $key => $value) { + $charset_info[$key] = qsprintf($conn, '%T', $value); + } foreach ($queries as $query) { $query = str_replace('{$NAMESPACE}', $this->namespace, $query); - $query = str_replace('{$CHARSET}', $charset, $query); - $escaped_text = qsprintf($conn, '%T', $collate_text); - $query = str_replace('{$COLLATE_TEXT}', $escaped_text, $query); - $escaped_text = qsprintf($conn, '%T', $collate_sort); - $query = str_replace('{$COLLATE_SORT}', $escaped_text, $query); + + foreach ($charset_info as $key => $value) { + $query = str_replace('{$'.$key.'}', $value, $query); + } + queryfx( $conn, '%Q', @@ -209,6 +226,12 @@ final class PhabricatorStorageManagementAPI { } public function isCharacterSetAvailable($character_set) { + if ($character_set == 'utf8mb4') { + if ($this->getDisableUTF8MB4()) { + return false; + } + } + $conn = $this->getConn(null); $result = queryfx_one( @@ -226,8 +249,10 @@ final class PhabricatorStorageManagementAPI { // collation. This is most correct, and will sort properly. $charset = 'utf8mb4'; + $charset_full = 'utf8mb4'; $collate_text = 'utf8mb4_bin'; $collate_sort = 'utf8mb4_unicode_ci'; + $collate_full = 'utf8mb4_unicode_ci'; } else { // If utf8mb4 is not available, we use binary. This allows us to store // 4-byte unicode characters. This has some tradeoffs: @@ -238,13 +263,25 @@ final class PhabricatorStorageManagementAPI { // It's possible that strings will be truncated in the middle of a // character on insert. We encourage users to set STRICT_ALL_TABLES // to prevent this. + // + // There's no valid collation we can use to get a fulltext index on + // 4-byte unicode characters: we can't add a fulltext key to a binary + // column. $charset = 'binary'; + $charset_full = 'utf8'; $collate_text = 'binary'; $collate_sort = 'binary'; + $collate_full = 'utf8_general_ci'; } - return array($charset, $collate_text, $collate_sort); + return array( + self::CHARSET_DEFAULT => $charset, + self::CHARSET_FULLTEXT => $charset_full, + self::COLLATE_TEXT => $collate_text, + self::COLLATE_SORT => $collate_sort, + self::COLLATE_FULLTEXT => $collate_full, + ); } } diff --git a/src/infrastructure/storage/management/workflow/PhabricatorStorageManagementAdjustWorkflow.php b/src/infrastructure/storage/management/workflow/PhabricatorStorageManagementAdjustWorkflow.php index 61a799deed..81d1c21df1 100644 --- a/src/infrastructure/storage/management/workflow/PhabricatorStorageManagementAdjustWorkflow.php +++ b/src/infrastructure/storage/management/workflow/PhabricatorStorageManagementAdjustWorkflow.php @@ -10,14 +10,26 @@ final class PhabricatorStorageManagementAdjustWorkflow ->setSynopsis( pht( 'Make schemata adjustments to correct issues with characters sets, '. - 'collations, and keys.')); + 'collations, and keys.')) + ->setArguments( + array( + array( + 'name' => 'unsafe', + 'help' => pht( + 'Permit adjustments which truncate data. This option may '. + 'destroy some data, but the lost data is usually not '. + 'important (most commonly, the ends of very long object '. + 'titles).'), + ), + )); } public function execute(PhutilArgumentParser $args) { $force = $args->getArg('force'); + $unsafe = $args->getArg('unsafe'); $this->requireAllPatchesApplied(); - return $this->adjustSchemata($force); + return $this->adjustSchemata($force, $unsafe); } private function requireAllPatchesApplied() { @@ -59,7 +71,7 @@ final class PhabricatorStorageManagementAdjustWorkflow return array($comp, $expect, $actual); } - private function adjustSchemata($force) { + private function adjustSchemata($force, $unsafe) { $console = PhutilConsole::getConsole(); $console->writeOut( @@ -138,6 +150,12 @@ final class PhabricatorStorageManagementAdjustWorkflow $api = $this->getAPI(); $conn = $api->getConn(null); + if ($unsafe) { + queryfx($conn, 'SET SESSION sql_mode = %s', ''); + } else { + queryfx($conn, 'SET SESSION sql_mode = %s', 'STRICT_ALL_TABLES'); + } + $failed = array(); // We make changes in several phases. @@ -327,6 +345,15 @@ final class PhabricatorStorageManagementAdjustWorkflow "\n%s\n", pht('Failed to make some schema adjustments, detailed above.')); + if (!$unsafe) { + $console->writeOut( + "%s\n", + pht( + 'Migrations which fail with certain types of errors (including '. + '"#1406 Data Too Long" and "#1366 Incorrect String Value") can be '. + 'forced to complete by running again with `--unsafe`.')); + } + return 1; } diff --git a/src/infrastructure/storage/management/workflow/PhabricatorStorageManagementQuickstartWorkflow.php b/src/infrastructure/storage/management/workflow/PhabricatorStorageManagementQuickstartWorkflow.php index 6abeedf9bc..419fca8340 100644 --- a/src/infrastructure/storage/management/workflow/PhabricatorStorageManagementQuickstartWorkflow.php +++ b/src/infrastructure/storage/management/workflow/PhabricatorStorageManagementQuickstartWorkflow.php @@ -33,6 +33,14 @@ final class PhabricatorStorageManagementQuickstartWorkflow $bin = dirname(phutil_get_library_root('phabricator')).'/bin/storage'; + if (!$this->getAPI()->isCharacterSetAvailable('utf8mb4')) { + throw new PhutilArgumentUsageException( + pht( + 'You can only generate a new quickstart file if MySQL supports '. + 'the utf8mb4 character set (available in MySQL 5.5 and newer). The '. + 'configured server does not support utf8mb4.')); + } + $err = phutil_passthru( '%s upgrade --force --no-quickstart --namespace %s', $bin, @@ -74,6 +82,21 @@ final class PhabricatorStorageManagementQuickstartWorkflow '{$NAMESPACE}', $dump); + // NOTE: This is a hack. We can not use `binary` for this column, because + // it is part of a fulltext index. + $old = $dump; + $dump = preg_replace( + '/`corpus` longtext CHARACTER SET .* COLLATE .*,/mi', + '`corpus` longtext CHARACTER SET {$CHARSET_FULLTEXT} '. + 'COLLATE {$COLLATE_FULLTEXT},', + $dump); + if ($dump == $old) { + // If we didn't make any changes, yell about it. We'll produce an invalid + // dump otherwise. + throw new PhutilArgumentUsageException( + pht('Failed to apply hack to adjust FULLTEXT search column!')); + } + $dump = str_replace( 'utf8mb4_bin', '{$COLLATE_TEXT}',