1
0
Fork 0
mirror of https://we.phorge.it/source/phorge.git synced 2024-11-25 16:22:43 +01:00

Fix various MySQL version issues with new charset stuff

Summary:
Ref T1191. Notable stuff:

  - Adds `--disable-utf8mb4` to `bin/storage` to make it easier to test what things will (approximately) do on old MySQL. This isn't 100% perfect but should catch all the major stuff. It basically makes us pretend the server is an old server.
  - Require utf8mb4 to dump a quickstart.
  - Fix some issues with quickstart generation, notably special casing the FULLTEXT handling.
  - Add an `--unsafe` flag to `bin/storage adjust` to let it truncate data to fix schemata.
  - Fix some old patches which don't work if the default table charset is utf8mb4.

Test Plan:
  - Dumped a quickstart.
  - Loaded the quickstart with utf8mb4.
  - Loaded the quickstart with `--disable-utf8mb4` (verified that we get binary columns, etc).
  - Adjusted schema with `--disable-utf8mb4` (got a long adjustment with binary columns, some truncation stuff with weird edge case test data).
  - Adjusted schema with `--disable-utf8mb4 --unsafe` (got truncations and clean adjust).
  - Adjusted schema back without `--disable-utf8mb4` (got a long adjustment with utf8mb4 columns, some invalid data on truncated utf8).
  - Adjusted schema without `--disable-utf8mb4`, but with `--unsafe` (got truncations on the invalid data).

Reviewers: btrahan

Reviewed By: btrahan

Subscribers: epriestley

Maniphest Tasks: T1191

Differential Revision: https://secure.phabricator.com/D10757
This commit is contained in:
epriestley 2014-10-29 15:49:29 -07:00
parent bf66c90852
commit 917da08417
17 changed files with 153 additions and 59 deletions

View file

@ -855,10 +855,6 @@ return array(
'javelin-workflow',
'phabricator-draggable-list',
),
'7319e029' => array(
'javelin-behavior',
'javelin-dom',
),
'06e05112' => array(
'javelin-behavior',
'javelin-stratcom',
@ -1276,6 +1272,10 @@ return array(
'phabricator-phtize',
'changeset-view-manager',
),
'7319e029' => array(
'javelin-behavior',
'javelin-dom',
),
'76b9fc3e' => array(
'javelin-behavior',
'javelin-stratcom',
@ -1336,28 +1336,6 @@ return array(
'javelin-behavior-device',
'phabricator-title',
),
42126667 => array(
'javelin-behavior',
'javelin-dom',
'javelin-request',
),
48086888 => array(
'javelin-behavior',
'javelin-dom',
'javelin-workflow',
),
60479091 => array(
'phabricator-busy',
'javelin-behavior',
),
82439934 => array(
'javelin-behavior',
'javelin-dom',
'javelin-util',
'javelin-stratcom',
'javelin-workflow',
'phabricator-draggable-list',
),
'7e41274a' => array(
'javelin-install',
),
@ -1962,6 +1940,28 @@ return array(
'phabricator-prefab',
'javelin-json',
),
42126667 => array(
'javelin-behavior',
'javelin-dom',
'javelin-request',
),
48086888 => array(
'javelin-behavior',
'javelin-dom',
'javelin-workflow',
),
60479091 => array(
'phabricator-busy',
'javelin-behavior',
),
82439934 => array(
'javelin-behavior',
'javelin-dom',
'javelin-util',
'javelin-stratcom',
'javelin-workflow',
'phabricator-draggable-list',
),
),
'packages' => array(
'core.pkg.css' => array(

View file

@ -1,6 +1,6 @@
create table {$NAMESPACE}_project.project (
id int unsigned not null auto_increment primary key,
name varchar(255) not null,
name varchar(255) COLLATE `binary` not null,
unique key (name),
phid varchar(64) binary not null,
authorPHID varchar(64) binary not null,

View file

@ -1,8 +1,8 @@
create table {$NAMESPACE}_user.user_oauthinfo (
id int unsigned not null auto_increment primary key,
userID int unsigned not null,
oauthProvider varchar(255) not null,
oauthUID varchar(255) not null,
oauthProvider varchar(255) COLLATE `binary` not null,
oauthUID varchar(255) COLLATE `binary` not null,
unique key (userID, oauthProvider),
unique key (oauthProvider, oauthUID),
dateCreated int unsigned not null,

View file

@ -23,6 +23,6 @@ create table {$NAMESPACE}_timeline.timeline_eventdata (
);
create table {$NAMESPACE}_timeline.timeline_cursor (
name varchar(255) not null primary key,
name varchar(255) COLLATE `binary` not null primary key,
position int unsigned not null
);

View file

@ -7,7 +7,7 @@ CREATE TABLE {$NAMESPACE}_herald.herald_action (
CREATE TABLE {$NAMESPACE}_herald.herald_rule (
id int unsigned not null auto_increment primary key,
name varchar(255) not null,
name varchar(255) COLLATE `binary` not null,
authorPHID varchar(64) binary not null,
contentType varchar(255) not null,
mustMatchAll bool not null,

View file

@ -1,4 +1,4 @@
CREATE TABLE {$NAMESPACE}_repository.repository_badcommit (
fullCommitName varchar(255) binary not null primary key,
fullCommitName varchar(255) COLLATE `binary` not null primary key,
description longblob not null
);

View file

@ -2,7 +2,7 @@ CREATE TABLE {$NAMESPACE}_owners.owners_package (
id int unsigned not null auto_increment primary key,
phid varchar(64) binary not null,
unique key(phid),
name varchar(255) not null,
name varchar(255) COLLATE `binary` not null,
unique key(name),
description text not null,
primaryOwnerPHID varchar(64) binary

View file

@ -2,7 +2,7 @@ CREATE TABLE {$NAMESPACE}_repository.repository_arcanistproject (
id int unsigned not null auto_increment primary key,
phid varchar(64) binary not null,
unique key(phid),
name varchar(255) not null,
name varchar(255) COLLATE `binary` not null,
unique key (name),
repositoryID int unsigned
);

View file

@ -1,6 +1,6 @@
CREATE TABLE {$NAMESPACE}_file.file_proxyimage (
id int unsigned not null primary key auto_increment,
uri varchar(255) binary not null,
uri varchar(255) COLLATE `binary` not null,
unique key(uri),
filePHID varchar(64) binary not null
) ENGINE=InnoDB;

View file

@ -1,7 +1,7 @@
CREATE TABLE {$NAMESPACE}_file.file_transformedfile (
id INT UNSIGNED NOT NULL AUTO_INCREMENT PRIMARY KEY,
originalPHID varchar(64) BINARY NOT NULL,
transform varchar(255) BINARY NOT NULL,
originalPHID varchar(64) COLLATE `binary` NOT NULL,
transform varchar(255) COLLATE `binary` NOT NULL,
unique key (originalPHID, transform),
transformedPHID varchar(64) BINARY NOT NULL,
key (transformedPHID),

View file

@ -1,7 +1,7 @@
create table {$NAMESPACE}_maniphest.maniphest_taskauxiliarystorage
(id int unsigned not null auto_increment primary key,
taskPHID varchar(64) binary not null,
name varchar(255) not null,
name varchar(255) COLLATE `binary` not null,
value varchar(255) not null,
unique key (taskPHID,name),
dateCreated int unsigned not null,

View file

@ -64,6 +64,13 @@ try {
'help' => 'Do not actually change anything, just show what would be '.
'changed.',
),
array(
'name' => 'disable-utf8mb4',
'help' => pht(
'Disable utf8mb4, even if the database supports it. This is an '.
'advanced feature used for testing changes to Phabricator; you '.
'should not normally use this flag.'),
)
));
} catch (PhutilArgumentUsageException $ex) {
$args->printUsageException($ex);
@ -126,6 +133,7 @@ $api->setHost($default_host);
$api->setPort($default_port);
$api->setPassword($password);
$api->setNamespace($args->getArg('namespace'));
$api->setDisableUTF8MB4($args->getArg('disable-utf8mb4'));
try {
queryfx(

View file

@ -153,11 +153,7 @@ final class PhabricatorConfigSchemaQuery extends Phobject {
public function loadExpectedSchema() {
$databases = $this->getDatabaseNames();
$api = $this->getAPI();
$charset_info = $api->getCharsetInfo();
list($charset, $collate_text, $collate_sort) = $charset_info;
$info = $this->getAPI()->getCharsetInfo();
$specs = id(new PhutilSymbolLoader())
->setAncestorClass('PhabricatorConfigSchemaSpec')
@ -166,9 +162,12 @@ final class PhabricatorConfigSchemaQuery extends Phobject {
$server_schema = new PhabricatorConfigServerSchema();
foreach ($specs as $spec) {
$spec
->setUTF8Charset($charset)
->setUTF8BinaryCollation($collate_text)
->setUTF8SortingCollation($collate_sort)
->setUTF8Charset(
$info[PhabricatorStorageManagementAPI::CHARSET_DEFAULT])
->setUTF8BinaryCollation(
$info[PhabricatorStorageManagementAPI::COLLATE_TEXT])
->setUTF8SortingCollation(
$info[PhabricatorStorageManagementAPI::COLLATE_SORT])
->setServer($server_schema)
->buildSchemata($server_schema);
}

View file

@ -9,7 +9,7 @@ final class PhabricatorUserSchemaSpec extends PhabricatorConfigSchemaSpec {
id(new PhabricatorUser())->getApplicationName(),
PhabricatorUser::NAMETOKEN_TABLE,
array(
'token' => 'text255',
'token' => 'sort255',
'userID' => 'id',
),
array(

View file

@ -8,6 +8,22 @@ final class PhabricatorStorageManagementAPI {
private $password;
private $namespace;
private $conns = array();
private $disableUTF8MB4;
const CHARSET_DEFAULT = 'CHARSET';
const CHARSET_FULLTEXT = 'CHARSET_FULLTEXT';
const COLLATE_TEXT = 'COLLATE_TEXT';
const COLLATE_SORT = 'COLLATE_SORT';
const COLLATE_FULLTEXT = 'COLLATE_FULLTEXT';
public function setDisableUTF8MB4($disable_utf8_mb4) {
$this->disableUTF8MB4 = $disable_utf8_mb4;
return $this;
}
public function getDisableUTF8MB4() {
return $this->disableUTF8MB4;
}
public function setNamespace($namespace) {
$this->namespace = $namespace;
@ -110,13 +126,12 @@ final class PhabricatorStorageManagementAPI {
public function createDatabase($fragment) {
$info = $this->getCharsetInfo();
list($charset, $collate_text, $collate_sort) = $info;
queryfx(
$this->getConn(null),
'CREATE DATABASE IF NOT EXISTS %T COLLATE %T',
$this->getDatabaseName($fragment),
$collate_text);
$info[self::COLLATE_TEXT]);
}
public function createTable($fragment, $table, array $cols) {
@ -187,15 +202,17 @@ final class PhabricatorStorageManagementAPI {
$conn = $this->getConn(null);
$charset_info = $this->getCharsetInfo();
list($charset, $collate_text, $collate_sort) = $charset_info;
foreach ($charset_info as $key => $value) {
$charset_info[$key] = qsprintf($conn, '%T', $value);
}
foreach ($queries as $query) {
$query = str_replace('{$NAMESPACE}', $this->namespace, $query);
$query = str_replace('{$CHARSET}', $charset, $query);
$escaped_text = qsprintf($conn, '%T', $collate_text);
$query = str_replace('{$COLLATE_TEXT}', $escaped_text, $query);
$escaped_text = qsprintf($conn, '%T', $collate_sort);
$query = str_replace('{$COLLATE_SORT}', $escaped_text, $query);
foreach ($charset_info as $key => $value) {
$query = str_replace('{$'.$key.'}', $value, $query);
}
queryfx(
$conn,
'%Q',
@ -209,6 +226,12 @@ final class PhabricatorStorageManagementAPI {
}
public function isCharacterSetAvailable($character_set) {
if ($character_set == 'utf8mb4') {
if ($this->getDisableUTF8MB4()) {
return false;
}
}
$conn = $this->getConn(null);
$result = queryfx_one(
@ -226,8 +249,10 @@ final class PhabricatorStorageManagementAPI {
// collation. This is most correct, and will sort properly.
$charset = 'utf8mb4';
$charset_full = 'utf8mb4';
$collate_text = 'utf8mb4_bin';
$collate_sort = 'utf8mb4_unicode_ci';
$collate_full = 'utf8mb4_unicode_ci';
} else {
// If utf8mb4 is not available, we use binary. This allows us to store
// 4-byte unicode characters. This has some tradeoffs:
@ -238,13 +263,25 @@ final class PhabricatorStorageManagementAPI {
// It's possible that strings will be truncated in the middle of a
// character on insert. We encourage users to set STRICT_ALL_TABLES
// to prevent this.
//
// There's no valid collation we can use to get a fulltext index on
// 4-byte unicode characters: we can't add a fulltext key to a binary
// column.
$charset = 'binary';
$charset_full = 'utf8';
$collate_text = 'binary';
$collate_sort = 'binary';
$collate_full = 'utf8_general_ci';
}
return array($charset, $collate_text, $collate_sort);
return array(
self::CHARSET_DEFAULT => $charset,
self::CHARSET_FULLTEXT => $charset_full,
self::COLLATE_TEXT => $collate_text,
self::COLLATE_SORT => $collate_sort,
self::COLLATE_FULLTEXT => $collate_full,
);
}
}

View file

@ -10,14 +10,26 @@ final class PhabricatorStorageManagementAdjustWorkflow
->setSynopsis(
pht(
'Make schemata adjustments to correct issues with characters sets, '.
'collations, and keys.'));
'collations, and keys.'))
->setArguments(
array(
array(
'name' => 'unsafe',
'help' => pht(
'Permit adjustments which truncate data. This option may '.
'destroy some data, but the lost data is usually not '.
'important (most commonly, the ends of very long object '.
'titles).'),
),
));
}
public function execute(PhutilArgumentParser $args) {
$force = $args->getArg('force');
$unsafe = $args->getArg('unsafe');
$this->requireAllPatchesApplied();
return $this->adjustSchemata($force);
return $this->adjustSchemata($force, $unsafe);
}
private function requireAllPatchesApplied() {
@ -59,7 +71,7 @@ final class PhabricatorStorageManagementAdjustWorkflow
return array($comp, $expect, $actual);
}
private function adjustSchemata($force) {
private function adjustSchemata($force, $unsafe) {
$console = PhutilConsole::getConsole();
$console->writeOut(
@ -138,6 +150,12 @@ final class PhabricatorStorageManagementAdjustWorkflow
$api = $this->getAPI();
$conn = $api->getConn(null);
if ($unsafe) {
queryfx($conn, 'SET SESSION sql_mode = %s', '');
} else {
queryfx($conn, 'SET SESSION sql_mode = %s', 'STRICT_ALL_TABLES');
}
$failed = array();
// We make changes in several phases.
@ -327,6 +345,15 @@ final class PhabricatorStorageManagementAdjustWorkflow
"\n%s\n",
pht('Failed to make some schema adjustments, detailed above.'));
if (!$unsafe) {
$console->writeOut(
"%s\n",
pht(
'Migrations which fail with certain types of errors (including '.
'"#1406 Data Too Long" and "#1366 Incorrect String Value") can be '.
'forced to complete by running again with `--unsafe`.'));
}
return 1;
}

View file

@ -33,6 +33,14 @@ final class PhabricatorStorageManagementQuickstartWorkflow
$bin = dirname(phutil_get_library_root('phabricator')).'/bin/storage';
if (!$this->getAPI()->isCharacterSetAvailable('utf8mb4')) {
throw new PhutilArgumentUsageException(
pht(
'You can only generate a new quickstart file if MySQL supports '.
'the utf8mb4 character set (available in MySQL 5.5 and newer). The '.
'configured server does not support utf8mb4.'));
}
$err = phutil_passthru(
'%s upgrade --force --no-quickstart --namespace %s',
$bin,
@ -74,6 +82,21 @@ final class PhabricatorStorageManagementQuickstartWorkflow
'{$NAMESPACE}',
$dump);
// NOTE: This is a hack. We can not use `binary` for this column, because
// it is part of a fulltext index.
$old = $dump;
$dump = preg_replace(
'/`corpus` longtext CHARACTER SET .* COLLATE .*,/mi',
'`corpus` longtext CHARACTER SET {$CHARSET_FULLTEXT} '.
'COLLATE {$COLLATE_FULLTEXT},',
$dump);
if ($dump == $old) {
// If we didn't make any changes, yell about it. We'll produce an invalid
// dump otherwise.
throw new PhutilArgumentUsageException(
pht('Failed to apply hack to adjust FULLTEXT search column!'));
}
$dump = str_replace(
'utf8mb4_bin',
'{$COLLATE_TEXT}',