1
0
Fork 0
mirror of https://we.phorge.it/source/phorge.git synced 2024-11-20 05:42:40 +01:00

Use utf8_general_ci for "sort" columns in old MySQL

Summary:
Fixes T7287. This trades off 4-byte character support for case insensitivity in these columns, which is a much better trade on balance.

Also adds more warnings about old MySQL. Note that we already issue a warning when you run "storage adjust" (which I've made stronger) and already "strongly recommend" MySQL 5.5 or newer in the install documentation.

Test Plan:
  - Ran `storage adjust --disable-utf8mb4` to go to old definitions, then ran `storage adjust` to get back to the new ones. Everything seemed OK in both cases.
  - Verified that utf8mb4 data can be migrated out of these columns with `--unsafe` (which will truncate).
  - Verified that manual explains this.
  - Faked my way into the setup warning.

Reviewers: btrahan

Reviewed By: btrahan

Subscribers: epriestley

Maniphest Tasks: T7287

Differential Revision: https://secure.phabricator.com/D11893
This commit is contained in:
epriestley 2015-02-26 10:18:54 -08:00
parent 1236043472
commit 9e82cfcc21
3 changed files with 47 additions and 12 deletions

View file

@ -315,6 +315,26 @@ final class PhabricatorMySQLSetupCheck extends PhabricatorSetupCheck {
->addMySQLConfig('innodb_buffer_pool_size'); ->addMySQLConfig('innodb_buffer_pool_size');
} }
$ok = PhabricatorStorageManagementAPI::isCharacterSetAvailableOnConnection(
'utf8mb4',
id(new PhabricatorUser())->establishConnection('w'));
if (!$ok) {
$summary = pht(
'You are using an old version of MySQL, and should upgrade.');
$message = pht(
'You are using an old version of MySQL which has poor unicode '.
'support (it does not support the "utf8mb4" collation set). You will '.
'encounter limitations when working with some unicode data.'.
"\n\n".
'We strongly recommend you upgrade to MySQL 5.5 or newer.');
$this->newIssue('mysql.utf8mb4')
->setName(pht('Old MySQL Version'))
->setSummary($summary)
->setMessage($message);
}
} }
} }

View file

@ -233,7 +233,12 @@ final class PhabricatorStorageManagementAPI {
} }
$conn = $this->getConn(null); $conn = $this->getConn(null);
return self::isCharacterSetAvailableOnConnection($character_set, $conn);
}
public static function isCharacterSetAvailableOnConnection(
$character_set,
AphrontDatabaseConnection $conn) {
$result = queryfx_one( $result = queryfx_one(
$conn, $conn,
'SELECT CHARACTER_SET_NAME FROM INFORMATION_SCHEMA.CHARACTER_SETS 'SELECT CHARACTER_SET_NAME FROM INFORMATION_SCHEMA.CHARACTER_SETS
@ -254,24 +259,32 @@ final class PhabricatorStorageManagementAPI {
$collate_sort = 'utf8mb4_unicode_ci'; $collate_sort = 'utf8mb4_unicode_ci';
$collate_full = 'utf8mb4_unicode_ci'; $collate_full = 'utf8mb4_unicode_ci';
} else { } else {
// If utf8mb4 is not available, we use binary. This allows us to store // If utf8mb4 is not available, we use binary for most data. This allows
// 4-byte unicode characters. This has some tradeoffs: // us to store 4-byte unicode characters.
//
// Unicode characters won't sort correctly. There's nothing we can do
// about this while still supporting 4-byte characters.
// //
// It's possible that strings will be truncated in the middle of a // It's possible that strings will be truncated in the middle of a
// character on insert. We encourage users to set STRICT_ALL_TABLES // character on insert. We encourage users to set STRICT_ALL_TABLES
// to prevent this. // to prevent this.
// //
// There's no valid collation we can use to get a fulltext index on // For "fulltext" and "sort" columns, we don't use binary.
// 4-byte unicode characters: we can't add a fulltext key to a binary //
// column. // With "fulltext", we can not use binary because MySQL won't let us.
// We use 3-byte utf8 instead and accept being unable to index 4-byte
// characters.
//
// With "sort", if we use binary we lose case insensitivity (for
// example, "ALincoln@logcabin.com" and "alincoln@logcabin.com" would no
// longer be identified as the same email address). This can be very
// confusing and is far worse overall than not supporting 4-byte unicode
// characters, so we use 3-byte utf8 and accept limited 4-byte support as
// a tradeoff to get sensible collation behavior. Many columns where
// collation is important rarely contain 4-byte characters anyway, so we
// are not giving up too much.
$charset = 'binary'; $charset = 'binary';
$charset_full = 'utf8'; $charset_full = 'utf8';
$collate_text = 'binary'; $collate_text = 'binary';
$collate_sort = 'binary'; $collate_sort = 'utf8_general_ci';
$collate_full = 'utf8_general_ci'; $collate_full = 'utf8_general_ci';
} }

View file

@ -57,9 +57,11 @@ abstract class PhabricatorStorageManagementWorkflow
if (!$force && !$api->isCharacterSetAvailable('utf8mb4')) { if (!$force && !$api->isCharacterSetAvailable('utf8mb4')) {
$message = pht( $message = pht(
"You have an old version of MySQL (older than 5.5) which does not ". "You have an old version of MySQL (older than 5.5) which does not ".
"support the utf8mb4 character set. If you apply adjustments now ". "support the utf8mb4 character set. We strongly recomend upgrading to ".
"and later update MySQL to 5.5 or newer, you'll need to apply ". "5.5 or newer.\n\n".
"adjustments again (and they will take a long time).\n\n". "If you apply adjustments now and later update MySQL to 5.5 or newer, ".
"you'll need to apply adjustments again (and they will take a long ".
"time).\n\n".
"You can exit this workflow, update MySQL now, and then run this ". "You can exit this workflow, update MySQL now, and then run this ".
"workflow again. This is recommended, but may cause a lot of downtime ". "workflow again. This is recommended, but may cause a lot of downtime ".
"right now.\n\n". "right now.\n\n".