mirror of
https://we.phorge.it/source/phorge.git
synced 2024-12-20 12:30:56 +01:00
Use utf8_general_ci for "sort" columns in old MySQL
Summary: Fixes T7287. This trades off 4-byte character support for case insensitivity in these columns, which is a much better trade on the balance. Also adds more warnings about old MySQL. Note that we already issue a warning when you run "storage adjust" (which I've made stronger) and already "strongly recommend" MySQL 5.5 or newer in the install documentation. Test Plan: - Ran `storage adjust --disable-utf8mb4` to go to old definitions, then ran `storage adjust` to get back to the new ones. Everything seemed OK in both cases. - Verified that utf8mb4 data can be migrated out of these colums with `--unsafe` (which will truncate). - Verified that manual explains this. - Faked my way into the setup warning. Reviewers: btrahan Reviewed By: btrahan Subscribers: epriestley Maniphest Tasks: T7287 Differential Revision: https://secure.phabricator.com/D11893
This commit is contained in:
parent
1236043472
commit
9e82cfcc21
3 changed files with 47 additions and 12 deletions
|
@ -315,6 +315,26 @@ final class PhabricatorMySQLSetupCheck extends PhabricatorSetupCheck {
|
|||
->addMySQLConfig('innodb_buffer_pool_size');
|
||||
}
|
||||
|
||||
$ok = PhabricatorStorageManagementAPI::isCharacterSetAvailableOnConnection(
|
||||
'utf8mb4',
|
||||
id(new PhabricatorUser())->establishConnection('w'));
|
||||
if (!$ok) {
|
||||
$summary = pht(
|
||||
'You are using an old version of MySQL, and should upgrade.');
|
||||
|
||||
$message = pht(
|
||||
'You are using an old version of MySQL which has poor unicode '.
|
||||
'support (it does not support the "utf8mb4" collation set). You will '.
|
||||
'encounter limitations when working with some unicode data.'.
|
||||
"\n\n".
|
||||
'We strongly recommend you upgrade to MySQL 5.5 or newer.');
|
||||
|
||||
$this->newIssue('mysql.utf8mb4')
|
||||
->setName(pht('Old MySQL Version'))
|
||||
->setSummary($summary)
|
||||
->setMessage($message);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -233,7 +233,12 @@ final class PhabricatorStorageManagementAPI {
|
|||
}
|
||||
|
||||
$conn = $this->getConn(null);
|
||||
return self::isCharacterSetAvailableOnConnection($character_set, $conn);
|
||||
}
|
||||
|
||||
public static function isCharacterSetAvailableOnConnection(
|
||||
$character_set,
|
||||
AphrontDatabaseConnection $conn) {
|
||||
$result = queryfx_one(
|
||||
$conn,
|
||||
'SELECT CHARACTER_SET_NAME FROM INFORMATION_SCHEMA.CHARACTER_SETS
|
||||
|
@ -254,24 +259,32 @@ final class PhabricatorStorageManagementAPI {
|
|||
$collate_sort = 'utf8mb4_unicode_ci';
|
||||
$collate_full = 'utf8mb4_unicode_ci';
|
||||
} else {
|
||||
// If utf8mb4 is not available, we use binary. This allows us to store
|
||||
// 4-byte unicode characters. This has some tradeoffs:
|
||||
//
|
||||
// Unicode characters won't sort correctly. There's nothing we can do
|
||||
// about this while still supporting 4-byte characters.
|
||||
// If utf8mb4 is not available, we use binary for most data. This allows
|
||||
// us to store 4-byte unicode characters.
|
||||
//
|
||||
// It's possible that strings will be truncated in the middle of a
|
||||
// character on insert. We encourage users to set STRICT_ALL_TABLES
|
||||
// to prevent this.
|
||||
//
|
||||
// There's no valid collation we can use to get a fulltext index on
|
||||
// 4-byte unicode characters: we can't add a fulltext key to a binary
|
||||
// column.
|
||||
// For "fulltext" and "sort" columns, we don't use binary.
|
||||
//
|
||||
// With "fulltext", we can not use binary because MySQL won't let us.
|
||||
// We use 3-byte utf8 instead and accept being unable to index 4-byte
|
||||
// characters.
|
||||
//
|
||||
// With "sort", if we use binary we lose case insensitivity (for
|
||||
// example, "ALincoln@logcabin.com" and "alincoln@logcabin.com" would no
|
||||
// longer be identified as the same email address). This can be very
|
||||
// confusing and is far worse overall than not supporting 4-byte unicode
|
||||
// characters, so we use 3-byte utf8 and accept limited 4-byte support as
|
||||
// a tradeoff to get sensible collation behavior. Many columns where
|
||||
// collation is important rarely contain 4-byte characters anyway, so we
|
||||
// are not giving up too much.
|
||||
|
||||
$charset = 'binary';
|
||||
$charset_full = 'utf8';
|
||||
$collate_text = 'binary';
|
||||
$collate_sort = 'binary';
|
||||
$collate_sort = 'utf8_general_ci';
|
||||
$collate_full = 'utf8_general_ci';
|
||||
}
|
||||
|
||||
|
|
|
@ -57,9 +57,11 @@ abstract class PhabricatorStorageManagementWorkflow
|
|||
if (!$force && !$api->isCharacterSetAvailable('utf8mb4')) {
|
||||
$message = pht(
|
||||
"You have an old version of MySQL (older than 5.5) which does not ".
|
||||
"support the utf8mb4 character set. If you apply adjustments now ".
|
||||
"and later update MySQL to 5.5 or newer, you'll need to apply ".
|
||||
"adjustments again (and they will take a long time).\n\n".
|
||||
"support the utf8mb4 character set. We strongly recomend upgrading to ".
|
||||
"5.5 or newer.\n\n".
|
||||
"If you apply adjustments now and later update MySQL to 5.5 or newer, ".
|
||||
"you'll need to apply adjustments again (and they will take a long ".
|
||||
"time).\n\n".
|
||||
"You can exit this workflow, update MySQL now, and then run this ".
|
||||
"workflow again. This is recommended, but may cause a lot of downtime ".
|
||||
"right now.\n\n".
|
||||
|
|
Loading…
Reference in a new issue