1
0
Fork 0
mirror of https://we.phorge.it/source/phorge.git synced 2024-11-22 23:02:42 +01:00

Use binary collations for most text

Summary:
Ref T1191. For most text columns, we either don't care if "a" and "A" are the same, or we expect them to be different (for example: keys, domains, and secrets). Default text columns to the `_bin` collation so they are compared by strict character value. This is safer in cases where we aren't sure.

For some text columns, we allow the user to sort by the column in the UI (like Maniphest task titles) or we do care that "A" and "a" are the same (for example: project names). Introduce a new class of virtual data types, the "sort..." types, to cover these columns. These are like the "text..." types but use a sorting collation that treats "A" and "a" the same. (When `utf8mb4` is unavailable, both the binary and sorting collations fall back to `binary`, so this distinction only applies on the `utf8mb4` path.)

Test Plan:
  - Made an effort to identify all columns where the UI relies on database collation.
  - Ran `bin/storage adjust` and cleared all warnings.

Reviewers: btrahan

Reviewed By: btrahan

Subscribers: beng, epriestley

Maniphest Tasks: T1191

Differential Revision: https://secure.phabricator.com/D10602
This commit is contained in:
epriestley 2014-10-01 08:18:53 -07:00
parent 4fcc634a99
commit 1dfa94e571
10 changed files with 73 additions and 32 deletions

View file

@ -154,7 +154,8 @@ final class PhabricatorConfigSchemaQuery extends Phobject {
// collation. This is most correct, and will sort properly. // collation. This is most correct, and will sort properly.
$utf8_charset = 'utf8mb4'; $utf8_charset = 'utf8mb4';
$utf8_collation = 'utf8mb4_unicode_ci'; $utf8_binary_collation = 'utf8mb4_bin';
$utf8_sorting_collation = 'utf8mb4_unicode_ci';
} else { } else {
// If utf8mb4 is not available, we use binary. This allows us to store // If utf8mb4 is not available, we use binary. This allows us to store
// 4-byte unicode characters. This has some tradeoffs: // 4-byte unicode characters. This has some tradeoffs:
@ -167,7 +168,8 @@ final class PhabricatorConfigSchemaQuery extends Phobject {
// to prevent this. // to prevent this.
$utf8_charset = 'binary'; $utf8_charset = 'binary';
$utf8_collation = 'binary'; $utf8_binary_collation = 'binary';
$utf8_sorting_collation = 'binary';
} }
$specs = id(new PhutilSymbolLoader()) $specs = id(new PhutilSymbolLoader())
@ -177,8 +179,9 @@ final class PhabricatorConfigSchemaQuery extends Phobject {
$server_schema = new PhabricatorConfigServerSchema(); $server_schema = new PhabricatorConfigServerSchema();
foreach ($specs as $spec) { foreach ($specs as $spec) {
$spec $spec
->setUTF8Collation($utf8_collation)
->setUTF8Charset($utf8_charset) ->setUTF8Charset($utf8_charset)
->setUTF8BinaryCollation($utf8_binary_collation)
->setUTF8SortingCollation($utf8_sorting_collation)
->setServer($server_schema) ->setServer($server_schema)
->buildSchemata($server_schema); ->buildSchemata($server_schema);
} }

View file

@ -4,15 +4,25 @@ abstract class PhabricatorConfigSchemaSpec extends Phobject {
private $server; private $server;
private $utf8Charset; private $utf8Charset;
private $utf8Collation; private $utf8BinaryCollation;
private $utf8SortingCollation;
public function setUTF8Collation($utf8_collation) { public function setUTF8SortingCollation($utf8_sorting_collation) {
$this->utf8Collation = $utf8_collation; $this->utf8SortingCollation = $utf8_sorting_collation;
return $this; return $this;
} }
public function getUTF8Collation() { public function getUTF8SortingCollation() {
return $this->utf8Collation; return $this->utf8SortingCollation;
}
public function setUTF8BinaryCollation($utf8_binary_collation) {
$this->utf8BinaryCollation = $utf8_binary_collation;
return $this;
}
public function getUTF8BinaryCollation() {
return $this->utf8BinaryCollation;
} }
public function setUTF8Charset($utf8_charset) { public function setUTF8Charset($utf8_charset) {
@ -195,7 +205,7 @@ abstract class PhabricatorConfigSchemaSpec extends Phobject {
return id(new PhabricatorConfigDatabaseSchema()) return id(new PhabricatorConfigDatabaseSchema())
->setName($this->getNamespacedDatabase($name)) ->setName($this->getNamespacedDatabase($name))
->setCharacterSet($this->getUTF8Charset()) ->setCharacterSet($this->getUTF8Charset())
->setCollation($this->getUTF8Collation()); ->setCollation($this->getUTF8BinaryCollation());
} }
protected function getNamespacedDatabase($name) { protected function getNamespacedDatabase($name) {
@ -206,7 +216,7 @@ abstract class PhabricatorConfigSchemaSpec extends Phobject {
protected function newTable($name) { protected function newTable($name) {
return id(new PhabricatorConfigTableSchema()) return id(new PhabricatorConfigTableSchema())
->setName($name) ->setName($name)
->setCollation($this->getUTF8Collation()); ->setCollation($this->getUTF8BinaryCollation());
} }
protected function newColumn($name) { protected function newColumn($name) {
@ -276,70 +286,95 @@ abstract class PhabricatorConfigSchemaSpec extends Phobject {
case 'bytes': case 'bytes':
$column_type = 'longblob'; $column_type = 'longblob';
break; break;
case 'sort255':
$column_type = 'varchar(255)';
$charset = $this->getUTF8Charset();
$collation = $this->getUTF8SortingCollation();
break;
case 'sort128':
$column_type = 'varchar(128)';
$charset = $this->getUTF8Charset();
$collation = $this->getUTF8SortingCollation();
break;
case 'sort64':
$column_type = 'varchar(64)';
$charset = $this->getUTF8Charset();
$collation = $this->getUTF8SortingCollation();
break;
case 'sort32':
$column_type = 'varchar(32)';
$charset = $this->getUTF8Charset();
$collation = $this->getUTF8SortingCollation();
break;
case 'sort':
$column_type = 'longtext';
$charset = $this->getUTF8Charset();
$collation = $this->getUTF8SortingCollation();
break;
case 'text255': case 'text255':
$column_type = 'varchar(255)'; $column_type = 'varchar(255)';
$charset = $this->getUTF8Charset(); $charset = $this->getUTF8Charset();
$collation = $this->getUTF8Collation(); $collation = $this->getUTF8BinaryCollation();
break; break;
case 'text160': case 'text160':
$column_type = 'varchar(160)'; $column_type = 'varchar(160)';
$charset = $this->getUTF8Charset(); $charset = $this->getUTF8Charset();
$collation = $this->getUTF8Collation(); $collation = $this->getUTF8BinaryCollation();
break; break;
case 'text128': case 'text128':
$column_type = 'varchar(128)'; $column_type = 'varchar(128)';
$charset = $this->getUTF8Charset(); $charset = $this->getUTF8Charset();
$collation = $this->getUTF8Collation(); $collation = $this->getUTF8BinaryCollation();
break; break;
case 'text80': case 'text80':
$column_type = 'varchar(80)'; $column_type = 'varchar(80)';
$charset = $this->getUTF8Charset(); $charset = $this->getUTF8Charset();
$collation = $this->getUTF8Collation(); $collation = $this->getUTF8BinaryCollation();
break; break;
case 'text64': case 'text64':
$column_type = 'varchar(64)'; $column_type = 'varchar(64)';
$charset = $this->getUTF8Charset(); $charset = $this->getUTF8Charset();
$collation = $this->getUTF8Collation(); $collation = $this->getUTF8BinaryCollation();
break; break;
case 'text40': case 'text40':
$column_type = 'varchar(40)'; $column_type = 'varchar(40)';
$charset = $this->getUTF8Charset(); $charset = $this->getUTF8Charset();
$collation = $this->getUTF8Collation(); $collation = $this->getUTF8BinaryCollation();
break; break;
case 'text32': case 'text32':
$column_type = 'varchar(32)'; $column_type = 'varchar(32)';
$charset = $this->getUTF8Charset(); $charset = $this->getUTF8Charset();
$collation = $this->getUTF8Collation(); $collation = $this->getUTF8BinaryCollation();
break; break;
case 'text20': case 'text20':
$column_type = 'varchar(20)'; $column_type = 'varchar(20)';
$charset = $this->getUTF8Charset(); $charset = $this->getUTF8Charset();
$collation = $this->getUTF8Collation(); $collation = $this->getUTF8BinaryCollation();
break; break;
case 'text16': case 'text16':
$column_type = 'varchar(16)'; $column_type = 'varchar(16)';
$charset = $this->getUTF8Charset(); $charset = $this->getUTF8Charset();
$collation = $this->getUTF8Collation(); $collation = $this->getUTF8BinaryCollation();
break; break;
case 'text12': case 'text12':
$column_type = 'varchar(12)'; $column_type = 'varchar(12)';
$charset = $this->getUTF8Charset(); $charset = $this->getUTF8Charset();
$collation = $this->getUTF8Collation(); $collation = $this->getUTF8BinaryCollation();
break; break;
case 'text8': case 'text8':
$column_type = 'varchar(8)'; $column_type = 'varchar(8)';
$charset = $this->getUTF8Charset(); $charset = $this->getUTF8Charset();
$collation = $this->getUTF8Collation(); $collation = $this->getUTF8BinaryCollation();
break; break;
case 'text4': case 'text4':
$column_type = 'varchar(4)'; $column_type = 'varchar(4)';
$charset = $this->getUTF8Charset(); $charset = $this->getUTF8Charset();
$collation = $this->getUTF8Collation(); $collation = $this->getUTF8BinaryCollation();
break; break;
case 'text': case 'text':
$column_type = 'longtext'; $column_type = 'longtext';
$charset = $this->getUTF8Charset(); $charset = $this->getUTF8Charset();
$collation = $this->getUTF8Collation(); $collation = $this->getUTF8BinaryCollation();
break; break;
case 'bool': case 'bool':
$column_type = 'tinyint(1)'; $column_type = 'tinyint(1)';

View file

@ -13,7 +13,7 @@ final class ManiphestNameIndex extends ManiphestDAO {
return array( return array(
self::CONFIG_TIMESTAMPS => false, self::CONFIG_TIMESTAMPS => false,
self::CONFIG_COLUMN_SCHEMA => array( self::CONFIG_COLUMN_SCHEMA => array(
'indexedObjectName' => 'text128', 'indexedObjectName' => 'sort128',
), ),
self::CONFIG_KEY_SCHEMA => array( self::CONFIG_KEY_SCHEMA => array(
'key_phid' => array( 'key_phid' => array(

View file

@ -71,7 +71,7 @@ final class ManiphestTask extends ManiphestDAO
'ownerPHID' => 'phid?', 'ownerPHID' => 'phid?',
'status' => 'text12', 'status' => 'text12',
'priority' => 'uint32', 'priority' => 'uint32',
'title' => 'text', 'title' => 'sort',
'originalTitle' => 'text', 'originalTitle' => 'text',
'description' => 'text', 'description' => 'text',
'mailKey' => 'bytes20', 'mailKey' => 'bytes20',
@ -114,6 +114,9 @@ final class ManiphestTask extends ManiphestDAO
'key_dateModified' => array( 'key_dateModified' => array(
'columns' => array('dateModified'), 'columns' => array('dateModified'),
), ),
'key_title' => array(
'columns' => array('title(64)'),
),
), ),
) + parent::getConfiguration(); ) + parent::getConfiguration();
} }

View file

@ -90,7 +90,7 @@ final class PhamePost extends PhameDAO
), ),
self::CONFIG_COLUMN_SCHEMA => array( self::CONFIG_COLUMN_SCHEMA => array(
'title' => 'text255', 'title' => 'text255',
'phameTitle' => 'text64', 'phameTitle' => 'sort64',
'visibility' => 'uint32', 'visibility' => 'uint32',
// T6203/NULLABILITY // T6203/NULLABILITY

View file

@ -32,7 +32,7 @@ final class PhrictionContent extends PhrictionDAO
return array( return array(
self::CONFIG_COLUMN_SCHEMA => array( self::CONFIG_COLUMN_SCHEMA => array(
'version' => 'uint32', 'version' => 'uint32',
'title' => 'text', 'title' => 'sort',
'slug' => 'text128', 'slug' => 'text128',
'content' => 'text', 'content' => 'text',
'changeType' => 'uint32', 'changeType' => 'uint32',

View file

@ -25,7 +25,7 @@ final class PhrictionDocument extends PhrictionDAO
self::CONFIG_AUX_PHID => true, self::CONFIG_AUX_PHID => true,
self::CONFIG_TIMESTAMPS => false, self::CONFIG_TIMESTAMPS => false,
self::CONFIG_COLUMN_SCHEMA => array( self::CONFIG_COLUMN_SCHEMA => array(
'slug' => 'text128', 'slug' => 'sort128',
'depth' => 'uint32', 'depth' => 'uint32',
'contentID' => 'id?', 'contentID' => 'id?',
'status' => 'uint32', 'status' => 'uint32',

View file

@ -122,7 +122,7 @@ final class PhabricatorProject extends PhabricatorProjectDAO
'subprojectPHIDs' => self::SERIALIZATION_JSON, 'subprojectPHIDs' => self::SERIALIZATION_JSON,
), ),
self::CONFIG_COLUMN_SCHEMA => array( self::CONFIG_COLUMN_SCHEMA => array(
'name' => 'text128', 'name' => 'sort128',
'status' => 'text32', 'status' => 'text32',
'phrictionSlug' => 'text128?', 'phrictionSlug' => 'text128?',
'isMembershipLocked' => 'bool', 'isMembershipLocked' => 'bool',

View file

@ -79,8 +79,8 @@ final class PhabricatorRepository extends PhabricatorRepositoryDAO
'details' => self::SERIALIZATION_JSON, 'details' => self::SERIALIZATION_JSON,
), ),
self::CONFIG_COLUMN_SCHEMA => array( self::CONFIG_COLUMN_SCHEMA => array(
'name' => 'text255', 'name' => 'sort255',
'callsign' => 'text32', 'callsign' => 'sort32',
'versionControlSystem' => 'text32', 'versionControlSystem' => 'text32',
'uuid' => 'text64?', 'uuid' => 'text64?',
'pushPolicy' => 'policy', 'pushPolicy' => 'policy',

View file

@ -7,7 +7,7 @@ abstract class PhabricatorCustomFieldStringIndexStorage
return array( return array(
self::CONFIG_COLUMN_SCHEMA => array( self::CONFIG_COLUMN_SCHEMA => array(
'indexKey' => 'bytes12', 'indexKey' => 'bytes12',
'indexValue' => 'text', 'indexValue' => 'sort',
), ),
self::CONFIG_KEY_SCHEMA => array( self::CONFIG_KEY_SCHEMA => array(
'key_join' => array( 'key_join' => array(