From cd14194a329788d5fff6365bcade278fd18f3612 Mon Sep 17 00:00:00 2001 From: epriestley Date: Mon, 2 Oct 2017 07:21:38 -0700 Subject: [PATCH 01/13] Fix transaction queries using withComments() for transactions with no comments Summary: See . Some object types (for example, Passphrase Credentials) support indexing but not commenting. Make `withComments(...)` work properly if the transaction type does not support comments. Test Plan: Indexed a credential (no comments) and a revision (comments) with `bin/search index --trace ...`. Before, credential fataled. After, credetial succeeds, and skips the transaction query. Before and after, the revision queries the transaction table. Reviewers: amckinley Reviewed By: amckinley Differential Revision: https://secure.phabricator.com/D18667 --- ...PhabricatorApplicationTransactionQuery.php | 31 +++++++++++++------ 1 file changed, 22 insertions(+), 9 deletions(-) diff --git a/src/applications/transactions/query/PhabricatorApplicationTransactionQuery.php b/src/applications/transactions/query/PhabricatorApplicationTransactionQuery.php index abe1498fc0..f15522a087 100644 --- a/src/applications/transactions/query/PhabricatorApplicationTransactionQuery.php +++ b/src/applications/transactions/query/PhabricatorApplicationTransactionQuery.php @@ -203,16 +203,29 @@ abstract class PhabricatorApplicationTransactionQuery $xaction = $this->getTemplateApplicationTransaction(); $comment = $xaction->getApplicationTransactionCommentObject(); - if ($this->withComments) { - $joins[] = qsprintf( - $conn, - 'JOIN %T c ON x.phid = c.transactionPHID', - $comment->getTableName()); + // Not every transaction type has comments, so we may be able to + // implement this constraint trivially. + + if (!$comment) { + if ($this->withComments) { + throw new PhabricatorEmptyQueryException(); + } else { + // If we're querying for transactions with no comments and the + // transaction type does not support comments, we don't need to + // do anything. + } } else { - $joins[] = qsprintf( - $conn, - 'LEFT JOIN %T c ON x.phid = c.transactionPHID', - $comment->getTableName()); + if ($this->withComments) { + $joins[] = qsprintf( + $conn, + 'JOIN %T c ON x.phid = c.transactionPHID', + $comment->getTableName()); + } else { + $joins[] = qsprintf( + $conn, + 'LEFT JOIN %T c ON x.phid = c.transactionPHID', + $comment->getTableName()); + } } } From f9110b87abf337dd1e7714d755775e53cffd4db9 Mon Sep 17 00:00:00 2001 From: epriestley Date: Mon, 2 Oct 2017 11:36:50 -0700 Subject: [PATCH 02/13] In "Move Tasks to Column...", show only visible columns Summary: See PHI94. I considered this initially but wasn't sure about it. However, PHI94 brings up the good point that we already use a similar rule in Maniphest. For consistency, only show visible columns here too. Test Plan: Used "Move tasks to column..." on a board with visible and hidden columns, only saw visbile columns offered in the dropdown. Reviewers: amckinley Reviewed By: amckinley Differential Revision: https://secure.phabricator.com/D18668 --- .../controller/PhabricatorProjectBoardViewController.php | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/applications/project/controller/PhabricatorProjectBoardViewController.php b/src/applications/project/controller/PhabricatorProjectBoardViewController.php index 87cb5bc417..793fe5603d 100644 --- a/src/applications/project/controller/PhabricatorProjectBoardViewController.php +++ b/src/applications/project/controller/PhabricatorProjectBoardViewController.php @@ -313,6 +313,12 @@ final class PhabricatorProjectBoardViewController $columns = $move_engine->getColumns($move_project->getPHID()); $columns = mpull($columns, null, 'getPHID'); + foreach ($columns as $key => $column) { + if ($column->isHidden()) { + unset($columns[$key]); + } + } + $move_column_phid = $request->getStr('moveColumnPHID'); if (!$move_column_phid) { if ($request->getBool('hasColumn')) { From 89fe84f9789629bbd7166965811e6d33c635f51b Mon Sep 17 00:00:00 2001 From: epriestley Date: Tue, 3 Oct 2017 12:50:01 -0700 Subject: [PATCH 03/13] Add a "/source/..." URI for Diffusion commits which redirects Summary: See PHI112. The install presumably wants to generate links to Diffusion commits from an external tool, but only knows the short name of the repository. Provide a `/source/phabricator/commit/abcdef908273` URI which redirects to the canonical URI for the commit. Test Plan: - Visited `/source/` URI for a commit, got a redirect. - Visited normal URI for a commit, got a commit page. - Visited `/branches/` and `/tags/` for a `/source/` repository, got proper pages. Reviewers: amckinley Reviewed By: amckinley Differential Revision: https://secure.phabricator.com/D18676 --- .../PhabricatorDiffusionApplication.php | 9 +++++---- .../controller/DiffusionCommitController.php | 16 +++++++++++++--- 2 files changed, 18 insertions(+), 7 deletions(-) diff --git a/src/applications/diffusion/application/PhabricatorDiffusionApplication.php b/src/applications/diffusion/application/PhabricatorDiffusionApplication.php index cf9c14aa8b..5a426ec299 100644 --- a/src/applications/diffusion/application/PhabricatorDiffusionApplication.php +++ b/src/applications/diffusion/application/PhabricatorDiffusionApplication.php @@ -69,10 +69,11 @@ final class PhabricatorDiffusionApplication extends PhabricatorApplication { 'branches/(?P.*)' => 'DiffusionBranchTableController', 'refs/(?P.*)' => 'DiffusionRefTableController', 'lint/(?P.*)' => 'DiffusionLintController', - 'commit/(?P[a-z0-9]+)/branches/' - => 'DiffusionCommitBranchesController', - 'commit/(?P[a-z0-9]+)/tags/' - => 'DiffusionCommitTagsController', + 'commit/(?P[a-z0-9]+)' => array( + '/?' => 'DiffusionCommitController', + '/branches/' => 'DiffusionCommitBranchesController', + '/tags/' => 'DiffusionCommitTagsController', + ), 'compare/' => 'DiffusionCompareController', 'manage/(?:(?P[^/]+)/)?' => 'DiffusionRepositoryManagePanelsController', diff --git a/src/applications/diffusion/controller/DiffusionCommitController.php b/src/applications/diffusion/controller/DiffusionCommitController.php index 59a132b17d..38c94e8a92 100644 --- a/src/applications/diffusion/controller/DiffusionCommitController.php +++ b/src/applications/diffusion/controller/DiffusionCommitController.php @@ -22,17 +22,27 @@ final class DiffusionCommitController extends DiffusionController { $drequest = $this->getDiffusionRequest(); $viewer = $request->getUser(); + $repository = $drequest->getRepository(); + $commit_identifier = $drequest->getCommit(); + + // If this page is being accessed via "/source/xyz/commit/...", redirect + // to the canonical URI. + $has_callsign = strlen($request->getURIData('repositoryCallsign')); + $has_id = strlen($request->getURIData('repositoryID')); + if (!$has_callsign && !$has_id) { + $canonical_uri = $repository->getCommitURI($commit_identifier); + return id(new AphrontRedirectResponse()) + ->setURI($canonical_uri); + } if ($request->getStr('diff')) { return $this->buildRawDiffResponse($drequest); } - $repository = $drequest->getRepository(); - $commit = id(new DiffusionCommitQuery()) ->setViewer($viewer) ->withRepository($repository) - ->withIdentifiers(array($drequest->getCommit())) + ->withIdentifiers(array($commit_identifier)) ->needCommitData(true) ->needAuditRequests(true) ->executeOne(); From 1de130c9f5b241cf1e1353f5d87968e7aa5eec1f Mon Sep 17 00:00:00 2001 From: epriestley Date: Mon, 2 Oct 2017 15:27:02 -0700 Subject: [PATCH 04/13] Allow the Ferret engine to remove "common" ngrams from the index Summary: Ref T13000. This adds support for tracking "common" ngrams, which occur in too many documents to be useful as part of the ngram index. If an ngram is listed in the "common" table, it won't be written when indexing documents, or queried for when searching for them. In this change, nothing actually writes to the "common" table. I'll start writing to the table in a followup change. Specifically, I plan to do this: - A new GC process updates the "common" table periodically, by writing ngrams which appear in more than X% of documents to it, for some value of X, if there are at least a minimum number of documents (maybe like 4,000). - A new GC process deletes ngrams that have been added to the common table from the existing indexes. Hopefully, this will pare down the ngrams index to something reasonable over time without requiring any manual tuning. Test Plan: - Ran some queries and indexes. - Manually inserted ngrams `xxx` and `yyy` into the ngrams table, searched and indexed, saw them ignored as viable ngrams for search/index. Reviewers: amckinley Reviewed By: amckinley Maniphest Tasks: T13000 Differential Revision: https://secure.phabricator.com/D18672 --- .../20171002.cngram.01.maniphest.sql | 7 +++ .../autopatches/20171002.cngram.02.event.sql | 7 +++ .../20171002.cngram.03.revision.sql | 7 +++ .../autopatches/20171002.cngram.04.fund.sql | 7 +++ .../autopatches/20171002.cngram.05.owners.sql | 7 +++ .../20171002.cngram.06.passphrase.sql | 7 +++ .../autopatches/20171002.cngram.07.blog.sql | 7 +++ .../autopatches/20171002.cngram.08.post.sql | 7 +++ .../autopatches/20171002.cngram.09.pholio.sql | 7 +++ .../20171002.cngram.10.phriction.sql | 7 +++ .../20171002.cngram.11.project.sql | 7 +++ .../autopatches/20171002.cngram.12.user.sql | 7 +++ .../20171002.cngram.13.repository.sql | 7 +++ .../autopatches/20171002.cngram.14.commit.sql | 7 +++ .../schema/PhabricatorConfigSchemaSpec.php | 6 +++ ...abricatorFerretFulltextEngineExtension.php | 49 ++++++++++++++----- .../search/ferret/PhabricatorFerretEngine.php | 31 ++++++++++++ ...PhabricatorCursorPagedPolicyAwareQuery.php | 28 +++++++++++ 18 files changed, 200 insertions(+), 12 deletions(-) create mode 100644 resources/sql/autopatches/20171002.cngram.01.maniphest.sql create mode 100644 resources/sql/autopatches/20171002.cngram.02.event.sql create mode 100644 resources/sql/autopatches/20171002.cngram.03.revision.sql create mode 100644 resources/sql/autopatches/20171002.cngram.04.fund.sql create mode 100644 resources/sql/autopatches/20171002.cngram.05.owners.sql create mode 100644 resources/sql/autopatches/20171002.cngram.06.passphrase.sql create mode 100644 resources/sql/autopatches/20171002.cngram.07.blog.sql create mode 100644 resources/sql/autopatches/20171002.cngram.08.post.sql create mode 100644 resources/sql/autopatches/20171002.cngram.09.pholio.sql create mode 100644 resources/sql/autopatches/20171002.cngram.10.phriction.sql create mode 100644 resources/sql/autopatches/20171002.cngram.11.project.sql create mode 100644 resources/sql/autopatches/20171002.cngram.12.user.sql create mode 100644 resources/sql/autopatches/20171002.cngram.13.repository.sql create mode 100644 resources/sql/autopatches/20171002.cngram.14.commit.sql diff --git a/resources/sql/autopatches/20171002.cngram.01.maniphest.sql b/resources/sql/autopatches/20171002.cngram.01.maniphest.sql new file mode 100644 index 0000000000..9b275f5b45 --- /dev/null +++ b/resources/sql/autopatches/20171002.cngram.01.maniphest.sql @@ -0,0 +1,7 @@ +CREATE TABLE {$NAMESPACE}_maniphest.maniphest_task_fngrams_common ( + id INT UNSIGNED NOT NULL AUTO_INCREMENT PRIMARY KEY, + ngram CHAR(3) NOT NULL COLLATE {$COLLATE_TEXT}, + needsCollection BOOL NOT NULL, + UNIQUE KEY `key_ngram` (ngram), + KEY `key_collect` (needsCollection) +) ENGINE=InnoDB, COLLATE {$COLLATE_TEXT}; diff --git a/resources/sql/autopatches/20171002.cngram.02.event.sql b/resources/sql/autopatches/20171002.cngram.02.event.sql new file mode 100644 index 0000000000..a071fdcd19 --- /dev/null +++ b/resources/sql/autopatches/20171002.cngram.02.event.sql @@ -0,0 +1,7 @@ +CREATE TABLE {$NAMESPACE}_calendar.calendar_event_fngrams_common ( + id INT UNSIGNED NOT NULL AUTO_INCREMENT PRIMARY KEY, + ngram CHAR(3) NOT NULL COLLATE {$COLLATE_TEXT}, + needsCollection BOOL NOT NULL, + UNIQUE KEY `key_ngram` (ngram), + KEY `key_collect` (needsCollection) +) ENGINE=InnoDB, COLLATE {$COLLATE_TEXT}; diff --git a/resources/sql/autopatches/20171002.cngram.03.revision.sql b/resources/sql/autopatches/20171002.cngram.03.revision.sql new file mode 100644 index 0000000000..40c2450598 --- /dev/null +++ b/resources/sql/autopatches/20171002.cngram.03.revision.sql @@ -0,0 +1,7 @@ +CREATE TABLE {$NAMESPACE}_differential.differential_revision_fngrams_common ( + id INT UNSIGNED NOT NULL AUTO_INCREMENT PRIMARY KEY, + ngram CHAR(3) NOT NULL COLLATE {$COLLATE_TEXT}, + needsCollection BOOL NOT NULL, + UNIQUE KEY `key_ngram` (ngram), + KEY `key_collect` (needsCollection) +) ENGINE=InnoDB, COLLATE {$COLLATE_TEXT}; diff --git a/resources/sql/autopatches/20171002.cngram.04.fund.sql b/resources/sql/autopatches/20171002.cngram.04.fund.sql new file mode 100644 index 0000000000..34975ce4fb --- /dev/null +++ b/resources/sql/autopatches/20171002.cngram.04.fund.sql @@ -0,0 +1,7 @@ +CREATE TABLE {$NAMESPACE}_fund.fund_initiative_fngrams_common ( + id INT UNSIGNED NOT NULL AUTO_INCREMENT PRIMARY KEY, + ngram CHAR(3) NOT NULL COLLATE {$COLLATE_TEXT}, + needsCollection BOOL NOT NULL, + UNIQUE KEY `key_ngram` (ngram), + KEY `key_collect` (needsCollection) +) ENGINE=InnoDB, COLLATE {$COLLATE_TEXT}; diff --git a/resources/sql/autopatches/20171002.cngram.05.owners.sql b/resources/sql/autopatches/20171002.cngram.05.owners.sql new file mode 100644 index 0000000000..e98d29f87c --- /dev/null +++ b/resources/sql/autopatches/20171002.cngram.05.owners.sql @@ -0,0 +1,7 @@ +CREATE TABLE {$NAMESPACE}_owners.owners_package_fngrams_common ( + id INT UNSIGNED NOT NULL AUTO_INCREMENT PRIMARY KEY, + ngram CHAR(3) NOT NULL COLLATE {$COLLATE_TEXT}, + needsCollection BOOL NOT NULL, + UNIQUE KEY `key_ngram` (ngram), + KEY `key_collect` (needsCollection) +) ENGINE=InnoDB, COLLATE {$COLLATE_TEXT}; diff --git a/resources/sql/autopatches/20171002.cngram.06.passphrase.sql b/resources/sql/autopatches/20171002.cngram.06.passphrase.sql new file mode 100644 index 0000000000..f9afa9ad87 --- /dev/null +++ b/resources/sql/autopatches/20171002.cngram.06.passphrase.sql @@ -0,0 +1,7 @@ +CREATE TABLE {$NAMESPACE}_passphrase.passphrase_credential_fngrams_common ( + id INT UNSIGNED NOT NULL AUTO_INCREMENT PRIMARY KEY, + ngram CHAR(3) NOT NULL COLLATE {$COLLATE_TEXT}, + needsCollection BOOL NOT NULL, + UNIQUE KEY `key_ngram` (ngram), + KEY `key_collect` (needsCollection) +) ENGINE=InnoDB, COLLATE {$COLLATE_TEXT}; diff --git a/resources/sql/autopatches/20171002.cngram.07.blog.sql b/resources/sql/autopatches/20171002.cngram.07.blog.sql new file mode 100644 index 0000000000..34001c3608 --- /dev/null +++ b/resources/sql/autopatches/20171002.cngram.07.blog.sql @@ -0,0 +1,7 @@ +CREATE TABLE {$NAMESPACE}_phame.phame_blog_fngrams_common ( + id INT UNSIGNED NOT NULL AUTO_INCREMENT PRIMARY KEY, + ngram CHAR(3) NOT NULL COLLATE {$COLLATE_TEXT}, + needsCollection BOOL NOT NULL, + UNIQUE KEY `key_ngram` (ngram), + KEY `key_collect` (needsCollection) +) ENGINE=InnoDB, COLLATE {$COLLATE_TEXT}; diff --git a/resources/sql/autopatches/20171002.cngram.08.post.sql b/resources/sql/autopatches/20171002.cngram.08.post.sql new file mode 100644 index 0000000000..9a9c70867e --- /dev/null +++ b/resources/sql/autopatches/20171002.cngram.08.post.sql @@ -0,0 +1,7 @@ +CREATE TABLE {$NAMESPACE}_phame.phame_post_fngrams_common ( + id INT UNSIGNED NOT NULL AUTO_INCREMENT PRIMARY KEY, + ngram CHAR(3) NOT NULL COLLATE {$COLLATE_TEXT}, + needsCollection BOOL NOT NULL, + UNIQUE KEY `key_ngram` (ngram), + KEY `key_collect` (needsCollection) +) ENGINE=InnoDB, COLLATE {$COLLATE_TEXT}; diff --git a/resources/sql/autopatches/20171002.cngram.09.pholio.sql b/resources/sql/autopatches/20171002.cngram.09.pholio.sql new file mode 100644 index 0000000000..6e8b8f8dcc --- /dev/null +++ b/resources/sql/autopatches/20171002.cngram.09.pholio.sql @@ -0,0 +1,7 @@ +CREATE TABLE {$NAMESPACE}_pholio.pholio_mock_fngrams_common ( + id INT UNSIGNED NOT NULL AUTO_INCREMENT PRIMARY KEY, + ngram CHAR(3) NOT NULL COLLATE {$COLLATE_TEXT}, + needsCollection BOOL NOT NULL, + UNIQUE KEY `key_ngram` (ngram), + KEY `key_collect` (needsCollection) +) ENGINE=InnoDB, COLLATE {$COLLATE_TEXT}; diff --git a/resources/sql/autopatches/20171002.cngram.10.phriction.sql b/resources/sql/autopatches/20171002.cngram.10.phriction.sql new file mode 100644 index 0000000000..ed31dc30ba --- /dev/null +++ b/resources/sql/autopatches/20171002.cngram.10.phriction.sql @@ -0,0 +1,7 @@ +CREATE TABLE {$NAMESPACE}_phriction.phriction_document_fngrams_common ( + id INT UNSIGNED NOT NULL AUTO_INCREMENT PRIMARY KEY, + ngram CHAR(3) NOT NULL COLLATE {$COLLATE_TEXT}, + needsCollection BOOL NOT NULL, + UNIQUE KEY `key_ngram` (ngram), + KEY `key_collect` (needsCollection) +) ENGINE=InnoDB, COLLATE {$COLLATE_TEXT}; diff --git a/resources/sql/autopatches/20171002.cngram.11.project.sql b/resources/sql/autopatches/20171002.cngram.11.project.sql new file mode 100644 index 0000000000..9c11235ba7 --- /dev/null +++ b/resources/sql/autopatches/20171002.cngram.11.project.sql @@ -0,0 +1,7 @@ +CREATE TABLE {$NAMESPACE}_project.project_project_fngrams_common ( + id INT UNSIGNED NOT NULL AUTO_INCREMENT PRIMARY KEY, + ngram CHAR(3) NOT NULL COLLATE {$COLLATE_TEXT}, + needsCollection BOOL NOT NULL, + UNIQUE KEY `key_ngram` (ngram), + KEY `key_collect` (needsCollection) +) ENGINE=InnoDB, COLLATE {$COLLATE_TEXT}; diff --git a/resources/sql/autopatches/20171002.cngram.12.user.sql b/resources/sql/autopatches/20171002.cngram.12.user.sql new file mode 100644 index 0000000000..3e8499aaa6 --- /dev/null +++ b/resources/sql/autopatches/20171002.cngram.12.user.sql @@ -0,0 +1,7 @@ +CREATE TABLE {$NAMESPACE}_user.user_user_fngrams_common ( + id INT UNSIGNED NOT NULL AUTO_INCREMENT PRIMARY KEY, + ngram CHAR(3) NOT NULL COLLATE {$COLLATE_TEXT}, + needsCollection BOOL NOT NULL, + UNIQUE KEY `key_ngram` (ngram), + KEY `key_collect` (needsCollection) +) ENGINE=InnoDB, COLLATE {$COLLATE_TEXT}; diff --git a/resources/sql/autopatches/20171002.cngram.13.repository.sql b/resources/sql/autopatches/20171002.cngram.13.repository.sql new file mode 100644 index 0000000000..e406c44edf --- /dev/null +++ b/resources/sql/autopatches/20171002.cngram.13.repository.sql @@ -0,0 +1,7 @@ +CREATE TABLE {$NAMESPACE}_repository.repository_repository_fngrams_common ( + id INT UNSIGNED NOT NULL AUTO_INCREMENT PRIMARY KEY, + ngram CHAR(3) NOT NULL COLLATE {$COLLATE_TEXT}, + needsCollection BOOL NOT NULL, + UNIQUE KEY `key_ngram` (ngram), + KEY `key_collect` (needsCollection) +) ENGINE=InnoDB, COLLATE {$COLLATE_TEXT}; diff --git a/resources/sql/autopatches/20171002.cngram.14.commit.sql b/resources/sql/autopatches/20171002.cngram.14.commit.sql new file mode 100644 index 0000000000..48c1a02594 --- /dev/null +++ b/resources/sql/autopatches/20171002.cngram.14.commit.sql @@ -0,0 +1,7 @@ +CREATE TABLE {$NAMESPACE}_repository.repository_commit_fngrams_common ( + id INT UNSIGNED NOT NULL AUTO_INCREMENT PRIMARY KEY, + ngram CHAR(3) NOT NULL COLLATE {$COLLATE_TEXT}, + needsCollection BOOL NOT NULL, + UNIQUE KEY `key_ngram` (ngram), + KEY `key_collect` (needsCollection) +) ENGINE=InnoDB, COLLATE {$COLLATE_TEXT}; diff --git a/src/applications/config/schema/PhabricatorConfigSchemaSpec.php b/src/applications/config/schema/PhabricatorConfigSchemaSpec.php index b24adf27cd..971f5b828f 100644 --- a/src/applications/config/schema/PhabricatorConfigSchemaSpec.php +++ b/src/applications/config/schema/PhabricatorConfigSchemaSpec.php @@ -73,6 +73,12 @@ abstract class PhabricatorConfigSchemaSpec extends Phobject { $engine->getNgramsTableName(), $engine->getNgramsSchemaColumns(), $engine->getNgramsSchemaKeys()); + + $this->buildRawSchema( + $engine->getApplicationName(), + $engine->getCommonNgramsTableName(), + $engine->getCommonNgramsSchemaColumns(), + $engine->getCommonNgramsSchemaKeys()); } protected function buildRawSchema( diff --git a/src/applications/search/engineextension/PhabricatorFerretFulltextEngineExtension.php b/src/applications/search/engineextension/PhabricatorFerretFulltextEngineExtension.php index 6903072345..fe50518f3f 100644 --- a/src/applications/search/engineextension/PhabricatorFerretFulltextEngineExtension.php +++ b/src/applications/search/engineextension/PhabricatorFerretFulltextEngineExtension.php @@ -165,21 +165,46 @@ final class PhabricatorFerretFulltextEngineExtension $ferret_field['normalCorpus']); } - $sql = array(); - foreach ($ngrams as $ngram) { - $sql[] = qsprintf( + if ($ngrams) { + $common = queryfx_all( $conn, - '(%d, %s)', - $document_id, - $ngram); + 'SELECT ngram FROM %T WHERE ngram IN (%Ls)', + $engine->getCommonNgramsTableName(), + $ngrams); + $common = ipull($common, 'ngram', 'ngram'); + + foreach ($ngrams as $key => $ngram) { + if (isset($common[$ngram])) { + unset($ngrams[$key]); + continue; + } + + // NOTE: MySQL discards trailing whitespace in CHAR(X) columns. + $trim_ngram = rtrim($ngram, ' '); + if (isset($common[$ngram])) { + unset($ngrams[$key]); + continue; + } + } } - foreach (PhabricatorLiskDAO::chunkSQL($sql) as $chunk) { - queryfx( - $conn, - 'INSERT INTO %T (documentID, ngram) VALUES %Q', - $engine->getNgramsTableName(), - $chunk); + if ($ngrams) { + $sql = array(); + foreach ($ngrams as $ngram) { + $sql[] = qsprintf( + $conn, + '(%d, %s)', + $document_id, + $ngram); + } + + foreach (PhabricatorLiskDAO::chunkSQL($sql) as $chunk) { + queryfx( + $conn, + 'INSERT INTO %T (documentID, ngram) VALUES %Q', + $engine->getNgramsTableName(), + $chunk); + } } } catch (Exception $ex) { $object->killTransaction(); diff --git a/src/applications/search/ferret/PhabricatorFerretEngine.php b/src/applications/search/ferret/PhabricatorFerretEngine.php index 7cd06ae549..ee6ca57ec3 100644 --- a/src/applications/search/ferret/PhabricatorFerretEngine.php +++ b/src/applications/search/ferret/PhabricatorFerretEngine.php @@ -295,4 +295,35 @@ abstract class PhabricatorFerretEngine extends Phobject { ); } + public function getCommonNgramsTableName() { + $application = $this->getApplicationName(); + $scope = $this->getScopeName(); + + return "{$application}_{$scope}_fngrams_common"; + } + + public function getCommonNgramsSchemaColumns() { + return array( + 'id' => 'auto', + 'ngram' => 'char3', + 'needsCollection' => 'bool', + ); + } + + public function getCommonNgramsSchemaKeys() { + return array( + 'PRIMARY' => array( + 'columns' => array('id'), + 'unique' => true, + ), + 'key_ngram' => array( + 'columns' => array('ngram'), + 'unique' => true, + ), + 'key_collect' => array( + 'columns' => array('needsCollection'), + ), + ); + } + } diff --git a/src/infrastructure/query/policy/PhabricatorCursorPagedPolicyAwareQuery.php b/src/infrastructure/query/policy/PhabricatorCursorPagedPolicyAwareQuery.php index 09cb3e499c..c2b8ce04e0 100644 --- a/src/infrastructure/query/policy/PhabricatorCursorPagedPolicyAwareQuery.php +++ b/src/infrastructure/query/policy/PhabricatorCursorPagedPolicyAwareQuery.php @@ -1700,6 +1700,34 @@ abstract class PhabricatorCursorPagedPolicyAwareQuery } } + // Remove common ngrams, like "the", which occur too frequently in + // documents to be useful in constraining the query. The best ngrams + // are obscure sequences which occur in very few documents. + + if ($flat) { + $common_ngrams = queryfx_all( + $conn, + 'SELECT ngram FROM %T WHERE ngram IN (%Ls)', + $engine->getCommonNgramsTableName(), + ipull($flat, 'ngram')); + $common_ngrams = ipull($common_ngrams, 'ngram', 'ngram'); + + foreach ($flat as $key => $spec) { + $ngram = $spec['ngram']; + if (isset($common_ngrams[$ngram])) { + unset($flat[$key]); + continue; + } + + // NOTE: MySQL discards trailing whitespace in CHAR(X) columns. + $trim_ngram = rtrim($ngram, ' '); + if (isset($common_ngrams[$trim_ngram])) { + unset($flat[$key]); + continue; + } + } + } + // MySQL only allows us to join a maximum of 61 tables per query. Each // ngram is going to cost us a join toward that limit, so if the user // specified a very long query string, just pick 16 of the ngrams From 3e589cdd73ba40c3e408dabd482ee7fc6165ace1 Mon Sep 17 00:00:00 2001 From: epriestley Date: Mon, 2 Oct 2017 16:14:46 -0700 Subject: [PATCH 05/13] Add a workflow for populating (or depopulating) the common ngrams table Summary: Depends on D18672. Ref T13000. This does an on-demand build of the common ngrams table. Plan here is: - Push to `secure`. - Build the common ngrams table here. - See if stuff breaks? If it looks okay on this dataset, we can build out the GC support and try it in production. Test Plan: - Locally, my dataset has a bunch of `bin/lipsum` tasks with similar, common words. - Verified that ipsum terms now skip ngrams. For "lorem ipsum" search performance actually IMPROVED by skipping the ngrams table (12s to 9s). - Queried for normal terms, got very fast results using the ngram table, as normal. Reviewers: amckinley Reviewed By: amckinley Maniphest Tasks: T13000 Differential Revision: https://secure.phabricator.com/D18673 --- src/__phutil_library_map__.php | 2 + ...bricatorSearchManagementNgramsWorkflow.php | 106 ++++++++++++++++++ 2 files changed, 108 insertions(+) create mode 100644 src/applications/search/management/PhabricatorSearchManagementNgramsWorkflow.php diff --git a/src/__phutil_library_map__.php b/src/__phutil_library_map__.php index c535d46322..dd2bcd8dfe 100644 --- a/src/__phutil_library_map__.php +++ b/src/__phutil_library_map__.php @@ -3948,6 +3948,7 @@ phutil_register_library_map(array( 'PhabricatorSearchIndexVersionDestructionEngineExtension' => 'applications/search/engineextension/PhabricatorSearchIndexVersionDestructionEngineExtension.php', 'PhabricatorSearchManagementIndexWorkflow' => 'applications/search/management/PhabricatorSearchManagementIndexWorkflow.php', 'PhabricatorSearchManagementInitWorkflow' => 'applications/search/management/PhabricatorSearchManagementInitWorkflow.php', + 'PhabricatorSearchManagementNgramsWorkflow' => 'applications/search/management/PhabricatorSearchManagementNgramsWorkflow.php', 'PhabricatorSearchManagementWorkflow' => 'applications/search/management/PhabricatorSearchManagementWorkflow.php', 'PhabricatorSearchNgrams' => 'applications/search/ngrams/PhabricatorSearchNgrams.php', 'PhabricatorSearchNgramsDestructionEngineExtension' => 'applications/search/engineextension/PhabricatorSearchNgramsDestructionEngineExtension.php', @@ -9528,6 +9529,7 @@ phutil_register_library_map(array( 'PhabricatorSearchIndexVersionDestructionEngineExtension' => 'PhabricatorDestructionEngineExtension', 'PhabricatorSearchManagementIndexWorkflow' => 'PhabricatorSearchManagementWorkflow', 'PhabricatorSearchManagementInitWorkflow' => 'PhabricatorSearchManagementWorkflow', + 'PhabricatorSearchManagementNgramsWorkflow' => 'PhabricatorSearchManagementWorkflow', 'PhabricatorSearchManagementWorkflow' => 'PhabricatorManagementWorkflow', 'PhabricatorSearchNgrams' => 'PhabricatorSearchDAO', 'PhabricatorSearchNgramsDestructionEngineExtension' => 'PhabricatorDestructionEngineExtension', diff --git a/src/applications/search/management/PhabricatorSearchManagementNgramsWorkflow.php b/src/applications/search/management/PhabricatorSearchManagementNgramsWorkflow.php new file mode 100644 index 0000000000..d1a6e0bdff --- /dev/null +++ b/src/applications/search/management/PhabricatorSearchManagementNgramsWorkflow.php @@ -0,0 +1,106 @@ +setName('ngrams') + ->setSynopsis(pht('Recompute common ngrams.')) + ->setArguments( + array( + array( + 'name' => 'reset', + 'help' => pht('Reset all common ngram records.'), + ), + )); + } + + public function execute(PhutilArgumentParser $args) { + $is_reset = $args->getArg('reset'); + + $all_objects = id(new PhutilClassMapQuery()) + ->setAncestorClass('PhabricatorFerretInterface') + ->execute(); + + $min_documents = 4096; + $threshold = 0.15; + + foreach ($all_objects as $object) { + $engine = $object->newFerretEngine(); + $conn = $object->establishConnection('w'); + $display_name = get_class($object); + + if ($is_reset) { + echo tsprintf( + "%s\n", + pht( + 'Resetting common ngrams for "%s".', + $display_name)); + + queryfx( + $conn, + 'DELETE FROM %T', + $engine->getCommonNgramsTableName()); + continue; + } + + $document_count = queryfx_one( + $conn, + 'SELECT COUNT(*) N FROM %T', + $engine->getDocumentTableName()); + $document_count = $document_count['N']; + + if ($document_count < $min_documents) { + echo tsprintf( + "%s\n", + pht( + 'Too few documents of type "%s" for any ngrams to be common.', + $display_name)); + continue; + } + + $min_frequency = (int)ceil($document_count * $threshold); + $common_ngrams = queryfx_all( + $conn, + 'SELECT ngram, COUNT(*) N FROM %T + GROUP BY ngram + HAVING N >= %d', + $engine->getNgramsTableName(), + $min_frequency); + + if (!$common_ngrams) { + echo tsprintf( + "%s\n", + pht( + 'No new common ngrams exist for "%s".', + $display_name)); + continue; + } + + $sql = array(); + foreach ($common_ngrams as $ngram) { + $sql[] = qsprintf( + $conn, + '(%s, 1)', + $ngram['ngram']); + } + + foreach (PhabricatorLiskDAO::chunkSQL($sql) as $chunk) { + queryfx( + $conn, + 'INSERT IGNORE INTO %T (ngram, needsCollection) + VALUES %Q', + $engine->getCommonNgramsTableName(), + $chunk); + } + + echo tsprintf( + "%s\n", + pht( + 'Updated common ngrams for "%s".', + $display_name)); + } + } + +} From bc9de7eceef01c972c00cadfdff2e65cb996ab7e Mon Sep 17 00:00:00 2001 From: Austin McKinley Date: Tue, 3 Oct 2017 12:19:34 -0700 Subject: [PATCH 06/13] Typo fix Summary: vive la resistance Test Plan: doitlive Reviewers: epriestley Reviewed By: epriestley Subscribers: Korvin Differential Revision: https://secure.phabricator.com/D18674 --- src/docs/user/cluster/cluster_notifications.diviner | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/docs/user/cluster/cluster_notifications.diviner b/src/docs/user/cluster/cluster_notifications.diviner index 3cdeec3c39..79c89769fc 100644 --- a/src/docs/user/cluster/cluster_notifications.diviner +++ b/src/docs/user/cluster/cluster_notifications.diviner @@ -14,7 +14,7 @@ are: - performance and capacity may improve. This configuration is relatively simple, but has a small impact on availability -and does nothing to increase resitance to data loss. +and does nothing to increase resistance to data loss. Clustering Design Goals From b9fd5262509d1f8ace3f46c9782f8ee0638a3f51 Mon Sep 17 00:00:00 2001 From: epriestley Date: Wed, 4 Oct 2017 07:16:33 -0700 Subject: [PATCH 07/13] Fix a fatal on user email settings when `account.editable` is disabled Summary: If `account.editable` is set to false, we try to add a `null` button and fatal: > Argument 1 passed to PHUIHeaderView::addActionLink() must be an instance of PHUIButtonView, null given, called in /srv/phabricator/phabricator/src/applications/settings/panel/PhabricatorSettingsPanel.php on line 290 Instead, don't try to render `null` as a button. Test Plan: - Configured `account.editable` false. - Viewed email address settings. - Before: fatal. - After: page works, no button is provided. Reviewers: amckinley Reviewed By: amckinley Differential Revision: https://secure.phabricator.com/D18677 --- .../panel/PhabricatorEmailAddressesSettingsPanel.php | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/applications/settings/panel/PhabricatorEmailAddressesSettingsPanel.php b/src/applications/settings/panel/PhabricatorEmailAddressesSettingsPanel.php index 1bdb95f568..cd1bfba540 100644 --- a/src/applications/settings/panel/PhabricatorEmailAddressesSettingsPanel.php +++ b/src/applications/settings/panel/PhabricatorEmailAddressesSettingsPanel.php @@ -138,9 +138,9 @@ final class PhabricatorEmailAddressesSettingsPanel $editable, )); - $button = null; + $buttons = array(); if ($editable) { - $button = id(new PHUIButtonView()) + $buttons[] = id(new PHUIButtonView()) ->setTag('a') ->setIcon('fa-plus') ->setText(pht('Add New Address')) @@ -149,7 +149,7 @@ final class PhabricatorEmailAddressesSettingsPanel ->setColor(PHUIButtonView::GREY); } - return $this->newBox(pht('Email Addresses'), $table, array($button)); + return $this->newBox(pht('Email Addresses'), $table, $buttons); } private function returnNewAddressResponse( From 0ea5d668d1d40c5e605fa838545fd88a636d15ba Mon Sep 17 00:00:00 2001 From: epriestley Date: Wed, 4 Oct 2017 10:54:42 -0700 Subject: [PATCH 08/13] Enable hovercards for the "Task Graph" UI in Maniphest Summary: See PHI118. Enables hovercards to support peeking at tags and other details if you, e.g., create numerous identical subtasks of each task. Test Plan: {F5210816} Reviewers: amckinley Reviewed By: amckinley Differential Revision: https://secure.phabricator.com/D18681 --- src/infrastructure/graph/ManiphestTaskGraph.php | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/src/infrastructure/graph/ManiphestTaskGraph.php b/src/infrastructure/graph/ManiphestTaskGraph.php index 24e463e273..99191760dd 100644 --- a/src/infrastructure/graph/ManiphestTaskGraph.php +++ b/src/infrastructure/graph/ManiphestTaskGraph.php @@ -27,6 +27,8 @@ final class ManiphestTaskGraph protected function newTableRow($phid, $object, $trace) { $viewer = $this->getViewer(); + Javelin::initBehavior('phui-hovercards'); + if ($object) { $status = $object->getStatus(); $priority = $object->getPriority(); @@ -51,15 +53,16 @@ final class ManiphestTaskGraph $assigned = phutil_tag('em', array(), pht('None')); } - $full_title = $object->getTitle(); - - $link = phutil_tag( + $link = javelin_tag( 'a', array( 'href' => $object->getURI(), - 'title' => $full_title, + 'sigil' => 'hovercard', + 'meta' => array( + 'hoverPHID' => $object->getPHID(), + ), ), - $full_title); + $object->getTitle()); $link = array( phutil_tag( From 02e1440ef2605312940620b5e96764b282fb48cd Mon Sep 17 00:00:00 2001 From: epriestley Date: Wed, 4 Oct 2017 09:10:18 -0700 Subject: [PATCH 09/13] Dump tables one at a time, rather than all at once Summary: Ref T13000. This allows us to be more selective about which tables we dump data for, to reduce the size of backups and exports. The immediate goal is to make large `ngrams` tables more manageable in the cluster, but this generally makes all backups and exports faster and easier. Here, tables are dumped one at a time. A followup change will sometimes add the `--no-data` flag, to skip dumping readthrough caches and (optionally) rebuildable indexes. Test Plan: Compared a dump from `master` and from this branch, found them to be essentially identical. The new dump has a little more header information in each section. Verified each contains the same number of `CREATE TABLE` statements. Reviewers: amckinley Reviewed By: amckinley Maniphest Tasks: T13000 Differential Revision: https://secure.phabricator.com/D18679 --- ...abricatorStorageManagementDumpWorkflow.php | 120 +++++++++++------- 1 file changed, 76 insertions(+), 44 deletions(-) diff --git a/src/infrastructure/storage/management/workflow/PhabricatorStorageManagementDumpWorkflow.php b/src/infrastructure/storage/management/workflow/PhabricatorStorageManagementDumpWorkflow.php index 4dc5c64042..f491133c67 100644 --- a/src/infrastructure/storage/management/workflow/PhabricatorStorageManagementDumpWorkflow.php +++ b/src/infrastructure/storage/management/workflow/PhabricatorStorageManagementDumpWorkflow.php @@ -62,7 +62,24 @@ final class PhabricatorStorageManagementDumpWorkflow return 1; } - $databases = $api->getDatabaseList($patches, true); + $ref = $api->getRef(); + $ref_key = $ref->getRefKey(); + + $schemata_map = id(new PhabricatorConfigSchemaQuery()) + ->setAPIs(array($api)) + ->setRefs(array($ref)) + ->loadActualSchemata(); + $schemata = $schemata_map[$ref_key]; + + $targets = array(); + foreach ($schemata->getDatabases() as $database_name => $database) { + foreach ($database->getTables() as $table_name => $table) { + $targets[] = array( + 'database' => $database_name, + 'table' => $table_name, + ); + } + } list($host, $port) = $this->getBareHostAndPort($api->getHost()); @@ -126,35 +143,42 @@ final class PhabricatorStorageManagementDumpWorkflow $argv[] = $port; } - $argv[] = '--databases'; - foreach ($databases as $database) { - $argv[] = $database; + $commands = array(); + foreach ($targets as $target) { + $target_argv = $argv; + + if ($has_password) { + $commands[] = csprintf( + 'mysqldump -p%P %Ls -- %R %R', + $password, + $target_argv, + $target['database'], + $target['table']); + } else { + $command = csprintf( + 'mysqldump %Ls -- %R %R', + $target_argv, + $target['database'], + $target['table']); + } + + $commands[] = $command; } - if ($has_password) { - $command = csprintf('mysqldump -p%P %Ls', $password, $argv); - } else { - $command = csprintf('mysqldump %Ls', $argv); - } - // Decrease the CPU priority of this process so it doesn't contend with // other more important things. if (function_exists('proc_nice')) { proc_nice(19); } - - // If we aren't writing to a file, just passthru the command. - if ($output_file === null) { - return phutil_passthru('%C', $command); - } - // If we are writing to a file, stream the command output to disk. This // mode makes sure the whole command fails if there's an error (commonly, // a full disk). See T6996 for discussion. - if ($is_compress) { + if ($output_file === null) { + $file = null; + } else if ($is_compress) { $file = gzopen($output_file, 'wb1'); } else { $file = fopen($output_file, 'wb'); @@ -167,41 +191,47 @@ final class PhabricatorStorageManagementDumpWorkflow $file)); } - $future = new ExecFuture('%C', $command); - try { - $iterator = id(new FutureIterator(array($future))) - ->setUpdateInterval(0.100); - foreach ($iterator as $ready) { - list($stdout, $stderr) = $future->read(); - $future->discardBuffers(); + foreach ($commands as $command) { + $future = new ExecFuture('%C', $command); - if (strlen($stderr)) { - fwrite(STDERR, $stderr); - } + $iterator = id(new FutureIterator(array($future))) + ->setUpdateInterval(0.100); + foreach ($iterator as $ready) { + list($stdout, $stderr) = $future->read(); + $future->discardBuffers(); - if (strlen($stdout)) { - if ($is_compress) { - $ok = gzwrite($file, $stdout); - } else { - $ok = fwrite($file, $stdout); + if (strlen($stderr)) { + fwrite(STDERR, $stderr); } - if ($ok !== strlen($stdout)) { - throw new Exception( - pht( - 'Failed to write %d byte(s) to file "%s".', - new PhutilNumber(strlen($stdout)), - $output_file)); - } - } + if (strlen($stdout)) { + if (!$file) { + $ok = fwrite(STDOUT, $stdout); + } else if ($is_compress) { + $ok = gzwrite($file, $stdout); + } else { + $ok = fwrite($file, $stdout); + } - if ($ready !== null) { - $ready->resolvex(); + if ($ok !== strlen($stdout)) { + throw new Exception( + pht( + 'Failed to write %d byte(s) to file "%s".', + new PhutilNumber(strlen($stdout)), + $output_file)); + } + } + + if ($ready !== null) { + $ready->resolvex(); + } } } - if ($is_compress) { + if (!$file) { + $ok = true; + } else if ($is_compress) { $ok = gzclose($file); } else { $ok = fclose($file); @@ -218,7 +248,9 @@ final class PhabricatorStorageManagementDumpWorkflow // we don't leave any confusing artifacts laying around. try { - Filesystem::remove($output_file); + if ($file !== null) { + Filesystem::remove($output_file); + } } catch (Exception $ex) { // Ignore any errors we hit. } From c767c971ca796e22253726fe72135af6ba485e67 Mon Sep 17 00:00:00 2001 From: epriestley Date: Wed, 4 Oct 2017 10:51:29 -0700 Subject: [PATCH 10/13] Add "persistence" types (data, cache, or index) to tables, and tweak what "storage dump" dumps Summary: Ref T13000. This marks each table as either "data" (normal data), "cache" (automatically rebuilt, no need to ever dump) or "index" (can be manually rebuilt). By default, `bin/storage dump` dumps data and index tables, but not cache tables. With `--no-indexes`, it dumps only data tables. Indexes can be rebuilt after a restore with `bin/search index --all ...`. Test Plan: - Ran `--no-indexes` and normal dumps with `--trace`, verified that cache and index (former case) or cache only (latter case) tables were dumped with `--no-data`. - Verified dump has the same number of `CREATE TABLE` statements as before the changes. - Reviewed persistence tags in the web UI (note Ferret engine tables are "Index"): {F5210886} Reviewers: amckinley Reviewed By: amckinley Maniphest Tasks: T13000 Differential Revision: https://secure.phabricator.com/D18682 --- .../storage/PhabricatorCacheSchemaSpec.php | 3 + ...bricatorConfigDatabaseStatusController.php | 3 + .../schema/PhabricatorConfigSchemaQuery.php | 2 + .../schema/PhabricatorConfigSchemaSpec.php | 31 ++++++++-- .../schema/PhabricatorConfigTableSchema.php | 26 ++++++++ .../storage/DifferentialSchemaSpec.php | 3 + .../configuration/configuring_backups.diviner | 18 ++++++ ...abricatorStorageManagementDumpWorkflow.php | 61 +++++++++++++++++-- 8 files changed, 138 insertions(+), 9 deletions(-) diff --git a/src/applications/cache/storage/PhabricatorCacheSchemaSpec.php b/src/applications/cache/storage/PhabricatorCacheSchemaSpec.php index 97d6e1ac81..c9e78af0bc 100644 --- a/src/applications/cache/storage/PhabricatorCacheSchemaSpec.php +++ b/src/applications/cache/storage/PhabricatorCacheSchemaSpec.php @@ -30,6 +30,9 @@ final class PhabricatorCacheSchemaSpec extends PhabricatorConfigSchemaSpec { 'key_ttl' => array( 'columns' => array('cacheExpires'), ), + ), + array( + 'persistence' => PhabricatorConfigTableSchema::PERSISTENCE_CACHE, )); } diff --git a/src/applications/config/controller/PhabricatorConfigDatabaseStatusController.php b/src/applications/config/controller/PhabricatorConfigDatabaseStatusController.php index a67c57e6f9..760317ae80 100644 --- a/src/applications/config/controller/PhabricatorConfigDatabaseStatusController.php +++ b/src/applications/config/controller/PhabricatorConfigDatabaseStatusController.php @@ -261,6 +261,7 @@ final class PhabricatorConfigDatabaseStatusController $this->renderAttr( $table->getCollation(), $table->hasIssue($collation_issue)), + $table->getPersistenceTypeDisplayName(), ); } @@ -270,12 +271,14 @@ final class PhabricatorConfigDatabaseStatusController null, pht('Table'), pht('Collation'), + pht('Persistence'), )) ->setColumnClasses( array( null, 'wide pri', null, + null, )); $title = $database_name; diff --git a/src/applications/config/schema/PhabricatorConfigSchemaQuery.php b/src/applications/config/schema/PhabricatorConfigSchemaQuery.php index 6feda7363c..2cb7763110 100644 --- a/src/applications/config/schema/PhabricatorConfigSchemaQuery.php +++ b/src/applications/config/schema/PhabricatorConfigSchemaQuery.php @@ -338,6 +338,8 @@ final class PhabricatorConfigSchemaQuery extends Phobject { $comp_table->addKey($comp_key); } + $comp_table->setPersistenceType($expect_table->getPersistenceType()); + $comp_database->addTable($comp_table); } $comp_server->addDatabase($comp_database); diff --git a/src/applications/config/schema/PhabricatorConfigSchemaSpec.php b/src/applications/config/schema/PhabricatorConfigSchemaSpec.php index 971f5b828f..c451658682 100644 --- a/src/applications/config/schema/PhabricatorConfigSchemaSpec.php +++ b/src/applications/config/schema/PhabricatorConfigSchemaSpec.php @@ -56,36 +56,52 @@ abstract class PhabricatorConfigSchemaSpec extends Phobject { } protected function buildFerretIndexSchema(PhabricatorFerretEngine $engine) { + $index_options = array( + 'persistence' => PhabricatorConfigTableSchema::PERSISTENCE_INDEX, + ); + $this->buildRawSchema( $engine->getApplicationName(), $engine->getDocumentTableName(), $engine->getDocumentSchemaColumns(), - $engine->getDocumentSchemaKeys()); + $engine->getDocumentSchemaKeys(), + $index_options); $this->buildRawSchema( $engine->getApplicationName(), $engine->getFieldTableName(), $engine->getFieldSchemaColumns(), - $engine->getFieldSchemaKeys()); + $engine->getFieldSchemaKeys(), + $index_options); $this->buildRawSchema( $engine->getApplicationName(), $engine->getNgramsTableName(), $engine->getNgramsSchemaColumns(), - $engine->getNgramsSchemaKeys()); + $engine->getNgramsSchemaKeys(), + $index_options); $this->buildRawSchema( $engine->getApplicationName(), $engine->getCommonNgramsTableName(), $engine->getCommonNgramsSchemaColumns(), - $engine->getCommonNgramsSchemaKeys()); + $engine->getCommonNgramsSchemaKeys(), + $index_options); } protected function buildRawSchema( $database_name, $table_name, array $columns, - array $keys) { + array $keys, + array $options = array()) { + + PhutilTypeSpec::checkMap( + $options, + array( + 'persistence' => 'optional string', + )); + $database = $this->getDatabase($database_name); $table = $this->newTable($table_name); @@ -144,6 +160,11 @@ abstract class PhabricatorConfigSchemaSpec extends Phobject { $table->addKey($key); } + $persistence_type = idx($options, 'persistence'); + if ($persistence_type !== null) { + $table->setPersistenceType($persistence_type); + } + $database->addTable($table); } diff --git a/src/applications/config/schema/PhabricatorConfigTableSchema.php b/src/applications/config/schema/PhabricatorConfigTableSchema.php index 870c8ebddb..d6e5bfd9fa 100644 --- a/src/applications/config/schema/PhabricatorConfigTableSchema.php +++ b/src/applications/config/schema/PhabricatorConfigTableSchema.php @@ -7,6 +7,11 @@ final class PhabricatorConfigTableSchema private $engine; private $columns = array(); private $keys = array(); + private $persistenceType = self::PERSISTENCE_DATA; + + const PERSISTENCE_DATA = 'data'; + const PERSISTENCE_CACHE = 'cache'; + const PERSISTENCE_INDEX = 'index'; public function addColumn(PhabricatorConfigColumnSchema $column) { $key = $column->getName(); @@ -45,6 +50,27 @@ final class PhabricatorConfigTableSchema return idx($this->getKeys(), $key); } + public function setPersistenceType($persistence_type) { + $this->persistenceType = $persistence_type; + return $this; + } + + public function getPersistenceType() { + return $this->persistenceType; + } + + public function getPersistenceTypeDisplayName() { + $map = array( + self::PERSISTENCE_DATA => pht('Data'), + self::PERSISTENCE_CACHE => pht('Cache'), + self::PERSISTENCE_INDEX => pht('Index'), + ); + + $type = $this->getPersistenceType(); + + return idx($map, $type, $type); + } + protected function getSubschemata() { // NOTE: Keys and columns may have the same name, so make sure we return // everything. diff --git a/src/applications/differential/storage/DifferentialSchemaSpec.php b/src/applications/differential/storage/DifferentialSchemaSpec.php index e376c7f4f8..7aa76fa822 100644 --- a/src/applications/differential/storage/DifferentialSchemaSpec.php +++ b/src/applications/differential/storage/DifferentialSchemaSpec.php @@ -21,6 +21,9 @@ final class DifferentialSchemaSpec extends PhabricatorConfigSchemaSpec { 'dateCreated' => array( 'columns' => array('dateCreated'), ), + ), + array( + 'persistence' => PhabricatorConfigTableSchema::PERSISTENCE_CACHE, )); $this->buildRawSchema( diff --git a/src/docs/user/configuration/configuring_backups.diviner b/src/docs/user/configuration/configuring_backups.diviner index 9776448216..d32eebb0da 100644 --- a/src/docs/user/configuration/configuring_backups.diviner +++ b/src/docs/user/configuration/configuring_backups.diviner @@ -145,6 +145,24 @@ present a risk. If you restrict access to the Phabricator host or database, you should also restrict access to the backups. +Skipping Indexes +================ + +By default, `bin/storage dump` does not dump all of the data in the database: +it skips some caches which can be rebuilt automatically and do not need to be +backed up. Some of these caches are very large, so the size of the dump may +be significantly smaller than the size of the databases. + +If you have a large amount of data, you can specify `--no-indexes` when taking +a database dump to skip additional tables which contain search indexes. This +will reduce the size (and increase the speed) of the backup. This is an +advanced option which most installs will not benefit from. + +This index data can be rebuilt after a restore, but will not be rebuilt +automatically. If you choose to use this flag, you must manually rebuild +indexes after a restore (for details, see ((reindex))). + + Next Steps ========== diff --git a/src/infrastructure/storage/management/workflow/PhabricatorStorageManagementDumpWorkflow.php b/src/infrastructure/storage/management/workflow/PhabricatorStorageManagementDumpWorkflow.php index f491133c67..fecc0517e3 100644 --- a/src/infrastructure/storage/management/workflow/PhabricatorStorageManagementDumpWorkflow.php +++ b/src/infrastructure/storage/management/workflow/PhabricatorStorageManagementDumpWorkflow.php @@ -30,6 +30,13 @@ final class PhabricatorStorageManagementDumpWorkflow 'With __--output__, write a compressed file to disk instead '. 'of a plaintext file.'), ), + array( + 'name' => 'no-indexes', + 'help' => pht( + 'Do not dump data in rebuildable index tables. This means '. + 'backups are smaller and faster, but you will need to manually '. + 'rebuild indexes after performing a restore.'), + ), array( 'name' => 'overwrite', 'help' => pht( @@ -49,6 +56,8 @@ final class PhabricatorStorageManagementDumpWorkflow $console = PhutilConsole::getConsole(); + $with_indexes = !$args->getArg('no-indexes'); + $applied = $api->getAppliedPatches(); if ($applied === null) { $namespace = $api->getNamespace(); @@ -65,18 +74,58 @@ final class PhabricatorStorageManagementDumpWorkflow $ref = $api->getRef(); $ref_key = $ref->getRefKey(); - $schemata_map = id(new PhabricatorConfigSchemaQuery()) + $schemata_query = id(new PhabricatorConfigSchemaQuery()) ->setAPIs(array($api)) - ->setRefs(array($ref)) - ->loadActualSchemata(); - $schemata = $schemata_map[$ref_key]; + ->setRefs(array($ref)); + + $actual_map = $schemata_query->loadActualSchemata(); + $expect_map = $schemata_query->loadExpectedSchemata(); + + $schemata = $actual_map[$ref_key]; + $expect = $expect_map[$ref_key]; $targets = array(); foreach ($schemata->getDatabases() as $database_name => $database) { + $expect_database = $expect->getDatabase($database_name); foreach ($database->getTables() as $table_name => $table) { + + // NOTE: It's possible for us to find tables in these database which + // we don't expect to be there. For example, an older version of + // Phabricator may have had a table that was later dropped. We assume + // these are data tables and always dump them, erring on the side of + // caution. + + $persistence = PhabricatorConfigTableSchema::PERSISTENCE_DATA; + if ($expect_database) { + $expect_table = $expect_database->getTable($table_name); + if ($expect_table) { + $persistence = $expect_table->getPersistenceType(); + } + } + + switch ($persistence) { + case PhabricatorConfigTableSchema::PERSISTENCE_CACHE: + // When dumping tables, leave the data in cache tables in the + // database. This will be automatically rebuild after the data + // is restored and does not need to be persisted in backups. + $with_data = false; + break; + case PhabricatorConfigTableSchema::PERSISTENCE_INDEX: + // When dumping tables, leave index data behind of the caller + // specified "--no-indexes". These tables can be rebuilt manually + // from other tables, but do not rebuild automatically. + $with_data = $with_indexes; + break; + case PhabricatorConfigTableSchema::PERSISTENCE_DATA: + default: + $with_data = true; + break; + } + $targets[] = array( 'database' => $database_name, 'table' => $table_name, + 'data' => $with_data, ); } } @@ -147,6 +196,10 @@ final class PhabricatorStorageManagementDumpWorkflow foreach ($targets as $target) { $target_argv = $argv; + if (!$target['data']) { + $target_argv[] = '--no-data'; + } + if ($has_password) { $commands[] = csprintf( 'mysqldump -p%P %Ls -- %R %R', From 66df5b149309ae261e335022429c458aac323570 Mon Sep 17 00:00:00 2001 From: epriestley Date: Wed, 4 Oct 2017 17:19:27 -0700 Subject: [PATCH 11/13] Add a garbage collector for common ngrams Summary: Ref T13000. After an ngram is marked as "common", we can delete it from the storage table. Currently, the only way to get ngrams marked as "common" is to manually run `bin/search ngrams`, so this has no impact on normal installs. Test Plan: Ran `bin/garbage collect`, saw it start chewing through my local Maniphest ngrams table and removing common ngrams. Reviewers: amckinley Reviewed By: amckinley Maniphest Tasks: T13000 Differential Revision: https://secure.phabricator.com/D18687 --- src/__phutil_library_map__.php | 2 + ...catorSearchFerretNgramGarbageCollector.php | 55 +++++++++++++++++++ ...bricatorSearchManagementNgramsWorkflow.php | 5 +- 3 files changed, 61 insertions(+), 1 deletion(-) create mode 100644 src/applications/search/garbagecollector/PhabricatorSearchFerretNgramGarbageCollector.php diff --git a/src/__phutil_library_map__.php b/src/__phutil_library_map__.php index dd2bcd8dfe..aca6e09fdb 100644 --- a/src/__phutil_library_map__.php +++ b/src/__phutil_library_map__.php @@ -3941,6 +3941,7 @@ phutil_register_library_map(array( 'PhabricatorSearchEngineAttachment' => 'applications/search/engineextension/PhabricatorSearchEngineAttachment.php', 'PhabricatorSearchEngineExtension' => 'applications/search/engineextension/PhabricatorSearchEngineExtension.php', 'PhabricatorSearchEngineExtensionModule' => 'applications/search/engineextension/PhabricatorSearchEngineExtensionModule.php', + 'PhabricatorSearchFerretNgramGarbageCollector' => 'applications/search/garbagecollector/PhabricatorSearchFerretNgramGarbageCollector.php', 'PhabricatorSearchField' => 'applications/search/field/PhabricatorSearchField.php', 'PhabricatorSearchHost' => 'infrastructure/cluster/search/PhabricatorSearchHost.php', 'PhabricatorSearchHovercardController' => 'applications/search/controller/PhabricatorSearchHovercardController.php', @@ -9522,6 +9523,7 @@ phutil_register_library_map(array( 'PhabricatorSearchEngineAttachment' => 'Phobject', 'PhabricatorSearchEngineExtension' => 'Phobject', 'PhabricatorSearchEngineExtensionModule' => 'PhabricatorConfigModule', + 'PhabricatorSearchFerretNgramGarbageCollector' => 'PhabricatorGarbageCollector', 'PhabricatorSearchField' => 'Phobject', 'PhabricatorSearchHost' => 'Phobject', 'PhabricatorSearchHovercardController' => 'PhabricatorSearchBaseController', diff --git a/src/applications/search/garbagecollector/PhabricatorSearchFerretNgramGarbageCollector.php b/src/applications/search/garbagecollector/PhabricatorSearchFerretNgramGarbageCollector.php new file mode 100644 index 0000000000..f2c43743f6 --- /dev/null +++ b/src/applications/search/garbagecollector/PhabricatorSearchFerretNgramGarbageCollector.php @@ -0,0 +1,55 @@ +setAncestorClass('PhabricatorFerretInterface') + ->execute(); + + $did_collect = false; + foreach ($all_objects as $object) { + $engine = $object->newFerretEngine(); + $conn = $object->establishConnection('w'); + + $ngram_row = queryfx_one( + $conn, + 'SELECT ngram FROM %T WHERE needsCollection = 1 LIMIT 1', + $engine->getCommonNgramsTableName()); + if (!$ngram_row) { + continue; + } + + $ngram = $ngram_row['ngram']; + + queryfx( + $conn, + 'DELETE FROM %T WHERE ngram = %s', + $engine->getNgramsTableName(), + $ngram); + + queryfx( + $conn, + 'UPDATE %T SET needsCollection = 0 WHERE ngram = %s', + $engine->getCommonNgramsTableName(), + $ngram); + + $did_collect = true; + break; + } + + return $did_collect; + } + +} diff --git a/src/applications/search/management/PhabricatorSearchManagementNgramsWorkflow.php b/src/applications/search/management/PhabricatorSearchManagementNgramsWorkflow.php index d1a6e0bdff..4b983fe0f9 100644 --- a/src/applications/search/management/PhabricatorSearchManagementNgramsWorkflow.php +++ b/src/applications/search/management/PhabricatorSearchManagementNgramsWorkflow.php @@ -6,7 +6,10 @@ final class PhabricatorSearchManagementNgramsWorkflow protected function didConstruct() { $this ->setName('ngrams') - ->setSynopsis(pht('Recompute common ngrams.')) + ->setSynopsis( + pht( + 'Recompute common ngrams. This is an advanced workflow that '. + 'can harm search quality if used improperly.')) ->setArguments( array( array( From 17e83b53d53e2f1b08021993e143db891a51b26f Mon Sep 17 00:00:00 2001 From: epriestley Date: Fri, 6 Oct 2017 07:48:25 -0700 Subject: [PATCH 12/13] Add "bin/search query" for debugging query execution Summary: Ref T13000. Currently, queries can only be executed from the web UI, which requires logging in as a user. I really want to avoid doing that wherever we can, but being able to execute queries on an instance (and, particularly, see the ngrams and timings on the underlying lookups) would have been helpful in several cases. Improve tooling a bit in advance of the "common ngrams" stuff going out since it seems likely that it will be useful if issues arise. Test Plan: Ran `bin/search query --query ...`, got useful minimal output. Ran with `--trace` to get internals. Reviewers: amckinley Reviewed By: amckinley Maniphest Tasks: T13000 Differential Revision: https://secure.phabricator.com/D18690 --- src/__phutil_library_map__.php | 2 + ...abricatorSearchManagementQueryWorkflow.php | 55 +++++++++++++++++++ 2 files changed, 57 insertions(+) create mode 100644 src/applications/search/management/PhabricatorSearchManagementQueryWorkflow.php diff --git a/src/__phutil_library_map__.php b/src/__phutil_library_map__.php index aca6e09fdb..a650e33fcf 100644 --- a/src/__phutil_library_map__.php +++ b/src/__phutil_library_map__.php @@ -3950,6 +3950,7 @@ phutil_register_library_map(array( 'PhabricatorSearchManagementIndexWorkflow' => 'applications/search/management/PhabricatorSearchManagementIndexWorkflow.php', 'PhabricatorSearchManagementInitWorkflow' => 'applications/search/management/PhabricatorSearchManagementInitWorkflow.php', 'PhabricatorSearchManagementNgramsWorkflow' => 'applications/search/management/PhabricatorSearchManagementNgramsWorkflow.php', + 'PhabricatorSearchManagementQueryWorkflow' => 'applications/search/management/PhabricatorSearchManagementQueryWorkflow.php', 'PhabricatorSearchManagementWorkflow' => 'applications/search/management/PhabricatorSearchManagementWorkflow.php', 'PhabricatorSearchNgrams' => 'applications/search/ngrams/PhabricatorSearchNgrams.php', 'PhabricatorSearchNgramsDestructionEngineExtension' => 'applications/search/engineextension/PhabricatorSearchNgramsDestructionEngineExtension.php', @@ -9532,6 +9533,7 @@ phutil_register_library_map(array( 'PhabricatorSearchManagementIndexWorkflow' => 'PhabricatorSearchManagementWorkflow', 'PhabricatorSearchManagementInitWorkflow' => 'PhabricatorSearchManagementWorkflow', 'PhabricatorSearchManagementNgramsWorkflow' => 'PhabricatorSearchManagementWorkflow', + 'PhabricatorSearchManagementQueryWorkflow' => 'PhabricatorSearchManagementWorkflow', 'PhabricatorSearchManagementWorkflow' => 'PhabricatorManagementWorkflow', 'PhabricatorSearchNgrams' => 'PhabricatorSearchDAO', 'PhabricatorSearchNgramsDestructionEngineExtension' => 'PhabricatorDestructionEngineExtension', diff --git a/src/applications/search/management/PhabricatorSearchManagementQueryWorkflow.php b/src/applications/search/management/PhabricatorSearchManagementQueryWorkflow.php new file mode 100644 index 0000000000..8e40162a98 --- /dev/null +++ b/src/applications/search/management/PhabricatorSearchManagementQueryWorkflow.php @@ -0,0 +1,55 @@ +setName('query') + ->setSynopsis( + pht('Run a search query. Intended for debugging and development.')) + ->setArguments( + array( + array( + 'name' => 'query', + 'param' => 'query', + 'help' => pht('Raw query to execute.'), + ), + )); + } + + public function execute(PhutilArgumentParser $args) { + $viewer = $this->getViewer(); + $raw_query = $args->getArg('query'); + if (!strlen($raw_query)) { + throw new PhutilArgumentUsageException( + pht('Specify a query with --query.')); + } + + $engine = id(new PhabricatorSearchApplicationSearchEngine()) + ->setViewer($viewer); + + $saved = $engine->newSavedQuery(); + $saved->setParameter('query', $raw_query); + + $query = $engine->buildQueryFromSavedQuery($saved); + $pager = $engine->newPagerForSavedQuery($saved); + + $results = $engine->executeQuery($query, $pager); + if ($results) { + foreach ($results as $result) { + echo tsprintf( + "%s\t%s\n", + $result->getPHID(), + $result->getName()); + } + } else { + echo tsprintf( + "%s\n", + pht('No results.')); + } + + return 0; + } + +} From 85011a46d0aa1a5da241cb91620d929adc6414a9 Mon Sep 17 00:00:00 2001 From: epriestley Date: Fri, 6 Oct 2017 11:47:35 -0700 Subject: [PATCH 13/13] Bail out of PhabricatorRepositoryGraphCache more aggressively after cache fills Summary: Ref PHI109. Ref T11786. We currently test elapsed time every 64 iterations (since iterations are normally very fast), but at least one install is seeing the page timeout after 30 seconds. One reason could be that cache fills may occur, and are likely to be much slower than normal iterations. In an extreme case, we could do 64 cache fills before checking the time. Tweak thing so that we always check the time after doing a cache fill, regardless of how many iterations have elapsed since the last attempt. Additionally, this API method currently accepts an arbitrary number of paths, but implicitly limits each cache query to 500ms. If more than 60 paths are passed, this may exceed 30s. Only let the cache churn for a maximum of 10s across all paths. If this is more the latter issue than the former, this might replace the GraphCache timeouts with `git` timeouts, but at least our understanding of what's going on here will improve. Test Plan: This is difficult to test convincingly locally, since I can't reproduce the original issue. It still works after these changes, but it worked fine before these changes too. Reviewers: amckinley Reviewed By: amckinley Maniphest Tasks: T11786 Differential Revision: https://secure.phabricator.com/D18692 --- ...ffusionLastModifiedQueryConduitAPIMethod.php | 14 +++++++++++++- .../PhabricatorRepositoryGraphCache.php | 17 +++++++++++++---- 2 files changed, 26 insertions(+), 5 deletions(-) diff --git a/src/applications/diffusion/conduit/DiffusionLastModifiedQueryConduitAPIMethod.php b/src/applications/diffusion/conduit/DiffusionLastModifiedQueryConduitAPIMethod.php index 30b436cfff..1135420a6e 100644 --- a/src/applications/diffusion/conduit/DiffusionLastModifiedQueryConduitAPIMethod.php +++ b/src/applications/diffusion/conduit/DiffusionLastModifiedQueryConduitAPIMethod.php @@ -115,6 +115,10 @@ final class DiffusionLastModifiedQueryConduitAPIMethod $graph_cache = new PhabricatorRepositoryGraphCache(); $results = array(); + + // Spend no more than this many total seconds trying to satisfy queries + // via the graph cache. + $remaining_time = 10.0; foreach ($map as $path => $commit) { $path_id = idx($path_map, $path); if (!$path_id) { @@ -125,13 +129,21 @@ final class DiffusionLastModifiedQueryConduitAPIMethod continue; } + $t_start = microtime(true); $cache_result = $graph_cache->loadLastModifiedCommitID( $commit_id, - $path_id); + $path_id, + $remaining_time); + $t_end = microtime(true); if ($cache_result !== false) { $results[$path] = $cache_result; } + + $remaining_time -= ($t_end - $t_start); + if ($remaining_time <= 0) { + break; + } } if ($results) { diff --git a/src/applications/repository/graphcache/PhabricatorRepositoryGraphCache.php b/src/applications/repository/graphcache/PhabricatorRepositoryGraphCache.php index d52182fcc9..c4adc61ab0 100644 --- a/src/applications/repository/graphcache/PhabricatorRepositoryGraphCache.php +++ b/src/applications/repository/graphcache/PhabricatorRepositoryGraphCache.php @@ -102,6 +102,10 @@ final class PhabricatorRepositoryGraphCache extends Phobject { } // Otherwise, the rebuild gave us the data, so we can keep going. + + $did_fill = true; + } else { + $did_fill = false; } // Sanity check so we can survive and recover from bad data. @@ -147,12 +151,17 @@ final class PhabricatorRepositoryGraphCache extends Phobject { $commit_id = $parent_id; // Periodically check if we've spent too long looking for a result - // in the cache, and return so we can fall back to a VCS operation. This - // keeps us from having a degenerate worst case if, e.g., the cache - // is cold and we need to inspect a very large number of blocks + // in the cache, and return so we can fall back to a VCS operation. + // This keeps us from having a degenerate worst case if, e.g., the + // cache is cold and we need to inspect a very large number of blocks // to satisfy the query. - if (((++$iterations) % 64) === 0) { + ++$iterations; + + // If we performed a cache fill in this cycle, always check the time + // limit, since cache fills may take a significant amount of time. + + if ($did_fill || ($iterations % 64 === 0)) { $t_end = microtime(true); if (($t_end - $t_start) > $time) { return false;