diff --git a/src/docs/developer/n_plus_one.diviner b/src/docs/developer/n_plus_one.diviner index f4f8c7bbf5..1f3b78424e 100644 --- a/src/docs/developer/n_plus_one.diviner +++ b/src/docs/developer/n_plus_one.diviner @@ -63,6 +63,9 @@ are no longer paying the overhead of issuing hundreds of extra queries. This will perform much better (although, as with all performance changes, you should verify this claim by measuring it). +See also @{method:LiskDAO::loadRelatives} method which provides an abstraction +to prevent this problem. + = Detecting the Problem = Beyond reasoning about it while figuring out how to load the data you need, the diff --git a/src/storage/lisk/dao/LiskDAO.php b/src/storage/lisk/dao/LiskDAO.php index 7cc43e844b..504bd8dec6 100644 --- a/src/storage/lisk/dao/LiskDAO.php +++ b/src/storage/lisk/dao/LiskDAO.php @@ -138,6 +138,9 @@ * @{method:loadAllWhere} returns a list of objects, while * @{method:loadOneWhere} returns a single object (or `null`). * + * There's also a @{method:loadRelatives} method which helps to prevent the 1+N + * queries problem. + * * = Managing Transactions = * * Lisk uses a transaction stack, so code does not generally need to be aware @@ -204,6 +207,8 @@ abstract class LiskDAO { private static $connections = array(); + private $inSet = null; + /** * Build an empty object. * @@ -692,11 +697,136 @@ abstract class LiskDAO { } else { $result[] = $obj->loadFromArray($row); } + if ($this->inSet) { + $this->inSet->addToSet($obj); + } } return $result; } + /** + * This method helps to prevent the 1+N queries problem. It happens when you + * execute a query for each row in a result set. Like in this code: + * + * COUNTEREXAMPLE, name=Easy to write but expensive to execute + * $diffs = id(new DifferentialDiff())->loadAllWhere( + * 'revisionID = %d', + * $revision->getID()); + * foreach ($diffs as $diff) { + * $changesets = id(new DifferentialChangeset())->loadAllWhere( + * 'diffID = %d', + * $diff->getID()); + * // Do something with $changesets. + * } + * + * One can solve this problem by reading all the dependent objects at once and + * assigning them later: + * + * COUNTEREXAMPLE, name=Cheaper to execute but harder to write and maintain + * $diffs = id(new DifferentialDiff())->loadAllWhere( + * 'revisionID = %d', + * $revision->getID()); + * $all_changesets = id(new DifferentialChangeset())->loadAllWhere( + * 'diffID IN (%Ld)', + * mpull($diffs, 'getID')); + * $all_changesets = mgroup($all_changesets, 'getDiffID'); + * foreach ($diffs as $diff) { + * $changesets = idx($all_changesets, $diff->getID(), array()); + * // Do something with $changesets. + * } + * + * The method @{method:loadRelatives} abstracts this approach which allows + * writing a code which is simple and efficient at the same time: + * + * name=Easy to write and cheap to execute + * $diffs = $revision->loadRelatives(new DifferentialDiff(), 'revisionID'); + * foreach ($diffs as $diff) { + * $changesets = $diff->loadRelatives( + * new DifferentialChangeset(), + * 'diffID'); + * // Do something with $changesets. + * } + * + * This will load dependent objects for all diffs in the first call of + * @{method:loadRelatives} and use this result for all following calls. + * + * The method supports working with set of sets, like in this code: + * + * $diffs = $revision->loadRelatives(new DifferentialDiff(), 'revisionID'); + * foreach ($diffs as $diff) { + * $changesets = $diff->loadRelatives( + * new DifferentialChangeset(), + * 'diffID'); + * foreach ($changesets as $changeset) { + * $hunks = $changeset->loadRelatives( + * new DifferentialHunk(), + * 'changesetID'); + * // Do something with hunks. + * } + * } + * + * This code will execute just three queries - one to load all diffs, one to + * load all their related changesets and one to load all their related hunks. + * You can try to write an equivalent code without using this method as + * a homework. + * + * The method also supports retrieving referenced objects, for example authors + * of all diffs: + * + * foreach ($diffs as $diff) { + * $author = head($diff->loadRelatives( + * new PhabricatorUser(), + * 'phid', + * 'getAuthorPHID')); + * // Do something with author. + * } + * + * It is also possible to specify additional conditions for the `WHERE` + * clause. Similarly to @{method:loadAllWhere}, you can specify everything + * after `WHERE` (except `LIMIT`). Contrary to @{method:loadAllWhere}, it is + * allowed to pass only a constant string (`%` doesn't have a special + * meaning). This is intentional to avoid mistakes with using data from one + * row in retrieving other rows. Example of a correct usage: + * + * $status = head($author->loadRelatives( + * new PhabricatorUserStatus(), + * 'userPHID', + * 'getPHID', + * '(UNIX_TIMESTAMP() BETWEEN dateFrom AND dateTo)')); + * + * @param LiskDAO Type of objects to load. + * @param string Name of the column in target table. + * @param string Method name in this table. + * @param string Additional constraints on returned rows. It supports no + * placeholders and requires putting the WHERE part into + * parentheses. It's not possible to use LIMIT. + * @return list Objects of type $object. + * + * @task load + */ + public function loadRelatives( + LiskDAO $object, + $foreign_column, + $key_method = 'getID', + $where = '') { + + if (!$this->inSet) { + id(new LiskDAOSet())->addToSet($this); + } + $relatives = $this->inSet->loadRelatives( + $object, + $foreign_column, + $key_method, + $where); + return idx($relatives, $this->$key_method(), array()); + } + + final public function putInSet(LiskDAOSet $set) { + $this->inSet = $set; + return $this; + } + /* -( Examining Objects )-------------------------------------------------- */ diff --git a/src/storage/lisk/dao/LiskDAOSet.php b/src/storage/lisk/dao/LiskDAOSet.php new file mode 100644 index 0000000000..1b343b96a4 --- /dev/null +++ b/src/storage/lisk/dao/LiskDAOSet.php @@ -0,0 +1,85 @@ +addToSet($author); + * foreach ($reviewers as $reviewer) { + * $users->addToSet($reviewer); + * } + * foreach ($ccs as $cc) { + * $users->addToSet($cc); + * } + * // Preload e-mails of all involved users and return e-mails of author. + * $author_emails = $author->loadRelatives( + * new PhabricatorUserEmail(), + * 'userPHID', + * 'getPHID'); + */ +final class LiskDAOSet { + private $daos = array(); + private $relatives = array(); + + public function addToSet(LiskDAO $dao) { + $this->daos[] = $dao; + $dao->putInSet($this); + return $this; + } + + /** + * See @{method:LiskDAO::loadRelatives}. + */ + public function loadRelatives( + LiskDAO $object, + $foreign_column, + $key_method = 'getID', + $where = '') { + + $relatives = &$this->relatives[ + get_class($object)."-{$foreign_column}-{$key_method}-{$where}"]; + + if ($relatives === null) { + $ids = array(); + foreach ($this->daos as $dao) { + $id = $dao->$key_method(); + if ($id !== null) { + $ids[$id] = $id; + } + } + if (!$ids) { + $relatives = array(); + } else { + $set = new LiskDAOSet(); + $relatives = $object->putInSet($set)->loadAllWhere( + '%C IN (%Ls) %Q', + $foreign_column, + $ids, + ($where != '' ? 'AND '.$where : '')); + $relatives = mgroup($relatives, 'get'.$foreign_column); + } + } + + return $relatives; + } + +} diff --git a/src/storage/lisk/dao/__init__.php b/src/storage/lisk/dao/__init__.php index d4eec11af6..b7d8537078 100644 --- a/src/storage/lisk/dao/__init__.php +++ b/src/storage/lisk/dao/__init__.php @@ -16,4 +16,5 @@ phutil_require_module('phutil', 'utils'); phutil_require_source('LiskDAO.php'); +phutil_require_source('LiskDAOSet.php'); phutil_require_source('LiskEphemeralObjectException.php');