diff --git a/resources/sql/patches/126.edges.sql b/resources/sql/patches/126.edges.sql new file mode 100644 index 0000000000..2123c0b068 --- /dev/null +++ b/resources/sql/patches/126.edges.sql @@ -0,0 +1,123 @@ +CREATE TABLE phabricator_maniphest.edge ( + src VARCHAR(64) NOT NULL COLLATE utf8_bin, + type VARCHAR(64) NOT NULL COLLATE utf8_bin, + dst VARCHAR(64) NOT NULL COLLATE utf8_bin, + dateCreated INT UNSIGNED NOT NULL, + seq INT UNSIGNED NOT NULL, + dataID INT UNSIGNED, + PRIMARY KEY (src, type, dst), + KEY (src, type, dateCreated, seq) +) ENGINE=InnoDB, COLLATE utf8_general_ci; + +CREATE TABLE phabricator_maniphest.edgedata ( + id INT UNSIGNED NOT NULL PRIMARY KEY AUTO_INCREMENT, + data LONGTEXT NOT NULL COLLATE utf8_bin +) ENGINE=InnoDB, COLLATE utf8_general_ci; + + + +CREATE TABLE phabricator_repository.edge ( + src VARCHAR(64) NOT NULL COLLATE utf8_bin, + type VARCHAR(64) NOT NULL COLLATE utf8_bin, + dst VARCHAR(64) NOT NULL COLLATE utf8_bin, + dateCreated INT UNSIGNED NOT NULL, + seq INT UNSIGNED NOT NULL, + dataID INT UNSIGNED, + PRIMARY KEY (src, type, dst), + KEY (src, type, dateCreated, seq) +) ENGINE=InnoDB, COLLATE utf8_general_ci; + +CREATE TABLE phabricator_repository.edgedata ( + id INT UNSIGNED NOT NULL PRIMARY KEY AUTO_INCREMENT, + data LONGTEXT NOT NULL COLLATE utf8_bin +) ENGINE=InnoDB, COLLATE utf8_general_ci; + + + +CREATE TABLE phabricator_differential.edge ( + src VARCHAR(64) NOT NULL COLLATE utf8_bin, + type VARCHAR(64) NOT NULL COLLATE utf8_bin, + dst VARCHAR(64) NOT NULL COLLATE utf8_bin, + dateCreated INT UNSIGNED NOT NULL, + seq INT UNSIGNED NOT NULL, + dataID INT UNSIGNED, + PRIMARY KEY (src, type, dst), + KEY (src, type, dateCreated, seq) +) ENGINE=InnoDB, COLLATE utf8_general_ci; + +CREATE TABLE phabricator_differential.edgedata ( + id INT UNSIGNED NOT NULL PRIMARY KEY AUTO_INCREMENT, + data LONGTEXT NOT NULL COLLATE utf8_bin +) ENGINE=InnoDB, COLLATE utf8_general_ci; + + + +CREATE TABLE phabricator_file.edge ( + src VARCHAR(64) NOT NULL COLLATE utf8_bin, + type VARCHAR(64) NOT NULL COLLATE utf8_bin, + dst VARCHAR(64) NOT NULL COLLATE utf8_bin, + dateCreated INT UNSIGNED NOT NULL, + seq INT UNSIGNED NOT NULL, + dataID INT UNSIGNED, + PRIMARY KEY (src, type, dst), + KEY (src, type, dateCreated, seq) +) ENGINE=InnoDB, COLLATE utf8_general_ci; + +CREATE TABLE phabricator_file.edgedata ( + id INT UNSIGNED NOT NULL PRIMARY KEY AUTO_INCREMENT, + data LONGTEXT NOT NULL COLLATE utf8_bin +) ENGINE=InnoDB, COLLATE utf8_general_ci; + + + +CREATE TABLE phabricator_user.edge ( + src VARCHAR(64) NOT NULL COLLATE utf8_bin, + type VARCHAR(64) NOT NULL COLLATE utf8_bin, + dst VARCHAR(64) NOT NULL COLLATE utf8_bin, + dateCreated INT UNSIGNED NOT NULL, + seq INT UNSIGNED NOT NULL, + dataID INT UNSIGNED, + PRIMARY KEY (src, type, dst), + KEY (src, type, dateCreated, seq) +) ENGINE=InnoDB, COLLATE utf8_general_ci; + +CREATE TABLE phabricator_user.edgedata ( + id INT UNSIGNED NOT NULL PRIMARY KEY AUTO_INCREMENT, + data LONGTEXT NOT NULL COLLATE utf8_bin +) ENGINE=InnoDB, COLLATE utf8_general_ci; + + + +CREATE TABLE phabricator_project.edge ( + src VARCHAR(64) NOT NULL COLLATE utf8_bin, + type VARCHAR(64) NOT NULL COLLATE utf8_bin, + dst VARCHAR(64) NOT NULL COLLATE utf8_bin, + dateCreated INT UNSIGNED NOT NULL, + seq INT UNSIGNED NOT NULL, + dataID INT UNSIGNED, + PRIMARY KEY (src, type, dst), + KEY (src, type, dateCreated, seq) +) ENGINE=InnoDB, COLLATE utf8_general_ci; + +CREATE TABLE phabricator_project.edgedata ( + id INT UNSIGNED NOT NULL PRIMARY KEY AUTO_INCREMENT, + data LONGTEXT NOT NULL COLLATE utf8_bin +) ENGINE=InnoDB, COLLATE utf8_general_ci; + + + +CREATE TABLE phabricator_metamta.edge ( + src VARCHAR(64) NOT NULL COLLATE utf8_bin, + type VARCHAR(64) NOT NULL COLLATE utf8_bin, + dst VARCHAR(64) NOT NULL COLLATE utf8_bin, + dateCreated INT UNSIGNED NOT NULL, + seq INT UNSIGNED NOT NULL, + dataID INT UNSIGNED, + PRIMARY KEY (src, type, dst), + KEY (src, type, dateCreated, seq) +) ENGINE=InnoDB, COLLATE utf8_general_ci; + +CREATE TABLE phabricator_metamta.edgedata ( + id INT UNSIGNED NOT NULL PRIMARY KEY AUTO_INCREMENT, + data LONGTEXT NOT NULL COLLATE utf8_bin +) ENGINE=InnoDB, COLLATE utf8_general_ci; diff --git a/src/__phutil_library_map__.php b/src/__phutil_library_map__.php index fb26bfa7ce..4f4f6acf3e 100644 --- a/src/__phutil_library_map__.php +++ b/src/__phutil_library_map__.php @@ -552,6 +552,10 @@ phutil_register_library_map(array( 'PhabricatorDisabledUserController' => 'applications/auth/controller/disabled', 'PhabricatorDraft' => 'applications/draft/storage/draft', 'PhabricatorDraftDAO' => 'applications/draft/storage/base', + 'PhabricatorEdgeConfig' => 'infrastructure/edges/constants/config', + 'PhabricatorEdgeConstants' => 'infrastructure/edges/constants/base', + 'PhabricatorEdgeEditor' => 'infrastructure/edges/editor/edge', + 'PhabricatorEdgeQuery' => 'infrastructure/edges/query/edge', 'PhabricatorEmailLoginController' => 'applications/auth/controller/email', 'PhabricatorEmailTokenController' => 'applications/auth/controller/emailtoken', 'PhabricatorEnv' => 'infrastructure/env', @@ -1403,6 +1407,8 @@ phutil_register_library_map(array( 'PhabricatorDisabledUserController' => 'PhabricatorAuthController', 'PhabricatorDraft' => 'PhabricatorDraftDAO', 'PhabricatorDraftDAO' => 'PhabricatorLiskDAO', + 'PhabricatorEdgeConfig' => 'PhabricatorEdgeConstants', + 'PhabricatorEdgeQuery' => 'PhabricatorQuery', 'PhabricatorEmailLoginController' => 'PhabricatorAuthController', 'PhabricatorEmailTokenController' => 'PhabricatorAuthController', 'PhabricatorEnvTestCase' => 'PhabricatorTestCase', diff --git a/src/applications/base/storage/lisk/PhabricatorLiskDAO.php b/src/applications/base/storage/lisk/PhabricatorLiskDAO.php index 3db43dfc78..826a70cfb4 100644 --- a/src/applications/base/storage/lisk/PhabricatorLiskDAO.php +++ b/src/applications/base/storage/lisk/PhabricatorLiskDAO.php @@ -1,7 +1,7 @@ $type_edges) { + $this->edges[$type] = $type_edges; + } + return $this; + } + + + /** + * @task edges + */ + public function getEdges($type) { + $edges = idx($this->edges, $type); + if ($edges === null) { + throw new Exception("Call attachEdges() before getEdges()!"); + } + return $edges; + } + + + /** + * @task edges + */ + public function getEdgePHIDs($type) { + return ipull($this->getEdges($type), 'dst'); + } + + +/* -( Configuring Storage )------------------------------------------------ */ + + + /** + * @task config + */ public function establishLiveConnection($mode) { $conf_provider = PhabricatorEnv::getEnvConfig( 'mysql.configuration_provider', 'DatabaseConfigurationProvider'); @@ -34,6 +80,9 @@ abstract class PhabricatorLiskDAO extends LiskDAO { )); } + /** + * @task config + */ public function getTableName() { $str = 'phabricator'; $len = strlen($str); @@ -54,5 +103,8 @@ abstract class PhabricatorLiskDAO extends LiskDAO { } } + /** + * @task config + */ abstract public function getApplicationName(); } diff --git a/src/docs/developer/using_edges.diviner b/src/docs/developer/using_edges.diviner new file mode 100644 index 0000000000..c607b1f9ea --- /dev/null +++ b/src/docs/developer/using_edges.diviner @@ -0,0 +1,31 @@ +@title Using Edges +@group developer + +Guide to the Edges infrastructure. + += Overview = + +Edges are a generic way of storing a relationship between two objects (like +a Task and its attached files). If you are familiar with the Facebook +associations framework, Phabricator Edges are substantially similar. + +An edge is defined by a source PHID (the edge origin), a destination PHID +(the edge destination) and an edge type (which describes the relationship, +like "is subscribed to" or "has attached file"). + +Every edge is directional, and stored alongside the source object. Some edges +are configured to automatically write an inverse edge, effectively building +a bidirectional relationship. The strength of storing relationships like this +is that they work when databases are partitioned or sharded. + += Reading Edges = + +You can load edges with @{class:PhabricatorEdgeQuery}. + += Writing Edges = + +You can edit edges with @{class:PhabricatorEdgeEditor}. + += Edges and Lisk = + +@{class:PhabricatorLiskDAO} includes some builtin support for edges. diff --git a/src/infrastructure/edges/constants/base/PhabricatorEdgeConstants.php b/src/infrastructure/edges/constants/base/PhabricatorEdgeConstants.php new file mode 100644 index 0000000000..045c7f46b8 --- /dev/null +++ b/src/infrastructure/edges/constants/base/PhabricatorEdgeConstants.php @@ -0,0 +1,21 @@ + self::TYPE_COMMIT_HAS_TASK, + self::TYPE_COMMIT_HAS_TASK => self::TYPE_TASK_HAS_COMMIT, + ); + + return idx($map, $edge_type); + } + + public static function establishConnection($phid_type, $conn_type) { + static $class_map = array( + PhabricatorPHIDConstants::PHID_TYPE_TASK => 'ManiphestTask', + PhabricatorPHIDConstants::PHID_TYPE_CMIT => 'PhabricatorRepository', + PhabricatorPHIDConstants::PHID_TYPE_DREV => 'DifferentialRevision', + PhabricatorPHIDConstants::PHID_TYPE_FILE => 'PhabricatorFile', + PhabricatorPHIDConstants::PHID_TYPE_USER => 'PhabricatorUser', + PhabricatorPHIDConstants::PHID_TYPE_PROJ => 'PhabricatorProject', + PhabricatorPHIDConstants::PHID_TYPE_MLST => + 'PhabricatorMetaMTAMailingList', + ); + + $class = idx($class_map, $phid_type); + + if (!$class) { + throw new Exception( + "Edges are not available for objects of type '{$phid_type}'!"); + } + + return newv($class, array())->establishConnection($conn_type); + } + + +} diff --git a/src/infrastructure/edges/constants/config/__init__.php b/src/infrastructure/edges/constants/config/__init__.php new file mode 100644 index 0000000000..bf40589693 --- /dev/null +++ b/src/infrastructure/edges/constants/config/__init__.php @@ -0,0 +1,15 @@ +addEdge($src, $type, $dst) + * ->save(); + * + * @task edit Editing Edges + * @task internal Internals + */ +final class PhabricatorEdgeEditor { + + private $addEdges = array(); + private $remEdges = array(); + private $openTransactions = array(); + + +/* -( Editing Edges )------------------------------------------------------ */ + + + /** + * Add a new edge (possibly also adding its inverse). Changes take effect when + * you call @{method:save()}. If the edge already exists, it will not be + * overwritten. Removals queued with @{method:removeEdge} are executed before + * adds, so the effect of removing and adding the same edge is to overwrite + * any existing edge. + * + * The `$options` parameter accepts these values: + * + * - `data` Optional, data to write onto the edge. + * - `inverse_data` Optional, data to write on the inverse edge. If not + * provided, `data` will be written. + * + * @param phid Source object PHID. + * @param const Edge type constant. + * @param phid Destination object PHID. + * @param map Options map (see documentation). + * @return this + * + * @task edit + */ + public function addEdge($src, $type, $dst, array $options = array()) { + foreach ($this->buildEdgeSpecs($src, $type, $dst, $options) as $spec) { + $this->addEdges[] = $spec; + } + return $this; + } + + + /** + * Remove an edge (possibly also removing its inverse). Changes take effect + * when you call @{method:save()}. If an edge does not exist, the removal + * will be ignored. Edges are added after edges are removed, so the effect of + * a remove plus an add is to overwrite. + * + * @param phid Source object PHID. + * @param const Edge type constant. + * @param phid Destination object PHID. + * @return this + * + * @task edit + */ + public function removeEdge($src, $type, $dst) { + foreach ($this->buildEdgeSpecs($src, $type, $dst) as $spec) { + $this->remEdges[] = $spec; + } + return $this; + } + + + /** + * Apply edge additions and removals queued by @{method:addEdge} and + * @{method:removeEdge}. Note that transactions are opened, all additions and + * removals are executed, and then transactions are saved. Thus, in some cases + * it may be slightly more efficient to perform multiple edit operations + * (e.g., adds followed by removals) if their outcomes are not dependent, + * since transactions will not be held open as long. + * + * @return this + * @task edit + */ + public function save() { + + // NOTE: We write edge data first, before doing any transactions, since + // it's OK if we just leave it hanging out in space unattached to anything. + + $this->writeEdgeData(); + + // NOTE: Removes first, then adds, so that "remove + add" is a useful + // operation meaning "overwrite". + + $this->executeRemoves(); + $this->executeAdds(); + + $this->saveTransactions(); + } + + +/* -( Internals )---------------------------------------------------------- */ + + + /** + * Build the specification for an edge operation, and possibly build its + * inverse as well. + * + * @task internal + */ + private function buildEdgeSpecs($src, $type, $dst, array $options = array()) { + $data = array(); + if (!empty($options['data'])) { + $data['data'] = $options['data']; + } + + $specs = array(); + $specs[] = array( + 'src' => $src, + 'src_type' => phid_get_type($src), + 'dst' => $dst, + 'type' => $type, + 'data' => $data, + ); + + $inverse = PhabricatorEdgeConfig::getInverse($type); + if ($inverse) { + + // If `inverse_data` is set, overwrite the edge data. Normally, just + // write the same data to the inverse edge. + if (array_key_exists('inverse_data', $options)) { + $data['data'] = $options['inverse_data']; + } + + $specs[] = array( + 'src' => $dst, + 'src_type' => phid_get_type($dst), + 'dst' => $src, + 'type' => $inverse, + 'data' => $data, + ); + } + + return $specs; + } + + + /** + * Write edge data. + * + * @task internal + */ + private function writeEdgeData() { + $adds = $this->addEdges; + + $writes = array(); + foreach ($adds as $key => $edge) { + if ($edge['data']) { + $writes[] = array($key, $edge['src_type'], json_encode($edge['data'])); + } + } + + foreach ($writes as $write) { + list($key, $src_type, $data) = $write; + $conn_w = PhabricatorEdgeConfig::establishConnection($src_type, 'w'); + queryfx( + $conn_w, + 'INSERT INTO %T (data) VALUES (%s)', + PhabricatorEdgeConfig::TABLE_NAME_EDGEDATA, + $data); + $this->addEdges[$key]['data_id'] = $conn_w->getInsertID(); + } + } + + + /** + * Add queued edges. + * + * @task internal + */ + private function executeAdds() { + $adds = $this->addEdges; + $adds = igroup($adds, 'src_type'); + + // Assign stable sequence numbers to each edge, so we have a consistent + // ordering across edges by source and type. + foreach ($adds as $src_type => $edges) { + $edges_by_src = igroup($edges, 'src'); + foreach ($edges_by_src as $src => $src_edges) { + $seq = 0; + foreach ($src_edges as $key => $edge) { + $src_edges[$key]['seq'] = $seq++; + $src_edges[$key]['dateCreated'] = time(); + } + $edges_by_src[$src] = $src_edges; + } + $adds[$src_type] = array_mergev($edges_by_src); + } + + $inserts = array(); + foreach ($adds as $src_type => $edges) { + $conn_w = PhabricatorEdgeConfig::establishConnection($src_type, 'w'); + $sql = array(); + foreach ($edges as $edge) { + $sql[] = qsprintf( + $conn_w, + '(%s, %d, %s, %d, %d, %nd)', + $edge['src'], + $edge['type'], + $edge['dst'], + $edge['dateCreated'], + $edge['seq'], + idx($edge, 'data_id')); + } + $inserts[] = array($conn_w, $sql); + } + + foreach ($inserts as $insert) { + list($conn_w, $sql) = $insert; + $conn_w->openTransaction(); + $this->openTransactions[] = $conn_w; + + foreach (array_chunk($sql, 256) as $chunk) { + queryfx( + $conn_w, + 'INSERT IGNORE INTO %T (src, type, dst, dateCreated, seq, dataID) + VALUES %Q', + PhabricatorEdgeConfig::TABLE_NAME_EDGE, + implode(', ', $chunk)); + } + } + } + + + /** + * Remove queued edges. + * + * @task internal + */ + private function executeRemoves() { + $rems = $this->remEdges; + $rems = igroup($rems, 'src_type'); + + $deletes = array(); + foreach ($rems as $src_type => $edges) { + $conn_w = PhabricatorEdgeConfig::establishConnection($src_type, 'w'); + $sql = array(); + foreach ($edges as $edge) { + $sql[] = qsprintf( + $conn_w, + '(%s, %d, %s)', + $edge['src'], + $edge['type'], + $edge['dst']); + } + $deletes[] = array($conn_w, $sql); + } + + foreach ($deletes as $delete) { + list($conn_w, $sql) = $delete; + + $conn_w->openTransaction(); + $this->openTransactions[] = $conn_w; + + foreach (array_chunk($sql, 256) as $chunk) { + queryfx( + $conn_w, + 'DELETE FROM %T WHERE (src, type, dst) IN (%Q)', + PhabricatorEdgeConfig::TABLE_NAME_EDGE, + implode(', ', $chunk)); + } + } + } + + + /** + * Save open transactions. + * + * @task internal + */ + private function saveTransactions() { + foreach ($this->openTransactions as $key => $conn_w) { + $conn_w->saveTransaction(); + unset($this->openTransactions[$key]); + } + } + +} diff --git a/src/infrastructure/edges/editor/edge/__init__.php b/src/infrastructure/edges/editor/edge/__init__.php new file mode 100644 index 0000000000..4242ea5e22 --- /dev/null +++ b/src/infrastructure/edges/editor/edge/__init__.php @@ -0,0 +1,17 @@ +withSourcePHIDs(array($src)) + * ->withEdgeTypes(array($type)) + * ->execute(); + * + * For more information on edges, see @{article:Using Edges}. + * + * @task config Configuring the Query + * @task exec Executing the Query + * @task internal Internal + */ +final class PhabricatorEdgeQuery extends PhabricatorQuery { + + private $sourcePHIDs; + private $edgeTypes; + + private $needEdgeData; + + +/* -( Configuring the Query )---------------------------------------------- */ + + + /** + * Find edges originating at one or more source PHIDs. You MUST provide this + * to execute an edge query. + * + * @param list List of source PHIDs. + * @return this + * + * @task config + */ + public function withSourcePHIDs(array $source_phids) { + $this->sourcePHIDs = $source_phids; + return $this; + } + + + /** + * Find edges of specific types. + * + * @param list List of PhabricatorEdgeConfig type constants. + * @return this + * + * @task config + */ + public function withEdgeTypes(array $types) { + $this->edgeTypes = $types; + return $this; + } + + + /** + * When loading edges, also load edge data. + * + * @param bool True to load edge data. + * @return this + * + * @task config + */ + public function needEdgeData($need) { + $this->needEdgeData = $need; + return $this; + } + + +/* -( Executing the Query )------------------------------------------------ */ + + + /** + * Load specified edges. + * + * @task exec + */ + public function execute() { + if (!$this->sourcePHIDs) { + throw new Exception( + "You must use withSourcePHIDs() to query edges."); + } + + $sources = phid_group_by_type($this->sourcePHIDs); + + $result = array(); + + // When a query specifies types, make sure we return data for all queried + // types. This is mostly to make sure PhabricatorLiskDAO->attachEdges() + // gets some data, so that getEdges() doesn't throw later. + if ($this->edgeTypes) { + foreach ($this->sourcePHIDs as $phid) { + foreach ($this->edgeTypes as $type) { + $result[$phid][$type] = array(); + } + } + } + + foreach ($sources as $type => $phids) { + $conn_r = PhabricatorEdgeConfig::establishConnection($type, 'r'); + + $where = $this->buildWhereClause($conn_r); + $order = $this->buildOrderClause($conn_r); + + $edges = queryfx_all( + $conn_r, + 'SELECT edge.* FROM %T edge %Q %Q', + PhabricatorEdgeConfig::TABLE_NAME_EDGE, + $where, + $order); + + if ($this->needEdgeData) { + $data_ids = array_filter(ipull($edges, 'dataID')); + $data_map = array(); + if ($data_ids) { + $data_rows = queryfx_all( + $conn_r, + 'SELECT edgedata.* FROM %T edgedata WHERE id IN (%Ld)', + PhabricatorEdgeConfig::TABLE_NAME_EDGEDATA, + $data_ids); + foreach ($data_rows as $row) { + $data_map[$row['id']] = idx( + json_decode($row['data'], true), + 'data'); + } + } + foreach ($edges as $key => $edge) { + $edges[$key]['data'] = idx($data_map, $edge['dataID']); + } + } + + foreach ($edges as $edge) { + $result[$edge['src']][$edge['type']][$edge['dst']] = $edge; + } + } + + return $result; + } + + +/* -( Internals )---------------------------------------------------------- */ + + + /** + * @task internal + */ + private function buildWhereClause($conn_r) { + $where = array(); + + if ($this->sourcePHIDs) { + $where[] = qsprintf( + $conn_r, + 'edge.src IN (%Ls)', + $this->sourcePHIDs); + } + + if ($this->edgeTypes) { + $where[] = qsprintf( + $conn_r, + 'edge.type IN (%Ls)', + $this->edgeTypes); + } + + return $this->formatWhereClause($where); + } + + + /** + * @task internal + */ + private function buildOrderClause($conn_r) { + return 'ORDER BY edge.dateCreated DESC, edge.seq ASC'; + } + +} diff --git a/src/infrastructure/edges/query/edge/__init__.php b/src/infrastructure/edges/query/edge/__init__.php new file mode 100644 index 0000000000..000e5da7ba --- /dev/null +++ b/src/infrastructure/edges/query/edge/__init__.php @@ -0,0 +1,18 @@ +