1
0
Fork 0
mirror of https://we.phorge.it/source/phorge.git synced 2024-09-19 16:58:48 +02:00

Improve awkward Diffusion query plans.

This commit is contained in:
epriestley 2011-03-20 17:46:02 -07:00
parent 01a20c0480
commit afe0079819
16 changed files with 262 additions and 78 deletions

View file

@ -53,7 +53,7 @@ switch (isset($argv[1]) ? $argv[1] : 'help') {
echo "Launching 'git pull' daemon on the {$desc} repository...\n";
$control->launchDaemon(
'PhabricatorRepositoryGitPullDaemon',
'PhabricatorRepositoryGitFetchDaemon',
array(
$phid,
));
@ -74,7 +74,7 @@ switch (isset($argv[1]) ? $argv[1] : 'help') {
switch ($repository->getVersionControlSystem()) {
case 'git':
echo "Launching 'git pull' daemon on the {$desc} repository...\n";
echo "Launching 'git fetch' daemon on the {$desc} repository...\n";
$control->launchDaemon(
'PhabricatorRepositoryGitPullDaemon',
array(

View file

@ -0,0 +1,74 @@
#!/usr/bin/env php
<?php
/*
* Copyright 2011 Facebook, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
// One-off maintenance script: rebuilds the repository summary table
// (PhabricatorRepository::TABLE_SUMMARY) from the commit table. For every
// repository that has commits it records the commit count, the newest commit
// epoch, and the ID of one commit carrying that epoch.

$root = dirname(dirname(dirname(__FILE__)));
require_once $root.'/scripts/__init_script__.php';
require_once $root.'/scripts/__init_env__.php';

phutil_require_module('phutil', 'symbols');
PhutilSymbolLoader::loadClass('PhabricatorRepository');
PhutilSymbolLoader::loadClass('PhabricatorRepositoryCommit');

$commit = new PhabricatorRepositoryCommit();

// This connection is used for the INSERT below, so it must be opened in
// write mode rather than read mode.
$conn_w = id(new PhabricatorRepository())->establishConnection('w');

// Number of commits per repository, keyed by repository ID.
$sizes = queryfx_all(
  $conn_w,
  'SELECT repositoryID, count(*) N FROM %T GROUP BY repositoryID',
  $commit->getTableName());
$sizes = ipull($sizes, 'N', 'repositoryID');

// Newest commit epoch per repository, keyed by repository ID.
$maxes = queryfx_all(
  $conn_w,
  'SELECT repositoryID, max(epoch) maxEpoch FROM %T GROUP BY repositoryID',
  $commit->getTableName());
$maxes = ipull($maxes, 'maxEpoch', 'repositoryID');

$repository_ids = array_keys($sizes + $maxes);

echo "Updating ".count($repository_ids)." repositories";

foreach ($repository_ids as $repository_id) {
  $size = idx($sizes, $repository_id, 0);
  $max_epoch = idx($maxes, $repository_id, 0);

  // Several commits may share the newest epoch; any one of them is accepted
  // as the "last" commit (LIMIT 1).
  $last_commit = queryfx_one(
    $conn_w,
    'SELECT id FROM %T WHERE repositoryID = %d AND epoch = %d LIMIT 1',
    $commit->getTableName(),
    $repository_id,
    $max_epoch);
  if ($last_commit) {
    $last_commit = $last_commit['id'];
  } else {
    $last_commit = 0;
  }

  // Upsert so the script can be re-run safely: an existing summary row is
  // overwritten with the freshly computed values.
  queryfx(
    $conn_w,
    'INSERT INTO %T (repositoryID, lastCommitID, size, epoch)
VALUES (%d, %d, %d, %d) ON DUPLICATE KEY UPDATE
lastCommitID = VALUES(lastCommitID),
size = VALUES(size),
epoch = VALUES(epoch)',
    PhabricatorRepository::TABLE_SUMMARY,
    $repository_id,
    $last_commit,
    $size,
    $max_epoch);

  echo ".";
}

echo "\ndone.\n";

View file

@ -162,6 +162,7 @@ phutil_register_library_map(array(
'DiffusionGitBrowseQuery' => 'applications/diffusion/query/browse/git',
'DiffusionGitFileContentQuery' => 'applications/diffusion/query/filecontent/git',
'DiffusionGitHistoryQuery' => 'applications/diffusion/query/history/git',
'DiffusionGitPathIDQuery' => 'applications/diffusion/query/pathid/base',
'DiffusionGitRequest' => 'applications/diffusion/request/git',
'DiffusionHistoryController' => 'applications/diffusion/controller/history',
'DiffusionHistoryQuery' => 'applications/diffusion/query/history/base',
@ -312,9 +313,9 @@ phutil_register_library_map(array(
'PhabricatorRepositoryGitCommitChangeParserWorker' => 'applications/repository/worker/commitchangeparser/git',
'PhabricatorRepositoryGitCommitDiscoveryDaemon' => 'applications/repository/daemon/commitdiscovery/git',
'PhabricatorRepositoryGitCommitMessageParserWorker' => 'applications/repository/worker/commitmessageparser/git',
'PhabricatorRepositoryGitFetchDaemon' => 'applications/repository/daemon/gitfetch',
'PhabricatorRepositoryGitHubNotification' => 'applications/repository/storage/githubnotification',
'PhabricatorRepositoryGitHubPostReceiveController' => 'applications/repository/controller/github-post-receive',
'PhabricatorRepositoryGitPullDaemon' => 'applications/repository/daemon/gitpull',
'PhabricatorRepositoryListController' => 'applications/repository/controller/list',
'PhabricatorRepositorySvnCommitChangeParserWorker' => 'applications/repository/worker/commitchangeparser/svn',
'PhabricatorRepositorySvnCommitDiscoveryDaemon' => 'applications/repository/daemon/commitdiscovery/svn',
@ -622,9 +623,9 @@ phutil_register_library_map(array(
'PhabricatorRepositoryGitCommitChangeParserWorker' => 'PhabricatorRepositoryCommitChangeParserWorker',
'PhabricatorRepositoryGitCommitDiscoveryDaemon' => 'PhabricatorRepositoryCommitDiscoveryDaemon',
'PhabricatorRepositoryGitCommitMessageParserWorker' => 'PhabricatorRepositoryCommitMessageParserWorker',
'PhabricatorRepositoryGitFetchDaemon' => 'PhabricatorRepositoryDaemon',
'PhabricatorRepositoryGitHubNotification' => 'PhabricatorRepositoryDAO',
'PhabricatorRepositoryGitHubPostReceiveController' => 'PhabricatorRepositoryController',
'PhabricatorRepositoryGitPullDaemon' => 'PhabricatorRepositoryDaemon',
'PhabricatorRepositoryListController' => 'PhabricatorRepositoryController',
'PhabricatorRepositorySvnCommitChangeParserWorker' => 'PhabricatorRepositoryCommitChangeParserWorker',
'PhabricatorRepositorySvnCommitDiscoveryDaemon' => 'PhabricatorRepositoryCommitDiscoveryDaemon',

View file

@ -22,54 +22,43 @@ class DiffusionHomeController extends DiffusionController {
// TODO: Restore "shortcuts" feature.
$repositories = id(new PhabricatorRepository())->loadAll();
$repository = new PhabricatorRepository();
$repositories = $repository->loadAll();
foreach ($repositories as $key => $repository) {
if (!$repository->getDetail('tracking-enabled')) {
unset($repositories[$key]);
}
}
$commit = new PhabricatorRepositoryCommit();
$conn_r = $commit->establishConnection('r');
// TODO: These queries are pretty bogus.
$repository_ids = mpull($repositories, 'getID');
$summaries = array();
$commits = array();
$commit_counts = array();
if ($repository_ids) {
$summaries = queryfx_all(
$repository->establishConnection('r'),
'SELECT * FROM %T WHERE repositoryID IN (%Ld)',
PhabricatorRepository::TABLE_SUMMARY,
$repository_ids);
$summaries = ipull($summaries, null, 'repositoryID');
$max_epoch = queryfx_all(
$commit->establishConnection('r'),
'SELECT repositoryID, MAX(epoch) maxEpoch FROM %T GROUP BY repositoryID',
$commit->getTableName());
if ($max_epoch) {
$sql = array();
foreach ($max_epoch as $head) {
$sql[] = '('.(int)$head['repositoryID'].', '.(int)$head['maxEpoch'].')';
$commit_ids = array_filter(ipull($summaries, 'lastCommitID'));
if ($commit_ids) {
$commit = new PhabricatorRepositoryCommit();
$commits = $commit->loadAllWhere('id IN (%Ld)', $commit_ids);
$commits = mpull($commits, null, 'getRepositoryID');
}
// NOTE: It's possible we'll pull multiple commits for some repository
// here but it reduces query cost around 3x to unique them in PHP rather
// than apply GROUP BY in MySQL.
$commits = $commit->loadAllWhere(
'(repositoryID, epoch) IN (%Q)',
implode(', ', $sql));
$commits = mpull($commits, null, 'getRepositoryID');
$commit_counts = queryfx_all(
$conn_r,
'SELECT repositoryID, count(*) N FROM %T
GROUP BY repositoryID',
$commit->getTableName());
$commit_counts = ipull($commit_counts, 'N', 'repositoryID');
}
$rows = array();
foreach ($repositories as $repository) {
$id = $repository->getID();
$commit = idx($commits, $id);
$date = null;
$time = null;
$size = idx(idx($summaries, $id, array()), 'size', 0);
$date = '-';
$time = '-';
if ($commit) {
$date = date('M j, Y', $commit->getEpoch());
$time = date('g:i A', $commit->getEpoch());
@ -84,12 +73,12 @@ class DiffusionHomeController extends DiffusionController {
phutil_escape_html($repository->getName())),
PhabricatorRepositoryType::getNameForRepositoryType(
$repository->getVersionControlSystem()),
idx($commit_counts, $id, 0),
$size ? number_format($size) : '-',
$commit
? DiffusionView::linkCommit(
$repository,
$commit->getCommitIdentifier())
: null,
: '-',
$date,
$time,
);

View file

@ -22,31 +22,46 @@ final class DiffusionGitBranchQuery extends DiffusionBranchQuery {
$drequest = $this->getRequest();
$repository = $drequest->getRepository();
$path = $drequest->getPath();
$commit = $drequest->getCommit();
$local_path = $repository->getDetail('local-path');
list($stdout) = execx(
'(cd %s && git branch --verbose --no-abbrev)',
'(cd %s && git branch -r --verbose --no-abbrev)',
$local_path);
$branches = array();
$lines = array_filter(explode("\n", $stdout));
foreach ($lines as $line) {
$matches = null;
if (!preg_match('/^[ *] (\S+)\s+([a-z0-9]{40}) /', $line, $matches)) {
throw new Exception("Failed to parse {$line}!");
}
foreach (self::parseGitRemoteBranchOutput($stdout) as $name => $head) {
$branch = new DiffusionBranchInformation();
$branch->setName($matches[1]);
$branch->setHeadCommitIdentifier($matches[2]);
$branch->setName($name);
$branch->setHeadCommitIdentifier($head);
$branches[] = $branch;
}
return $branches;
}
/**
 * Parse the output of `git branch -r --verbose --no-abbrev` into a map of
 * branch names to head commit hashes.
 *
 * Symbolic-ref lines such as "origin/HEAD -> origin/master" are skipped.
 *
 * @param  string $stdout Raw command output, one branch per line.
 * @return array  Map of branch name => 40-character commit hash.
 * @throws Exception If a non-empty line matches neither expected format.
 */
public static function parseGitRemoteBranchOutput($stdout) {
  $map = array();

  $lines = array_filter(explode("\n", $stdout));
  foreach ($lines as $line) {
    $matches = null;

    // Every listing line starts with a two-character prefix: either two
    // spaces or "* " for the current branch. The symbolic-ref pattern has to
    // accept the same prefix as the regular branch pattern below, otherwise
    // the pointer line falls through and raises a parse failure.
    if (preg_match('/^[ *] (\S+)\s+-> (\S+)$/', $line, $matches)) {
      // This is a line like:
      //
      //   origin/HEAD -> origin/master
      //
      // ...which we don't currently do anything interesting with, although
      // in theory we could use it to automatically choose the default
      // branch.
      continue;
    }

    if (!preg_match('/^[ *] (\S+)\s+([a-z0-9]{40}) /', $line, $matches)) {
      throw new Exception("Failed to parse {$line}!");
    }

    $map[$matches[1]] = $matches[2];
  }

  return $map;
}
}

View file

@ -25,17 +25,18 @@ final class DiffusionSvnBrowseQuery extends DiffusionBrowseQuery {
$path = $drequest->getPath();
$commit = $drequest->getCommit();
$path_normal = '/'.trim($path, '/');
$conn_r = $repository->establishConnection('r');
$paths = queryfx_all(
$conn_r,
'SELECT id, path FROM %T WHERE path IN (%Ls)',
PhabricatorRepository::TABLE_PATH,
array($path_normal));
$paths = ipull($paths, 'id', 'path');
$path_id = idx($paths, $path_normal);
$parent_path = dirname($path);
$path_query = new DiffusionGitPathIDQuery(
array(
$path,
$parent_path,
));
$path_map = $path_query->loadPathIDs();
$path_id = $path_map[$path];
$parent_path_id = $path_map[$parent_path];
if (empty($path_id)) {
$this->reason = self::REASON_IS_NONEXISTENT;
@ -62,12 +63,21 @@ final class DiffusionSvnBrowseQuery extends DiffusionBrowseQuery {
if ($path == '/') {
$this->reason = self::REASON_IS_EMPTY;
} else {
// NOTE: The parent path ID is included so this query can take
// advantage of the table's primary key; it is uniquely determined by
// the pathID but if we don't do the lookup ourselves MySQL doesn't have
// the information it needs to avoid a table scan.
$reasons = queryfx_all(
$conn_r,
'SELECT * FROM %T WHERE repositoryID = %d AND pathID = %d
'SELECT * FROM %T WHERE repositoryID = %d
AND parentID = %d
AND pathID = %d
%Q ORDER BY svnCommit DESC LIMIT 2',
PhabricatorRepository::TABLE_FILESYSTEM,
$repository->getID(),
$parent_path_id,
$path_id,
$slice_clause);
@ -113,6 +123,8 @@ final class DiffusionSvnBrowseQuery extends DiffusionBrowseQuery {
$path_id,
implode(', ', $sql));
$path_normal = DiffusionGitPathIDQuery::normalizePath($path);
$results = array();
foreach ($browse as $file) {

View file

@ -9,10 +9,9 @@
phutil_require_module('phabricator', 'applications/differential/constants/changetype');
phutil_require_module('phabricator', 'applications/diffusion/data/repositorypath');
phutil_require_module('phabricator', 'applications/diffusion/query/browse/base');
phutil_require_module('phabricator', 'applications/diffusion/query/pathid/base');
phutil_require_module('phabricator', 'applications/repository/storage/repository');
phutil_require_module('phabricator', 'storage/queryfx');
phutil_require_module('phutil', 'utils');
phutil_require_source('DiffusionSvnBrowseQuery.php');

View file

@ -0,0 +1,56 @@
<?php
/*
* Copyright 2011 Facebook, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
 * Looks up the IDs of repository paths in the path table
 * (PhabricatorRepository::TABLE_PATH).
 *
 * Paths are normalized before lookup (see normalizePath()), but results are
 * keyed by the exact strings the caller supplied, so callers can index the
 * result with their own, un-normalized paths.
 */
final class DiffusionGitPathIDQuery {

  /** @var array List of paths, as supplied by the caller. */
  private $paths;

  /**
   * @param array $paths List of path strings to look up.
   */
  public function __construct(array $paths) {
    $this->paths = $paths;
  }

  /**
   * Load the path IDs for the configured paths.
   *
   * @return array Map of original path string => path ID. Paths which have
   *               no row in the path table map to whatever idx() yields for
   *               a missing key.
   */
  public function loadPathIDs() {
    // An empty list would render as "IN ()", which is not valid SQL, so
    // answer it without touching the database.
    if (!$this->paths) {
      return array();
    }

    $repository = new PhabricatorRepository();

    // Several caller-supplied spellings may normalize to the same path;
    // remember all of them so each one gets an entry in the result.
    $path_normal_map = array();
    foreach ($this->paths as $path) {
      $normal = self::normalizePath($path);
      $path_normal_map[$normal][] = $path;
    }

    $paths = queryfx_all(
      $repository->establishConnection('r'),
      'SELECT * FROM %T WHERE path IN (%Ls)',
      PhabricatorRepository::TABLE_PATH,
      array_keys($path_normal_map));
    $paths = ipull($paths, 'id', 'path');

    $result = array();
    foreach ($path_normal_map as $normal => $originals) {
      foreach ($originals as $original) {
        $result[$original] = idx($paths, $normal);
      }
    }

    return $result;
  }

  /**
   * Convert a path to the form used for lookup: exactly one leading slash
   * and no trailing slashes.
   *
   * @param  string $path Path to normalize.
   * @return string Normalized path; "/" for the empty path or the root.
   */
  public static function normalizePath($path) {
    return '/'.trim($path, '/');
  }

}

View file

@ -0,0 +1,15 @@
<?php
/**
* This file is automatically generated. Lint this module to rebuild it.
* @generated
*/
phutil_require_module('phabricator', 'applications/repository/storage/repository');
phutil_require_module('phabricator', 'storage/queryfx');
phutil_require_module('phutil', 'utils');
phutil_require_source('DiffusionGitPathIDQuery.php');

View file

@ -56,7 +56,7 @@ abstract class PhabricatorRepositoryCommitDiscoveryDaemon
}
$this->commitCache[$target] = true;
while (count($this->commitCache) > 16) {
while (count($this->commitCache) > 64) {
array_shift($this->commitCache);
}
@ -80,6 +80,20 @@ abstract class PhabricatorRepositoryCommitDiscoveryDaemon
));
$event->recordEvent();
queryfx(
$repository->establishConnection('r'),
'INSERT INTO %T (repositoryID, size, lastCommitID, epoch)
VALUES (%d, 1, %d, %d)
ON DUPLICATE KEY UPDATE
size = size + 1,
lastCommitID =
IF(VALUES(epoch) > epoch, VALUES(lastCommitID), lastCommitID),
epoch = IF(VALUES(epoch) > epoch, VALUES(epoch), epoch)',
PhabricatorRepository::TABLE_SUMMARY,
$repository->getID(),
$commit->getID(),
$epoch);
$this->commitCache[$commit_identifier] = true;
} catch (AphrontQueryDuplicateKeyException $ex) {
// Ignore. This can happen because we discover the same new commit

View file

@ -8,7 +8,9 @@
phutil_require_module('phabricator', 'applications/repository/daemon/base');
phutil_require_module('phabricator', 'applications/repository/storage/commit');
phutil_require_module('phabricator', 'applications/repository/storage/repository');
phutil_require_module('phabricator', 'infrastructure/daemon/timeline/storage/event');
phutil_require_module('phabricator', 'storage/queryfx');
phutil_require_module('phutil', 'utils');

View file

@ -20,7 +20,7 @@ class PhabricatorRepositoryGitCommitDiscoveryDaemon
extends PhabricatorRepositoryCommitDiscoveryDaemon {
protected function discoverCommits() {
// NOTE: PhabricatorRepositoryGitPullDaemon does the actual pulls, this
// NOTE: PhabricatorRepositoryGitFetchDaemon does the actual pulls, this
// just parses HEAD.
$repository = $this->getRepository();
@ -33,18 +33,23 @@ class PhabricatorRepositoryGitCommitDiscoveryDaemon
$repository_phid = $repository->getPHID();
$repo_base = $repository->getDetail('local-path');
list($commit) = execx(
'(cd %s && git log -n1 --pretty="%%H")',
list($stdout) = execx(
'(cd %s && git branch -r --verbose --no-abbrev)',
$repo_base);
$commit = trim($commit);
if ($this->isKnownCommit($commit)) {
return false;
$branches = DiffusionGitBranchQuery::parseGitRemoteBranchOutput($stdout);
$got_something = false;
foreach ($branches as $name => $commit) {
if ($this->isKnownCommit($commit)) {
continue;
} else {
$this->discoverCommit($commit);
$got_something = true;
}
}
$this->discoverCommit($commit);
return true;
return $got_something;
}
private function discoverCommit($commit) {

View file

@ -6,6 +6,7 @@
phutil_require_module('phabricator', 'applications/diffusion/query/branch/git');
phutil_require_module('phabricator', 'applications/repository/constants/repositorytype');
phutil_require_module('phabricator', 'applications/repository/daemon/commitdiscovery/base');

View file

@ -16,7 +16,7 @@
* limitations under the License.
*/
class PhabricatorRepositoryGitPullDaemon
class PhabricatorRepositoryGitFetchDaemon
extends PhabricatorRepositoryDaemon {
public function run() {
@ -46,7 +46,7 @@ class PhabricatorRepositoryGitPullDaemon
execx('mkdir -p %s', dirname($local_path));
execx('git clone %s %s', $remote_uri, rtrim($local_path, '/'));
} else {
execx('(cd %s && git pull)', $local_path);
execx('(cd %s && git fetch --all)', $local_path);
}
$this->sleep($repository->getDetail('pull-frequency', 15));
}

View file

@ -12,4 +12,4 @@ phutil_require_module('phutil', 'filesystem');
phutil_require_module('phutil', 'future/exec');
phutil_require_source('PhabricatorRepositoryGitPullDaemon.php');
phutil_require_source('PhabricatorRepositoryGitFetchDaemon.php');

View file

@ -21,6 +21,7 @@ class PhabricatorRepository extends PhabricatorRepositoryDAO {
const TABLE_PATH = 'repository_path';
const TABLE_PATHCHANGE = 'repository_pathchange';
const TABLE_FILESYSTEM = 'repository_filesystem';
const TABLE_SUMMARY = 'repository_summary';
protected $phid;
protected $name;