1
0
Fork 0
mirror of https://we.phorge.it/source/phorge.git synced 2024-11-21 22:32:41 +01:00

Simplify daemon management: "phd start"

Summary:
  - Merge CommitTask daemon into PullLocal daemon. This is another artifact of past instability (and order-dependent parsers). We still publish to the timeline, although this was the last consumer. Long term we'll probably delete timeline and move to webhooks, since everyone who has asked about this stuff has been eager to trade away the durability and ordering of the timeline for the ease of use of webhooks. There's also no reason to timeline this anymore since parsing is no longer order-dependent.
  - Add `phd start` to start all the daemons you need. Add `phd restart` to restart all the daemons you need. So cool~
  - Simplify and improve phd and Diffusion daemon documentation.

Test Plan:
  - Ran `phd start`.
  - Ran `phd restart`.
  - Generated/read documentation.
  - Imported some stuff, got clean parses.

Reviewers: btrahan, csilvers

Reviewed By: csilvers

CC: aran, jungejason, nh

Differential Revision: https://secure.phabricator.com/D2433
This commit is contained in:
epriestley 2012-05-09 10:29:37 -07:00
parent 907f1a3dee
commit b800df8c1b
11 changed files with 228 additions and 191 deletions

View file

@ -937,6 +937,11 @@ return array(
// track running daemons.
'phd.pid-directory' => '/var/tmp/phd',
// Number of "TaskMaster" daemons that "phd start" should start. You can
// raise this if you have a task backlog, or explicitly launch more with
// "phd launch <N> taskmaster".
'phd.start-taskmasters' => 4,
// This value is an input to the hash function when building resource hashes.
// It has no security value, but if you accidentally poison user caches (by
// pushing a bad patch or having something go wrong with a CDN, e.g.) you can

View file

@ -34,7 +34,8 @@ function must_have_extension($ext) {
}
}
switch (isset($argv[1]) ? $argv[1] : 'help') {
$command = isset($argv[1]) ? $argv[1] : 'help';
switch ($command) {
case 'list':
$err = $control->executeListCommand();
exit($err);
@ -48,7 +49,45 @@ switch (isset($argv[1]) ? $argv[1] : 'help') {
$err = $control->executeStopCommand($pass_argv);
exit($err);
case 'repository-launch-readonly':
case 'restart':
$err = $control->executeStopCommand(array());
if ($err) {
exit($err);
}
/* Fall Through */
case 'start':
$running = $control->loadRunningDaemons();
if ($running) {
echo phutil_console_wrap(
"phd start: Unable to start daemons because daemons are already ".
"running.\n".
"You can view running daemons with 'phd list'.\n".
"You can stop running daemons with 'phd stop'.\n".
"You can use 'phd restart' to stop all daemons before starting new ".
"daemons.\n");
exit(1);
}
$daemons = array(
array('PhabricatorRepositoryPullLocalDaemon', array()),
array('PhabricatorGarbageCollectorDaemon', array()),
);
$taskmasters = PhabricatorEnv::getEnvConfig('phd.start-taskmasters');
for ($ii = 0; $ii < $taskmasters; $ii++) {
$daemons[] = array('PhabricatorTaskmasterDaemon', array());
}
will_launch($control);
foreach ($daemons as $spec) {
list($name, $argv) = $spec;
echo "Launching '{$name}'...\n";
$control->launchDaemon($name, $argv);
}
echo "Done.\n";
break;
$need_launch = phd_load_tracked_repositories();
if (!$need_launch) {
echo "There are no repositories with tracking enabled.\n";
@ -68,7 +107,16 @@ switch (isset($argv[1]) ? $argv[1] : 'help') {
echo "Done.\n";
break;
case 'repository-launch-readonly':
case 'repository-launch-master':
if ($command == 'repository-launch-readonly') {
$daemon_args = array(
'--no-discovery',
);
} else {
$daemon_args = array();
}
$need_launch = phd_load_tracked_repositories();
if (!$need_launch) {
echo "There are no repositories with tracking enabled.\n";
@ -77,18 +125,12 @@ switch (isset($argv[1]) ? $argv[1] : 'help') {
will_launch($control);
echo "Launching PullLocal daemon in master mode...\n";
echo "Launching PullLocal daemon...\n";
$control->launchDaemon(
'PhabricatorRepositoryPullLocalDaemon',
array());
$daemon_args);
echo "Launching CommitTask daemon...\n";
$control->launchDaemon(
'PhabricatorRepositoryCommitTaskDaemon',
array());
echo "NOTE: Make sure you run some taskmaster daemons too, e.g. ".
"with 'phd launch 4 taskmaster'.\n";
echo "NOTE: '{$command}' is deprecated. Consult the documentation.\n";
echo "Done.\n";
break;

View file

@ -57,9 +57,7 @@ sudo /etc/init.d/httpd start
# Restart daemons. Customize this to start whatever daemons you're running on
# your system.
# $ROOT/phabricator/bin/phd repository-launch-master
# $ROOT/phabricator/bin/phd launch garbagecollector
# $ROOT/phabricator/bin/phd launch 4 taskmaster
$ROOT/phabricator/bin/phd start
# $ROOT/phabricator/bin/phd launch ircbot /config/bot.json

View file

@ -845,7 +845,6 @@ phutil_register_library_map(array(
'PhabricatorRepositoryCommitMessageParserWorker' => 'applications/repository/worker/commitmessageparser/base',
'PhabricatorRepositoryCommitOwnersWorker' => 'applications/repository/worker/owner',
'PhabricatorRepositoryCommitParserWorker' => 'applications/repository/worker/base',
'PhabricatorRepositoryCommitTaskDaemon' => 'applications/repository/daemon/committask',
'PhabricatorRepositoryController' => 'applications/repository/controller/base',
'PhabricatorRepositoryCreateController' => 'applications/repository/controller/create',
'PhabricatorRepositoryDAO' => 'applications/repository/storage/base',
@ -1734,7 +1733,6 @@ phutil_register_library_map(array(
'PhabricatorRepositoryCommitMessageParserWorker' => 'PhabricatorRepositoryCommitParserWorker',
'PhabricatorRepositoryCommitOwnersWorker' => 'PhabricatorRepositoryCommitParserWorker',
'PhabricatorRepositoryCommitParserWorker' => 'PhabricatorWorker',
'PhabricatorRepositoryCommitTaskDaemon' => 'PhabricatorDaemon',
'PhabricatorRepositoryController' => 'PhabricatorController',
'PhabricatorRepositoryCreateController' => 'PhabricatorRepositoryController',
'PhabricatorRepositoryDAO' => 'PhabricatorLiskDAO',

View file

@ -1,80 +0,0 @@
<?php
/*
* Copyright 2012 Facebook, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
 * Daemon which consumes 'cmit' events from the timeline (using the 'cmittask'
 * cursor) and queues one commit message parser task for each event.
 *
 * Events with no data, or which refer to a commit or repository that can not
 * be loaded, are skipped without queueing anything.
 */
final class PhabricatorRepositoryCommitTaskDaemon
  extends PhabricatorDaemon {

  /**
   * Main daemon loop: drain the timeline iterator, sleep for a second, and
   * repeat forever.
   */
  final public function run() {
    while (true) {
      $events = new PhabricatorTimelineIterator('cmittask', array('cmit'));

      foreach ($events as $timeline_event) {
        $payload = $timeline_event->getData();
        if (!$payload) {
          // TODO: There is currently no way to report an event which can
          // not be processed; it is just skipped.
          continue;
        }

        $commit_object = new PhabricatorRepositoryCommit();
        $commit = $commit_object->load($payload['id']);
        if (!$commit) {
          // TODO: As above, unprocessable events are skipped silently.
          continue;
        }

        // TODO: Repositories are reloaded for every event; cache these.
        $repository_object = new PhabricatorRepository();
        $repository = $repository_object->load($commit->getRepositoryID());
        if (!$repository) {
          // TODO: As above. This almost certainly means the user deleted the
          // repository, in which case ignoring the event is correct.
          continue;
        }

        $worker_class = $this->selectParserWorkerClass(
          $repository->getVersionControlSystem());

        $task_data = array(
          'commitID' => $commit->getID(),
        );

        $worker_task = new PhabricatorWorkerTask();
        $worker_task->setTaskClass($worker_class);
        $worker_task->setData($task_data);
        $worker_task->save();

        $this->stillWorking();
      }

      sleep(1);
      $this->stillWorking();
    }
  }

  /**
   * Pick the commit message parser worker for a version control system.
   *
   * @param string Repository type constant.
   * @return string Name of the worker class to queue.
   * @throws Exception If the repository type is not recognized.
   */
  private function selectParserWorkerClass($vcs) {
    switch ($vcs) {
      case PhabricatorRepositoryType::REPOSITORY_TYPE_GIT:
        return 'PhabricatorRepositoryGitCommitMessageParserWorker';
      case PhabricatorRepositoryType::REPOSITORY_TYPE_SVN:
        return 'PhabricatorRepositorySvnCommitMessageParserWorker';
      case PhabricatorRepositoryType::REPOSITORY_TYPE_MERCURIAL:
        return 'PhabricatorRepositoryMercurialCommitMessageParserWorker';
      default:
        throw new Exception("Unknown repository type.");
    }
  }

}

View file

@ -1,19 +0,0 @@
<?php
/**
* This file is automatically generated. Lint this module to rebuild it.
* @generated
*/
phutil_require_module('phabricator', 'applications/repository/constants/repositorytype');
phutil_require_module('phabricator', 'applications/repository/storage/commit');
phutil_require_module('phabricator', 'applications/repository/storage/repository');
phutil_require_module('phabricator', 'infrastructure/daemon/base');
phutil_require_module('phabricator', 'infrastructure/daemon/timeline/cursor/iterator');
phutil_require_module('phabricator', 'infrastructure/daemon/workers/storage/task');
phutil_require_module('phutil', 'utils');
phutil_require_source('PhabricatorRepositoryCommitTaskDaemon.php');

View file

@ -269,6 +269,7 @@ final class PhabricatorRepositoryPullLocalDaemon
try {
$commit->save();
$event = new PhabricatorTimelineEvent(
'cmit',
array(
@ -276,6 +277,8 @@ final class PhabricatorRepositoryPullLocalDaemon
));
$event->recordEvent();
self::insertTask($repository, $commit);
queryfx(
$repository->establishConnection('w'),
'INSERT INTO %T (repositoryID, size, lastCommitID, epoch)
@ -300,6 +303,35 @@ final class PhabricatorRepositoryPullLocalDaemon
}
}
/**
 * Queue a worker task which parses the message of a commit.
 *
 * The parser worker class is selected from the repository's version control
 * system, and the commit's ID is stored in the task data as 'commitID'.
 *
 * @param PhabricatorRepository Repository the commit belongs to.
 * @param PhabricatorRepositoryCommit Commit to queue a parser task for.
 * @return void
 * @throws Exception If the repository's version control system is not one
 *                   of the known repository types.
 */
private static function insertTask(
  PhabricatorRepository $repository,
  PhabricatorRepositoryCommit $commit) {

  // NOTE: The type hint on $commit previously named a nonexistent class
  // ("PhabricatorRepositoryCommmit"), which made every call fail the
  // parameter type check.

  $vcs = $repository->getVersionControlSystem();
  switch ($vcs) {
    case PhabricatorRepositoryType::REPOSITORY_TYPE_GIT:
      $class = 'PhabricatorRepositoryGitCommitMessageParserWorker';
      break;
    case PhabricatorRepositoryType::REPOSITORY_TYPE_SVN:
      $class = 'PhabricatorRepositorySvnCommitMessageParserWorker';
      break;
    case PhabricatorRepositoryType::REPOSITORY_TYPE_MERCURIAL:
      $class = 'PhabricatorRepositoryMercurialCommitMessageParserWorker';
      break;
    default:
      throw new Exception("Unknown repository type '{$vcs}'!");
  }

  $task = new PhabricatorWorkerTask();
  $task->setTaskClass($class);
  $task->setData(
    array(
      'commitID' => $commit->getID(),
    ));
  $task->save();
}
private static function setCache(
PhabricatorRepository $repository,
$commit_identifier) {

View file

@ -15,6 +15,7 @@ phutil_require_module('phabricator', 'applications/repository/storage/commit');
phutil_require_module('phabricator', 'applications/repository/storage/repository');
phutil_require_module('phabricator', 'infrastructure/daemon/base');
phutil_require_module('phabricator', 'infrastructure/daemon/timeline/storage/event');
phutil_require_module('phabricator', 'infrastructure/daemon/workers/storage/task');
phutil_require_module('phabricator', 'storage/queryfx');
phutil_require_module('phutil', 'error');

View file

@ -6,17 +6,17 @@ Explains Phabricator daemons and the daemon control program ##phd##.
= Overview =
Phabricator uses daemons (background processing scripts) to handle a number of
tasks, like:
tasks:
- tracking repositories and discovering new commits;
- sending mail;
- updating objects in the search index; and
- custom tasks you define.
- tracking repositories, discovering new commits, and importing and parsing
commits;
- sending email; and
- collecting garbage, like old logs and caches.
Daemons are started and stopped with **phd** (the **Ph**abricator **D**aemon
launcher). Daemons can be monitored via a web console.
You do not need to run daemons for most parts of Phabricator to work, but a few
You do not need to run daemons for most parts of Phabricator to work, but some
features (principally, repository tracking with Diffusion) require them and
several features will benefit in performance or stability if you configure
daemons.
@ -33,22 +33,24 @@ a list of commands, run ##phd help##:
Generally, you will use:
- **phd launch** to launch daemons;
- **phd debug** to debug problems with daemons;
- **phd start** to launch all daemons;
- **phd restart** to restart all daemons;
- **phd status** to get a list of running daemons; and
- **phd stop** to stop all daemons.
NOTE: When you upgrade Phabricator or change configuration, you should restart
the daemons by stopping and relaunching them.
If you want finer-grained control, you can use:
NOTE: When you **launch** a daemon, you can type any unique substring of its
name, so **phd launch metamta** will work correctly.
- **phd launch** to launch individual daemons; and
- **phd debug** to debug problems with daemons.
NOTE: When you upgrade Phabricator or change configuration, you should restart
the daemons by running `phd restart`.
= Daemon Console =
You can view status and debugging information for daemons in the Daemon Console
via the web interface. Go to ##/daemon/## in your install or click
**Daemon Console** from the homepage.
**Daemon Console** from "More Stuff".
The Daemon Console shows a list of all the daemons that have ever launched, and
allows you to view log information for them. If you have issues with daemons,
@ -56,7 +58,7 @@ you may be able to find error information that will help you resolve the problem
in the console.
NOTE: The easiest way to figure out what's wrong with a daemon is usually to use
**phd debug** to launch it instead of **phd launch**. This will run it without
**phd debug** to launch it instead of **phd start**. This will run it without
daemonizing it, so you can see output in your console.
= Available Daemons =
@ -65,7 +67,72 @@ You can get a list of launchable daemons with **phd list**:
- **libphutil test daemons** are not generally useful unless you are
developing daemon infrastructure or debugging a daemon problem;
- **PhabricatorTaskmasterDaemon** runs a generic task queue; and
- **PhabricatorRepository** daemons track repositories, descriptions are
available in the @{article:Diffusion User Guide}.
- **PhabricatorTaskmasterDaemon** performs work from a task queue;
- **PhabricatorRepositoryPullLocalDaemon** tracks repositories (for more
information, see @{article:Diffusion User Guide}); and
- **PhabricatorGarbageCollectorDaemon** cleans up old logs and caches.
= Debugging and Tuning =
In most cases, **phd start** handles launching all the daemons you need.
However, you may want to use more granular daemon controls to debug daemons,
launch custom daemons, or launch special daemons like the IRC bot.
To debug a daemon, use `phd debug`:
phabricator/bin/ $ ./phd debug <daemon>
You can pass arguments like this (normal arguments are passed to the daemon
control mechanism, not to the daemon itself):
phabricator/bin/ $ ./phd debug <daemon> -- --flavor apple
In debug mode, daemons do not daemonize, and they print additional debugging
output to the console. This should make it easier to debug problems. You can
terminate the daemon with `^C`.
To launch a nonstandard daemon, use `phd launch`:
phabricator/bin/ $ ./phd launch <daemon>
This daemon will daemonize and run normally.
== General Tips ==
- You can set the number of taskmasters that `phd start` starts with the
`phd.start-taskmasters` configuration option. If you have a task backlog, try
increasing it.
- When you `phd launch` or `phd debug` a daemon, you can type any unique
substring of its name, so `phd launch pull` will work correctly.
- `phd stop` and `phd restart` stop **all** of the daemons on the machine, not
just those started with `phd start`. If you're writing a restart script,
have it launch any custom daemons explicitly after `phd restart`.
- You can write your own daemons and manage them with `phd` by extending
@{class:PhabricatorDaemon}. See @{article:libphutil Libraries User Guide}.
- See @{article:Diffusion User Guide} for details about tuning the repository
daemon.
== Multiple Machines ==
If you have multiple machines, you should use `phd launch` to tweak which
daemons launch, and split daemons across machines like this:
- `PhabricatorRepositoryPullLocalDaemon`: Run one copy on any machine.
On each web frontend which is not running a normal copy, run a copy
with the `--no-discovery` flag.
- `PhabricatorGarbageCollectorDaemon`: Run one copy on any machine.
- `PhabricatorTaskmasterDaemon`: Run as many copies as you need to keep
tasks from backing up. You can run them all on one machine or split them
across machines.
A gratuitously wasteful install might have a dedicated daemon machine which
runs `phd start` with a large pool of taskmasters set in the config, and then
runs `phd launch PhabricatorRepositoryPullLocalDaemon -- --no-discovery` on each
web server. This is grossly excessive in normal cases.
= Next Steps =
Continue by:
- learning about the repository daemon with @{article:Diffusion User Guide};
or
- writing your own daemons with @{article:libphutil Libraries User Guide}.

View file

@ -43,16 +43,17 @@ The primary goal of callsigns is to namespace commits to SVN repositories: if
you use multiple SVN repositories, each repository has a revision 1, revision 2,
etc., so referring to them by number alone is ambiguous. However, even for Git
they impart additional information to human readers and allow parsers to detect
that something is a commit name with high probability.
that something is a commit name with high probability (and allow distinguishing
between multiple copies of a repository).
Diffusion uses this callsign and information about the commit itself to generate
a commit name, like "rE12345" or "rP28146171ce1278f2375e3646a1e1ea3fd56fc5a3".
The "r" stands for "revision". It is followed by the repository callsign, and
then a VCS-specific commit identifier (for SVN, the commit number; for Git, the
commit hash). When writing the name of a Git commit you may abbreviate the hash,
but note that hash collisions are probable for short prefix lengths. See this
post on the LKML for a historical explanation of Git's occasional internal use
of 7-character hashes:
then a VCS-specific commit identifier (for SVN, the commit number; for Git and
Mercurial, the commit hash). When writing the name of a Git commit you may
abbreviate the hash, but note that hash collisions are probable for short prefix
lengths. See this post on the LKML for a historical explanation of Git's
occasional internal use of 7-character hashes:
https://lkml.org/lkml/2010/10/28/287
@ -84,8 +85,8 @@ tracking in Diffusion.
Most of the options in the **Tracking** tab should be self-explanatory or are
safe to leave at their defaults. In broad strokes, Diffusion tracks SVN
repositories by issuing an "svn log" command periodically against the remote to
look for new commits. It tracks Git repositories by cloning a local copy and
issuing "git fetch" periodically.
look for new commits. It tracks Git and Mercurial repositories by cloning a
local copy and issuing `git fetch` or `hg pull` periodically.
Once you've configured everything (and made sure **Tracking** is set to
"Enabled"), you can launch the daemons to begin actually tracking the
@ -93,20 +94,15 @@ repository.
= Running Diffusion Daemons =
For an introduction to Phabricator daemons, see
@{article:Managing Daemons with phd}. To actually track repositories, you need
to:
In most cases, it is sufficient to run:
- run ##phd repository-launch-master## on one machine;
- run at least one @{class:PhabricatorTaskmasterDaemon} with
##phd launch taskmaster##. You should probably launch a few of these
somewhere. They are generic workers which run many different kinds of
background tasks, so if you already have some running you don't need to
launch more. However, if you are importing a very large repository, import
rate will primarily be a function of how many taskmasters you are running so
you may want to launch a bunch of them; and
- if you have multiple web frontends and have tracked Git repositories, run
##phd repository-launch-readonly## on each web frontend.
phabricator/bin/ $ ./phd start
...to start the daemons. For a more in-depth explanation of `phd` and daemons,
see @{article:Managing Daemons with phd}.
NOTE: If you have an unusually large install with multiple web frontends, see
notes in @{article:Managing Daemons with phd}.
You can use the Daemon Console to monitor the daemons and their progress
importing the repository. Small repositories should import quickly, while
@ -116,39 +112,32 @@ discovering commits in Facebook's 350,000-commit primary repository, and about
should begin appearing in Diffusion within a few minutes for all but the
largest repositories.
In detail, Diffusion uses several daemons to track, parse and import
repositories:
== Tuning Daemons ==
- **PhabricatorRepositoryGitFetchDaemon**: periodically runs "git fetch" to
keep git repositories up to date
- **PhabricatorRepositoryGitCommitDiscoveryDaemon**: periodically looks for
new commits and imports them
- **PhabricatorRepositorySvnCommitDiscoveryDaemon**: periodically runs
"svn log" to look for new commits and import them
- **PhabricatorRepositoryCommitTaskDaemon**: creates tasks to parse and
import newly discovered commits
By default, Phabricator launches one daemon to pull and discover all of the
tracked repositories. This works well for a small number of repositories or
a large number of relatively inactive repositories, but might benefit from
tuning in some cases. The daemon makes a rough effort to respect pull
frequencies defined in repository configuration, but may not be able to import
new commits very quickly if you have a large number of repositories (as it is
blocked waiting on I/O from other repositories). If you want to provide lower
commit import latency for some repositories, you can launch additional
dedicated daemons:
The ##repository-launch-master## command just chooses the right daemons to
launch based on which repositories you've configured to be tracked. If you add
new repositories in the future, you should stop all the daemons and rerun
##repository-launch-master##.
For example, if you want low latency on the repositories with callsigns
`A` and `B`, but don't care about latency for the other repositories, you could
launch two daemons like this:
If you run Phabricator with multiple web frontends, have your deployment script
do a ##phd stop## and ##phd repository-launch-readonly## when it deploys. It is
very unlikely you are impacted by this unless you are one of the largest
installs in the world.
phabricator/bin $ ./phd launch RepositoryPullLocal -- A B
phabricator/bin $ ./phd launch RepositoryPullLocal -- --not A --not B
= Building New Parsers =
You can add new classes which will extend or enhance Diffusion's ability to
parse commit messages.
TODO: This is an advanced feature which doesn't currently have documentation and
isn't terribly stable.
The first one will work only on `A` and `B`, and should be able to import
commits with low latency more reliably. The second one will work on all other
repositories.
= Next Steps =
- Learn about creating a symbol index at
- Learn about creating a symbol index at
@{article:Diffusion User Guide: Symbol Indexes}; or
- understand daemons in detail with @{article:Managing Daemons with phd}; or
- give us feedback at @{article:Give Feedback! Get Support!}.

View file

@ -152,6 +152,18 @@ final class PhabricatorDaemonControl {
**COMMAND REFERENCE**
**start**
Start the normal collection of daemons that Phabricator uses. This
is appropriate for most installs. If you want to customize what
is launched, you can use **launch** for fine-grained control.
**restart**
Stop all running daemons, then start a standard loadout.
**stop** [PID ...]
Stop all running daemons if no PIDs are given, or a particular
PID or set of PIDs, if they are supplied.
**launch** [__n__] __daemon__ [argv ...]
**debug** __daemon__ [argv ...]
Start a daemon (or n copies of a daemon).
@ -164,22 +176,14 @@ final class PhabricatorDaemonControl {
**status**
List running daemons.
**stop** [PID ...]
Stop all running daemons if no PIDs are given, or a particular
PID or set of PIDs, if they are supplied.
**help**
Show this help.
**repository-launch-master**
Launches daemons to update and parse all tracked repositories. You
must also launch Taskmaster daemons, either on the same machine or
elsewhere. You should launch a master only one machine. For other
machines, launch a 'readonly'.
DEPRECATED. Use 'phd start'.
**repository-launch-readonly**
Launches daemons to 'git pull' tracked git repositories so they
stay up to date.
DEPRECATED. Use 'phd launch pulllocal --no-discovery'.
EOHELP
);
@ -301,7 +305,7 @@ EOHELP
->selectSymbolsWithoutLoading();
}
protected function loadRunningDaemons() {
public function loadRunningDaemons() {
$results = array();
$pid_dir = $this->getControlDirectory('pid');