mirror of
https://we.phorge.it/source/phorge.git
synced 2024-12-29 17:00:59 +01:00
Make phd
more aware of multiple daemons under a single overseer
Summary: Ref T7352. This makes `phd stop` and `phd status` produce more reasonable output with the new PID file format. Test Plan: Ran `phd stop`, `phd status`, etc. Reviewers: btrahan Reviewed By: btrahan Subscribers: epriestley Maniphest Tasks: T7352 Differential Revision: https://secure.phabricator.com/D11856
This commit is contained in:
parent
09f3d0bb7e
commit
c2d66f29cd
2 changed files with 98 additions and 132 deletions
|
@ -44,11 +44,11 @@ abstract class PhabricatorDaemonManagementWorkflow
|
|||
$pid_files = Filesystem::listDirectory($pid_dir);
|
||||
|
||||
foreach ($pid_files as $pid_file) {
|
||||
$daemons[] = PhabricatorDaemonReference::newFromFile(
|
||||
$pid_dir.'/'.$pid_file);
|
||||
$path = $pid_dir.'/'.$pid_file;
|
||||
$daemons[] = PhabricatorDaemonReference::loadReferencesFromFile($path);
|
||||
}
|
||||
|
||||
return $daemons;
|
||||
return array_mergev($daemons);
|
||||
}
|
||||
|
||||
protected final function loadAllRunningDaemons() {
|
||||
|
@ -403,43 +403,60 @@ abstract class PhabricatorDaemonManagementWorkflow
|
|||
return 0;
|
||||
}
|
||||
|
||||
$daemons = mpull($daemons, null, 'getPID');
|
||||
|
||||
$running = array();
|
||||
$running_pids = array_fuse(mpull($daemons, 'getPID'));
|
||||
if (!$pids) {
|
||||
$running = $daemons;
|
||||
$stop_pids = $running_pids;
|
||||
} else {
|
||||
// We were given a PID or set of PIDs to kill.
|
||||
$stop_pids = array();
|
||||
foreach ($pids as $key => $pid) {
|
||||
if (!preg_match('/^\d+$/', $pid)) {
|
||||
$console->writeErr(pht("PID '%s' is not a valid PID.", $pid)."\n");
|
||||
continue;
|
||||
} else if (empty($daemons[$pid])) {
|
||||
} else if (empty($running_pids[$pid])) {
|
||||
$console->writeErr(
|
||||
pht(
|
||||
"PID '%s' is not a Phabricator daemon PID. It will not ".
|
||||
"be killed.",
|
||||
'PID "%d" is not a known Phabricator daemon PID. It will not '.
|
||||
'be killed.',
|
||||
$pid)."\n");
|
||||
continue;
|
||||
} else {
|
||||
$running[] = $daemons[$pid];
|
||||
$stop_pids[$pid] = $pid;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (empty($running)) {
|
||||
if (!$stop_pids) {
|
||||
$console->writeErr(pht('No daemons to kill.')."\n");
|
||||
return 0;
|
||||
}
|
||||
|
||||
$all_daemons = $running;
|
||||
// don't specify force here as that's about rogue daemons
|
||||
$this->sendStopSignals($running, $grace_period);
|
||||
$survivors = $this->sendStopSignals($stop_pids, $grace_period);
|
||||
|
||||
foreach ($all_daemons as $daemon) {
|
||||
if ($daemon->getPIDFile()) {
|
||||
Filesystem::remove($daemon->getPIDFile());
|
||||
// Try to clean up PID files for daemons we killed.
|
||||
$remove = array();
|
||||
foreach ($daemons as $daemon) {
|
||||
$pid = $daemon->getPID();
|
||||
if (empty($stop_pids[$pid])) {
|
||||
// We did not try to stop this overseer.
|
||||
continue;
|
||||
}
|
||||
|
||||
if (isset($survivors[$pid])) {
|
||||
// We weren't able to stop this overseer.
|
||||
continue;
|
||||
}
|
||||
|
||||
if (!$daemon->getPIDFile()) {
|
||||
// We don't know where the PID file is.
|
||||
continue;
|
||||
}
|
||||
|
||||
$remove[] = $daemon->getPIDFile();
|
||||
}
|
||||
|
||||
foreach (array_unique($remove) as $remove_file) {
|
||||
Filesystem::remove($remove_file);
|
||||
}
|
||||
|
||||
if (!$gently) {
|
||||
|
@ -455,20 +472,19 @@ abstract class PhabricatorDaemonManagementWorkflow
|
|||
$rogue_daemons = PhutilDaemonOverseer::findRunningDaemons();
|
||||
if ($rogue_daemons) {
|
||||
if ($force_stop) {
|
||||
$stop_rogue_daemons = $this->buildRogueDaemons($rogue_daemons);
|
||||
$survivors = $this->sendStopSignals(
|
||||
$stop_rogue_daemons,
|
||||
$grace_period,
|
||||
$force_stop);
|
||||
$rogue_pids = ipull($rogue_daemons, 'pid');
|
||||
$survivors = $this->sendStopSignals($rogue_pids, $grace_period);
|
||||
if ($survivors) {
|
||||
$console->writeErr(pht(
|
||||
'Unable to stop processes running without pid files. Try running '.
|
||||
'this command again with sudo.'."\n"));
|
||||
$console->writeErr(
|
||||
pht(
|
||||
'Unable to stop processes running without PID files. '.
|
||||
'Try running this command again with sudo.')."\n");
|
||||
}
|
||||
} else if ($warn) {
|
||||
$console->writeErr($this->getForceStopHint($rogue_daemons)."\n");
|
||||
}
|
||||
}
|
||||
|
||||
return $rogue_daemons;
|
||||
}
|
||||
|
||||
|
@ -485,57 +501,47 @@ abstract class PhabricatorDaemonManagementWorkflow
|
|||
$debug_output);
|
||||
}
|
||||
|
||||
private function buildRogueDaemons(array $daemons) {
|
||||
$rogue_daemons = array();
|
||||
foreach ($daemons as $pid => $data) {
|
||||
$rogue_daemons[] =
|
||||
PhabricatorDaemonReference::newFromRogueDictionary($data);
|
||||
}
|
||||
return $rogue_daemons;
|
||||
}
|
||||
|
||||
private function sendStopSignals($daemons, $grace_period, $force = false) {
|
||||
private function sendStopSignals($pids, $grace_period) {
|
||||
// If we're doing a graceful shutdown, try SIGINT first.
|
||||
if ($grace_period) {
|
||||
$daemons = $this->sendSignal($daemons, SIGINT, $grace_period, $force);
|
||||
$pids = $this->sendSignal($pids, SIGINT, $grace_period);
|
||||
}
|
||||
|
||||
// If we still have daemons, SIGTERM them.
|
||||
if ($daemons) {
|
||||
$daemons = $this->sendSignal($daemons, SIGTERM, 15, $force);
|
||||
if ($pids) {
|
||||
$pids = $this->sendSignal($pids, SIGTERM, 15);
|
||||
}
|
||||
|
||||
// If the overseer is still alive, SIGKILL it.
|
||||
if ($daemons) {
|
||||
$daemons = $this->sendSignal($daemons, SIGKILL, 0, $force);
|
||||
if ($pids) {
|
||||
$pids = $this->sendSignal($pids, SIGKILL, 0);
|
||||
}
|
||||
return $daemons;
|
||||
|
||||
return $pids;
|
||||
}
|
||||
|
||||
private function sendSignal(array $daemons, $signo, $wait, $force = false) {
|
||||
private function sendSignal(array $pids, $signo, $wait) {
|
||||
$console = PhutilConsole::getConsole();
|
||||
|
||||
foreach ($daemons as $key => $daemon) {
|
||||
$pid = $daemon->getPID();
|
||||
$name = $daemon->getName();
|
||||
$pids = array_fuse($pids);
|
||||
|
||||
foreach ($pids as $key => $pid) {
|
||||
if (!$pid) {
|
||||
// NOTE: We must have a PID to signal a daemon, since sending a signal
|
||||
// to PID 0 kills this process.
|
||||
$console->writeOut("%s\n", pht("Daemon '%s' has no PID!", $name));
|
||||
unset($daemons[$key]);
|
||||
unset($pids[$key]);
|
||||
continue;
|
||||
}
|
||||
|
||||
switch ($signo) {
|
||||
case SIGINT:
|
||||
$message = pht("Interrupting daemon '%s' (%s)...", $name, $pid);
|
||||
$message = pht('Interrupting process %d...', $pid);
|
||||
break;
|
||||
case SIGTERM:
|
||||
$message = pht("Terminating daemon '%s' (%s)...", $name, $pid);
|
||||
$message = pht('Terminating process %d...', $pid);
|
||||
break;
|
||||
case SIGKILL:
|
||||
$message = pht("Killing daemon '%s' (%s)...", $name, $pid);
|
||||
$message = pht('Killing process %d...', $pid);
|
||||
break;
|
||||
}
|
||||
|
||||
|
@ -546,21 +552,20 @@ abstract class PhabricatorDaemonManagementWorkflow
|
|||
if ($wait) {
|
||||
$start = PhabricatorTime::getNow();
|
||||
do {
|
||||
foreach ($daemons as $key => $daemon) {
|
||||
$pid = $daemon->getPID();
|
||||
if (!$daemon->isRunning()) {
|
||||
$console->writeOut(pht('Daemon %s exited.', $pid)."\n");
|
||||
unset($daemons[$key]);
|
||||
foreach ($pids as $key => $pid) {
|
||||
if (!PhabricatorDaemonReference::isProcessRunning($pid)) {
|
||||
$console->writeOut(pht('Process %d exited.', $pid)."\n");
|
||||
unset($pids[$key]);
|
||||
}
|
||||
}
|
||||
if (empty($daemons)) {
|
||||
if (empty($pids)) {
|
||||
break;
|
||||
}
|
||||
usleep(100000);
|
||||
} while (PhabricatorTime::getNow() < $start + $wait);
|
||||
}
|
||||
|
||||
return $daemons;
|
||||
return $pids;
|
||||
}
|
||||
|
||||
private function freeActiveLeases() {
|
||||
|
|
|
@ -10,7 +10,7 @@ final class PhabricatorDaemonReference {
|
|||
|
||||
private $daemonLog;
|
||||
|
||||
public static function newFromFile($path) {
|
||||
public static function loadReferencesFromFile($path) {
|
||||
$pid_data = Filesystem::readFile($path);
|
||||
|
||||
try {
|
||||
|
@ -19,89 +19,50 @@ final class PhabricatorDaemonReference {
|
|||
$dict = array();
|
||||
}
|
||||
|
||||
$ref = self::newFromDictionary($dict);
|
||||
$ref->pidFile = $path;
|
||||
return $ref;
|
||||
}
|
||||
$refs = array();
|
||||
$daemons = idx($dict, 'daemons', array());
|
||||
|
||||
public static function newFromDictionary(array $dict) {
|
||||
$ref = new PhabricatorDaemonReference();
|
||||
foreach ($daemons as $daemon) {
|
||||
$ref = new PhabricatorDaemonReference();
|
||||
|
||||
// TODO: This is a little rough during the transition from one-to-one
|
||||
// overseers to one-to-many.
|
||||
$config = idx($dict, 'config', array());
|
||||
// NOTE: This is the overseer PID, not the actual daemon process PID.
|
||||
// This is correct for checking status and sending signals (the only
|
||||
// things we do with it), but might be confusing. $daemon['pid'] has
|
||||
// the daemon PID, and we could expose that if we had some use for it.
|
||||
|
||||
$daemon_list = null;
|
||||
if ($config) {
|
||||
$daemon_list = idx($config, 'daemons');
|
||||
$ref->pid = idx($dict, 'pid');
|
||||
$ref->start = idx($dict, 'start');
|
||||
|
||||
$ref->name = idx($daemon, 'class');
|
||||
$ref->argv = idx($daemon, 'argv', array());
|
||||
|
||||
|
||||
// TODO: We previously identified daemon logs by using a <class, pid,
|
||||
// epoch> tuple, but now all daemons under a single overseer will share
|
||||
// that identifier. We can uniquely identify daemons by $daemon['id'],
|
||||
// but that isn't currently written into the daemon logs. We should
|
||||
// start writing it, then load the logs here. This would give us a
|
||||
// slightly greater ability to keep the web UI in sync when daemons
|
||||
// get killed forcefully and clean up `phd status` a bit.
|
||||
|
||||
$ref->pidFile = $path;
|
||||
$refs[] = $ref;
|
||||
}
|
||||
|
||||
if ($daemon_list) {
|
||||
$ref->name = pht('Overseer Daemon Group');
|
||||
$ref->argv = array();
|
||||
} else {
|
||||
$ref->name = idx($dict, 'name', 'Unknown');
|
||||
$ref->argv = idx($dict, 'argv', array());
|
||||
}
|
||||
|
||||
$ref->pid = idx($dict, 'pid');
|
||||
$ref->start = idx($dict, 'start');
|
||||
|
||||
try {
|
||||
$ref->daemonLog = id(new PhabricatorDaemonLog())->loadOneWhere(
|
||||
'daemon = %s AND pid = %d AND dateCreated = %d',
|
||||
$ref->name,
|
||||
$ref->pid,
|
||||
$ref->start);
|
||||
} catch (AphrontQueryException $ex) {
|
||||
// Ignore the exception. We want to be able to terminate the daemons,
|
||||
// even if MySQL is down.
|
||||
}
|
||||
|
||||
return $ref;
|
||||
}
|
||||
|
||||
/**
|
||||
* Appropriate for getting @{class:PhabricatorDaemonReference} objects from
|
||||
* the data from @{class:PhabricatorDaemonManagementWorkflow}'s method
|
||||
* @{method:findRunningDaemons}.
|
||||
*
|
||||
* NOTE: the objects are not fully featured and should be used with caution.
|
||||
*/
|
||||
public static function newFromRogueDictionary(array $dict) {
|
||||
$ref = new PhabricatorDaemonReference();
|
||||
$ref->name = pht('Rogue %s', idx($dict, 'type'));
|
||||
$ref->pid = idx($dict, 'pid');
|
||||
|
||||
return $ref;
|
||||
return $refs;
|
||||
}
|
||||
|
||||
public function updateStatus($new_status) {
|
||||
try {
|
||||
if (!$this->daemonLog) {
|
||||
$this->daemonLog = id(new PhabricatorDaemonLog())->loadOneWhere(
|
||||
'daemon = %s AND pid = %d AND dateCreated = %d',
|
||||
$this->name,
|
||||
$this->pid,
|
||||
$this->start);
|
||||
}
|
||||
if (!$this->daemonLog) {
|
||||
return;
|
||||
}
|
||||
|
||||
if ($this->daemonLog) {
|
||||
$this->daemonLog
|
||||
->setStatus($new_status)
|
||||
->save();
|
||||
}
|
||||
try {
|
||||
$this->daemonLog
|
||||
->setStatus($new_status)
|
||||
->save();
|
||||
} catch (AphrontQueryException $ex) {
|
||||
// Ignore anything that goes wrong here. We anticipate at least two
|
||||
// specific failure modes:
|
||||
//
|
||||
// - Upgrade scripts which run `git pull`, then `phd stop`, then
|
||||
// `bin/storage upgrade` will fail when trying to update the `status`
|
||||
// column, as it does not exist yet.
|
||||
// - Daemons running on machines which do not have access to MySQL
|
||||
// (like an IRC bot) will not be able to load or save the log.
|
||||
//
|
||||
//
|
||||
// Ignore anything that goes wrong here.
|
||||
}
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in a new issue