1
0
Fork 0
mirror of https://we.phorge.it/source/phorge.git synced 2024-11-20 05:42:40 +01:00

Make phd more aware of multiple daemons under a single overseer

Summary: Ref T7352. This makes `phd stop` and `phd status` produce more reasonable output with the new PID file format.

Test Plan: Ran `phd stop`, `phd status`, etc.

Reviewers: btrahan

Reviewed By: btrahan

Subscribers: epriestley

Maniphest Tasks: T7352

Differential Revision: https://secure.phabricator.com/D11856
This commit is contained in:
epriestley 2015-02-22 09:45:57 -08:00
parent 09f3d0bb7e
commit c2d66f29cd
2 changed files with 98 additions and 132 deletions

View file

@ -44,11 +44,11 @@ abstract class PhabricatorDaemonManagementWorkflow
$pid_files = Filesystem::listDirectory($pid_dir); $pid_files = Filesystem::listDirectory($pid_dir);
foreach ($pid_files as $pid_file) { foreach ($pid_files as $pid_file) {
$daemons[] = PhabricatorDaemonReference::newFromFile( $path = $pid_dir.'/'.$pid_file;
$pid_dir.'/'.$pid_file); $daemons[] = PhabricatorDaemonReference::loadReferencesFromFile($path);
} }
return $daemons; return array_mergev($daemons);
} }
protected final function loadAllRunningDaemons() { protected final function loadAllRunningDaemons() {
@ -403,43 +403,60 @@ abstract class PhabricatorDaemonManagementWorkflow
return 0; return 0;
} }
$daemons = mpull($daemons, null, 'getPID'); $running_pids = array_fuse(mpull($daemons, 'getPID'));
$running = array();
if (!$pids) { if (!$pids) {
$running = $daemons; $stop_pids = $running_pids;
} else { } else {
// We were given a PID or set of PIDs to kill. // We were given a PID or set of PIDs to kill.
$stop_pids = array();
foreach ($pids as $key => $pid) { foreach ($pids as $key => $pid) {
if (!preg_match('/^\d+$/', $pid)) { if (!preg_match('/^\d+$/', $pid)) {
$console->writeErr(pht("PID '%s' is not a valid PID.", $pid)."\n"); $console->writeErr(pht("PID '%s' is not a valid PID.", $pid)."\n");
continue; continue;
} else if (empty($daemons[$pid])) { } else if (empty($running_pids[$pid])) {
$console->writeErr( $console->writeErr(
pht( pht(
"PID '%s' is not a Phabricator daemon PID. It will not ". 'PID "%d" is not a known Phabricator daemon PID. It will not '.
"be killed.", 'be killed.',
$pid)."\n"); $pid)."\n");
continue; continue;
} else { } else {
$running[] = $daemons[$pid]; $stop_pids[$pid] = $pid;
} }
} }
} }
if (empty($running)) { if (!$stop_pids) {
$console->writeErr(pht('No daemons to kill.')."\n"); $console->writeErr(pht('No daemons to kill.')."\n");
return 0; return 0;
} }
$all_daemons = $running; $survivors = $this->sendStopSignals($stop_pids, $grace_period);
// don't specify force here as that's about rogue daemons
$this->sendStopSignals($running, $grace_period);
foreach ($all_daemons as $daemon) { // Try to clean up PID files for daemons we killed.
if ($daemon->getPIDFile()) { $remove = array();
Filesystem::remove($daemon->getPIDFile()); foreach ($daemons as $daemon) {
$pid = $daemon->getPID();
if (empty($stop_pids[$pid])) {
// We did not try to stop this overseer.
continue;
} }
if (isset($survivors[$pid])) {
// We weren't able to stop this overseer.
continue;
}
if (!$daemon->getPIDFile()) {
// We don't know where the PID file is.
continue;
}
$remove[] = $daemon->getPIDFile();
}
foreach (array_unique($remove) as $remove_file) {
Filesystem::remove($remove_file);
} }
if (!$gently) { if (!$gently) {
@ -455,20 +472,19 @@ abstract class PhabricatorDaemonManagementWorkflow
$rogue_daemons = PhutilDaemonOverseer::findRunningDaemons(); $rogue_daemons = PhutilDaemonOverseer::findRunningDaemons();
if ($rogue_daemons) { if ($rogue_daemons) {
if ($force_stop) { if ($force_stop) {
$stop_rogue_daemons = $this->buildRogueDaemons($rogue_daemons); $rogue_pids = ipull($rogue_daemons, 'pid');
$survivors = $this->sendStopSignals( $survivors = $this->sendStopSignals($rogue_pids, $grace_period);
$stop_rogue_daemons,
$grace_period,
$force_stop);
if ($survivors) { if ($survivors) {
$console->writeErr(pht( $console->writeErr(
'Unable to stop processes running without pid files. Try running '. pht(
'this command again with sudo.'."\n")); 'Unable to stop processes running without PID files. '.
'Try running this command again with sudo.')."\n");
} }
} else if ($warn) { } else if ($warn) {
$console->writeErr($this->getForceStopHint($rogue_daemons)."\n"); $console->writeErr($this->getForceStopHint($rogue_daemons)."\n");
} }
} }
return $rogue_daemons; return $rogue_daemons;
} }
@ -485,57 +501,47 @@ abstract class PhabricatorDaemonManagementWorkflow
$debug_output); $debug_output);
} }
private function buildRogueDaemons(array $daemons) { private function sendStopSignals($pids, $grace_period) {
$rogue_daemons = array();
foreach ($daemons as $pid => $data) {
$rogue_daemons[] =
PhabricatorDaemonReference::newFromRogueDictionary($data);
}
return $rogue_daemons;
}
private function sendStopSignals($daemons, $grace_period, $force = false) {
// If we're doing a graceful shutdown, try SIGINT first. // If we're doing a graceful shutdown, try SIGINT first.
if ($grace_period) { if ($grace_period) {
$daemons = $this->sendSignal($daemons, SIGINT, $grace_period, $force); $pids = $this->sendSignal($pids, SIGINT, $grace_period);
} }
// If we still have daemons, SIGTERM them. // If we still have daemons, SIGTERM them.
if ($daemons) { if ($pids) {
$daemons = $this->sendSignal($daemons, SIGTERM, 15, $force); $pids = $this->sendSignal($pids, SIGTERM, 15);
} }
// If the overseer is still alive, SIGKILL it. // If the overseer is still alive, SIGKILL it.
if ($daemons) { if ($pids) {
$daemons = $this->sendSignal($daemons, SIGKILL, 0, $force); $pids = $this->sendSignal($pids, SIGKILL, 0);
}
return $daemons;
} }
private function sendSignal(array $daemons, $signo, $wait, $force = false) { return $pids;
}
private function sendSignal(array $pids, $signo, $wait) {
$console = PhutilConsole::getConsole(); $console = PhutilConsole::getConsole();
foreach ($daemons as $key => $daemon) { $pids = array_fuse($pids);
$pid = $daemon->getPID();
$name = $daemon->getName();
foreach ($pids as $key => $pid) {
if (!$pid) { if (!$pid) {
// NOTE: We must have a PID to signal a daemon, since sending a signal // NOTE: We must have a PID to signal a daemon, since sending a signal
// to PID 0 kills this process. // to PID 0 kills this process.
$console->writeOut("%s\n", pht("Daemon '%s' has no PID!", $name)); unset($pids[$key]);
unset($daemons[$key]);
continue; continue;
} }
switch ($signo) { switch ($signo) {
case SIGINT: case SIGINT:
$message = pht("Interrupting daemon '%s' (%s)...", $name, $pid); $message = pht('Interrupting process %d...', $pid);
break; break;
case SIGTERM: case SIGTERM:
$message = pht("Terminating daemon '%s' (%s)...", $name, $pid); $message = pht('Terminating process %d...', $pid);
break; break;
case SIGKILL: case SIGKILL:
$message = pht("Killing daemon '%s' (%s)...", $name, $pid); $message = pht('Killing process %d...', $pid);
break; break;
} }
@ -546,21 +552,20 @@ abstract class PhabricatorDaemonManagementWorkflow
if ($wait) { if ($wait) {
$start = PhabricatorTime::getNow(); $start = PhabricatorTime::getNow();
do { do {
foreach ($daemons as $key => $daemon) { foreach ($pids as $key => $pid) {
$pid = $daemon->getPID(); if (!PhabricatorDaemonReference::isProcessRunning($pid)) {
if (!$daemon->isRunning()) { $console->writeOut(pht('Process %d exited.', $pid)."\n");
$console->writeOut(pht('Daemon %s exited.', $pid)."\n"); unset($pids[$key]);
unset($daemons[$key]);
} }
} }
if (empty($daemons)) { if (empty($pids)) {
break; break;
} }
usleep(100000); usleep(100000);
} while (PhabricatorTime::getNow() < $start + $wait); } while (PhabricatorTime::getNow() < $start + $wait);
} }
return $daemons; return $pids;
} }
private function freeActiveLeases() { private function freeActiveLeases() {

View file

@ -10,7 +10,7 @@ final class PhabricatorDaemonReference {
private $daemonLog; private $daemonLog;
public static function newFromFile($path) { public static function loadReferencesFromFile($path) {
$pid_data = Filesystem::readFile($path); $pid_data = Filesystem::readFile($path);
try { try {
@ -19,89 +19,50 @@ final class PhabricatorDaemonReference {
$dict = array(); $dict = array();
} }
$ref = self::newFromDictionary($dict); $refs = array();
$ref->pidFile = $path; $daemons = idx($dict, 'daemons', array());
return $ref;
}
public static function newFromDictionary(array $dict) { foreach ($daemons as $daemon) {
$ref = new PhabricatorDaemonReference(); $ref = new PhabricatorDaemonReference();
// TODO: This is a little rough during the transition from one-to-one // NOTE: This is the overseer PID, not the actual daemon process PID.
// overseers to one-to-many. // This is correct for checking status and sending signals (the only
$config = idx($dict, 'config', array()); // things we do with it), but might be confusing. $daemon['pid'] has
// the daemon PID, and we could expose that if we had some use for it.
$daemon_list = null;
if ($config) {
$daemon_list = idx($config, 'daemons');
}
if ($daemon_list) {
$ref->name = pht('Overseer Daemon Group');
$ref->argv = array();
} else {
$ref->name = idx($dict, 'name', 'Unknown');
$ref->argv = idx($dict, 'argv', array());
}
$ref->pid = idx($dict, 'pid'); $ref->pid = idx($dict, 'pid');
$ref->start = idx($dict, 'start'); $ref->start = idx($dict, 'start');
try { $ref->name = idx($daemon, 'class');
$ref->daemonLog = id(new PhabricatorDaemonLog())->loadOneWhere( $ref->argv = idx($daemon, 'argv', array());
'daemon = %s AND pid = %d AND dateCreated = %d',
$ref->name,
$ref->pid, // TODO: We previously identified daemon logs by using a <class, pid,
$ref->start); // epoch> tuple, but now all daemons under a single overseer will share
} catch (AphrontQueryException $ex) { // that identifier. We can uniquely identify daemons by $daemon['id'],
// Ignore the exception. We want to be able to terminate the daemons, // but that isn't currently written into the daemon logs. We should
// even if MySQL is down. // start writing it, then load the logs here. This would give us a
// slightly greater ability to keep the web UI in sync when daemons
// get killed forcefully and clean up `phd status` a bit.
$ref->pidFile = $path;
$refs[] = $ref;
} }
return $ref; return $refs;
}
/**
* Appropriate for getting @{class:PhabricatorDaemonReference} objects from
* the data from @{class:PhabricatorDaemonManagementWorkflow}'s method
* @{method:findRunningDaemons}.
*
* NOTE: the objects are not fully featured and should be used with caution.
*/
public static function newFromRogueDictionary(array $dict) {
$ref = new PhabricatorDaemonReference();
$ref->name = pht('Rogue %s', idx($dict, 'type'));
$ref->pid = idx($dict, 'pid');
return $ref;
} }
public function updateStatus($new_status) { public function updateStatus($new_status) {
try {
if (!$this->daemonLog) { if (!$this->daemonLog) {
$this->daemonLog = id(new PhabricatorDaemonLog())->loadOneWhere( return;
'daemon = %s AND pid = %d AND dateCreated = %d',
$this->name,
$this->pid,
$this->start);
} }
if ($this->daemonLog) { try {
$this->daemonLog $this->daemonLog
->setStatus($new_status) ->setStatus($new_status)
->save(); ->save();
}
} catch (AphrontQueryException $ex) { } catch (AphrontQueryException $ex) {
// Ignore anything that goes wrong here. We anticipate at least two // Ignore anything that goes wrong here.
// specific failure modes:
//
// - Upgrade scripts which run `git pull`, then `phd stop`, then
// `bin/storage upgrade` will fail when trying to update the `status`
// column, as it does not exist yet.
// - Daemons running on machines which do not have access to MySQL
// (like an IRC bot) will not be able to load or save the log.
//
//
} }
} }