1
0
Fork 0
mirror of https://we.phorge.it/source/phorge.git synced 2024-11-20 05:42:40 +01:00

Make phd more aware of multiple daemons under a single overseer

Summary: Ref T7352. This makes `phd stop` and `phd status` produce more reasonable output with the new PID file format.

Test Plan: Ran `phd stop`, `phd status`, etc.

Reviewers: btrahan

Reviewed By: btrahan

Subscribers: epriestley

Maniphest Tasks: T7352

Differential Revision: https://secure.phabricator.com/D11856
This commit is contained in:
epriestley 2015-02-22 09:45:57 -08:00
parent 09f3d0bb7e
commit c2d66f29cd
2 changed files with 98 additions and 132 deletions

View file

@ -44,11 +44,11 @@ abstract class PhabricatorDaemonManagementWorkflow
$pid_files = Filesystem::listDirectory($pid_dir);
foreach ($pid_files as $pid_file) {
$daemons[] = PhabricatorDaemonReference::newFromFile(
$pid_dir.'/'.$pid_file);
$path = $pid_dir.'/'.$pid_file;
$daemons[] = PhabricatorDaemonReference::loadReferencesFromFile($path);
}
return $daemons;
return array_mergev($daemons);
}
protected final function loadAllRunningDaemons() {
@ -403,43 +403,60 @@ abstract class PhabricatorDaemonManagementWorkflow
return 0;
}
$daemons = mpull($daemons, null, 'getPID');
$running = array();
$running_pids = array_fuse(mpull($daemons, 'getPID'));
if (!$pids) {
$running = $daemons;
$stop_pids = $running_pids;
} else {
// We were given a PID or set of PIDs to kill.
$stop_pids = array();
foreach ($pids as $key => $pid) {
if (!preg_match('/^\d+$/', $pid)) {
$console->writeErr(pht("PID '%s' is not a valid PID.", $pid)."\n");
continue;
} else if (empty($daemons[$pid])) {
} else if (empty($running_pids[$pid])) {
$console->writeErr(
pht(
"PID '%s' is not a Phabricator daemon PID. It will not ".
"be killed.",
'PID "%d" is not a known Phabricator daemon PID. It will not '.
'be killed.',
$pid)."\n");
continue;
} else {
$running[] = $daemons[$pid];
$stop_pids[$pid] = $pid;
}
}
}
if (empty($running)) {
if (!$stop_pids) {
$console->writeErr(pht('No daemons to kill.')."\n");
return 0;
}
$all_daemons = $running;
// don't specify force here as that's about rogue daemons
$this->sendStopSignals($running, $grace_period);
$survivors = $this->sendStopSignals($stop_pids, $grace_period);
foreach ($all_daemons as $daemon) {
if ($daemon->getPIDFile()) {
Filesystem::remove($daemon->getPIDFile());
// Try to clean up PID files for daemons we killed.
$remove = array();
foreach ($daemons as $daemon) {
$pid = $daemon->getPID();
if (empty($stop_pids[$pid])) {
// We did not try to stop this overseer.
continue;
}
if (isset($survivors[$pid])) {
// We weren't able to stop this overseer.
continue;
}
if (!$daemon->getPIDFile()) {
// We don't know where the PID file is.
continue;
}
$remove[] = $daemon->getPIDFile();
}
foreach (array_unique($remove) as $remove_file) {
Filesystem::remove($remove_file);
}
if (!$gently) {
@ -455,20 +472,19 @@ abstract class PhabricatorDaemonManagementWorkflow
$rogue_daemons = PhutilDaemonOverseer::findRunningDaemons();
if ($rogue_daemons) {
if ($force_stop) {
$stop_rogue_daemons = $this->buildRogueDaemons($rogue_daemons);
$survivors = $this->sendStopSignals(
$stop_rogue_daemons,
$grace_period,
$force_stop);
$rogue_pids = ipull($rogue_daemons, 'pid');
$survivors = $this->sendStopSignals($rogue_pids, $grace_period);
if ($survivors) {
$console->writeErr(pht(
'Unable to stop processes running without pid files. Try running '.
'this command again with sudo.'."\n"));
$console->writeErr(
pht(
'Unable to stop processes running without PID files. '.
'Try running this command again with sudo.')."\n");
}
} else if ($warn) {
$console->writeErr($this->getForceStopHint($rogue_daemons)."\n");
}
}
return $rogue_daemons;
}
@ -485,57 +501,47 @@ abstract class PhabricatorDaemonManagementWorkflow
$debug_output);
}
private function buildRogueDaemons(array $daemons) {
$rogue_daemons = array();
foreach ($daemons as $pid => $data) {
$rogue_daemons[] =
PhabricatorDaemonReference::newFromRogueDictionary($data);
}
return $rogue_daemons;
}
private function sendStopSignals($daemons, $grace_period, $force = false) {
private function sendStopSignals($pids, $grace_period) {
// If we're doing a graceful shutdown, try SIGINT first.
if ($grace_period) {
$daemons = $this->sendSignal($daemons, SIGINT, $grace_period, $force);
$pids = $this->sendSignal($pids, SIGINT, $grace_period);
}
// If we still have daemons, SIGTERM them.
if ($daemons) {
$daemons = $this->sendSignal($daemons, SIGTERM, 15, $force);
if ($pids) {
$pids = $this->sendSignal($pids, SIGTERM, 15);
}
// If the overseer is still alive, SIGKILL it.
if ($daemons) {
$daemons = $this->sendSignal($daemons, SIGKILL, 0, $force);
if ($pids) {
$pids = $this->sendSignal($pids, SIGKILL, 0);
}
return $daemons;
return $pids;
}
private function sendSignal(array $daemons, $signo, $wait, $force = false) {
private function sendSignal(array $pids, $signo, $wait) {
$console = PhutilConsole::getConsole();
foreach ($daemons as $key => $daemon) {
$pid = $daemon->getPID();
$name = $daemon->getName();
$pids = array_fuse($pids);
foreach ($pids as $key => $pid) {
if (!$pid) {
// NOTE: We must have a PID to signal a daemon, since sending a signal
// to PID 0 kills this process.
$console->writeOut("%s\n", pht("Daemon '%s' has no PID!", $name));
unset($daemons[$key]);
unset($pids[$key]);
continue;
}
switch ($signo) {
case SIGINT:
$message = pht("Interrupting daemon '%s' (%s)...", $name, $pid);
$message = pht('Interrupting process %d...', $pid);
break;
case SIGTERM:
$message = pht("Terminating daemon '%s' (%s)...", $name, $pid);
$message = pht('Terminating process %d...', $pid);
break;
case SIGKILL:
$message = pht("Killing daemon '%s' (%s)...", $name, $pid);
$message = pht('Killing process %d...', $pid);
break;
}
@ -546,21 +552,20 @@ abstract class PhabricatorDaemonManagementWorkflow
if ($wait) {
$start = PhabricatorTime::getNow();
do {
foreach ($daemons as $key => $daemon) {
$pid = $daemon->getPID();
if (!$daemon->isRunning()) {
$console->writeOut(pht('Daemon %s exited.', $pid)."\n");
unset($daemons[$key]);
foreach ($pids as $key => $pid) {
if (!PhabricatorDaemonReference::isProcessRunning($pid)) {
$console->writeOut(pht('Process %d exited.', $pid)."\n");
unset($pids[$key]);
}
}
if (empty($daemons)) {
if (empty($pids)) {
break;
}
usleep(100000);
} while (PhabricatorTime::getNow() < $start + $wait);
}
return $daemons;
return $pids;
}
private function freeActiveLeases() {

View file

@ -10,7 +10,7 @@ final class PhabricatorDaemonReference {
private $daemonLog;
public static function newFromFile($path) {
public static function loadReferencesFromFile($path) {
$pid_data = Filesystem::readFile($path);
try {
@ -19,89 +19,50 @@ final class PhabricatorDaemonReference {
$dict = array();
}
$ref = self::newFromDictionary($dict);
$ref->pidFile = $path;
return $ref;
}
$refs = array();
$daemons = idx($dict, 'daemons', array());
public static function newFromDictionary(array $dict) {
$ref = new PhabricatorDaemonReference();
foreach ($daemons as $daemon) {
$ref = new PhabricatorDaemonReference();
// TODO: This is a little rough during the transition from one-to-one
// overseers to one-to-many.
$config = idx($dict, 'config', array());
// NOTE: This is the overseer PID, not the actual daemon process PID.
// This is correct for checking status and sending signals (the only
// things we do with it), but might be confusing. $daemon['pid'] has
// the daemon PID, and we could expose that if we had some use for it.
$daemon_list = null;
if ($config) {
$daemon_list = idx($config, 'daemons');
$ref->pid = idx($dict, 'pid');
$ref->start = idx($dict, 'start');
$ref->name = idx($daemon, 'class');
$ref->argv = idx($daemon, 'argv', array());
// TODO: We previously identified daemon logs by using a <class, pid,
// epoch> tuple, but now all daemons under a single overseer will share
// that identifier. We can uniquely identify daemons by $daemon['id'],
// but that isn't currently written into the daemon logs. We should
// start writing it, then load the logs here. This would give us a
// slightly greater ability to keep the web UI in sync when daemons
// get killed forcefully and clean up `phd status` a bit.
$ref->pidFile = $path;
$refs[] = $ref;
}
if ($daemon_list) {
$ref->name = pht('Overseer Daemon Group');
$ref->argv = array();
} else {
$ref->name = idx($dict, 'name', 'Unknown');
$ref->argv = idx($dict, 'argv', array());
}
$ref->pid = idx($dict, 'pid');
$ref->start = idx($dict, 'start');
try {
$ref->daemonLog = id(new PhabricatorDaemonLog())->loadOneWhere(
'daemon = %s AND pid = %d AND dateCreated = %d',
$ref->name,
$ref->pid,
$ref->start);
} catch (AphrontQueryException $ex) {
// Ignore the exception. We want to be able to terminate the daemons,
// even if MySQL is down.
}
return $ref;
}
/**
* Appropriate for getting @{class:PhabricatorDaemonReference} objects from
* the data from @{class:PhabricatorDaemonManagementWorkflow}'s method
* @{method:findRunningDaemons}.
*
* NOTE: the objects are not fully featured and should be used with caution.
*/
public static function newFromRogueDictionary(array $dict) {
$ref = new PhabricatorDaemonReference();
$ref->name = pht('Rogue %s', idx($dict, 'type'));
$ref->pid = idx($dict, 'pid');
return $ref;
return $refs;
}
public function updateStatus($new_status) {
try {
if (!$this->daemonLog) {
$this->daemonLog = id(new PhabricatorDaemonLog())->loadOneWhere(
'daemon = %s AND pid = %d AND dateCreated = %d',
$this->name,
$this->pid,
$this->start);
}
if (!$this->daemonLog) {
return;
}
if ($this->daemonLog) {
$this->daemonLog
->setStatus($new_status)
->save();
}
try {
$this->daemonLog
->setStatus($new_status)
->save();
} catch (AphrontQueryException $ex) {
// Ignore anything that goes wrong here. We anticipate at least two
// specific failure modes:
//
// - Upgrade scripts which run `git pull`, then `phd stop`, then
// `bin/storage upgrade` will fail when trying to update the `status`
// column, as it does not exist yet.
// - Daemons running on machines which do not have access to MySQL
// (like an IRC bot) will not be able to load or save the log.
//
//
// Ignore anything that goes wrong here.
}
}