diff --git a/src/applications/daemon/management/PhabricatorDaemonManagementWorkflow.php b/src/applications/daemon/management/PhabricatorDaemonManagementWorkflow.php index c9d3107fe5..ba51d56457 100644 --- a/src/applications/daemon/management/PhabricatorDaemonManagementWorkflow.php +++ b/src/applications/daemon/management/PhabricatorDaemonManagementWorkflow.php @@ -44,11 +44,11 @@ abstract class PhabricatorDaemonManagementWorkflow $pid_files = Filesystem::listDirectory($pid_dir); foreach ($pid_files as $pid_file) { - $daemons[] = PhabricatorDaemonReference::newFromFile( - $pid_dir.'/'.$pid_file); + $path = $pid_dir.'/'.$pid_file; + $daemons[] = PhabricatorDaemonReference::loadReferencesFromFile($path); } - return $daemons; + return array_mergev($daemons); } protected final function loadAllRunningDaemons() { @@ -403,43 +403,60 @@ abstract class PhabricatorDaemonManagementWorkflow return 0; } - $daemons = mpull($daemons, null, 'getPID'); - - $running = array(); + $running_pids = array_fuse(mpull($daemons, 'getPID')); if (!$pids) { - $running = $daemons; + $stop_pids = $running_pids; } else { // We were given a PID or set of PIDs to kill. + $stop_pids = array(); foreach ($pids as $key => $pid) { if (!preg_match('/^\d+$/', $pid)) { $console->writeErr(pht("PID '%s' is not a valid PID.", $pid)."\n"); continue; - } else if (empty($daemons[$pid])) { + } else if (empty($running_pids[$pid])) { $console->writeErr( pht( - "PID '%s' is not a Phabricator daemon PID. It will not ". - "be killed.", + 'PID "%d" is not a known Phabricator daemon PID. It will not '. + 'be killed.', $pid)."\n"); continue; } else { - $running[] = $daemons[$pid]; + $stop_pids[$pid] = $pid; } } } - if (empty($running)) { + if (!$stop_pids) { $console->writeErr(pht('No daemons to kill.')."\n"); return 0; } - $all_daemons = $running; - // don't specify force here as that's about rogue daemons - $this->sendStopSignals($running, $grace_period); + $survivors = $this->sendStopSignals($stop_pids, $grace_period); - foreach ($all_daemons as $daemon) { - if ($daemon->getPIDFile()) { - Filesystem::remove($daemon->getPIDFile()); + // Try to clean up PID files for daemons we killed. + $remove = array(); + foreach ($daemons as $daemon) { + $pid = $daemon->getPID(); + if (empty($stop_pids[$pid])) { + // We did not try to stop this overseer. + continue; } + + if (isset($survivors[$pid])) { + // We weren't able to stop this overseer. + continue; + } + + if (!$daemon->getPIDFile()) { + // We don't know where the PID file is. + continue; + } + + $remove[] = $daemon->getPIDFile(); + } + + foreach (array_unique($remove) as $remove_file) { + Filesystem::remove($remove_file); } if (!$gently) { @@ -455,20 +472,19 @@ abstract class PhabricatorDaemonManagementWorkflow $rogue_daemons = PhutilDaemonOverseer::findRunningDaemons(); if ($rogue_daemons) { if ($force_stop) { - $stop_rogue_daemons = $this->buildRogueDaemons($rogue_daemons); - $survivors = $this->sendStopSignals( - $stop_rogue_daemons, - $grace_period, - $force_stop); + $rogue_pids = ipull($rogue_daemons, 'pid'); + $survivors = $this->sendStopSignals($rogue_pids, $grace_period); if ($survivors) { - $console->writeErr(pht( - 'Unable to stop processes running without pid files. Try running '. - 'this command again with sudo.'."\n")); + $console->writeErr( + pht( + 'Unable to stop processes running without PID files. '. + 'Try running this command again with sudo.')."\n"); } } else if ($warn) { $console->writeErr($this->getForceStopHint($rogue_daemons)."\n"); } } + return $rogue_daemons; } @@ -485,57 +501,47 @@ abstract class PhabricatorDaemonManagementWorkflow $debug_output); } - private function buildRogueDaemons(array $daemons) { - $rogue_daemons = array(); - foreach ($daemons as $pid => $data) { - $rogue_daemons[] = - PhabricatorDaemonReference::newFromRogueDictionary($data); - } - return $rogue_daemons; - } - - private function sendStopSignals($daemons, $grace_period, $force = false) { + private function sendStopSignals($pids, $grace_period) { // If we're doing a graceful shutdown, try SIGINT first. if ($grace_period) { - $daemons = $this->sendSignal($daemons, SIGINT, $grace_period, $force); + $pids = $this->sendSignal($pids, SIGINT, $grace_period); } // If we still have daemons, SIGTERM them. - if ($daemons) { - $daemons = $this->sendSignal($daemons, SIGTERM, 15, $force); + if ($pids) { + $pids = $this->sendSignal($pids, SIGTERM, 15); } // If the overseer is still alive, SIGKILL it. - if ($daemons) { - $daemons = $this->sendSignal($daemons, SIGKILL, 0, $force); + if ($pids) { + $pids = $this->sendSignal($pids, SIGKILL, 0); } - return $daemons; + + return $pids; } - private function sendSignal(array $daemons, $signo, $wait, $force = false) { + private function sendSignal(array $pids, $signo, $wait) { $console = PhutilConsole::getConsole(); - foreach ($daemons as $key => $daemon) { - $pid = $daemon->getPID(); - $name = $daemon->getName(); + $pids = array_fuse($pids); + foreach ($pids as $key => $pid) { if (!$pid) { // NOTE: We must have a PID to signal a daemon, since sending a signal // to PID 0 kills this process. - $console->writeOut("%s\n", pht("Daemon '%s' has no PID!", $name)); - unset($daemons[$key]); + unset($pids[$key]); continue; } switch ($signo) { case SIGINT: - $message = pht("Interrupting daemon '%s' (%s)...", $name, $pid); + $message = pht('Interrupting process %d...', $pid); break; case SIGTERM: - $message = pht("Terminating daemon '%s' (%s)...", $name, $pid); + $message = pht('Terminating process %d...', $pid); break; case SIGKILL: - $message = pht("Killing daemon '%s' (%s)...", $name, $pid); + $message = pht('Killing process %d...', $pid); break; } @@ -546,21 +552,20 @@ abstract class PhabricatorDaemonManagementWorkflow if ($wait) { $start = PhabricatorTime::getNow(); do { - foreach ($daemons as $key => $daemon) { - $pid = $daemon->getPID(); - if (!$daemon->isRunning()) { - $console->writeOut(pht('Daemon %s exited.', $pid)."\n"); - unset($daemons[$key]); + foreach ($pids as $key => $pid) { + if (!PhabricatorDaemonReference::isProcessRunning($pid)) { + $console->writeOut(pht('Process %d exited.', $pid)."\n"); + unset($pids[$key]); } } - if (empty($daemons)) { + if (empty($pids)) { break; } usleep(100000); } while (PhabricatorTime::getNow() < $start + $wait); } - return $daemons; + return $pids; } private function freeActiveLeases() { diff --git a/src/infrastructure/daemon/control/PhabricatorDaemonReference.php b/src/infrastructure/daemon/control/PhabricatorDaemonReference.php index f32f115885..89001622ef 100644 --- a/src/infrastructure/daemon/control/PhabricatorDaemonReference.php +++ b/src/infrastructure/daemon/control/PhabricatorDaemonReference.php @@ -10,7 +10,7 @@ final class PhabricatorDaemonReference { private $daemonLog; - public static function newFromFile($path) { + public static function loadReferencesFromFile($path) { $pid_data = Filesystem::readFile($path); try { @@ -19,89 +19,50 @@ final class PhabricatorDaemonReference { $dict = array(); } - $ref = self::newFromDictionary($dict); - $ref->pidFile = $path; - return $ref; - } + $refs = array(); + $daemons = idx($dict, 'daemons', array()); - public static function newFromDictionary(array $dict) { - $ref = new PhabricatorDaemonReference(); + foreach ($daemons as $daemon) { + $ref = new PhabricatorDaemonReference(); - // TODO: This is a little rough during the transition from one-to-one - // overseers to one-to-many. - $config = idx($dict, 'config', array()); + // NOTE: This is the overseer PID, not the actual daemon process PID. + // This is correct for checking status and sending signals (the only + // things we do with it), but might be confusing. $daemon['pid'] has + // the daemon PID, and we could expose that if we had some use for it. - $daemon_list = null; - if ($config) { - $daemon_list = idx($config, 'daemons'); + $ref->pid = idx($dict, 'pid'); + $ref->start = idx($dict, 'start'); + + $ref->name = idx($daemon, 'class'); + $ref->argv = idx($daemon, 'argv', array()); + + + // TODO: We previously identified daemon logs by using a tuple, but now all daemons under a single overseer will share + // that identifier. We can uniquely identify daemons by $daemon['id'], + // but that isn't currently written into the daemon logs. We should + // start writing it, then load the logs here. This would give us a + // slightly greater ability to keep the web UI in sync when daemons + // get killed forcefully and clean up `phd status` a bit. + + $ref->pidFile = $path; + $refs[] = $ref; } - if ($daemon_list) { - $ref->name = pht('Overseer Daemon Group'); - $ref->argv = array(); - } else { - $ref->name = idx($dict, 'name', 'Unknown'); - $ref->argv = idx($dict, 'argv', array()); - } - - $ref->pid = idx($dict, 'pid'); - $ref->start = idx($dict, 'start'); - - try { - $ref->daemonLog = id(new PhabricatorDaemonLog())->loadOneWhere( - 'daemon = %s AND pid = %d AND dateCreated = %d', - $ref->name, - $ref->pid, - $ref->start); - } catch (AphrontQueryException $ex) { - // Ignore the exception. We want to be able to terminate the daemons, - // even if MySQL is down. - } - - return $ref; - } - - /** - * Appropriate for getting @{class:PhabricatorDaemonReference} objects from - * the data from @{class:PhabricatorDaemonManagementWorkflow}'s method - * @{method:findRunningDaemons}. - * - * NOTE: the objects are not fully featured and should be used with caution. - */ - public static function newFromRogueDictionary(array $dict) { - $ref = new PhabricatorDaemonReference(); - $ref->name = pht('Rogue %s', idx($dict, 'type')); - $ref->pid = idx($dict, 'pid'); - - return $ref; + return $refs; } public function updateStatus($new_status) { - try { - if (!$this->daemonLog) { - $this->daemonLog = id(new PhabricatorDaemonLog())->loadOneWhere( - 'daemon = %s AND pid = %d AND dateCreated = %d', - $this->name, - $this->pid, - $this->start); - } + if (!$this->daemonLog) { + return; + } - if ($this->daemonLog) { - $this->daemonLog - ->setStatus($new_status) - ->save(); - } + try { + $this->daemonLog + ->setStatus($new_status) + ->save(); } catch (AphrontQueryException $ex) { - // Ignore anything that goes wrong here. We anticipate at least two - // specific failure modes: - // - // - Upgrade scripts which run `git pull`, then `phd stop`, then - // `bin/storage upgrade` will fail when trying to update the `status` - // column, as it does not exist yet. - // - Daemons running on machines which do not have access to MySQL - // (like an IRC bot) will not be able to load or save the log. - // - // + // Ignore anything that goes wrong here. } }