mirror of
https://we.phorge.it/source/phorge.git
synced 2024-11-26 00:32:42 +01:00
When we fail to acquire a repository lock, try to provide a hint about why
Summary: Ref T13202. See PHI889. If the lock log is enabled, we can try to offer more details about lock holders. When we fail to acquire a lock: - check for recent acquisitions and suggest that this is a bottleneck issue; - if there are no recent acquisitions, check for the last acquisition and print details about it (what process, how long ago, whether or not we believe it was released). Test Plan: - Enabled the lock log. - Changed the lock wait time to 1 second. - Added a `sleep(10)` after grabbing the lock. - In one window, ran a Conduit call or a `git fetch`. - In another window, ran another operation. - Got useful/sensible errors for both ssh and web lock holders, for example: > PhutilProxyException: Failed to acquire read lock after waiting 1 second(s). You may be able to retry later. (This lock was most recently acquired by a process (pid=12609, host=orbital-3.local, sapi=apache2handler, controller=PhabricatorConduitAPIController, method=diffusion.rawdiffquery) 3 second(s) ago. There is no record of this lock being released.) > PhutilProxyException: Failed to acquire read lock after waiting 1 second(s). You may be able to retry later. (This lock was most recently acquired by a process (pid=65251, host=orbital-3.local, sapi=cli, argv=/Users/epriestley/dev/core/lib/phabricator/bin/ssh-exec --phabricator-ssh-device local.phacility.net --phabricator-ssh-key 2) 2 second(s) ago. There is no record of this lock being released.) Reviewers: amckinley Reviewed By: amckinley Maniphest Tasks: T13202 Differential Revision: https://secure.phabricator.com/D19702
This commit is contained in:
parent
7db265cd5d
commit
021c612cb2
2 changed files with 160 additions and 14 deletions
|
@ -170,12 +170,13 @@ final class DiffusionRepositoryClusterEngine extends Phobject {
|
||||||
pht(
|
pht(
|
||||||
'Acquired read lock immediately.'));
|
'Acquired read lock immediately.'));
|
||||||
}
|
}
|
||||||
} catch (Exception $ex) {
|
} catch (PhutilLockException $ex) {
|
||||||
throw new PhutilProxyException(
|
throw new PhutilProxyException(
|
||||||
pht(
|
pht(
|
||||||
'Failed to acquire read lock after waiting %s second(s). You '.
|
'Failed to acquire read lock after waiting %s second(s). You '.
|
||||||
'may be able to retry later.',
|
'may be able to retry later. (%s)',
|
||||||
new PhutilNumber($lock_wait)),
|
new PhutilNumber($lock_wait),
|
||||||
|
$ex->getHint()),
|
||||||
$ex);
|
$ex);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -349,12 +350,13 @@ final class DiffusionRepositoryClusterEngine extends Phobject {
|
||||||
pht(
|
pht(
|
||||||
'Acquired write lock immediately.'));
|
'Acquired write lock immediately.'));
|
||||||
}
|
}
|
||||||
} catch (Exception $ex) {
|
} catch (PhutilLockException $ex) {
|
||||||
throw new PhutilProxyException(
|
throw new PhutilProxyException(
|
||||||
pht(
|
pht(
|
||||||
'Failed to acquire write lock after waiting %s second(s). You '.
|
'Failed to acquire write lock after waiting %s second(s). You '.
|
||||||
'may be able to retry later.',
|
'may be able to retry later. (%s)',
|
||||||
new PhutilNumber($lock_wait)),
|
new PhutilNumber($lock_wait),
|
||||||
|
$ex->getHint()),
|
||||||
$ex);
|
$ex);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -144,7 +144,8 @@ final class PhabricatorGlobalLock extends PhutilLock {
|
||||||
|
|
||||||
$ok = head($result);
|
$ok = head($result);
|
||||||
if (!$ok) {
|
if (!$ok) {
|
||||||
throw new PhutilLockException($lock_name);
|
throw id(new PhutilLockException($lock_name))
|
||||||
|
->setHint($this->newHint($lock_name, $wait));
|
||||||
}
|
}
|
||||||
|
|
||||||
$conn->rememberLock($lock_name);
|
$conn->rememberLock($lock_name);
|
||||||
|
@ -152,13 +153,7 @@ final class PhabricatorGlobalLock extends PhutilLock {
|
||||||
$this->conn = $conn;
|
$this->conn = $conn;
|
||||||
|
|
||||||
if ($this->shouldLogLock()) {
|
if ($this->shouldLogLock()) {
|
||||||
global $argv;
|
$lock_context = $this->newLockContext();
|
||||||
|
|
||||||
$lock_context = array(
|
|
||||||
'pid' => getmypid(),
|
|
||||||
'host' => php_uname('n'),
|
|
||||||
'argv' => $argv,
|
|
||||||
);
|
|
||||||
|
|
||||||
$log = id(new PhabricatorDaemonLockLog())
|
$log = id(new PhabricatorDaemonLockLog())
|
||||||
->setLockName($lock_name)
|
->setLockName($lock_name)
|
||||||
|
@ -228,4 +223,153 @@ final class PhabricatorGlobalLock extends PhutilLock {
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private function newLockContext() {
|
||||||
|
$context = array(
|
||||||
|
'pid' => getmypid(),
|
||||||
|
'host' => php_uname('n'),
|
||||||
|
'sapi' => php_sapi_name(),
|
||||||
|
);
|
||||||
|
|
||||||
|
global $argv;
|
||||||
|
if ($argv) {
|
||||||
|
$context['argv'] = $argv;
|
||||||
|
}
|
||||||
|
|
||||||
|
$access_log = null;
|
||||||
|
|
||||||
|
// TODO: There's currently no cohesive way to get the parameterized access
|
||||||
|
// log for the current request across different request types. Web requests
|
||||||
|
// have an "AccessLog", SSH requests have an "SSHLog", and other processes
|
||||||
|
// (like scripts) have no log. But there's no method to say "give me any
|
||||||
|
// log you've got". For now, just test if we have a web request and use the
|
||||||
|
// "AccessLog" if we do, since that's the only one we actually read any
|
||||||
|
// parameters from.
|
||||||
|
|
||||||
|
// NOTE: "PhabricatorStartup" is only available from web requests, not
|
||||||
|
// from CLI scripts.
|
||||||
|
if (class_exists('PhabricatorStartup', false)) {
|
||||||
|
$access_log = PhabricatorAccessLog::getLog();
|
||||||
|
}
|
||||||
|
|
||||||
|
if ($access_log) {
|
||||||
|
$controller = $access_log->getData('C');
|
||||||
|
if ($controller) {
|
||||||
|
$context['controller'] = $controller;
|
||||||
|
}
|
||||||
|
|
||||||
|
$method = $access_log->getData('m');
|
||||||
|
if ($method) {
|
||||||
|
$context['method'] = $method;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return $context;
|
||||||
|
}
|
||||||
|
|
||||||
|
private function newHint($lock_name, $wait) {
|
||||||
|
if (!$this->shouldLogLock()) {
|
||||||
|
return pht(
|
||||||
|
'Enable the lock log for more detailed information about '.
|
||||||
|
'which process is holding this lock.');
|
||||||
|
}
|
||||||
|
|
||||||
|
$now = PhabricatorTime::getNow();
|
||||||
|
|
||||||
|
// First, look for recent logs. If other processes have been acquiring and
|
||||||
|
// releasing this lock while we've been waiting, this is more likely to be
|
||||||
|
// a contention/throughput issue than an issue with something hung while
|
||||||
|
// holding the lock.
|
||||||
|
$limit = 100;
|
||||||
|
$logs = id(new PhabricatorDaemonLockLog())->loadAllWhere(
|
||||||
|
'lockName = %s AND dateCreated >= %d ORDER BY id ASC LIMIT %d',
|
||||||
|
$lock_name,
|
||||||
|
($now - $wait),
|
||||||
|
$limit);
|
||||||
|
|
||||||
|
if ($logs) {
|
||||||
|
if (count($logs) === $limit) {
|
||||||
|
return pht(
|
||||||
|
'During the last %s second(s) spent waiting for the lock, more '.
|
||||||
|
'than %s other process(es) acquired it, so this is likely a '.
|
||||||
|
'bottleneck. Use "bin/lock log --name %s" to review log activity.',
|
||||||
|
new PhutilNumber($wait),
|
||||||
|
new PhutilNumber($limit),
|
||||||
|
$lock_name);
|
||||||
|
} else {
|
||||||
|
return pht(
|
||||||
|
'During the last %s second(s) spent waiting for the lock, %s '.
|
||||||
|
'other process(es) acquired it, so this is likely a '.
|
||||||
|
'bottleneck. Use "bin/lock log --name %s" to review log activity.',
|
||||||
|
new PhutilNumber($wait),
|
||||||
|
phutil_count($logs),
|
||||||
|
$lock_name);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
$last_log = id(new PhabricatorDaemonLockLog())->loadOneWhere(
|
||||||
|
'lockName = %s ORDER BY id DESC LIMIT 1',
|
||||||
|
$lock_name);
|
||||||
|
|
||||||
|
if ($last_log) {
|
||||||
|
$info = array();
|
||||||
|
|
||||||
|
$acquired = $last_log->getDateCreated();
|
||||||
|
$context = $last_log->getLockContext();
|
||||||
|
|
||||||
|
$process_info = array();
|
||||||
|
|
||||||
|
$pid = idx($context, 'pid');
|
||||||
|
if ($pid) {
|
||||||
|
$process_info[] = 'pid='.$pid;
|
||||||
|
}
|
||||||
|
|
||||||
|
$host = idx($context, 'host');
|
||||||
|
if ($host) {
|
||||||
|
$process_info[] = 'host='.$host;
|
||||||
|
}
|
||||||
|
|
||||||
|
$sapi = idx($context, 'sapi');
|
||||||
|
if ($sapi) {
|
||||||
|
$process_info[] = 'sapi='.$sapi;
|
||||||
|
}
|
||||||
|
|
||||||
|
$argv = idx($context, 'argv');
|
||||||
|
if ($argv) {
|
||||||
|
$process_info[] = 'argv='.(string)csprintf('%LR', $argv);
|
||||||
|
}
|
||||||
|
|
||||||
|
$controller = idx($context, 'controller');
|
||||||
|
if ($controller) {
|
||||||
|
$process_info[] = 'controller='.$controller;
|
||||||
|
}
|
||||||
|
|
||||||
|
$method = idx($context, 'method');
|
||||||
|
if ($method) {
|
||||||
|
$process_info[] = 'method='.$method;
|
||||||
|
}
|
||||||
|
|
||||||
|
$process_info = implode(', ', $process_info);
|
||||||
|
|
||||||
|
$info[] = pht(
|
||||||
|
'This lock was most recently acquired by a process (%s) '.
|
||||||
|
'%s second(s) ago.',
|
||||||
|
$process_info,
|
||||||
|
new PhutilNumber($now - $acquired));
|
||||||
|
|
||||||
|
$released = $last_log->getLockReleased();
|
||||||
|
if ($released) {
|
||||||
|
$info[] = pht(
|
||||||
|
'This lock was released %s second(s) ago.',
|
||||||
|
new PhutilNumber($now - $released));
|
||||||
|
} else {
|
||||||
|
$info[] = pht('There is no record of this lock being released.');
|
||||||
|
}
|
||||||
|
|
||||||
|
return implode(' ', $info);
|
||||||
|
}
|
||||||
|
|
||||||
|
return pht(
|
||||||
|
'Found no records of processes acquiring or releasing this lock.');
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in a new issue