mirror of
https://we.phorge.it/source/phorge.git
synced 2024-11-25 16:22:43 +01:00
When we fail to acquire a repository lock, try to provide a hint about why
Summary: Ref T13202. See PHI889. If the lock log is enabled, we can try to offer more details about lock holders. When we fail to acquire a lock: - check for recent acquisitions and suggest that this is a bottleneck issue; - if there are no recent acquisitions, check for the last acquisition and print details about it (what process, how long ago, whether or not we believe it was released). Test Plan: - Enabled the lock log. - Changed the lock wait time to 1 second. - Added a `sleep(10)` after grabbing the lock. - In one window, ran a Conduit call or a `git fetch`. - In another window, ran another operation. - Got useful/sensible errors for both ssh and web lock holders, for example: > PhutilProxyException: Failed to acquire read lock after waiting 1 second(s). You may be able to retry later. (This lock was most recently acquired by a process (pid=12609, host=orbital-3.local, sapi=apache2handler, controller=PhabricatorConduitAPIController, method=diffusion.rawdiffquery) 3 second(s) ago. There is no record of this lock being released.) > PhutilProxyException: Failed to acquire read lock after waiting 1 second(s). You may be able to retry later. (This lock was most recently acquired by a process (pid=65251, host=orbital-3.local, sapi=cli, argv=/Users/epriestley/dev/core/lib/phabricator/bin/ssh-exec --phabricator-ssh-device local.phacility.net --phabricator-ssh-key 2) 2 second(s) ago. There is no record of this lock being released.) Reviewers: amckinley Reviewed By: amckinley Maniphest Tasks: T13202 Differential Revision: https://secure.phabricator.com/D19702
This commit is contained in:
parent
7db265cd5d
commit
021c612cb2
2 changed files with 160 additions and 14 deletions
|
@ -170,12 +170,13 @@ final class DiffusionRepositoryClusterEngine extends Phobject {
|
|||
pht(
|
||||
'Acquired read lock immediately.'));
|
||||
}
|
||||
} catch (Exception $ex) {
|
||||
} catch (PhutilLockException $ex) {
|
||||
throw new PhutilProxyException(
|
||||
pht(
|
||||
'Failed to acquire read lock after waiting %s second(s). You '.
|
||||
'may be able to retry later.',
|
||||
new PhutilNumber($lock_wait)),
|
||||
'may be able to retry later. (%s)',
|
||||
new PhutilNumber($lock_wait),
|
||||
$ex->getHint()),
|
||||
$ex);
|
||||
}
|
||||
|
||||
|
@ -349,12 +350,13 @@ final class DiffusionRepositoryClusterEngine extends Phobject {
|
|||
pht(
|
||||
'Acquired write lock immediately.'));
|
||||
}
|
||||
} catch (Exception $ex) {
|
||||
} catch (PhutilLockException $ex) {
|
||||
throw new PhutilProxyException(
|
||||
pht(
|
||||
'Failed to acquire write lock after waiting %s second(s). You '.
|
||||
'may be able to retry later.',
|
||||
new PhutilNumber($lock_wait)),
|
||||
'may be able to retry later. (%s)',
|
||||
new PhutilNumber($lock_wait),
|
||||
$ex->getHint()),
|
||||
$ex);
|
||||
}
|
||||
|
||||
|
|
|
@ -144,7 +144,8 @@ final class PhabricatorGlobalLock extends PhutilLock {
|
|||
|
||||
$ok = head($result);
|
||||
if (!$ok) {
|
||||
throw new PhutilLockException($lock_name);
|
||||
throw id(new PhutilLockException($lock_name))
|
||||
->setHint($this->newHint($lock_name, $wait));
|
||||
}
|
||||
|
||||
$conn->rememberLock($lock_name);
|
||||
|
@ -152,13 +153,7 @@ final class PhabricatorGlobalLock extends PhutilLock {
|
|||
$this->conn = $conn;
|
||||
|
||||
if ($this->shouldLogLock()) {
|
||||
global $argv;
|
||||
|
||||
$lock_context = array(
|
||||
'pid' => getmypid(),
|
||||
'host' => php_uname('n'),
|
||||
'argv' => $argv,
|
||||
);
|
||||
$lock_context = $this->newLockContext();
|
||||
|
||||
$log = id(new PhabricatorDaemonLockLog())
|
||||
->setLockName($lock_name)
|
||||
|
@ -228,4 +223,153 @@ final class PhabricatorGlobalLock extends PhutilLock {
|
|||
return true;
|
||||
}
|
||||
|
||||
private function newLockContext() {
|
||||
$context = array(
|
||||
'pid' => getmypid(),
|
||||
'host' => php_uname('n'),
|
||||
'sapi' => php_sapi_name(),
|
||||
);
|
||||
|
||||
global $argv;
|
||||
if ($argv) {
|
||||
$context['argv'] = $argv;
|
||||
}
|
||||
|
||||
$access_log = null;
|
||||
|
||||
// TODO: There's currently no cohesive way to get the parameterized access
|
||||
// log for the current request across different request types. Web requests
|
||||
// have an "AccessLog", SSH requests have an "SSHLog", and other processes
|
||||
// (like scripts) have no log. But there's no method to say "give me any
|
||||
// log you've got". For now, just test if we have a web request and use the
|
||||
// "AccessLog" if we do, since that's the only one we actually read any
|
||||
// parameters from.
|
||||
|
||||
// NOTE: "PhabricatorStartup" is only available from web requests, not
|
||||
// from CLI scripts.
|
||||
if (class_exists('PhabricatorStartup', false)) {
|
||||
$access_log = PhabricatorAccessLog::getLog();
|
||||
}
|
||||
|
||||
if ($access_log) {
|
||||
$controller = $access_log->getData('C');
|
||||
if ($controller) {
|
||||
$context['controller'] = $controller;
|
||||
}
|
||||
|
||||
$method = $access_log->getData('m');
|
||||
if ($method) {
|
||||
$context['method'] = $method;
|
||||
}
|
||||
}
|
||||
|
||||
return $context;
|
||||
}
|
||||
|
||||
private function newHint($lock_name, $wait) {
|
||||
if (!$this->shouldLogLock()) {
|
||||
return pht(
|
||||
'Enable the lock log for more detailed information about '.
|
||||
'which process is holding this lock.');
|
||||
}
|
||||
|
||||
$now = PhabricatorTime::getNow();
|
||||
|
||||
// First, look for recent logs. If other processes have been acquiring and
|
||||
// releasing this lock while we've been waiting, this is more likely to be
|
||||
// a contention/throughput issue than an issue with something hung while
|
||||
// holding the lock.
|
||||
$limit = 100;
|
||||
$logs = id(new PhabricatorDaemonLockLog())->loadAllWhere(
|
||||
'lockName = %s AND dateCreated >= %d ORDER BY id ASC LIMIT %d',
|
||||
$lock_name,
|
||||
($now - $wait),
|
||||
$limit);
|
||||
|
||||
if ($logs) {
|
||||
if (count($logs) === $limit) {
|
||||
return pht(
|
||||
'During the last %s second(s) spent waiting for the lock, more '.
|
||||
'than %s other process(es) acquired it, so this is likely a '.
|
||||
'bottleneck. Use "bin/lock log --name %s" to review log activity.',
|
||||
new PhutilNumber($wait),
|
||||
new PhutilNumber($limit),
|
||||
$lock_name);
|
||||
} else {
|
||||
return pht(
|
||||
'During the last %s second(s) spent waiting for the lock, %s '.
|
||||
'other process(es) acquired it, so this is likely a '.
|
||||
'bottleneck. Use "bin/lock log --name %s" to review log activity.',
|
||||
new PhutilNumber($wait),
|
||||
phutil_count($logs),
|
||||
$lock_name);
|
||||
}
|
||||
}
|
||||
|
||||
$last_log = id(new PhabricatorDaemonLockLog())->loadOneWhere(
|
||||
'lockName = %s ORDER BY id DESC LIMIT 1',
|
||||
$lock_name);
|
||||
|
||||
if ($last_log) {
|
||||
$info = array();
|
||||
|
||||
$acquired = $last_log->getDateCreated();
|
||||
$context = $last_log->getLockContext();
|
||||
|
||||
$process_info = array();
|
||||
|
||||
$pid = idx($context, 'pid');
|
||||
if ($pid) {
|
||||
$process_info[] = 'pid='.$pid;
|
||||
}
|
||||
|
||||
$host = idx($context, 'host');
|
||||
if ($host) {
|
||||
$process_info[] = 'host='.$host;
|
||||
}
|
||||
|
||||
$sapi = idx($context, 'sapi');
|
||||
if ($sapi) {
|
||||
$process_info[] = 'sapi='.$sapi;
|
||||
}
|
||||
|
||||
$argv = idx($context, 'argv');
|
||||
if ($argv) {
|
||||
$process_info[] = 'argv='.(string)csprintf('%LR', $argv);
|
||||
}
|
||||
|
||||
$controller = idx($context, 'controller');
|
||||
if ($controller) {
|
||||
$process_info[] = 'controller='.$controller;
|
||||
}
|
||||
|
||||
$method = idx($context, 'method');
|
||||
if ($method) {
|
||||
$process_info[] = 'method='.$method;
|
||||
}
|
||||
|
||||
$process_info = implode(', ', $process_info);
|
||||
|
||||
$info[] = pht(
|
||||
'This lock was most recently acquired by a process (%s) '.
|
||||
'%s second(s) ago.',
|
||||
$process_info,
|
||||
new PhutilNumber($now - $acquired));
|
||||
|
||||
$released = $last_log->getLockReleased();
|
||||
if ($released) {
|
||||
$info[] = pht(
|
||||
'This lock was released %s second(s) ago.',
|
||||
new PhutilNumber($now - $released));
|
||||
} else {
|
||||
$info[] = pht('There is no record of this lock being released.');
|
||||
}
|
||||
|
||||
return implode(' ', $info);
|
||||
}
|
||||
|
||||
return pht(
|
||||
'Found no records of processes acquiring or releasing this lock.');
|
||||
}
|
||||
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue