mirror of
https://we.phorge.it/source/phorge.git
synced 2024-11-10 08:52:39 +01:00
Unify intracluster sync and Drydock working copy construction timeouts as a repository "copy time limit"
Summary: Depends on D19814. Ref T13216. See PHI885. For various eldritch reasons, `git fetch` can hang. Although we'd probably like to fix this with `git fetch --require-sustained-network-transfer-rate=512KB/5s` or similar, that flag doesn't exist and we don't have a reasonable way to build it. Short of that, move toward formalizing a repository "copy time limit": the longest amount of time anything may spend trying to make a copy of this repository. This grows out of the existing intracluster sync limit, which is effectively the same thing. Here, apply it to `git clone` and `git fetch` in Drydock working copy construction, too. A future change may make it configurable. Test Plan: - Set the limit to 0.001. - Tried to build and lease working copies, got sensible timeout errors (see D19815). ``` <Activation Failed> Lease activation failed: [CommandException] Command killed by timeout after running for more than 0.001 seconds. COMMAND ssh '-o' 'LogLevel=quiet' '-o' 'StrictHostKeyChecking=no' '-o' 'UserKnownHostsFile=/dev/null' '-o' 'BatchMode=yes' -l '********' -p '2222' -i '********' '127.0.0.1' -- '(cd '\''/var/drydock/workingcopy-163/repo/spellbook/'\'' && git clean -d --force && git fetch && git reset --hard)' ``` Reviewers: amckinley Reviewed By: amckinley Subscribers: yelirekim, PHID-OPKG-gm6ozazyms6q6i22gyam Maniphest Tasks: T13216 Differential Revision: https://secure.phabricator.com/D19816
This commit is contained in:
parent
933462b487
commit
cb033673b6
5 changed files with 50 additions and 14 deletions
|
@ -138,7 +138,8 @@ abstract class DiffusionCommandEngine extends Phobject {
|
|||
// See T13108. By default, don't let any cluster command run indefinitely
|
||||
// to try to avoid cases where `git fetch` hangs for some reason and we're
|
||||
// left sitting with a held lock forever.
|
||||
$future->setTimeout(phutil_units('15 minutes in seconds'));
|
||||
$repository = $this->getRepository();
|
||||
$future->setTimeout($repository->getCopyTimeLimit());
|
||||
|
||||
return $future;
|
||||
}
|
||||
|
|
|
@ -173,6 +173,7 @@ final class DrydockWorkingCopyBlueprintImplementation
|
|||
|
||||
$map = $resource->getAttribute('repositories.map');
|
||||
|
||||
$futures = array();
|
||||
$repositories = $this->loadRepositories(ipull($map, 'phid'));
|
||||
foreach ($map as $directory => $spec) {
|
||||
// TODO: Validate directory isn't goofy like "/etc" or "../../lol"
|
||||
|
@ -181,11 +182,18 @@ final class DrydockWorkingCopyBlueprintImplementation
|
|||
$repository = $repositories[$spec['phid']];
|
||||
$path = "{$root}/repo/{$directory}/";
|
||||
|
||||
// TODO: Run these in parallel?
|
||||
$interface->execx(
|
||||
$future = $interface->getExecFuture(
|
||||
'git clone -- %s %s',
|
||||
(string)$repository->getCloneURIObject(),
|
||||
$path);
|
||||
|
||||
$future->setTimeout($repository->getCopyTimeLimit());
|
||||
|
||||
$futures[$directory] = $future;
|
||||
}
|
||||
|
||||
foreach (new FutureIterator($futures) as $key => $future) {
|
||||
$future->resolvex();
|
||||
}
|
||||
|
||||
$resource
|
||||
|
@ -240,8 +248,12 @@ final class DrydockWorkingCopyBlueprintImplementation
|
|||
$map = $lease->getAttribute('repositories.map');
|
||||
$root = $resource->getAttribute('workingcopy.root');
|
||||
|
||||
$repositories = $this->loadRepositories(ipull($map, 'phid'));
|
||||
|
||||
$default = null;
|
||||
foreach ($map as $directory => $spec) {
|
||||
$repository = $repositories[$spec['phid']];
|
||||
|
||||
$interface->pushWorkingDirectory("{$root}/repo/{$directory}/");
|
||||
|
||||
$cmd = array();
|
||||
|
@ -271,7 +283,9 @@ final class DrydockWorkingCopyBlueprintImplementation
|
|||
$arg[] = $branch;
|
||||
}
|
||||
|
||||
$this->execxv($interface, $cmd, $arg);
|
||||
$this->newExecvFuture($interface, $cmd, $arg)
|
||||
->setTimeout($repository->getCopyTimeLimit())
|
||||
->resolvex();
|
||||
|
||||
if (idx($spec, 'default')) {
|
||||
$default = $directory;
|
||||
|
@ -295,7 +309,9 @@ final class DrydockWorkingCopyBlueprintImplementation
|
|||
$arg[] = $ref_ref;
|
||||
|
||||
try {
|
||||
$this->execxv($interface, $cmd, $arg);
|
||||
$this->newExecvFuture($interface, $cmd, $arg)
|
||||
->setTimeout($repository->getCopyTimeLimit())
|
||||
->resolvex();
|
||||
} catch (CommandException $ex) {
|
||||
$display_command = csprintf(
|
||||
'git fetch %R %R',
|
||||
|
@ -509,12 +525,18 @@ final class DrydockWorkingCopyBlueprintImplementation
|
|||
DrydockCommandInterface $interface,
|
||||
array $commands,
|
||||
array $arguments) {
|
||||
return $this->newExecvFuture($interface, $commands, $arguments)->resolvex();
|
||||
}
|
||||
|
||||
private function newExecvFuture(
|
||||
DrydockCommandInterface $interface,
|
||||
array $commands,
|
||||
array $arguments) {
|
||||
|
||||
$commands = implode(' && ', $commands);
|
||||
$argv = array_merge(array($commands), $arguments);
|
||||
|
||||
return call_user_func_array(array($interface, 'execx'), $argv);
|
||||
return call_user_func_array(array($interface, 'getExecFuture'), $argv);
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
|
|
@ -140,9 +140,9 @@ final class DrydockSlotLock extends DrydockDAO {
|
|||
try {
|
||||
queryfx(
|
||||
$conn_w,
|
||||
'INSERT INTO %T (ownerPHID, lockIndex, lockKey) VALUES %Q',
|
||||
'INSERT INTO %T (ownerPHID, lockIndex, lockKey) VALUES %LQ',
|
||||
$table->getTableName(),
|
||||
implode(', ', $sql));
|
||||
$sql);
|
||||
} catch (AphrontDuplicateKeyQueryException $ex) {
|
||||
// Try to improve the readability of the exception. We might miss on
|
||||
// this query if the lock has already been released, but most of the
|
||||
|
|
|
@ -1889,6 +1889,19 @@ final class PhabricatorRepository extends PhabricatorRepositoryDAO
|
|||
}
|
||||
|
||||
|
||||
/**
|
||||
* Time limit for cloning or copying this repository.
|
||||
*
|
||||
* This limit is used to timeout operations like `git clone` or `git fetch`
|
||||
* when doing intracluster synchronization, building working copies, etc.
|
||||
*
|
||||
* @return int Maximum number of seconds to spend copying this repository.
|
||||
*/
|
||||
public function getCopyTimeLimit() {
|
||||
return phutil_units('15 minutes in seconds');
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Retrieve the service URI for the device hosting this repository.
|
||||
*
|
||||
|
|
|
@ -48,16 +48,16 @@ final class PhabricatorRepositoryURIIndex
|
|||
|
||||
queryfx(
|
||||
$conn_w,
|
||||
'DELETE FROM %T WHERE repositoryPHID = %s',
|
||||
$table->getTableName(),
|
||||
'DELETE FROM %R WHERE repositoryPHID = %s',
|
||||
$table,
|
||||
$repository_phid);
|
||||
|
||||
if ($sql) {
|
||||
queryfx(
|
||||
$conn_w,
|
||||
'INSERT INTO %T (repositoryPHID, repositoryURI) VALUES %Q',
|
||||
$table->getTableName(),
|
||||
implode(', ', $sql));
|
||||
'INSERT INTO %R (repositoryPHID, repositoryURI) VALUES %LQ',
|
||||
$table,
|
||||
$sql);
|
||||
}
|
||||
|
||||
$table->saveTransaction();
|
||||
|
|
Loading…
Reference in a new issue