1
0
Fork 0
mirror of https://we.phorge.it/source/phorge.git synced 2024-11-10 08:52:39 +01:00

Unify intracluster sync and Drydock working copy construction timeouts as a repository "copy time limit"

Summary:
Depends on D19814. Ref T13216. See PHI885. For various eldritch reasons, `git fetch` can hang. Although we'd probably like to fix this with `git fetch --require-sustained-network-transfer-rate=512KB/5s` or similar, that flag doesn't exist and we don't have a reasonable way to build it.

Short of that, move toward formalizing a repository "copy time limit": the longest amount of time anything may spend trying to make a copy of this repository.

This grows out of the existing intracluster sync limit, which is effectively the same thing. Here, apply it to `git clone` and `git fetch` in Drydock working copy construction, too. A future change may make it configurable.

Test Plan:
  - Set the limit to 0.001.
  - Tried to build and lease working copies, got sensible timeout errors (see D19815).

```
<Activation Failed> Lease activation failed: [CommandException] Command killed by timeout after running for more than 0.001 seconds.
COMMAND
ssh '-o' 'LogLevel=quiet' '-o' 'StrictHostKeyChecking=no' '-o' 'UserKnownHostsFile=/dev/null' '-o' 'BatchMode=yes' -l '********' -p '2222' -i '********' '127.0.0.1' -- '(cd '\''/var/drydock/workingcopy-163/repo/spellbook/'\'' && git clean -d --force && git fetch && git reset --hard)'
```

Reviewers: amckinley

Reviewed By: amckinley

Subscribers: yelirekim, PHID-OPKG-gm6ozazyms6q6i22gyam

Maniphest Tasks: T13216

Differential Revision: https://secure.phabricator.com/D19816
This commit is contained in:
epriestley 2018-11-15 08:11:49 -08:00
parent 933462b487
commit cb033673b6
5 changed files with 50 additions and 14 deletions

View file

@ -138,7 +138,8 @@ abstract class DiffusionCommandEngine extends Phobject {
// See T13108. By default, don't let any cluster command run indefinitely
// to try to avoid cases where `git fetch` hangs for some reason and we're
// left sitting with a held lock forever.
$future->setTimeout(phutil_units('15 minutes in seconds'));
$repository = $this->getRepository();
$future->setTimeout($repository->getCopyTimeLimit());
return $future;
}

View file

@ -173,6 +173,7 @@ final class DrydockWorkingCopyBlueprintImplementation
$map = $resource->getAttribute('repositories.map');
$futures = array();
$repositories = $this->loadRepositories(ipull($map, 'phid'));
foreach ($map as $directory => $spec) {
// TODO: Validate directory isn't goofy like "/etc" or "../../lol"
@ -181,11 +182,18 @@ final class DrydockWorkingCopyBlueprintImplementation
$repository = $repositories[$spec['phid']];
$path = "{$root}/repo/{$directory}/";
// TODO: Run these in parallel?
$interface->execx(
$future = $interface->getExecFuture(
'git clone -- %s %s',
(string)$repository->getCloneURIObject(),
$path);
$future->setTimeout($repository->getCopyTimeLimit());
$futures[$directory] = $future;
}
foreach (new FutureIterator($futures) as $key => $future) {
$future->resolvex();
}
$resource
@ -240,8 +248,12 @@ final class DrydockWorkingCopyBlueprintImplementation
$map = $lease->getAttribute('repositories.map');
$root = $resource->getAttribute('workingcopy.root');
$repositories = $this->loadRepositories(ipull($map, 'phid'));
$default = null;
foreach ($map as $directory => $spec) {
$repository = $repositories[$spec['phid']];
$interface->pushWorkingDirectory("{$root}/repo/{$directory}/");
$cmd = array();
@ -271,7 +283,9 @@ final class DrydockWorkingCopyBlueprintImplementation
$arg[] = $branch;
}
$this->execxv($interface, $cmd, $arg);
$this->newExecvFuture($interface, $cmd, $arg)
->setTimeout($repository->getCopyTimeLimit())
->resolvex();
if (idx($spec, 'default')) {
$default = $directory;
@ -295,7 +309,9 @@ final class DrydockWorkingCopyBlueprintImplementation
$arg[] = $ref_ref;
try {
$this->execxv($interface, $cmd, $arg);
$this->newExecvFuture($interface, $cmd, $arg)
->setTimeout($repository->getCopyTimeLimit())
->resolvex();
} catch (CommandException $ex) {
$display_command = csprintf(
'git fetch %R %R',
@ -509,12 +525,18 @@ final class DrydockWorkingCopyBlueprintImplementation
DrydockCommandInterface $interface,
array $commands,
array $arguments) {
return $this->newExecvFuture($interface, $commands, $arguments)->resolvex();
}
private function newExecvFuture(
DrydockCommandInterface $interface,
array $commands,
array $arguments) {
$commands = implode(' && ', $commands);
$argv = array_merge(array($commands), $arguments);
return call_user_func_array(array($interface, 'execx'), $argv);
return call_user_func_array(array($interface, 'getExecFuture'), $argv);
}
}

View file

@ -140,9 +140,9 @@ final class DrydockSlotLock extends DrydockDAO {
try {
queryfx(
$conn_w,
'INSERT INTO %T (ownerPHID, lockIndex, lockKey) VALUES %Q',
'INSERT INTO %T (ownerPHID, lockIndex, lockKey) VALUES %LQ',
$table->getTableName(),
implode(', ', $sql));
$sql);
} catch (AphrontDuplicateKeyQueryException $ex) {
// Try to improve the readability of the exception. We might miss on
// this query if the lock has already been released, but most of the

View file

@ -1889,6 +1889,19 @@ final class PhabricatorRepository extends PhabricatorRepositoryDAO
}
/**
* Time limit for cloning or copying this repository.
*
* This limit is used to timeout operations like `git clone` or `git fetch`
* when doing intracluster synchronization, building working copies, etc.
*
* @return int Maximum number of seconds to spend copying this repository.
*/
public function getCopyTimeLimit() {
return phutil_units('15 minutes in seconds');
}
/**
* Retrieve the service URI for the device hosting this repository.
*

View file

@ -48,16 +48,16 @@ final class PhabricatorRepositoryURIIndex
queryfx(
$conn_w,
'DELETE FROM %T WHERE repositoryPHID = %s',
$table->getTableName(),
'DELETE FROM %R WHERE repositoryPHID = %s',
$table,
$repository_phid);
if ($sql) {
queryfx(
$conn_w,
'INSERT INTO %T (repositoryPHID, repositoryURI) VALUES %Q',
$table->getTableName(),
implode(', ', $sql));
'INSERT INTO %R (repositoryPHID, repositoryURI) VALUES %LQ',
$table,
$sql);
}
$table->saveTransaction();