1
0
Fork 0
mirror of https://we.phorge.it/source/arcanist.git synced 2025-01-11 07:11:03 +01:00

Split large path lists into blocks when linting

Summary:
Fixes T5097. When linting a large list of paths (e.g., with `--everything`), do this internally:

  $chunks = array_chunk($paths, 32);
  foreach ($chunks as $chunk) {
    $this->lintChunk($chunk);
  }

This keeps the advantages of parallelism and artifact sharing for future-based linters, without having memory usage grow in an unbounded way.

These callbacks changed:

  - `willLintPath()`: Useless, no meaningful implementations. Internalized the required side effect and broke the hook.
  - `didRunLinters()`: Now useless, with no meaningful implementations. Broke the hook.
  - `didLintPaths()`: New hook which executes opposite `willLintPaths()`.
  - `lintPath()`: Linters no longer need to implement this method.

XHPAST now has an explicit way to release shared futures.

These minor changes also happened:

  - Formalized the "linter ID", which is a semi-durable identifier for the cache.
  - Removed linter -> exception explicit mapping, which was unused. We now just collect exceptions.
  - We do the `canRun()` checks first (and separately) now.
  - Share more service call profiling code.
  - Fix an issue where the test harness would use the path on disk, even if configuration set a different path.

Test Plan:
  - Ran `arc lint --everything` in `arcanist/`.
    - With no chunking, saw **unstable** memory usage with a peak at 941 MB.
    - With chunk size 32, saw **stable** memory usage with a peak at 269 MB.
    - With chunk size 8, saw **stable** memory usage with a peak at 180 MB.
  - Ran with `--trace` and saw profiling information.
  - Created this diff.

Reviewers: joshuaspence

Reviewed By: joshuaspence

Subscribers: epriestley

Maniphest Tasks: T5097

Differential Revision: https://secure.phabricator.com/D12501
This commit is contained in:
epriestley 2015-04-22 05:15:57 -07:00
parent ab4eac6f31
commit f4aadb9604
6 changed files with 399 additions and 130 deletions

View file

@ -212,6 +212,10 @@ abstract class ArcanistLintEngine {
throw new ArcanistNoEffectException('No linters to run.');
}
foreach ($linters as $key => $linter) {
$linter->setLinterID($key);
}
$linters = msort($linters, 'getLinterPriority');
foreach ($linters as $linter) {
$linter->setEngine($this);
@ -249,75 +253,13 @@ abstract class ArcanistLintEngine {
$this->cacheVersion = crc32(implode("\n", $versions));
$runnable = $this->getRunnableLinters($linters);
$this->stopped = array();
$exceptions = array();
foreach ($linters as $linter_name => $linter) {
if (!is_string($linter_name)) {
$linter_name = get_class($linter);
}
try {
if (!$linter->canRun()) {
continue;
}
$paths = $linter->getPaths();
foreach ($paths as $key => $path) {
// Make sure each path has a result generated, even if it is empty
// (i.e., the file has no lint messages).
$result = $this->getResultForPath($path);
if (isset($this->stopped[$path])) {
unset($paths[$key]);
}
if (isset($this->cachedResults[$path][$this->cacheVersion])) {
$cached_result = $this->cachedResults[$path][$this->cacheVersion];
$exceptions = $this->executeLinters($runnable);
$use_cache = $this->shouldUseCache(
$linter->getCacheGranularity(),
idx($cached_result, 'repository_version'));
if ($use_cache) {
unset($paths[$key]);
if (idx($cached_result, 'stopped') == $linter_name) {
$this->stopped[$path] = $linter_name;
}
}
}
}
$paths = array_values($paths);
if ($paths) {
$profiler = PhutilServiceProfiler::getInstance();
$call_id = $profiler->beginServiceCall(array(
'type' => 'lint',
'linter' => $linter_name,
'paths' => $paths,
));
try {
$linter->willLintPaths($paths);
foreach ($paths as $path) {
$linter->willLintPath($path);
$linter->lintPath($path);
if ($linter->didStopAllLinters()) {
$this->stopped[$path] = $linter_name;
}
}
} catch (Exception $ex) {
$profiler->endServiceCall($call_id, array());
throw $ex;
}
$profiler->endServiceCall($call_id, array());
}
} catch (Exception $ex) {
$exceptions[$linter_name] = $ex;
}
}
$exceptions += $this->didRunLinters($linters);
foreach ($linters as $linter) {
foreach ($runnable as $linter) {
foreach ($linter->getLintMessages() as $message) {
if (!$this->isSeverityEnabled($message->getSeverity())) {
continue;
@ -419,33 +361,6 @@ abstract class ArcanistLintEngine {
abstract public function buildLinters();
final protected function didRunLinters(array $linters) {
assert_instances_of($linters, 'ArcanistLinter');
$exceptions = array();
$profiler = PhutilServiceProfiler::getInstance();
foreach ($linters as $linter_name => $linter) {
if (!is_string($linter_name)) {
$linter_name = get_class($linter);
}
$call_id = $profiler->beginServiceCall(array(
'type' => 'lint',
'linter' => $linter_name,
));
try {
$linter->didRunLinters();
} catch (Exception $ex) {
$exceptions[$linter_name] = $ex;
}
$profiler->endServiceCall($call_id, array());
}
return $exceptions;
}
final public function setRepositoryVersion($version) {
$this->repositoryVersion = $version;
return $this;
@ -579,4 +494,161 @@ abstract class ArcanistLintEngine {
return $this;
}
private function getRunnableLinters(array $linters) {
assert_instances_of($linters, 'ArcanistLinter');
// TODO: The canRun() mechanism is only used by one linter, and just
// silently disables the linter. Almost every other linter handles this
// by throwing `ArcanistMissingLinterException`. Both mechanisms are not
// ideal; linters which can not run should emit a message, get marked as
// "skipped", and allow execution to continue. See T7045.
$runnable = array();
foreach ($linters as $key => $linter) {
if ($linter->canRun()) {
$runnable[$key] = $linter;
}
}
return $runnable;
}
private function executeLinters(array $runnable) {
$all_paths = $this->getPaths();
$path_chunks = array_chunk($all_paths, 32, $preserve_keys = true);
$exception_lists = array();
foreach ($path_chunks as $chunk) {
$exception_lists[] = $this->executeLintersOnChunk($runnable, $chunk);
}
return array_mergev($exception_lists);
}
private function executeLintersOnChunk(array $runnable, array $path_list) {
assert_instances_of($runnable, 'ArcanistLinter');
$path_map = array_fuse($path_list);
$exceptions = array();
$did_lint = array();
foreach ($runnable as $linter) {
$linter_id = $linter->getLinterID();
$paths = $linter->getPaths();
foreach ($paths as $key => $path) {
// If we aren't running this path in the current chunk of paths,
// skip it completely.
if (empty($path_map[$path])) {
unset($paths[$key]);
continue;
}
// Make sure each path has a result generated, even if it is empty
// (i.e., the file has no lint messages).
$result = $this->getResultForPath($path);
// If a linter has stopped all other linters for this path, don't
// actually run the linter.
if (isset($this->stopped[$path])) {
unset($paths[$key]);
continue;
}
// If we have a cached result for this path, don't actually run the
// linter.
if (isset($this->cachedResults[$path][$this->cacheVersion])) {
$cached_result = $this->cachedResults[$path][$this->cacheVersion];
$use_cache = $this->shouldUseCache(
$linter->getCacheGranularity(),
idx($cached_result, 'repository_version'));
if ($use_cache) {
unset($paths[$key]);
if (idx($cached_result, 'stopped') == $linter_id) {
$this->stopped[$path] = $linter_id;
}
}
}
}
$paths = array_values($paths);
if (!$paths) {
continue;
}
try {
$this->executeLinterOnPaths($linter, $paths);
$did_lint[] = array($linter, $paths);
} catch (Exception $ex) {
$exceptions[] = $ex;
}
}
foreach ($did_lint as $info) {
list($linter, $paths) = $info;
try {
$this->executeDidLintOnPaths($linter, $paths);
} catch (Exception $ex) {
$exceptions[] = $ex;
}
}
return $exceptions;
}
private function beginLintServiceCall(ArcanistLinter $linter, array $paths) {
$profiler = PhutilServiceProfiler::getInstance();
return $profiler->beginServiceCall(
array(
'type' => 'lint',
'linter' => $linter->getInfoName(),
'paths' => $paths,
));
}
private function endLintServiceCall($call_id) {
$profiler = PhutilServiceProfiler::getInstance();
$profiler->endServiceCall($call_id, array());
}
private function executeLinterOnPaths(ArcanistLinter $linter, array $paths) {
$call_id = $this->beginLintServiceCall($linter, $paths);
try {
$linter->willLintPaths($paths);
foreach ($paths as $path) {
$linter->setActivePath($path);
$linter->lintPath($path);
if ($linter->didStopAllLinters()) {
$this->stopped[$path] = $linter->getLinterID();
}
}
} catch (Exception $ex) {
$this->endLintServiceCall($call_id);
throw $ex;
}
$this->endLintServiceCall($call_id);
}
private function executeDidLintOnPaths(ArcanistLinter $linter, array $paths) {
$call_id = $this->beginLintServiceCall($linter, $paths);
try {
$linter->didLintPaths($paths);
} catch (Exception $ex) {
$this->endLintServiceCall($call_id);
throw $ex;
}
$this->endLintServiceCall($call_id);
}
}

View file

@ -8,6 +8,7 @@ abstract class ArcanistBaseXHPASTLinter extends ArcanistFutureLinter {
private $futures = array();
private $trees = array();
private $exceptions = array();
private $refcount = array();
final public function getCacheVersion() {
$parts = array();
@ -52,6 +53,10 @@ abstract class ArcanistBaseXHPASTLinter extends ArcanistFutureLinter {
return $this->getXHPASTLinter()->buildSharedFutures($paths);
}
protected function didResolveLinterFutures(array $futures) {
$this->getXHPASTLinter()->releaseSharedFutures(array_keys($futures));
}
/* -( Sharing Parse Trees )------------------------------------------------ */
@ -105,11 +110,44 @@ abstract class ArcanistBaseXHPASTLinter extends ArcanistFutureLinter {
if (!isset($this->futures[$path])) {
$this->futures[$path] = PhutilXHPASTBinary::getParserFuture(
$this->getData($path));
$this->refcount[$path] = 1;
} else {
$this->refcount[$path]++;
}
}
return array_select_keys($this->futures, $paths);
}
/**
* Release futures on this linter which are no longer in use elsewhere.
*
* @param list<string> Paths to release futures for.
* @return void
* @task sharing
*/
final protected function releaseSharedFutures(array $paths) {
foreach ($paths as $path) {
if (empty($this->refcount[$path])) {
throw new Exception(
pht(
'Imbalanced calls to shared futures: each call to '.
'buildSharedFutures() for a path must be paired with a call to '.
'releaseSharedFutures().'));
}
$this->refcount[$path]--;
if (!$this->refcount[$path]) {
unset($this->refcount[$path]);
unset($this->futures[$path]);
unset($this->trees[$path]);
unset($this->exceptions[$path]);
}
}
}
/**
* Get a path's tree from the responsible linter.
*

View file

@ -182,13 +182,14 @@ final class ArcanistCSharpLinter extends ArcanistLinter {
$this->futures = $futures;
}
public function didRunLinters() {
public function didLintPaths(array $paths) {
if ($this->futures) {
$futures = id(new FutureIterator($this->futures))
->limit(8);
foreach ($futures as $future) {
$this->resolveFuture($future);
}
$this->futures = array();
}
}

View file

@ -19,15 +19,38 @@ abstract class ArcanistFutureLinter extends ArcanistLinter {
}
}
final public function lintPath($path) {}
final public function lintPath($path) {
return;
}
final public function didRunLinters() {
if ($this->futures) {
foreach ($this->futures as $path => $future) {
$this->willLintPath($path);
$this->resolveFuture($path, $future);
}
final public function didLintPaths(array $paths) {
if (!$this->futures) {
return;
}
$map = array();
foreach ($this->futures as $path => $future) {
$this->setActivePath($path);
$this->resolveFuture($path, $future);
$map[$path] = $future;
}
$this->futures = array();
$this->didResolveLinterFutures($map);
}
/**
* Hook for cleaning up resources.
*
* This is invoked after a block of futures resolve, and allows linters to
* discard or clean up any shared resources they no longer need.
*
* @param map<string, Future> Map of paths to resolved futures.
* @return void
*/
protected function didResolveLinterFutures(array $futures) {
return;
}
}

View file

@ -4,6 +4,8 @@
* Implements lint rules, like syntax checks for a specific language.
*
* @task info Human Readable Information
* @task state Runtime State
* @task exec Executing Linters
* @stable
*/
abstract class ArcanistLinter {
@ -13,6 +15,7 @@ abstract class ArcanistLinter {
const GRANULARITY_REPOSITORY = 3;
const GRANULARITY_GLOBAL = 4;
private $id;
protected $paths = array();
protected $data = array();
protected $engine;
@ -27,6 +30,7 @@ abstract class ArcanistLinter {
/* -( Human Readable Information )---------------------------------------- */
/**
* Return an optional informative URI where humans can learn more about this
* linter.
@ -69,6 +73,163 @@ abstract class ArcanistLinter {
get_class($this));
}
/* -( Runtime State )------------------------------------------------------ */
/**
* @task state
*/
final public function getActivePath() {
return $this->activePath;
}
/**
* @task state
*/
final public function setActivePath($path) {
$this->stopAllLinters = false;
$this->activePath = $path;
return $this;
}
/**
* @task state
*/
final public function setEngine(ArcanistLintEngine $engine) {
$this->engine = $engine;
return $this;
}
/**
* @task state
*/
final protected function getEngine() {
return $this->engine;
}
/**
* Set the internal ID for this linter.
*
* This ID is assigned automatically by the @{class:ArcanistLintEngine}.
*
* @param string Unique linter ID.
* @return this
* @task state
*/
final public function setLinterID($id) {
$this->id = $id;
return $this;
}
/**
* Get the internal ID for this linter.
*
* Retrieves an internal linter ID managed by the @{class:ArcanistLintEngine}.
* This ID is a unique scalar which distinguishes linters in a list.
*
* @return string Unique linter ID.
* @task state
*/
final public function getLinterID() {
return $this->id;
}
/* -( Executing Linters )-------------------------------------------------- */
/**
* Hook called before a list of paths are linted.
*
* Parallelizable linters can start multiple requests in parallel here,
* to improve performance. They can implement @{method:didLintPaths} to
* collect results.
*
* Linters which are not parallelizable should normally ignore this callback
* and implement @{method:lintPath} instead.
*
* @param list<string> A list of paths to be linted
* @return void
* @task exec
*/
public function willLintPaths(array $paths) {
return;
}
/**
* Hook called for each path to be linted.
*
* Linters which are not parallelizable can do work here.
*
* Linters which are parallelizable may want to ignore this callback and
* implement @{method:willLintPaths} and @{method:didLintPaths} instead.
*
* @param string Path to lint.
* @return void
* @task exec
*/
public function lintPath($path) {
return;
}
/**
* Hook called after a list of paths are linted.
*
* Parallelizable linters can collect results here.
*
* Linters which are not paralleizable should normally ignore this callback
* and implement @{method:lintPath} instead.
*
* @param list<string> A list of paths which were linted.
* @return void
* @task exec
*/
public function didLintPaths(array $paths) {
return;
}
/**
* Obsolete hook which was invoked before a path was linted.
*
* WARNING: This is an obsolete hook which is not called. If you maintain
* a linter which relies on it, update to use @{method:lintPath} instead.
*
* @task exec
*/
final public function willLintPath($path) {
// TODO: Remove this method after some time. In the meantime, the "final"
// will fatal subclasses which implement this hook and point at the API
// change so maintainers get fewer surprises.
throw new PhutilMethodNotImplementedException();
}
/**
* Obsolete hook which was invoked after linters ran.
*
* WARNING: This is an obsolete hook which is not called. If you maintain
* a linter which relies on it, update to use @{method:didLintPaths} instead.
*
* @return void
* @task exec
*/
final public function didRunLinters() {
// TODO: Remove this method after some time. In the meantime, the "final"
// will fatal subclasses which implement this hook and point at the API
// change so maintainers get fewer surprises.
throw new PhutilMethodNotImplementedException();
}
public function getLinterPriority() {
return 1.0;
}
@ -86,10 +247,6 @@ abstract class ArcanistLinter {
return $this;
}
final public function getActivePath() {
return $this->activePath;
}
final public function getOtherLocation($offset, $path = null) {
if ($path === null) {
$path = $this->getActivePath();
@ -172,19 +329,11 @@ abstract class ArcanistLinter {
return $this->data[$path];
}
final public function setEngine(ArcanistLintEngine $engine) {
$this->engine = $engine;
return $this;
}
final protected function getEngine() {
return $this->engine;
}
public function getCacheVersion() {
return 0;
}
final public function getLintMessageFullCode($short_code) {
return $this->getLinterName().$short_code;
}
@ -291,30 +440,16 @@ abstract class ArcanistLinter {
$replacement);
}
public function willLintPath($path) {
$this->stopAllLinters = false;
$this->activePath = $path;
}
public function canRun() {
return true;
}
public function willLintPaths(array $paths) {
return;
}
abstract public function lintPath($path);
abstract public function getLinterName();
public function getVersion() {
return null;
}
public function didRunLinters() {
// This is a hook.
}
final protected function isCodeEnabled($code) {
$severity = $this->getLintMessageSeverity($code);
return $this->getEngine()->isSeverityEnabled($severity);

View file

@ -113,11 +113,12 @@ abstract class ArcanistLinterTestCase extends ArcanistPhutilTestCase {
$engine = new ArcanistUnitTestableLintEngine();
$engine->setWorkingCopy($working_copy);
$engine->setConfigurationManager($configuration_manager);
$engine->setPaths(array($path));
$engine->setCommitHookMode(idx($config, 'hook', false));
$path_name = idx($config, 'path', $path);
$engine->setPaths(array($path_name));
$linter->addPath($path_name);
$linter->addData($path_name, $data);
@ -129,7 +130,6 @@ abstract class ArcanistLinterTestCase extends ArcanistPhutilTestCase {
$engine->addFileData($path_name, $data);
$results = $engine->run();
$this->assertEqual(
1,
count($results),