From 9cd098ca0132cd414c7bf156bce9065c8992ae9c Mon Sep 17 00:00:00 2001 From: epriestley Date: Thu, 31 May 2012 12:09:01 -0700 Subject: [PATCH] Add "ArcanistSingleLintEngine" and "ArcanistScriptAndRegexLinter" Summary: Request from @csilvers. Allow installs to get most linter features with regexes, configuration and external scripts if they are hesitant to write phutil libraries. - Add "ArcanistSingleLintEngine", which implements the smallest possible engine behavior (run one linter on every path). - Add "ArcanistScriptAndRegexLinter", which uses a script and a regex to parse lint output from other scripts. Depends on D2618. Test Plan: Basics: $ arc set-config lint.engine ArcanistSingleLintEngine Set key 'lint.engine' = 'ArcanistSingleLintEngine'. $ arc set-config lint.engine.single.linter ArcanistScriptAndRegexLinter Set key 'lint.engine.single.linter' = 'ArcanistScriptAndRegexLinter'. $ arc set-config linter.scriptandregex.script 'echo derp #' Set key 'linter.scriptandregex.script' = 'echo derp #'. $ arc set-config linter.scriptandregex.regex '/^(?P.*)$/m' Set key 'linter.scriptandregex.regex' = '/^(?P.*)$/m'. $ arc lint >>> Lint for .arcconfig: Error (S&RX) Lint derp 1 { 2 "project_id" : "arcanist", 3 "conduit_uri" : "https://secure.phabricator.com/", Throw: $ arc set-config linter.scriptandregex.regex '/^(?P.*)$/m' Set key 'linter.scriptandregex.regex' = '/^(?P.*)$/m' (was '/^(?P.*)$/m'). $ arc lint Usage Exception: ArcanistScriptAndRegexLinter: configuration captured a 'throw' named capturing group, 'derp'. Script output: derp Ignore: $ arc set-config linter.scriptandregex.regex '/^(?P.*)$/m' Set key 'linter.scriptandregex.regex' = '/^(?P.*)$/m' (was '/^(?P.*)$/m'). $ arc lint OKAY No lint warnings. Severity: $ arc set-config linter.scriptandregex.regex '/^(?P.)(?P.*)$/m' Set key 'linter.scriptandregex.regex' = '/^(?P.)(?P.*)$/m' (was '/^(?P.*)$/m'). $ arc lint >>> Lint for src/lint/engine/single/ArcanistSingleLintEngine.php: Warning (S&RX) Lint erp 1 'lint/linter/pyflakes/ArcanistPyFlakesLinter.php', 'ArcanistPyLintLinter' => 'lint/linter/pylint/ArcanistPyLintLinter.php', 'ArcanistRepositoryAPI' => 'repository/api/base/ArcanistRepositoryAPI.php', + 'ArcanistScriptAndRegexLinter' => 'lint/linter/scriptandregex/ArcanistScriptAndRegexLinter.php', 'ArcanistSetConfigWorkflow' => 'workflow/set-config/ArcanistSetConfigWorkflow.php', 'ArcanistShellCompleteWorkflow' => 'workflow/shell-complete/ArcanistShellCompleteWorkflow.php', + 'ArcanistSingleLintEngine' => 'lint/engine/single/ArcanistSingleLintEngine.php', 'ArcanistSpellingDefaultData' => 'lint/linter/spelling/ArcanistSpellingDefaultData.php', 'ArcanistSpellingLinter' => 'lint/linter/spelling/ArcanistSpellingLinter.php', 'ArcanistSpellingLinterTestCase' => 'lint/linter/spelling/__tests__/ArcanistSpellingLinterTestCase.php', @@ -187,8 +189,10 @@ phutil_register_library_map(array( 'ArcanistPhutilTestTerminatedException' => 'Exception', 'ArcanistPyFlakesLinter' => 'ArcanistLinter', 'ArcanistPyLintLinter' => 'ArcanistLinter', + 'ArcanistScriptAndRegexLinter' => 'ArcanistLinter', 'ArcanistSetConfigWorkflow' => 'ArcanistBaseWorkflow', 'ArcanistShellCompleteWorkflow' => 'ArcanistBaseWorkflow', + 'ArcanistSingleLintEngine' => 'ArcanistLintEngine', 'ArcanistSpellingLinter' => 'ArcanistLinter', 'ArcanistSpellingLinterTestCase' => 'ArcanistLinterTestCase', 'ArcanistSubversionAPI' => 'ArcanistRepositoryAPI', diff --git a/src/lint/engine/single/ArcanistSingleLintEngine.php b/src/lint/engine/single/ArcanistSingleLintEngine.php new file mode 100644 index 00000000..a8ad32fa --- /dev/null +++ b/src/lint/engine/single/ArcanistSingleLintEngine.php @@ -0,0 +1,60 @@ +getWorkingCopy()->getConfigFromAnySource($key); + + if (!$linter_name) { + throw new ArcanistUsageException( + "You must configure '{$key}' with the name of a linter in order to ". + "use ArcanistSingleLintEngine."); + } + + if (!class_exists($linter_name)) { + throw new ArcanistUsageException( + "Linter '{$linter_name}' configured in '{$key}' does not exist!"); + } + + if (!is_subclass_of($linter_name, 'ArcanistLinter')) { + throw new ArcanistUsageException( + "Linter '{$linter_name}' configured in '{$key}' MUST be a subclass of ". + "ArcanistLinter."); + } + + $linter = newv($linter_name, array()); + foreach ($this->getPaths() as $path) { + $linter->addPath($path); + } + + return array($linter); + } +} diff --git a/src/lint/linter/base/ArcanistLinter.php b/src/lint/linter/base/ArcanistLinter.php index e5e2cfe5..fbda2ba6 100644 --- a/src/lint/linter/base/ArcanistLinter.php +++ b/src/lint/linter/base/ArcanistLinter.php @@ -199,7 +199,13 @@ abstract class ArcanistLinter { abstract public function willLintPaths(array $paths); abstract public function lintPath($path); abstract public function getLinterName(); - abstract public function getLintSeverityMap(); - abstract public function getLintNameMap(); + + public function getLintSeverityMap() { + return array(); + } + + public function getLintNameMap() { + return array(); + } } diff --git a/src/lint/linter/scriptandregex/ArcanistScriptAndRegexLinter.php b/src/lint/linter/scriptandregex/ArcanistScriptAndRegexLinter.php new file mode 100644 index 00000000..5bbcabf9 --- /dev/null +++ b/src/lint/linter/scriptandregex/ArcanistScriptAndRegexLinter.php @@ -0,0 +1,416 @@ +&1' + * + * The return code of the script must be 0, or an exception will be raised + * reporting that the linter failed. If you have a script which exits nonzero + * under normal circumstances, you can force it to always exit 0 by using a + * configuration like this: + * + * sh -c '/opt/lint/lint.sh "$0" || true' + * + * Multiple instances of the script will be run in parallel if there are + * multiple files to be linted, so they should not use any unique resources. + * For instance, this configuration would not work properly, because several + * processes may attempt to write to the file at the same time: + * + * COUNTEREXAMPLE + * sh -c '/opt/lint/lint.sh --output /tmp/lint.out "$0" && cat /tmp/lint.out' + * + * There are necessary limits to how gracefully this linter can deal with + * edge cases, because it is just a script and a regex. If you need to do + * things that this linter can't handle, you can write a phutil linter and move + * the logic to handle those cases into PHP. PHP is a better general-purpose + * programming language than regular expressions are, if only by a small margin. + * + * == ...and Regex == + * + * The regex must be a valid PHP PCRE regex, including delimiters and flags. + * + * The regex will be matched against the entire output of the script, so it + * should generally be in this form if messages are one-per-line: + * + * /^...$/m + * + * The regex should capture these named patterns with `(?P...)`: + * + * - `message` (required) Text describing the lint message. For example, + * "This is a syntax error.". + * - `name` (optional) Text summarizing the lint message. For example, + * "Syntax Error". + * - `severity` (optional) The word "error", "warning", "autofix", "advice", + * or "disabled", in any combination of upper and lower case. Instead, you + * may match groups called `error`, `warning`, `advice`, `autofix`, or + * `disabled`. These allow you to match output formats like "E123" and + * "W123" to indicate errors and warnings, even though the word "error" is + * not present in the output. If no severity capturing group is present, + * messages are raised with "error" severity. If multiple severity capturing + * groups are present, messages are raised with the highest captured + * serverity. Capturing groups like `error` supersede the `severity` + * capturing group. + * - `error` (optional) Match some nonempty substring to indicate that this + * message has "error" severity. + * - `warning` (optional) Match some nonempty substring to indicate that this + * message has "warning" severity. + * - `advice` (optional) Match some nonempty substring to indicate that this + * message has "advice" severity. + * - `autofix` (optional) Match some nonempty substring to indicate that this + * message has "autofix" severity. + * - `disabled` (optional) Match some nonempty substring to indicate that this + * message has "disabled" severity. + * - `file` (optional) The name of the file to raise the lint message in. If + * not specified, defaults to the linted file. It is generally not necessary + * to capture this unless the linter can raise messages in files other than + * the one it is linting. + * - `line` (optional) The line number of the message. + * - `char` (optional) The character offset of the message. + * - `offset` (optional) The byte offset of the message. If captured, this + * supersedes `line` and `char`. + * - `original` (optional) The text the message affects. + * - `replacement` (optional) The text that the range captured by `original` + * should be automatically replaced by to resolve the message. + * - `code` (optional) A short error type identifier which can be used + * elsewhere to configure handling of specific types of messages. For + * example, "EXAMPLE1", "EXAMPLE2", etc., where each code identifies a + * class of message like "syntax error", "missing whitespace", etc. This + * allows configuration to later change the severity of all whitespace + * messages, for example. + * - `ignore` (optional) Match some nonempty substring to ignore the match. + * You can use this if your linter sometimes emits text like "No lint + * errors". + * - `stop` (optional) Match some nonempty substring to stop processing input. + * Remaining matches for this file will be discarded, but linting will + * continue with other linters and other files. + * - `halt` (optional) Match some nonempty substring to halt all linting of + * this file by any linter. Linting will continue with other files. + * - `throw` (optional) Match some nonempty substring to throw an error, which + * will stop `arc` completely. You can use this to fail abruptly if you + * encounter unexpected output. All processing will abort. + * + * Numbered capturing groups are ignored. + * + * For example, if your lint script's output looks like this: + * + * error:13 Too many goats! + * warning:22 Not enough boats. + * + * ...you could use this regex to parse it: + * + * /^(?Pwarning|error):(?P\d+) (?P.*)$/m + * + * The simplest valid regex for line-oriented output is something like this: + * + * /^(?P.*)$/m + * + * @task lint Linting + * @task linterinfo Linter Information + * @task parse Parsing Output + * @task config Validating Configuration + * + * @group linter + */ +final class ArcanistScriptAndRegexLinter extends ArcanistLinter { + + private $output = array(); + + +/* -( Linting )------------------------------------------------------------ */ + + + /** + * Run the script on each file to be linted. + * + * @task lint + */ + public function willLintPaths(array $paths) { + $script = $this->getConfiguredScript(); + $root = $this->getEngine()->getWorkingCopy()->getProjectRoot(); + + $futures = array(); + foreach ($paths as $path) { + $future = new ExecFuture('%C %s', $script, $path); + $future->setCWD($root); + $futures[$path] = $future; + } + + foreach (Futures($futures)->limit(4) as $path => $future) { + list($stdout) = $future->resolvex(); + $this->output[$path] = $stdout; + } + } + + + /** + * Run the regex on the output of the script. + * + * @task lint + */ + public function lintPath($path) { + $regex = $this->getConfiguredRegex(); + + $output = idx($this->output, $path); + if (!strlen($output)) { + // No output, but it exited 0, so just move on. + return; + } + + $matches = null; + if (!preg_match_all($regex, $output, $matches, PREG_SET_ORDER)) { + // Output with no matches. This might be a configuration error, but more + // likely it's something like "No lint errors." and the user just hasn't + // written a sufficiently powerful/ridiculous regexp to capture it into an + // 'ignore' group. Don't make them figure this out; advanced users can + // capture 'throw' to handle this case. + return; + } + + foreach ($matches as $match) { + if (!empty($match['throw'])) { + $throw = $match['throw']; + throw new ArcanistUsageException( + "ArcanistScriptAndRegexLinter: ". + "configuration captured a 'throw' named capturing group, ". + "'{$throw}'. Script output:\n". + $output); + } + + if (!empty($match['halt'])) { + $this->stopAllLinters(); + break; + } + + if (!empty($match['stop'])) { + break; + } + + if (!empty($match['ignore'])) { + continue; + } + + list($line, $char) = $this->getMatchLineAndChar($match, $path); + + $dict = array( + 'path' => idx($match, 'file', $path), + 'line' => $line, + 'char' => $char, + 'code' => idx($match, 'code', $this->getLinterName()), + 'severity' => $this->getMatchSeverity($match), + 'name' => idx($match, 'name', 'Lint'), + 'description' => idx($match, 'message', 'Undefined Lint Message'), + ); + + $original = idx($match, 'original'); + if ($original !== null) { + $dict['original'] = $original; + } + + $replacement = idx($match, 'replacement'); + if ($replacement !== null) { + $dict['replacement'] = $replacement; + } + + $lint = ArcanistLintMessage::newFromDictionary($dict); + $this->addLintMessage($lint); + } + } + + +/* -( Linter Information )------------------------------------------------- */ + + + /** + * Return the short name of the linter. + * + * @return string Short linter identifier. + * + * @task linterinfo + */ + public function getLinterName() { + return 'S&RX'; + } + + +/* -( Parsing Output )----------------------------------------------------- */ + + + /** + * Get the line and character of the message from the regex match. + * + * @param dict Captured groups from regex. + * @return pair Line and character of the message. + * + * @task parse + */ + private function getMatchLineAndChar(array $match, $path) { + if (!empty($match['offset'])) { + list($line, $char) = $this->getEngine()->getLineAndCharFromOffset( + idx($match, 'file', $path), + $match['offset']); + return array($line + 1, $char + 1); + } + + $line = idx($match, 'line', 1); + $char = idx($match, 'char', 1); + + return array($line, $char); + } + + + /** + * Map the regex matching groups to a message severity. We look for either + * a nonempty severity name group like 'error', or a group called 'severity' + * with a valid name. + * + * @param dict Captured groups from regex. + * @return const @{class:ArcanistLintSeverity} constant. + * + * @task parse + */ + private function getMatchSeverity(array $match) { + $map = array( + 'error' => ArcanistLintSeverity::SEVERITY_ERROR, + 'warning' => ArcanistLintSeverity::SEVERITY_WARNING, + 'autofix' => ArcanistLintSeverity::SEVERITY_AUTOFIX, + 'advice' => ArcanistLintSeverity::SEVERITY_ADVICE, + 'disabled' => ArcanistLintSeverity::SEVERITY_DISABLED, + ); + + $severity_name = strtolower(idx($match, 'severity')); + + foreach ($map as $name => $severity) { + if (!empty($match[$name])) { + return $severity; + } + if ($severity_name == $name) { + return $severity; + } + } + + return ArcanistLintSeverity::SEVERITY_ERROR; + } + + +/* -( Validating Configuration )------------------------------------------- */ + + + /** + * Load, validate, and return the "script" configuration. + * + * @return string The shell command fragment to use to run the linter. + * + * @task config + */ + private function getConfiguredScript() { + $working_copy = $this->getEngine()->getWorkingCopy(); + $key = 'linter.scriptandregex.script'; + $config = $working_copy->getConfigFromAnySource($key); + + if (!$config) { + throw new ArcanistUsageException( + "ArcanistScriptAndRegexLinter: ". + "You must configure '{$key}' to point to a script to execute."); + } + + // NOTE: No additional validation since the "script" can be some random + // shell command and/or include flags, so it does not need to point to some + // file on disk. + + return $config; + } + + + /** + * Load, validate, and return the "regex" configuration. + * + * @return string A valid PHP PCRE regular expression. + * + * @task config + */ + private function getConfiguredRegex() { + $working_copy = $this->getEngine()->getWorkingCopy(); + $key = 'linter.scriptandregex.regex'; + $config = $working_copy->getConfigFromAnySource($key); + + if (!$config) { + throw new ArcanistUsageException( + "ArcanistScriptAndRegexLinter: ". + "You must configure '{$key}' with a valid PHP PCRE regex."); + } + + // NOTE: preg_match() returns 0 for no matches and false for compile error; + // this won't match, but will validate the syntax of the regex. + + $ok = preg_match($config, 'syntax-check'); + if ($ok === false) { + throw new ArcanistUsageException( + "ArcanistScriptAndRegexLinter: ". + "Regex '{$config}' does not compile. You must configure '{$key}' with ". + "a valid PHP PCRE regex, including delimiters."); + } + + return $config; + } + +}