1
0
Fork 0
mirror of https://we.phorge.it/source/arcanist.git synced 2025-02-10 13:58:33 +01:00
phorge-arcanist/src/lint/linter/ArcanistSpellingLinter.php
Joshua Spence 494d974005 Move ArcanistSpellingDefaultData into a configurable JSON file
Summary: Currently, the `ArcanistSpellingLinter` loads data from `ArcanistSpellingDefaultData`, with no way to configure the linter from an `.arclint` file. Instead we should define a format for a "dictionary" file, of which the `ArvcanistSpellingLinter` can load and of which the paths are easily configured through `.arclint`.

Test Plan:
Updated the test case and ran `arc unit`.

NOTE: I have removed the `LINT_SPELLING_PICKY` and `LINT_SPELLING_IMPORTANT` constants and replaced them with `LINT_SPELLING_FULL` and `LINT_SPELLING_PARTIAL`. This was done because it simplifies the implementation considerably and makes customization of the `ArcanistSpellingLinter` simpler, but also because these constants were not widely used in the existing implementation.

Reviewers: epriestley, #blessed_reviewers

Reviewed By: epriestley, #blessed_reviewers

Subscribers: epriestley, Korvin

Differential Revision: https://secure.phabricator.com/D9805
2014-07-04 08:18:33 +10:00

182 lines
5 KiB
PHP

<?php
/**
* Enforces basic spelling. Spelling inside code is actually pretty hard to
* get right without false positives. I take a conservative approach and just
* use a blacklisted set of words that are commonly spelled incorrectly.
*/
final class ArcanistSpellingLinter extends ArcanistLinter {
const LINT_SPELLING_EXACT = 1;
const LINT_SPELLING_PARTIAL = 2;
private $dictionaries = array();
private $exactWordRules = array();
private $partialWordRules = array();
public function getInfoName() {
return pht('Spellchecker');
}
public function getInfoDescription() {
return pht('Detects common misspellings of English words.');
}
public function getLinterName() {
return 'SPELL';
}
public function getLinterConfigurationName() {
return 'spelling';
}
public function getLinterConfigurationOptions() {
$options = array(
'spelling.dictionaries' => array(
'type' => 'optional list<string>',
'help' => pht('Pass in custom dictionaries.'),
),
);
return $options + parent::getLinterConfigurationOptions();
}
public function setLinterConfigurationValue($key, $value) {
switch ($key) {
case 'spelling.dictionaries':
foreach ($value as $dictionary) {
$this->loadDictionary($dictionary);
}
return;
}
return parent::setLinterConfigurationValue($key, $value);
}
public function loadDictionary($path) {
$root = $this->getEngine()->getWorkingCopy()->getProjectRoot();
$path = Filesystem::resolvePath($path, $root);
$dict = phutil_json_decode(Filesystem::readFile($path));
PhutilTypeSpec::checkMap(
$dict,
array(
'rules' => 'map<string, map<string, string>>',
));
$rules = $dict['rules'];
$this->dictionaries[] = $path;
$this->exactWordRules = array_merge(
$this->exactWordRules,
idx($rules, 'exact', array()));
$this->partialWordRules = array_merge(
$this->partialWordRules,
idx($rules, 'partial', array()));
}
public function addExactWordRule($misspelling, $correction) {
$this->exactWordRules = array_merge(
$this->exactWordRules,
array($misspelling => $correction));
}
public function addPartialWordRule($misspelling, $correction) {
$this->partialWordRules = array_merge(
$this->partialWordRules,
array($misspelling => $correction));
}
public function getLintSeverityMap() {
return array(
self::LINT_SPELLING_EXACT => ArcanistLintSeverity::SEVERITY_WARNING,
self::LINT_SPELLING_PARTIAL => ArcanistLintSeverity::SEVERITY_WARNING,
);
}
public function getLintNameMap() {
return array(
self::LINT_SPELLING_EXACT => pht('Possible Spelling Mistake'),
self::LINT_SPELLING_PARTIAL => pht('Possible Spelling Mistake'),
);
}
public function lintPath($path) {
// TODO: This is a bit hacky. If no dictionaries were specified, then add
// the default dictionary.
if (!$this->dictionaries) {
$root = dirname(phutil_get_library_root('arcanist'));
$this->loadDictionary($root.'/resources/spelling/english.json');
}
foreach ($this->exactWordRules as $misspelling => $correction) {
$this->checkExactWord($path, $misspelling, $correction);
}
foreach ($this->partialWordRules as $misspelling => $correction) {
$this->checkPartialWord($path, $misspelling, $correction);
}
}
private function checkExactWord($path, $word, $correction) {
$text = $this->getData($path);
$matches = array();
$num_matches = preg_match_all(
'#\b'.preg_quote($word, '#').'\b#i',
$text,
$matches,
PREG_OFFSET_CAPTURE);
if (!$num_matches) {
return;
}
foreach ($matches[0] as $match) {
$original = $match[0];
$replacement = self::fixLetterCase($correction, $original);
$this->raiseLintAtOffset(
$match[1],
self::LINT_SPELLING_EXACT,
pht(
"Possible spelling error. You wrote '%s', but did you mean '%s'?",
$word,
$correction),
$original,
$replacement);
}
}
private function checkPartialWord($path, $word, $correction) {
$text = $this->getData($path);
$pos = 0;
while ($pos < strlen($text)) {
$next = stripos($text, $word, $pos);
if ($next === false) {
return;
}
$original = substr($text, $next, strlen($word));
$replacement = self::fixLetterCase($correction, $original);
$this->raiseLintAtOffset(
$next,
self::LINT_SPELLING_PARTIAL,
pht(
"Possible spelling error. You wrote '%s', but did you mean '%s'?",
$word,
$correction),
$original,
$replacement);
$pos = $next + 1;
}
}
public static function fixLetterCase($string, $case) {
switch ($case) {
case strtolower($case):
return strtolower($string);
case strtoupper($case):
return strtoupper($string);
case ucwords(strtolower($case)):
return ucwords(strtolower($string));
default:
return null;
}
}
}