mirror of
https://we.phorge.it/source/arcanist.git
synced 2025-02-10 13:58:33 +01:00
Summary: Currently, the `ArcanistSpellingLinter` loads its data from `ArcanistSpellingDefaultData`, with no way to configure the linter from an `.arclint` file. Instead, we should define a format for a "dictionary" file that the `ArcanistSpellingLinter` can load and whose paths can be easily configured through `.arclint`. Test Plan: Updated the test case and ran `arc unit`. NOTE: I have removed the `LINT_SPELLING_PICKY` and `LINT_SPELLING_IMPORTANT` constants and replaced them with `LINT_SPELLING_EXACT` and `LINT_SPELLING_PARTIAL`. This was done because it simplifies the implementation considerably and makes customization of the `ArcanistSpellingLinter` simpler, and also because these constants were not widely used in the existing implementation. Reviewers: epriestley, #blessed_reviewers Reviewed By: epriestley, #blessed_reviewers Subscribers: epriestley, Korvin Differential Revision: https://secure.phabricator.com/D9805
182 lines
5 KiB
PHP
182 lines
5 KiB
PHP
<?php
|
|
|
|
/**
 * Enforces basic spelling. Spelling inside code is actually pretty hard to
 * get right without false positives, so this linter takes a conservative
 * approach and only flags words from a blacklisted set of words that are
 * commonly spelled incorrectly.
 *
 * Rules are loaded from JSON "dictionary" files. Each dictionary has a
 * top-level `rules` map which may contain an `exact` map (misspellings matched
 * only as whole words) and a `partial` map (misspellings matched anywhere,
 * including inside longer words). Both map a misspelling to its correction.
 */
final class ArcanistSpellingLinter extends ArcanistLinter {

  const LINT_SPELLING_EXACT = 1;
  const LINT_SPELLING_PARTIAL = 2;

  /** Resolved paths of every dictionary loaded so far. */
  private $dictionaries = array();

  /** Map of misspelling => correction, matched on word boundaries. */
  private $exactWordRules = array();

  /** Map of misspelling => correction, matched as a substring. */
  private $partialWordRules = array();

  public function getInfoName() {
    return pht('Spellchecker');
  }

  public function getInfoDescription() {
    return pht('Detects common misspellings of English words.');
  }

  public function getLinterName() {
    return 'SPELL';
  }

  public function getLinterConfigurationName() {
    return 'spelling';
  }

  /**
   * Declares the `spelling.dictionaries` option in addition to the options
   * provided by the parent linter. Because the arrays are combined with `+`,
   * the entries declared here win if the parent declares the same key.
   */
  public function getLinterConfigurationOptions() {
    $options = array(
      'spelling.dictionaries' => array(
        'type' => 'optional list<string>',
        'help' => pht('Pass in custom dictionaries.'),
      ),
    );

    return $options + parent::getLinterConfigurationOptions();
  }

  /**
   * Applies one configuration value. `spelling.dictionaries` loads every
   * listed dictionary; any other key is delegated to the parent linter.
   */
  public function setLinterConfigurationValue($key, $value) {
    switch ($key) {
      case 'spelling.dictionaries':
        foreach ($value as $dictionary) {
          $this->loadDictionary($dictionary);
        }
        return;
    }

    return parent::setLinterConfigurationValue($key, $value);
  }

  /**
   * Loads the rules of one JSON dictionary file into this linter.
   *
   * The path is resolved with `Filesystem::resolvePath()` against the project
   * root of the working copy. The decoded file must pass the
   * `rules => map<string, map<string, string>>` type check. Rules from the
   * `exact` and `partial` maps are added on top of the rules already present;
   * a later definition of the same misspelling replaces the earlier one.
   *
   * Failures while reading, decoding or validating the file are not handled
   * here and are left to the helpers that detect them.
   *
   * @param string $path Path to the dictionary file.
   * @return void
   */
  public function loadDictionary($path) {
    $root = $this->getEngine()->getWorkingCopy()->getProjectRoot();
    $path = Filesystem::resolvePath($path, $root);

    $dict = phutil_json_decode(Filesystem::readFile($path));
    PhutilTypeSpec::checkMap(
      $dict,
      array(
        'rules' => 'map<string, map<string, string>>',
      ));
    $rules = $dict['rules'];

    $this->dictionaries[] = $path;

    // Add the rules one at a time instead of using array_merge(): that
    // function renumbers integer-like keys, so a misspelling such as "2017"
    // would silently be stored under the key 0.
    foreach (idx($rules, 'exact', array()) as $misspelling => $correction) {
      $this->addExactWordRule($misspelling, $correction);
    }

    foreach (idx($rules, 'partial', array()) as $misspelling => $correction) {
      $this->addPartialWordRule($misspelling, $correction);
    }
  }

  /**
   * Registers (or replaces) a rule that is matched only as a whole word.
   *
   * @param string $misspelling Word to flag.
   * @param string $correction  Suggested replacement.
   * @return void
   */
  public function addExactWordRule($misspelling, $correction) {
    // Direct assignment keeps the key intact even when it looks numeric.
    $this->exactWordRules[$misspelling] = $correction;
  }

  /**
   * Registers (or replaces) a rule that is matched anywhere in the text,
   * including inside longer words.
   *
   * @param string $misspelling Character sequence to flag.
   * @param string $correction  Suggested replacement.
   * @return void
   */
  public function addPartialWordRule($misspelling, $correction) {
    // Direct assignment keeps the key intact even when it looks numeric.
    $this->partialWordRules[$misspelling] = $correction;
  }

  public function getLintSeverityMap() {
    return array(
      self::LINT_SPELLING_EXACT => ArcanistLintSeverity::SEVERITY_WARNING,
      self::LINT_SPELLING_PARTIAL => ArcanistLintSeverity::SEVERITY_WARNING,
    );
  }

  public function getLintNameMap() {
    return array(
      self::LINT_SPELLING_EXACT => pht('Possible Spelling Mistake'),
      self::LINT_SPELLING_PARTIAL => pht('Possible Spelling Mistake'),
    );
  }

  /**
   * Runs every exact and partial rule against the contents of one path.
   *
   * @param string $path Path being linted.
   * @return void
   */
  public function lintPath($path) {
    // TODO: This is a bit hacky. If no dictionaries were specified, then add
    // the default dictionary.
    if (!$this->dictionaries) {
      $root = dirname(phutil_get_library_root('arcanist'));
      $this->loadDictionary($root.'/resources/spelling/english.json');
    }

    // PHP stores integer-like array keys as integers; cast them back so the
    // matching functions always receive the misspelling as a string.
    foreach ($this->exactWordRules as $misspelling => $correction) {
      $this->checkExactWord($path, (string)$misspelling, $correction);
    }

    foreach ($this->partialWordRules as $misspelling => $correction) {
      $this->checkPartialWord($path, (string)$misspelling, $correction);
    }
  }

  /**
   * Raises a lint message for every case-insensitive, whole-word occurrence
   * of a misspelling in the file.
   *
   * @param string $path       Path being linted.
   * @param string $word       Misspelling to look for.
   * @param string $correction Suggested replacement.
   * @return void
   */
  private function checkExactWord($path, $word, $correction) {
    // An empty word would turn the pattern into `\b\b`, which matches at
    // every word boundary of the file.
    if (!strlen($word)) {
      return;
    }

    $text = $this->getData($path);
    $matches = array();
    $num_matches = preg_match_all(
      '#\b'.preg_quote($word, '#').'\b#i',
      $text,
      $matches,
      PREG_OFFSET_CAPTURE);
    if (!$num_matches) {
      return;
    }

    foreach ($matches[0] as $match) {
      // With PREG_OFFSET_CAPTURE each match is (matched text, byte offset).
      $original = $match[0];
      $replacement = self::fixLetterCase($correction, $original);
      $this->raiseLintAtOffset(
        $match[1],
        self::LINT_SPELLING_EXACT,
        pht(
          "Possible spelling error. You wrote '%s', but did you mean '%s'?",
          $word,
          $correction),
        $original,
        $replacement);
    }
  }

  /**
   * Raises a lint message for every case-insensitive occurrence of a
   * misspelling in the file, including occurrences inside longer words and
   * occurrences that overlap each other.
   *
   * @param string $path       Path being linted.
   * @param string $word       Misspelling to look for.
   * @param string $correction Suggested replacement.
   * @return void
   */
  private function checkPartialWord($path, $word, $correction) {
    $word_length = strlen($word);

    // stripos() with an empty needle matches at every offset on PHP 8 (and
    // warns on older versions), so an empty rule must never be searched for.
    if (!$word_length) {
      return;
    }

    $text = $this->getData($path);
    $text_length = strlen($text);

    $pos = 0;
    while ($pos < $text_length) {
      $next = stripos($text, $word, $pos);
      if ($next === false) {
        return;
      }

      $original = substr($text, $next, $word_length);
      $replacement = self::fixLetterCase($correction, $original);
      $this->raiseLintAtOffset(
        $next,
        self::LINT_SPELLING_PARTIAL,
        pht(
          "Possible spelling error. You wrote '%s', but did you mean '%s'?",
          $word,
          $correction),
        $original,
        $replacement);

      // Resume one byte after the match start so overlapping matches are
      // still reported.
      $pos = $next + 1;
    }
  }

  /**
   * Re-cases a string so that it follows the letter case of a sample.
   *
   * The sample is tested, in this order, for being entirely lowercase,
   * entirely uppercase, or capitalized word by word. A sample without any
   * letters therefore counts as lowercase.
   *
   * @param string $string Text to re-case (for example, a correction).
   * @param string $case   Sample whose letter case should be imitated.
   * @return string|null   The re-cased string, or null when the sample uses
   *                       some other mix of upper- and lowercase letters.
   */
  public static function fixLetterCase($string, $case) {
    // Compare strictly, as strings: loose comparison treats numeric-looking
    // values such as "1E3" and "1e3" as equal and picks the wrong branch.
    $case = (string)$case;

    if ($case === strtolower($case)) {
      return strtolower($string);
    }

    if ($case === strtoupper($case)) {
      return strtoupper($string);
    }

    if ($case === ucwords(strtolower($case))) {
      return ucwords(strtolower($string));
    }

    return null;
  }

}
|