2012-01-15 21:28:58 -08:00
|
|
|
<?php
|
|
|
|
|
|
|
|
/**
 * Enforces basic spelling. Spelling inside code is actually pretty hard to
 * get right without false positives, so this linter takes a conservative
 * approach and only flags a blacklisted set of words that are commonly
 * spelled incorrectly.
 *
 * Rules are grouped by severity code and come in two flavors: partial-word
 * rules, which are matched as case-insensitive substrings, and whole-word
 * rules, which are matched case-insensitively between word boundaries.
 */
final class ArcanistSpellingLinter extends ArcanistLinter {

  const LINT_SPELLING_PICKY     = 0;
  const LINT_SPELLING_IMPORTANT = 1;

  /** Map of severity code => (misspelling => correction), substring rules. */
  private $partialWordRules;

  /** Map of severity code => (misspelling => correction), whole-word rules. */
  private $wholeWordRules;

  /** Minimum severity code a rule group needs in order to be applied. */
  private $severity;

  /**
   * @return string Human-readable name of this linter.
   */
  public function getInfoName() {
    return pht('Spellchecker');
  }

  /**
   * @return string Human-readable description of this linter.
   */
  public function getInfoDescription() {
    return pht('Detects common misspellings of English words.');
  }

  /**
   * Build a linter preloaded with the default rule sets provided by
   * @{class:ArcanistSpellingDefaultData}.
   *
   * @param int Minimum severity code to apply; rule groups whose severity
   *            code is lower than this are skipped by @{method:lintPath}.
   */
  public function __construct($severity = self::LINT_SPELLING_PICKY) {
    $this->severity = $severity;
    $this->wholeWordRules = ArcanistSpellingDefaultData::getFullWordRules();
    $this->partialWordRules =
      ArcanistSpellingDefaultData::getPartialWordRules();
  }

  /**
   * @return string Short identifier of this linter.
   */
  public function getLinterName() {
    return 'SPELL';
  }

  /**
   * @return string Configuration name of this linter (presumably the key
   *                used to select it in `.arclint`).
   */
  public function getLinterConfigurationName() {
    return 'spelling';
  }

  /**
   * Register (or overwrite) a rule which matches a misspelling anywhere in
   * the text, including inside longer words.
   *
   * @param string Misspelling to look for.
   * @param string Suggested correction.
   * @param int    Severity code the rule is filed under.
   * @return void
   */
  public function addPartialWordRule(
    $incorrect_word,
    $correct_word,
    $severity = self::LINT_SPELLING_IMPORTANT) {

    $this->partialWordRules[$severity][$incorrect_word] = $correct_word;
  }

  /**
   * Register (or overwrite) a rule which matches a misspelling only when it
   * appears as a whole word.
   *
   * @param string Misspelling to look for.
   * @param string Suggested correction.
   * @param int    Severity code the rule is filed under.
   * @return void
   */
  public function addWholeWordRule(
    $incorrect_word,
    $correct_word,
    $severity = self::LINT_SPELLING_IMPORTANT) {

    $this->wholeWordRules[$severity][$incorrect_word] = $correct_word;
  }

  /**
   * @return map<int, string> Lint severity for each spelling lint code.
   */
  public function getLintSeverityMap() {
    return array(
      self::LINT_SPELLING_PICKY     => ArcanistLintSeverity::SEVERITY_WARNING,
      self::LINT_SPELLING_IMPORTANT => ArcanistLintSeverity::SEVERITY_ERROR,
    );
  }

  /**
   * @return map<int, string> Display name for each spelling lint code.
   */
  public function getLintNameMap() {
    return array(
      self::LINT_SPELLING_PICKY     => pht('Possible Spelling Mistake'),
      self::LINT_SPELLING_IMPORTANT => pht('Possible Spelling Mistake'),
    );
  }

  /**
   * Apply every applicable rule to a path: all partial-word rules first,
   * then all whole-word rules. A rule group is applied only if its severity
   * code is at least the configured minimum and the code is enabled.
   *
   * @param string Path to lint.
   * @return void
   */
  public function lintPath($path) {
    foreach ($this->partialWordRules as $severity => $wordlist) {
      if ($severity < $this->severity) {
        continue;
      }
      if (!$this->isCodeEnabled($severity)) {
        continue;
      }
      foreach ($wordlist as $misspell => $correct) {
        $this->checkPartialWord($path, $misspell, $correct, $severity);
      }
    }

    foreach ($this->wholeWordRules as $severity => $wordlist) {
      if ($severity < $this->severity) {
        continue;
      }
      if (!$this->isCodeEnabled($severity)) {
        continue;
      }
      foreach ($wordlist as $misspell => $correct) {
        $this->checkWholeWord($path, $misspell, $correct, $severity);
      }
    }
  }

  /**
   * Raise a lint message for every case-insensitive occurrence of a
   * misspelling in the file, including occurrences inside longer words.
   *
   * @param string Path being linted.
   * @param string Misspelling to search for.
   * @param string Suggested correction.
   * @param int    Lint code to raise.
   * @return void
   */
  protected function checkPartialWord($path, $word, $correct_word, $severity) {
    $text = $this->getData($path);
    $text_length = strlen($text);
    $word_length = strlen($word);

    // An empty needle would match at every offset (PHP 8) or produce a
    // warning (older versions); there is nothing meaningful to report.
    if (!$word_length) {
      return;
    }

    $pos = 0;
    while ($pos < $text_length) {
      $next = stripos($text, $word, $pos);
      if ($next === false) {
        return;
      }

      // Use the text exactly as written in the file so the suggested
      // replacement can mirror its letter case.
      $original = substr($text, $next, $word_length);
      $this->raiseSpellingLint(
        $next,
        $severity,
        $word,
        $correct_word,
        $original);

      // Advance by a single byte so overlapping occurrences are reported.
      $pos = $next + 1;
    }
  }

  /**
   * Raise a lint message for every case-insensitive occurrence of a
   * misspelling which is delimited by word boundaries.
   *
   * @param string Path being linted.
   * @param string Misspelling to search for.
   * @param string Suggested correction.
   * @param int    Lint code to raise.
   * @return void
   */
  protected function checkWholeWord($path, $word, $correct_word, $severity) {
    // With an empty word the pattern degenerates to "\b\b", which would
    // match at every word boundary in the file.
    if (!strlen($word)) {
      return;
    }

    $text = $this->getData($path);

    $matches = array();
    $num_matches = preg_match_all(
      '#\b'.preg_quote($word, '#').'\b#i',
      $text,
      $matches,
      PREG_OFFSET_CAPTURE);

    // Covers both "no matches" (0) and a PCRE failure (false).
    if (!$num_matches) {
      return;
    }

    // With PREG_OFFSET_CAPTURE each match is a (text, byte offset) pair.
    foreach ($matches[0] as $match) {
      $this->raiseSpellingLint(
        $match[1],
        $severity,
        $word,
        $correct_word,
        $match[0]);
    }
  }

  /**
   * Raise the shared "possible spelling error" message at an offset, and
   * propose the correction adjusted to the letter case of the original text.
   *
   * @param int    Byte offset of the misspelling in the file.
   * @param int    Lint code to raise.
   * @param string Misspelling as written in the rule.
   * @param string Suggested correction as written in the rule.
   * @param string Misspelling exactly as it appears in the file.
   * @return void
   */
  private function raiseSpellingLint(
    $offset,
    $severity,
    $word,
    $correct_word,
    $original) {

    $replacement = self::fixLetterCase($correct_word, $original);

    $this->raiseLintAtOffset(
      $offset,
      $severity,
      pht(
        "Possible spelling error. You wrote '%s', but did you mean '%s'?",
        $word,
        $correct_word),
      $original,
      $replacement);
  }

  /**
   * Convert a string to the letter case used by a sample string.
   *
   * Recognizes, in this order: all lowercase, all uppercase, and
   * capitalized words (per `ucwords()`). Comparisons are strict string
   * comparisons, so numeric-looking samples such as "1E3" are not treated
   * as equal to "1e3".
   *
   * @param string String to convert.
   * @param string Sample whose letter case should be imitated.
   * @return string|null Converted string, or null if the sample uses some
   *                     other (mixed) letter case.
   */
  public static function fixLetterCase($string, $case) {
    $case = (string)$case;

    if ($case === strtolower($case)) {
      return strtolower($string);
    } else if ($case === strtoupper($case)) {
      return strtoupper($string);
    } else if ($case === ucwords(strtolower($case))) {
      return ucwords(strtolower($string));
    } else {
      return null;
    }
  }

}
|