1
0
Fork 0
mirror of https://we.phorge.it/source/arcanist.git synced 2024-09-19 16:38:51 +02:00

Move ArcanistSpellingDefaultData into a configurable JSON file

Summary: Currently, the `ArcanistSpellingLinter` loads data from `ArcanistSpellingDefaultData`, with no way to configure the linter from an `.arclint` file. Instead, we should define a format for a "dictionary" file which the `ArcanistSpellingLinter` can load, and whose paths can be easily configured through `.arclint`.

Test Plan:
Updated the test case and ran `arc unit`.

NOTE: I have removed the `LINT_SPELLING_PICKY` and `LINT_SPELLING_IMPORTANT` constants and replaced them with `LINT_SPELLING_EXACT` and `LINT_SPELLING_PARTIAL`. This was done because it simplifies the implementation considerably and makes customization of the `ArcanistSpellingLinter` simpler, but also because these constants were not widely used in the existing implementation.

Reviewers: epriestley, #blessed_reviewers

Reviewed By: epriestley, #blessed_reviewers

Subscribers: epriestley, Korvin

Differential Revision: https://secure.phabricator.com/D9805
This commit is contained in:
Joshua Spence 2014-07-04 08:18:33 +10:00
parent df1491c449
commit 494d974005
6 changed files with 715 additions and 721 deletions

View file

@ -38,9 +38,7 @@
}, },
"spelling": { "spelling": {
"type": "spelling", "type": "spelling",
"exclude": [ "exclude": "(resources/spelling/.*\\.json$)"
"(^src/lint/linter/spelling/ArcanistSpellingDefaultData\\.php$)"
]
}, },
"text": { "text": {
"type": "text" "type": "text"

View file

@ -0,0 +1,598 @@
{
"rules": {
"exact": {
"abandonning": "abandoning",
"abigious": "ambiguous",
"abitrate": "arbitrate",
"abov": "above",
"absense": "absence",
"absolut": "absolute",
"absoulte": "absolute",
"acceleratoin": "acceleration",
"accelleration": "acceleration",
"accesing": "accessing",
"accesnt": "accent",
"accessable": "accessible",
"accesss": "access",
"accidentaly": "accidentally",
"accidentually": "accidentally",
"accomodate": "accommodate",
"accomodates": "accommodates",
"accout": "account",
"acess": "access",
"acessable": "accessible",
"acient": "ancient",
"ackowledge": "acknowledge",
"ackowledged": "acknowledged",
"acknowldegement": "acknowledgement",
"acording": "according",
"activete": "activate",
"acumulating": "accumulating",
"addional": "additional",
"additionaly": "additionally",
"addreses": "addresses",
"aditional": "additional",
"aditionally": "additionally",
"aditionaly": "additionally",
"adress": "address",
"adresses": "addresses",
"adviced": "advised",
"afecting": "affecting",
"albumns": "albums",
"alegorical": "allegorical",
"algorith": "algorithm",
"algorithmical": "algorithmic",
"algoritm": "algorithm",
"algoritms": "algorithms",
"algorrithm": "algorithm",
"algorritm": "algorithm",
"allpication": "application",
"alogirhtms": "algorithms",
"alot": "a lot",
"alow": "allow",
"alows": "allows",
"altough": "although",
"ambigious": "ambiguous",
"amoung": "among",
"amout": "amount",
"analysator": "analyzer",
"ang": "and",
"anniversery": "anniversary",
"annoucement": "announcement",
"anomolies": "anomalies",
"anomoly": "anomaly",
"aplication": "application",
"appearence": "appearance",
"appliction": "application",
"applictions": "applications",
"appropiate": "appropriate",
"appropriatly": "appropriately",
"aquired": "acquired",
"arbitary": "arbitrary",
"architechture": "architecture",
"arguement": "argument",
"arguements": "arguments",
"aritmetic": "arithmetic",
"arraival": "arrival",
"artifical": "artificial",
"artillary": "artillery",
"assigment": "assignment",
"assigments": "assignments",
"assistent": "assistant",
"asuming": "assuming",
"asycronous": "asynchronous",
"atomatically": "automatically",
"attachement": "attachment",
"attemps": "attempts",
"attruibutes": "attributes",
"authentification": "authentication",
"automaticaly": "automatically",
"automaticly": "automatically",
"automatize": "automate",
"automatized": "automated",
"automatizes": "automates",
"autonymous": "autonomous",
"auxilliary": "auxiliary",
"avaiable": "available",
"availabled": "available",
"availablity": "availability",
"availale": "available",
"availavility": "availability",
"availble": "available",
"availiable": "available",
"avaliable": "available",
"backgroud": "background",
"bahavior": "behavior",
"baloon": "balloon",
"baloons": "balloons",
"bandwith": "bandwidth",
"batery": "battery",
"becomming": "becoming",
"becuase": "because",
"begining": "beginning",
"bianries": "binaries",
"calender": "calendar",
"cancelation": "cancellation",
"capabilites": "capabilities",
"capatibilities": "capabilities",
"cariage": "carriage",
"challange": "challenge",
"challanges": "challenges",
"changable": "changeable",
"charachter": "character",
"charachters": "characters",
"charater": "character",
"charaters": "characters",
"charcter": "character",
"childs": "children",
"chnage": "change",
"chnages": "changes",
"choosen": "chosen",
"collapsable": "collapsible",
"colorfull": "colorful",
"comand": "command",
"comit": "commit",
"commerical": "commercial",
"comminucation": "communication",
"commited": "committed",
"commiting": "committing",
"committ": "commit",
"commoditiy": "commodity",
"compability": "compatibility",
"compatability": "compatibility",
"compatable": "compatible",
"compatibiliy": "compatibility",
"compatibilty": "compatibility",
"compilant": "compliant",
"compleatly": "completely",
"completly": "completely",
"complient": "compliant",
"compres": "compress",
"compresion": "compression",
"comression": "compression",
"conditionaly": "conditionally",
"configuratoin": "configuration",
"conjuction": "conjunction",
"connectinos": "connections",
"connnection": "connection",
"connnections": "connections",
"consistancy": "consistency",
"consistant": "consistent",
"containes": "contains",
"containts": "contains",
"contaisn": "contains",
"contence": "contents",
"continous": "continuous",
"continously": "continuously",
"continueing": "continuing",
"contraints": "constraints",
"convertor": "converter",
"convinient": "convenient",
"corected": "corrected",
"correponding": "corresponding",
"correponds": "corresponds",
"correspoding": "corresponding",
"cryptocraphic": "cryptographic",
"curently": "currently",
"dafault": "default",
"deafult": "default",
"deamon": "daemon",
"decompres": "decompress",
"definate": "definite",
"definately": "definitely",
"delare": "declare",
"delared": "declared",
"delares": "declares",
"delaring": "declaring",
"delemiter": "delimiter",
"delemiters": "delimiters",
"delimeter": "delimiter",
"delimeters": "delimiters",
"dependancies": "dependencies",
"dependancy": "dependency",
"dependant": "dependent",
"depreacted": "deprecated",
"depreacte": "deprecate",
"desactivate": "deactivate",
"detabase": "database",
"developement": "development",
"developped": "developed",
"developpement": "development",
"developper": "developer",
"developpment": "development",
"deveolpment": "development",
"devided": "divided",
"dictionnary": "dictionary",
"diplay": "display",
"disapeared": "disappeared",
"discontiguous": "noncontiguous",
"dispertion": "dispersion",
"dissapears": "disappears",
"docuentation": "documentation",
"documantation": "documentation",
"documentaion": "documentation",
"downlad": "download",
"downlads": "downloads",
"easilly": "easily",
"ecspecially": "especially",
"edditable": "editable",
"editting": "editing",
"efficently": "efficiently",
"eletronic": "electronic",
"enchanced": "enhanced",
"encorporating": "incorporating",
"endianess": "endianness",
"enhaced": "enhanced",
"enlightnment": "enlightenment",
"enocded": "encoded",
"enterily": "entirely",
"envireonment": "environment",
"enviroiment": "environment",
"enviroment": "environment",
"environement": "environment",
"environent": "environment",
"equiped": "equipped",
"equivelant": "equivalent",
"equivilant": "equivalent",
"estbalishment": "establishment",
"etsablishment": "establishment",
"etsbalishment": "establishment",
"excecutable": "executable",
"exceded": "exceeded",
"excellant": "excellent",
"exlcude": "exclude",
"exlcusive": "exclusive",
"expecially": "especially",
"explicitely": "explicitly",
"explict": "explicit",
"explictly": "explicitly",
"expresion": "expression",
"exprimental": "experimental",
"extensability": "extensibility",
"extention": "extension",
"extracter": "extractor",
"failuer": "failure",
"familar": "familiar",
"fatser": "faster",
"feauture": "feature",
"feautures": "features",
"fetaure": "feature",
"fetaures": "features",
"forse": "force",
"fortan": "fortran",
"forwardig": "forwarding",
"framwork": "framework",
"functionallity": "functionality",
"functionaly": "functionally",
"functionnality": "functionality",
"functiosn": "functions",
"functonality": "functionality",
"futhermore": "furthermore",
"generiously": "generously",
"grabing": "grabbing",
"grahical": "graphical",
"grahpical": "graphical",
"grapic": "graphic",
"guage": "gauge",
"halfs": "halves",
"handfull": "handful",
"heirarchically": "hierarchically",
"helpfull": "helpful",
"hierachy": "hierarchy",
"heirachy": "hierarchy",
"heirarchy": "hierarchy",
"hierarchie": "hierarchy",
"heirarchie": "hierarchy",
"howver": "however",
"immeadiately": "immediately",
"implemantation": "implementation",
"implemention": "implementation",
"incomming": "incoming",
"incompatabilities": "incompatibilities",
"incompatable": "incompatible",
"inconsistant": "inconsistent",
"indendation": "indentation",
"indended": "intended",
"independant": "independent",
"independed": "independent",
"informatiom": "information",
"informations": "information",
"infromation": "information",
"initalize": "initialize",
"initators": "initiators",
"initializiation": "initialization",
"inofficial": "unofficial",
"integreated": "integrated",
"integrety": "integrity",
"integrey": "integrity",
"intendet": "intended",
"interchangable": "interchangeable",
"intermittant": "intermittent",
"interupted": "interrupted",
"intial": "initial",
"intregral": "integral",
"intuative": "intuitive",
"invokation": "invocation",
"invokations": "invocations",
"jave": "java",
"langage": "language",
"langauage": "language",
"langauge": "language",
"langugage": "language",
"lauch": "launch",
"leightweight": "lightweight",
"lesstiff": "lesstif",
"libaries": "libraries",
"libary": "library",
"librairies": "libraries",
"libraris": "libraries",
"licenceing": "licencing",
"loggging": "logging",
"loggin": "login",
"logile": "logfile",
"machinary": "machinery",
"maintainance": "maintenance",
"maintainence": "maintenance",
"maintan": "maintain",
"makeing": "making",
"malplace": "misplace",
"malplaced": "misplaced",
"managable": "manageable",
"managment": "management",
"manoeuvering": "maneuvering",
"mathimatical": "mathematical",
"mathimatic": "mathematic",
"mathimatics": "mathematics",
"ment": "meant",
"messsage": "message",
"messsages": "messages",
"microprocesspr": "microprocessor",
"milliseonds": "milliseconds",
"miscelleneous": "miscellaneous",
"misformed": "malformed",
"mispelled": "misspelled",
"mispelt": "misspelt",
"mmnemonic": "mnemonic",
"modulues": "modules",
"monochorome": "monochrome",
"monochromo": "monochrome",
"monocrome": "monochrome",
"mroe": "more",
"multidimensionnal": "multidimensional",
"mulitplied": "multiplied",
"mutiple": "multiple",
"nam": "name",
"nams": "names",
"navagating": "navigating",
"nead": "need",
"neccesary": "necessary",
"neccessary": "necessary",
"necesary": "necessary",
"negotation": "negotiation",
"nescessary": "necessary",
"nessessary": "necessary",
"noticable": "noticeable",
"notications": "notifications",
"occationally": "occasionally",
"omitt": "omit",
"ommitted": "omitted",
"onself": "oneself",
"optionnal": "optional",
"optmizations": "optimizations",
"orientatied": "orientated",
"orientied": "oriented",
"ouput": "output",
"overaall": "overall",
"overriden": "overridden",
"pacakge": "package",
"pachage": "package",
"packacge": "package",
"packege": "package",
"packge": "package",
"pakage": "package",
"pallette": "palette",
"paramameters": "parameters",
"paramater": "parameter",
"parametes": "parameters",
"parametised": "parametrised",
"paramter": "parameter",
"paramters": "parameters",
"particularily": "particularly",
"pased": "passed",
"pendantic": "pedantic",
"peprocessor": "preprocessor",
"perfoming": "performing",
"permissons": "permissions",
"persistant": "persistent",
"plattform": "platform",
"pleaes": "please",
"ploting": "plotting",
"poinnter": "pointer",
"posible": "possible",
"possibilites": "possibilities",
"powerfull": "powerful",
"preceed": "precede",
"preceeded": "preceded",
"preceeding": "preceding",
"precendence": "precedence",
"precission": "precision",
"prefered": "preferred",
"prefferably": "preferably",
"prepaired": "prepared",
"primative": "primitive",
"princliple": "principle",
"priorty": "priority",
"priviledge": "privilege",
"priviledges": "privileges",
"procceed": "proceed",
"proccesors": "processors",
"proces": "process",
"processess": "processes",
"processessing": "processing",
"processpr": "processor",
"processsing": "processing",
"progams": "programs",
"programers": "programmers",
"programm": "program",
"programms": "programs",
"promps": "prompts",
"pronnounced": "pronounced",
"prononciation": "pronunciation",
"pronouce": "pronounce",
"pronunce": "pronounce",
"propery": "property",
"propigate": "propagate",
"propigation": "propagation",
"prosess": "process",
"protable": "portable",
"protcol": "protocol",
"protecion": "protection",
"protocoll": "protocol",
"psychadelic": "psychedelic",
"quering": "querying",
"reasearch": "research",
"reasearcher": "researcher",
"reasearchers": "researchers",
"recogniced": "recognised",
"recognizeable": "recognizable",
"recommanded": "recommended",
"redircet": "redirect",
"redirectrion": "redirection",
"reenable": "re-enable",
"reenabled": "re-enabled",
"reencode": "re-encode",
"refence": "reference",
"registerd": "registered",
"registraration": "registration",
"regulamentations": "regulations",
"remoote": "remote",
"removeable": "removable",
"repectively": "respectively",
"replacments": "replacements",
"replys": "replies",
"requiere": "require",
"requred": "required",
"requried": "required",
"resizeable": "resizable",
"ressize": "resize",
"ressource": "resource",
"ressources": "resources",
"retransmited": "retransmitted",
"retreive": "retrieve",
"retreived": "retrieved",
"rmeove": "remove",
"rmeoved": "removed",
"rmeoves": "removes",
"runned": "ran",
"runnning": "running",
"sacrifying": "sacrificing",
"safly": "safely",
"savable": "saveable",
"searchs": "searches",
"secund": "second",
"separatly": "separately",
"sepcify": "specify",
"seperated": "separated",
"seperately": "separately",
"seperate": "separate",
"seperatly": "separately",
"seperator": "separator",
"sepperate": "separate",
"sequencial": "sequential",
"serveral": "several",
"setts": "sets",
"similiar": "similar",
"simliar": "similar",
"softwares": "software",
"speach": "speech",
"speciefied": "specified",
"specifed": "specified",
"specificatin": "specification",
"specificaton": "specification",
"specifing": "specifying",
"speficied": "specified",
"speling": "spelling",
"splitted": "split",
"spreaded": "spread",
"staically": "statically",
"standardss": "standards",
"standart": "standard",
"staticly": "statically",
"subdirectoires": "subdirectories",
"suble": "subtle",
"succesfully": "successfully",
"succesful": "successful",
"sucessfully": "successfully",
"superflous": "superfluous",
"superseeded": "superseded",
"suplied": "supplied",
"suport": "support",
"suppored": "supported",
"supportin": "supporting",
"suppoted": "supported",
"suppported": "supported",
"suppport": "support",
"supress": "suppress",
"surpress": "suppress",
"surpresses": "suppresses",
"surpesses": "suppresses",
"suspicously": "suspiciously",
"synax": "syntax",
"synchonized": "synchronized",
"syncronize": "synchronize",
"syncronizing": "synchronizing",
"syncronus": "synchronous",
"syste": "system",
"sytem": "system",
"sythesis": "synthesis",
"taht": "that",
"targetted": "targeted",
"targetting": "targeting",
"teh": "the",
"throught": "through",
"transfered": "transferred",
"transfering": "transferring",
"trasmission": "transmission",
"treshold": "threshold",
"trigerring": "triggering",
"unconditionaly": "unconditionally",
"unecessary": "unnecessary",
"unexecpted": "unexpected",
"unfortunatelly": "unfortunately",
"unknonw": "unknown",
"unkown": "unknown",
"unneedingly": "unnecessarily",
"unuseful": "useless",
"usefule": "useful",
"usefull": "useful",
"usege": "usage",
"usera": "users",
"usualy": "usually",
"utilites": "utilities",
"utillities": "utilities",
"utilties": "utilities",
"utiltity": "utility",
"utitlty": "utility",
"variantions": "variations",
"varient": "variant",
"verbse": "verbose",
"verisons": "versions",
"verison": "version",
"verson": "version",
"visiters": "visitors",
"vitual": "virtual",
"whataver": "whatever",
"wheter": "whether",
"wierd": "weird",
"writting": "writing",
"yur": "your"
},
"partial": {
"recieve": "receive",
"uft8": "utf8",
"lenght": "length",
"heigth": "height",
"fuction": "function"
}
}
}

View file

@ -2,18 +2,17 @@
/** /**
* Enforces basic spelling. Spelling inside code is actually pretty hard to * Enforces basic spelling. Spelling inside code is actually pretty hard to
* get right without false positives. I take a conservative approach and * get right without false positives. I take a conservative approach and just
* just use a blacklisted set of words that are commonly spelled * use a blacklisted set of words that are commonly spelled incorrectly.
* incorrectly.
*/ */
final class ArcanistSpellingLinter extends ArcanistLinter { final class ArcanistSpellingLinter extends ArcanistLinter {
const LINT_SPELLING_PICKY = 0; const LINT_SPELLING_EXACT = 1;
const LINT_SPELLING_IMPORTANT = 1; const LINT_SPELLING_PARTIAL = 2;
private $partialWordRules; private $dictionaries = array();
private $wholeWordRules; private $exactWordRules = array();
private $severity; private $partialWordRules = array();
public function getInfoName() { public function getInfoName() {
return pht('Spellchecker'); return pht('Spellchecker');
@ -23,13 +22,6 @@ final class ArcanistSpellingLinter extends ArcanistLinter {
return pht('Detects common misspellings of English words.'); return pht('Detects common misspellings of English words.');
} }
public function __construct($severity = self::LINT_SPELLING_PICKY) {
$this->severity = $severity;
$this->wholeWordRules = ArcanistSpellingDefaultData::getFullWordRules();
$this->partialWordRules =
ArcanistSpellingDefaultData::getPartialWordRules();
}
public function getLinterName() { public function getLinterName() {
return 'SPELL'; return 'SPELL';
} }
@ -38,84 +30,94 @@ final class ArcanistSpellingLinter extends ArcanistLinter {
return 'spelling'; return 'spelling';
} }
public function addPartialWordRule( public function getLinterConfigurationOptions() {
$incorrect_word, $options = array(
$correct_word, 'spelling.dictionaries' => array(
$severity = self::LINT_SPELLING_IMPORTANT) { 'type' => 'optional list<string>',
'help' => pht('Pass in custom dictionaries.'),
),
);
$this->partialWordRules[$severity][$incorrect_word] = $correct_word; return $options + parent::getLinterConfigurationOptions();
} }
public function addWholeWordRule( public function setLinterConfigurationValue($key, $value) {
$incorrect_word, switch ($key) {
$correct_word, case 'spelling.dictionaries':
$severity = self::LINT_SPELLING_IMPORTANT) { foreach ($value as $dictionary) {
$this->loadDictionary($dictionary);
}
return;
}
$this->wholeWordRules[$severity][$incorrect_word] = $correct_word; return parent::setLinterConfigurationValue($key, $value);
}
public function loadDictionary($path) {
$root = $this->getEngine()->getWorkingCopy()->getProjectRoot();
$path = Filesystem::resolvePath($path, $root);
$dict = phutil_json_decode(Filesystem::readFile($path));
PhutilTypeSpec::checkMap(
$dict,
array(
'rules' => 'map<string, map<string, string>>',
));
$rules = $dict['rules'];
$this->dictionaries[] = $path;
$this->exactWordRules = array_merge(
$this->exactWordRules,
idx($rules, 'exact', array()));
$this->partialWordRules = array_merge(
$this->partialWordRules,
idx($rules, 'partial', array()));
}
public function addExactWordRule($misspelling, $correction) {
$this->exactWordRules = array_merge(
$this->exactWordRules,
array($misspelling => $correction));
}
public function addPartialWordRule($misspelling, $correction) {
$this->partialWordRules = array_merge(
$this->partialWordRules,
array($misspelling => $correction));
} }
public function getLintSeverityMap() { public function getLintSeverityMap() {
return array( return array(
self::LINT_SPELLING_PICKY => ArcanistLintSeverity::SEVERITY_WARNING, self::LINT_SPELLING_EXACT => ArcanistLintSeverity::SEVERITY_WARNING,
self::LINT_SPELLING_IMPORTANT => ArcanistLintSeverity::SEVERITY_ERROR, self::LINT_SPELLING_PARTIAL => ArcanistLintSeverity::SEVERITY_WARNING,
); );
} }
public function getLintNameMap() { public function getLintNameMap() {
return array( return array(
self::LINT_SPELLING_PICKY => pht('Possible Spelling Mistake'), self::LINT_SPELLING_EXACT => pht('Possible Spelling Mistake'),
self::LINT_SPELLING_IMPORTANT => pht('Possible Spelling Mistake'), self::LINT_SPELLING_PARTIAL => pht('Possible Spelling Mistake'),
); );
} }
public function lintPath($path) { public function lintPath($path) {
foreach ($this->partialWordRules as $severity => $wordlist) { // TODO: This is a bit hacky. If no dictionaries were specified, then add
if ($severity >= $this->severity) { // the default dictionary.
if (!$this->isCodeEnabled($severity)) { if (!$this->dictionaries) {
continue; $root = dirname(phutil_get_library_root('arcanist'));
$this->loadDictionary($root.'/resources/spelling/english.json');
} }
foreach ($wordlist as $misspell => $correct) {
$this->checkPartialWord($path, $misspell, $correct, $severity); foreach ($this->exactWordRules as $misspelling => $correction) {
$this->checkExactWord($path, $misspelling, $correction);
} }
foreach ($this->partialWordRules as $misspelling => $correction) {
$this->checkPartialWord($path, $misspelling, $correction);
} }
} }
foreach ($this->wholeWordRules as $severity => $wordlist) { private function checkExactWord($path, $word, $correction) {
if ($severity >= $this->severity) {
if (!$this->isCodeEnabled($severity)) {
continue;
}
foreach ($wordlist as $misspell => $correct) {
$this->checkWholeWord($path, $misspell, $correct, $severity);
}
}
}
}
protected function checkPartialWord($path, $word, $correct_word, $severity) {
$text = $this->getData($path);
$pos = 0;
while ($pos < strlen($text)) {
$next = stripos($text, $word, $pos);
if ($next === false) {
return;
}
$original = substr($text, $next, strlen($word));
$replacement = self::fixLetterCase($correct_word, $original);
$this->raiseLintAtOffset(
$next,
$severity,
pht(
"Possible spelling error. You wrote '%s', but did you mean '%s'?",
$word,
$correct_word),
$original,
$replacement);
$pos = $next + 1;
}
}
protected function checkWholeWord($path, $word, $correct_word, $severity) {
$text = $this->getData($path); $text = $this->getData($path);
$matches = array(); $matches = array();
$num_matches = preg_match_all( $num_matches = preg_match_all(
@ -128,27 +130,51 @@ final class ArcanistSpellingLinter extends ArcanistLinter {
} }
foreach ($matches[0] as $match) { foreach ($matches[0] as $match) {
$original = $match[0]; $original = $match[0];
$replacement = self::fixLetterCase($correct_word, $original); $replacement = self::fixLetterCase($correction, $original);
$this->raiseLintAtOffset( $this->raiseLintAtOffset(
$match[1], $match[1],
$severity, self::LINT_SPELLING_EXACT,
pht( pht(
"Possible spelling error. You wrote '%s', but did you mean '%s'?", "Possible spelling error. You wrote '%s', but did you mean '%s'?",
$word, $word,
$correct_word), $correction),
$original, $original,
$replacement); $replacement);
} }
} }
private function checkPartialWord($path, $word, $correction) {
$text = $this->getData($path);
$pos = 0;
while ($pos < strlen($text)) {
$next = stripos($text, $word, $pos);
if ($next === false) {
return;
}
$original = substr($text, $next, strlen($word));
$replacement = self::fixLetterCase($correction, $original);
$this->raiseLintAtOffset(
$next,
self::LINT_SPELLING_PARTIAL,
pht(
"Possible spelling error. You wrote '%s', but did you mean '%s'?",
$word,
$correction),
$original,
$replacement);
$pos = $next + 1;
}
}
public static function fixLetterCase($string, $case) { public static function fixLetterCase($string, $case) {
if ($case == strtolower($case)) { switch ($case) {
case strtolower($case):
return strtolower($string); return strtolower($string);
} else if ($case == strtoupper($case)) { case strtoupper($case):
return strtoupper($string); return strtoupper($string);
} else if ($case == ucwords(strtolower($case))) { case ucwords(strtolower($case)):
return ucwords(strtolower($string)); return ucwords(strtolower($string));
} else { default:
return null; return null;
} }
} }

View file

@ -6,7 +6,7 @@ final class ArcanistSpellingLinterTestCase
public function testSpellingLint() { public function testSpellingLint() {
$linter = new ArcanistSpellingLinter(); $linter = new ArcanistSpellingLinter();
$linter->addPartialWordRule('supermn', 'superman'); $linter->addPartialWordRule('supermn', 'superman');
$linter->addWholeWordRule('batmn', 'batman'); $linter->addExactWordRule('batmn', 'batman');
$this->executeTestsInDirectory( $this->executeTestsInDirectory(
dirname(__FILE__).'/spelling/', dirname(__FILE__).'/spelling/',

View file

@ -10,14 +10,14 @@ didn't remove acording
Added ZZZZsupermnZZZZ Added ZZZZsupermnZZZZ
Added full batmn batmnZZZZ Added full batmn batmnZZZZ
~~~~~~~~~~ ~~~~~~~~~~
error:2:1 warning:2:1
error:4:10 warning:4:10
error:5:15 warning:5:15
error:7:7 warning:7:7
error:7:12 warning:7:12
warning:9:15 warning:9:15
error:10:11 warning:10:11
error:11:12 warning:11:12
~~~~~~~~~~ ~~~~~~~~~~
~~~~~~~~~~ ~~~~~~~~~~
{ {

View file

@ -1,628 +0,0 @@
<?php
/**
 * Contains default spelling correction rules for ArcanistSpellingLinter.
 * Inside its own file to keep logic of ArcanistSpellingLinter clean.
 *
 * Both accessors return a list of exactly two maps. Each map has the
 * misspelling as its key and the suggested correction as its value.
 *
 * NOTE(review): the two positions presumably correspond to the linter's two
 * severity tiers; this file does not show how they are consumed, so confirm
 * against ArcanistSpellingLinter before relying on the order.
 */
class ArcanistSpellingDefaultData {

  /**
   * Get the default rules for matching whole words.
   *
   * The first map is the large general-purpose list; the second map holds
   * the single rule `teh` => `the`.
   *
   * NOTE(review): the pruning guidance below asks for entries with special
   * characters to be removed, yet a few corrections still contain a hyphen
   * or a space (`re-enable`, `re-enabled`, `re-encode`, `a lot`). They are
   * left untouched here; confirm whether they are intentional.
   *
   * @return array  List of two maps of misspelling => correction.
   */
  // From http://cpansearch.perl.org/src/APOCAL/Pod-Spell-CommonMistakes-1.000/lib/Pod/Spell/CommonMistakes/WordList.pm
  // Pruned by hand. If you modify this list, remember to remove case only
  // spelling suggestions and any with special characters like - or '.
  public static function getFullWordRules() {
    return array(
      array(
        // Variable common
        'abandonning' => 'abandoning',
        'abigious' => 'ambiguous',
        'abitrate' => 'arbitrate',
        'abov' => 'above',
        'absense' => 'absence',
        'absolut' => 'absolute',
        'absoulte' => 'absolute',
        'acceleratoin' => 'acceleration',
        'accelleration' => 'acceleration',
        'accesing' => 'accessing',
        'accesnt' => 'accent',
        'accessable' => 'accessible',
        'accesss' => 'access',
        'accidentaly' => 'accidentally',
        'accidentually' => 'accidentally',
        'accomodate' => 'accommodate',
        'accomodates' => 'accommodates',
        'accout' => 'account',
        'acess' => 'access',
        'acessable' => 'accessible',
        'acient' => 'ancient',
        'ackowledge' => 'acknowledge',
        'ackowledged' => 'acknowledged',
        // Previously mapped to itself, so the suggestion repeated the typo.
        'acknowldegement' => 'acknowledgement',
        'acording' => 'according',
        'activete' => 'activate',
        'acumulating' => 'accumulating',
        'addional' => 'additional',
        'additionaly' => 'additionally',
        'addreses' => 'addresses',
        'aditional' => 'additional',
        'aditionally' => 'additionally',
        'aditionaly' => 'additionally',
        'adress' => 'address',
        'adresses' => 'addresses',
        'adviced' => 'advised',
        'afecting' => 'affecting',
        'albumns' => 'albums',
        'alegorical' => 'allegorical',
        'algorith' => 'algorithm',
        'algorithmical' => 'algorithmic',
        'algoritm' => 'algorithm',
        'algoritms' => 'algorithms',
        'algorrithm' => 'algorithm',
        'algorritm' => 'algorithm',
        'allpication' => 'application',
        'alogirhtms' => 'algorithms',
        'alot' => 'a lot',
        'alow' => 'allow',
        'alows' => 'allows',
        'altough' => 'although',
        'ambigious' => 'ambiguous',
        'amoung' => 'among',
        'amout' => 'amount',
        'analysator' => 'analyzer',
        'ang' => 'and',
        'anniversery' => 'anniversary',
        'annoucement' => 'announcement',
        'anomolies' => 'anomalies',
        'anomoly' => 'anomaly',
        'aplication' => 'application',
        'appearence' => 'appearance',
        'appliction' => 'application',
        'applictions' => 'applications',
        'appropiate' => 'appropriate',
        'appropriatly' => 'appropriately',
        'aquired' => 'acquired',
        'arbitary' => 'arbitrary',
        'architechture' => 'architecture',
        'arguement' => 'argument',
        'arguements' => 'arguments',
        'aritmetic' => 'arithmetic',
        'arraival' => 'arrival',
        'artifical' => 'artificial',
        'artillary' => 'artillery',
        'assigment' => 'assignment',
        'assigments' => 'assignments',
        'assistent' => 'assistant',
        'asuming' => 'assuming',
        'asycronous' => 'asynchronous',
        'atomatically' => 'automatically',
        'attachement' => 'attachment',
        'attemps' => 'attempts',
        'attruibutes' => 'attributes',
        'authentification' => 'authentication',
        'automaticaly' => 'automatically',
        'automaticly' => 'automatically',
        'automatize' => 'automate',
        'automatized' => 'automated',
        'automatizes' => 'automates',
        'autonymous' => 'autonomous',
        'auxilliary' => 'auxiliary',
        'avaiable' => 'available',
        'availabled' => 'available',
        'availablity' => 'availability',
        'availale' => 'available',
        'availavility' => 'availability',
        'availble' => 'available',
        'availiable' => 'available',
        'avaliable' => 'available',
        'backgroud' => 'background',
        'bahavior' => 'behavior',
        'baloon' => 'balloon',
        'baloons' => 'balloons',
        'bandwith' => 'bandwidth',
        'batery' => 'battery',
        'becomming' => 'becoming',
        'becuase' => 'because',
        'begining' => 'beginning',
        'bianries' => 'binaries',
        'calender' => 'calendar',
        'cancelation' => 'cancellation',
        'capabilites' => 'capabilities',
        'capatibilities' => 'capabilities',
        'cariage' => 'carriage',
        'challange' => 'challenge',
        'challanges' => 'challenges',
        'changable' => 'changeable',
        'charachter' => 'character',
        'charachters' => 'characters',
        'charater' => 'character',
        'charaters' => 'characters',
        'charcter' => 'character',
        'childs' => 'children',
        'chnage' => 'change',
        'chnages' => 'changes',
        'choosen' => 'chosen',
        'collapsable' => 'collapsible',
        'colorfull' => 'colorful',
        'comand' => 'command',
        'comit' => 'commit',
        'commerical' => 'commercial',
        'comminucation' => 'communication',
        'commited' => 'committed',
        'commiting' => 'committing',
        'committ' => 'commit',
        'commoditiy' => 'commodity',
        'compability' => 'compatibility',
        'compatability' => 'compatibility',
        'compatable' => 'compatible',
        'compatibiliy' => 'compatibility',
        'compatibilty' => 'compatibility',
        'compilant' => 'compliant',
        'compleatly' => 'completely',
        'completly' => 'completely',
        'complient' => 'compliant',
        'compres' => 'compress',
        'compresion' => 'compression',
        'comression' => 'compression',
        'conditionaly' => 'conditionally',
        'configuratoin' => 'configuration',
        'conjuction' => 'conjunction',
        'connectinos' => 'connections',
        'connnection' => 'connection',
        'connnections' => 'connections',
        'consistancy' => 'consistency',
        'consistant' => 'consistent',
        'containes' => 'contains',
        'containts' => 'contains',
        'contaisn' => 'contains',
        'contence' => 'contents',
        'continous' => 'continuous',
        'continously' => 'continuously',
        'continueing' => 'continuing',
        'contraints' => 'constraints',
        'convertor' => 'converter',
        'convinient' => 'convenient',
        'corected' => 'corrected',
        'correponding' => 'corresponding',
        'correponds' => 'corresponds',
        'correspoding' => 'corresponding',
        'cryptocraphic' => 'cryptographic',
        'curently' => 'currently',
        'dafault' => 'default',
        'deafult' => 'default',
        'deamon' => 'daemon',
        'decompres' => 'decompress',
        'definate' => 'definite',
        'definately' => 'definitely',
        'delare' => 'declare',
        'delared' => 'declared',
        'delares' => 'declares',
        'delaring' => 'declaring',
        'delemiter' => 'delimiter',
        'delemiters' => 'delimiters',
        'delimeter' => 'delimiter',
        'delimeters' => 'delimiters',
        'dependancies' => 'dependencies',
        'dependancy' => 'dependency',
        'dependant' => 'dependent',
        'depreacted' => 'deprecated',
        'depreacte' => 'deprecate',
        'desactivate' => 'deactivate',
        'detabase' => 'database',
        'developement' => 'development',
        'developped' => 'developed',
        'developpement' => 'development',
        'developper' => 'developer',
        'developpment' => 'development',
        'deveolpment' => 'development',
        'devided' => 'divided',
        'dictionnary' => 'dictionary',
        'diplay' => 'display',
        'disapeared' => 'disappeared',
        'discontiguous' => 'noncontiguous',
        'dispertion' => 'dispersion',
        'dissapears' => 'disappears',
        'docuentation' => 'documentation',
        'documantation' => 'documentation',
        'documentaion' => 'documentation',
        'downlad' => 'download',
        'downlads' => 'downloads',
        'easilly' => 'easily',
        'ecspecially' => 'especially',
        'edditable' => 'editable',
        'editting' => 'editing',
        'efficently' => 'efficiently',
        'eletronic' => 'electronic',
        'enchanced' => 'enhanced',
        'encorporating' => 'incorporating',
        'endianess' => 'endianness',
        'enhaced' => 'enhanced',
        'enlightnment' => 'enlightenment',
        'enocded' => 'encoded',
        'enterily' => 'entirely',
        'envireonment' => 'environment',
        'enviroiment' => 'environment',
        'enviroment' => 'environment',
        'environement' => 'environment',
        'environent' => 'environment',
        'equiped' => 'equipped',
        'equivelant' => 'equivalent',
        'equivilant' => 'equivalent',
        'estbalishment' => 'establishment',
        'etsablishment' => 'establishment',
        'etsbalishment' => 'establishment',
        'excecutable' => 'executable',
        'exceded' => 'exceeded',
        'excellant' => 'excellent',
        'exlcude' => 'exclude',
        'exlcusive' => 'exclusive',
        'expecially' => 'especially',
        'explicitely' => 'explicitly',
        'explict' => 'explicit',
        'explictly' => 'explicitly',
        'expresion' => 'expression',
        'exprimental' => 'experimental',
        'extensability' => 'extensibility',
        'extention' => 'extension',
        'extracter' => 'extractor',
        'failuer' => 'failure',
        'familar' => 'familiar',
        'fatser' => 'faster',
        'feauture' => 'feature',
        'feautures' => 'features',
        'fetaure' => 'feature',
        'fetaures' => 'features',
        'forse' => 'force',
        'fortan' => 'fortran',
        'forwardig' => 'forwarding',
        'framwork' => 'framework',
        'functionallity' => 'functionality',
        'functionaly' => 'functionally',
        'functionnality' => 'functionality',
        'functiosn' => 'functions',
        'functonality' => 'functionality',
        'futhermore' => 'furthermore',
        'generiously' => 'generously',
        'grabing' => 'grabbing',
        'grahical' => 'graphical',
        'grahpical' => 'graphical',
        'grapic' => 'graphic',
        'guage' => 'gauge',
        'halfs' => 'halves',
        'handfull' => 'handful',
        'heirarchically' => 'hierarchically',
        'helpfull' => 'helpful',
        'hierachy' => 'hierarchy',
        'heirachy' => 'hierarchy',
        'heirarchy' => 'hierarchy',
        'hierarchie' => 'hierarchy',
        'heirarchie' => 'hierarchy',
        'howver' => 'however',
        'immeadiately' => 'immediately',
        'implemantation' => 'implementation',
        'implemention' => 'implementation',
        'incomming' => 'incoming',
        'incompatabilities' => 'incompatibilities',
        'incompatable' => 'incompatible',
        'inconsistant' => 'inconsistent',
        'indendation' => 'indentation',
        'indended' => 'intended',
        'independant' => 'independent',
        'independed' => 'independent',
        'informatiom' => 'information',
        'informations' => 'information',
        'infromation' => 'information',
        'initalize' => 'initialize',
        'initators' => 'initiators',
        'initializiation' => 'initialization',
        'inofficial' => 'unofficial',
        'integreated' => 'integrated',
        'integrety' => 'integrity',
        'integrey' => 'integrity',
        'intendet' => 'intended',
        'interchangable' => 'interchangeable',
        'intermittant' => 'intermittent',
        'interupted' => 'interrupted',
        'intial' => 'initial',
        'intregral' => 'integral',
        'intuative' => 'intuitive',
        'invokation' => 'invocation',
        'invokations' => 'invocations',
        'jave' => 'java',
        'langage' => 'language',
        'langauage' => 'language',
        'langauge' => 'language',
        'langugage' => 'language',
        'lauch' => 'launch',
        'leightweight' => 'lightweight',
        'lesstiff' => 'lesstif',
        'libaries' => 'libraries',
        'libary' => 'library',
        'librairies' => 'libraries',
        'libraris' => 'libraries',
        'licenceing' => 'licencing',
        'loggging' => 'logging',
        'loggin' => 'login',
        'logile' => 'logfile',
        'machinary' => 'machinery',
        'maintainance' => 'maintenance',
        'maintainence' => 'maintenance',
        'maintan' => 'maintain',
        'makeing' => 'making',
        'malplace' => 'misplace',
        'malplaced' => 'misplaced',
        'managable' => 'manageable',
        'managment' => 'management',
        'manoeuvering' => 'maneuvering',
        'mathimatical' => 'mathematical',
        'mathimatic' => 'mathematic',
        'mathimatics' => 'mathematics',
        'ment' => 'meant',
        'messsage' => 'message',
        'messsages' => 'messages',
        'microprocesspr' => 'microprocessor',
        'milliseonds' => 'milliseconds',
        'miscelleneous' => 'miscellaneous',
        'misformed' => 'malformed',
        'mispelled' => 'misspelled',
        'mispelt' => 'misspelt',
        'mmnemonic' => 'mnemonic',
        'modulues' => 'modules',
        'monochorome' => 'monochrome',
        'monochromo' => 'monochrome',
        'monocrome' => 'monochrome',
        'mroe' => 'more',
        'multidimensionnal' => 'multidimensional',
        'mulitplied' => 'multiplied',
        'mutiple' => 'multiple',
        'nam' => 'name',
        'nams' => 'names',
        'navagating' => 'navigating',
        'nead' => 'need',
        'neccesary' => 'necessary',
        'neccessary' => 'necessary',
        'necesary' => 'necessary',
        'negotation' => 'negotiation',
        'nescessary' => 'necessary',
        'nessessary' => 'necessary',
        'noticable' => 'noticeable',
        'notications' => 'notifications',
        'occationally' => 'occasionally',
        'omitt' => 'omit',
        'ommitted' => 'omitted',
        'onself' => 'oneself',
        'optionnal' => 'optional',
        'optmizations' => 'optimizations',
        'orientatied' => 'orientated',
        'orientied' => 'oriented',
        'ouput' => 'output',
        'overaall' => 'overall',
        'overriden' => 'overridden',
        'pacakge' => 'package',
        'pachage' => 'package',
        'packacge' => 'package',
        'packege' => 'package',
        'packge' => 'package',
        'pakage' => 'package',
        'pallette' => 'palette',
        'paramameters' => 'parameters',
        'paramater' => 'parameter',
        'parametes' => 'parameters',
        'parametised' => 'parametrised',
        'paramter' => 'parameter',
        'paramters' => 'parameters',
        'particularily' => 'particularly',
        'pased' => 'passed',
        'pendantic' => 'pedantic',
        'peprocessor' => 'preprocessor',
        'perfoming' => 'performing',
        'permissons' => 'permissions',
        'persistant' => 'persistent',
        'plattform' => 'platform',
        'pleaes' => 'please',
        'ploting' => 'plotting',
        'poinnter' => 'pointer',
        'posible' => 'possible',
        'possibilites' => 'possibilities',
        'powerfull' => 'powerful',
        'preceed' => 'precede',
        'preceeded' => 'preceded',
        'preceeding' => 'preceding',
        'precendence' => 'precedence',
        'precission' => 'precision',
        'prefered' => 'preferred',
        'prefferably' => 'preferably',
        'prepaired' => 'prepared',
        'primative' => 'primitive',
        'princliple' => 'principle',
        'priorty' => 'priority',
        'priviledge' => 'privilege',
        'priviledges' => 'privileges',
        'procceed' => 'proceed',
        'proccesors' => 'processors',
        'proces' => 'process',
        'processess' => 'processes',
        'processessing' => 'processing',
        'processpr' => 'processor',
        'processsing' => 'processing',
        'progams' => 'programs',
        'programers' => 'programmers',
        'programm' => 'program',
        'programms' => 'programs',
        'promps' => 'prompts',
        'pronnounced' => 'pronounced',
        'prononciation' => 'pronunciation',
        'pronouce' => 'pronounce',
        'pronunce' => 'pronounce',
        'propery' => 'property',
        'propigate' => 'propagate',
        'propigation' => 'propagation',
        'prosess' => 'process',
        'protable' => 'portable',
        'protcol' => 'protocol',
        'protecion' => 'protection',
        'protocoll' => 'protocol',
        'psychadelic' => 'psychedelic',
        'quering' => 'querying',
        'reasearch' => 'research',
        'reasearcher' => 'researcher',
        'reasearchers' => 'researchers',
        'recogniced' => 'recognised',
        'recognizeable' => 'recognizable',
        'recommanded' => 'recommended',
        'redircet' => 'redirect',
        'redirectrion' => 'redirection',
        'reenable' => 're-enable',
        'reenabled' => 're-enabled',
        'reencode' => 're-encode',
        'refence' => 'reference',
        'registerd' => 'registered',
        'registraration' => 'registration',
        'regulamentations' => 'regulations',
        'remoote' => 'remote',
        'removeable' => 'removable',
        'repectively' => 'respectively',
        'replacments' => 'replacements',
        'replys' => 'replies',
        'requiere' => 'require',
        'requred' => 'required',
        'requried' => 'required',
        'resizeable' => 'resizable',
        'ressize' => 'resize',
        'ressource' => 'resource',
        'ressources' => 'resources',
        'retransmited' => 'retransmitted',
        'retreive' => 'retrieve',
        'retreived' => 'retrieved',
        'rmeove' => 'remove',
        'rmeoved' => 'removed',
        'rmeoves' => 'removes',
        'runned' => 'ran',
        'runnning' => 'running',
        'sacrifying' => 'sacrificing',
        'safly' => 'safely',
        'savable' => 'saveable',
        'searchs' => 'searches',
        'secund' => 'second',
        'separatly' => 'separately',
        'sepcify' => 'specify',
        'seperated' => 'separated',
        'seperately' => 'separately',
        'seperate' => 'separate',
        'seperatly' => 'separately',
        'seperator' => 'separator',
        'sepperate' => 'separate',
        'sequencial' => 'sequential',
        'serveral' => 'several',
        'setts' => 'sets',
        'similiar' => 'similar',
        'simliar' => 'similar',
        'softwares' => 'software',
        'speach' => 'speech',
        'speciefied' => 'specified',
        'specifed' => 'specified',
        'specificatin' => 'specification',
        'specificaton' => 'specification',
        'specifing' => 'specifying',
        'speficied' => 'specified',
        'speling' => 'spelling',
        'splitted' => 'split',
        'spreaded' => 'spread',
        'staically' => 'statically',
        'standardss' => 'standards',
        'standart' => 'standard',
        'staticly' => 'statically',
        'subdirectoires' => 'subdirectories',
        'suble' => 'subtle',
        'succesfully' => 'successfully',
        'succesful' => 'successful',
        'sucessfully' => 'successfully',
        'superflous' => 'superfluous',
        'superseeded' => 'superseded',
        'suplied' => 'supplied',
        'suport' => 'support',
        'suppored' => 'supported',
        'supportin' => 'supporting',
        'suppoted' => 'supported',
        'suppported' => 'supported',
        'suppport' => 'support',
        'supress' => 'suppress',
        'surpress' => 'suppress',
        'surpresses' => 'suppresses',
        'surpesses' => 'suppresses',
        'suspicously' => 'suspiciously',
        'synax' => 'syntax',
        'synchonized' => 'synchronized',
        'syncronize' => 'synchronize',
        'syncronizing' => 'synchronizing',
        'syncronus' => 'synchronous',
        'syste' => 'system',
        'sytem' => 'system',
        'sythesis' => 'synthesis',
        'taht' => 'that',
        'targetted' => 'targeted',
        'targetting' => 'targeting',
        'throught' => 'through',
        'transfered' => 'transferred',
        'transfering' => 'transferring',
        'trasmission' => 'transmission',
        'treshold' => 'threshold',
        'trigerring' => 'triggering',
        'unconditionaly' => 'unconditionally',
        'unecessary' => 'unnecessary',
        'unexecpted' => 'unexpected',
        'unfortunatelly' => 'unfortunately',
        'unknonw' => 'unknown',
        'unkown' => 'unknown',
        'unneedingly' => 'unnecessarily',
        'unuseful' => 'useless',
        'usefule' => 'useful',
        'usefull' => 'useful',
        'usege' => 'usage',
        'usera' => 'users',
        'usualy' => 'usually',
        'utilites' => 'utilities',
        'utillities' => 'utilities',
        'utilties' => 'utilities',
        'utiltity' => 'utility',
        'utitlty' => 'utility',
        'variantions' => 'variations',
        'varient' => 'variant',
        'verbse' => 'verbose',
        'verisons' => 'versions',
        'verison' => 'version',
        'verson' => 'version',
        'visiters' => 'visitors',
        'vitual' => 'virtual',
        'whataver' => 'whatever',
        'wheter' => 'whether',
        'wierd' => 'weird',
        'writting' => 'writing',
        'yur' => 'your',
        // Variable common_cpan
        'refering' => 'referring',
        'writeable' => 'writable',
        'nineth' => 'ninth',
        'ommited' => 'omitted',
        'omited' => 'omitted',
        'requrie' => 'require',
        'existant' => 'existent',
        'agument' => 'argument',
        'destionation' => 'destination',
      ), array(
        'teh' => 'the',
      ),
    );
  }

  /**
   * Get the default rules for matching misspellings inside longer words.
   *
   * The first map is empty; every default partial-word rule lives in the
   * second map.
   *
   * @return array  List of two maps of misspelling => correction.
   */
  public static function getPartialWordRules() {
    return array(
      array(),
      array(
        'recieve' => 'receive',
        'uft8' => 'utf8',
        'lenght' => 'length',
        'heigth' => 'height',
        'fuction' => 'function',
      ),
    );
  }
}