diff --git a/.arclint b/.arclint index 622e8b47..63d108e6 100644 --- a/.arclint +++ b/.arclint @@ -38,9 +38,7 @@ }, "spelling": { "type": "spelling", - "exclude": [ - "(^src/lint/linter/spelling/ArcanistSpellingDefaultData\\.php$)" - ] + "exclude": "(resources/spelling/.*\\.json$)" }, "text": { "type": "text" diff --git a/resources/spelling/english.json b/resources/spelling/english.json new file mode 100644 index 00000000..f3d12874 --- /dev/null +++ b/resources/spelling/english.json @@ -0,0 +1,598 @@ +{ + "rules": { + "exact": { + "abandonning": "abandoning", + "abigious": "ambiguous", + "abitrate": "arbitrate", + "abov": "above", + "absense": "absence", + "absolut": "absolute", + "absoulte": "absolute", + "acceleratoin": "acceleration", + "accelleration": "acceleration", + "accesing": "accessing", + "accesnt": "accent", + "accessable": "accessible", + "accesss": "access", + "accidentaly": "accidentally", + "accidentually": "accidentally", + "accomodate": "accommodate", + "accomodates": "accommodates", + "accout": "account", + "acess": "access", + "acessable": "accessible", + "acient": "ancient", + "ackowledge": "acknowledge", + "ackowledged": "acknowledged", + "acknowldegement": "acknowledgement", + "acording": "according", + "activete": "activate", + "acumulating": "accumulating", + "addional": "additional", + "additionaly": "additionally", + "addreses": "addresses", + "aditional": "additional", + "aditionally": "additionally", + "aditionaly": "additionally", + "adress": "address", + "adresses": "addresses", + "adviced": "advised", + "afecting": "affecting", + "albumns": "albums", + "alegorical": "allegorical", + "algorith": "algorithm", + "algorithmical": "algorithmic", + "algoritm": "algorithm", + "algoritms": "algorithms", + "algorrithm": "algorithm", + "algorritm": "algorithm", + "allpication": "application", + "alogirhtms": "algorithms", + "alot": "a lot", + "alow": "allow", + "alows": "allows", + "altough": "although", + "ambigious": "ambiguous", + 
"amoung": "among", + "amout": "amount", + "analysator": "analyzer", + "ang": "and", + "anniversery": "anniversary", + "annoucement": "announcement", + "anomolies": "anomalies", + "anomoly": "anomaly", + "aplication": "application", + "appearence": "appearance", + "appliction": "application", + "applictions": "applications", + "appropiate": "appropriate", + "appropriatly": "appropriately", + "aquired": "acquired", + "arbitary": "arbitrary", + "architechture": "architecture", + "arguement": "argument", + "arguements": "arguments", + "aritmetic": "arithmetic", + "arraival": "arrival", + "artifical": "artificial", + "artillary": "artillery", + "assigment": "assignment", + "assigments": "assignments", + "assistent": "assistant", + "asuming": "assuming", + "asycronous": "asynchronous", + "atomatically": "automatically", + "attachement": "attachment", + "attemps": "attempts", + "attruibutes": "attributes", + "authentification": "authentication", + "automaticaly": "automatically", + "automaticly": "automatically", + "automatize": "automate", + "automatized": "automated", + "automatizes": "automates", + "autonymous": "autonomous", + "auxilliary": "auxiliary", + "avaiable": "available", + "availabled": "available", + "availablity": "availability", + "availale": "available", + "availavility": "availability", + "availble": "available", + "availiable": "available", + "avaliable": "available", + "backgroud": "background", + "bahavior": "behavior", + "baloon": "balloon", + "baloons": "balloons", + "bandwith": "bandwidth", + "batery": "battery", + "becomming": "becoming", + "becuase": "because", + "begining": "beginning", + "bianries": "binaries", + "calender": "calendar", + "cancelation": "cancellation", + "capabilites": "capabilities", + "capatibilities": "capabilities", + "cariage": "carriage", + "challange": "challenge", + "challanges": "challenges", + "changable": "changeable", + "charachter": "character", + "charachters": "characters", + "charater": "character", + 
"charaters": "characters", + "charcter": "character", + "childs": "children", + "chnage": "change", + "chnages": "changes", + "choosen": "chosen", + "collapsable": "collapsible", + "colorfull": "colorful", + "comand": "command", + "comit": "commit", + "commerical": "commercial", + "comminucation": "communication", + "commited": "committed", + "commiting": "committing", + "committ": "commit", + "commoditiy": "commodity", + "compability": "compatibility", + "compatability": "compatibility", + "compatable": "compatible", + "compatibiliy": "compatibility", + "compatibilty": "compatibility", + "compilant": "compliant", + "compleatly": "completely", + "completly": "completely", + "complient": "compliant", + "compres": "compress", + "compresion": "compression", + "comression": "compression", + "conditionaly": "conditionally", + "configuratoin": "configuration", + "conjuction": "conjunction", + "connectinos": "connections", + "connnection": "connection", + "connnections": "connections", + "consistancy": "consistency", + "consistant": "consistent", + "containes": "contains", + "containts": "contains", + "contaisn": "contains", + "contence": "contents", + "continous": "continuous", + "continously": "continuously", + "continueing": "continuing", + "contraints": "constraints", + "convertor": "converter", + "convinient": "convenient", + "corected": "corrected", + "correponding": "corresponding", + "correponds": "corresponds", + "correspoding": "corresponding", + "cryptocraphic": "cryptographic", + "curently": "currently", + "dafault": "default", + "deafult": "default", + "deamon": "daemon", + "decompres": "decompress", + "definate": "definite", + "definately": "definitely", + "delare": "declare", + "delared": "declared", + "delares": "declares", + "delaring": "declaring", + "delemiter": "delimiter", + "delemiters": "delimiters", + "delimeter": "delimiter", + "delimeters": "delimiters", + "dependancies": "dependencies", + "dependancy": "dependency", + "dependant": "dependent", + 
"depreacted": "deprecated", + "depreacte": "deprecate", + "desactivate": "deactivate", + "detabase": "database", + "developement": "development", + "developped": "developed", + "developpement": "development", + "developper": "developer", + "developpment": "development", + "deveolpment": "development", + "devided": "divided", + "dictionnary": "dictionary", + "diplay": "display", + "disapeared": "disappeared", + "discontiguous": "noncontiguous", + "dispertion": "dispersion", + "dissapears": "disappears", + "docuentation": "documentation", + "documantation": "documentation", + "documentaion": "documentation", + "downlad": "download", + "downlads": "downloads", + "easilly": "easily", + "ecspecially": "especially", + "edditable": "editable", + "editting": "editing", + "efficently": "efficiently", + "eletronic": "electronic", + "enchanced": "enhanced", + "encorporating": "incorporating", + "endianess": "endianness", + "enhaced": "enhanced", + "enlightnment": "enlightenment", + "enocded": "encoded", + "enterily": "entirely", + "envireonment": "environment", + "enviroiment": "environment", + "enviroment": "environment", + "environement": "environment", + "environent": "environment", + "equiped": "equipped", + "equivelant": "equivalent", + "equivilant": "equivalent", + "estbalishment": "establishment", + "etsablishment": "establishment", + "etsbalishment": "establishment", + "excecutable": "executable", + "exceded": "exceeded", + "excellant": "excellent", + "exlcude": "exclude", + "exlcusive": "exclusive", + "expecially": "especially", + "explicitely": "explicitly", + "explict": "explicit", + "explictly": "explicitly", + "expresion": "expression", + "exprimental": "experimental", + "extensability": "extensibility", + "extention": "extension", + "extracter": "extractor", + "failuer": "failure", + "familar": "familiar", + "fatser": "faster", + "feauture": "feature", + "feautures": "features", + "fetaure": "feature", + "fetaures": "features", + "forse": "force", + "fortan": 
"fortran", + "forwardig": "forwarding", + "framwork": "framework", + "functionallity": "functionality", + "functionaly": "functionally", + "functionnality": "functionality", + "functiosn": "functions", + "functonality": "functionality", + "futhermore": "furthermore", + "generiously": "generously", + "grabing": "grabbing", + "grahical": "graphical", + "grahpical": "graphical", + "grapic": "graphic", + "guage": "gauge", + "halfs": "halves", + "handfull": "handful", + "heirarchically": "hierarchically", + "helpfull": "helpful", + "hierachy": "hierarchy", + "heirachy": "hierarchy", + "heirarchy": "hierarchy", + "hierarchie": "hierarchy", + "heirarchie": "hierarchy", + "howver": "however", + "immeadiately": "immediately", + "implemantation": "implementation", + "implemention": "implementation", + "incomming": "incoming", + "incompatabilities": "incompatibilities", + "incompatable": "incompatible", + "inconsistant": "inconsistent", + "indendation": "indentation", + "indended": "intended", + "independant": "independent", + "independed": "independent", + "informatiom": "information", + "informations": "information", + "infromation": "information", + "initalize": "initialize", + "initators": "initiators", + "initializiation": "initialization", + "inofficial": "unofficial", + "integreated": "integrated", + "integrety": "integrity", + "integrey": "integrity", + "intendet": "intended", + "interchangable": "interchangeable", + "intermittant": "intermittent", + "interupted": "interrupted", + "intial": "initial", + "intregral": "integral", + "intuative": "intuitive", + "invokation": "invocation", + "invokations": "invocations", + "jave": "java", + "langage": "language", + "langauage": "language", + "langauge": "language", + "langugage": "language", + "lauch": "launch", + "leightweight": "lightweight", + "lesstiff": "lesstif", + "libaries": "libraries", + "libary": "library", + "librairies": "libraries", + "libraris": "libraries", + "licenceing": "licencing", + "loggging": 
"logging", + "loggin": "login", + "logile": "logfile", + "machinary": "machinery", + "maintainance": "maintenance", + "maintainence": "maintenance", + "maintan": "maintain", + "makeing": "making", + "malplace": "misplace", + "malplaced": "misplaced", + "managable": "manageable", + "managment": "management", + "manoeuvering": "maneuvering", + "mathimatical": "mathematical", + "mathimatic": "mathematic", + "mathimatics": "mathematics", + "ment": "meant", + "messsage": "message", + "messsages": "messages", + "microprocesspr": "microprocessor", + "milliseonds": "milliseconds", + "miscelleneous": "miscellaneous", + "misformed": "malformed", + "mispelled": "misspelled", + "mispelt": "misspelt", + "mmnemonic": "mnemonic", + "modulues": "modules", + "monochorome": "monochrome", + "monochromo": "monochrome", + "monocrome": "monochrome", + "mroe": "more", + "multidimensionnal": "multidimensional", + "mulitplied": "multiplied", + "mutiple": "multiple", + "nam": "name", + "nams": "names", + "navagating": "navigating", + "nead": "need", + "neccesary": "necessary", + "neccessary": "necessary", + "necesary": "necessary", + "negotation": "negotiation", + "nescessary": "necessary", + "nessessary": "necessary", + "noticable": "noticeable", + "notications": "notifications", + "occationally": "occasionally", + "omitt": "omit", + "ommitted": "omitted", + "onself": "oneself", + "optionnal": "optional", + "optmizations": "optimizations", + "orientatied": "orientated", + "orientied": "oriented", + "ouput": "output", + "overaall": "overall", + "overriden": "overridden", + "pacakge": "package", + "pachage": "package", + "packacge": "package", + "packege": "package", + "packge": "package", + "pakage": "package", + "pallette": "palette", + "paramameters": "parameters", + "paramater": "parameter", + "parametes": "parameters", + "parametised": "parametrised", + "paramter": "parameter", + "paramters": "parameters", + "particularily": "particularly", + "pased": "passed", + "pendantic": 
"pedantic", + "peprocessor": "preprocessor", + "perfoming": "performing", + "permissons": "permissions", + "persistant": "persistent", + "plattform": "platform", + "pleaes": "please", + "ploting": "plotting", + "poinnter": "pointer", + "posible": "possible", + "possibilites": "possibilities", + "powerfull": "powerful", + "preceed": "precede", + "preceeded": "preceded", + "preceeding": "preceding", + "precendence": "precedence", + "precission": "precision", + "prefered": "preferred", + "prefferably": "preferably", + "prepaired": "prepared", + "primative": "primitive", + "princliple": "principle", + "priorty": "priority", + "priviledge": "privilege", + "priviledges": "privileges", + "procceed": "proceed", + "proccesors": "processors", + "proces": "process", + "processess": "processes", + "processessing": "processing", + "processpr": "processor", + "processsing": "processing", + "progams": "programs", + "programers": "programmers", + "programm": "program", + "programms": "programs", + "promps": "prompts", + "pronnounced": "pronounced", + "prononciation": "pronunciation", + "pronouce": "pronounce", + "pronunce": "pronounce", + "propery": "property", + "propigate": "propagate", + "propigation": "propagation", + "prosess": "process", + "protable": "portable", + "protcol": "protocol", + "protecion": "protection", + "protocoll": "protocol", + "psychadelic": "psychedelic", + "quering": "querying", + "reasearch": "research", + "reasearcher": "researcher", + "reasearchers": "researchers", + "recogniced": "recognised", + "recognizeable": "recognizable", + "recommanded": "recommended", + "redircet": "redirect", + "redirectrion": "redirection", + "reenable": "re-enable", + "reenabled": "re-enabled", + "reencode": "re-encode", + "refence": "reference", + "registerd": "registered", + "registraration": "registration", + "regulamentations": "regulations", + "remoote": "remote", + "removeable": "removable", + "repectively": "respectively", + "replacments": "replacements", + "replys": 
"replies", + "requiere": "require", + "requred": "required", + "requried": "required", + "resizeable": "resizable", + "ressize": "resize", + "ressource": "resource", + "ressources": "resources", + "retransmited": "retransmitted", + "retreive": "retrieve", + "retreived": "retrieved", + "rmeove": "remove", + "rmeoved": "removed", + "rmeoves": "removes", + "runned": "ran", + "runnning": "running", + "sacrifying": "sacrificing", + "safly": "safely", + "savable": "saveable", + "searchs": "searches", + "secund": "second", + "separatly": "separately", + "sepcify": "specify", + "seperated": "separated", + "seperately": "separately", + "seperate": "separate", + "seperatly": "separately", + "seperator": "separator", + "sepperate": "separate", + "sequencial": "sequential", + "serveral": "several", + "setts": "sets", + "similiar": "similar", + "simliar": "similar", + "softwares": "software", + "speach": "speech", + "speciefied": "specified", + "specifed": "specified", + "specificatin": "specification", + "specificaton": "specification", + "specifing": "specifying", + "speficied": "specified", + "speling": "spelling", + "splitted": "split", + "spreaded": "spread", + "staically": "statically", + "standardss": "standards", + "standart": "standard", + "staticly": "statically", + "subdirectoires": "subdirectories", + "suble": "subtle", + "succesfully": "successfully", + "succesful": "successful", + "sucessfully": "successfully", + "superflous": "superfluous", + "superseeded": "superseded", + "suplied": "supplied", + "suport": "support", + "suppored": "supported", + "supportin": "supporting", + "suppoted": "supported", + "suppported": "supported", + "suppport": "support", + "supress": "suppress", + "surpress": "suppress", + "surpresses": "suppresses", + "surpesses": "suppresses", + "suspicously": "suspiciously", + "synax": "syntax", + "synchonized": "synchronized", + "syncronize": "synchronize", + "syncronizing": "synchronizing", + "syncronus": "synchronous", + "syste": "system", + 
"sytem": "system", + "sythesis": "synthesis", + "taht": "that", + "targetted": "targeted", + "targetting": "targeting", + "teh": "the", + "throught": "through", + "transfered": "transferred", + "transfering": "transferring", + "trasmission": "transmission", + "treshold": "threshold", + "trigerring": "triggering", + "unconditionaly": "unconditionally", + "unecessary": "unnecessary", + "unexecpted": "unexpected", + "unfortunatelly": "unfortunately", + "unknonw": "unknown", + "unkown": "unknown", + "unneedingly": "unnecessarily", + "unuseful": "useless", + "usefule": "useful", + "usefull": "useful", + "usege": "usage", + "usera": "users", + "usualy": "usually", + "utilites": "utilities", + "utillities": "utilities", + "utilties": "utilities", + "utiltity": "utility", + "utitlty": "utility", + "variantions": "variations", + "varient": "variant", + "verbse": "verbose", + "verisons": "versions", + "verison": "version", + "verson": "version", + "visiters": "visitors", + "vitual": "virtual", + "whataver": "whatever", + "wheter": "whether", + "wierd": "weird", + "writting": "writing", + "yur": "your" + }, + "partial": { + "recieve": "receive", + "uft8": "utf8", + "lenght": "length", + "heigth": "height", + "fuction": "function" + } + } +} diff --git a/src/lint/linter/ArcanistSpellingLinter.php b/src/lint/linter/ArcanistSpellingLinter.php index c96d2403..d22e67fb 100644 --- a/src/lint/linter/ArcanistSpellingLinter.php +++ b/src/lint/linter/ArcanistSpellingLinter.php @@ -2,18 +2,17 @@ /** * Enforces basic spelling. Spelling inside code is actually pretty hard to - * get right without false positives. I take a conservative approach and - * just use a blacklisted set of words that are commonly spelled - * incorrectly. + * get right without false positives. I take a conservative approach and just + * use a blacklisted set of words that are commonly spelled incorrectly. 
*/ final class ArcanistSpellingLinter extends ArcanistLinter { - const LINT_SPELLING_PICKY = 0; - const LINT_SPELLING_IMPORTANT = 1; + const LINT_SPELLING_EXACT = 1; + const LINT_SPELLING_PARTIAL = 2; - private $partialWordRules; - private $wholeWordRules; - private $severity; + private $dictionaries = array(); + private $exactWordRules = array(); + private $partialWordRules = array(); public function getInfoName() { return pht('Spellchecker'); @@ -23,13 +22,6 @@ final class ArcanistSpellingLinter extends ArcanistLinter { return pht('Detects common misspellings of English words.'); } - public function __construct($severity = self::LINT_SPELLING_PICKY) { - $this->severity = $severity; - $this->wholeWordRules = ArcanistSpellingDefaultData::getFullWordRules(); - $this->partialWordRules = - ArcanistSpellingDefaultData::getPartialWordRules(); - } - public function getLinterName() { return 'SPELL'; } @@ -38,84 +30,94 @@ final class ArcanistSpellingLinter extends ArcanistLinter { return 'spelling'; } - public function addPartialWordRule( - $incorrect_word, - $correct_word, - $severity = self::LINT_SPELLING_IMPORTANT) { + public function getLinterConfigurationOptions() { + $options = array( + 'spelling.dictionaries' => array( + 'type' => 'optional list', + 'help' => pht('Pass in custom dictionaries.'), + ), + ); - $this->partialWordRules[$severity][$incorrect_word] = $correct_word; + return $options + parent::getLinterConfigurationOptions(); } - public function addWholeWordRule( - $incorrect_word, - $correct_word, - $severity = self::LINT_SPELLING_IMPORTANT) { + public function setLinterConfigurationValue($key, $value) { + switch ($key) { + case 'spelling.dictionaries': + foreach ($value as $dictionary) { + $this->loadDictionary($dictionary); + } + return; + } - $this->wholeWordRules[$severity][$incorrect_word] = $correct_word; + return parent::setLinterConfigurationValue($key, $value); + } + + public function loadDictionary($path) { + $root = 
$this->getEngine()->getWorkingCopy()->getProjectRoot(); + $path = Filesystem::resolvePath($path, $root); + + $dict = phutil_json_decode(Filesystem::readFile($path)); + PhutilTypeSpec::checkMap( + $dict, + array( + 'rules' => 'map<string, map<string, string>>', + )); + $rules = $dict['rules']; + + $this->dictionaries[] = $path; + $this->exactWordRules = array_merge( + $this->exactWordRules, + idx($rules, 'exact', array())); + $this->partialWordRules = array_merge( + $this->partialWordRules, + idx($rules, 'partial', array())); + } + + public function addExactWordRule($misspelling, $correction) { + $this->exactWordRules = array_merge( + $this->exactWordRules, + array($misspelling => $correction)); + } + + public function addPartialWordRule($misspelling, $correction) { + $this->partialWordRules = array_merge( + $this->partialWordRules, + array($misspelling => $correction)); } public function getLintSeverityMap() { return array( - self::LINT_SPELLING_PICKY => ArcanistLintSeverity::SEVERITY_WARNING, - self::LINT_SPELLING_IMPORTANT => ArcanistLintSeverity::SEVERITY_ERROR, + self::LINT_SPELLING_EXACT => ArcanistLintSeverity::SEVERITY_WARNING, + self::LINT_SPELLING_PARTIAL => ArcanistLintSeverity::SEVERITY_WARNING, ); } public function getLintNameMap() { return array( - self::LINT_SPELLING_PICKY => pht('Possible Spelling Mistake'), - self::LINT_SPELLING_IMPORTANT => pht('Possible Spelling Mistake'), + self::LINT_SPELLING_EXACT => pht('Possible Spelling Mistake'), + self::LINT_SPELLING_PARTIAL => pht('Possible Spelling Mistake'), ); } public function lintPath($path) { - foreach ($this->partialWordRules as $severity => $wordlist) { - if ($severity >= $this->severity) { - if (!$this->isCodeEnabled($severity)) { - continue; - } - foreach ($wordlist as $misspell => $correct) { - $this->checkPartialWord($path, $misspell, $correct, $severity); - } - } + // TODO: This is a bit hacky. If no dictionaries were specified, then add + // the default dictionary. 
+ if (!$this->dictionaries) { + $root = dirname(phutil_get_library_root('arcanist')); + $this->loadDictionary($root.'/resources/spelling/english.json'); } - foreach ($this->wholeWordRules as $severity => $wordlist) { - if ($severity >= $this->severity) { - if (!$this->isCodeEnabled($severity)) { - continue; - } - foreach ($wordlist as $misspell => $correct) { - $this->checkWholeWord($path, $misspell, $correct, $severity); - } - } + foreach ($this->exactWordRules as $misspelling => $correction) { + $this->checkExactWord($path, $misspelling, $correction); + } + + foreach ($this->partialWordRules as $misspelling => $correction) { + $this->checkPartialWord($path, $misspelling, $correction); } } - protected function checkPartialWord($path, $word, $correct_word, $severity) { - $text = $this->getData($path); - $pos = 0; - while ($pos < strlen($text)) { - $next = stripos($text, $word, $pos); - if ($next === false) { - return; - } - $original = substr($text, $next, strlen($word)); - $replacement = self::fixLetterCase($correct_word, $original); - $this->raiseLintAtOffset( - $next, - $severity, - pht( - "Possible spelling error. You wrote '%s', but did you mean '%s'?", - $word, - $correct_word), - $original, - $replacement); - $pos = $next + 1; - } - } - - protected function checkWholeWord($path, $word, $correct_word, $severity) { + private function checkExactWord($path, $word, $correction) { $text = $this->getData($path); $matches = array(); $num_matches = preg_match_all( @@ -128,28 +130,52 @@ final class ArcanistSpellingLinter extends ArcanistLinter { } foreach ($matches[0] as $match) { $original = $match[0]; - $replacement = self::fixLetterCase($correct_word, $original); + $replacement = self::fixLetterCase($correction, $original); $this->raiseLintAtOffset( $match[1], - $severity, + self::LINT_SPELLING_EXACT, pht( "Possible spelling error. 
You wrote '%s', but did you mean '%s'?", $word, - $correct_word), + $correction), $original, $replacement); } } + private function checkPartialWord($path, $word, $correction) { + $text = $this->getData($path); + $pos = 0; + while ($pos < strlen($text)) { + $next = stripos($text, $word, $pos); + if ($next === false) { + return; + } + $original = substr($text, $next, strlen($word)); + $replacement = self::fixLetterCase($correction, $original); + $this->raiseLintAtOffset( + $next, + self::LINT_SPELLING_PARTIAL, + pht( + "Possible spelling error. You wrote '%s', but did you mean '%s'?", + $word, + $correction), + $original, + $replacement); + $pos = $next + 1; + } + } + public static function fixLetterCase($string, $case) { - if ($case == strtolower($case)) { - return strtolower($string); - } else if ($case == strtoupper($case)) { - return strtoupper($string); - } else if ($case == ucwords(strtolower($case))) { - return ucwords(strtolower($string)); - } else { - return null; + switch ($case) { + case strtolower($case): + return strtolower($string); + case strtoupper($case): + return strtoupper($string); + case ucwords(strtolower($case)): + return ucwords(strtolower($string)); + default: + return null; } } diff --git a/src/lint/linter/__tests__/ArcanistSpellingLinterTestCase.php b/src/lint/linter/__tests__/ArcanistSpellingLinterTestCase.php index 6a653274..36ef7a8e 100644 --- a/src/lint/linter/__tests__/ArcanistSpellingLinterTestCase.php +++ b/src/lint/linter/__tests__/ArcanistSpellingLinterTestCase.php @@ -6,7 +6,7 @@ final class ArcanistSpellingLinterTestCase public function testSpellingLint() { $linter = new ArcanistSpellingLinter(); $linter->addPartialWordRule('supermn', 'superman'); - $linter->addWholeWordRule('batmn', 'batman'); + $linter->addExactWordRule('batmn', 'batman'); $this->executeTestsInDirectory( dirname(__FILE__).'/spelling/', diff --git a/src/lint/linter/__tests__/spelling/spell.lint-test b/src/lint/linter/__tests__/spelling/spell.lint-test index 
aa1a5f1f..a992d1f3 100644 --- a/src/lint/linter/__tests__/spelling/spell.lint-test +++ b/src/lint/linter/__tests__/spelling/spell.lint-test @@ -10,14 +10,14 @@ didn't remove acording Added ZZZZsupermnZZZZ Added full batmn batmnZZZZ ~~~~~~~~~~ -error:2:1 -error:4:10 -error:5:15 -error:7:7 -error:7:12 +warning:2:1 +warning:4:10 +warning:5:15 +warning:7:7 +warning:7:12 warning:9:15 -error:10:11 -error:11:12 +warning:10:11 +warning:11:12 ~~~~~~~~~~ ~~~~~~~~~~ { diff --git a/src/lint/linter/spelling/ArcanistSpellingDefaultData.php b/src/lint/linter/spelling/ArcanistSpellingDefaultData.php deleted file mode 100644 index 7be0eacf..00000000 --- a/src/lint/linter/spelling/ArcanistSpellingDefaultData.php +++ /dev/null @@ -1,628 +0,0 @@ - 'abandoning', - 'abigious' => 'ambiguous', - 'abitrate' => 'arbitrate', - 'abov' => 'above', - 'absense' => 'absence', - 'absolut' => 'absolute', - 'absoulte' => 'absolute', - 'acceleratoin' => 'acceleration', - 'accelleration' => 'acceleration', - 'accesing' => 'accessing', - 'accesnt' => 'accent', - 'accessable' => 'accessible', - 'accesss' => 'access', - 'accidentaly' => 'accidentally', - 'accidentually' => 'accidentally', - 'accomodate' => 'accommodate', - 'accomodates' => 'accommodates', - 'accout' => 'account', - 'acess' => 'access', - 'acessable' => 'accessible', - 'acient' => 'ancient', - 'ackowledge' => 'acknowledge', - 'ackowledged' => 'acknowledged', - 'acknowldegement' => 'acknowldegement', - 'acording' => 'according', - 'activete' => 'activate', - 'acumulating' => 'accumulating', - 'addional' => 'additional', - 'additionaly' => 'additionally', - 'addreses' => 'addresses', - 'aditional' => 'additional', - 'aditionally' => 'additionally', - 'aditionaly' => 'additionally', - 'adress' => 'address', - 'adresses' => 'addresses', - 'adviced' => 'advised', - 'afecting' => 'affecting', - 'albumns' => 'albums', - 'alegorical' => 'allegorical', - 'algorith' => 'algorithm', - 'algorithmical' => 'algorithmic', - 'algoritm' => 'algorithm', - 
'algoritms' => 'algorithms', - 'algorrithm' => 'algorithm', - 'algorritm' => 'algorithm', - 'allpication' => 'application', - 'alogirhtms' => 'algorithms', - 'alot' => 'a lot', - 'alow' => 'allow', - 'alows' => 'allows', - 'altough' => 'although', - 'ambigious' => 'ambiguous', - 'amoung' => 'among', - 'amout' => 'amount', - 'analysator' => 'analyzer', - 'ang' => 'and', - 'anniversery' => 'anniversary', - 'annoucement' => 'announcement', - 'anomolies' => 'anomalies', - 'anomoly' => 'anomaly', - 'aplication' => 'application', - 'appearence' => 'appearance', - 'appliction' => 'application', - 'applictions' => 'applications', - 'appropiate' => 'appropriate', - 'appropriatly' => 'appropriately', - 'aquired' => 'acquired', - 'arbitary' => 'arbitrary', - 'architechture' => 'architecture', - 'arguement' => 'argument', - 'arguements' => 'arguments', - 'aritmetic' => 'arithmetic', - 'arraival' => 'arrival', - 'artifical' => 'artificial', - 'artillary' => 'artillery', - 'assigment' => 'assignment', - 'assigments' => 'assignments', - 'assistent' => 'assistant', - 'asuming' => 'assuming', - 'asycronous' => 'asynchronous', - 'atomatically' => 'automatically', - 'attachement' => 'attachment', - 'attemps' => 'attempts', - 'attruibutes' => 'attributes', - 'authentification' => 'authentication', - 'automaticaly' => 'automatically', - 'automaticly' => 'automatically', - 'automatize' => 'automate', - 'automatized' => 'automated', - 'automatizes' => 'automates', - 'autonymous' => 'autonomous', - 'auxilliary' => 'auxiliary', - 'avaiable' => 'available', - 'availabled' => 'available', - 'availablity' => 'availability', - 'availale' => 'available', - 'availavility' => 'availability', - 'availble' => 'available', - 'availiable' => 'available', - 'avaliable' => 'available', - 'backgroud' => 'background', - 'bahavior' => 'behavior', - 'baloon' => 'balloon', - 'baloons' => 'balloons', - 'bandwith' => 'bandwidth', - 'batery' => 'battery', - 'becomming' => 'becoming', - 'becuase' => 'because', 
- 'begining' => 'beginning', - 'bianries' => 'binaries', - 'calender' => 'calendar', - 'cancelation' => 'cancellation', - 'capabilites' => 'capabilities', - 'capatibilities' => 'capabilities', - 'cariage' => 'carriage', - 'challange' => 'challenge', - 'challanges' => 'challenges', - 'changable' => 'changeable', - 'charachter' => 'character', - 'charachters' => 'characters', - 'charater' => 'character', - 'charaters' => 'characters', - 'charcter' => 'character', - 'childs' => 'children', - 'chnage' => 'change', - 'chnages' => 'changes', - 'choosen' => 'chosen', - 'collapsable' => 'collapsible', - 'colorfull' => 'colorful', - 'comand' => 'command', - 'comit' => 'commit', - 'commerical' => 'commercial', - 'comminucation' => 'communication', - 'commited' => 'committed', - 'commiting' => 'committing', - 'committ' => 'commit', - 'commoditiy' => 'commodity', - 'compability' => 'compatibility', - 'compatability' => 'compatibility', - 'compatable' => 'compatible', - 'compatibiliy' => 'compatibility', - 'compatibilty' => 'compatibility', - 'compilant' => 'compliant', - 'compleatly' => 'completely', - 'completly' => 'completely', - 'complient' => 'compliant', - 'compres' => 'compress', - 'compresion' => 'compression', - 'comression' => 'compression', - 'conditionaly' => 'conditionally', - 'configuratoin' => 'configuration', - 'conjuction' => 'conjunction', - 'connectinos' => 'connections', - 'connnection' => 'connection', - 'connnections' => 'connections', - 'consistancy' => 'consistency', - 'consistant' => 'consistent', - 'containes' => 'contains', - 'containts' => 'contains', - 'contaisn' => 'contains', - 'contence' => 'contents', - 'continous' => 'continuous', - 'continously' => 'continuously', - 'continueing' => 'continuing', - 'contraints' => 'constraints', - 'convertor' => 'converter', - 'convinient' => 'convenient', - 'corected' => 'corrected', - 'correponding' => 'corresponding', - 'correponds' => 'corresponds', - 'correspoding' => 'corresponding', - 'cryptocraphic' 
=> 'cryptographic', - 'curently' => 'currently', - 'dafault' => 'default', - 'deafult' => 'default', - 'deamon' => 'daemon', - 'decompres' => 'decompress', - 'definate' => 'definite', - 'definately' => 'definitely', - 'delare' => 'declare', - 'delared' => 'declared', - 'delares' => 'declares', - 'delaring' => 'declaring', - 'delemiter' => 'delimiter', - 'delemiters' => 'delimiters', - 'delimeter' => 'delimiter', - 'delimeters' => 'delimiters', - 'dependancies' => 'dependencies', - 'dependancy' => 'dependency', - 'dependant' => 'dependent', - 'depreacted' => 'deprecated', - 'depreacte' => 'deprecate', - 'desactivate' => 'deactivate', - 'detabase' => 'database', - 'developement' => 'development', - 'developped' => 'developed', - 'developpement' => 'development', - 'developper' => 'developer', - 'developpment' => 'development', - 'deveolpment' => 'development', - 'devided' => 'divided', - 'dictionnary' => 'dictionary', - 'diplay' => 'display', - 'disapeared' => 'disappeared', - 'discontiguous' => 'noncontiguous', - 'dispertion' => 'dispersion', - 'dissapears' => 'disappears', - 'docuentation' => 'documentation', - 'documantation' => 'documentation', - 'documentaion' => 'documentation', - 'downlad' => 'download', - 'downlads' => 'downloads', - 'easilly' => 'easily', - 'ecspecially' => 'especially', - 'edditable' => 'editable', - 'editting' => 'editing', - 'efficently' => 'efficiently', - 'eletronic' => 'electronic', - 'enchanced' => 'enhanced', - 'encorporating' => 'incorporating', - 'endianess' => 'endianness', - 'enhaced' => 'enhanced', - 'enlightnment' => 'enlightenment', - 'enocded' => 'encoded', - 'enterily' => 'entirely', - 'envireonment' => 'environment', - 'enviroiment' => 'environment', - 'enviroment' => 'environment', - 'environement' => 'environment', - 'environent' => 'environment', - 'equiped' => 'equipped', - 'equivelant' => 'equivalent', - 'equivilant' => 'equivalent', - 'estbalishment' => 'establishment', - 'etsablishment' => 'establishment', - 
'etsbalishment' => 'establishment', - 'excecutable' => 'executable', - 'exceded' => 'exceeded', - 'excellant' => 'excellent', - 'exlcude' => 'exclude', - 'exlcusive' => 'exclusive', - 'expecially' => 'especially', - 'explicitely' => 'explicitly', - 'explict' => 'explicit', - 'explictly' => 'explicitly', - 'expresion' => 'expression', - 'exprimental' => 'experimental', - 'extensability' => 'extensibility', - 'extention' => 'extension', - 'extracter' => 'extractor', - 'failuer' => 'failure', - 'familar' => 'familiar', - 'fatser' => 'faster', - 'feauture' => 'feature', - 'feautures' => 'features', - 'fetaure' => 'feature', - 'fetaures' => 'features', - 'forse' => 'force', - 'fortan' => 'fortran', - 'forwardig' => 'forwarding', - 'framwork' => 'framework', - 'functionallity' => 'functionality', - 'functionaly' => 'functionally', - 'functionnality' => 'functionality', - 'functiosn' => 'functions', - 'functonality' => 'functionality', - 'futhermore' => 'furthermore', - 'generiously' => 'generously', - 'grabing' => 'grabbing', - 'grahical' => 'graphical', - 'grahpical' => 'graphical', - 'grapic' => 'graphic', - 'guage' => 'gauge', - 'halfs' => 'halves', - 'handfull' => 'handful', - 'heirarchically' => 'hierarchically', - 'helpfull' => 'helpful', - 'hierachy' => 'hierarchy', - 'heirachy' => 'hierarchy', - 'heirarchy' => 'hierarchy', - 'hierarchie' => 'hierarchy', - 'heirarchie' => 'hierarchy', - 'howver' => 'however', - 'immeadiately' => 'immediately', - 'implemantation' => 'implementation', - 'implemention' => 'implementation', - 'incomming' => 'incoming', - 'incompatabilities' => 'incompatibilities', - 'incompatable' => 'incompatible', - 'inconsistant' => 'inconsistent', - 'indendation' => 'indentation', - 'indended' => 'intended', - 'independant' => 'independent', - 'independed' => 'independent', - 'informatiom' => 'information', - 'informations' => 'information', - 'infromation' => 'information', - 'initalize' => 'initialize', - 'initators' => 'initiators', - 
'initializiation' => 'initialization', - 'inofficial' => 'unofficial', - 'integreated' => 'integrated', - 'integrety' => 'integrity', - 'integrey' => 'integrity', - 'intendet' => 'intended', - 'interchangable' => 'interchangeable', - 'intermittant' => 'intermittent', - 'interupted' => 'interrupted', - 'intial' => 'initial', - 'intregral' => 'integral', - 'intuative' => 'intuitive', - 'invokation' => 'invocation', - 'invokations' => 'invocations', - 'jave' => 'java', - 'langage' => 'language', - 'langauage' => 'language', - 'langauge' => 'language', - 'langugage' => 'language', - 'lauch' => 'launch', - 'leightweight' => 'lightweight', - 'lesstiff' => 'lesstif', - 'libaries' => 'libraries', - 'libary' => 'library', - 'librairies' => 'libraries', - 'libraris' => 'libraries', - 'licenceing' => 'licencing', - 'loggging' => 'logging', - 'loggin' => 'login', - 'logile' => 'logfile', - 'machinary' => 'machinery', - 'maintainance' => 'maintenance', - 'maintainence' => 'maintenance', - 'maintan' => 'maintain', - 'makeing' => 'making', - 'malplace' => 'misplace', - 'malplaced' => 'misplaced', - 'managable' => 'manageable', - 'managment' => 'management', - 'manoeuvering' => 'maneuvering', - 'mathimatical' => 'mathematical', - 'mathimatic' => 'mathematic', - 'mathimatics' => 'mathematics', - 'ment' => 'meant', - 'messsage' => 'message', - 'messsages' => 'messages', - 'microprocesspr' => 'microprocessor', - 'milliseonds' => 'milliseconds', - 'miscelleneous' => 'miscellaneous', - 'misformed' => 'malformed', - 'mispelled' => 'misspelled', - 'mispelt' => 'misspelt', - 'mmnemonic' => 'mnemonic', - 'modulues' => 'modules', - 'monochorome' => 'monochrome', - 'monochromo' => 'monochrome', - 'monocrome' => 'monochrome', - 'mroe' => 'more', - 'multidimensionnal' => 'multidimensional', - 'mulitplied' => 'multiplied', - 'mutiple' => 'multiple', - 'nam' => 'name', - 'nams' => 'names', - 'navagating' => 'navigating', - 'nead' => 'need', - 'neccesary' => 'necessary', - 'neccessary' => 
'necessary', - 'necesary' => 'necessary', - 'negotation' => 'negotiation', - 'nescessary' => 'necessary', - 'nessessary' => 'necessary', - 'noticable' => 'noticeable', - 'notications' => 'notifications', - 'occationally' => 'occasionally', - 'omitt' => 'omit', - 'ommitted' => 'omitted', - 'onself' => 'oneself', - 'optionnal' => 'optional', - 'optmizations' => 'optimizations', - 'orientatied' => 'orientated', - 'orientied' => 'oriented', - 'ouput' => 'output', - 'overaall' => 'overall', - 'overriden' => 'overridden', - 'pacakge' => 'package', - 'pachage' => 'package', - 'packacge' => 'package', - 'packege' => 'package', - 'packge' => 'package', - 'pakage' => 'package', - 'pallette' => 'palette', - 'paramameters' => 'parameters', - 'paramater' => 'parameter', - 'parametes' => 'parameters', - 'parametised' => 'parametrised', - 'paramter' => 'parameter', - 'paramters' => 'parameters', - 'particularily' => 'particularly', - 'pased' => 'passed', - 'pendantic' => 'pedantic', - 'peprocessor' => 'preprocessor', - 'perfoming' => 'performing', - 'permissons' => 'permissions', - 'persistant' => 'persistent', - 'plattform' => 'platform', - 'pleaes' => 'please', - 'ploting' => 'plotting', - 'poinnter' => 'pointer', - 'posible' => 'possible', - 'possibilites' => 'possibilities', - 'powerfull' => 'powerful', - 'preceed' => 'precede', - 'preceeded' => 'preceded', - 'preceeding' => 'preceding', - 'precendence' => 'precedence', - 'precission' => 'precision', - 'prefered' => 'preferred', - 'prefferably' => 'preferably', - 'prepaired' => 'prepared', - 'primative' => 'primitive', - 'princliple' => 'principle', - 'priorty' => 'priority', - 'priviledge' => 'privilege', - 'priviledges' => 'privileges', - 'procceed' => 'proceed', - 'proccesors' => 'processors', - 'proces' => 'process', - 'processess' => 'processes', - 'processessing' => 'processing', - 'processpr' => 'processor', - 'processsing' => 'processing', - 'progams' => 'programs', - 'programers' => 'programmers', - 'programm' => 
'program', - 'programms' => 'programs', - 'promps' => 'prompts', - 'pronnounced' => 'pronounced', - 'prononciation' => 'pronunciation', - 'pronouce' => 'pronounce', - 'pronunce' => 'pronounce', - 'propery' => 'property', - 'propigate' => 'propagate', - 'propigation' => 'propagation', - 'prosess' => 'process', - 'protable' => 'portable', - 'protcol' => 'protocol', - 'protecion' => 'protection', - 'protocoll' => 'protocol', - 'psychadelic' => 'psychedelic', - 'quering' => 'querying', - 'reasearch' => 'research', - 'reasearcher' => 'researcher', - 'reasearchers' => 'researchers', - 'recogniced' => 'recognised', - 'recognizeable' => 'recognizable', - 'recommanded' => 'recommended', - 'redircet' => 'redirect', - 'redirectrion' => 'redirection', - 'reenable' => 're-enable', - 'reenabled' => 're-enabled', - 'reencode' => 're-encode', - 'refence' => 'reference', - 'registerd' => 'registered', - 'registraration' => 'registration', - 'regulamentations' => 'regulations', - 'remoote' => 'remote', - 'removeable' => 'removable', - 'repectively' => 'respectively', - 'replacments' => 'replacements', - 'replys' => 'replies', - 'requiere' => 'require', - 'requred' => 'required', - 'requried' => 'required', - 'resizeable' => 'resizable', - 'ressize' => 'resize', - 'ressource' => 'resource', - 'ressources' => 'resources', - 'retransmited' => 'retransmitted', - 'retreive' => 'retrieve', - 'retreived' => 'retrieved', - 'rmeove' => 'remove', - 'rmeoved' => 'removed', - 'rmeoves' => 'removes', - 'runned' => 'ran', - 'runnning' => 'running', - 'sacrifying' => 'sacrificing', - 'safly' => 'safely', - 'savable' => 'saveable', - 'searchs' => 'searches', - 'secund' => 'second', - 'separatly' => 'separately', - 'sepcify' => 'specify', - 'seperated' => 'separated', - 'seperately' => 'separately', - 'seperate' => 'separate', - 'seperatly' => 'separately', - 'seperator' => 'separator', - 'sepperate' => 'separate', - 'sequencial' => 'sequential', - 'serveral' => 'several', - 'setts' => 'sets', - 
'similiar' => 'similar', - 'simliar' => 'similar', - 'softwares' => 'software', - 'speach' => 'speech', - 'speciefied' => 'specified', - 'specifed' => 'specified', - 'specificatin' => 'specification', - 'specificaton' => 'specification', - 'specifing' => 'specifying', - 'speficied' => 'specified', - 'speling' => 'spelling', - 'splitted' => 'split', - 'spreaded' => 'spread', - 'staically' => 'statically', - 'standardss' => 'standards', - 'standart' => 'standard', - 'staticly' => 'statically', - 'subdirectoires' => 'subdirectories', - 'suble' => 'subtle', - 'succesfully' => 'successfully', - 'succesful' => 'successful', - 'sucessfully' => 'successfully', - 'superflous' => 'superfluous', - 'superseeded' => 'superseded', - 'suplied' => 'supplied', - 'suport' => 'support', - 'suppored' => 'supported', - 'supportin' => 'supporting', - 'suppoted' => 'supported', - 'suppported' => 'supported', - 'suppport' => 'support', - 'supress' => 'suppress', - 'surpress' => 'suppress', - 'surpresses' => 'suppresses', - 'surpesses' => 'suppresses', - 'suspicously' => 'suspiciously', - 'synax' => 'syntax', - 'synchonized' => 'synchronized', - 'syncronize' => 'synchronize', - 'syncronizing' => 'synchronizing', - 'syncronus' => 'synchronous', - 'syste' => 'system', - 'sytem' => 'system', - 'sythesis' => 'synthesis', - 'taht' => 'that', - 'targetted' => 'targeted', - 'targetting' => 'targeting', - 'throught' => 'through', - 'transfered' => 'transferred', - 'transfering' => 'transferring', - 'trasmission' => 'transmission', - 'treshold' => 'threshold', - 'trigerring' => 'triggering', - 'unconditionaly' => 'unconditionally', - 'unecessary' => 'unnecessary', - 'unexecpted' => 'unexpected', - 'unfortunatelly' => 'unfortunately', - 'unknonw' => 'unknown', - 'unkown' => 'unknown', - 'unneedingly' => 'unnecessarily', - 'unuseful' => 'useless', - 'usefule' => 'useful', - 'usefull' => 'useful', - 'usege' => 'usage', - 'usera' => 'users', - 'usualy' => 'usually', - 'utilites' => 'utilities', - 
'utillities' => 'utilities', - 'utilties' => 'utilities', - 'utiltity' => 'utility', - 'utitlty' => 'utility', - 'variantions' => 'variations', - 'varient' => 'variant', - 'verbse' => 'verbose', - 'verisons' => 'versions', - 'verison' => 'version', - 'verson' => 'version', - 'visiters' => 'visitors', - 'vitual' => 'virtual', - 'whataver' => 'whatever', - 'wheter' => 'whether', - 'wierd' => 'weird', - 'writting' => 'writing', - 'yur' => 'your', - - // Variable common_cpan - 'refering' => 'referring', - 'writeable' => 'writable', - 'nineth' => 'ninth', - 'ommited' => 'omitted', - 'omited' => 'omitted', - 'requrie' => 'require', - 'existant' => 'existent', - 'agument' => 'argument', - 'destionation' => 'destination', - ), array( - 'teh' => 'the', - ), - ); - } - - public static function getPartialWordRules() { - return array( - array(), - array( - 'recieve' => 'receive', - 'uft8' => 'utf8', - 'lenght' => 'length', - 'heigth' => 'height', - 'fuction' => 'function', - ), - ); - } -}