* * Originally available under the GPL 2 or greater. Relicensed with permission * of original authors under the MIT License in 2016. * * All rights reserved. * * @package PorterStemmer * @author Richard Heyes * @author Jon Abernathy * @copyright 2005-2016 Richard Heyes (http://www.phpguru.org/) * @license http://www.opensource.org/licenses/mit-license.html MIT License */ /** * PHP 5 Implementation of the Porter Stemmer algorithm. Certain elements * were borrowed from the (broken) implementation by Jon Abernathy. * * See http://tartarus.org/~martin/PorterStemmer/ for a description of the * algorithm. * * Usage: * * $stem = PorterStemmer::Stem($word); * * How easy is that? * * @package PorterStemmer * @author Richard Heyes * @author Jon Abernathy * @copyright 2005-2016 Richard Heyes (http://www.phpguru.org/) * @license http://www.opensource.org/licenses/mit-license.html MIT License */ class Porter { /** * Regex for matching a consonant * * @var string */ private static $regex_consonant = '(?:[bcdfghjklmnpqrstvwxz]|(?<=[aeiou])y|^y)'; /** * Regex for matching a vowel * * @var string */ private static $regex_vowel = '(?:[aeiou]|(? 1) { self::replace($word, 'e', ''); } elseif (self::m(substr($word, 0, -1)) == 1) { if (!self::cvc(substr($word, 0, -1))) { self::replace($word, 'e', ''); } } } // Part b if (self::m($word) > 1 AND self::doubleConsonant($word) AND substr($word, -1) == 'l') { $word = substr($word, 0, -1); } return $word; } /** * Replaces the first string with the second, at the end of the string * * If third arg is given, then the preceding string must match that m * count at least. * * @param string $str String to check * @param string $check Ending to check for * @param string $repl Replacement string * @param int $m Optional minimum number of m() to meet * * @return bool Whether the $check string was at the end of the $str * string. True does not necessarily mean that it was * replaced. 
*/
    private static function replace(&$str, $check, $repl, $m = null)
    {
        $check = (string) $check;
        $len = -strlen($check);

        // Compare strictly, as strings. A loose == switches to numeric
        // comparison when both operands look like numbers (for example
        // "1e1" == "010" is true), which would report a suffix that is not
        // actually there. The cast also normalises the false that substr()
        // returns for an empty subject on PHP < 8.
        if ((string) substr($str, $len) !== $check) {
            return false;
        }

        $stem = (string) substr($str, 0, $len);

        // With no threshold the rewrite is unconditional. With a threshold,
        // the measure of the remaining stem must be strictly greater than
        // $m; a measure equal to $m is not enough.
        if ($m === null || self::m($stem) > $m) {
            $str = $stem . $repl;
        }

        // The ending was present, whether or not it was rewritten.
        return true;
    }

    /**
     * Computes the measure m of a string: the number of vowel-run /
     * consonant-run pairs it contains.
     *
     * Writing c for a run of consonants and v for a run of vowels, and using
     * square brackets for a part that may be absent:
     *
     *     [c][v]        gives 0
     *     [c]vc[v]      gives 1
     *     [c]vcvc[v]    gives 2
     *     [c]vcvcvc[v]  gives 3
     *
     * @param string $str The string to return the m count for
     *
     * @return int The m count
     */
    private static function m($str)
    {
        $c = self::$regex_consonant;
        $v = self::$regex_vowel;

        // Drop the optional leading consonant run and the optional trailing
        // vowel run; neither contributes to the measure.
        $core = preg_replace('#^' . $c . '+#', '', $str);
        $core = preg_replace('#' . $v . '+$#', '', $core);

        // What is left is a sequence of (vowel run, consonant run) pairs;
        // the measure is how many of those pairs there are.
        preg_match_all('#(' . $v . '+' . $c . '+)#', $core, $pairs);

        return count($pairs[1]);
    }

    /**
     * Tells whether the string ends with two identical consonants.
     *
     * @param string $str String to check
     *
     * @return bool True when the last two characters are the same consonant
     */
    private static function doubleConsonant($str)
    {
        $c = self::$regex_consonant;

        // First require two consonants at the very end of the string...
        if (preg_match('#' . $c . '{2}$#', $str, $matches) !== 1) {
            return false;
        }

        // ...then require them to be the same letter.
        $tail = $matches[0];

        return $tail[0] === $tail[1];
    }

    /**
     * Tells whether the string ends with a consonant-vowel-consonant
     * sequence whose final consonant is not w, x or y.
     *
     * @param string $str String to check
     *
     * @return bool Result
     */
    private static function cvc($str)
    {
        $c = self::$regex_consonant;
        $v = self::$regex_vowel;

        if (preg_match('#(' . $c . $v . $c . ')$#', $str, $matches) !== 1) {
            return false;
        }

        $ending = $matches[1];
        if (strlen($ending) !== 3) {
            return false;
        }

        // A trailing w, x or y disqualifies the ending.
        $last = $ending[2];

        return $last !== 'w' && $last !== 'x' && $last !== 'y';
    }
}