From ccc7c1b42436dcdbc19d1239427f31cc6a02c7ba Mon Sep 17 00:00:00 2001 From: epriestley Date: Sun, 3 Jul 2016 17:40:09 -0700 Subject: [PATCH] Make i18n string extraction faster and more flexible Summary: Ref T5267. Two general changes: - Make string extraction use a cache, so that it doesn't take several minutes every time you change something. Minor updates now only take a few seconds (like `arc liberate` and similar). - Instead of dumping a sort-of-template file out, write out to a cache (`src/.cache/i18n_strings.json`). I'm planning to add more steps to read this cache and do interesting things with it (emit translatewiki strings, generate or update standalone translation files, etc). Test Plan: - Ran `bin/i18n extract`. - Ran it again, saw it go a lot faster. - Changed stuff, ran it, saw it only look at new stuff. - Examined caches. Reviewers: chad Reviewed By: chad Maniphest Tasks: T5267 Differential Revision: https://secure.phabricator.com/D16227 --- .gitignore | 1 + ...tionalizationManagementExtractWorkflow.php | 274 +++++++++++++++--- 2 files changed, 229 insertions(+), 46 deletions(-) diff --git a/.gitignore b/.gitignore index df6c16cde6..d9f44b6bab 100644 --- a/.gitignore +++ b/.gitignore @@ -4,6 +4,7 @@ # Diviner /docs/ /.divinercache/ +/src/.cache/ # libphutil /src/.phutil_module_cache diff --git a/src/infrastructure/internationalization/management/PhabricatorInternationalizationManagementExtractWorkflow.php b/src/infrastructure/internationalization/management/PhabricatorInternationalizationManagementExtractWorkflow.php index 7714ad81ab..1da2e16e27 100644 --- a/src/infrastructure/internationalization/management/PhabricatorInternationalizationManagementExtractWorkflow.php +++ b/src/infrastructure/internationalization/management/PhabricatorInternationalizationManagementExtractWorkflow.php @@ -3,9 +3,13 @@ final class PhabricatorInternationalizationManagementExtractWorkflow extends PhabricatorInternationalizationManagementWorkflow { + const CACHE_VERSION = 1; + protected function didConstruct() { $this ->setName('extract') + ->setExamples( + '**extract** [__options__] __library__') ->setSynopsis(pht('Extract translatable strings.')) ->setArguments( array( @@ -13,44 +17,138 @@ final class PhabricatorInternationalizationManagementExtractWorkflow 'name' => 'paths', 'wildcard' => true, ), + array( + 'name' => 'clean', + 'help' => pht('Drop caches before extracting strings. Slow!'), + ), )); } public function execute(PhutilArgumentParser $args) { $console = PhutilConsole::getConsole(); - $paths = $args->getArg('paths'); - $futures = array(); + $paths = $args->getArg('paths'); + if (!$paths) { + $paths = array(getcwd()); + } + + $targets = array(); foreach ($paths as $path) { $root = Filesystem::resolvePath($path); - $path_files = id(new FileFinder($root)) - ->withType('f') - ->withSuffix('php') - ->find(); - foreach ($path_files as $file) { - $full_path = $root.DIRECTORY_SEPARATOR.$file; - $data = Filesystem::readFile($full_path); - $futures[$full_path] = PhutilXHPASTBinary::getParserFuture($data); + if (!Filesystem::pathExists($root) || !is_dir($root)) { + throw new PhutilArgumentUsageException( + pht( + 'Path "%s" does not exist, or is not a directory.', + $path)); + } + + $libraries = id(new FileFinder($path)) + ->withPath('*/__phutil_library_init__.php') + ->find(); + if (!$libraries) { + throw new PhutilArgumentUsageException( + pht( + 'Path "%s" contains no libphutil libraries.', + $path)); + } + + foreach ($libraries as $library) { + $targets[] = Filesystem::resolvePath(dirname($library)).'/'; } } - $console->writeErr( - "%s\n", - pht('Found %s file(s)...', phutil_count($futures))); + $targets = array_unique($targets); - $results = array(); + foreach ($targets as $library) { + echo tsprintf( + "** %s ** %s\n", + pht('EXTRACT'), + pht( + 'Extracting "%s"...', + Filesystem::readablePath($library))); + + $this->extractLibrary($library); + } + + return 0; + } + + private function extractLibrary($root) { + $files = $this->loadLibraryFiles($root); + $cache = $this->readCache($root); + + $modified = $this->getModifiedFiles($files, $cache); + $cache['files'] = $files; + + if ($modified) { + echo tsprintf( + "** %s ** %s\n", + pht('MODIFIED'), + pht( + 'Found %s modified file(s) (of %s total).', + phutil_count($modified), + phutil_count($files))); + + $old_strings = idx($cache, 'strings'); + $old_strings = array_select_keys($old_strings, $files); + $new_strings = $this->extractFiles($root, $modified); + $all_strings = $new_strings + $old_strings; + $cache['strings'] = $all_strings; + + $this->writeStrings($root, $all_strings); + } else { + echo tsprintf( + "** %s ** %s\n", + pht('NOT MODIFIED'), + pht('Strings for this library are already up to date.')); + } + + $cache = id(new PhutilJSON())->encodeFormatted($cache); + $this->writeCache($root, 'i18n_files.json', $cache); + } + + private function getModifiedFiles(array $files, array $cache) { + $known = idx($cache, 'files', array()); + $known = array_fuse($known); + + $modified = array(); + foreach ($files as $file => $hash) { + + if (isset($known[$hash])) { + continue; + } + $modified[$file] = $hash; + } + + return $modified; + } + + private function extractFiles($root_path, array $files) { + $hashes = array(); + + $futures = array(); + foreach ($files as $file => $hash) { + $full_path = $root_path.DIRECTORY_SEPARATOR.$file; + $data = Filesystem::readFile($full_path); + $futures[$full_path] = PhutilXHPASTBinary::getParserFuture($data); + + $hashes[$full_path] = $hash; + } $bar = id(new PhutilConsoleProgressBar()) ->setTotal(count($futures)); $messages = array(); + $results = array(); $futures = id(new FutureIterator($futures)) ->limit(8); foreach ($futures as $full_path => $future) { $bar->update(1); + $hash = $hashes[$full_path]; + try { $tree = XHPASTTree::newFromDataAndResolvedExecFuture( Filesystem::readFile($full_path), @@ -67,24 +165,27 @@ final class PhabricatorInternationalizationManagementExtractWorkflow $calls = $root->selectDescendantsOfType('n_FUNCTION_CALL'); foreach ($calls as $call) { $name = $call->getChildByIndex(0)->getConcreteString(); - if ($name == 'pht') { - $params = $call->getChildByIndex(1, 'n_CALL_PARAMETER_LIST'); - $string_node = $params->getChildByIndex(0); - $string_line = $string_node->getLineNumber(); - try { - $string_value = $string_node->evalStatic(); + if ($name != 'pht') { + continue; + } - $results[$string_value][] = array( - 'file' => Filesystem::readablePath($full_path), - 'line' => $string_line, - ); - } catch (Exception $ex) { - $messages[] = pht( - 'WARNING: Failed to evaluate pht() call on line %d in "%s": %s', - $call->getLineNumber(), - $full_path, - $ex->getMessage()); - } + $params = $call->getChildByIndex(1, 'n_CALL_PARAMETER_LIST'); + $string_node = $params->getChildByIndex(0); + $string_line = $string_node->getLineNumber(); + try { + $string_value = $string_node->evalStatic(); + + $results[$hash][] = array( + 'string' => $string_value, + 'file' => Filesystem::readablePath($full_path, $root_path), + 'line' => $string_line, + ); + } catch (Exception $ex) { + $messages[] = pht( + 'WARNING: Failed to evaluate pht() call on line %d in "%s": %s', + $call->getLineNumber(), + $full_path, + $ex->getMessage()); } } @@ -93,28 +194,109 @@ final class PhabricatorInternationalizationManagementExtractWorkflow $bar->done(); foreach ($messages as $message) { - $console->writeErr("%s\n", $message); + echo tsprintf( + "** %s ** %s\n", + pht('WARNING'), + $message); } - ksort($results); + return $results; + } - $out = array(); - $out[] = ' $locations) { - foreach ($locations as $location) { - $out[] = ' // '.$location['file'].':'.$location['line']; + private function writeStrings($root, array $strings) { + $map = array(); + foreach ($strings as $hash => $string_list) { + foreach ($string_list as $string_info) { + $map[$string_info['string']]['uses'][] = array( + 'file' => $string_info['file'], + 'line' => $string_info['line'], + ); } - $out[] = " '".addcslashes($string, "\0..\37\\'\177..\377")."' => null,"; - $out[] = null; } - $out[] = ');'; - $out[] = null; - echo implode("\n", $out); + ksort($map); - return 0; + $json = id(new PhutilJSON())->encodeFormatted($map); + $this->writeCache($root, 'i18n_strings.json', $json); + } + + private function loadLibraryFiles($root) { + $files = id(new FileFinder($root)) + ->withType('f') + ->withSuffix('php') + ->excludePath('*/.*') + ->setGenerateChecksums(true) + ->find(); + + $map = array(); + foreach ($files as $file => $hash) { + $file = Filesystem::readablePath($file, $root); + $file = ltrim($file, '/'); + + if (dirname($file) == '.') { + continue; + } + + if (dirname($file) == 'extensions') { + continue; + } + + $map[$file] = md5($hash.$file); + } + + return $map; + } + + private function readCache($root) { + $path = $this->getCachePath($root, 'i18n_files.json'); + + $default = array( + 'version' => self::CACHE_VERSION, + 'files' => array(), + 'strings' => array(), + ); + + if ($this->getArgv()->getArg('clean')) { + return $default; + } + + if (!Filesystem::pathExists($path)) { + return $default; + } + + try { + $data = Filesystem::readFile($path); + } catch (Exception $ex) { + return $default; + } + + try { + $cache = phutil_json_decode($data); + } catch (PhutilJSONParserException $e) { + return $default; + } + + $version = idx($cache, 'version'); + if ($version !== self::CACHE_VERSION) { + return $default; + } + + return $cache; + } + + private function writeCache($root, $file, $data) { + $path = $this->getCachePath($root, $file); + + $cache_dir = dirname($path); + if (!Filesystem::pathExists($cache_dir)) { + Filesystem::createDirectory($cache_dir, 0755, true); + } + + Filesystem::writeFile($path, $data); + } + + private function getCachePath($root, $to_file) { + return $root.'/.cache/'.$to_file; } }