From 77ed7ade66f147f7d8cd631000a0b527546d7514 Mon Sep 17 00:00:00 2001 From: epriestley Date: Sun, 4 Sep 2011 18:24:46 -0700 Subject: [PATCH] Add symbol import scripts for crossref features Summary: Adds a script to import symbols from a ctags-like format, and another to generate that format for PHP files. Test Plan: Ran it on Phabricator: mysql> select * from repository_symbol limit 200, 20; +-------------------+---------------------------------------------------+------------+----------------+--------+------------+ | arcanistProjectID | symbolName | symbolType | symbolLanguage | pathID | lineNumber | +-------------------+---------------------------------------------------+------------+----------------+--------+------------+ | 1 | DifferentialDiffViewController | class | php | 52 | 19 | | 1 | DifferentialInlineCommentEditController | class | php | 308 | 19 | | 1 | DifferentialInlineCommentPreviewController | class | php | 10543 | 19 | | 1 | DifferentialRevisionEditController | class | php | 10544 | 19 | | 1 | DifferentialRevisionListController | class | php | 10545 | 19 | | 1 | DifferentialRevisionViewController | class | php | 142 | 19 | | 1 | DifferentialSubscribeController | class | php | 10546 | 19 | | 1 | DifferentialRevisionListData | class | php | 58 | 19 | | 1 | DifferentialCommentEditor | class | php | 39 | 19 | | 1 | DifferentialRevisionEditor | class | php | 42 | 24 | | 1 | DifferentialFieldSpecificationIncompleteException | class | php | 10547 | 19 | | 1 | DifferentialFieldDataNotAvailableException | class | php | 10548 | 19 | | 1 | DifferentialFieldParseException | class | php | 10549 | 19 | | 1 | DifferentialFieldValidationException | class | php | 10550 | 19 | | 1 | DifferentialFieldSelector | class | php | 10551 | 19 | | 1 | DifferentialDefaultFieldSelector | class | php | 10552 | 19 | | 1 | DifferentialApplyPatchFieldSpecification | class | php | 10553 | 19 | | 1 | DifferentialArcanistProjectFieldSpecification | class | php | 10554 | 19 | | 1 | 
DifferentialAuthorFieldSpecification | class | php | 10555 | 19 | | 1 | DifferentialFieldSpecification | class | php | 10556 | 35 | +-------------------+---------------------------------------------------+------------+----------------+--------+------------+ 20 rows in set (0.00 sec) Reviewers: jungejason, nh, tuomaspelkonen, aran Reviewed By: tuomaspelkonen CC: aran, tuomaspelkonen Differential Revision: 898 --- scripts/crossref/generate_php_symbols.php | 81 +++++++++ scripts/crossref/import_project_symbols.php | 154 ++++++++++++++++++ ...atorRepositoryCommitChangeParserWorker.php | 8 +- 3 files changed, 239 insertions(+), 4 deletions(-) create mode 100755 scripts/crossref/generate_php_symbols.php create mode 100755 scripts/crossref/import_project_symbols.php diff --git a/scripts/crossref/generate_php_symbols.php b/scripts/crossref/generate_php_symbols.php new file mode 100755 index 0000000000..d46460c054 --- /dev/null +++ b/scripts/crossref/generate_php_symbols.php @@ -0,0 +1,81 @@ +#!/usr/bin/env php +limit(8) as $file => $future) { + $tree = XHPASTTree::newFromDataAndResolvedExecFuture( + $data[$file], + $future->resolve()); + + $root = $tree->getRootNode(); + + $functions = $root->selectDescendantsOfType('n_FUNCTION_DECLARATION'); + foreach ($functions as $function) { + $name = $function->getChildByIndex(2); + print_symbol($file, 'function', $name); + } + + $classes = $root->selectDescendantsOfType('n_CLASS_DECLARATION'); + foreach ($classes as $class) { + $class_name = $class->getChildByIndex(1); + print_symbol($file, 'class', $class_name); + } + + $interfaces = $root->selectDescendantsOfType('n_INTERFACE_DECLARATION'); + foreach ($interfaces as $interface) { + $interface_name = $interface->getChildByIndex(1); + print_symbol($file, 'interface', $interface_name); + } +} + +/** + * Print one symbol record to stdout as a single space-delimited line: + * name, type, language (always 'php'), line number, '/'-prefixed path. + * + * @param string $file Path of the file the symbol was found in. + * @param string $type Symbol type label, e.g. 'function' or 'class'. + * @param object $token Node exposing getConcreteString() and getLineNumber(). + * @return void + */ +function print_symbol($file, $type, $token) { + $parts = array( + $token->getConcreteString(), + $type, + 'php', + $token->getLineNumber(), + // Drop only a literal leading "./" (possibly repeated) and stray slashes. + // ltrim($file, './') would treat './' as a character list and also eat the + // leading dot of names such as ".hidden/x.php". + '/'.ltrim(preg_replace('@^(?:\./)+@', '', $file), '/'), + ); + echo implode(' ', 
$parts)."\n"; +} diff --git a/scripts/crossref/import_project_symbols.php b/scripts/crossref/import_project_symbols.php new file mode 100755 index 0000000000..058e8d07f1 --- /dev/null +++ b/scripts/crossref/import_project_symbols.php @@ -0,0 +1,154 @@ +#!/usr/bin/env php +loadOneWhere( + 'name = %s', + $project_name); + +if (!$project) { + // TODO: Provide a less silly way to do this explicitly, or just do it right + // here. + echo "Project '{$project_name}' is unknown. Upload a diff to implicitly ". + "create it.\n"; + exit(1); +} + +echo "Parsing input from stdin...\n"; +$input = file_get_contents('php://stdin'); +$input = trim($input); +$input = explode("\n", $input); + +$map = array(); +$symbols = array(); +foreach ($input as $key => $line) { + $line_no = $key + 1; + $matches = null; + $ok = preg_match('/^([^ ]+) ([^ ]+) ([^ ]+) (\d+) (.*)$/', $line, $matches); + if (!$ok) { + throw new Exception( + "Line #{$line_no} of input is invalid. Expected five space-delimited ". + "fields: symbol name, symbol type, symbol language, line number, path. ". + "For example:\n\n". + "idx function php 13 /path/to/some/file.php\n\n". + "Actual line was:\n\n". + "{$line}"); + } + list($all, $name, $type, $lang, $line_number, $path) = $matches; + + if (isset($map[$name][$type][$lang])) { + $previous = $map[$name][$type][$lang] + 1; + throw new Exception( + "Line #{$line_no} of input is invalid. It specifies a duplicate symbol ". + "(same name, language, and type) which has already been defined ". + "elsewhere. You must preprocess the symbol list to remove duplicates ". + "and choose exactly one master definition for each symbol. This symbol ". + "was previously defined on line #{$previous}.\n\n". + "Line #{$line_no}:\n". + $line."\n\n". + "Line #{$previous}:\n". + $input[$previous - 1]); + } else { + $map[$name][$type][$lang] = $key; + } + + if (strlen($name) > 128) { + throw new Exception( + "Symbol name '{$name}' defined on line #{$line_no} is too long, maximum ". 
+ "symbol name length is 128 characters."); + } + + if (strlen($type) > 12) { + throw new Exception( + "Symbol type '{$type}' defined on line #{$line_no} is too long, maximum ". + "symbol type length is 12 characters."); + } + + if (strlen($lang) > 32) { + throw new Exception( + "Symbol language '{$lang}' defined on line #{$line_no} is too long, ". + "maximum symbol language length is 32 characters."); + } + + // Paths are project-rooted, so the first byte must be a literal '/'. Compare + // strictly against the string '/' so PHP's loose string/number coercion + // cannot affect the result. + if (!strlen($path) || $path[0] !== '/') { + throw new Exception( + "Path '{$path}' defined on line #{$line_no} is invalid. Paths should be ". + "begin with '/' and specify a path from the root of the project, like ". + "'/src/utils/utils.php'."); + } + + $symbols[] = array( + 'name' => $name, + 'type' => $type, + 'lang' => $lang, + 'line' => $line_number, + 'path' => $path, + ); +} + +echo "Looking up path IDs...\n"; +$path_map = PhabricatorRepositoryCommitChangeParserWorker::lookupOrCreatePaths( + ipull($symbols, 'path')); + +$symbol = new PhabricatorRepositorySymbol(); +$conn_w = $symbol->establishConnection('w'); + +echo "Preparing queries...\n"; +$sql = array(); +foreach ($symbols as $dict) { + $sql[] = qsprintf( + $conn_w, + '(%d, %s, %s, %s, %d, %d)', + $project->getID(), + $dict['name'], + $dict['type'], + $dict['lang'], + $dict['line'], + $path_map[$dict['path']]); +} + +echo "Purging old symbols...\n"; +queryfx( + $conn_w, + 'DELETE FROM %T WHERE arcanistProjectID = %d', + $symbol->getTableName(), + $project->getID()); + +echo "Loading ".number_format(count($sql))." 
symbols...\n"; +foreach (array_chunk($sql, 128) as $chunk) { + queryfx( + $conn_w, + 'INSERT INTO %T + (arcanistProjectID, symbolName, symbolType, symbolLanguage, lineNumber, + pathID) VALUES %Q', + $symbol->getTableName(), + implode(', ', $chunk)); +} + +echo "Done.\n"; diff --git a/src/applications/repository/worker/commitchangeparser/base/PhabricatorRepositoryCommitChangeParserWorker.php b/src/applications/repository/worker/commitchangeparser/base/PhabricatorRepositoryCommitChangeParserWorker.php index b92805bc27..d581947658 100644 --- a/src/applications/repository/worker/commitchangeparser/base/PhabricatorRepositoryCommitChangeParserWorker.php +++ b/src/applications/repository/worker/commitchangeparser/base/PhabricatorRepositoryCommitChangeParserWorker.php @@ -25,11 +25,11 @@ abstract class PhabricatorRepositoryCommitChangeParserWorker return 60 * 60 * 24; } - protected function lookupOrCreatePaths(array $paths) { + public static function lookupOrCreatePaths(array $paths) { $repository = new PhabricatorRepository(); $conn_w = $repository->establishConnection('w'); - $result_map = $this->lookupPaths($paths); + $result_map = self::lookupPaths($paths); $missing_paths = array_fill_keys($paths, true); $missing_paths = array_diff_key($missing_paths, $result_map); @@ -47,13 +47,13 @@ abstract class PhabricatorRepositoryCommitChangeParserWorker PhabricatorRepository::TABLE_PATH, implode(', ', $sql)); } - $result_map += $this->lookupPaths($missing_paths); + $result_map += self::lookupPaths($missing_paths); } return $result_map; } - private function lookupPaths(array $paths) { + private static function lookupPaths(array $paths) { $repository = new PhabricatorRepository(); $conn_w = $repository->establishConnection('w');