mirror of
https://we.phorge.it/source/phorge.git
synced 2024-11-22 14:52:41 +01:00
Update symbol generation scripts
Summary: The symbol generation scripts now return scoped symbols -- in particular, PHP class constants, fields, and methods. ctags provides scope information for some symbols in other languages as well. (It turns out XHPAST doesn't support traits, but no one uses traits anyway, so it's probably fine.) I couldn't find a list of the context types ctags uses (class/struct/union/enum/maybe others?), so the context code ignores the context type and keeps only the context name. Also, the ctags script uses a blacklist for the symbol type instead of a whitelist because there are a ton of symbol types, they vary by language, and I didn't want to unintentionally exclude anything (P480).
Test Plan: Scrape symbols from arcanist and phabricator. Upload them to a sandbox. Search for things.
Reviewers: epriestley
Reviewed By: epriestley
CC: nh, aran, Korvin
Maniphest Tasks: T1602
Differential Revision: https://secure.phabricator.com/D3202
This commit is contained in:
parent
8fbe6347d2
commit
fd3ba9841b
2 changed files with 65 additions and 13 deletions
|
@ -68,8 +68,10 @@ foreach (Futures($futures)->limit(8) as $file => $future) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// default $context to empty
|
||||||
|
$extension_fields[] = '';
|
||||||
list($token, $file_path, $line_num) = $tag_info;
|
list($token, $file_path, $line_num) = $tag_info;
|
||||||
list($type, $language) = $extension_fields;
|
list($type, $language, $context) = $extension_fields;
|
||||||
|
|
||||||
// strip "language:"
|
// strip "language:"
|
||||||
$language = substr($language, 9);
|
$language = substr($language, 9);
|
||||||
|
@ -82,20 +84,22 @@ foreach (Futures($futures)->limit(8) as $file => $future) {
|
||||||
$language = str_ireplace("c++", "cpp", $language);
|
$language = str_ireplace("c++", "cpp", $language);
|
||||||
$language = str_ireplace("c#", "csharp", $language);
|
$language = str_ireplace("c#", "csharp", $language);
|
||||||
|
|
||||||
switch ($type) {
|
// Ruby has "singleton method", for example
|
||||||
case 'class':
|
$type = substr(str_replace(' ', '_', $type), 0, 12);
|
||||||
print_symbol($file_path, $line_num, 'class', $token, $language);
|
// class:foo, struct:foo, union:foo, enum:foo, ...
|
||||||
break;
|
$context = last(explode(':', $context, 2));
|
||||||
case 'function':
|
|
||||||
print_symbol($file_path, $line_num, 'function', $token, $language);
|
$ignore = array(
|
||||||
break;
|
'variable' => true,
|
||||||
default:
|
);
|
||||||
|
if (empty($ignore[$type])) {
|
||||||
|
print_symbol($file_path, $line_num, $type, $token, $context, $language);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
function ctags_get_parser_future($file_path) {
|
function ctags_get_parser_future($file_path) {
|
||||||
$future = new ExecFuture('ctags -n --fields=Kl -o - %s',
|
$future = new ExecFuture('ctags -n --fields=Kls -o - %s',
|
||||||
$file_path);
|
$file_path);
|
||||||
return $future;
|
return $future;
|
||||||
}
|
}
|
||||||
|
@ -111,7 +115,7 @@ function ctags_check_executable() {
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
function print_symbol($file, $line_num, $type, $token, $language) {
|
function print_symbol($file, $line_num, $type, $token, $context, $language) {
|
||||||
// get rid of relative path
|
// get rid of relative path
|
||||||
$file = explode('/', $file);
|
$file = explode('/', $file);
|
||||||
if ($file[0] == '.' || $file[0] == "..") {
|
if ($file[0] == '.' || $file[0] == "..") {
|
||||||
|
@ -120,6 +124,7 @@ function print_symbol($file, $line_num, $type, $token, $language) {
|
||||||
$file = '/' . implode('/', $file);
|
$file = '/' . implode('/', $file);
|
||||||
|
|
||||||
$parts = array(
|
$parts = array(
|
||||||
|
$context,
|
||||||
$token,
|
$token,
|
||||||
$type,
|
$type,
|
||||||
strtolower($language),
|
strtolower($language),
|
||||||
|
|
|
@ -45,6 +45,7 @@ foreach (Futures($futures)->limit(8) as $file => $future) {
|
||||||
$future->resolve());
|
$future->resolve());
|
||||||
|
|
||||||
$root = $tree->getRootNode();
|
$root = $tree->getRootNode();
|
||||||
|
$scopes = array();
|
||||||
|
|
||||||
$functions = $root->selectDescendantsOfType('n_FUNCTION_DECLARATION');
|
$functions = $root->selectDescendantsOfType('n_FUNCTION_DECLARATION');
|
||||||
foreach ($functions as $function) {
|
foreach ($functions as $function) {
|
||||||
|
@ -56,18 +57,64 @@ foreach (Futures($futures)->limit(8) as $file => $future) {
|
||||||
foreach ($classes as $class) {
|
foreach ($classes as $class) {
|
||||||
$class_name = $class->getChildByIndex(1);
|
$class_name = $class->getChildByIndex(1);
|
||||||
print_symbol($file, 'class', $class_name);
|
print_symbol($file, 'class', $class_name);
|
||||||
|
$scopes[] = array($class, $class_name);
|
||||||
}
|
}
|
||||||
|
|
||||||
$interfaces = $root->selectDescendantsOfType('n_INTERFACE_DECLARATION');
|
$interfaces = $root->selectDescendantsOfType('n_INTERFACE_DECLARATION');
|
||||||
foreach ($interfaces as $interface) {
|
foreach ($interfaces as $interface) {
|
||||||
$interface_name = $interface->getChildByIndex(1);
|
$interface_name = $interface->getChildByIndex(1);
|
||||||
print_symbol($file, 'interface', $interface_name);
|
print_symbol($file, 'interface', $interface_name);
|
||||||
|
$scopes[] = array($interface, $interface_name);
|
||||||
|
}
|
||||||
|
|
||||||
|
$constants = $root->selectDescendantsOfType('n_CONSTANT_DECLARATION_LIST');
|
||||||
|
foreach ($constants as $constant_list) {
|
||||||
|
foreach ($constant_list->getChildren() as $constant) {
|
||||||
|
$constant_name = $constant->getChildByIndex(0);
|
||||||
|
print_symbol($file, 'constant', $constant_name);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
foreach ($scopes as $scope) {
|
||||||
|
// this prints duplicate symbols in the case of nested classes
|
||||||
|
// luckily, PHP doesn't allow those
|
||||||
|
list($class, $class_name) = $scope;
|
||||||
|
|
||||||
|
$consts = $class->selectDescendantsOfType(
|
||||||
|
'n_CLASS_CONSTANT_DECLARATION_LIST');
|
||||||
|
foreach ($consts as $const_list) {
|
||||||
|
foreach ($const_list->getChildren() as $const) {
|
||||||
|
$const_name = $const->getChildByIndex(0);
|
||||||
|
print_symbol($file, 'class_const', $const_name, $class_name);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
$members = $class->selectDescendantsOfType(
|
||||||
|
'n_CLASS_MEMBER_DECLARATION_LIST');
|
||||||
|
foreach ($members as $member_list) {
|
||||||
|
foreach ($member_list->getChildren() as $member) {
|
||||||
|
if ($member->getTypeName() == 'n_CLASS_MEMBER_MODIFIER_LIST') {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
$member_name = $member->getChildByIndex(0);
|
||||||
|
print_symbol($file, 'member', $member_name, $class_name);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
$methods = $class->selectDescendantsOfType('n_METHOD_DECLARATION');
|
||||||
|
foreach ($methods as $method) {
|
||||||
|
$method_name = $method->getChildByIndex(2);
|
||||||
|
print_symbol($file, 'method', $method_name, $class_name);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
function print_symbol($file, $type, $token) {
|
function print_symbol($file, $type, $token, $context=null) {
|
||||||
$parts = array(
|
$parts = array(
|
||||||
$token->getConcreteString(),
|
$context ? $context->getConcreteString() : '',
|
||||||
|
// variable tokens are `$name`, not just `name`, so strip the $ off of
|
||||||
|
// class field names
|
||||||
|
ltrim($token->getConcreteString(), '$'),
|
||||||
$type,
|
$type,
|
||||||
'php',
|
'php',
|
||||||
$token->getLineNumber(),
|
$token->getLineNumber(),
|
||||||
|
|
Loading…
Reference in a new issue