From 81976ff2ff9338e4544677dbcb76c47825aa7c2d Mon Sep 17 00:00:00 2001 From: epriestley Date: Thu, 24 May 2012 10:56:56 -0700 Subject: [PATCH] Provide a simpler analyzer script for killing __init__.php Summary: The `phutil_analyzer.php` script currently analyzes entire modules and is fairly complex. We don't need or want this in a post-__init__.php world. This is basically a simplified version of `phutil_analyzer.php`, which takes one file and emits symbols. Test Plan: ```$ ./scripts/phutil_symbols.php resources/test/diverse_symbols.php { "have" : { "function" : { "f" : 348 }, "class" : { "L" : 308, "A" : 497, "C" : 509, "D" : 531, "CLocal" : 627 }, "interface" : { "ILocal" : 593 } }, "need" : { "function" : { "g" : 402, "h" : 462 }, "class" : { "B" : 519, "INonlocal" : 642, "U" : 552, "X" : 421, "V" : 557, "W" : 565, "P" : 572 }, "interface" : { "IForeign" : 608 } } }``` Reviewers: vrana, nh, btrahan Reviewed By: vrana CC: aran Maniphest Tasks: T1103 Differential Revision: https://secure.phabricator.com/D2561 --- resources/test/diverse_symbols.php.example | 72 ++++ scripts/phutil_symbols.php | 417 +++++++++++++++++++++ 2 files changed, 489 insertions(+) create mode 100644 resources/test/diverse_symbols.php.example create mode 100755 scripts/phutil_symbols.php diff --git a/resources/test/diverse_symbols.php.example b/resources/test/diverse_symbols.php.example new file mode 100644 index 00000000..88b16daf --- /dev/null +++ b/resources/test/diverse_symbols.php.example @@ -0,0 +1,72 @@ +setTagline('identify symbols in a PHP source file'); +$args->setSynopsis(<<parseStandardArguments(); +$args->parse( + array( + array( + 'name' => 'all', + 'help' => 'Report all symbols, including builtins and declared '. 
+ 'externals.', + ), + array( + 'name' => 'ugly', + 'help' => 'Do not prettify JSON output.', + ), + array( + 'name' => 'path', + 'wildcard' => true, + 'help' => 'PHP Source file to analyze.', + ), + )); + +$paths = $args->getArg('path'); +if (count($paths) !== 1) { + throw new Exception("Specify exactly one path!"); +} +$path = Filesystem::resolvePath(head($paths)); + +$show_all = $args->getArg('all'); + +$source_code = Filesystem::readFile($path); +$tree = XHPASTTree::newFromData($source_code); +$root = $tree->getRootNode(); + +$root->buildSelectCache(); + + +// -( Marked Externals )------------------------------------------------------ + + +// Identify symbols marked with "@phutil-external-symbol", so we exclude them +// from the dependency list. + +$externals = array(); +$doc_parser = new PhutilDocblockParser(); +foreach ($root->getTokens() as $token) { + if ($token->getTypeName() == 'T_DOC_COMMENT') { + list($block, $special) = $doc_parser->parse($token->getValue()); + + $ext_list = idx($special, 'phutil-external-symbol'); + $ext_list = explode("\n", $ext_list); + $ext_list = array_filter($ext_list); + + foreach ($ext_list as $ext_ref) { + $matches = null; + if (preg_match('/^\s*(\S+)\s+(\S+)/', $ext_ref, $matches)) { + $externals[$matches[1]][$matches[2]] = true; + } + } + } +} + + +// -( Declarations and Dependencies )----------------------------------------- + + +// The first stage of analysis is to find all the symbols we declare in the +// file (like functions and classes) and all the symbols we use in the file +// (like calling functions and invoking classes). Later, we filter this list +// to exclude builtins. + + +$have = array(); // For symbols we declare. +$need = array(); // For symbols we use. + + +// -( Functions )------------------------------------------------------------- + + +// Find functions declared in this file. + +// This is "function f() { ... }". 
$functions = $root->selectDescendantsOfType('n_FUNCTION_DECLARATION');
foreach ($functions as $function) {
  // The node at index 2 is used as the function's name below; an n_EMPTY
  // node there means the function has no name.
  $name = $function->getChildByIndex(2);
  if ($name->getTypeName() == 'n_EMPTY') {
    // This is an anonymous function; don't record it into the symbol
    // index.
    continue;
  }
  $have[] = array(
    'type' => 'function',
    'symbol' => $name,
  );
}


// Find functions used by this file. Uses:
//
//   - Explicit Call
//   - String literal passed to call_user_func() or call_user_func_array()
//
// TODO: Possibly support these:
//
//   - String literal in ReflectionFunction().

// This is "f();".
$calls = $root->selectDescendantsOfType('n_FUNCTION_CALL');
foreach ($calls as $call) {
  $name = $call->getChildByIndex(0);
  if ($name->getTypeName() == 'n_VARIABLE' ||
      $name->getTypeName() == 'n_VARIABLE_VARIABLE') {
    // Ignore these, we can't analyze them.
    continue;
  }
  if ($name->getTypeName() == 'n_CLASS_STATIC_ACCESS') {
    // These are "C::f()", we'll pick this up later on.
    continue;
  }

  // PHP function names are case-insensitive, so normalize the name before
  // comparing it; otherwise "Call_User_Func('f')" would not be recognized.
  $call_name = strtolower($name->getConcreteString());
  if ($call_name == 'call_user_func' ||
      $call_name == 'call_user_func_array') {
    $params = $call->getChildByIndex(1)->getChildren();
    if (!count($params)) {
      // This is a bare call_user_func() with no arguments; just ignore it.
      continue;
    }
    $symbol = array_shift($params);
    $symbol_value = $symbol->getStringLiteralValue();
    if (!$symbol_value) {
      // No usable string literal value for the callback; we can't analyze
      // it.
      continue;
    }
    if (strpos($symbol_value, '::') !== false) {
      // This is a "Class::method" callable string. It names a static
      // method, not a function, so recording it as a function would report
      // a dependency on a function which can never exist.
      continue;
    }
    $need[] = array(
      'type' => 'function',
      'name' => $symbol_value,
      'symbol' => $symbol,
    );
  } else {
    $need[] = array(
      'type' => 'function',
      'symbol' => $name,
    );
  }
}


// -( Classes )---------------------------------------------------------------


// Find classes declared by this file.


// This is "class X ... { ... }".
$classes = $root->selectDescendantsOfType('n_CLASS_DECLARATION');
foreach ($classes as $class) {
  $class_name = $class->getChildByIndex(1);
  $have[] = array(
    'type' => 'class',
    'symbol' => $class_name,
  );
}


// Find classes used by this file. We identify these:
//
//   - class ... extends X
//   - new X
//   - Static method call
//   - Static property access
//   - Use of class constant
//
// TODO: Possibly support these:
//
//   - typehints
//   - instanceof
//   - catch
//   - String literal in ReflectionClass().
//   - String literal in array literal in call_user_func()/call_user_func_array()


// This is "class X ... { ... }".
$classes = $root->selectDescendantsOfType('n_CLASS_DECLARATION');
foreach ($classes as $class) {
  $extends = $class->getChildByIndex(2);
  foreach ($extends->selectDescendantsOfType('n_CLASS_NAME') as $parent) {
    $need[] = array(
      'type' => 'class',
      'symbol' => $parent,
    );
  }
}

// This is "new X()".

// "self", "static" and "parent" are keywords which refer to classes that are
// necessarily already loaded, not names of real classes. The static access
// scan elsewhere in this file skips them; skip them here too so that
// "new self()" is not reported as a dependency on a class named "self".
// Keys are lowercase because these keywords are case-insensitive in PHP.
$magic_class_names = array(
  'static' => true,
  'parent' => true,
  'self' => true,
);

$uses_of_new = $root->selectDescendantsOfType('n_NEW');
foreach ($uses_of_new as $new_operator) {
  $name = $new_operator->getChildByIndex(0);
  if ($name->getTypeName() == 'n_VARIABLE' ||
      $name->getTypeName() == 'n_VARIABLE_VARIABLE') {
    // This is "new $x()"; we can't analyze it.
    continue;
  }
  if (isset($magic_class_names[strtolower($name->getConcreteString())])) {
    continue;
  }
  $need[] = array(
    'type' => 'class',
    'symbol' => $name,
  );
}

// This covers all of "X::$y", "X::y()" and "X::CONST".
// Keywords which may appear on the left of "::" but do not name a class we
// need to load.
$magic_names = array(
  'static' => true,
  'parent' => true,
  'self' => true,
);

foreach ($root->selectDescendantsOfType('n_CLASS_STATIC_ACCESS') as $access) {
  $target = $access->getChildByIndex(0);

  // Only literal class names are analyzable, and the magic keywords above
  // are not dependencies. The type check comes first, so the concrete string
  // is only read from class name nodes.
  $is_class_name = ($target->getTypeName() == 'n_CLASS_NAME');
  if (!$is_class_name || isset($magic_names[$target->getConcreteString()])) {
    continue;
  }

  $need[] = array(
    'type' => 'class',
    'symbol' => $target,
  );
}


// -( Interfaces )------------------------------------------------------------


// Find interfaces declared in this file.


// This is "interface X ... { ... }".
foreach ($root->selectDescendantsOfType('n_INTERFACE_DECLARATION') as $decl) {
  $have[] = array(
    'type' => 'interface',
    'symbol' => $decl->getChildByIndex(1),
  );
}


// Find interfaces used by this file. We identify these:
//
//   - class ... implements X
//   - interface ... extends X


// This is "class X ... { ... }".
foreach ($root->selectDescendantsOfType('n_CLASS_DECLARATION') as $decl) {
  $implements_list = $decl->getChildByIndex(3);
  $implemented = $implements_list->selectDescendantsOfType('n_CLASS_NAME');
  foreach ($implemented as $implemented_name) {
    $need[] = array(
      'type' => 'interface',
      'symbol' => $implemented_name,
    );
  }
}


// This is "interface X ... { ... }".
foreach ($root->selectDescendantsOfType('n_INTERFACE_DECLARATION') as $decl) {
  $parents = $decl->getChildByIndex(2)->selectDescendantsOfType('n_CLASS_NAME');
  foreach ($parents as $parent_name) {
    $need[] = array(
      'type' => 'interface',
      'symbol' => $parent_name,
    );
  }
}


// -( Analysis )--------------------------------------------------------------


// Map of symbol type => symbol name => offset, for everything this file
// declares.
$declared_symbols = array();
foreach ($have as $spec) {
  $symbol = $spec['symbol'];
  $declared_symbols[$spec['type']][$symbol->getConcreteString()] =
    $symbol->getOffset();
}

// Same shape as above, for everything this file uses but does not declare.
$required_symbols = array();
foreach ($need as $spec) {
  $symbol = $spec['symbol'];
  $type = $spec['type'];

  // An explicit "name" takes precedence over the symbol node's own text.
  $name = idx($spec, 'name');
  if (!$name) {
    $name = $symbol->getConcreteString();
  }

  // Unless everything was requested, ignore symbols declared as externals
  // and symbols which are builtins.
  $is_filtered = !$show_all && (
    !empty($externals[$type][$name]) ||
    !empty($builtins[$type][$name]));
  if ($is_filtered) {
    continue;
  }

  // Report only the first use of a symbol, since reporting all of them isn't
  // terribly informative; and don't treat symbols this file declares as
  // requirements.
  $is_known =
    !empty($required_symbols[$type][$name]) ||
    !empty($declared_symbols[$type][$name]);
  if ($is_known) {
    continue;
  }

  $required_symbols[$type][$name] = $symbol->getOffset();
}

$result = array(
  'have' => $declared_symbols,
  'need' => $required_symbols,
);


// -( Output )----------------------------------------------------------------


if (!$args->getArg('ugly')) {
  $encoder = new PhutilJSON();
  $output = $encoder->encodeFormatted($result);
} else {
  $output = json_encode($result);
}
echo $output;


// -( Library )---------------------------------------------------------------


/**
 * Build the map of symbols which are treated as builtins.
 *
 * The map is built by introspecting the classes, interfaces and internal
 * functions defined at the time of the call, plus a few hard-coded entries.
 *
 * @return map<string, map<string, bool>> Map with keys "class", "function"
 *         and "interface"; each value maps symbol names to true.
 */
function phutil_symbols_get_builtins() {
  $defined_functions = get_defined_functions();

  $class_map = array_fill_keys(get_declared_classes(), true);
  $class_map += array(
    'PhutilBootloader' => true,
  );

  $function_map = array(
    'empty' => true,
    'isset' => true,
    'die' => true,

    // These are provided by libphutil but not visible in the map.

    'phutil_is_windows' => true,
    'phutil_load_library' => true,
    'phutil_is_hiphop_runtime' => true,

    // HPHP/i defines these functions as 'internal', but they are NOT
    // builtins and do not exist in vanilla PHP. Make sure we don't mark
    // them as builtin since we need to add dependencies for them.
    'idx' => false,
    'id' => false,
  );

  // Array union keeps the entries above when a name is also an internal
  // function, so the explicit "false" entries survive this merge and are then
  // dropped by array_filter().
  $function_map += array_fill_keys($defined_functions['internal'], true);
  $function_map = array_filter($function_map);

  return array(
    'class' => $class_map,
    'function' => $function_map,
    'interface' => array_fill_keys(get_declared_interfaces(), true),
  );
}