diff --git a/bin/diviner b/bin/diviner new file mode 120000 index 0000000000..465b7cf593 --- /dev/null +++ b/bin/diviner @@ -0,0 +1 @@ +../scripts/diviner/diviner.php \ No newline at end of file diff --git a/scripts/diviner/diviner.php b/scripts/diviner/diviner.php new file mode 100755 index 0000000000..9d866ad0b3 --- /dev/null +++ b/scripts/diviner/diviner.php @@ -0,0 +1,22 @@ +#!/usr/bin/env php +setTagline('documentation generator'); +$args->setSynopsis(<<parseStandardArguments(); + +$args->parseWorkflows( + array( + new DivinerGenerateWorkflow(), + new DivinerAtomizeWorkflow(), + new PhutilHelpArgumentWorkflow(), + )); diff --git a/src/__phutil_library_map__.php b/src/__phutil_library_map__.php index 722084f576..751c7b60d2 100644 --- a/src/__phutil_library_map__.php +++ b/src/__phutil_library_map__.php @@ -422,7 +422,16 @@ phutil_register_library_map(array( 'DiffusionTagListView' => 'applications/diffusion/view/DiffusionTagListView.php', 'DiffusionURITestCase' => 'applications/diffusion/request/__tests__/DiffusionURITestCase.php', 'DiffusionView' => 'applications/diffusion/view/DiffusionView.php', + 'DivinerArticleAtomizer' => 'applications/diviner/atomizer/DivinerArticleAtomizer.php', + 'DivinerAtom' => 'applications/diviner/atom/DivinerAtom.php', + 'DivinerAtomCache' => 'applications/diviner/cache/DivinerAtomCache.php', + 'DivinerAtomRef' => 'applications/diviner/atom/DivinerAtomRef.php', + 'DivinerAtomizeWorkflow' => 'applications/diviner/workflow/DivinerAtomizeWorkflow.php', + 'DivinerAtomizer' => 'applications/diviner/atomizer/DivinerAtomizer.php', + 'DivinerFileAtomizer' => 'applications/diviner/atomizer/DivinerFileAtomizer.php', + 'DivinerGenerateWorkflow' => 'applications/diviner/workflow/DivinerGenerateWorkflow.php', 'DivinerListController' => 'applications/diviner/controller/DivinerListController.php', + 'DivinerWorkflow' => 'applications/diviner/workflow/DivinerWorkflow.php', 'DrydockAllocatorWorker' => 
'applications/drydock/worker/DrydockAllocatorWorker.php', 'DrydockApacheWebrootInterface' => 'applications/drydock/interface/webroot/DrydockApacheWebrootInterface.php', 'DrydockBlueprint' => 'applications/drydock/blueprint/DrydockBlueprint.php', @@ -1780,7 +1789,12 @@ phutil_register_library_map(array( 'DiffusionTagListView' => 'DiffusionView', 'DiffusionURITestCase' => 'ArcanistPhutilTestCase', 'DiffusionView' => 'AphrontView', + 'DivinerArticleAtomizer' => 'DivinerAtomizer', + 'DivinerAtomizeWorkflow' => 'DivinerWorkflow', + 'DivinerFileAtomizer' => 'DivinerAtomizer', + 'DivinerGenerateWorkflow' => 'DivinerWorkflow', 'DivinerListController' => 'PhabricatorController', + 'DivinerWorkflow' => 'PhutilArgumentWorkflow', 'DrydockAllocatorWorker' => 'PhabricatorWorker', 'DrydockApacheWebrootInterface' => 'DrydockWebrootInterface', 'DrydockCommandInterface' => 'DrydockInterface', diff --git a/src/applications/diviner/atom/DivinerAtom.php b/src/applications/diviner/atom/DivinerAtom.php new file mode 100644 index 0000000000..aa8de92cf6 --- /dev/null +++ b/src/applications/diviner/atom/DivinerAtom.php @@ -0,0 +1,289 @@ +project = $project; + return $this; + } + + public function getProject() { + return $this->project; + } + + public function setContext($context) { + $this->context = $context; + return $this; + } + + public function getContext() { + return $this->context; + } + + public static function getAtomSerializationVersion() { + return 1; + } + + public function addWarning($warning) { + $this->warnings[] = $warning; + return $this; + } + + public function getWarnings() { + return $this->warnings; + } + + public function setDocblockRaw($docblock_raw) { + $this->docblockRaw = $docblock_raw; + + $parser = new PhutilDocblockParser(); + list($text, $meta) = $parser->parse($docblock_raw); + $this->docblockText = $text; + $this->docblockMeta = $meta; + + return $this; + } + + public function getDocblockRaw() { + return $this->docblockRaw; + } + + public function 
getDocblockText() { + if ($this->docblockText === null) { + throw new Exception("Call setDocblockRaw() before getDocblockText()!"); + } + return $this->docblockText; + } + + public function getDocblockMeta() { + if ($this->docblockMeta === null) { + throw new Exception("Call setDocblockRaw() before getDocblockMeta()!"); + } + return $this->docblockMeta; + } + + public function setType($type) { + $this->type = $type; + return $this; + } + + public function getType() { + return $this->type; + } + + public function setName($name) { + $this->name = $name; + return $this; + } + + public function getName() { + return $this->name; + } + + public function setFile($file) { + $this->file = $file; + return $this; + } + + public function getFile() { + return $this->file; + } + + public function setLine($line) { + $this->line = $line; + return $this; + } + + public function getLine() { + return $this->line; + } + + public function setContentRaw($content_raw) { + $this->contentRaw = $content_raw; + return $this; + } + + public function getContentRaw() { + return $this->contentRaw; + } + + public function setHash($hash) { + $this->hash = $hash; + return $this; + } + + public function addLink(DivinerAtomRef $ref) { + $this->links[] = $ref; + return $this; + } + + public function addExtends(DivinerAtomRef $ref) { + $this->extends[] = $ref; + return $this; + } + + public function getLinkDictionaries() { + return mpull($this->links, 'toDictionary'); + } + + public function getExtendsDictionaries() { + return mpull($this->extends, 'toDictionary'); + } + + public function getHash() { + if ($this->hash) { + return $this->hash; + } + + $parts = array( + $this->getType(), + $this->getName(), + $this->getFile(), + $this->getLine(), + $this->getLength(), + $this->getLanguage(), + $this->getContentRaw(), + $this->getDocblockRaw(), + mpull($this->extends, 'toHash'), + mpull($this->links, 'toHash'), + ); + + return md5(serialize($parts)).'N'; + } + + public function setLength($length) { + 
$this->length = $length; + return $this; + } + + public function getLength() { + return $this->length; + } + + public function setLanguage($language) { + $this->language = $language; + return $this; + } + + public function getLanguage() { + return $this->language; + } + + public function addChildHash($child_hash) { + $this->childHashes[] = $child_hash; + return $this; + } + + public function getChildHashes() { + return $this->childHashes; + } + + public function setParentHash($parent_hash) { + if ($this->parentHash) { + throw new Exception("Atom already has a parent!"); + } + $this->parentHash = $parent_hash; + return $this; + } + + public function getParentHash() { + return $this->parentHash; + } + + public function addChild(DivinerAtom $atom) { + $atom->setParentHash($this->getHash()); + $this->addChildHash($atom->getHash()); + return $this; + } + + public function getURI() { + $parts = array(); + $parts[] = phutil_escape_uri_path_component($this->getType()); + if ($this->getContext()) { + $parts[] = phutil_escape_uri_path_component($this->getContext()); + } + $parts[] = phutil_escape_uri_path_component($this->getName()); + $parts[] = null; + return implode('/', $parts); + } + + + public function toDictionary() { + // NOTE: If you change this format, bump the format version in + // getAtomSerializationVersion(). 
+ + return array( + 'type' => $this->getType(), + 'name' => $this->getName(), + 'file' => $this->getFile(), + 'line' => $this->getLine(), + 'hash' => $this->getHash(), + 'uri' => $this->getURI(), + 'length' => $this->getLength(), + 'context' => $this->getContext(), + 'language' => $this->getLanguage(), + 'docblockRaw' => $this->getDocblockRaw(), + 'warnings' => $this->getWarnings(), + 'parentHash' => $this->getParentHash(), + 'childHashes' => $this->getChildHashes(), + 'extends' => $this->getExtendsDictionaries(), + 'links' => $this->getLinkDictionaries(), + 'ref' => $this->getRef()->toDictionary(), + ); + } + + public function getRef() { + return id(new DivinerAtomRef()) + ->setProject($this->getProject()) + ->setContext($this->getContext()) + ->setType($this->getType()) + ->setName($this->getName()); + } + + public static function newFromDictionary(array $dictionary) { + $atom = id(new DivinerAtom()) + ->setType(idx($dictionary, 'type')) + ->setName(idx($dictionary, 'name')) + ->setFile(idx($dictionary, 'file')) + ->setLine(idx($dictionary, 'line')) + ->setHash(idx($dictionary, 'hash')) + ->setLength(idx($dictionary, 'length')) + ->setContext(idx($dictionary, 'context')) + ->setLanguage(idx($dictionary, 'language')) + ->setParentHash(idx($dictionary, 'parentHash')) + ->setDocblockRaw(idx($dictionary, 'docblockRaw')); + + foreach (idx($dictionary, 'warnings', array()) as $warning) { + $atom->addWarning($warning); + } + + foreach (idx($dictionary, 'childHashes', array()) as $child) { + $atom->addChildHash($child); + } + + return $atom; + } + +} diff --git a/src/applications/diviner/atom/DivinerAtomRef.php b/src/applications/diviner/atom/DivinerAtomRef.php new file mode 100644 index 0000000000..0ee5188c8c --- /dev/null +++ b/src/applications/diviner/atom/DivinerAtomRef.php @@ -0,0 +1,69 @@ +name = $name; + return $this; + } + + public function getName() { + return $this->name; + } + + public function setType($type) { + $this->type = $type; + return $this; + } + + 
public function getType() { + return $this->type; + } + + public function setContext($context) { + $this->context = $context; + return $this; + } + + public function getContext() { + return $this->context; + } + + public function setProject($project) { + $this->project = $project; + return $this; + } + + public function getProject() { + return $this->project; + } + + public function toDictionary() { + return array( + 'project' => $this->getProject(), + 'context' => $this->getContext(), + 'type' => $this->getType(), + 'name' => $this->getName(), + ); + } + + public function toHash() { + $dict = $this->toDictionary(); + ksort($dict); + return md5(serialize($dict)).'S'; + } + + public static function newFromDictionary(array $dict) { + $obj = new DivinerAtomRef(); + $obj->project = idx($dict, 'project'); + $obj->context = idx($dict, 'context'); + $obj->type = idx($dict, 'type'); + $obj->name = idx($dict, 'name'); + return $obj; + } +} diff --git a/src/applications/diviner/atomizer/DivinerArticleAtomizer.php b/src/applications/diviner/atomizer/DivinerArticleAtomizer.php new file mode 100644 index 0000000000..3718319219 --- /dev/null +++ b/src/applications/diviner/atomizer/DivinerArticleAtomizer.php @@ -0,0 +1,25 @@ +newAtom(DivinerAtom::TYPE_ARTICLE) + ->setLine(1) + ->setLength(count(explode("\n", $file_data))) + ->setLanguage('human'); + + $block = "/**\n".str_replace("\n", "\n * ", $file_data)."\n */"; + $atom->setDocblockRaw($block); + + $meta = $atom->getDocblockMeta(); + $title = idx($meta, 'title'); + if (!strlen($title)) { + $title = 'Untitled Article "'.basename($file_name).'"'; + $atom->addWarning("Article has no @title!"); + } + $atom->setName($title); + + return array($atom); + } + +} diff --git a/src/applications/diviner/atomizer/DivinerAtomizer.php b/src/applications/diviner/atomizer/DivinerAtomizer.php new file mode 100644 index 0000000000..ba53c8cf4d --- /dev/null +++ b/src/applications/diviner/atomizer/DivinerAtomizer.php @@ -0,0 +1,45 @@ +project = 
$project; + return $this; + } + + final public function getProject() { + return $this->project; + } + + protected function newAtom($type) { + return id(new DivinerAtom()) + ->setProject($this->getProject()) + ->setType($type); + } + + protected function newRef($type, $name, $project = null, $context = null) { + $project = coalesce($project, $this->getProject()); + + return id(new DivinerAtomRef()) + ->setProject($project) + ->setContext($context) + ->setType($type) + ->setName($name); + } + +} diff --git a/src/applications/diviner/atomizer/DivinerFileAtomizer.php b/src/applications/diviner/atomizer/DivinerFileAtomizer.php new file mode 100644 index 0000000000..3ea1ded7d6 --- /dev/null +++ b/src/applications/diviner/atomizer/DivinerFileAtomizer.php @@ -0,0 +1,14 @@ +newAtom(DivinerAtom::TYPE_FILE) + ->setName($file_name) + ->setFile($file_name) + ->setContentRaw($file_data); + + return array($atom); + } + +} diff --git a/src/applications/diviner/cache/DivinerAtomCache.php b/src/applications/diviner/cache/DivinerAtomCache.php new file mode 100644 index 0000000000..382c977e56 --- /dev/null +++ b/src/applications/diviner/cache/DivinerAtomCache.php @@ -0,0 +1,261 @@ +setCacheDirectory($cache_directory); + $profiled_cache = id(new PhutilKeyValueCacheProfiler($dir_cache)) + ->setProfiler(PhutilServiceProfiler::getInstance()) + ->setName('diviner-atom-cache'); + $this->cache = $profiled_cache; + } + + private function getCache() { + return $this->cache; + } + + public function delete() { + $this->getCache()->destroyCache(); + $this->fileHashMap = null; + $this->atomMap = null; + $this->atoms = array(); + + return $this; + } + + /** + * Convert a long-form hash key like `ccbbaaaaaaaaaaaaaaaaaaaaaaaaaaaaN` into + * a shortened directory form, like `cc/bb/aaaaaaaaN`. In conjunction with + * @{class:PhutilKeyValueCacheDirectory}, this gives us nice directories + * inside .divinercache instead of a million hash files with huge names at + * top level. 
+ */ + private function getHashKey($hash) { + return implode( + '/', + array( + substr($hash, 0, 2), + substr($hash, 2, 2), + substr($hash, 4, 8), + )); + } + + +/* -( File Hash Map )------------------------------------------------------ */ + + + public function getFileHashMap() { + if ($this->fileHashMap === null) { + $this->fileHashMap = $this->getCache()->getKey('file', array()); + } + return $this->fileHashMap; + } + + public function addFileHash($file_hash, $atom_hash) { + $this->getFileHashMap(); + $this->fileHashMap[$file_hash] = $atom_hash; + return $this; + } + + public function fileHashExists($file_hash) { + $map = $this->getFileHashMap(); + return isset($map[$file_hash]); + } + + public function deleteFileHash($file_hash) { + if ($this->fileHashExists($file_hash)) { + $map = $this->getFileHashMap(); + $atom_hash = $map[$file_hash]; + unset($this->fileHashMap[$file_hash]); + + $this->deleteAtomHash($atom_hash); + } + + return $this; + } + + +/* -( Atom Map )----------------------------------------------------------- */ + + + public function getAtomMap() { + if ($this->atomMap === null) { + $this->atomMap = $this->getCache()->getKey('atom', array()); + } + return $this->atomMap; + } + + public function getAtom($atom_hash) { + if (!array_key_exists($atom_hash, $this->atoms)) { + $key = 'atom/'.$this->getHashKey($atom_hash); + $this->atoms[$atom_hash] = $this->getCache()->getKey($key); + } + return $this->atoms[$atom_hash]; + } + + public function addAtom(array $atom) { + $hash = $atom['hash']; + $this->atoms[$hash] = $atom; + + $this->getAtomMap(); + $this->atomMap[$hash] = true; + + $this->writeAtoms['atom/'.$this->getHashKey($hash)] = $atom; + + return $this; + } + + public function deleteAtomHash($atom_hash) { + $atom = $this->getAtom($atom_hash); + if ($atom) { + foreach ($atom['childHashes'] as $child_hash) { + $this->deleteAtomHash($child_hash); + } + } + + $this->getAtomMap(); + unset($this->atomMap[$atom_hash]); + 
// Pending writes are keyed the same way addAtom() stores them ('atom/' plus the shortened hash key), not by the raw atom hash; using the raw hash left the write queued, so saveAtoms() would re-create the deleted atom. + unset($this->writeAtoms['atom/'.$this->getHashKey($atom_hash)]); + + $this->getCache()->deleteKey('atom/'.$this->getHashKey($atom_hash)); + + return $this; + } + + public function saveAtoms() { + $this->getCache()->setKeys( + array( + 'file' => $this->getFileHashMap(), + 'atom' => $this->getAtomMap(), + ) + $this->writeAtoms); + $this->writeAtoms = array(); + return $this; + } + + +/* -( Symbol Hash Map )---------------------------------------------------- */ + + + public function getSymbolMap() { + if ($this->symbolMap === null) { + $this->symbolMap = $this->getCache()->getKey('symbol', array()); + } + return $this->symbolMap; + } + + public function addSymbol($atom_hash, $symbol_hash) { + $this->getSymbolMap(); + $this->symbolMap[$atom_hash] = $symbol_hash; + return $this; + } + + public function deleteSymbol($atom_hash) { + $this->getSymbolMap(); + unset($this->symbolMap[$atom_hash]); + + return $this; + } + + public function saveSymbols() { + $this->getCache()->setKeys( + array( + 'symbol' => $this->getSymbolMap(), + )); + return $this; + } + +/* -( Edge Map )----------------------------------------------------------- */ + + + public function getEdgeMap() { + if ($this->edgeDstMap === null) { + $this->edgeDstMap = $this->getCache()->getKey('edge', array()); + $this->edgeSrcMap = array(); + foreach ($this->edgeDstMap as $dst => $srcs) { + foreach ($srcs as $src => $ignored) { + $this->edgeSrcMap[$src][$dst] = true; + } + } + } + return $this->edgeDstMap; + } + + public function getEdgesWithDestination($symbol_hash) { + $this->getEdgeMap(); + return array_keys(idx($this->edgeDstMap, $symbol_hash, array())); + } + + public function addEdges($node_hash, array $symbol_hash_list) { + $this->getEdgeMap(); + $this->edgeSrcMap[$node_hash] = array_fill_keys($symbol_hash_list, true); + foreach ($symbol_hash_list as $symbol_hash) { + $this->edgeDstMap[$symbol_hash][$node_hash] = true; + } + return $this; + } + + public function deleteEdges($node_hash) { + $this->getEdgeMap(); + foreach 
(idx($this->edgeSrcMap, $node_hash, array()) as $dst => $ignored) { + unset($this->edgeDstMap[$dst][$node_hash]); + if (empty($this->edgeDstMap[$dst])) { + unset($this->edgeDstMap[$dst]); + } + } + unset($this->edgeSrcMap[$node_hash]); + return $this; + } + + public function saveEdges() { + $this->getCache()->setKeys( + array( + 'edge' => $this->getEdgeMap(), + )); + return $this; + } + + +/* -( Graph Map )---------------------------------------------------------- */ + + + public function getGraphMap() { + if ($this->graphMap === null) { + $this->graphMap = $this->getCache()->getKey('graph', array()); + } + return $this->graphMap; + } + + public function deleteGraph($node_hash) { + $this->getGraphMap(); + unset($this->graphMap[$node_hash]); + return $this; + } + + public function addGraph($node_hash, $graph_hash) { + $this->getGraphMap(); + $this->graphMap[$node_hash] = $graph_hash; + return $this; + } + + public function saveGraph() { + $this->getCache()->setKeys( + array( + 'graph' => $this->getGraphMap(), + )); + return $this; + } + +} diff --git a/src/applications/diviner/workflow/DivinerAtomizeWorkflow.php b/src/applications/diviner/workflow/DivinerAtomizeWorkflow.php new file mode 100644 index 0000000000..74130e7a73 --- /dev/null +++ b/src/applications/diviner/workflow/DivinerAtomizeWorkflow.php @@ -0,0 +1,103 @@ +setName('atomize') + ->setSynopsis(pht('Build atoms from source.')) + ->setArguments( + array( + array( + 'name' => 'atomizer', + 'param' => 'class', + 'help' => 'Specify a subclass of DivinerAtomizer.', + ), + array( + 'name' => 'files', + 'wildcard' => true, + ), + array( + 'name' => 'ugly', + 'help' => 'Produce ugly (but faster) output.', + ), + )); + } + + public function execute(PhutilArgumentParser $args) { + $console = PhutilConsole::getConsole(); + + $atomizer_class = $args->getArg('atomizer'); + if (!$atomizer_class) { + throw new Exception("Specify an atomizer class with --atomizer."); + } + + $symbols = id(new PhutilSymbolLoader()) + 
->setName($atomizer_class) + ->setConcreteOnly(true) + ->setAncestorClass('DivinerAtomizer') + ->selectAndLoadSymbols(); + if (!$symbols) { + throw new Exception( + "Atomizer class '{$atomizer_class}' must be a concrete subclass of ". + "DivinerAtomizer."); + } + + $atomizer = newv($atomizer_class, array()); + + $files = $args->getArg('files'); + if (!$files) { + throw new Exception("Specify one or more files to atomize."); + } + + $file_atomizer = new DivinerFileAtomizer(); + + $all_atoms = array(); + foreach ($files as $file) { + $data = Filesystem::readFile($file); + + if (!$this->shouldAtomizeFile($file, $data)) { + $console->writeLog("Skipping %s...\n", $file); + continue; + } else { + $console->writeLog("Atomizing %s...\n", $file); + } + + $file_atoms = $file_atomizer->atomize($file, $data); + $all_atoms[] = $file_atoms; + + if (count($file_atoms) !== 1) { + throw new Exception("Expected exactly one atom from file atomizer."); + } + $file_atom = head($file_atoms); + + $atoms = $atomizer->atomize($file, $data); + + foreach ($atoms as $atom) { + $file_atom->addChild($atom); + } + + $all_atoms[] = $atoms; + } + + $all_atoms = array_mergev($all_atoms); + $all_atoms = mpull($all_atoms, 'toDictionary'); + $all_atoms = ipull($all_atoms, null, 'hash'); + + if ($args->getArg('ugly')) { + $json = json_encode($all_atoms); + } else { + $json_encoder = new PhutilJSON(); + $json = $json_encoder->encodeFormatted($all_atoms); + } + + $console->writeOut('%s', $json); + + return 0; + } + + private function shouldAtomizeFile($file_name, $file_data) { + return (strpos($file_data, '@'.'undivinable') === false); + } + +} diff --git a/src/applications/diviner/workflow/DivinerGenerateWorkflow.php b/src/applications/diviner/workflow/DivinerGenerateWorkflow.php new file mode 100644 index 0000000000..d2b88321c5 --- /dev/null +++ b/src/applications/diviner/workflow/DivinerGenerateWorkflow.php @@ -0,0 +1,384 @@ +setName('generate') + ->setSynopsis(pht('Generate documentation.')) + 
->setArguments( + array( + array( + 'name' => 'clean', + 'help' => 'Clear the caches before generating documentation.', + ), + )); + } + + public function execute(PhutilArgumentParser $args) { + if ($args->getArg('clean')) { + $this->log(pht('CLEARING CACHES')); + $this->getAtomCache()->delete(); + } + + // The major challenge of documentation generation is one of dependency + // management. When regenerating documentation, we want to do the smallest + // amount of work we can, so that regenerating documentation after minor + // changes is quick. + // + // ATOM CACHE + // + // In the first stage, we find all the direct changes to source code since + // the last run. This stage relies on two data structures: + // + // - File Hash Map: map + // - Atom Map: map + // + // First, we hash all the source files in the project to detect any which + // have changed since the previous run (i.e., their hash is not present in + // the File Hash Map). If a file's content hash appears in the map, it has + // not changed, so we don't need to reparse it. + // + // We break the contents of each file into "atoms", which represent a unit + // of source code (like a function, method, class or file). Each atom has a + // "node hash" based on the content of the atom: if a function definition + // changes, the node hash of the atom changes too. The primary output of + // the atom cache is a list of node hashes which exist in the project. This + // is the Atom Map. The node hash depends only on the definition of the atom + // and the atomizer implementation. It ends with an "N", for "node". + // + // (We need the Atom Map in addition to the File Hash Map because each file + // may have several atoms in it (e.g., multiple functions, or a class and + // its methods). The File Hash Map contains an exhaustive list of all atoms + // with type "file", but not child atoms of those top-level atoms.) 
+ // + // GRAPH CACHE + // + // We now know which atoms exist, and can compare the Atom Map to some + // existing cache to figure out what has changed. However, this isn't + // sufficient to figure out which documentation actually needs to be + // regenerated, because atoms depend on other atoms. For example, if "B + // extends A" and the definition for A changes, we need to regenerate the + // documentation in B. Similarly, if X links to Y and Y changes, we should + // regenerate X. (In both these cases, the documentation for the connected + // atom may not actually change, but in some cases it will, and the extra + // work we need to do is generally very small compared to the size of the + // project.) + // + // To figure out which other nodes have changed, we compute a "graph hash" + // for each node. This hash combines the "node hash" with the node hashes + // of connected nodes. Our primary output is a list of graph hashes, which + // a documentation generator can use to easily determine what work needs + // to be done by comparing the list with a list of cached graph hashes, + // then generating documentation for new hashes and deleting documentation + // for missing hashes. The graph hash ends with a "G", for "graph". + // + // In this stage, we rely on three data structures: + // + // - Symbol Map: map<node hash, symbol hash> + // - Edge Map: map<node hash, list<symbol hash>> + // - Graph Map: map<node hash, graph hash> + // + // Calculating the graph hash requires several steps, because we need to + // figure out which nodes an atom is attached to. The atom contains symbolic + // references to other nodes by name (e.g., "extends SomeClass") in the form + // of DivinerAtomRefs. We can also build a symbolic reference for any atom + // from the atom itself. Each DivinerAtomRef generates a symbol hash, + // which ends with an "S", for "symbol". + // + // First, we update the symbol map. We remove (and mark dirty) any symbols + // associated with node hashes which no longer exist (e.g., old/dead nodes). 
+ // Second, we add (and mark dirty) any symbols associated with new nodes. + // We also add edges defined by new nodes to the graph. + // + // We initialize a list of dirty nodes to the list of new nodes, then + // find all nodes connected to dirty symbols and add them to the dirty + // node list. This list now contains every node with a new or changed + // graph hash. + // + // We walk the dirty list and compute the new graph hashes, adding them + // to the graph hash map. This Graph Map can then be passed to an actual + // documentation generator, which can compare the graph hashes to a list + // of already-generated graph hashes and easily assess which documents need + // to be regenerated and which can be deleted. + + $this->buildAtomCache(); + $this->buildGraphCache(); + } + +/* -( Atom Cache )--------------------------------------------------------- */ + + private function buildAtomCache() { + $this->log(pht('BUILDING ATOM CACHE')); + + $file_hashes = $this->findFilesInProject(); + + $this->log(pht('Found %d file(s) in project.', count($file_hashes))); + + $this->deleteDeadAtoms($file_hashes); + + $atomize = $this->getFilesToAtomize($file_hashes); + + $this->log(pht('Found %d unatomized, uncached file(s).', count($atomize))); + + $file_atomizers = $this->getAtomizersForFiles($atomize); + + $this->log(pht('Found %d file(s) to atomize.', count($file_atomizers))); + + $futures = $this->buildAtomizerFutures($file_atomizers); + if ($futures) { + $this->resolveAtomizerFutures($futures, $file_hashes); + $this->log(pht("Atomization complete.")); + } else { + $this->log(pht("Atom cache is up to date, no files to atomize.")); + } + + $this->log(pht("Writing atom cache.")); + + $this->getAtomCache()->saveAtoms(); + + $this->log(pht("Done.")); + } + + private function getAtomizersForFiles(array $files) { + $rules = $this->getRules(); + + $atomizers = array(); + + foreach ($files as $file) { + foreach ($rules as $rule => $atomizer) { + $ok = preg_match($rule, $file); + 
if ($ok === false) { + throw new Exception( + "Rule '{$rule}' is not a valid regular expression."); + } + if ($ok) { + $atomizers[$file] = $atomizer; + continue; + } + } + } + + return $atomizers; + } + + private function getRules() { + return $this->getConfig('rules', array()) + array( + '/\\.diviner$/' => 'DivinerArticleAtomizer', + ); + } + + + private function findFilesInProject() { + $file_hashes = id(new FileFinder($this->getRoot())) + ->excludePath('*/.*') + ->withType('f') + ->setGenerateChecksums(true) + ->find(); + + $version = $this->getDivinerAtomWorldVersion(); + + foreach ($file_hashes as $file => $md5_hash) { + // We want the hash to change if the file moves or Diviner gets updated, + // not just if the file content changes. Derive a hash from everything + // we care about. + $file_hashes[$file] = md5("{$file}\0{$md5_hash}\0{$version}").'F'; + } + + return $file_hashes; + } + + private function deleteDeadAtoms(array $file_hashes) { + $atom_cache = $this->getAtomCache(); + + $hash_to_file = array_flip($file_hashes); + foreach ($atom_cache->getFileHashMap() as $hash => $atom) { + if (empty($hash_to_file[$hash])) { + $atom_cache->deleteFileHash($hash); + } + } + } + + private function getFilesToAtomize(array $file_hashes) { + $atom_cache = $this->getAtomCache(); + + $atomize = array(); + foreach ($file_hashes as $file => $hash) { + if (!$atom_cache->fileHashExists($hash)) { + $atomize[] = $file; + } + } + + return $atomize; + } + + private function buildAtomizerFutures(array $file_atomizers) { + $atomizers = array(); + foreach ($file_atomizers as $file => $atomizer) { + $atomizers[$atomizer][] = $file; + } + + $futures = array(); + foreach ($atomizers as $class => $files) { + foreach (array_chunk($files, 32) as $chunk) { + $future = new ExecFuture( + '%s atomize --atomizer %s -- %Ls', + dirname(phutil_get_library_root('phabricator')).'/bin/diviner', + $class, + $chunk); + $future->setCWD($this->getRoot()); + + $futures[] = $future; + } + } + + return 
$futures; + } + + private function resolveAtomizerFutures(array $futures, array $file_hashes) { + assert_instances_of($futures, 'Future'); + + $atom_cache = $this->getAtomCache(); + foreach (Futures($futures)->limit(4) as $key => $future) { + $atoms = $future->resolveJSON(); + + foreach ($atoms as $atom) { + if ($atom['type'] == DivinerAtom::TYPE_FILE) { + $file_hash = $file_hashes[$atom['file']]; + $atom_cache->addFileHash($file_hash, $atom['hash']); + } + $atom_cache->addAtom($atom); + } + } + } + + + /** + * Get a global version number, which changes whenever any atom or atomizer + * implementation changes in a way which is not backward-compatible. + */ + private function getDivinerAtomWorldVersion() { + $version = array(); + $version['atom'] = DivinerAtom::getAtomSerializationVersion(); + $version['rules'] = $this->getRules(); + + $atomizers = id(new PhutilSymbolLoader()) + ->setAncestorClass('DivinerAtomizer') + ->setConcreteOnly(true) + ->selectAndLoadSymbols(); + + $atomizer_versions = array(); + foreach ($atomizers as $atomizer) { + $atomizer_versions[$atomizer['name']] = call_user_func( + array( + $atomizer['name'], + 'getAtomizerVersion', + )); + } + + ksort($atomizer_versions); + $version['atomizers'] = $atomizer_versions; + + return md5(serialize($version)); + } + + +/* -( Graph Cache )-------------------------------------------------------- */ + + + private function buildGraphCache() { + $this->log(pht('BUILDING GRAPH CACHE')); + + $atom_cache = $this->getAtomCache(); + $symbol_map = $atom_cache->getSymbolMap(); + $atoms = $atom_cache->getAtomMap(); + + $dirty_symbols = array(); + $dirty_nhashes = array(); + + $del_atoms = array_diff_key($symbol_map, $atoms); + $this->log(pht('Found %d obsolete atom(s) in graph.', count($del_atoms))); + foreach ($del_atoms as $nhash => $shash) { + $atom_cache->deleteSymbol($nhash); + $dirty_symbols[$shash] = true; + + $atom_cache->deleteEdges($nhash); + $atom_cache->deleteGraph($nhash); + } + + $new_atoms = 
array_diff_key($atoms, $symbol_map); + $this->log(pht('Found %d new atom(s) in graph.', count($new_atoms))); + foreach ($new_atoms as $nhash => $ignored) { + $shash = $this->computeSymbolHash($nhash); + $atom_cache->addSymbol($nhash, $shash); + $dirty_symbols[$shash] = true; + + $atom_cache->addEdges( + $nhash, + $this->getEdges($nhash)); + + $dirty_nhashes[$nhash] = true; + } + + $this->log(pht('Propagating changes through the graph.')); + + foreach ($dirty_symbols as $symbol => $ignored) { + foreach ($atom_cache->getEdgesWithDestination($symbol) as $edge) { + $dirty_nhashes[$edge] = true; + } + } + + $this->log(pht('Found %d affected atoms.', count($dirty_nhashes))); + + foreach ($dirty_nhashes as $nhash => $ignored) { + $atom_cache->addGraph($nhash, $this->computeGraphHash($nhash)); + } + + $this->log(pht('Writing graph cache.')); + + $atom_cache->saveGraph(); + $atom_cache->saveEdges(); + $atom_cache->saveSymbols(); + + $this->log(pht('Done.')); + } + + private function computeSymbolHash($node_hash) { + $atom_cache = $this->getAtomCache(); + $atom = $atom_cache->getAtom($node_hash); + + $ref = DivinerAtomRef::newFromDictionary($atom['ref']); + return $ref->toHash(); + } + + /** + * Return the symbol hashes of every same-project atom that the given node + * extends or links to, without duplicates. + * + * @param string Node hash of a cached atom. + * @return list List of symbol hashes the node has edges to. + */ + private function getEdges($node_hash) { + $atom_cache = $this->getAtomCache(); + $atom = $atom_cache->getAtom($node_hash); + + // The serialized atom has no top-level 'project' key; the project is + // only recorded inside the atom's own 'ref' dictionary. + $project = $atom['ref']['project']; + + $refs = array(); + foreach (array_merge($atom['extends'], $atom['links']) as $ref_dict) { + $ref = DivinerAtomRef::newFromDictionary($ref_dict); + if ($ref->getProject() == $project) { + $refs[$ref->toHash()] = true; + } + } + + return array_keys($refs); + } + + private function computeGraphHash($node_hash) { + $atom_cache = $this->getAtomCache(); + $atom = $atom_cache->getAtom($node_hash); + + $edges = $this->getEdges($node_hash); + sort($edges); + + $inputs = array( + 'atomHash' => $atom['hash'], + 'edges' => $edges, + ); + + return md5(serialize($inputs)).'G'; + } + +} diff --git a/src/applications/diviner/workflow/DivinerWorkflow.php 
b/src/applications/diviner/workflow/DivinerWorkflow.php new file mode 100644 index 0000000000..81d9156957 --- /dev/null +++ b/src/applications/diviner/workflow/DivinerWorkflow.php @@ -0,0 +1,33 @@ +atomCache) { + $cache_directory = $this->getRoot().'/.divinercache'; + $this->atomCache = new DivinerAtomCache($cache_directory); + } + return $this->atomCache; + } + + protected function log($message) { + $console = PhutilConsole::getConsole(); + $console->getServer()->setEnableLog(true); + $console->writeLog($message."\n"); + } + +}