From 7d43e59110665c1480e2d519dd5759b59fbdae28 Mon Sep 17 00:00:00 2001
From: "William R. Otte"
Date: Sun, 17 Nov 2013 16:33:33 -0800
Subject: [PATCH] Modified script to commit smaller batches of symbols to the database.

Summary:
Modified the import script so it will only try to load a configurable
number of symbols at a time to avoid exhausting memory for large project
imports.

I haven't written a line of PHP in more than a decade, so please forgive
any stylistic or technical errors.

Test Plan: Ran the script on symbol table generated from linux kernel.

Reviewers: epriestley, #blessed_reviewers

Reviewed By: epriestley

CC: Korvin, epriestley, aran

Maniphest Tasks: T4117

Differential Revision: https://secure.phabricator.com/D7596
---
Review notes (not part of the commit message):
- Amended after review: the final flush now passes the tracked $no_purge
  flag instead of re-reading --no-purge, so the last partial batch no longer
  deletes the rows written by earlier batches. commit_symbols() gained a
  docblock. Hunk line counts and the diffstat below are adjusted for the
  added comment lines; the blob hashes on the "index" line still refer to
  the original revision.
- The text between "#!/usr/bin/env php" and "parse(" (the remainder of the
  first hunk and the header of the following hunk) is missing from this copy
  of the patch and is reproduced as found.

 scripts/symbols/import_project_symbols.php | 131 ++++++++++++++-------
 1 file changed, 91 insertions(+), 40 deletions(-)

diff --git a/scripts/symbols/import_project_symbols.php b/scripts/symbols/import_project_symbols.php
index 6271dc7f75..033b509754 100755
--- a/scripts/symbols/import_project_symbols.php
+++ b/scripts/symbols/import_project_symbols.php
@@ -1,6 +1,7 @@
 #!/usr/bin/env php
 parse(
       'help' => 'If a line can\'t be parsed, ignore that line and '.
                 'continue instead of exiting.',
     ),
+    array(
+      'name' => 'max-transaction',
+      'param' => 'num-syms',
+      'default' => '100000',
+      'help' => 'Maximum number of symbols that should '.
+                'be part of a single transaction',
+    ),
     array(
       'name' => 'more',
       'wildcard' => true,
@@ -53,6 +61,67 @@
 $input = file_get_contents('php://stdin');
 $input = trim($input);
 $input = explode("\n", $input);
+
+/**
+ * Write one batch of parsed symbols to the symbol table.
+ *
+ * Resolves a path ID for every symbol via lookupOrCreatePaths(), optionally
+ * deletes the project's existing symbol rows, then inserts the batch using
+ * one INSERT statement per 128 rows.
+ *
+ * @param array  $syms      Symbol dictionaries with the keys 'ctxt', 'name',
+ *                          'type', 'lang', 'line' and 'path'.
+ * @param object $project   Project whose getID() is written to
+ *                          arcanistProjectID on every row.
+ * @param bool   $no_purge  When false, the project's existing rows are
+ *                          deleted before the batch is inserted.
+ * @return void
+ */
+function commit_symbols ($syms, $project, $no_purge) {
+  echo "Looking up path IDs...\n";
+  $path_map =
+    PhabricatorRepositoryCommitChangeParserWorker::lookupOrCreatePaths(
+      ipull($syms, 'path'));
+
+  $symbol = new PhabricatorRepositorySymbol();
+  $conn_w = $symbol->establishConnection('w');
+
+  echo "Preparing queries...\n";
+  $sql = array();
+  foreach ($syms as $dict) {
+    $sql[] = qsprintf(
+      $conn_w,
+      '(%d, %s, %s, %s, %s, %d, %d)',
+      $project->getID(),
+      $dict['ctxt'],
+      $dict['name'],
+      $dict['type'],
+      $dict['lang'],
+      $dict['line'],
+      $path_map[$dict['path']]);
+  }
+
+  if (!$no_purge) {
+    echo "Purging old syms...\n";
+    queryfx($conn_w,
+      'DELETE FROM %T WHERE arcanistProjectID = %d',
+      $symbol->getTableName(),
+      $project->getID());
+  }
+
+  echo "Loading ".number_format(count($sql))." syms...\n";
+  foreach (array_chunk($sql, 128) as $chunk) {
+    queryfx($conn_w,
+      'INSERT INTO %T
+      (arcanistProjectID, symbolContext, symbolName, symbolType,
+        symbolLanguage, lineNumber, pathID) VALUES %Q',
+      $symbol->getTableName(),
+      implode(', ', $chunk));
+  }
+
+}
+
+$no_purge = $args->getArg('no-purge');
 $symbols = array();
 foreach ($input as $key => $line) {
   try {
@@ -129,48 +198,30 @@ foreach ($input as $key => $line) {
       throw $e;
     }
   }
+
+  if (count ($symbols) >= $args->getArg('max-transaction')) {
+    try {
+      echo "Committing {$args->getArg('max-transaction')} symbols....\n";
+      commit_symbols($symbols, $project, $no_purge);
+      // Only the first batch may purge; every later batch must append.
+      $no_purge = true;
+      unset($symbols);
+      $symbols = array();
+    } catch (Exception $e) {
+      if ($args->getArg('ignore-errors')) {
+        continue;
+      } else {
+        throw $e;
+      }
+    }
+  }
 }
 
-echo "Looking up path IDs...\n";
-$path_map = PhabricatorRepositoryCommitChangeParserWorker::lookupOrCreatePaths(
-  ipull($symbols, 'path'));
-
-$symbol = new PhabricatorRepositorySymbol();
-$conn_w = $symbol->establishConnection('w');
-
-echo "Preparing queries...\n";
-$sql = array();
-foreach ($symbols as $dict) {
-  $sql[] = qsprintf(
-    $conn_w,
-    '(%d, %s, %s, %s, %s, %d, %d)',
-    $project->getID(),
-    $dict['ctxt'],
-    $dict['name'],
-    $dict['type'],
-    $dict['lang'],
-    $dict['line'],
-    $path_map[$dict['path']]);
-}
-
-if (!$args->getArg('no-purge')) {
-  echo "Purging old symbols...\n";
-  queryfx(
-    $conn_w,
-    'DELETE FROM %T WHERE arcanistProjectID = %d',
-    $symbol->getTableName(),
-    $project->getID());
-}
-
-echo "Loading ".number_format(count($sql))." symbols...\n";
-foreach (array_chunk($sql, 128) as $chunk) {
-  queryfx(
-    $conn_w,
-    'INSERT INTO %T
-      (arcanistProjectID, symbolContext, symbolName, symbolType,
-        symbolLanguage, lineNumber, pathID) VALUES %Q',
-    $symbol->getTableName(),
-    implode(', ', $chunk));
+// Flush the final partial batch. Pass the tracked $no_purge flag rather than
+// the raw --no-purge argument: once an earlier batch has been committed,
+// purging here would delete the rows that batch just inserted.
+if (count($symbols)) {
+  commit_symbols($symbols, $project, $no_purge);
 }
 
 echo "Done.\n";