mirror of
https://we.phorge.it/source/phorge.git
synced 2024-12-18 19:40:55 +01:00
Modified script to commit smaller batches of symbols to the database.
Summary: Modified the import script so that it only tries to load a configurable number of symbols at a time, to avoid exhausting memory when importing large projects. I haven't written a line of PHP in more than a decade, so please forgive any stylistic or technical errors. Test Plan: Ran the script on a symbol table generated from the Linux kernel. Reviewers: epriestley, #blessed_reviewers Reviewed By: epriestley CC: Korvin, epriestley, aran Maniphest Tasks: T4117 Differential Revision: https://secure.phabricator.com/D7596
This commit is contained in:
parent
965c2e6732
commit
7d43e59110
1 changed files with 72 additions and 40 deletions
|
@ -1,6 +1,7 @@
|
|||
#!/usr/bin/env php
|
||||
<?php
|
||||
|
||||
|
||||
$root = dirname(dirname(dirname(__FILE__)));
|
||||
require_once $root.'/scripts/__init_script__.php';
|
||||
|
||||
|
@ -24,6 +25,13 @@ $args->parse(
|
|||
'help' => 'If a line can\'t be parsed, ignore that line and '.
|
||||
'continue instead of exiting.',
|
||||
),
|
||||
array(
|
||||
'name' => 'max-transaction',
|
||||
'param' => 'num-syms',
|
||||
'default' => '100000',
|
||||
'help' => 'Maximum number of symbols that should '.
|
||||
'be part of a single transaction',
|
||||
),
|
||||
array(
|
||||
'name' => 'more',
|
||||
'wildcard' => true,
|
||||
|
@ -53,6 +61,52 @@ $input = file_get_contents('php://stdin');
|
|||
$input = trim($input);
|
||||
$input = explode("\n", $input);
|
||||
|
||||
|
||||
/**
 * Write one batch of parsed symbols to the symbol table.
 *
 * Steps, in order: resolve a path ID for every symbol's 'path' via
 * lookupOrCreatePaths(), build one escaped VALUES tuple per symbol, optionally
 * delete every row stored for the project, then insert the tuples in groups
 * of 128 rows per INSERT statement.
 *
 * @param array  $syms     List of symbol dictionaries; each is read through
 *                         the keys 'ctxt', 'name', 'type', 'lang', 'line'
 *                         and 'path'.
 * @param object $project  Project whose getID() value is stored in the
 *                         arcanistProjectID column.
 * @param bool   $no_purge When false, all existing rows for the project are
 *                         deleted before the batch is inserted.
 * @return void
 */
function commit_symbols($syms, $project, $no_purge) {
  echo "Looking up path IDs...\n";
  $paths = ipull($syms, 'path');
  $path_ids =
    PhabricatorRepositoryCommitChangeParserWorker::lookupOrCreatePaths($paths);

  $table = new PhabricatorRepositorySymbol();
  $conn = $table->establishConnection('w');
  $table_name = $table->getTableName();
  $project_id = $project->getID();

  // Escape every tuple up front so a formatting failure surfaces before any
  // existing rows are deleted.
  echo "Preparing queries...\n";
  $tuples = array();
  foreach ($syms as $sym) {
    $path_id = $path_ids[$sym['path']];
    $tuples[] = qsprintf(
      $conn,
      '(%d, %s, %s, %s, %s, %d, %d)',
      $project_id,
      $sym['ctxt'],
      $sym['name'],
      $sym['type'],
      $sym['lang'],
      $sym['line'],
      $path_id);
  }

  if (!$no_purge) {
    echo "Purging old syms...\n";
    queryfx(
      $conn,
      'DELETE FROM %T WHERE arcanistProjectID = %d',
      $table_name,
      $project_id);
  }

  $total = number_format(count($tuples));
  echo "Loading ".$total." syms...\n";

  // Insert 128 rows per statement to keep each query a manageable size.
  $batches = array_chunk($tuples, 128);
  foreach ($batches as $rows) {
    queryfx(
      $conn,
      'INSERT INTO %T
        (arcanistProjectID, symbolContext, symbolName, symbolType,
          symbolLanguage, lineNumber, pathID) VALUES %Q',
      $table_name,
      implode(', ', $rows));
  }
}
|
||||
|
||||
$no_purge = $args->getArg('no-purge');
|
||||
$symbols = array();
|
||||
foreach ($input as $key => $line) {
|
||||
try {
|
||||
|
@ -129,48 +183,26 @@ foreach ($input as $key => $line) {
|
|||
throw $e;
|
||||
}
|
||||
}
|
||||
|
||||
if (count ($symbols) >= $args->getArg('max-transaction')) {
|
||||
try {
|
||||
echo "Committing {$args->getArg('max-transaction')} symbols....\n";
|
||||
commit_symbols($symbols, $project, $no_purge);
|
||||
$no_purge = true;
|
||||
unset($symbols);
|
||||
$symbols = array();
|
||||
} catch (Exception $e) {
|
||||
if ($args->getArg('ignore-errors')) {
|
||||
continue;
|
||||
} else {
|
||||
throw $e;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
echo "Looking up path IDs...\n";
|
||||
$path_map = PhabricatorRepositoryCommitChangeParserWorker::lookupOrCreatePaths(
|
||||
ipull($symbols, 'path'));
|
||||
|
||||
$symbol = new PhabricatorRepositorySymbol();
|
||||
$conn_w = $symbol->establishConnection('w');
|
||||
|
||||
echo "Preparing queries...\n";
|
||||
$sql = array();
|
||||
foreach ($symbols as $dict) {
|
||||
$sql[] = qsprintf(
|
||||
$conn_w,
|
||||
'(%d, %s, %s, %s, %s, %d, %d)',
|
||||
$project->getID(),
|
||||
$dict['ctxt'],
|
||||
$dict['name'],
|
||||
$dict['type'],
|
||||
$dict['lang'],
|
||||
$dict['line'],
|
||||
$path_map[$dict['path']]);
|
||||
}
|
||||
|
||||
if (!$args->getArg('no-purge')) {
|
||||
echo "Purging old symbols...\n";
|
||||
queryfx(
|
||||
$conn_w,
|
||||
'DELETE FROM %T WHERE arcanistProjectID = %d',
|
||||
$symbol->getTableName(),
|
||||
$project->getID());
|
||||
}
|
||||
|
||||
echo "Loading ".number_format(count($sql))." symbols...\n";
|
||||
foreach (array_chunk($sql, 128) as $chunk) {
|
||||
queryfx(
|
||||
$conn_w,
|
||||
'INSERT INTO %T
|
||||
(arcanistProjectID, symbolContext, symbolName, symbolType,
|
||||
symbolLanguage, lineNumber, pathID) VALUES %Q',
|
||||
$symbol->getTableName(),
|
||||
implode(', ', $chunk));
|
||||
// Flush the symbols left over after the last full batch.
//
// Pass the running $no_purge flag, not the raw --no-purge argument: the loop
// above sets $no_purge to true once a batch has been committed, and
// commit_symbols() deletes every row for the project when the flag is false.
// Re-reading the argument here would purge the batches that were just
// inserted and leave only this final partial batch in the table.
if (count($symbols)) {
  commit_symbols($symbols, $project, $no_purge);
}
|
||||
|
||||
echo "Done.\n";
|
||||
|
|
Loading…
Reference in a new issue