1
0
Fork 0
mirror of https://we.phorge.it/source/phorge.git synced 2024-12-18 19:40:55 +01:00

Modified script to commit smaller batches of symbols to the database.

Summary:
Modified the import script so it will only try to load a configurable
number of symbols at a time to avoid exhausting memory for large project
imports.

I haven't written a line of PHP in more than a decade, so please forgive
any stylistic or technical errors.

Test Plan: Ran the script on symbol table generated from linux kernel.

Reviewers: epriestley, #blessed_reviewers

Reviewed By: epriestley

CC: Korvin, epriestley, aran

Maniphest Tasks: T4117

Differential Revision: https://secure.phabricator.com/D7596
This commit is contained in:
William R. Otte 2013-11-17 16:33:33 -08:00 committed by epriestley
parent 965c2e6732
commit 7d43e59110

View file

@ -1,6 +1,7 @@
#!/usr/bin/env php
<?php
// Take three dirname() steps up from this file's path to get the root
// directory, then load the shared script initialization file from there.
$root = dirname(dirname(dirname(__FILE__)));
require_once $root.'/scripts/__init_script__.php';
@ -24,6 +25,13 @@ $args->parse(
'help' => 'If a line can\'t be parsed, ignore that line and '.
'continue instead of exiting.',
),
array(
'name' => 'max-transaction',
'param' => 'num-syms',
'default' => '100000',
'help' => 'Maximum number of symbols that should '.
'be part of a single transaction',
),
array(
'name' => 'more',
'wildcard' => true,
@ -53,6 +61,52 @@ $input = file_get_contents('php://stdin');
// Strip leading/trailing whitespace from the raw input, then break it into
// one array entry per line.
$input = explode("\n", trim($input));
/**
 * Write one batch of parsed symbols to the repository symbol table.
 *
 * Looks up (or creates) a path ID for every symbol's path, optionally deletes
 * the rows already stored for the project, and then inserts the batch using
 * multi-row INSERT statements of at most 128 rows each.
 *
 * The DELETE and the INSERTs are issued as separate statements; nothing in
 * this function wraps them in a transaction.
 *
 * @param array  $syms     List of symbol dictionaries. Each entry is read via
 *                         the keys 'ctxt', 'name', 'type', 'lang', 'line' and
 *                         'path'.
 * @param object $project  Object whose getID() value is written to the
 *                         arcanistProjectID column.
 * @param bool   $no_purge When truthy, rows already stored for the project are
 *                         left in place; otherwise they are deleted first.
 * @return void
 */
function commit_symbols($syms, $project, $no_purge) {
  echo "Looking up path IDs...\n";
  $path_map =
    PhabricatorRepositoryCommitChangeParserWorker::lookupOrCreatePaths(
      ipull($syms, 'path'));

  $symbol = new PhabricatorRepositorySymbol();
  $conn_w = $symbol->establishConnection('w');

  // Build one escaped "(...)" value tuple per symbol up front so the insert
  // loop below only has to join and send them.
  echo "Preparing queries...\n";
  $sql = array();
  foreach ($syms as $dict) {
    $sql[] = qsprintf(
      $conn_w,
      '(%d, %s, %s, %s, %s, %d, %d)',
      $project->getID(),
      $dict['ctxt'],
      $dict['name'],
      $dict['type'],
      $dict['lang'],
      $dict['line'],
      $path_map[$dict['path']]);
  }

  if (!$no_purge) {
    // User-facing text says "symbols" (not the local variable name "syms"),
    // matching the wording this script printed before the logic was moved
    // into a function.
    echo "Purging old symbols...\n";
    queryfx(
      $conn_w,
      'DELETE FROM %T WHERE arcanistProjectID = %d',
      $symbol->getTableName(),
      $project->getID());
  }

  echo "Loading ".number_format(count($sql))." symbols...\n";
  // Insert 128 tuples per statement to keep each query a bounded size.
  foreach (array_chunk($sql, 128) as $chunk) {
    queryfx(
      $conn_w,
      'INSERT INTO %T
(arcanistProjectID, symbolContext, symbolName, symbolType,
symbolLanguage, lineNumber, pathID) VALUES %Q',
      $symbol->getTableName(),
      implode(', ', $chunk));
  }
}
// Buffer of parsed symbols that have not been handed to commit_symbols() yet.
$symbols = array();
// Running purge flag. It starts from the --no-purge option and is set to true
// after a batch is committed inside the parsing loop.
$no_purge = $args->getArg('no-purge');
foreach ($input as $key => $line) {
try {
@ -129,48 +183,26 @@ foreach ($input as $key => $line) {
throw $e;
}
}
// Once the buffer has reached the configured batch size, write it out and
// start a fresh buffer.
if (count($symbols) >= $args->getArg('max-transaction')) {
  try {
    echo 'Committing '.$args->getArg('max-transaction')." symbols....\n";
    commit_symbols($symbols, $project, $no_purge);
    // Only the first committed batch may purge existing rows.
    $no_purge = true;
    $symbols = array();
  } catch (Exception $e) {
    // Re-raise unless the caller asked to ignore errors; in that case move on
    // to the next input line with the buffer left as it is.
    if (!$args->getArg('ignore-errors')) {
      throw $e;
    }
    continue;
  }
}
}
echo "Looking up path IDs...\n";
$path_map = PhabricatorRepositoryCommitChangeParserWorker::lookupOrCreatePaths(
ipull($symbols, 'path'));
$symbol = new PhabricatorRepositorySymbol();
$conn_w = $symbol->establishConnection('w');
echo "Preparing queries...\n";
$sql = array();
foreach ($symbols as $dict) {
$sql[] = qsprintf(
$conn_w,
'(%d, %s, %s, %s, %s, %d, %d)',
$project->getID(),
$dict['ctxt'],
$dict['name'],
$dict['type'],
$dict['lang'],
$dict['line'],
$path_map[$dict['path']]);
}
if (!$args->getArg('no-purge')) {
echo "Purging old symbols...\n";
queryfx(
$conn_w,
'DELETE FROM %T WHERE arcanistProjectID = %d',
$symbol->getTableName(),
$project->getID());
}
echo "Loading ".number_format(count($sql))." symbols...\n";
foreach (array_chunk($sql, 128) as $chunk) {
queryfx(
$conn_w,
'INSERT INTO %T
(arcanistProjectID, symbolContext, symbolName, symbolType,
symbolLanguage, lineNumber, pathID) VALUES %Q',
$symbol->getTableName(),
implode(', ', $chunk));
// Flush whatever is left in the buffer after the parsing loop.
//
// Pass the running $no_purge flag rather than re-reading the --no-purge
// option. The loop sets $no_purge to true after it commits a batch; using the
// raw option here would make this last call delete the project's rows,
// including the ones those earlier batches just inserted.
if (count($symbols)) {
  commit_symbols($symbols, $project, $no_purge);
}
echo "Done.\n";