1
0
Fork 0
mirror of https://we.phorge.it/source/phorge.git synced 2024-11-25 16:22:43 +01:00

Add a bin/bulk export CLI tool to make debugging and profiling large exports easier

Summary:
Ref T13049. When stuff executes asynchronously on the bulk workflow it can be hard to inspect directly, and/or a pain to test because you have to go through a bunch of steps to run it again.

Make future work here easier by making export triggerable from the CLI. This makes it easy to repeat, inspect with `--trace`, profile with `--xprofile`, etc.

Test Plan:
  - Ran several invalid commands, got sensible error messages.
  - Ran some valid commands, got exported data.
  - Used `--xprofile` to look at the profile for a 300MB dump of 100K tasks which took about 40 seconds to export. Nothing jumped out as sketchy to me -- CustomField wrangling is a little slow but most of the time looked like it was being spent legitimately.

Reviewers: amckinley

Reviewed By: amckinley

Maniphest Tasks: T13049

Differential Revision: https://secure.phabricator.com/D18965
This commit is contained in:
epriestley 2018-01-30 05:39:09 -08:00
parent 84df122085
commit b27fd05eef
2 changed files with 170 additions and 0 deletions

View file

@ -2226,6 +2226,7 @@ phutil_register_library_map(array(
'PhabricatorBulkContentSource' => 'infrastructure/daemon/contentsource/PhabricatorBulkContentSource.php',
'PhabricatorBulkEditGroup' => 'applications/transactions/bulk/PhabricatorBulkEditGroup.php',
'PhabricatorBulkEngine' => 'applications/transactions/bulk/PhabricatorBulkEngine.php',
'PhabricatorBulkManagementExportWorkflow' => 'applications/transactions/bulk/management/PhabricatorBulkManagementExportWorkflow.php',
'PhabricatorBulkManagementMakeSilentWorkflow' => 'applications/transactions/bulk/management/PhabricatorBulkManagementMakeSilentWorkflow.php',
'PhabricatorBulkManagementWorkflow' => 'applications/transactions/bulk/management/PhabricatorBulkManagementWorkflow.php',
'PhabricatorCSVExportFormat' => 'infrastructure/export/format/PhabricatorCSVExportFormat.php',
@ -7579,6 +7580,7 @@ phutil_register_library_map(array(
'PhabricatorBulkContentSource' => 'PhabricatorContentSource',
'PhabricatorBulkEditGroup' => 'Phobject',
'PhabricatorBulkEngine' => 'Phobject',
'PhabricatorBulkManagementExportWorkflow' => 'PhabricatorBulkManagementWorkflow',
'PhabricatorBulkManagementMakeSilentWorkflow' => 'PhabricatorBulkManagementWorkflow',
'PhabricatorBulkManagementWorkflow' => 'PhabricatorManagementWorkflow',
'PhabricatorCSVExportFormat' => 'PhabricatorExportFormat',

View file

@ -0,0 +1,168 @@
<?php
/**
 * Command-line workflow ("export") which runs a SearchEngine query through
 * the export engine and writes the result to a file or to stdout.
 *
 * This exists so exports can be repeated and inspected directly from the CLI
 * instead of going through the asynchronous web workflow.
 */
final class PhabricatorBulkManagementExportWorkflow
  extends PhabricatorBulkManagementWorkflow {

  /**
   * Declare the workflow name, synopsis and accepted command-line flags.
   */
  protected function didConstruct() {
    $this
      ->setName('export')
      ->setExamples('**export** [options]')
      ->setSynopsis(
        pht('Export data to a flat file (JSON, CSV, Excel, etc).'))
      ->setArguments(
        array(
          array(
            'name' => 'class',
            'param' => 'class',
            'help' => pht(
              'SearchEngine class to export data from.'),
          ),
          array(
            'name' => 'format',
            'param' => 'format',
            'help' => pht('Export format.'),
          ),
          array(
            'name' => 'query',
            'param' => 'key',
            'help' => pht(
              'Export the data selected by this query.'),
          ),
          array(
            'name' => 'output',
            'param' => 'path',
            'help' => pht(
              'Write output to a file. If omitted, output will be sent to '.
              'stdout.'),
          ),
          array(
            'name' => 'overwrite',
            'help' => pht(
              'If the output file already exists, overwrite it instead of '.
              'raising an error.'),
          ),
        ));
  }

  /**
   * Validate the arguments, run the export, and emit the exported data.
   *
   * @param PhutilArgumentParser $args Parsed command-line arguments.
   * @return int Exit code; 0 on success.
   * @throws PhutilArgumentUsageException If a required flag is missing, names
   *   an unknown class/query/format, or the output path already exists and
   *   "--overwrite" was not given.
   */
  public function execute(PhutilArgumentParser $args) {
    $viewer = $this->getViewer();

    // Parameterized flags may be absent when omitted on the command line.
    // Cast to string so the strlen() checks below never receive null, which
    // is deprecated on PHP 8.1 and newer.
    $class = (string)$args->getArg('class');
    if (!strlen($class)) {
      throw new PhutilArgumentUsageException(
        pht(
          'Specify a search engine class to export data from with '.
          '"--class".'));
    }

    if (!is_subclass_of($class, 'PhabricatorApplicationSearchEngine')) {
      throw new PhutilArgumentUsageException(
        pht(
          'SearchEngine class ("%s") is unknown.',
          $class));
    }

    $engine = newv($class, array())
      ->setViewer($viewer);

    if (!$engine->canExport()) {
      throw new PhutilArgumentUsageException(
        pht(
          'SearchEngine class ("%s") does not support data export.',
          $class));
    }

    $query_key = (string)$args->getArg('query');
    if (!strlen($query_key)) {
      throw new PhutilArgumentUsageException(
        pht(
          'Specify a query to export with "--query".'));
    }

    // The key is known to be nonempty here: it is either one of the engine's
    // builtin queries or the key of a stored saved query.
    if ($engine->isBuiltinQuery($query_key)) {
      $saved_query = $engine->buildSavedQueryFromBuiltin($query_key);
    } else {
      $saved_query = id(new PhabricatorSavedQueryQuery())
        ->setViewer($viewer)
        ->withQueryKeys(array($query_key))
        ->executeOne();
    }

    if (!$saved_query) {
      throw new PhutilArgumentUsageException(
        pht(
          'Failed to load saved query ("%s").',
          $query_key));
    }

    $format_key = (string)$args->getArg('format');
    if (!strlen($format_key)) {
      throw new PhutilArgumentUsageException(
        pht(
          'Specify an export format with "--format".'));
    }

    $all_formats = PhabricatorExportFormat::getAllExportFormats();
    $format = idx($all_formats, $format_key);
    if (!$format) {
      throw new PhutilArgumentUsageException(
        pht(
          'Unknown export format ("%s"). Known formats are: %s.',
          $format_key,
          implode(', ', array_keys($all_formats))));
    }

    if (!$format->isExportFormatEnabled()) {
      throw new PhutilArgumentUsageException(
        pht(
          'Export format ("%s") is not enabled.',
          $format_key));
    }

    $is_overwrite = $args->getArg('overwrite');
    $output_path = (string)$args->getArg('output');
    $has_output = (bool)strlen($output_path);

    if (!$has_output && $is_overwrite) {
      throw new PhutilArgumentUsageException(
        pht(
          'Flag "--overwrite" has no effect without "--output".'));
    }

    // Only probe the filesystem when an output path was actually given;
    // when writing to stdout there is nothing to collide with.
    if ($has_output && !$is_overwrite) {
      if (Filesystem::pathExists($output_path)) {
        throw new PhutilArgumentUsageException(
          pht(
            'Output path already exists. Use "--overwrite" to overwrite '.
            'it.'));
      }
    }

    $export_engine = id(new PhabricatorExportEngine())
      ->setViewer($viewer)
      ->setTitle(pht('Export'))
      ->setFilename(pht('export'))
      ->setSearchEngine($engine)
      ->setSavedQuery($saved_query)
      ->setExportFormat($format);

    $file = $export_engine->exportFile();

    $iterator = $file->getFileDataIterator();

    if ($has_output) {
      // Empty the file before appending chunks to it. Otherwise,
      // "--overwrite" would really mean "--append" when the file already
      // has content.
      Filesystem::writeFile($output_path, '');

      foreach ($iterator as $chunk) {
        Filesystem::appendFile($output_path, $chunk);
      }
    } else {
      foreach ($iterator as $chunk) {
        echo $chunk;
      }
    }

    return 0;
  }

}