1
0
Fork 0
mirror of https://we.phorge.it/source/phorge.git synced 2024-11-26 00:32:42 +01:00

Separate changeset analysis code from DifferentialDiff and provide a standalone rebuild-changesets workflow

Summary:
Ref T13137. The "analyze/cache data about changesets" step is becoming more involved. We recently added detection for generated code to support "Ignore generated changes" in Owners, and I now plan to hash the new file content so we can hide changes which have no effect.

Before adding this new hashing step, pull the "detect copied code" and "detect generated code" stuff out and move them to a separate `ChangesetEngine`. Then support doing a changeset rebuild directly with `bin/differential rebuild-changesets`.

This simplifies things a bit and makes testing easier since you don't need to keep creating new revisions to re-run copy/generated/hash logic.

Test Plan: Ran `bin/differential rebuild-changesets --revision Dxxx`, saw changesets rebuild. See also next change.

Reviewers: amckinley

Reviewed By: amckinley

Maniphest Tasks: T13137

Differential Revision: https://secure.phabricator.com/D19452
This commit is contained in:
epriestley 2018-05-16 08:39:08 -07:00
parent 3544620209
commit 79fdf5c127
5 changed files with 323 additions and 216 deletions

View file

@ -432,6 +432,7 @@ phutil_register_library_map(array(
'DifferentialChangesSinceLastUpdateField' => 'applications/differential/customfield/DifferentialChangesSinceLastUpdateField.php',
'DifferentialChangeset' => 'applications/differential/storage/DifferentialChangeset.php',
'DifferentialChangesetDetailView' => 'applications/differential/view/DifferentialChangesetDetailView.php',
'DifferentialChangesetEngine' => 'applications/differential/engine/DifferentialChangesetEngine.php',
'DifferentialChangesetFileTreeSideNavBuilder' => 'applications/differential/view/DifferentialChangesetFileTreeSideNavBuilder.php',
'DifferentialChangesetHTMLRenderer' => 'applications/differential/render/DifferentialChangesetHTMLRenderer.php',
'DifferentialChangesetListController' => 'applications/differential/controller/DifferentialChangesetListController.php',
@ -2869,6 +2870,7 @@ phutil_register_library_map(array(
'PhabricatorDifferentialExtractWorkflow' => 'applications/differential/management/PhabricatorDifferentialExtractWorkflow.php',
'PhabricatorDifferentialManagementWorkflow' => 'applications/differential/management/PhabricatorDifferentialManagementWorkflow.php',
'PhabricatorDifferentialMigrateHunkWorkflow' => 'applications/differential/management/PhabricatorDifferentialMigrateHunkWorkflow.php',
'PhabricatorDifferentialRebuildChangesetsWorkflow' => 'applications/differential/management/PhabricatorDifferentialRebuildChangesetsWorkflow.php',
'PhabricatorDifferentialRevisionTestDataGenerator' => 'applications/differential/lipsum/PhabricatorDifferentialRevisionTestDataGenerator.php',
'PhabricatorDiffusionApplication' => 'applications/diffusion/application/PhabricatorDiffusionApplication.php',
'PhabricatorDiffusionBlameSetting' => 'applications/settings/setting/PhabricatorDiffusionBlameSetting.php',
@ -5729,6 +5731,7 @@ phutil_register_library_map(array(
'PhabricatorDestructibleInterface',
),
'DifferentialChangesetDetailView' => 'AphrontView',
'DifferentialChangesetEngine' => 'Phobject',
'DifferentialChangesetFileTreeSideNavBuilder' => 'Phobject',
'DifferentialChangesetHTMLRenderer' => 'DifferentialChangesetRenderer',
'DifferentialChangesetListController' => 'DifferentialController',
@ -8530,6 +8533,7 @@ phutil_register_library_map(array(
'PhabricatorDifferentialExtractWorkflow' => 'PhabricatorDifferentialManagementWorkflow',
'PhabricatorDifferentialManagementWorkflow' => 'PhabricatorManagementWorkflow',
'PhabricatorDifferentialMigrateHunkWorkflow' => 'PhabricatorDifferentialManagementWorkflow',
'PhabricatorDifferentialRebuildChangesetsWorkflow' => 'PhabricatorDifferentialManagementWorkflow',
'PhabricatorDifferentialRevisionTestDataGenerator' => 'PhabricatorTestDataGenerator',
'PhabricatorDiffusionApplication' => 'PhabricatorApplication',
'PhabricatorDiffusionBlameSetting' => 'PhabricatorInternalSetting',

View file

@ -0,0 +1,223 @@
<?php
final class DifferentialChangesetEngine extends Phobject {
public function rebuildChangesets(array $changesets) {
assert_instances_of($changesets, 'DifferentialChangeset');
foreach ($changesets as $changeset) {
$this->detectGeneratedCode($changeset);
}
$this->detectCopiedCode($changesets);
}
/* -( Generated Code )----------------------------------------------------- */
private function detectGeneratedCode(DifferentialChangeset $changeset) {
$is_generated_trusted = $this->isTrustedGeneratedCode($changeset);
if ($is_generated_trusted) {
$changeset->setTrustedChangesetAttribute(
DifferentialChangeset::ATTRIBUTE_GENERATED,
$is_generated_trusted);
}
$is_generated_untrusted = $this->isUntrustedGeneratedCode($changeset);
if ($is_generated_untrusted) {
$changeset->setUntrustedChangesetAttribute(
DifferentialChangeset::ATTRIBUTE_GENERATED,
$is_generated_untrusted);
}
}
private function isTrustedGeneratedCode(DifferentialChangeset $changeset) {
$filename = $changeset->getFilename();
$paths = PhabricatorEnv::getEnvConfig('differential.generated-paths');
foreach ($paths as $regexp) {
if (preg_match($regexp, $filename)) {
return true;
}
}
return false;
}
private function isUntrustedGeneratedCode(DifferentialChangeset $changeset) {
if ($changeset->getHunks()) {
$new_data = $changeset->makeNewFile();
if (strpos($new_data, '@'.'generated') !== false) {
return true;
}
}
return false;
}
/* -( Copied Code )-------------------------------------------------------- */
private function detectCopiedCode(array $changesets) {
$min_width = 30;
$min_lines = 3;
$map = array();
$files = array();
$types = array();
foreach ($changesets as $changeset) {
$file = $changeset->getFilename();
foreach ($changeset->getHunks() as $hunk) {
$lines = $hunk->getStructuredOldFile();
foreach ($lines as $line => $info) {
$type = $info['type'];
if ($type == '\\') {
continue;
}
$types[$file][$line] = $type;
$text = $info['text'];
$text = trim($text);
$files[$file][$line] = $text;
if (strlen($text) >= $min_width) {
$map[$text][] = array($file, $line);
}
}
}
}
foreach ($changesets as $changeset) {
$copies = array();
foreach ($changeset->getHunks() as $hunk) {
$added = $hunk->getStructuredNewFile();
$atype = array();
foreach ($added as $line => $info) {
$atype[$line] = $info['type'];
$added[$line] = trim($info['text']);
}
$skip_lines = 0;
foreach ($added as $line => $code) {
if ($skip_lines) {
// We're skipping lines that we already processed because we
// extended a block above them downward to include them.
$skip_lines--;
continue;
}
if ($atype[$line] !== '+') {
// This line hasn't been changed in the new file, so don't try
// to figure out where it came from.
continue;
}
if (empty($map[$code])) {
// This line was too short to trigger copy/move detection.
continue;
}
if (count($map[$code]) > 16) {
// If there are a large number of identical lines in this diff,
// don't try to figure out where this block came from: the analysis
// is O(N^2), since we need to compare every line against every
// other line. Even if we arrive at a result, it is unlikely to be
// meaningful. See T5041.
continue;
}
$best_length = 0;
// Explore all candidates.
foreach ($map[$code] as $val) {
list($file, $orig_line) = $val;
$length = 1;
// Search backward and forward to find all of the adjacent lines
// which match.
foreach (array(-1, 1) as $direction) {
$offset = $direction;
while (true) {
if (isset($copies[$line + $offset])) {
// If we run into a block above us which we've already
// attributed to a move or copy from elsewhere, stop
// looking.
break;
}
if (!isset($added[$line + $offset])) {
// If we've run off the beginning or end of the new file,
// stop looking.
break;
}
if (!isset($files[$file][$orig_line + $offset])) {
// If we've run off the beginning or end of the original
// file, we also stop looking.
break;
}
$old = $files[$file][$orig_line + $offset];
$new = $added[$line + $offset];
if ($old !== $new) {
// If the old line doesn't match the new line, stop
// looking.
break;
}
$length++;
$offset += $direction;
}
}
if ($length < $best_length) {
// If we already know of a better source (more matching lines)
// for this move/copy, stick with that one. We prefer long
// copies/moves which match a lot of context over short ones.
continue;
}
if ($length == $best_length) {
if (idx($types[$file], $orig_line) != '-') {
// If we already know of an equally good source (same number
// of matching lines) and this isn't a move, stick with the
// other one. We prefer moves over copies.
continue;
}
}
$best_length = $length;
// ($offset - 1) contains number of forward matching lines.
$best_offset = $offset - 1;
$best_file = $file;
$best_line = $orig_line;
}
$file = ($best_file == $changeset->getFilename() ? '' : $best_file);
for ($i = $best_length; $i--; ) {
$type = idx($types[$best_file], $best_line + $best_offset - $i);
$copies[$line + $best_offset - $i] = ($best_length < $min_lines
? array() // Ignore short blocks.
: array($file, $best_line + $best_offset - $i, $type));
}
$skip_lines = $best_offset;
}
}
$copies = array_filter($copies);
if ($copies) {
$metadata = $changeset->getMetadata();
$metadata['copy:lines'] = $copies;
$changeset->setMetadata($metadata);
}
}
}
}

View file

@ -0,0 +1,92 @@
<?php
final class PhabricatorDifferentialRebuildChangesetsWorkflow
extends PhabricatorDifferentialManagementWorkflow {
protected function didConstruct() {
$this
->setName('rebuild-changesets')
->setExamples('**rebuild-changesets** --revision __revision__')
->setSynopsis(pht('Rebuild changesets for a revision.'))
->setArguments(
array(
array(
'name' => 'revision',
'param' => 'revision',
'help' => pht('Revision to rebuild changesets for.'),
),
));
}
public function execute(PhutilArgumentParser $args) {
$viewer = $this->getViewer();
$revision_identifier = $args->getArg('revision');
if (!$revision_identifier) {
throw new PhutilArgumentUsageException(
pht('Specify a revision to rebuild changesets for with "--revision".'));
}
$revision = id(new PhabricatorObjectQuery())
->setViewer($viewer)
->withNames(array($revision_identifier))
->executeOne();
if ($revision) {
if (!($revision instanceof DifferentialRevision)) {
throw new PhutilArgumentUsageException(
pht(
'Object "%s" specified by "--revision" must be a Differential '.
'revision.'));
}
} else {
$revision = id(new DifferentialRevisionQuery())
->setViewer($viewer)
->withIDs(array($revision_identifier))
->executeOne();
}
if (!$revision) {
throw new PhutilArgumentUsageException(
pht(
'No revision "%s" exists.',
$revision_identifier));
}
$diffs = id(new DifferentialDiffQuery())
->setViewer($viewer)
->withRevisionIDs(array($revision->getID()))
->execute();
$changesets = id(new DifferentialChangesetQuery())
->setViewer($viewer)
->withDiffs($diffs)
->needHunks(true)
->execute();
$changeset_groups = mgroup($changesets, 'getDiffID');
foreach ($changeset_groups as $diff_id => $changesets) {
echo tsprintf(
"%s\n",
pht(
'Rebuilding %s changeset(s) for diff ID %d.',
phutil_count($changesets),
$diff_id));
foreach ($changesets as $changeset) {
echo tsprintf(
" %s\n",
$changeset->getFilename());
}
id(new DifferentialChangesetEngine())
->rebuildChangesets($changesets);
echo tsprintf(
"%s\n",
pht('Done.'));
}
}
}

View file

@ -1419,167 +1419,6 @@ final class DifferentialChangesetParser extends Phobject {
return sprintf('%d%%', 100 * ($covered / ($covered + $not_covered)));
}
public function detectCopiedCode(
array $changesets,
$min_width = 30,
$min_lines = 3) {
assert_instances_of($changesets, 'DifferentialChangeset');
$map = array();
$files = array();
$types = array();
foreach ($changesets as $changeset) {
$file = $changeset->getFilename();
foreach ($changeset->getHunks() as $hunk) {
$lines = $hunk->getStructuredOldFile();
foreach ($lines as $line => $info) {
$type = $info['type'];
if ($type == '\\') {
continue;
}
$types[$file][$line] = $type;
$text = $info['text'];
$text = trim($text);
$files[$file][$line] = $text;
if (strlen($text) >= $min_width) {
$map[$text][] = array($file, $line);
}
}
}
}
foreach ($changesets as $changeset) {
$copies = array();
foreach ($changeset->getHunks() as $hunk) {
$added = $hunk->getStructuredNewFile();
$atype = array();
foreach ($added as $line => $info) {
$atype[$line] = $info['type'];
$added[$line] = trim($info['text']);
}
$skip_lines = 0;
foreach ($added as $line => $code) {
if ($skip_lines) {
// We're skipping lines that we already processed because we
// extended a block above them downward to include them.
$skip_lines--;
continue;
}
if ($atype[$line] !== '+') {
// This line hasn't been changed in the new file, so don't try
// to figure out where it came from.
continue;
}
if (empty($map[$code])) {
// This line was too short to trigger copy/move detection.
continue;
}
if (count($map[$code]) > 16) {
// If there are a large number of identical lines in this diff,
// don't try to figure out where this block came from: the analysis
// is O(N^2), since we need to compare every line against every
// other line. Even if we arrive at a result, it is unlikely to be
// meaningful. See T5041.
continue;
}
$best_length = 0;
// Explore all candidates.
foreach ($map[$code] as $val) {
list($file, $orig_line) = $val;
$length = 1;
// Search backward and forward to find all of the adjacent lines
// which match.
foreach (array(-1, 1) as $direction) {
$offset = $direction;
while (true) {
if (isset($copies[$line + $offset])) {
// If we run into a block above us which we've already
// attributed to a move or copy from elsewhere, stop
// looking.
break;
}
if (!isset($added[$line + $offset])) {
// If we've run off the beginning or end of the new file,
// stop looking.
break;
}
if (!isset($files[$file][$orig_line + $offset])) {
// If we've run off the beginning or end of the original
// file, we also stop looking.
break;
}
$old = $files[$file][$orig_line + $offset];
$new = $added[$line + $offset];
if ($old !== $new) {
// If the old line doesn't match the new line, stop
// looking.
break;
}
$length++;
$offset += $direction;
}
}
if ($length < $best_length) {
// If we already know of a better source (more matching lines)
// for this move/copy, stick with that one. We prefer long
// copies/moves which match a lot of context over short ones.
continue;
}
if ($length == $best_length) {
if (idx($types[$file], $orig_line) != '-') {
// If we already know of an equally good source (same number
// of matching lines) and this isn't a move, stick with the
// other one. We prefer moves over copies.
continue;
}
}
$best_length = $length;
// ($offset - 1) contains number of forward matching lines.
$best_offset = $offset - 1;
$best_file = $file;
$best_line = $orig_line;
}
$file = ($best_file == $changeset->getFilename() ? '' : $best_file);
for ($i = $best_length; $i--; ) {
$type = idx($types[$best_file], $best_line + $best_offset - $i);
$copies[$line + $best_offset - $i] = ($best_length < $min_lines
? array() // Ignore short blocks.
: array($file, $best_line + $best_offset - $i, $type));
}
$skip_lines = $best_offset;
}
}
$copies = array_filter($copies);
if ($copies) {
$metadata = $changeset->getMetadata();
$metadata['copy:lines'] = $copies;
$changeset->setMetadata($metadata);
}
}
return $changesets;
}
/**
* Build maps from lines comments appear on to actual lines.
*/

View file

@ -226,23 +226,18 @@ final class DifferentialDiff
$changeset->setAddLines($add_lines);
$changeset->setDelLines($del_lines);
self::detectGeneratedCode($changeset);
$diff->addUnsavedChangeset($changeset);
}
$diff->setLineCount($lines);
$parser = new DifferentialChangesetParser();
$changesets = $parser->detectCopiedCode(
$diff->getChangesets(),
$min_width = 30,
$min_lines = 3);
$diff->attachChangesets($changesets);
$changesets = $diff->getChangesets();
id(new DifferentialChangesetEngine())
->rebuildChangesets($changesets);
return $diff;
}
public function getDiffDict() {
$dict = array(
'id' => $this->getID(),
@ -823,50 +818,4 @@ final class DifferentialDiff
);
}
private static function detectGeneratedCode(
DifferentialChangeset $changeset) {
$is_generated_trusted = self::isTrustedGeneratedCode($changeset);
if ($is_generated_trusted) {
$changeset->setTrustedChangesetAttribute(
DifferentialChangeset::ATTRIBUTE_GENERATED,
$is_generated_trusted);
}
$is_generated_untrusted = self::isUntrustedGeneratedCode($changeset);
if ($is_generated_untrusted) {
$changeset->setUntrustedChangesetAttribute(
DifferentialChangeset::ATTRIBUTE_GENERATED,
$is_generated_untrusted);
}
}
private static function isTrustedGeneratedCode(
DifferentialChangeset $changeset) {
$filename = $changeset->getFilename();
$paths = PhabricatorEnv::getEnvConfig('differential.generated-paths');
foreach ($paths as $regexp) {
if (preg_match($regexp, $filename)) {
return true;
}
}
return false;
}
private static function isUntrustedGeneratedCode(
DifferentialChangeset $changeset) {
if ($changeset->getHunks()) {
$new_data = $changeset->makeNewFile();
if (strpos($new_data, '@'.'generated') !== false) {
return true;
}
}
return false;
}
}