mirror of
https://we.phorge.it/source/arcanist.git
synced 2025-02-16 16:58:38 +01:00
Respect custom set encoding in patch/export workflows
Summary: In cases where a codebase is not UTF-8, we will attempt a conversion, if an alternative encoding is given/configured. This is now possible in two ways: - by configuring one under repository tracking in diffusion - by passing an --encoding option to the workflow If the first is not available we will make a conduit call to do an extra check and see if an encoding is configured directly with phabricator. Test Plan: Tried various diffs with known encodings (mostly ISO-8859-1), and passed it in, via stdin, or downloaded a known problematic revision from phabricator, and they applied where they otherwise failed. Reviewers: epriestley Reviewed By: epriestley CC: aran, epriestley Maniphest Tasks: T452 Differential Revision: https://secure.phabricator.com/D1880
This commit is contained in:
parent
f673ab10b1
commit
545f51a4fb
6 changed files with 101 additions and 22 deletions
|
@ -30,6 +30,7 @@ final class ArcanistBundle {
|
|||
private $projectID;
|
||||
private $baseRevision;
|
||||
private $revisionID;
|
||||
private $encoding;
|
||||
|
||||
public function setConduit(ConduitClient $conduit) {
|
||||
$this->conduit = $conduit;
|
||||
|
@ -47,6 +48,15 @@ final class ArcanistBundle {
|
|||
$this->baseRevision = $base_revision;
|
||||
}
|
||||
|
||||
/**
 * Record the character encoding associated with this bundle.
 *
 * The value is stored as given; no validation happens here.
 *
 * @param string|null $encoding Encoding name, or null for none.
 * @return this
 */
public function setEncoding($encoding) {
  $this->encoding = $encoding;

  return $this;
}
|
||||
|
||||
/**
 * Return the encoding previously stored with setEncoding().
 *
 * @return string|null The stored encoding value, unchanged.
 */
public function getEncoding() {
  return $this->encoding;
}
|
||||
|
||||
/**
 * Return the base revision stored on this bundle.
 *
 * @return mixed The stored base revision value, unchanged.
 */
public function getBaseRevision() {
  return $this->baseRevision;
}
|
||||
|
@ -86,12 +96,14 @@ final class ArcanistBundle {
|
|||
$project_name = idx($meta_info, 'projectName');
|
||||
$base_revision = idx($meta_info, 'baseRevision');
|
||||
$revision_id = idx($meta_info, 'revisionID');
|
||||
$encoding = idx($meta_info, 'encoding');
|
||||
// this arc bundle was probably made before we started storing meta info
|
||||
} else {
|
||||
$version = 0;
|
||||
$project_name = null;
|
||||
$base_revision = null;
|
||||
$revision_id = null;
|
||||
$encoding = null;
|
||||
}
|
||||
|
||||
$future = new ExecFuture(
|
||||
|
@ -117,6 +129,7 @@ final class ArcanistBundle {
|
|||
$obj->setProjectID($project_name);
|
||||
$obj->setBaseRevision($base_revision);
|
||||
$obj->setRevisionID($revision_id);
|
||||
$obj->setEncoding($encoding);
|
||||
|
||||
return $obj;
|
||||
}
|
||||
|
@ -168,6 +181,7 @@ final class ArcanistBundle {
|
|||
'projectName' => $this->getProjectID(),
|
||||
'baseRevision' => $this->getBaseRevision(),
|
||||
'revisionID' => $this->getRevisionID(),
|
||||
'encoding' => $this->getEncoding(),
|
||||
);
|
||||
|
||||
$dir = Filesystem::createTemporaryDirectory();
|
||||
|
@ -230,7 +244,8 @@ final class ArcanistBundle {
|
|||
$result[] = $this->buildHunkChanges($change->getHunks());
|
||||
}
|
||||
|
||||
return implode("\n", $result)."\n";
|
||||
$diff = implode("\n", $result)."\n";
|
||||
return $this->convertNonUTF8Diff($diff);
|
||||
}
|
||||
|
||||
public function toGitPatch() {
|
||||
|
@ -374,7 +389,24 @@ final class ArcanistBundle {
|
|||
}
|
||||
$result[] = $change_body;
|
||||
}
|
||||
return implode("\n", $result)."\n";
|
||||
|
||||
$diff = implode("\n", $result)."\n";
|
||||
return $this->convertNonUTF8Diff($diff);
|
||||
}
|
||||
|
||||
/**
 * Convert a generated diff from UTF-8 into the bundle's configured encoding.
 *
 * The diff is returned untouched when no encoding is configured or when the
 * configured encoding is UTF-8 (matched case-insensitively, also accepting
 * the "UTF8" spelling).
 *
 * @param string $diff Diff text, treated as UTF-8 input.
 * @return string The diff, converted when a non-UTF-8 encoding is configured.
 * @throws Exception When mb_convert_encoding() reports failure.
 */
private function convertNonUTF8Diff($diff) {
  $encoding = $this->encoding;
  if (!$encoding) {
    return $diff;
  }

  $normalized = strtoupper($encoding);
  if ($normalized == 'UTF-8' || $normalized == 'UTF8') {
    // Target is already UTF-8; there is nothing to convert.
    return $diff;
  }

  $converted = mb_convert_encoding($diff, $encoding, 'UTF-8');

  // Compare strictly against false: a successful conversion may yield a
  // falsy string ("" or "0"), which must not be reported as a failure.
  if ($converted === false) {
    throw new Exception(
      "Attempted conversion of diff to encoding ".
      "'{$this->encoding}' failed. Have you specified ".
      "the proper encoding correctly?");
  }

  return $converted;
}
|
||||
|
||||
public function getChanges() {
|
||||
|
@ -504,6 +536,7 @@ final class ArcanistBundle {
|
|||
}
|
||||
|
||||
private function buildHunkChanges(array $hunks) {
|
||||
|
||||
$result = array();
|
||||
foreach ($hunks as $hunk) {
|
||||
$small_hunks = $this->breakHunkIntoSmallHunks($hunk);
|
||||
|
|
|
@ -64,6 +64,8 @@ abstract class ArcanistBaseWorkflow {
|
|||
private $arguments;
|
||||
private $command;
|
||||
|
||||
private $repositoryEncoding;
|
||||
|
||||
private $arcanistConfiguration;
|
||||
private $parentWorkflow;
|
||||
private $workingDirectory;
|
||||
|
@ -1126,4 +1128,18 @@ abstract class ArcanistBaseWorkflow {
|
|||
return $repository_api->getPath('.arc/'.$path);
|
||||
}
|
||||
|
||||
/**
 * Look up the encoding configured for the current arcanist project.
 *
 * Issues an 'arcanist.projectinfo' Conduit call keyed by the working copy's
 * project ID and remembers the answer on this object, so later calls return
 * the remembered value without another request. Falls back to 'UTF-8' when
 * the response carries no (or an empty) 'encoding' entry.
 *
 * Exceptions raised by the Conduit call are not caught here; they propagate
 * to the caller.
 *
 * @return string Encoding name, 'UTF-8' when none is configured.
 */
protected function getRepositoryEncoding() {
  if ($this->repositoryEncoding) {
    return $this->repositoryEncoding;
  }

  $project_info = $this->getConduit()->callMethodSynchronous(
    'arcanist.projectinfo',
    array(
      'name' => $this->getWorkingCopy()->getProjectID(),
    ));

  // The response may lack an 'encoding' key; guard the lookup so a missing
  // key falls through to the default instead of raising an undefined-index
  // notice.
  $configured = isset($project_info['encoding'])
    ? $project_info['encoding']
    : null;

  $this->repositoryEncoding = nonempty($configured, 'UTF-8');

  return $this->repositoryEncoding;
}
|
||||
|
||||
}
|
||||
|
|
|
@ -751,7 +751,7 @@ EOTEXT
|
|||
}
|
||||
}
|
||||
|
||||
$try_encoding = null;
|
||||
$try_encoding = nonempty($this->getArgument('encoding'), null);
|
||||
|
||||
$utf8_problems = array();
|
||||
foreach ($changes as $change) {
|
||||
|
@ -764,29 +764,21 @@ EOTEXT
|
|||
// liberal about what they're willing to process.
|
||||
$is_binary = ArcanistDiffUtils::isHeuristicBinaryFile($corpus);
|
||||
if (!$is_binary) {
|
||||
$try_encoding = nonempty($this->getArgument('encoding'), null);
|
||||
if ($try_encoding === null) {
|
||||
// Make a call to check if there's an encoding specified for this
|
||||
// project.
|
||||
|
||||
if (!$try_encoding) {
|
||||
try {
|
||||
$project_info = $this->getConduit()->callMethodSynchronous(
|
||||
'arcanist.projectinfo',
|
||||
array(
|
||||
'name' => $this->getWorkingCopy()->getProjectID(),
|
||||
));
|
||||
$try_encoding = nonempty($project_info['encoding'], false);
|
||||
$try_encoding = $this->getRepositoryEncoding();
|
||||
} catch (ConduitClientException $e) {
|
||||
if ($e->getErrorCode() == 'ERR-BAD-ARCANIST-PROJECT') {
|
||||
echo phutil_console_wrap(
|
||||
"Lookup of encoding in arcanist project failed\n".
|
||||
$e->getMessage()
|
||||
);
|
||||
$try_encoding = false;
|
||||
} else {
|
||||
throw $e;
|
||||
}
|
||||
if ($e->getErrorCode() == 'ERR-BAD-ARCANIST-PROJECT') {
|
||||
echo phutil_console_wrap(
|
||||
"Lookup of encoding in arcanist project failed\n".
|
||||
$e->getMessage());
|
||||
} else {
|
||||
throw $e;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if ($try_encoding) {
|
||||
// NOTE: This feature is HIGHLY EXPERIMENTAL and will cause a lot
|
||||
// of issues. Use it at your own risk.
|
||||
|
|
|
@ -75,6 +75,11 @@ EOTEXT
|
|||
"Export change as an arc bundle. This format can represent all ".
|
||||
"changes. These bundles can be applied with 'arc patch'.",
|
||||
),
|
||||
'encoding' => array(
|
||||
'param' => 'encoding',
|
||||
'help' =>
|
||||
"Attempt to convert non UTF-8 patch into specified encoding.",
|
||||
),
|
||||
'revision' => array(
|
||||
'param' => 'revision_id',
|
||||
'help' =>
|
||||
|
@ -209,6 +214,19 @@ EOTEXT
|
|||
break;
|
||||
}
|
||||
|
||||
$try_encoding = nonempty($this->getArgument('encoding'), null);
|
||||
if (!$try_encoding) {
|
||||
try {
|
||||
$try_encoding = $this->getRepositoryEncoding();
|
||||
} catch (ConduitClientException $e) {
|
||||
$try_encoding = null;
|
||||
}
|
||||
}
|
||||
|
||||
if ($try_encoding) {
|
||||
$bundle->setEncoding($try_encoding);
|
||||
}
|
||||
|
||||
$format = $this->getFormat();
|
||||
|
||||
switch ($format) {
|
||||
|
|
|
@ -12,6 +12,7 @@ phutil_require_module('arcanist', 'parser/diff');
|
|||
phutil_require_module('arcanist', 'workflow/base');
|
||||
|
||||
phutil_require_module('phutil', 'console');
|
||||
phutil_require_module('phutil', 'utils');
|
||||
|
||||
|
||||
phutil_require_source('ArcanistExportWorkflow.php');
|
||||
|
|
|
@ -81,6 +81,11 @@ EOTEXT
|
|||
'help' =>
|
||||
"Apply changes from a git patchfile or unified patchfile.",
|
||||
),
|
||||
'encoding' => array(
|
||||
'param' => 'encoding',
|
||||
'help' =>
|
||||
"Attempt to convert non UTF-8 patch into specified encoding.",
|
||||
),
|
||||
'update' => array(
|
||||
'supports' => array(
|
||||
'git', 'svn', 'hg'
|
||||
|
@ -359,6 +364,20 @@ EOTEXT
|
|||
throw $ex;
|
||||
}
|
||||
}
|
||||
|
||||
$try_encoding = nonempty($this->getArgument('encoding'), null);
|
||||
if (!$try_encoding) {
|
||||
try {
|
||||
$try_encoding = $this->getRepositoryEncoding();
|
||||
} catch (ConduitClientException $e) {
|
||||
$try_encoding = null;
|
||||
}
|
||||
}
|
||||
|
||||
if ($try_encoding) {
|
||||
$bundle->setEncoding($try_encoding);
|
||||
}
|
||||
|
||||
$force = $this->getArgument('force', false);
|
||||
if ($force) {
|
||||
// force means don't do any sanity checks about the patch
|
||||
|
|
Loading…
Add table
Reference in a new issue