1
0
Fork 0
mirror of https://we.phorge.it/source/arcanist.git synced 2025-02-16 16:58:38 +01:00

Respect custom set encoding in patch/export workflows

Summary:
In cases where a codebase is not UTF-8, we will attempt a conversion
if an alternative encoding is given or configured.

This is now possible in two ways:

  - by configuring one under repository tracking in diffusion
  - by passing an --encoding option to the workflow

If the first is not available we will make a conduit call
to do an extra check and see if an encoding is configured directly with
phabricator.

Test Plan:
Tried various diffs with known encodings (mostly ISO-8859-1), either passing
them in via stdin or downloading a known problematic revision from
phabricator; they applied where they had otherwise failed.

Reviewers: epriestley

Reviewed By: epriestley

CC: aran, epriestley

Maniphest Tasks: T452

Differential Revision: https://secure.phabricator.com/D1880
This commit is contained in:
David Reuss 2012-03-14 07:08:06 -07:00 committed by epriestley
parent f673ab10b1
commit 545f51a4fb
6 changed files with 101 additions and 22 deletions

View file

@ -30,6 +30,7 @@ final class ArcanistBundle {
private $projectID;
private $baseRevision;
private $revisionID;
private $encoding;
public function setConduit(ConduitClient $conduit) {
$this->conduit = $conduit;
@ -47,6 +48,15 @@ final class ArcanistBundle {
$this->baseRevision = $base_revision;
}
/**
 * Record the text encoding associated with this bundle.
 *
 * @param  string|null $encoding Encoding name, or null for none.
 * @return this
 */
public function setEncoding($encoding) {
  $this->encoding = $encoding;

  return $this;
}
/**
 * Return the encoding previously stored on this bundle, if any.
 *
 * @return string|null Encoding name, or null when none was set.
 */
public function getEncoding() {
  return $this->encoding;
}
/**
 * Return the base revision stored on this bundle.
 *
 * @return mixed Whatever value was assigned to the base revision property.
 */
public function getBaseRevision() {
  return $this->baseRevision;
}
@ -86,12 +96,14 @@ final class ArcanistBundle {
$project_name = idx($meta_info, 'projectName');
$base_revision = idx($meta_info, 'baseRevision');
$revision_id = idx($meta_info, 'revisionID');
$encoding = idx($meta_info, 'encoding');
// this arc bundle was probably made before we started storing meta info
} else {
$version = 0;
$project_name = null;
$base_revision = null;
$revision_id = null;
$encoding = null;
}
$future = new ExecFuture(
@ -117,6 +129,7 @@ final class ArcanistBundle {
$obj->setProjectID($project_name);
$obj->setBaseRevision($base_revision);
$obj->setRevisionID($revision_id);
$obj->setEncoding($encoding);
return $obj;
}
@ -168,6 +181,7 @@ final class ArcanistBundle {
'projectName' => $this->getProjectID(),
'baseRevision' => $this->getBaseRevision(),
'revisionID' => $this->getRevisionID(),
'encoding' => $this->getEncoding(),
);
$dir = Filesystem::createTemporaryDirectory();
@ -230,7 +244,8 @@ final class ArcanistBundle {
$result[] = $this->buildHunkChanges($change->getHunks());
}
return implode("\n", $result)."\n";
$diff = implode("\n", $result)."\n";
return $this->convertNonUTF8Diff($diff);
}
public function toGitPatch() {
@ -374,7 +389,24 @@ final class ArcanistBundle {
}
$result[] = $change_body;
}
return implode("\n", $result)."\n";
$diff = implode("\n", $result)."\n";
return $this->convertNonUTF8Diff($diff);
}
/**
 * Re-encode a generated diff into this bundle's configured encoding.
 *
 * The diff is passed to mb_convert_encoding() with 'UTF-8' as the source
 * encoding. When no encoding is configured, or the configured encoding is
 * UTF-8 (compared case-insensitively), the diff is returned unchanged.
 *
 * @param  string $diff Diff text to convert.
 * @return string The diff, converted when a non-UTF-8 encoding is set.
 * @throws Exception When the conversion fails.
 */
private function convertNonUTF8Diff($diff) {
  $encoding = $this->encoding;
  if (!$encoding || strtoupper($encoding) === 'UTF-8') {
    // Nothing to do: no target encoding, or the target is already UTF-8.
    return $diff;
  }

  try {
    $converted = mb_convert_encoding($diff, $encoding, 'UTF-8');
  } catch (ValueError $ex) {
    // PHP 8+ raises ValueError for an unrecognized encoding name instead of
    // returning false; route it through the same failure path below.
    $converted = false;
  }

  // Compare against false explicitly: a successfully converted "" or "0"
  // is falsy but is not a conversion failure.
  if ($converted === false) {
    throw new Exception(
      "Attempted conversion of diff to encoding ".
      "'{$encoding}' failed. Have you specified ".
      "the proper encoding correctly?");
  }

  return $converted;
}
public function getChanges() {
@ -504,6 +536,7 @@ final class ArcanistBundle {
}
private function buildHunkChanges(array $hunks) {
$result = array();
foreach ($hunks as $hunk) {
$small_hunks = $this->breakHunkIntoSmallHunks($hunk);

View file

@ -64,6 +64,8 @@ abstract class ArcanistBaseWorkflow {
private $arguments;
private $command;
private $repositoryEncoding;
private $arcanistConfiguration;
private $parentWorkflow;
private $workingDirectory;
@ -1126,4 +1128,18 @@ abstract class ArcanistBaseWorkflow {
return $repository_api->getPath('.arc/'.$path);
}
/**
 * Look up the encoding configured for the working copy's project.
 *
 * The result of the 'arcanist.projectinfo' Conduit call is cached on the
 * workflow, so later calls return the stored value without another
 * request. When the response carries no usable 'encoding' value, 'UTF-8'
 * is used.
 *
 * Exceptions raised by the Conduit call are not caught here.
 *
 * @return string Encoding name; 'UTF-8' when none is configured.
 */
protected function getRepositoryEncoding() {
  if ($this->repositoryEncoding) {
    return $this->repositoryEncoding;
  }

  $project_info = $this->getConduit()->callMethodSynchronous(
    'arcanist.projectinfo',
    array(
      'name' => $this->getWorkingCopy()->getProjectID(),
    ));

  // Guard the lookup: a response without an 'encoding' key would otherwise
  // raise an undefined-index notice before the UTF-8 fallback applies.
  $encoding = isset($project_info['encoding'])
    ? $project_info['encoding']
    : null;

  $this->repositoryEncoding = nonempty($encoding, 'UTF-8');

  return $this->repositoryEncoding;
}
}

View file

@ -751,7 +751,7 @@ EOTEXT
}
}
$try_encoding = null;
$try_encoding = nonempty($this->getArgument('encoding'), null);
$utf8_problems = array();
foreach ($changes as $change) {
@ -764,29 +764,21 @@ EOTEXT
// liberal about what they're willing to process.
$is_binary = ArcanistDiffUtils::isHeuristicBinaryFile($corpus);
if (!$is_binary) {
$try_encoding = nonempty($this->getArgument('encoding'), null);
if ($try_encoding === null) {
// Make a call to check if there's an encoding specified for this
// project.
if (!$try_encoding) {
try {
$project_info = $this->getConduit()->callMethodSynchronous(
'arcanist.projectinfo',
array(
'name' => $this->getWorkingCopy()->getProjectID(),
));
$try_encoding = nonempty($project_info['encoding'], false);
$try_encoding = $this->getRepositoryEncoding();
} catch (ConduitClientException $e) {
if ($e->getErrorCode() == 'ERR-BAD-ARCANIST-PROJECT') {
echo phutil_console_wrap(
"Lookup of encoding in arcanist project failed\n".
$e->getMessage()
);
$try_encoding = false;
} else {
throw $e;
}
if ($e->getErrorCode() == 'ERR-BAD-ARCANIST-PROJECT') {
echo phutil_console_wrap(
"Lookup of encoding in arcanist project failed\n".
$e->getMessage());
} else {
throw $e;
}
}
}
if ($try_encoding) {
// NOTE: This feature is HIGHLY EXPERIMENTAL and will cause a lot
// of issues. Use it at your own risk.

View file

@ -75,6 +75,11 @@ EOTEXT
"Export change as an arc bundle. This format can represent all ".
"changes. These bundles can be applied with 'arc patch'.",
),
'encoding' => array(
'param' => 'encoding',
'help' =>
"Attempt to convert non UTF-8 patch into specified encoding.",
),
'revision' => array(
'param' => 'revision_id',
'help' =>
@ -209,6 +214,19 @@ EOTEXT
break;
}
$try_encoding = nonempty($this->getArgument('encoding'), null);
if (!$try_encoding) {
try {
$try_encoding = $this->getRepositoryEncoding();
} catch (ConduitClientException $e) {
$try_encoding = null;
}
}
if ($try_encoding) {
$bundle->setEncoding($try_encoding);
}
$format = $this->getFormat();
switch ($format) {

View file

@ -12,6 +12,7 @@ phutil_require_module('arcanist', 'parser/diff');
phutil_require_module('arcanist', 'workflow/base');
phutil_require_module('phutil', 'console');
phutil_require_module('phutil', 'utils');
phutil_require_source('ArcanistExportWorkflow.php');

View file

@ -81,6 +81,11 @@ EOTEXT
'help' =>
"Apply changes from a git patchfile or unified patchfile.",
),
'encoding' => array(
'param' => 'encoding',
'help' =>
"Attempt to convert non UTF-8 patch into specified encoding.",
),
'update' => array(
'supports' => array(
'git', 'svn', 'hg'
@ -359,6 +364,20 @@ EOTEXT
throw $ex;
}
}
$try_encoding = nonempty($this->getArgument('encoding'), null);
if (!$try_encoding) {
try {
$try_encoding = $this->getRepositoryEncoding();
} catch (ConduitClientException $e) {
$try_encoding = null;
}
}
if ($try_encoding) {
$bundle->setEncoding($try_encoding);
}
$force = $this->getArgument('force', false);
if ($force) {
// force means don't do any sanity checks about the patch