1
0
Fork 0
mirror of https://we.phorge.it/source/phorge.git synced 2025-01-10 23:01:04 +01:00

Allow Phabricator to serve Mercurial repositories over HTTP

Summary: Ref T2230. This is easily the worst thing I've had to write in a while. I'll leave some notes inline.

Test Plan: Ran `hg clone http://...` on a hosted repo. Ran `hg push` on the same. Changed sync'd both ways.

Reviewers: asherkin, btrahan

Reviewed By: btrahan

CC: aran

Maniphest Tasks: T2230

Differential Revision: https://secure.phabricator.com/D7520
This commit is contained in:
epriestley 2013-11-06 18:00:42 -08:00
parent 44a40eaf57
commit 6324669748
5 changed files with 257 additions and 6 deletions

View file

@ -505,6 +505,8 @@ phutil_register_library_map(array(
'DiffusionMercurialFileContentQuery' => 'applications/diffusion/query/filecontent/DiffusionMercurialFileContentQuery.php',
'DiffusionMercurialRawDiffQuery' => 'applications/diffusion/query/rawdiff/DiffusionMercurialRawDiffQuery.php',
'DiffusionMercurialRequest' => 'applications/diffusion/request/DiffusionMercurialRequest.php',
'DiffusionMercurialResponse' => 'applications/diffusion/response/DiffusionMercurialResponse.php',
'DiffusionMercurialWireProtocol' => 'applications/diffusion/protocol/DiffusionMercurialWireProtocol.php',
'DiffusionPathChange' => 'applications/diffusion/data/DiffusionPathChange.php',
'DiffusionPathChangeQuery' => 'applications/diffusion/query/pathchange/DiffusionPathChangeQuery.php',
'DiffusionPathCompleteController' => 'applications/diffusion/controller/DiffusionPathCompleteController.php',
@ -2757,6 +2759,7 @@ phutil_register_library_map(array(
'DiffusionMercurialFileContentQuery' => 'DiffusionFileContentQuery',
'DiffusionMercurialRawDiffQuery' => 'DiffusionRawDiffQuery',
'DiffusionMercurialRequest' => 'DiffusionRequest',
'DiffusionMercurialResponse' => 'AphrontResponse',
'DiffusionPathCompleteController' => 'DiffusionController',
'DiffusionPathQueryTestCase' => 'PhabricatorTestCase',
'DiffusionPathValidateController' => 'DiffusionController',

View file

@ -176,6 +176,9 @@ final class DiffusionServeController extends DiffusionController {
case PhabricatorRepositoryType::REPOSITORY_TYPE_GIT:
$result = $this->serveGitRequest($repository, $viewer);
break;
case PhabricatorRepositoryType::REPOSITORY_TYPE_MERCURIAL:
$result = $this->serveMercurialRequest($repository, $viewer);
break;
default:
$result = new PhabricatorVCSResponse(
999,
@ -224,13 +227,43 @@ final class DiffusionServeController extends DiffusionController {
break;
case PhabricatorRepositoryType::REPOSITORY_TYPE_MERCURIAL:
$cmd = $request->getStr('cmd');
switch ($cmd) {
case 'capabilities':
return true;
default:
if ($cmd == 'batch') {
// For "batch" we get a "cmds" argument like
//
// heads ;known nodes=
//
// We need to examine the commands (here, "heads" and "known") to
// make sure they're all read-only.
$args = $this->getMercurialArguments();
$cmds = idx($args, 'cmds');
if ($cmds) {
// NOTE: Mercurial has some code to escape semicolons, but it does
// not actually function for command separation. For example, these
// two batch commands will produce completely different results (the
// former will run the lookup; the latter will fail with a parser
// error):
//
// lookup key=a:xb;lookup key=z* 0
// lookup key=a:;b;lookup key=z* 0
// ^
// |
// +-- Note semicolon.
//
// So just split unconditionally.
$cmds = explode(';', $cmds);
foreach ($cmds as $sub_cmd) {
$name = head(explode(' ', $sub_cmd, 2));
if (!DiffusionMercurialWireProtocol::isReadOnlyCommand($name)) {
return false;
}
break;
}
return true;
}
}
return DiffusionMercurialWireProtocol::isReadOnlyCommand($cmd);
case PhabricatorRepositoryType::REPOSITORY_TYPE_SUBVERSION:
break;
}
@ -357,5 +390,127 @@ final class DiffusionServeController extends DiffusionController {
return $user;
}
private function serveMercurialRequest(PhabricatorRepository $repository) {
$request = $this->getRequest();
$bin = Filesystem::resolveBinary('hg');
if (!$bin) {
throw new Exception("Unable to find `hg` in PATH!");
}
$env = array();
$input = PhabricatorStartup::getRawInput();
$cmd = $request->getStr('cmd');
$args = $this->getMercurialArguments();
$args = $this->formatMercurialArguments($cmd, $args);
if (strlen($input)) {
$input = strlen($input)."\n".$input."0\n";
}
list($err, $stdout, $stderr) = id(new ExecFuture('%s serve --stdio', $bin))
->setEnv($env, true)
->setCWD($repository->getLocalPath())
->write("{$cmd}\n{$args}{$input}")
->resolve();
if ($err) {
return new PhabricatorVCSResponse(
500,
pht('Error %d: %s', $err, $stderr));
}
if ($cmd == 'getbundle' ||
$cmd == 'changegroup' ||
$cmd == 'changegroupsubset') {
// We're not completely sure that "changegroup" and "changegroupsubset"
// actually work, they're for very old Mercurial.
$body = gzcompress($stdout);
} else if ($cmd == 'unbundle') {
// This includes diagnostic information and anything echoed by commit
// hooks. We ignore `stdout` since it just has protocol garbage, and
// substitute `stderr`.
$body = strlen($stderr)."\n".$stderr;
} else {
list($length, $body) = explode("\n", $stdout, 2);
}
return id(new DiffusionMercurialResponse())->setContent($body);
}
private function getMercurialArguments() {
// Mercurial sends arguments in HTTP headers. "Why?", you might wonder,
// "Why would you do this?".
$args_raw = array();
for ($ii = 1; ; $ii++) {
$header = 'HTTP_X_HGARG_'.$ii;
if (!array_key_exists($header, $_SERVER)) {
break;
}
$args_raw[] = $_SERVER[$header];
}
$args_raw = implode('', $args_raw);
return id(new PhutilQueryStringParser())
->parseQueryString($args_raw);
}
private function formatMercurialArguments($command, array $arguments) {
$spec = DiffusionMercurialWireProtocol::getCommandArgs($command);
$out = array();
// Mercurial takes normal arguments like this:
//
// name <length(value)>
// value
$has_star = false;
foreach ($spec as $arg_key) {
if ($arg_key == '*') {
$has_star = true;
continue;
}
if (isset($arguments[$arg_key])) {
$value = $arguments[$arg_key];
$size = strlen($value);
$out[] = "{$arg_key} {$size}\n{$value}";
unset($arguments[$arg_key]);
}
}
if ($has_star) {
// Mercurial takes arguments for variable argument lists roughly like
// this:
//
// * <count(args)>
// argname1 <length(argvalue1)>
// argvalue1
// argname2 <length(argvalue2)>
// argvalue2
$count = count($arguments);
$out[] = "* {$count}\n";
foreach ($arguments as $key => $value) {
if (in_array($key, $spec)) {
// We already added this argument above, so skip it.
continue;
}
$size = strlen($value);
$out[] = "{$key} {$size}\n{$value}";
}
}
return implode('', $out);
}
}

View file

@ -0,0 +1,62 @@
<?php
final class DiffusionMercurialWireProtocol {
public static function getCommandArgs($command) {
// We need to enumerate all of the Mercurial wire commands because the
// argument encoding varies based on the command. "Why?", you might ask,
// "Why would you do this?".
$commands = array(
'batch' => array('cmds', '*'),
'between' => array('pairs'),
'branchmap' => array(),
'branches' => array('nodes'),
'capabilities' => array(),
'changegroup' => array('roots'),
'changegroupsubset' => array('bases heads'),
'debugwireargs' => array('one two *'),
'getbundle' => array('*'),
'heads' => array(),
'hello' => array(),
'known' => array('nodes', '*'),
'listkeys' => array('namespace'),
'lookup' => array('key'),
'pushkey' => array('namespace', 'key', 'old', 'new'),
'stream_out' => array(''),
'unbundle' => array('heads'),
);
if (!isset($commands[$command])) {
throw new Exception("Unknown Mercurial command '{$command}!");
}
return $commands[$command];
}
public static function isReadOnlyCommand($command) {
$read_only = array(
'between' => true,
'branchmap' => true,
'branches' => true,
'capabilities' => true,
'changegroup' => true,
'changegroupsubset' => true,
'debugwireargs' => true,
'getbundle' => true,
'heads' => true,
'hello' => true,
'known' => true,
'listkeys' => true,
'lookup' => true,
'stream_out' => true,
);
// Notably, the write commands are "pushkey" and "unbundle". The
// "batch" command is theoretically read only, but we require explicit
// analysis of the actual commands.
return isset($read_only[$command]);
}
}

View file

@ -30,7 +30,7 @@ final class DiffusionGitResponse extends AphrontResponse {
}
public function getHeaders() {
return $this->headers;
return array_merge(parent::getHeaders(), $this->headers);
}
public function getCacheHeaders() {

View file

@ -0,0 +1,31 @@
<?php
final class DiffusionMercurialResponse extends AphrontResponse {
private $content;
public function setContent($content) {
$this->content = $content;
return $this;
}
public function buildResponseString() {
return $this->content;
}
public function getHeaders() {
$headers = array(
array('Content-Type', 'application/mercurial-0.1'),
);
return array_merge(parent::getHeaders(), $headers);
}
public function getCacheHeaders() {
return array();
}
public function getHTTPResponseCode() {
return 200;
}
}