mirror of
https://we.phorge.it/source/arcanist.git
synced 2024-11-25 08:12:40 +01:00
Copy repository URI normalization code from Phabricator to Arcanist
Summary: Ref T13546. Move toward smarter remote repository lookup by providing URI normalization code in Arcanist. This diff duplicates code from Phabricator; the next change will collapse it. Test Plan: Ran unit tests. Maniphest Tasks: T13546 Differential Revision: https://secure.phabricator.com/D21372
This commit is contained in:
parent
c53c05e5b2
commit
b19985a4bd
3 changed files with 247 additions and 0 deletions
|
@ -427,6 +427,8 @@ phutil_register_library_map(array(
|
||||||
'ArcanistRepositoryQuery' => 'repository/query/ArcanistRepositoryQuery.php',
|
'ArcanistRepositoryQuery' => 'repository/query/ArcanistRepositoryQuery.php',
|
||||||
'ArcanistRepositoryRef' => 'ref/ArcanistRepositoryRef.php',
|
'ArcanistRepositoryRef' => 'ref/ArcanistRepositoryRef.php',
|
||||||
'ArcanistRepositoryRemoteQuery' => 'repository/remote/ArcanistRepositoryRemoteQuery.php',
|
'ArcanistRepositoryRemoteQuery' => 'repository/remote/ArcanistRepositoryRemoteQuery.php',
|
||||||
|
'ArcanistRepositoryURINormalizer' => 'repository/remote/ArcanistRepositoryURINormalizer.php',
|
||||||
|
'ArcanistRepositoryURINormalizerTestCase' => 'repository/remote/__tests__/ArcanistRepositoryURINormalizerTestCase.php',
|
||||||
'ArcanistReusedAsIteratorXHPASTLinterRule' => 'lint/linter/xhpast/rules/ArcanistReusedAsIteratorXHPASTLinterRule.php',
|
'ArcanistReusedAsIteratorXHPASTLinterRule' => 'lint/linter/xhpast/rules/ArcanistReusedAsIteratorXHPASTLinterRule.php',
|
||||||
'ArcanistReusedAsIteratorXHPASTLinterRuleTestCase' => 'lint/linter/xhpast/rules/__tests__/ArcanistReusedAsIteratorXHPASTLinterRuleTestCase.php',
|
'ArcanistReusedAsIteratorXHPASTLinterRuleTestCase' => 'lint/linter/xhpast/rules/__tests__/ArcanistReusedAsIteratorXHPASTLinterRuleTestCase.php',
|
||||||
'ArcanistReusedIteratorReferenceXHPASTLinterRule' => 'lint/linter/xhpast/rules/ArcanistReusedIteratorReferenceXHPASTLinterRule.php',
|
'ArcanistReusedIteratorReferenceXHPASTLinterRule' => 'lint/linter/xhpast/rules/ArcanistReusedIteratorReferenceXHPASTLinterRule.php',
|
||||||
|
@ -1455,6 +1457,8 @@ phutil_register_library_map(array(
|
||||||
'ArcanistRepositoryQuery' => 'Phobject',
|
'ArcanistRepositoryQuery' => 'Phobject',
|
||||||
'ArcanistRepositoryRef' => 'ArcanistRef',
|
'ArcanistRepositoryRef' => 'ArcanistRef',
|
||||||
'ArcanistRepositoryRemoteQuery' => 'ArcanistRepositoryQuery',
|
'ArcanistRepositoryRemoteQuery' => 'ArcanistRepositoryQuery',
|
||||||
|
'ArcanistRepositoryURINormalizer' => 'Phobject',
|
||||||
|
'ArcanistRepositoryURINormalizerTestCase' => 'PhutilTestCase',
|
||||||
'ArcanistReusedAsIteratorXHPASTLinterRule' => 'ArcanistXHPASTLinterRule',
|
'ArcanistReusedAsIteratorXHPASTLinterRule' => 'ArcanistXHPASTLinterRule',
|
||||||
'ArcanistReusedAsIteratorXHPASTLinterRuleTestCase' => 'ArcanistXHPASTLinterRuleTestCase',
|
'ArcanistReusedAsIteratorXHPASTLinterRuleTestCase' => 'ArcanistXHPASTLinterRuleTestCase',
|
||||||
'ArcanistReusedIteratorReferenceXHPASTLinterRule' => 'ArcanistXHPASTLinterRule',
|
'ArcanistReusedIteratorReferenceXHPASTLinterRule' => 'ArcanistXHPASTLinterRule',
|
||||||
|
|
159
src/repository/remote/ArcanistRepositoryURINormalizer.php
Normal file
159
src/repository/remote/ArcanistRepositoryURINormalizer.php
Normal file
|
@ -0,0 +1,159 @@
|
||||||
|
<?php
|
||||||
|
|
||||||
|
/**
 * Normalize repository URIs. For example, these URIs are generally equivalent
 * and all point at the same repository:
 *
 *   ssh://user@host/repo
 *   ssh://user@host/repo/
 *   ssh://user@host:22/repo
 *   user@host:/repo
 *   ssh://user@host/repo.git
 *
 * This class can be used to normalize URIs like this, in order to detect
 * alternate spellings of the same repository URI. In particular, the
 * @{method:getNormalizedPath} method will return:
 *
 *   repo
 *
 * ...for all of these URIs. Generally, usage looks like this:
 *
 *   $norm_a = new ArcanistRepositoryURINormalizer($type, $uri_a);
 *   $norm_b = new ArcanistRepositoryURINormalizer($type, $uri_b);
 *
 *   if ($norm_a->getNormalizedPath() === $norm_b->getNormalizedPath()) {
 *     // URIs appear to point at the same repository.
 *   } else {
 *     // URIs are very unlikely to be the same repository.
 *   }
 *
 * Because a repository can be hosted at arbitrarily many arbitrary URIs, there
 * is no way to completely prevent false negatives by only examining URIs
 * (that is, repositories with totally different URIs could really be the same).
 * However, normalization is relatively aggressive and false negatives should
 * be rare: if normalization says two URIs are different repositories, they
 * probably are.
 *
 * @task normal Normalizing URIs
 */
final class ArcanistRepositoryURINormalizer
  extends Phobject {

  const TYPE_GIT = 'git';
  const TYPE_SVN = 'svn';
  const TYPE_MERCURIAL = 'hg';

  private $type;
  private $uri;
  private $domainMap = array();

  /**
   * Build a normalizer for one repository URI.
   *
   * @param const One of the `TYPE_*` constants declared on this class.
   * @param string Raw repository URI to normalize.
   * @throws Exception If the type is not one of the known `TYPE_*` constants.
   */
  public function __construct($type, $uri) {
    switch ($type) {
      case self::TYPE_GIT:
      case self::TYPE_SVN:
      case self::TYPE_MERCURIAL:
        break;
      default:
        throw new Exception(pht('Unknown URI type "%s"!', $type));
    }

    $this->type = $type;
    $this->uri = $uri;
  }

  /**
   * List every URI type this class accepts.
   *
   * @return list<const> The `TYPE_*` constants.
   */
  public static function getAllURITypes() {
    return array(
      self::TYPE_GIT,
      self::TYPE_SVN,
      self::TYPE_MERCURIAL,
    );
  }

  /**
   * Provide a map from replacement keys to domains.
   *
   * When the URI's domain equals one of the map's values,
   * @{method:getNormalizedDomain} returns the corresponding key instead of
   * the domain. Values are lowercased here because the lookup compares them
   * against a lowercased domain.
   *
   * @param map<string, string> Map of replacement keys to domain names.
   * @return this
   */
  public function setDomainMap(array $domain_map) {
    foreach ($domain_map as $key => $domain) {
      $domain_map[$key] = phutil_utf8_strtolower($domain);
    }

    $this->domainMap = $domain_map;
    return $this;
  }


/* -(  Normalizing URIs  )--------------------------------------------------- */


  /**
   * Get the path component of the URI, before any normalization.
   *
   * Git URIs are always parsed and their path component is returned. For
   * Subversion and Mercurial, the path component is returned only when the
   * URI has a protocol; otherwise the raw URI string is returned unchanged.
   *
   * @return string Unnormalized path.
   * @task normal
   */
  public function getPath() {
    switch ($this->type) {
      case self::TYPE_GIT:
        $uri = new PhutilURI($this->uri);
        return $uri->getPath();
      case self::TYPE_SVN:
      case self::TYPE_MERCURIAL:
        $uri = new PhutilURI($this->uri);
        if ($uri->getProtocol()) {
          return $uri->getPath();
        }

        return $this->uri;
    }
  }

  /**
   * Get the normalized domain and normalized path joined by a slash.
   *
   * @return string Normalized URI of the form "domain/path".
   * @task normal
   */
  public function getNormalizedURI() {
    return $this->getNormalizedDomain().'/'.$this->getNormalizedPath();
  }


  /**
   * Get the normalized form of the URI's path.
   *
   * Leading and trailing slashes are removed, a trailing ".git" is removed
   * for Git URIs, and paths beginning with "diffusion/<callsign or ID>" are
   * cut down to just that prefix.
   *
   * @return string Normalized path.
   * @task normal
   */
  public function getNormalizedPath() {
    $path = $this->getPath();
    $path = trim($path, '/');

    switch ($this->type) {
      case self::TYPE_GIT:
        $path = preg_replace('/\.git$/', '', $path);
        break;
      case self::TYPE_SVN:
      case self::TYPE_MERCURIAL:
        break;
    }

    // If this is a Phabricator URI, strip it down to the callsign or ID.
    // Repositories may be cloned as "/diffusion/X/anything.git", for example,
    // so anything after the identifier is ignored.

    $matches = null;
    if (preg_match('@^(diffusion/(?:[A-Z]+|\d+))@', $path, $matches)) {
      $path = $matches[1];
    }

    return $path;
  }

  /**
   * Get the normalized form of the URI's domain.
   *
   * Returns the literal string "<void>" when the URI has no domain.
   * Otherwise the domain is lowercased and, if it equals a value in the
   * domain map, replaced with that value's key.
   *
   * @return string Normalized domain, a domain map key, or "<void>".
   * @task normal
   */
  public function getNormalizedDomain() {
    $uri = new PhutilURI($this->uri);
    $domain = $uri->getDomain();

    // Test for null explicitly before calling strlen(): passing null to
    // strlen() is deprecated as of PHP 8.1.
    // NOTE(review): this assumes getDomain() can return null for URIs with no
    // host, such as local paths; confirm against PhutilURI.
    if ($domain === null || !strlen($domain)) {
      return '<void>';
    }

    $domain = phutil_utf8_strtolower($domain);

    foreach ($this->domainMap as $domain_key => $domain_value) {
      if ($domain === $domain_value) {
        $domain = $domain_key;
        break;
      }
    }

    return $domain;
  }

}
|
|
@ -0,0 +1,84 @@
|
||||||
|
<?php
|
||||||
|
|
||||||
|
final class ArcanistRepositoryURINormalizerTestCase
  extends PhutilTestCase {

  /**
   * Git URIs in several spellings should reduce to the expected path.
   */
  public function testGitURINormalizer() {
    $git = ArcanistRepositoryURINormalizer::TYPE_GIT;

    // Map of input URI => expected normalized path.
    $expectations = array(
      'ssh://user@domain.com/path.git' => 'path',
      'https://user@domain.com/path.git' => 'path',
      'git@domain.com:path.git' => 'path',
      'ssh://user@gitserv002.com/path.git' => 'path',
      'ssh://htaft@domain.com/path.git' => 'path',
      'ssh://user@domain.com/bananas.git' => 'bananas',
      'git@domain.com:bananas.git' => 'bananas',
      'user@domain.com:path/repo' => 'path/repo',
      'user@domain.com:path/repo/' => 'path/repo',
      'file:///path/to/local/repo.git' => 'path/to/local/repo',
      '/path/to/local/repo.git' => 'path/to/local/repo',
      'ssh://something.com/diffusion/X/anything.git' => 'diffusion/X',
      'ssh://something.com/diffusion/X/' => 'diffusion/X',
    );

    foreach ($expectations as $uri => $expected_path) {
      $normalizer = new ArcanistRepositoryURINormalizer($git, $uri);
      $actual_path = $normalizer->getNormalizedPath();

      $this->assertEqual(
        $expected_path,
        $actual_path,
        pht('Normalized Git path for "%s".', $uri));
    }
  }

  /**
   * Domains should be lowercased, replaced by their domain map key when
   * mapped, and reported as "<void>" when the URI has no domain.
   */
  public function testDomainURINormalizer() {
    $git = ArcanistRepositoryURINormalizer::TYPE_GIT;

    $web_host = 'base.phabricator.example.com';
    $ssh_host = 'ssh.phabricator.example.com';

    $aliases = array(
      '<base-uri>' => $web_host,
      '<ssh-host>' => $ssh_host,
    );

    // Map of input URI => expected normalized domain.
    $expectations = array(
      '/' => '<void>',
      '/path/to/local/repo.git' => '<void>',
      'ssh://user@domain.com/path.git' => 'domain.com',
      'ssh://user@DOMAIN.COM/path.git' => 'domain.com',
      'http://'.$web_host.'/diffusion/X/' => '<base-uri>',
      'ssh://'.$ssh_host.'/diffusion/X/' => '<ssh-host>',
      'git@'.$ssh_host.':bananas.git' => '<ssh-host>',
    );

    foreach ($expectations as $uri => $expected_domain) {
      $subject = new ArcanistRepositoryURINormalizer($git, $uri);
      $subject->setDomainMap($aliases);

      $actual_domain = $subject->getNormalizedDomain();

      $this->assertEqual(
        $expected_domain,
        $actual_domain,
        pht('Normalized domain for "%s".', $uri));
    }
  }

  /**
   * Subversion file URIs should normalize to a path without surrounding
   * slashes.
   */
  public function testSVNURINormalizer() {
    $svn = ArcanistRepositoryURINormalizer::TYPE_SVN;

    // Map of input URI => expected normalized path.
    $expectations = array(
      'file:///path/to/repo' => 'path/to/repo',
      'file:///path/to/repo/' => 'path/to/repo',
    );

    foreach ($expectations as $uri => $expected_path) {
      $normalizer = new ArcanistRepositoryURINormalizer($svn, $uri);
      $actual_path = $normalizer->getNormalizedPath();

      $this->assertEqual(
        $expected_path,
        $actual_path,
        pht('Normalized SVN path for "%s".', $uri));
    }
  }

}
|
Loading…
Reference in a new issue