1
0
Fork 0
mirror of https://we.phorge.it/source/phorge.git synced 2024-12-23 22:10:55 +01:00

Generate more friendly anchor names for header sections in Remarkup

Summary:
Depends on D20820. Ref T13410. We currently cut anchor names in the middle, don't support emoji in anchors, and generate relatively short anchors.

Generate slightly longer anchors, allow more unicode, and try not to cut things in the middle.

Test Plan: Created a document with a variety of different anchors and saw them generate more usable names.

Maniphest Tasks: T13410

Differential Revision: https://secure.phabricator.com/D20821
This commit is contained in:
epriestley 2019-09-18 15:19:40 -07:00
parent 74d6bcbdce
commit bff72ce3b5
6 changed files with 100 additions and 24 deletions

View file

@ -2150,6 +2150,7 @@ phutil_register_library_map(array(
'PhabricatorAlmanacApplication' => 'applications/almanac/application/PhabricatorAlmanacApplication.php',
'PhabricatorAmazonAuthProvider' => 'applications/auth/provider/PhabricatorAmazonAuthProvider.php',
'PhabricatorAmazonSNSFuture' => 'applications/metamta/future/PhabricatorAmazonSNSFuture.php',
'PhabricatorAnchorTestCase' => 'infrastructure/markup/__tests__/PhabricatorAnchorTestCase.php',
'PhabricatorAnchorView' => 'view/layout/PhabricatorAnchorView.php',
'PhabricatorAphlictManagementDebugWorkflow' => 'applications/aphlict/management/PhabricatorAphlictManagementDebugWorkflow.php',
'PhabricatorAphlictManagementNotifyWorkflow' => 'applications/aphlict/management/PhabricatorAphlictManagementNotifyWorkflow.php',
@ -8314,6 +8315,7 @@ phutil_register_library_map(array(
'PhabricatorAlmanacApplication' => 'PhabricatorApplication',
'PhabricatorAmazonAuthProvider' => 'PhabricatorOAuth2AuthProvider',
'PhabricatorAmazonSNSFuture' => 'PhutilAWSFuture',
'PhabricatorAnchorTestCase' => 'PhabricatorTestCase',
'PhabricatorAnchorView' => 'AphrontView',
'PhabricatorAphlictManagementDebugWorkflow' => 'PhabricatorAphlictManagementWorkflow',
'PhabricatorAphlictManagementNotifyWorkflow' => 'PhabricatorAphlictManagementWorkflow',

View file

@ -0,0 +1,38 @@
<?php
/**
 * Unit test for the header-text to anchor-name conversion performed by
 * @{method:PhutilRemarkupHeaderBlockRule::getAnchorNameFromHeaderText}.
 */
final class PhabricatorAnchorTestCase
  extends PhabricatorTestCase {

  /**
   * Run a table of header strings through the anchor generator and compare
   * each result against the expected anchor name.
   */
  public function testAnchors() {
    // One string containing every byte from 19 through 127, in ascending
    // order, to exercise the handling of ASCII punctuation and letters.
    $low_ascii = implode('', array_map('chr', range(19, 127)));

    // U+26C4 (snowman), encoded as UTF-8.
    $snowman = "\xE2\x9B\x84";

    // Header text => expected anchor name.
    $cases = array(
      '' => '',
      'Bells and Whistles' => 'bells-and-whistles',
      'Termination for Nonpayment' => 'termination-for-nonpayment',
      $low_ascii => '0123456789-abcdefghijklmnopqrstu',

      // A trailing "on" is expected to be dropped; a trailing "ox" is kept.
      'xxxx xxxx xxxx xxxx xxxx on' => 'xxxx-xxxx-xxxx-xxxx-xxxx',
      'xxxx xxxx xxxx xxxx xxxx ox' => 'xxxx-xxxx-xxxx-xxxx-xxxx-ox',

      // Non-ASCII glyphs are expected to survive, and long runs of them are
      // expected to be shortened to 32 glyphs.
      "So, You Want To Build A {$snowman}?" =>
        "so-you-want-to-build-a-{$snowman}",
      str_repeat($snowman, 128) => str_repeat($snowman, 32),
    );

    foreach ($cases as $header => $expected_anchor) {
      $actual_anchor =
        PhutilRemarkupHeaderBlockRule::getAnchorNameFromHeaderText($header);

      $this->assertEqual(
        $expected_anchor,
        $actual_anchor,
        pht('Anchor for "%s".', $header));
    }
  }
}

View file

@ -73,24 +73,7 @@ final class PhutilRemarkupHeaderBlockRule extends PhutilRemarkupBlockRule {
}
private function generateAnchor($level, $text) {
$anchor = strtolower($text);
$anchor = preg_replace('/[^a-z0-9]/', '-', $anchor);
$anchor = preg_replace('/--+/', '-', $anchor);
$anchor = trim($anchor, '-');
$anchor = substr($anchor, 0, 24);
$anchor = trim($anchor, '-');
$base = $anchor;
$key = self::KEY_HEADER_TOC;
$engine = $this->getEngine();
$anchors = $engine->getTextMetadata($key, array());
$suffix = 1;
while (!strlen($anchor) || isset($anchors[$anchor])) {
$anchor = $base.'-'.$suffix;
$anchor = trim($anchor, '-');
$suffix++;
}
// When a document contains a link inside a header, like this:
//
@ -100,12 +83,30 @@ final class PhutilRemarkupHeaderBlockRule extends PhutilRemarkupBlockRule {
// header itself. We push the 'toc' state so all the link rules generate
// just names.
$engine->pushState('toc');
$text = $this->applyRules($text);
$text = $engine->restoreText($text);
$anchors[$anchor] = array($level, $text);
$plain_text = $text;
$plain_text = $this->applyRules($plain_text);
$plain_text = $engine->restoreText($plain_text);
$engine->popState('toc');
$anchor = self::getAnchorNameFromHeaderText($plain_text);
if (!strlen($anchor)) {
return null;
}
$base = $anchor;
$key = self::KEY_HEADER_TOC;
$anchors = $engine->getTextMetadata($key, array());
$suffix = 1;
while (isset($anchors[$anchor])) {
$anchor = $base.'-'.$suffix;
$anchor = trim($anchor, '-');
$suffix++;
}
$anchors[$anchor] = array($level, $plain_text);
$engine->setTextMetadata($key, $anchors);
return phutil_tag(
@ -159,4 +160,31 @@ final class PhutilRemarkupHeaderBlockRule extends PhutilRemarkupBlockRule {
return phutil_implode_html("\n", $toc);
}
/**
 * Build a URI fragment ("anchor") name from the plain text of a header.
 *
 * The text is lowercased, runs of ASCII characters other than "a-z" and
 * "0-9" are collapsed into a single "-", the result is limited to 32 glyphs,
 * and one trailing minor word (such as "the" or "of") is discarded.
 *
 * @param string $text Plain text of the header.
 * @return string Anchor name. This is the empty string if the text contains
 *   nothing usable in an anchor.
 */
public static function getAnchorNameFromHeaderText($text) {
  $anchor = phutil_utf8_strtolower($text);

  // Replace all latin characters which are not "a-z" or "0-9" with "-".
  // Preserve other characters, since non-latin letters and emoji work
  // fine in anchors.
  $anchor = preg_replace('/[\x00-\x2F\x3A-\x60\x7B-\x7F]+/', '-', $anchor);
  $anchor = trim($anchor, '-');

  // Truncate the fragment to something reasonable.
  $anchor = id(new PhutilUTF8StringTruncator())
    ->setMaximumGlyphs(32)
    ->setTerminator('')
    ->truncateString($anchor);

  // Truncation may cut immediately after a separator and leave a dangling
  // "-" at the end of the fragment. Strip it so the anchor never ends with
  // a separator and so the hanging-word pattern below, which is anchored to
  // the end of the string, can still match. This is a no-op when the
  // truncated fragment does not end with "-".
  $anchor = trim($anchor, '-');

  // If the fragment is terminated by a word which "The U.S. Government
  // Printing Office Style Manual" normally discourages capitalizing in
  // titles, discard it. This is an arbitrary heuristic intended to avoid
  // awkward hanging words in anchors.
  $anchor = preg_replace(
    '/-(a|an|the|at|by|for|in|of|on|per|to|up|and|as|but|if|or|nor)\z/',
    '',
    $anchor);

  return $anchor;
}
}

View file

@ -18,6 +18,10 @@ final class PhutilRemarkupBoldRule extends PhutilRemarkupRule {
}
/**
 * Produce the replacement for a single bold match.
 *
 * @param array $matches Regular expression match; index 1 holds the text
 *   between the bold markers.
 * @return mixed The bare inner text when the engine is in anchor mode,
 *   otherwise the inner text wrapped in a "<strong>" element.
 */
protected function applyCallback(array $matches) {
  $inner = $matches[1];

  // Outside of anchor mode, emit real markup.
  if (!$this->getEngine()->isAnchorMode()) {
    return hsprintf('<strong>%s</strong>', $inner);
  }

  // In anchor mode, emit only the text so no markup leaks into the output.
  return $inner;
}

View file

@ -34,6 +34,10 @@ final class PhutilRemarkupEngine extends PhutilMarkupEngine {
return $this->mode & self::MODE_TEXT;
}
/**
 * Report whether the engine is currently rendering in "anchor" mode.
 *
 * This returns whatever `getState('toc')` returns; callers treat the result
 * as a truth value. NOTE(review): presumably this is truthy while a rule has
 * pushed the "toc" state and falsey otherwise -- confirm against getState().
 *
 * @return mixed Current value of the "toc" state.
 */
public function isAnchorMode() {
return $this->getState('toc');
}
/**
 * Test whether the MODE_HTML_MAIL flag is set in the engine's mode.
 *
 * The result is the raw bitwise AND of the mode and the flag rather than a
 * strict boolean, so callers should only use it as a truth value.
 *
 * @return int Nonzero if the flag is set, otherwise 0 (assuming the mode
 *   and the flag are integers -- TODO confirm).
 */
public function isHTMLMailMode() {
return $this->mode & self::MODE_HTML_MAIL;
}

View file

@ -6,14 +6,14 @@
~~~~~~~~~~
<ul>
<li><a href="#http-www-example-com-lin">link_name</a></li>
<li><a href="#link-name">link_name</a></li>
<ul>
<li><a href="#bold"><strong>bold</strong></a></li>
<li><a href="#bold">bold</a></li>
</ul>
<li><a href="#http-www-example-com">http://www.example.com</a></li>
</ul>
<h2 class="remarkup-header"><a name="http-www-example-com-lin"></a><a href="http://www.example.com/" class="remarkup-link" target="_blank" rel="noreferrer">link_name</a></h2>
<h2 class="remarkup-header"><a name="link-name"></a><a href="http://www.example.com/" class="remarkup-link" target="_blank" rel="noreferrer">link_name</a></h2>
<h3 class="remarkup-header"><a name="bold"></a><strong>bold</strong></h3>