mirror of
https://we.phorge.it/source/phorge.git
synced 2024-11-25 08:12:40 +01:00
Remarkup Code-block: parse language specifier in markdown
Summary: We add support for code blocks whose language is expressed as in GitLab/GitHub/StackOverflow/... "flavored markdown". So we support this syntax: (to avoid confusion, see it online on the Diff) lang=text ```php $asd = 1; ``` Before this change, this was the only supported syntax in Remarkup, with an explicit "lang=": lang=text ```lang=php $asd = 1; ``` This change introduces a minor risk of eating legitimate Remarkup content, since Remarkup allows a multi-line block to be written this way: lang=text ```$asd = 1; $asd = 2;``` The above example still works, but there is a chance that hardcore Remarkup people will have a problem when using a code block to mention programming languages. In short, this can be problematic since "cpp" will be eaten from this list: COUNTEREXAMPLE ```cpp php python ``` Using the above example is not socially nice because it is not usable in GitLab, GitHub and Stack Overflow. If your first line is eaten: Just *add* a newline at the top to reach a valid raw Markdown list (suggested, valid in Remarkup + Markdown): lang=text ``` cpp php python ``` Or, just add "text" to specify that as the language (suggested, valid in Remarkup + Markdown): lang=text ```text cpp php python ``` Or, just *remove* a newline from the bottom to reach a valid raw Remarkup list (Remarkup-only): lang=text ```cpp php python``` Or, just specify that you are writing in the language "text" (Remarkup-only): lang=text ```lang=text cpp php python``` To reduce the impact and help you, the logic of this strict implementation is: - must have backticks - must not have any valid Remarkup option, like lang=, counterexample, etc. - must not have content on the same line as the last backticks - must have a known language in our proposed subset If everything is OK, we remove that language from the content, since it would otherwise be displayed. 
Interestingly, this could improve performance when rendering README files or snippets from external websites, since, in that case, we do not need to guess the language using our deep dark magic. Closes T15481 Test Plan: We added some nice unit tests. Ensure that this test passes: PhutilRemarkupEngineTestCase::testEngine Optionally, take a look at these, before and after: https://we.phorge.it/P16 Change the test plan slightly every time, to make sure it is not in your cache. Reviewers: O1 Blessed Committers, avivey Reviewed By: O1 Blessed Committers, avivey Subscribers: avivey, speck, tobiaswiese, Matthew, Cigaryno Maniphest Tasks: T15481 Differential Revision: https://we.phorge.it/D25299
This commit is contained in:
parent
4a0d3ba3e1
commit
80484b76a5
6 changed files with 154 additions and 4 deletions
|
@ -44,7 +44,18 @@ final class PhutilRemarkupCodeBlockRule extends PhutilRemarkupBlockRule {
|
|||
}
|
||||
|
||||
public function markupText($text, $children) {
|
||||
if (preg_match('/^\s*```/', $text)) {
|
||||
// Header/footer lines, possibly useful for being nice to "flavored markdown".
|
||||
// When it starts with ```stuff the header is 'stuff' (->language)
|
||||
// When it ends with stuff``` the footer is 'stuff' (->garbage)
|
||||
$header_line = null;
|
||||
$footer_line = null;
|
||||
|
||||
$matches = null;
|
||||
if (preg_match('/^\s*```(.*)/', $text, $matches)) {
|
||||
if (isset($matches[1])) {
|
||||
$header_line = $matches[1];
|
||||
}
|
||||
|
||||
// If this is a ```-style block, trim off the backticks and any leading
|
||||
// blank line.
|
||||
$text = preg_replace('/^\s*```(\s*\n)?/', '', $text);
|
||||
|
@ -52,6 +63,13 @@ final class PhutilRemarkupCodeBlockRule extends PhutilRemarkupBlockRule {
|
|||
}
|
||||
|
||||
$lines = explode("\n", $text);
|
||||
|
||||
// If we have a flavored header, it makes sense to look for the footer.
|
||||
if ($header_line !== null && $lines) {
|
||||
$footer_line = $lines[last_key($lines)];
|
||||
}
|
||||
|
||||
// Strip final empty lines
|
||||
while ($lines && !strlen(last($lines))) {
|
||||
unset($lines[last_key($lines)]);
|
||||
}
|
||||
|
@ -65,20 +83,39 @@ final class PhutilRemarkupCodeBlockRule extends PhutilRemarkupBlockRule {
|
|||
|
||||
$parser = new PhutilSimpleOptions();
|
||||
$custom = $parser->parse(head($lines));
|
||||
$valid_options = null;
|
||||
if ($custom) {
|
||||
$valid = true;
|
||||
$valid_options = true;
|
||||
foreach ($custom as $key => $value) {
|
||||
if (!array_key_exists($key, $options)) {
|
||||
$valid = false;
|
||||
$valid_options = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if ($valid) {
|
||||
if ($valid_options) {
|
||||
array_shift($lines);
|
||||
$options = $custom + $options;
|
||||
}
|
||||
}
|
||||
|
||||
// Parse flavored markdown strictly, so as not to eat legitimate Remarkup.
|
||||
// Proceed only if we tried to parse options and we failed
|
||||
// (no options also mean no language).
|
||||
// For example this is not a valid option: ```php
|
||||
// Proceed only if the footer exists and it is not: blabla```
|
||||
// Accept only 2 lines or more. First line: header; then content.
|
||||
if (
|
||||
$valid_options === false &&
|
||||
$header_line !== null &&
|
||||
$footer_line === '' &&
|
||||
count($lines) > 1
|
||||
) {
|
||||
if (self::isKnownLanguageCode($header_line)) {
|
||||
array_shift($lines);
|
||||
$options['lang'] = $header_line;
|
||||
}
|
||||
}
|
||||
|
||||
// Normalize the text back to a 0-level indent.
|
||||
$min_indent = 80;
|
||||
foreach ($lines as $line) {
|
||||
|
@ -249,4 +286,61 @@ final class PhutilRemarkupCodeBlockRule extends PhutilRemarkupBlockRule {
|
|||
$engine->highlightSource($options['lang'], $text)));
|
||||
}
|
||||
|
||||
/**
|
||||
* Check if a language code can be used in a generic flavored markdown.
*
* The code is accepted only when it is present as a key in the map returned
* by knownLanguageCodes(). The lookup is an exact array-key match: no
* trimming or case normalization is applied to $lang here.
|
||||
* @param string $lang Language code, e.g. the text following the opening
*                     backticks of a code block.
|
||||
* @return bool True if $lang is one of the known language codes.
|
||||
*/
|
||||
private static function isKnownLanguageCode($lang) {
|
||||
$languages = self::knownLanguageCodes();
|
||||
// Languages are stored as array keys, so isset() is the membership test.
return isset($languages[$lang]);
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the available languages for a generic flavored markdown.
*
* The list is deliberately a fixed subset: only these codes are recognized
* as a language specifier by isKnownLanguageCode().
|
||||
* @return array Map whose keys are the accepted language codes. Every value
*               is 1 and carries no meaning; ignore it.
|
||||
*/
|
||||
private static function knownLanguageCodes() {
|
||||
// This is a friendly subset from https://pygments.org/languages/
// Kept in a static local so the array literal is defined in one place.
|
||||
static $map = array(
|
||||
'arduino' => 1,
|
||||
'assembly' => 1,
|
||||
'awk' => 1,
|
||||
'bash' => 1,
|
||||
'bat' => 1,
|
||||
'c' => 1,
|
||||
'cmake' => 1,
|
||||
'cobol' => 1,
|
||||
'cpp' => 1,
|
||||
'css' => 1,
|
||||
'csharp' => 1,
|
||||
'dart' => 1,
|
||||
'delphi' => 1,
|
||||
'fortran' => 1,
|
||||
'go' => 1,
|
||||
'groovy' => 1,
|
||||
'haskell' => 1,
|
||||
'java' => 1,
|
||||
'javascript' => 1,
|
||||
'kotlin' => 1,
|
||||
'lisp' => 1,
|
||||
'lua' => 1,
|
||||
'matlab' => 1,
|
||||
'make' => 1,
|
||||
'perl' => 1,
|
||||
'php' => 1,
|
||||
'powershell' => 1,
|
||||
'python' => 1,
|
||||
'r' => 1,
|
||||
'ruby' => 1,
|
||||
'rust' => 1,
|
||||
'scala' => 1,
|
||||
'sh' => 1,
|
||||
'sql' => 1,
|
||||
'typescript' => 1,
|
||||
'vba' => 1,
|
||||
);
|
||||
return $map;
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -2,6 +2,8 @@
|
|||
|
||||
/**
|
||||
* Test cases for @{class:PhutilRemarkupEngine}.
|
||||
* @TODO: This unit is not always triggered when you need it.
|
||||
* https://we.phorge.it/T15500
|
||||
*/
|
||||
final class PhutilRemarkupEngineTestCase extends PhutilTestCase {
|
||||
|
||||
|
|
|
@ -0,0 +1,7 @@
|
|||
```cpp
|
||||
code
|
||||
```
|
||||
~~~~~~~~~~
|
||||
<div class="remarkup-code-block" data-code-lang="cpp" data-sigil="remarkup-code-block"><pre class="remarkup-code">code</pre></div>
|
||||
~~~~~~~~~~
|
||||
code
|
|
@ -0,0 +1,18 @@
|
|||
```#comment
|
||||
code
|
||||
|
||||
#more comment
|
||||
more code```
|
||||
|
||||
~~~~~~~~~~
|
||||
<div class="remarkup-code-block" data-code-lang="text" data-sigil="remarkup-code-block"><pre class="remarkup-code">#comment
|
||||
code
|
||||
|
||||
#more comment
|
||||
more code</pre></div>
|
||||
~~~~~~~~~~
|
||||
#comment
|
||||
code
|
||||
|
||||
#more comment
|
||||
more code
|
|
@ -0,0 +1,9 @@
|
|||
```
|
||||
cpp
|
||||
second line```
|
||||
~~~~~~~~~~
|
||||
<div class="remarkup-code-block" data-code-lang="text" data-sigil="remarkup-code-block"><pre class="remarkup-code">cpp
|
||||
second line</pre></div>
|
||||
~~~~~~~~~~
|
||||
cpp
|
||||
second line
|
|
@ -0,0 +1,20 @@
|
|||
```cpp
|
||||
code
|
||||
|
||||
more code
|
||||
|
||||
more code
|
||||
```
|
||||
|
||||
~~~~~~~~~~
|
||||
<div class="remarkup-code-block" data-code-lang="cpp" data-sigil="remarkup-code-block"><pre class="remarkup-code">code
|
||||
|
||||
more code
|
||||
|
||||
more code</pre></div>
|
||||
~~~~~~~~~~
|
||||
code
|
||||
|
||||
more code
|
||||
|
||||
more code
|
Loading…
Reference in a new issue