1
0
Fork 0
mirror of https://we.phorge.it/source/arcanist.git synced 2024-11-26 00:32:41 +01:00
phorge-arcanist/src/lexer/PhutilPythonFragmentLexer.php
epriestley 9b74cb4ee6 Fully merge "libphutil/" into "arcanist/"
Summary: Ref T13395. Moves all remaining code in "libphutil/" into "arcanist/".

Test Plan: Ran various arc workflows, although this probably has some remaining rough edges.

Maniphest Tasks: T13395

Differential Revision: https://secure.phabricator.com/D20980
2020-02-12 15:17:38 -08:00

314 lines
7.5 KiB
PHP

<?php
/**
 * Python lexer which can handle fragments of source code, e.g. for syntax
 * highlighting of inline snippets. This is largely based on Pygments:
 *
 *   https://bitbucket.org/birkenfeld/pygments-main/src/default/pygments/lexers/agile.py
 *
 * This lexer is not suitable for parser construction; it always lexes any
 * input stream, even if the input is not Python.
 *
 * The word lists (keywords, builtins, exceptions) describe Python 2 era
 * names such as `print`, `exec`, `xrange` and `unicode`.
 */
final class PhutilPythonFragmentLexer extends PhutilLexer {

  /**
   * Build the state machine which drives the lexer.
   *
   * The result maps a state name to an ordered list of rules. Each rule is a
   * list of the form `(regular expression, token type, optional next state)`.
   * Token types are short Pygments-style class names (for example `k` for
   * keywords and `s` for strings); `null` is used where no specific type
   * applies. A next state of `!pop` is used by every sub-state to return to
   * the state which entered it.
   *
   * NOTE: Rule order matters wherever two patterns can match at the same
   * position (for example, the triple-quote rules are listed before the
   * single-quote rules), so the lists below must not be reordered casually.
   *
   * @return map<string, list<list>> Map from state name to rule list.
   */
  protected function getRawRules() {
    // Every entry is a regular expression fragment; the list is joined with
    // "|" and terminated with a word boundary where it is used.
    $keywords = array(
      'as',
      'assert',
      'break',
      'continue',
      'del',
      'elif',
      'else',
      'except',
      'exec',
      'finally',
      'for',
      'global',
      'if',
      'lambda',
      'pass',
      'print',
      'raise',
      'return',
      'try',
      'while',
      'with',
      'yield(\s+from)?',
    );

    $builtins = array(
      '__import__',
      'abs',
      'all',
      'any',
      'apply',
      'basestring',
      'bin',
      'bool',
      'buffer',
      'bytearray',
      'bytes',
      'callable',
      'chr',
      'classmethod',
      'cmp',
      'coerce',
      'compile',
      'complex',
      'delattr',
      'dict',
      'dir',
      'divmod',
      'enumerate',
      'eval',
      'execfile',
      'exit',
      'file',
      'filter',
      'float',
      'frozenset',
      'getattr',
      'globals',
      'hasattr',
      'hash',
      'hex',
      'id',
      'input',
      'int',
      'intern',
      'isinstance',
      'issubclass',
      'iter',
      'len',
      'list',
      'locals',
      'long',
      'map',
      'max',
      'min',
      'next',
      'object',
      'oct',
      'open',
      'ord',
      'pow',
      'property',
      'range',
      'raw_input',
      'reduce',
      'reload',
      'repr',
      'reversed',
      'round',
      'set',
      'setattr',
      'slice',
      'sorted',
      'staticmethod',
      'str',
      'sum',
      'super',
      'tuple',
      'type',
      'unichr',
      'unicode',
      'vars',
      'xrange',
      'zip',
    );

    $pseudo_builtins = array(
      'Ellipsis',
      'False',
      'None',
      'NotImplemented',
      'True',
      'self',
    );

    $exceptions = array(
      'ArithmeticError',
      'AssertionError',
      'AttributeError',
      'BaseException',
      'DeprecationWarning',
      'EOFError',
      'EnvironmentError',
      'Exception',
      'FloatingPointError',
      'FutureWarning',
      'GeneratorExit',
      'IOError',
      'ImportError',
      'ImportWarning',
      'IndentationError',
      'IndexError',
      'KeyError',
      'KeyboardInterrupt',
      'LookupError',
      'MemoryError',
      'NameError',
      'NotImplemented',
      'NotImplementedError',
      'OSError',
      'OverflowError',
      'OverflowWarning',
      'PendingDeprecationWarning',
      'ReferenceError',
      'RuntimeError',
      'RuntimeWarning',
      'StandardError',
      'StopIteration',
      'SyntaxError',
      'SyntaxWarning',
      'SystemError',
      'SystemExit',
      'TabError',
      'TypeError',
      'UnboundLocalError',
      'UnicodeDecodeError',
      'UnicodeEncodeError',
      'UnicodeError',
      'UnicodeTranslateError',
      'UnicodeWarning',
      'UserWarning',
      'VMSError',
      'ValueError',
      'Warning',
      'WindowsError',
      'ZeroDivisionError',
    );

    // Horizontal whitespace and "#" comments; shared by "start" and by the
    // name-reading states so that they may appear between tokens.
    $nonsemantic_rules = array(
      array('[^\\S\\n]+', null),
      array('#[^\\n]*', 'c'),
    );

    // Backslash escape sequences; only merged into the non-raw string states.
    $stringescape = array(
      array(
        '\\\\([\\\\abfnrtv"\']|\n|N{.*?}|u[a-fA-F0-9]{4}|'.
        'U[a-fA-F0-9]{8}|x[a-fA-F0-9]{2}|[0-7]{1,3})',
        'se',
      ),
    );

    // Literal newlines are only part of a string inside triple-quoted states.
    $nl = array(
      array('\\n', 's'),
    );

    $strings = array(
      // printf-style interpolation, e.g. "%(name)s" or "%5.2f"
      array(
        '%(\\(\\w+\\))?[-#0 +]*([0-9]+|[*])?(\\.([0-9]+|[*]))?'.
        '[hlL]?[diouxXeEfFgGcrs%]',
        'si',
      ),
      array('[^\\\\\'"%\\n]+', 's'),
      // quotes, percents, and backslashes must be parsed one at a time
      array('[\'"\\\\]', 's'),
      // unhandled string formatting sign
      array('%', 's'),
      // newlines are an error (use $nl rules)
      array('', null, '!pop'),
    );

    // Rules "included here for raw strings": in a raw string there are no
    // escape sequences, but a backslash still prevents the following quote
    // (or newline) from ending the literal. Each single-line state therefore
    // consumes, as one string token: an escaped backslash, an escaped copy of
    // ITS OWN delimiter, or a backslash-newline continuation.
    //
    // Previously both states used the single-quote pattern, so `r"a\"b"` was
    // terminated at the escaped double quote, and the last alternative matched
    // a backslash followed by the letter "n" rather than by a newline.
    $dqs_raw_escape = array('(?:\\\\\\\\|\\\\"|\\\\\\n)', 's');
    $sqs_raw_escape = array('(?:\\\\\\\\|\\\\\'|\\\\\\n)', 's');

    $dqs = array_merge(
      array(
        array('"', 's', '!pop'),
        $dqs_raw_escape,
      ),
      $strings);

    $sqs = array_merge(
      array(
        array('\'', 's', '!pop'),
        $sqs_raw_escape,
      ),
      $strings);

    $tdqs = array_merge(
      array(
        array('"""', 's', '!pop'),
      ),
      $nl,
      $strings);

    $tsqs = array_merge(
      array(
        array('\'\'\'', 's', '!pop'),
      ),
      $nl,
      $strings);

    return array(
      'start' => array_merge(
        array(
          array('\\n', null),
          // TODO: Docstrings should match only at the start of a line
          array('""".*?"""', 'sd'),
          array('\'\'\'.*?\'\'\'', 'sd'),
        ),
        $nonsemantic_rules,
        array(
          array('[]{}:(),;[]', 'p'),
          // backslash-newline line continuation, then any stray backslash
          array('\\\\\\n', null),
          array('\\\\', null),
          array('(?:in|is|and|or|not)\\b', 'ow'),
          array('(?:!=|==|<<|>>|[-~+/*%=<>&^|.])', 'o'),
          array('(?:'.implode('|', $keywords).')\\b', 'k'),
          // these keywords switch state so the following name gets its own
          // token type
          array('def(?=\\s)', 'k', 'funcname'),
          array('class(?=\\s)', 'k', 'classname'),
          array('from(?=\\s)', 'kn', 'fromimport'),
          array('import(?=\\s)', 'kn', 'import'),
          // the lookbehind keeps attribute accesses such as "x.len" from
          // being treated as builtins
          array('(?<!\\.)(?:'.implode('|', $builtins).')\\b', 'nb'),
          array('(?<!\\.)(?:'.implode('|', $pseudo_builtins).')\\b', 'bp'),
          array('(?<!\\.)(?:'.implode('|', $exceptions).')\\b', 'ne'),
          array('`[^\\n]*?`', 'sb'),
          // string openers: raw prefixes first, and triple quotes before
          // single quotes, so the longest opener wins
          array('(?:[rR]|[uU][rR]|[rR][uU])"""', 's', 'tdqs_raw'),
          array('(?:[rR]|[uU][rR]|[rR][uU])\'\'\'', 's', 'tsqs_raw'),
          array('(?:[rR]|[uU][rR]|[rR][uU])"', 's', 'dqs_raw'),
          array('(?:[rR]|[uU][rR]|[rR][uU])\'', 's', 'sqs_raw'),
          array('[uU]?"""', 's', 'tdqs'),
          array('[uU]?\'\'\'', 's', 'tsqs'),
          array('[uU]?"', 's', 'dqs'),
          array('[uU]?\'', 's', 'sqs'),
          array('@[\\w.]+', 'nd'),
          array('[a-zA-Z_]\\w*', 'n'),
          // numeric literals, most specific forms first
          array('(\\d+\\.\\d*|\\d*\\.\\d+)([eE][+-]?[0-9]+)?j?', 'mf'),
          array('\\d+[eE][+-]?[0-9]+j?', 'mf'),
          array('0[0-7]+j?', 'mo'),
          array('0[bB][01]+', 'mb'),
          array('0[xX][a-fA-F0-9]+', 'mh'),
          array('\\d+L', 'il'),
          array('\\d+j?', 'mi'),
          // catch-all so that arbitrary input always lexes
          array('.', null),
        )),

      'funcname' => array_merge(
        $nonsemantic_rules,
        array(
          array('[a-zA-Z_]\w*', 'nf', '!pop'),
          array('', null, '!pop'),
        )),

      'classname' => array_merge(
        $nonsemantic_rules,
        array(
          array('[a-zA-Z_]\w*', 'nc', '!pop'),
          array('', null, '!pop'),
        )),

      'fromimport' => array_merge(
        $nonsemantic_rules,
        array(
          array('import\b', 'kn', '!pop'),
          // if None occurs here, it's "raise x from None", since None can
          // never be a module name
          array('None\b', 'bp', '!pop'),
          // sadly, in "raise x from y" y will be highlighted as namespace too
          array('[a-zA-Z_.][\w.]*', 'nn'),
          array('', null, '!pop'),
        )),

      'import' => array_merge(
        $nonsemantic_rules,
        array(
          array('as\b', 'kn'),
          array(',', 'o'),
          array('[a-zA-Z_.][\w.]*', 'nn'),
          array('', null, '!pop'),
        )),

      // The raw states differ from the plain ones only in that they do not
      // recognize backslash escape sequences.
      'dqs_raw' => $dqs,
      'sqs_raw' => $sqs,
      'dqs' => array_merge($stringescape, $dqs),
      'sqs' => array_merge($stringescape, $sqs),
      'tdqs_raw' => $tdqs,
      'tsqs_raw' => $tsqs,
      'tdqs' => array_merge($stringescape, $tdqs),
      'tsqs' => array_merge($stringescape, $tsqs),
    );
  }

}