1
0
Fork 0
mirror of https://we.phorge.it/source/phorge.git synced 2025-03-30 22:18:13 +02:00
phorge-phorge/src/applications/files/document/PhabricatorJupyterDocumentEngine.php
epriestley a7f3316aa3 Improve sequencing of various content/header checks in abstract block diffs
Summary:
Ref T13425. Some diff checks currently sequence incorrectly:

  - When we're rendering block lists, syntax highlighting isn't relevant.
  - The "large change" guard can prevent rendering of otherwise-renderable changes.
  - Actual errors in the document engine (like bad JSON in a ".ipynb" file) aren't surfaced properly.

Improve sequencing somewhat to resolve these issues.

Test Plan:
  - Viewed a notebook, no longer saw a "highlighting disabled" warning.
  - Forced a notebook to fail, got a useful inline error instead of a popup dialog error.
  - Forced a notebook to have a large number of differences, got a rendering out of it.

Maniphest Tasks: T13425

Differential Revision: https://secure.phabricator.com/D20843
2019-09-30 10:40:12 -07:00

547 lines
12 KiB
PHP

<?php
final class PhabricatorJupyterDocumentEngine
extends PhabricatorDocumentEngine {
const ENGINEKEY = 'jupyter';
public function getViewAsLabel(PhabricatorDocumentRef $ref) {
return pht('View as Jupyter Notebook');
}
protected function getDocumentIconIcon(PhabricatorDocumentRef $ref) {
return 'fa-sun-o';
}
protected function getDocumentRenderingText(PhabricatorDocumentRef $ref) {
return pht('Rendering Jupyter Notebook...');
}
public function shouldRenderAsync(PhabricatorDocumentRef $ref) {
return true;
}
protected function getContentScore(PhabricatorDocumentRef $ref) {
$name = $ref->getName();
if (preg_match('/\\.ipynb\z/i', $name)) {
return 2000;
}
return 500;
}
protected function canRenderDocumentType(PhabricatorDocumentRef $ref) {
return $ref->isProbablyJSON();
}
public function canDiffDocuments(
PhabricatorDocumentRef $uref,
PhabricatorDocumentRef $vref) {
return true;
}
public function newEngineBlocks(
PhabricatorDocumentRef $uref,
PhabricatorDocumentRef $vref) {
$blocks = new PhabricatorDocumentEngineBlocks();
try {
$u_blocks = $this->newDiffBlocks($uref);
$v_blocks = $this->newDiffBlocks($vref);
$blocks->addBlockList($uref, $u_blocks);
$blocks->addBlockList($vref, $v_blocks);
} catch (Exception $ex) {
$blocks->addMessage($ex->getMessage());
}
return $blocks;
}
private function newDiffBlocks(PhabricatorDocumentRef $ref) {
$viewer = $this->getViewer();
$content = $ref->loadData();
$cells = $this->newCells($content, true);
$idx = 1;
$blocks = array();
foreach ($cells as $cell) {
$cell_content = $this->renderJupyterCell($viewer, $cell);
$notebook_table = phutil_tag(
'table',
array(
'class' => 'jupyter-notebook',
),
$cell_content);
$container = phutil_tag(
'div',
array(
'class' => 'document-engine-jupyter document-engine-diff',
),
$notebook_table);
// When the cell is a source code line, we can hash just the raw
// input rather than all the cell metadata.
switch (idx($cell, 'cell_type')) {
case 'code/line':
$hash_input = $cell['raw'];
break;
default:
$hash_input = serialize($cell);
break;
}
$hash = PhabricatorHash::digestWithNamedKey(
$hash_input,
'document-engine.content-digest');
$blocks[] = id(new PhabricatorDocumentEngineBlock())
->setBlockKey($idx)
->setDifferenceHash($hash)
->setContent($container);
$idx++;
}
return $blocks;
}
protected function newDocumentContent(PhabricatorDocumentRef $ref) {
$viewer = $this->getViewer();
$content = $ref->loadData();
try {
$cells = $this->newCells($content, false);
} catch (Exception $ex) {
return $this->newMessage($ex->getMessage());
}
$rows = array();
foreach ($cells as $cell) {
$rows[] = $this->renderJupyterCell($viewer, $cell);
}
$notebook_table = phutil_tag(
'table',
array(
'class' => 'jupyter-notebook',
),
$rows);
$container = phutil_tag(
'div',
array(
'class' => 'document-engine-jupyter',
),
$notebook_table);
return $container;
}
private function newCells($content, $for_diff) {
try {
$data = phutil_json_decode($content);
} catch (PhutilJSONParserException $ex) {
throw new Exception(
pht(
'This is not a valid JSON document and can not be rendered as '.
'a Jupyter notebook: %s.',
$ex->getMessage()));
}
if (!is_array($data)) {
throw new Exception(
pht(
'This document does not encode a valid JSON object and can not '.
'be rendered as a Jupyter notebook.'));
}
$nbformat = idx($data, 'nbformat');
if (!strlen($nbformat)) {
throw new Exception(
pht(
'This document is missing an "nbformat" field. Jupyter notebooks '.
'must have this field.'));
}
if ($nbformat !== 4) {
throw new Exception(
pht(
'This Jupyter notebook uses an unsupported version of the file '.
'format (found version %s, expected version 4).',
$nbformat));
}
$cells = idx($data, 'cells');
if (!is_array($cells)) {
throw new Exception(
pht(
'This Jupyter notebook does not specify a list of "cells".'));
}
if (!$cells) {
throw new Exception(
pht(
'This Jupyter notebook does not specify any notebook cells.'));
}
if (!$for_diff) {
return $cells;
}
// If we're extracting cells to build a diff view, split code cells into
// individual lines and individual outputs. We want users to be able to
// add inline comments to each line and each output block.
$results = array();
foreach ($cells as $cell) {
$cell_type = idx($cell, 'cell_type');
if ($cell_type !== 'code') {
$results[] = $cell;
continue;
}
$label = $this->newCellLabel($cell);
$lines = idx($cell, 'source');
if (!is_array($lines)) {
$lines = array();
}
$content = $this->highlightLines($lines);
$count = count($lines);
for ($ii = 0; $ii < $count; $ii++) {
$is_head = ($ii === 0);
$is_last = ($ii === ($count - 1));
if ($is_head) {
$line_label = $label;
} else {
$line_label = null;
}
$results[] = array(
'cell_type' => 'code/line',
'label' => $line_label,
'raw' => $lines[$ii],
'display' => idx($content, $ii),
'head' => $is_head,
'last' => $is_last,
);
}
$outputs = array();
$output_list = idx($cell, 'outputs');
if (is_array($output_list)) {
foreach ($output_list as $output) {
$results[] = array(
'cell_type' => 'code/output',
'output' => $output,
);
}
}
}
return $results;
}
private function renderJupyterCell(
PhabricatorUser $viewer,
array $cell) {
list($label, $content) = $this->renderJupyterCellContent($viewer, $cell);
$label_cell = phutil_tag(
'td',
array(
'class' => 'jupyter-label',
),
$label);
$classes = null;
switch (idx($cell, 'cell_type')) {
case 'code/line':
$classes = 'jupyter-cell-flush';
break;
}
$content_cell = phutil_tag(
'td',
array(
'class' => $classes,
),
$content);
return phutil_tag(
'tr',
array(),
array(
$label_cell,
$content_cell,
));
}
private function renderJupyterCellContent(
PhabricatorUser $viewer,
array $cell) {
$cell_type = idx($cell, 'cell_type');
switch ($cell_type) {
case 'markdown':
return $this->newMarkdownCell($cell);
case 'code':
return $this->newCodeCell($cell);
case 'code/line':
return $this->newCodeLineCell($cell);
case 'code/output':
return $this->newCodeOutputCell($cell);
}
return $this->newRawCell(id(new PhutilJSON())->encodeFormatted($cell));
}
private function newRawCell($content) {
return array(
null,
phutil_tag(
'div',
array(
'class' => 'jupyter-cell-raw PhabricatorMonospaced',
),
$content),
);
}
private function newMarkdownCell(array $cell) {
$content = idx($cell, 'source');
if (!is_array($content)) {
$content = array();
}
$content = implode('', $content);
$content = phutil_escape_html_newlines($content);
return array(
null,
phutil_tag(
'div',
array(
'class' => 'jupyter-cell-markdown',
),
$content),
);
}
private function newCodeCell(array $cell) {
$label = $this->newCellLabel($cell);
$content = idx($cell, 'source');
if (!is_array($content)) {
$content = array();
}
$content = $this->highlightLines($content);
$outputs = array();
$output_list = idx($cell, 'outputs');
if (is_array($output_list)) {
foreach ($output_list as $output) {
$outputs[] = $this->newOutput($output);
}
}
return array(
$label,
array(
phutil_tag(
'div',
array(
'class' =>
'jupyter-cell-code jupyter-cell-code-block '.
'PhabricatorMonospaced remarkup-code',
),
array(
$content,
)),
$outputs,
),
);
}
private function newCodeLineCell(array $cell) {
$classes = array();
$classes[] = 'PhabricatorMonospaced';
$classes[] = 'remarkup-code';
$classes[] = 'jupyter-cell-code';
$classes[] = 'jupyter-cell-code-line';
if ($cell['head']) {
$classes[] = 'jupyter-cell-code-head';
}
if ($cell['last']) {
$classes[] = 'jupyter-cell-code-last';
}
$classes = implode(' ', $classes);
return array(
$cell['label'],
array(
phutil_tag(
'div',
array(
'class' => $classes,
),
array(
$cell['display'],
)),
),
);
}
private function newCodeOutputCell(array $cell) {
return array(
null,
$this->newOutput($cell['output']),
);
}
private function newOutput(array $output) {
if (!is_array($output)) {
return pht('<Invalid Output>');
}
$classes = array(
'jupyter-output',
'PhabricatorMonospaced',
);
$output_name = idx($output, 'name');
switch ($output_name) {
case 'stderr':
$classes[] = 'jupyter-output-stderr';
break;
}
$output_type = idx($output, 'output_type');
switch ($output_type) {
case 'execute_result':
case 'display_data':
$data = idx($output, 'data');
$image_formats = array(
'image/png',
'image/jpeg',
'image/jpg',
'image/gif',
);
foreach ($image_formats as $image_format) {
if (!isset($data[$image_format])) {
continue;
}
$raw_data = $data[$image_format];
if (!is_array($raw_data)) {
$raw_data = array($raw_data);
}
$raw_data = implode('', $raw_data);
$content = phutil_tag(
'img',
array(
'src' => 'data:'.$image_format.';base64,'.$raw_data,
));
break 2;
}
if (isset($data['text/html'])) {
$content = $data['text/html'];
$classes[] = 'jupyter-output-html';
break;
}
if (isset($data['application/javascript'])) {
$content = $data['application/javascript'];
$classes[] = 'jupyter-output-html';
break;
}
if (isset($data['text/plain'])) {
$content = $data['text/plain'];
break;
}
break;
case 'stream':
default:
$content = idx($output, 'text');
if (!is_array($content)) {
$content = array();
}
$content = implode('', $content);
break;
}
return phutil_tag(
'div',
array(
'class' => implode(' ', $classes),
),
$content);
}
private function newCellLabel(array $cell) {
$execution_count = idx($cell, 'execution_count');
if ($execution_count) {
$label = 'In ['.$execution_count.']:';
} else {
$label = null;
}
return $label;
}
private function highlightLines(array $lines) {
$head = head($lines);
$matches = null;
if (preg_match('/^%%(.*)$/', $head, $matches)) {
$restore = array_shift($lines);
$lang = $matches[1];
} else {
$restore = null;
$lang = 'py';
}
$content = PhabricatorSyntaxHighlighter::highlightWithLanguage(
$lang,
implode('', $lines));
$content = phutil_split_lines($content);
if ($restore !== null) {
$language_tag = phutil_tag(
'span',
array(
'class' => 'language-tag',
),
$restore);
array_unshift($content, $language_tag);
}
return $content;
}
}