1
0
Fork 0
mirror of https://we.phorge.it/source/phorge.git synced 2025-01-08 22:01:03 +01:00

Add a very rough, proof-of-concept Jupyter notebook document engine

Summary:
Depends on D19252. Ref T13105. This very roughly renders Jupyter notebooks.

It's probably better than showing the raw JSON, but not by much.

Test Plan:
  - Viewed various notebooks with various cell types, including markdown, code, stdout, stderr, images, HTML, and JavaScript.
  - HTML and JavaScript outputs are displayed as source text and are not executed, since running them would be wildly dangerous.

Maniphest Tasks: T13105

Differential Revision: https://secure.phabricator.com/D19253
This commit is contained in:
epriestley 2018-03-23 06:40:30 -07:00
parent fb4ce851c4
commit cbf3d3c371
5 changed files with 366 additions and 3 deletions

View file

@ -9,7 +9,7 @@ return array(
'names' => array(
'conpherence.pkg.css' => 'e68cf1fa',
'conpherence.pkg.js' => '15191c65',
'core.pkg.css' => '2d73b2f3',
'core.pkg.css' => '7daac340',
'core.pkg.js' => 'b9b4a943',
'differential.pkg.css' => '113e692c',
'differential.pkg.js' => 'f6d809c0',
@ -168,7 +168,7 @@ return array(
'rsrc/css/phui/phui-object-box.css' => '9cff003c',
'rsrc/css/phui/phui-pager.css' => 'edcbc226',
'rsrc/css/phui/phui-pinboard-view.css' => '2495140e',
'rsrc/css/phui/phui-property-list-view.css' => '47018d3c',
'rsrc/css/phui/phui-property-list-view.css' => '871f6815',
'rsrc/css/phui/phui-remarkup-preview.css' => '54a34863',
'rsrc/css/phui/phui-segment-bar-view.css' => 'b1d1b892',
'rsrc/css/phui/phui-spacing.css' => '042804d6',
@ -850,7 +850,7 @@ return array(
'phui-oi-simple-ui-css' => 'a8beebea',
'phui-pager-css' => 'edcbc226',
'phui-pinboard-view-css' => '2495140e',
'phui-property-list-view-css' => '47018d3c',
'phui-property-list-view-css' => '871f6815',
'phui-remarkup-preview-css' => '54a34863',
'phui-segment-bar-view-css' => 'b1d1b892',
'phui-spacing-css' => '042804d6',

View file

@ -3190,6 +3190,7 @@ phutil_register_library_map(array(
'PhabricatorJSONExportFormat' => 'infrastructure/export/format/PhabricatorJSONExportFormat.php',
'PhabricatorJavelinLinter' => 'infrastructure/lint/linter/PhabricatorJavelinLinter.php',
'PhabricatorJiraIssueHasObjectEdgeType' => 'applications/doorkeeper/edge/PhabricatorJiraIssueHasObjectEdgeType.php',
'PhabricatorJupyterDocumentEngine' => 'applications/files/document/PhabricatorJupyterDocumentEngine.php',
'PhabricatorKeyValueDatabaseCache' => 'applications/cache/PhabricatorKeyValueDatabaseCache.php',
'PhabricatorKeyValueSerializingCacheProxy' => 'applications/cache/PhabricatorKeyValueSerializingCacheProxy.php',
'PhabricatorKeyboardRemarkupRule' => 'infrastructure/markup/rule/PhabricatorKeyboardRemarkupRule.php',
@ -8800,6 +8801,7 @@ phutil_register_library_map(array(
'PhabricatorJSONExportFormat' => 'PhabricatorExportFormat',
'PhabricatorJavelinLinter' => 'ArcanistLinter',
'PhabricatorJiraIssueHasObjectEdgeType' => 'PhabricatorEdgeType',
'PhabricatorJupyterDocumentEngine' => 'PhabricatorDocumentEngine',
'PhabricatorKeyValueDatabaseCache' => 'PhutilKeyValueCache',
'PhabricatorKeyValueSerializingCacheProxy' => 'PhutilKeyValueCacheProxy',
'PhabricatorKeyboardRemarkupRule' => 'PhutilRemarkupRule',

View file

@ -110,6 +110,15 @@ final class PhabricatorDocumentRef
return (strpos($snippet, "\0") === false);
}
/**
 * Cheap heuristic for "could this document be JSON?".
 *
 * Note that no JSON parsing happens here: the document only has to look
 * like text (per isProbablyText()) and its snippet has to be valid UTF-8.
 * Callers must still be prepared for the content to fail JSON decoding.
 *
 * @return bool True if the document passes the text and UTF-8 checks.
 */
public function isProbablyJSON() {
  if ($this->isProbablyText()) {
    return phutil_is_utf8($this->getSnippet());
  }

  return false;
}
public function getSnippet() {
if ($this->snippet === null) {
$this->snippet = $this->loadData(null, (1024 * 1024 * 1));

View file

@ -0,0 +1,305 @@
<?php
/**
 * Document engine which renders a Jupyter notebook (".ipynb") as a table of
 * cells instead of as raw JSON.
 *
 * Only version 4 of the notebook format is rendered. Notebook content is
 * untrusted input: every value read out of the decoded JSON is type-checked
 * before use, and malformed cells or outputs are replaced with a placeholder
 * rather than aborting the whole render.
 */
final class PhabricatorJupyterDocumentEngine
  extends PhabricatorDocumentEngine {

  const ENGINEKEY = 'jupyter';

  public function getViewAsLabel(PhabricatorDocumentRef $ref) {
    return pht('View as Jupyter Notebook');
  }

  protected function getDocumentIconIcon(PhabricatorDocumentRef $ref) {
    return 'fa-sun-o';
  }

  /**
   * Score this engine higher for files which have an ".ipynb" extension
   * (matched case-insensitively at the end of the name).
   */
  protected function getContentScore(PhabricatorDocumentRef $ref) {
    $name = $ref->getName();

    if (preg_match('/\\.ipynb\z/i', $name)) {
      return 2000;
    }

    return 500;
  }

  protected function canRenderDocumentType(PhabricatorDocumentRef $ref) {
    return $ref->isProbablyJSON();
  }

  /**
   * Decode the document as JSON, validate the notebook envelope ("nbformat"
   * and "cells"), and render one table row per cell.
   *
   * Any validation failure returns the result of newMessage() with a
   * human-readable explanation instead of notebook markup.
   */
  protected function newDocumentContent(PhabricatorDocumentRef $ref) {
    $viewer = $this->getViewer();
    $content = $ref->loadData();

    try {
      $data = phutil_json_decode($content);
    } catch (PhutilJSONParserException $ex) {
      return $this->newMessage(
        pht(
          'This is not a valid JSON document and can not be rendered as '.
          'a Jupyter notebook: %s.',
          $ex->getMessage()));
    }

    if (!is_array($data)) {
      return $this->newMessage(
        pht(
          'This document does not encode a valid JSON object and can not '.
          'be rendered as a Jupyter notebook.'));
    }

    // The decoded value may be of any JSON type. Only measure it as a string
    // once we know it is scalar, so a list or object here is reported as a
    // missing field instead of raising a warning or TypeError from strlen().
    $nbformat = idx($data, 'nbformat');
    if (!is_scalar($nbformat) || !strlen((string)$nbformat)) {
      return $this->newMessage(
        pht(
          'This document is missing an "nbformat" field. Jupyter notebooks '.
          'must have this field.'));
    }

    if ($nbformat !== 4) {
      return $this->newMessage(
        pht(
          'This Jupyter notebook uses an unsupported version of the file '.
          'format (found version %s, expected version 4).',
          $nbformat));
    }

    $cells = idx($data, 'cells');
    if (!is_array($cells)) {
      return $this->newMessage(
        pht(
          'This Jupyter notebook does not specify a list of "cells".'));
    }

    if (!$cells) {
      return $this->newMessage(
        pht(
          'This Jupyter notebook does not specify any notebook cells.'));
    }

    $rows = array();
    foreach ($cells as $cell) {
      $rows[] = $this->renderJupyterCell($viewer, $cell);
    }

    $notebook_table = phutil_tag(
      'table',
      array(
        'class' => 'jupyter-notebook',
      ),
      $rows);

    $container = phutil_tag(
      'div',
      array(
        'class' => 'document-engine-jupyter',
      ),
      $notebook_table);

    return $container;
  }

  /**
   * Render one notebook cell as a "<tr>" with a label ("<th>") and a content
   * ("<td>") column.
   *
   * @param PhabricatorUser Viewing user.
   * @param wild Decoded cell. Expected to be a dictionary, but this is
   *   untrusted input so any value is accepted.
   */
  private function renderJupyterCell(
    PhabricatorUser $viewer,
    $cell) {

    list($label, $content) = $this->renderJupyterCellContent($viewer, $cell);

    $label_cell = phutil_tag(
      'th',
      array(),
      $label);

    $content_cell = phutil_tag(
      'td',
      array(),
      $content);

    return phutil_tag(
      'tr',
      array(),
      array(
        $label_cell,
        $content_cell,
      ));
  }

  /**
   * Dispatch on "cell_type" and build the cell.
   *
   * @return pair<wild, wild> Label and content for the cell.
   */
  private function renderJupyterCellContent(
    PhabricatorUser $viewer,
    $cell) {

    // A cell which is not a dictionary (for example, a bare string in the
    // "cells" list) can not be rendered. Show a placeholder for it instead
    // of failing the entire document.
    if (!is_array($cell)) {
      return $this->newRawCell(pht('<Invalid Cell>'));
    }

    $cell_type = idx($cell, 'cell_type');
    switch ($cell_type) {
      case 'markdown':
        return $this->newMarkdownCell($cell);
      case 'code':
        return $this->newCodeCell($cell);
    }

    // Unknown cell types are shown as formatted JSON.
    return $this->newRawCell(id(new PhutilJSON())->encodeFormatted($cell));
  }

  private function newRawCell($content) {
    return array(
      null,
      phutil_tag(
        'div',
        array(
          'class' => 'jupyter-cell-raw PhabricatorMonospaced',
        ),
        $content),
    );
  }

  /**
   * Build a markdown cell. The source is shown as text with its line breaks
   * preserved; it is not interpreted as markdown.
   */
  private function newMarkdownCell(array $cell) {
    $content = $this->newMultilineString(idx($cell, 'source'));
    $content = phutil_escape_html_newlines($content);

    return array(
      null,
      phutil_tag(
        'div',
        array(
          'class' => 'jupyter-cell-markdown',
        ),
        $content),
    );
  }

  /**
   * Build a code cell: an "In [n]:" label (when the cell has an execution
   * count), the source highlighted as Python, and any outputs.
   */
  private function newCodeCell(array $cell) {
    // Only build a label from a scalar count, so a malformed (list or
    // object) value is never concatenated into the label.
    $execution_count = idx($cell, 'execution_count');
    if ($execution_count && is_scalar($execution_count)) {
      $label = 'In ['.$execution_count.']:';
    } else {
      $label = null;
    }

    $content = $this->newMultilineString(idx($cell, 'source'));

    $content = PhabricatorSyntaxHighlighter::highlightWithLanguage(
      'python',
      $content);

    $outputs = array();
    $output_list = idx($cell, 'outputs');
    if (is_array($output_list)) {
      foreach ($output_list as $output) {
        $outputs[] = $this->newOutput($output);
      }
    }

    return array(
      $label,
      array(
        phutil_tag(
          'div',
          array(
            'class' => 'jupyter-cell-code PhabricatorMonospaced remarkup-code',
          ),
          array(
            $content,
          )),
        $outputs,
      ),
    );
  }

  /**
   * Render a single entry from a code cell's "outputs" list.
   *
   * The parameter is intentionally untyped: entries come straight from the
   * decoded notebook, and the guard below must be able to run for values
   * which are not dictionaries.
   *
   * @param wild Decoded output entry.
   */
  private function newOutput($output) {
    if (!is_array($output)) {
      return pht('<Invalid Output>');
    }

    $classes = array(
      'jupyter-output',
      'PhabricatorMonospaced',
    );

    $output_name = idx($output, 'name');
    switch ($output_name) {
      case 'stderr':
        $classes[] = 'jupyter-output-stderr';
        break;
    }

    // Start from "no content" so an output with no recognized data renders
    // as an empty block rather than reading an unassigned variable.
    $content = null;

    $output_type = idx($output, 'output_type');
    switch ($output_type) {
      case 'execute_result':
      case 'display_data':
        $data = idx($output, 'data');
        if (!is_array($data)) {
          $data = array();
        }

        list($content, $extra_class) = $this->newDisplayContent($data);
        if ($extra_class !== null) {
          $classes[] = $extra_class;
        }
        break;
      case 'stream':
      default:
        $content = $this->newMultilineString(idx($output, 'text'));
        break;
    }

    return phutil_tag(
      'div',
      array(
        'class' => implode(' ', $classes),
      ),
      $content);
  }

  /**
   * Select the representation to show for a "data" MIME bundle.
   *
   * Preference order: the first available image format, then "text/html",
   * then "application/javascript", then "text/plain". HTML and JavaScript
   * are handed to phutil_tag() as text content, not as markup.
   * NOTE(review): this relies on phutil_tag() escaping string content so
   * that neither is executed; confirm against phutil_tag().
   *
   * @param map<string, wild> MIME type to raw value.
   * @return pair<wild, string|null> Content, and an additional CSS class for
   *   the output block (or null for none).
   */
  private function newDisplayContent(array $data) {
    $image_formats = array(
      'image/png',
      'image/jpeg',
      'image/jpg',
      'image/gif',
    );

    foreach ($image_formats as $image_format) {
      if (!isset($data[$image_format])) {
        continue;
      }

      $raw_data = $this->newMultilineString($data[$image_format]);
      if (!strlen($raw_data)) {
        continue;
      }

      $image = phutil_tag(
        'img',
        array(
          'src' => 'data:'.$image_format.';base64,'.$raw_data,
        ));

      return array($image, null);
    }

    $source_formats = array(
      'text/html',
      'application/javascript',
    );

    foreach ($source_formats as $source_format) {
      if (isset($data[$source_format])) {
        return array(
          $this->newMultilineString($data[$source_format]),
          'jupyter-output-html',
        );
      }
    }

    if (isset($data['text/plain'])) {
      return array(
        $this->newMultilineString($data['text/plain']),
        null,
      );
    }

    return array(null, null);
  }

  /**
   * Flatten a notebook text value into one string.
   *
   * The notebook format stores text fields (cell "source", stream "text",
   * and MIME bundle values) either as a single string or as a list of
   * strings to be concatenated. Both forms are accepted here. Non-scalar
   * list items are skipped, and any other kind of value yields an empty
   * string.
   *
   * @param wild Raw decoded value.
   * @return string Concatenated text.
   */
  private function newMultilineString($value) {
    if (is_array($value)) {
      $parts = array();
      foreach ($value as $part) {
        if (is_scalar($part)) {
          $parts[] = $part;
        }
      }
      return implode('', $parts);
    }

    if (is_scalar($value)) {
      return (string)$value;
    }

    return '';
  }

}

View file

@ -257,3 +257,50 @@ div.phui-property-list-stacked .phui-property-list-properties
.document-engine-pdf .phabricator-remarkup-embed-layout-link {
text-align: left;
}
/* Jupyter notebook document engine.

   NOTE(review): the "{$...}" values below look like named color
   placeholders that are substituted before the stylesheet is served;
   confirm against the CSS build step. */

/* Outer wrapper around the rendered notebook table. */
.document-engine-jupyter {
overflow: hidden;
margin: 20px;
}
/* Cells shown as raw text. Whitespace is preserved but long lines wrap. */
.jupyter-cell-raw {
white-space: pre-wrap;
background: {$lightgreybackground};
color: {$greytext};
padding: 8px;
}
/* Source of a code cell, drawn as a bordered box. */
.jupyter-cell-code {
white-space: pre-wrap;
background: {$lightgreybackground};
padding: 8px;
border: 1px solid {$lightgreyborder};
border-radius: 2px;
}
/* Child selectors keep these rules scoped to the notebook table's own
   cells, so they do not apply to tables nested inside a cell. */
.jupyter-notebook > tbody > tr > th,
.jupyter-notebook > tbody > tr > td {
padding: 8px;
}
/* Label column: kept on one line and right-aligned against the content. */
.jupyter-notebook > tbody > tr > th {
white-space: nowrap;
text-align: right;
min-width: 48px;
font-weight: bold;
}
/* A single output block. "break-all" lets long unbroken runs wrap instead
   of overflowing the cell. */
.jupyter-output {
margin: 4px 0;
padding: 8px;
white-space: pre-wrap;
word-break: break-all;
}
/* Background tint for stderr output. */
.jupyter-output-stderr {
background: {$sh-redbackground};
}
/* Background tint for output blocks carrying the "jupyter-output-html"
   class. */
.jupyter-output-html {
background: {$sh-indigobackground};
}