1
0
Fork 0
mirror of https://we.phorge.it/source/phorge.git synced 2024-11-10 00:42:41 +01:00

In Jupyter notebooks, read strings stored in the raw as either "string" or "list<string>" more consistently

Summary:
Ref PHI1835. Generally, Jupyter notebooks in the wild may store source and markdown content as either a single string or a list of strings.

Make the renderer read these formats more consistently. In particular, this fixes rendering of code blocks stored as a single string.

This also fixes an issue where cell labels were double-rendered in diff views.

Test Plan:
Created a notebook with a code block represented on disk as a single string, rendered a diff from it.

{F7696071}

Differential Revision: https://secure.phabricator.com/D21434
This commit is contained in:
epriestley 2020-08-05 12:15:17 -07:00
parent 98e0440d45
commit 2db1955159

View file

@ -85,15 +85,8 @@ final class PhabricatorJupyterDocumentEngine
if ($utype === $vtype) {
switch ($utype) {
case 'markdown':
$usource = idx($ucell, 'source');
if (is_array($usource)) {
$usource = implode('', $usource);
}
$vsource = idx($vcell, 'source');
if (is_array($vsource)) {
$vsource = implode('', $vsource);
}
$usource = $this->readString($ucell, 'source');
$vsource = $this->readString($vcell, 'source');
$diff = id(new PhutilProseDifferenceEngine())
->getDiff($usource, $vsource);
@ -117,8 +110,6 @@ final class PhabricatorJupyterDocumentEngine
$vsource = idx($vcell, 'raw');
$udisplay = idx($ucell, 'display');
$vdisplay = idx($vcell, 'display');
$ulabel = idx($ucell, 'label');
$vlabel = idx($vcell, 'label');
$intraline_segments = ArcanistDiffUtils::generateIntralineDiff(
$usource,
@ -142,15 +133,15 @@ final class PhabricatorJupyterDocumentEngine
$vdisplay,
$v_segments);
$u_content = $this->newCodeLineCell($ucell, $usource);
$v_content = $this->newCodeLineCell($vcell, $vsource);
list($u_label, $u_content) = $this->newCodeLineCell($ucell, $usource);
list($v_label, $v_content) = $this->newCodeLineCell($vcell, $vsource);
$classes = array(
'jupyter-cell-flush',
);
$u_content = $this->newJupyterCell($ulabel, $u_content, $classes);
$v_content = $this->newJupyterCell($vlabel, $v_content, $classes);
$u_content = $this->newJupyterCell($u_label, $u_content, $classes);
$v_content = $this->newJupyterCell($v_label, $v_content, $classes);
$u_content = $this->newCellContainer($u_content);
$v_content = $this->newCellContainer($v_content);
@ -259,10 +250,7 @@ final class PhabricatorJupyterDocumentEngine
$hash_input = $cell['raw'];
break;
case 'markdown':
$hash_input = $cell['source'];
if (is_array($hash_input)) {
$hash_input = implode('', $cell['source']);
}
$hash_input = $this->readString($cell, 'source');
break;
default:
$hash_input = serialize($cell);
@ -334,7 +322,6 @@ final class PhabricatorJupyterDocumentEngine
'be rendered as a Jupyter notebook.'));
}
$nbformat = idx($data, 'nbformat');
if (!strlen($nbformat)) {
throw new Exception(
@ -376,10 +363,7 @@ final class PhabricatorJupyterDocumentEngine
foreach ($cells as $cell) {
$cell_type = idx($cell, 'cell_type');
if ($cell_type === 'markdown') {
$source = $cell['source'];
if (is_array($source)) {
$source = implode('', $source);
}
$source = $this->readString($cell, 'source');
// Attempt to split contiguous blocks of markdown into smaller
// pieces.
@ -404,11 +388,7 @@ final class PhabricatorJupyterDocumentEngine
$label = $this->newCellLabel($cell);
$lines = idx($cell, 'source');
if (!is_array($lines)) {
$lines = array();
}
$lines = $this->readStringList($cell, 'source');
$content = $this->highlightLines($lines);
$count = count($lines);
@ -526,10 +506,7 @@ final class PhabricatorJupyterDocumentEngine
}
private function newMarkdownCell(array $cell) {
$content = idx($cell, 'source');
if (!is_array($content)) {
$content = array();
}
$content = $this->readStringList($cell, 'source');
// TODO: This should ideally highlight as Markdown, but the "md"
// highlighter in Pygments is painfully slow and not terribly useful.
@ -549,11 +526,7 @@ final class PhabricatorJupyterDocumentEngine
private function newCodeCell(array $cell) {
$label = $this->newCellLabel($cell);
$content = idx($cell, 'source');
if (!is_array($content)) {
$content = array();
}
$content = $this->readStringList($cell, 'source');
$content = $this->highlightLines($content);
$outputs = array();
@ -660,11 +633,7 @@ final class PhabricatorJupyterDocumentEngine
continue;
}
$raw_data = $data[$image_format];
if (!is_array($raw_data)) {
$raw_data = array($raw_data);
}
$raw_data = implode('', $raw_data);
$raw_data = $this->readString($data, $image_format);
$content = phutil_tag(
'img',
@ -695,11 +664,7 @@ final class PhabricatorJupyterDocumentEngine
break;
case 'stream':
default:
$content = idx($output, 'text');
if (!is_array($content)) {
$content = array();
}
$content = implode('', $content);
$content = $this->readString($output, 'text');
break;
}
@ -761,4 +726,23 @@ final class PhabricatorJupyterDocumentEngine
return true;
}
private function readString(array $src, $key) {
$list = $this->readStringList($src, $key);
return implode('', $list);
}
private function readStringList(array $src, $key) {
$list = idx($src, $key);
if (is_array($list)) {
$list = $list;
} else if (is_string($list)) {
$list = array($list);
} else {
$list = array();
}
return $list;
}
}