1
0
Fork 0
mirror of https://we.phorge.it/source/phorge.git synced 2024-11-26 00:32:42 +01:00

In Jupyter notebooks, read strings stored in the raw file as either "string" or "list<string>" more consistently

Summary:
Ref PHI1835. Generally, Jupyter notebooks in the wild may store source and markdown content as either a single string or a list of strings.

Make the renderer read these formats more consistently. In particular, this fixes rendering of code blocks stored as a single string.

This also fixes an issue where cell labels were double-rendered in diff views.

Test Plan:
Created a notebook with a code block represented on disk as a single string, rendered a diff from it.

{F7696071}

Differential Revision: https://secure.phabricator.com/D21434
This commit is contained in:
epriestley 2020-08-05 12:15:17 -07:00
parent 98e0440d45
commit 2db1955159

View file

@ -85,15 +85,8 @@ final class PhabricatorJupyterDocumentEngine
if ($utype === $vtype) { if ($utype === $vtype) {
switch ($utype) { switch ($utype) {
case 'markdown': case 'markdown':
$usource = idx($ucell, 'source'); $usource = $this->readString($ucell, 'source');
if (is_array($usource)) { $vsource = $this->readString($vcell, 'source');
$usource = implode('', $usource);
}
$vsource = idx($vcell, 'source');
if (is_array($vsource)) {
$vsource = implode('', $vsource);
}
$diff = id(new PhutilProseDifferenceEngine()) $diff = id(new PhutilProseDifferenceEngine())
->getDiff($usource, $vsource); ->getDiff($usource, $vsource);
@ -117,8 +110,6 @@ final class PhabricatorJupyterDocumentEngine
$vsource = idx($vcell, 'raw'); $vsource = idx($vcell, 'raw');
$udisplay = idx($ucell, 'display'); $udisplay = idx($ucell, 'display');
$vdisplay = idx($vcell, 'display'); $vdisplay = idx($vcell, 'display');
$ulabel = idx($ucell, 'label');
$vlabel = idx($vcell, 'label');
$intraline_segments = ArcanistDiffUtils::generateIntralineDiff( $intraline_segments = ArcanistDiffUtils::generateIntralineDiff(
$usource, $usource,
@ -142,15 +133,15 @@ final class PhabricatorJupyterDocumentEngine
$vdisplay, $vdisplay,
$v_segments); $v_segments);
$u_content = $this->newCodeLineCell($ucell, $usource); list($u_label, $u_content) = $this->newCodeLineCell($ucell, $usource);
$v_content = $this->newCodeLineCell($vcell, $vsource); list($v_label, $v_content) = $this->newCodeLineCell($vcell, $vsource);
$classes = array( $classes = array(
'jupyter-cell-flush', 'jupyter-cell-flush',
); );
$u_content = $this->newJupyterCell($ulabel, $u_content, $classes); $u_content = $this->newJupyterCell($u_label, $u_content, $classes);
$v_content = $this->newJupyterCell($vlabel, $v_content, $classes); $v_content = $this->newJupyterCell($v_label, $v_content, $classes);
$u_content = $this->newCellContainer($u_content); $u_content = $this->newCellContainer($u_content);
$v_content = $this->newCellContainer($v_content); $v_content = $this->newCellContainer($v_content);
@ -259,10 +250,7 @@ final class PhabricatorJupyterDocumentEngine
$hash_input = $cell['raw']; $hash_input = $cell['raw'];
break; break;
case 'markdown': case 'markdown':
$hash_input = $cell['source']; $hash_input = $this->readString($cell, 'source');
if (is_array($hash_input)) {
$hash_input = implode('', $cell['source']);
}
break; break;
default: default:
$hash_input = serialize($cell); $hash_input = serialize($cell);
@ -334,7 +322,6 @@ final class PhabricatorJupyterDocumentEngine
'be rendered as a Jupyter notebook.')); 'be rendered as a Jupyter notebook.'));
} }
$nbformat = idx($data, 'nbformat'); $nbformat = idx($data, 'nbformat');
if (!strlen($nbformat)) { if (!strlen($nbformat)) {
throw new Exception( throw new Exception(
@ -376,10 +363,7 @@ final class PhabricatorJupyterDocumentEngine
foreach ($cells as $cell) { foreach ($cells as $cell) {
$cell_type = idx($cell, 'cell_type'); $cell_type = idx($cell, 'cell_type');
if ($cell_type === 'markdown') { if ($cell_type === 'markdown') {
$source = $cell['source']; $source = $this->readString($cell, 'source');
if (is_array($source)) {
$source = implode('', $source);
}
// Attempt to split contiguous blocks of markdown into smaller // Attempt to split contiguous blocks of markdown into smaller
// pieces. // pieces.
@ -404,11 +388,7 @@ final class PhabricatorJupyterDocumentEngine
$label = $this->newCellLabel($cell); $label = $this->newCellLabel($cell);
$lines = idx($cell, 'source'); $lines = $this->readStringList($cell, 'source');
if (!is_array($lines)) {
$lines = array();
}
$content = $this->highlightLines($lines); $content = $this->highlightLines($lines);
$count = count($lines); $count = count($lines);
@ -526,10 +506,7 @@ final class PhabricatorJupyterDocumentEngine
} }
private function newMarkdownCell(array $cell) { private function newMarkdownCell(array $cell) {
$content = idx($cell, 'source'); $content = $this->readStringList($cell, 'source');
if (!is_array($content)) {
$content = array();
}
// TODO: This should ideally highlight as Markdown, but the "md" // TODO: This should ideally highlight as Markdown, but the "md"
// highlighter in Pygments is painfully slow and not terribly useful. // highlighter in Pygments is painfully slow and not terribly useful.
@ -549,11 +526,7 @@ final class PhabricatorJupyterDocumentEngine
private function newCodeCell(array $cell) { private function newCodeCell(array $cell) {
$label = $this->newCellLabel($cell); $label = $this->newCellLabel($cell);
$content = idx($cell, 'source'); $content = $this->readStringList($cell, 'source');
if (!is_array($content)) {
$content = array();
}
$content = $this->highlightLines($content); $content = $this->highlightLines($content);
$outputs = array(); $outputs = array();
@ -660,11 +633,7 @@ final class PhabricatorJupyterDocumentEngine
continue; continue;
} }
$raw_data = $data[$image_format]; $raw_data = $this->readString($data, $image_format);
if (!is_array($raw_data)) {
$raw_data = array($raw_data);
}
$raw_data = implode('', $raw_data);
$content = phutil_tag( $content = phutil_tag(
'img', 'img',
@ -695,11 +664,7 @@ final class PhabricatorJupyterDocumentEngine
break; break;
case 'stream': case 'stream':
default: default:
$content = idx($output, 'text'); $content = $this->readString($output, 'text');
if (!is_array($content)) {
$content = array();
}
$content = implode('', $content);
break; break;
} }
@ -761,4 +726,23 @@ final class PhabricatorJupyterDocumentEngine
return true; return true;
} }
/**
 * Read a value that may be stored as either a single string or a list of
 * strings, and return it as one string.
 *
 * The value is first normalized to a list by readStringList(), then the
 * pieces are concatenated with no separator.
 *
 * @param array $src Map to read the value from.
 * @param string $key Key to look up in the map.
 * @return string Concatenation of every piece in the normalized list.
 */
private function readString(array $src, $key) {
  return implode('', $this->readStringList($src, $key));
}
/**
 * Read a value from a map and normalize it to a list.
 *
 * The stored value may be either a list or a single string:
 *
 *   - an array is returned unchanged (its elements are not inspected);
 *   - a string is wrapped in a one-element array;
 *   - any other value yields an empty array.
 *
 * NOTE(review): a missing key presumably falls into the last case, assuming
 * idx() returns null when the key is absent -- confirm against idx().
 *
 * @param array $src Map to read the value from.
 * @param string $key Key to look up in the map.
 * @return array Normalized list as described above.
 */
private function readStringList(array $src, $key) {
  $value = idx($src, $key);

  if (is_array($value)) {
    return $value;
  }

  if (is_string($value)) {
    return array($value);
  }

  return array();
}
} }