mirror of
https://we.phorge.it/source/phorge.git
synced 2024-11-10 00:42:41 +01:00
In Jupyter notebooks, read strings stored in the raw notebook data as either "string" or "list<string>" more consistently
Summary: Ref PHI1835. Generally, Jupyter notebooks in the wild may store source and markdown content as either a single string or a list of strings. Make the renderer read these formats more consistently. In particular, this fixes rendering of code blocks stored as a single string. This also fixes an issue where cell labels were double-rendered in diff views. Test Plan: Created a notebook with a code block represented on disk as a single string, rendered a diff from it. {F7696071} Differential Revision: https://secure.phabricator.com/D21434
This commit is contained in:
parent
98e0440d45
commit
2db1955159
1 changed files with 32 additions and 48 deletions
|
@ -85,15 +85,8 @@ final class PhabricatorJupyterDocumentEngine
|
|||
if ($utype === $vtype) {
|
||||
switch ($utype) {
|
||||
case 'markdown':
|
||||
$usource = idx($ucell, 'source');
|
||||
if (is_array($usource)) {
|
||||
$usource = implode('', $usource);
|
||||
}
|
||||
|
||||
$vsource = idx($vcell, 'source');
|
||||
if (is_array($vsource)) {
|
||||
$vsource = implode('', $vsource);
|
||||
}
|
||||
$usource = $this->readString($ucell, 'source');
|
||||
$vsource = $this->readString($vcell, 'source');
|
||||
|
||||
$diff = id(new PhutilProseDifferenceEngine())
|
||||
->getDiff($usource, $vsource);
|
||||
|
@ -117,8 +110,6 @@ final class PhabricatorJupyterDocumentEngine
|
|||
$vsource = idx($vcell, 'raw');
|
||||
$udisplay = idx($ucell, 'display');
|
||||
$vdisplay = idx($vcell, 'display');
|
||||
$ulabel = idx($ucell, 'label');
|
||||
$vlabel = idx($vcell, 'label');
|
||||
|
||||
$intraline_segments = ArcanistDiffUtils::generateIntralineDiff(
|
||||
$usource,
|
||||
|
@ -142,15 +133,15 @@ final class PhabricatorJupyterDocumentEngine
|
|||
$vdisplay,
|
||||
$v_segments);
|
||||
|
||||
$u_content = $this->newCodeLineCell($ucell, $usource);
|
||||
$v_content = $this->newCodeLineCell($vcell, $vsource);
|
||||
list($u_label, $u_content) = $this->newCodeLineCell($ucell, $usource);
|
||||
list($v_label, $v_content) = $this->newCodeLineCell($vcell, $vsource);
|
||||
|
||||
$classes = array(
|
||||
'jupyter-cell-flush',
|
||||
);
|
||||
|
||||
$u_content = $this->newJupyterCell($ulabel, $u_content, $classes);
|
||||
$v_content = $this->newJupyterCell($vlabel, $v_content, $classes);
|
||||
$u_content = $this->newJupyterCell($u_label, $u_content, $classes);
|
||||
$v_content = $this->newJupyterCell($v_label, $v_content, $classes);
|
||||
|
||||
$u_content = $this->newCellContainer($u_content);
|
||||
$v_content = $this->newCellContainer($v_content);
|
||||
|
@ -259,10 +250,7 @@ final class PhabricatorJupyterDocumentEngine
|
|||
$hash_input = $cell['raw'];
|
||||
break;
|
||||
case 'markdown':
|
||||
$hash_input = $cell['source'];
|
||||
if (is_array($hash_input)) {
|
||||
$hash_input = implode('', $cell['source']);
|
||||
}
|
||||
$hash_input = $this->readString($cell, 'source');
|
||||
break;
|
||||
default:
|
||||
$hash_input = serialize($cell);
|
||||
|
@ -334,7 +322,6 @@ final class PhabricatorJupyterDocumentEngine
|
|||
'be rendered as a Jupyter notebook.'));
|
||||
}
|
||||
|
||||
|
||||
$nbformat = idx($data, 'nbformat');
|
||||
if (!strlen($nbformat)) {
|
||||
throw new Exception(
|
||||
|
@ -376,10 +363,7 @@ final class PhabricatorJupyterDocumentEngine
|
|||
foreach ($cells as $cell) {
|
||||
$cell_type = idx($cell, 'cell_type');
|
||||
if ($cell_type === 'markdown') {
|
||||
$source = $cell['source'];
|
||||
if (is_array($source)) {
|
||||
$source = implode('', $source);
|
||||
}
|
||||
$source = $this->readString($cell, 'source');
|
||||
|
||||
// Attempt to split contiguous blocks of markdown into smaller
|
||||
// pieces.
|
||||
|
@ -404,11 +388,7 @@ final class PhabricatorJupyterDocumentEngine
|
|||
|
||||
$label = $this->newCellLabel($cell);
|
||||
|
||||
$lines = idx($cell, 'source');
|
||||
if (!is_array($lines)) {
|
||||
$lines = array();
|
||||
}
|
||||
|
||||
$lines = $this->readStringList($cell, 'source');
|
||||
$content = $this->highlightLines($lines);
|
||||
|
||||
$count = count($lines);
|
||||
|
@ -526,10 +506,7 @@ final class PhabricatorJupyterDocumentEngine
|
|||
}
|
||||
|
||||
private function newMarkdownCell(array $cell) {
|
||||
$content = idx($cell, 'source');
|
||||
if (!is_array($content)) {
|
||||
$content = array();
|
||||
}
|
||||
$content = $this->readStringList($cell, 'source');
|
||||
|
||||
// TODO: This should ideally highlight as Markdown, but the "md"
|
||||
// highlighter in Pygments is painfully slow and not terribly useful.
|
||||
|
@ -549,11 +526,7 @@ final class PhabricatorJupyterDocumentEngine
|
|||
private function newCodeCell(array $cell) {
|
||||
$label = $this->newCellLabel($cell);
|
||||
|
||||
$content = idx($cell, 'source');
|
||||
if (!is_array($content)) {
|
||||
$content = array();
|
||||
}
|
||||
|
||||
$content = $this->readStringList($cell, 'source');
|
||||
$content = $this->highlightLines($content);
|
||||
|
||||
$outputs = array();
|
||||
|
@ -660,11 +633,7 @@ final class PhabricatorJupyterDocumentEngine
|
|||
continue;
|
||||
}
|
||||
|
||||
$raw_data = $data[$image_format];
|
||||
if (!is_array($raw_data)) {
|
||||
$raw_data = array($raw_data);
|
||||
}
|
||||
$raw_data = implode('', $raw_data);
|
||||
$raw_data = $this->readString($data, $image_format);
|
||||
|
||||
$content = phutil_tag(
|
||||
'img',
|
||||
|
@ -695,11 +664,7 @@ final class PhabricatorJupyterDocumentEngine
|
|||
break;
|
||||
case 'stream':
|
||||
default:
|
||||
$content = idx($output, 'text');
|
||||
if (!is_array($content)) {
|
||||
$content = array();
|
||||
}
|
||||
$content = implode('', $content);
|
||||
$content = $this->readString($output, 'text');
|
||||
break;
|
||||
}
|
||||
|
||||
|
@ -761,4 +726,23 @@ final class PhabricatorJupyterDocumentEngine
|
|||
return true;
|
||||
}
|
||||
|
||||
/**
 * Read a notebook field as one flat string.
 *
 * The field is first normalized to a list of fragments by
 * readStringList(), then the fragments are concatenated with no
 * separator.
 *
 * @param array Dictionary to read from (for example, a cell or an output).
 * @param string Key of the field to read.
 * @return string Concatenated field content. This is the empty string
 *   when readStringList() produces an empty list.
 */
private function readString(array $src, $key) {
  return implode('', $this->readStringList($src, $key));
}
|
||||
|
||||
/**
 * Read a notebook field as a list of string fragments.
 *
 * Notebooks may store text content either as a single string or as a
 * list of strings. This normalizes both representations to a list so
 * callers can treat them uniformly.
 *
 * @param array Dictionary to read from (for example, a cell or an output).
 * @param string Key of the field to read.
 * @return list<string> The stored array unchanged if the field holds an
 *   array; a one-element list if it holds a string; an empty list for
 *   any other value.
 */
private function readStringList(array $src, $key) {
  // NOTE(review): idx() is assumed to yield null when the key is absent,
  // which then falls through to the empty-list case below.
  $value = idx($src, $key);

  // Already in list form: pass it through untouched. Elements are not
  // validated here.
  if (is_array($value)) {
    return $value;
  }

  // Single-string form: wrap it so callers always receive a list.
  if (is_string($value)) {
    return array($value);
  }

  // Missing or unexpected type: treat as "no content".
  return array();
}
|
||||
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue