Skip to content

Commit d1afae9

Browse files
committed
Fix HTML escaping to match what Jupyter does (#14038)
* Basic idea * add some functional tests * Add news entry * Fix functional tests
1 parent 8d337a7 commit d1afae9

13 files changed

+165
-73
lines changed

news/2 Fixes/5678.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Fix escaping of output to encode HTML chars correctly.

src/client/datascience/jupyter/jupyterNotebook.ts

Lines changed: 18 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,10 @@ import { KernelConnectionMetadata } from './kernels/types';
4040

4141
// tslint:disable-next-line: no-require-imports
4242
import cloneDeep = require('lodash/cloneDeep');
43+
// tslint:disable-next-line: no-require-imports
44+
import escape = require('lodash/escape');
45+
// tslint:disable-next-line: no-require-imports
46+
import unescape = require('lodash/unescape');
4347
import { concatMultilineString, formatStreamText } from '../../../datascience-ui/common';
4448
import { RefBool } from '../../common/refBool';
4549
import { PythonEnvironment } from '../../pythonEnvironments/info';
@@ -783,12 +787,12 @@ export class JupyterNotebookBase implements INotebook {
783787
outputs.forEach((o) => {
784788
if (o.output_type === 'stream') {
785789
const stream = o as nbformat.IStream;
786-
result = result.concat(formatStreamText(concatMultilineString(stream.text, true)));
790+
result = result.concat(formatStreamText(unescape(concatMultilineString(stream.text, true))));
787791
} else {
788792
const data = o.data;
789793
if (data && data.hasOwnProperty('text/plain')) {
790794
// tslint:disable-next-line:no-any
791-
result = result.concat((data as any)['text/plain']);
795+
result = result.concat(unescape((data as any)['text/plain']));
792796
}
793797
}
794798
});
@@ -1233,7 +1237,7 @@ export class JupyterNotebookBase implements INotebook {
12331237
) {
12341238
// Check our length on text output
12351239
if (msg.content.data && msg.content.data.hasOwnProperty('text/plain')) {
1236-
msg.content.data['text/plain'] = trimFunc(msg.content.data['text/plain'] as string);
1240+
msg.content.data['text/plain'] = escape(trimFunc(msg.content.data['text/plain'] as string));
12371241
}
12381242

12391243
this.addToCellData(
@@ -1262,7 +1266,7 @@ export class JupyterNotebookBase implements INotebook {
12621266
if (o.data && o.data.hasOwnProperty('text/plain')) {
12631267
// tslint:disable-next-line: no-any
12641268
const str = (o.data as any)['text/plain'].toString();
1265-
const data = trimFunc(str) as string;
1269+
const data = escape(trimFunc(str)) as string;
12661270
this.addToCellData(
12671271
cell,
12681272
{
@@ -1310,13 +1314,13 @@ export class JupyterNotebookBase implements INotebook {
13101314
: undefined;
13111315
if (existing) {
13121316
// tslint:disable-next-line:restrict-plus-operands
1313-
existing.text = existing.text + msg.content.text;
1317+
existing.text = existing.text + escape(msg.content.text);
13141318
const originalText = formatStreamText(concatMultilineString(existing.text));
13151319
originalTextLength = originalText.length;
13161320
existing.text = trimFunc(originalText);
13171321
trimmedTextLength = existing.text.length;
13181322
} else {
1319-
const originalText = formatStreamText(concatMultilineString(msg.content.text));
1323+
const originalText = formatStreamText(concatMultilineString(escape(msg.content.text)));
13201324
originalTextLength = originalText.length;
13211325
// Create a new stream entry
13221326
const output: nbformat.IStream = {
@@ -1346,6 +1350,11 @@ export class JupyterNotebookBase implements INotebook {
13461350
}
13471351

13481352
private handleDisplayData(msg: KernelMessage.IDisplayDataMsg, clearState: RefBool, cell: ICell) {
1353+
// Escape text output
1354+
if (msg.content.data && msg.content.data.hasOwnProperty('text/plain')) {
1355+
msg.content.data['text/plain'] = escape(msg.content.data['text/plain'] as string);
1356+
}
1357+
13491358
const output: nbformat.IDisplayData = {
13501359
output_type: 'display_data',
13511360
data: msg.content.data,
@@ -1393,9 +1402,9 @@ export class JupyterNotebookBase implements INotebook {
13931402
private handleError(msg: KernelMessage.IErrorMsg, clearState: RefBool, cell: ICell) {
13941403
const output: nbformat.IError = {
13951404
output_type: 'error',
1396-
ename: msg.content.ename,
1397-
evalue: msg.content.evalue,
1398-
traceback: msg.content.traceback
1405+
ename: escape(msg.content.ename),
1406+
evalue: escape(msg.content.evalue),
1407+
traceback: msg.content.traceback.map(escape)
13991408
};
14001409
this.addToCellData(cell, output, clearState);
14011410
cell.state = CellState.error;

src/client/datascience/jupyter/kernelVariables.ts

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,8 @@ import { inject, injectable } from 'inversify';
66
import stripAnsi from 'strip-ansi';
77
import * as uuid from 'uuid/v4';
88

9+
// tslint:disable-next-line: no-require-imports
10+
import unescape = require('lodash/unescape');
911
import { CancellationToken, Event, EventEmitter, Uri } from 'vscode';
1012
import { PYTHON_LANGUAGE } from '../../common/constants';
1113
import { traceError } from '../../common/logger';
@@ -246,7 +248,7 @@ export class KernelVariables implements IJupyterVariables {
246248

247249
// Pull our text result out of the Jupyter cell
248250
private deserializeJupyterResult<T>(cells: ICell[]): T {
249-
const text = this.extractJupyterResultText(cells);
251+
const text = unescape(this.extractJupyterResultText(cells));
250252
return JSON.parse(text) as T;
251253
}
252254

@@ -371,7 +373,7 @@ export class KernelVariables implements IJupyterVariables {
371373
// Now execute the query
372374
if (notebook && query) {
373375
const cells = await notebook.execute(query.query, Identifiers.EmptyFileName, 0, uuid(), token, true);
374-
const text = this.extractJupyterResultText(cells);
376+
const text = unescape(this.extractJupyterResultText(cells));
375377

376378
// Apply the expression to it
377379
const matches = this.getAllMatches(query.parser, text);

src/client/datascience/jupyter/kernels/cellExecution.ts

Lines changed: 15 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,8 @@ import {
3535
import { IKernel } from './types';
3636
// tslint:disable-next-line: no-var-requires no-require-imports
3737
const vscodeNotebookEnums = require('vscode') as typeof import('vscode-proposed');
38+
// tslint:disable-next-line: no-require-imports
39+
import escape = require('lodash/escape');
3840

3941
export class CellExecutionFactory {
4042
constructor(
@@ -406,6 +408,11 @@ export class CellExecution {
406408
// See this for docs on the messages:
407409
// https://jupyter-client.readthedocs.io/en/latest/messaging.html#messaging-in-jupyter
408410
private handleExecuteResult(msg: KernelMessage.IExecuteResultMsg, clearState: RefBool) {
411+
// Escape text output
412+
if (msg.content.data && msg.content.data.hasOwnProperty('text/plain')) {
413+
msg.content.data['text/plain'] = escape(msg.content.data['text/plain'] as string);
414+
}
415+
409416
this.addToCellData(
410417
{
411418
output_type: 'execute_result',
@@ -429,7 +436,7 @@ export class CellExecution {
429436
// Mark as stream output so the text is formatted because it likely has ansi codes in it.
430437
output_type: 'stream',
431438
// tslint:disable-next-line: no-any
432-
text: (o.data as any)['text/plain'].toString(),
439+
text: escape((o.data as any)['text/plain'].toString()),
433440
metadata: {},
434441
execution_count: reply.execution_count
435442
},
@@ -463,10 +470,10 @@ export class CellExecution {
463470
lastOutput && lastOutput.outputKind === CellOutputKind.Text ? lastOutput : undefined;
464471
if (existing) {
465472
// tslint:disable-next-line:restrict-plus-operands
466-
existing.text = formatStreamText(concatMultilineString(existing.text + msg.content.text));
473+
existing.text = formatStreamText(concatMultilineString(existing.text + escape(msg.content.text)));
467474
this.cell.outputs = [...this.cell.outputs]; // This is necessary to get VS code to update (for now)
468475
} else {
469-
const originalText = formatStreamText(concatMultilineString(msg.content.text));
476+
const originalText = formatStreamText(concatMultilineString(escape(msg.content.text)));
470477
// Create a new stream entry
471478
const output: nbformat.IStream = {
472479
output_type: 'stream',
@@ -478,6 +485,11 @@ export class CellExecution {
478485
}
479486

480487
private handleDisplayData(msg: KernelMessage.IDisplayDataMsg, clearState: RefBool) {
488+
// Escape text output
489+
if (msg.content.data && msg.content.data.hasOwnProperty('text/plain')) {
490+
msg.content.data['text/plain'] = escape(msg.content.data['text/plain'] as string);
491+
}
492+
481493
const output: nbformat.IDisplayData = {
482494
output_type: 'display_data',
483495
data: msg.content.data,

src/client/datascience/jupyter/oldJupyterVariables.ts

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,8 @@ import { Event, EventEmitter, Uri } from 'vscode';
1111
import { PYTHON_LANGUAGE } from '../../common/constants';
1212
import { traceError } from '../../common/logger';
1313

14+
// tslint:disable-next-line: no-require-imports
15+
import unescape = require('lodash/unescape');
1416
import { IConfigurationService } from '../../common/types';
1517
import * as localize from '../../common/utils/localize';
1618
import { EXTENSION_ROOT_DIR } from '../../constants';
@@ -232,7 +234,7 @@ export class OldJupyterVariables implements IJupyterVariables {
232234

233235
// Pull our text result out of the Jupyter cell
234236
private deserializeJupyterResult<T>(cells: ICell[]): T {
235-
const text = this.extractJupyterResultText(cells);
237+
const text = unescape(this.extractJupyterResultText(cells));
236238
return JSON.parse(text) as T;
237239
}
238240

@@ -357,7 +359,7 @@ export class OldJupyterVariables implements IJupyterVariables {
357359
// Now execute the query
358360
if (notebook && query) {
359361
const cells = await notebook.execute(query.query, Identifiers.EmptyFileName, 0, uuid(), undefined, true);
360-
const text = this.extractJupyterResultText(cells);
362+
const text = unescape(this.extractJupyterResultText(cells));
361363

362364
// Apply the expression to it
363365
const matches = this.getAllMatches(query.parser, text);

src/datascience-ui/interactive-common/cellOutput.tsx

Lines changed: 16 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -314,26 +314,34 @@ export class CellOutput extends React.Component<ICellOutputProps> {
314314
input = JSON.stringify(output.data);
315315
renderWithScrollbars = true;
316316
isText = true;
317+
} else if (output.output_type === 'execute_result' && input && input.hasOwnProperty('text/plain')) {
318+
// Plain text should actually be shown as html so that escaped HTML shows up correctly
319+
mimeType = 'text/html';
320+
isText = true;
321+
isError = false;
322+
renderWithScrollbars = true;
323+
// tslint:disable-next-line: no-any
324+
const text = (input as any)['text/plain'];
325+
input = {
326+
'text/html': text // XML tags should have already been escaped.
327+
};
317328
} else if (output.output_type === 'stream') {
318-
// Stream output needs to be wrapped in xmp so it
319-
// show literally. Otherwise < chars start a new html element.
320329
mimeType = 'text/html';
321330
isText = true;
322331
isError = false;
323332
renderWithScrollbars = true;
324333
// Sonar is wrong, TS won't compile without this AS
325334
const stream = output as nbformat.IStream; // NOSONAR
326-
const formatted = concatMultilineString(stream.text);
335+
const concatted = concatMultilineString(stream.text);
327336
input = {
328-
'text/html': formatted.includes('<') ? `<xmp>${formatted}</xmp>` : `<div>${formatted}</div>`
337+
'text/html': concatted // XML tags should have already been escaped.
329338
};
330339

331-
// Output may have goofy ascii colorization chars in it. Try
332-
// colorizing if we don't have html that needs <xmp> around it (ex. <type ='string'>)
340+
// Output may have ascii colorization chars in it.
333341
try {
334-
if (ansiRegex().test(formatted)) {
342+
if (ansiRegex().test(concatted)) {
335343
const converter = new CellOutput.ansiToHtmlClass(CellOutput.getAnsiToHtmlOptions());
336-
const html = converter.toHtml(formatted);
344+
const html = converter.toHtml(concatted);
337345
input = {
338346
'text/html': html
339347
};

src/test/datascience/Untitled-1.ipynb

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
{
2+
"metadata": {
3+
"language_info": {
4+
"codemirror_mode": {
5+
"name": "ipython",
6+
"version": 3
7+
},
8+
"file_extension": ".py",
9+
"mimetype": "text/x-python",
10+
"name": "python",
11+
"nbconvert_exporter": "python",
12+
"pygments_lexer": "ipython3",
13+
"version": "3.6.6-final"
14+
},
15+
"orig_nbformat": 2
16+
},
17+
"nbformat": 4,
18+
"nbformat_minor": 2,
19+
"cells": [
20+
{
21+
"cell_type": "code",
22+
"execution_count": null,
23+
"metadata": {},
24+
"outputs": [],
25+
"source": []
26+
},
27+
{
28+
"cell_type": "code",
29+
"execution_count": 1,
30+
"metadata": {},
31+
"outputs": [
32+
{
33+
"output_type": "execute_result",
34+
"data": {
35+
"text/plain": "1"
36+
},
37+
"metadata": {},
38+
"execution_count": 1
39+
}
40+
],
41+
"source": [
42+
"a=1\n",
43+
"a"
44+
]
45+
}
46+
]
47+
}

0 commit comments

Comments
 (0)