Skip to content

Commit fc5aa02

Browse files
fix: list items & table cell content parsing
Co-authored-by: Matthew Lipski <[email protected]>
1 parent 930b74f commit fc5aa02

32 files changed

+1432
-118
lines changed

packages/core/package.json

+1-1
Original file line numberDiff line numberDiff line change
@@ -96,7 +96,7 @@
9696
"hast-util-from-dom": "^5.0.1",
9797
"prosemirror-dropcursor": "^1.8.1",
9898
"prosemirror-highlight": "^0.13.0",
99-
"prosemirror-model": "^1.24.1",
99+
"prosemirror-model": "^1.25.1",
100100
"prosemirror-state": "^1.4.3",
101101
"prosemirror-tables": "^1.6.4",
102102
"prosemirror-transform": "^1.10.2",

packages/core/src/blocks/ListItemBlockContent/BulletListItemBlockContent/BulletListItemBlockContent.ts

+5-20
Original file line numberDiff line numberDiff line change
@@ -1,17 +1,14 @@
11
import { InputRule } from "@tiptap/core";
2-
import { DOMParser } from "@tiptap/pm/model";
32
import { updateBlockCommand } from "../../../api/blockManipulation/commands/updateBlock/updateBlock.js";
43
import { getBlockInfoFromSelection } from "../../../api/getBlockInfoFromPos.js";
54
import {
65
PropSchema,
76
createBlockSpecFromStronglyTypedTiptapNode,
87
createStronglyTypedTiptapNode,
98
} from "../../../schema/index.js";
10-
import {
11-
createDefaultBlockDOMOutputSpec,
12-
mergeParagraphs,
13-
} from "../../defaultBlockHelpers.js";
9+
import { createDefaultBlockDOMOutputSpec } from "../../defaultBlockHelpers.js";
1410
import { defaultProps } from "../../defaultProps.js";
11+
import { getListItemContent } from "../getListItemContent.js";
1512
import { handleEnter } from "../ListItemKeyboardShortcuts.js";
1613

1714
export const bulletListItemPropSchema = {
@@ -98,7 +95,7 @@ const BulletListItemBlockContent = createStronglyTypedTiptapNode({
9895

9996
if (
10097
parent.tagName === "UL" ||
101-
(parent.tagName === "DIV" && parent.parentElement!.tagName === "UL")
98+
(parent.tagName === "DIV" && parent.parentElement?.tagName === "UL")
10299
) {
103100
return {};
104101
}
@@ -107,20 +104,8 @@ const BulletListItemBlockContent = createStronglyTypedTiptapNode({
107104
},
108105
// As `li` elements can contain multiple paragraphs, we need to merge their contents
109106
// into a single one so that ProseMirror can parse everything correctly.
110-
getContent: (node, schema) => {
111-
mergeParagraphs(node as HTMLElement);
112-
113-
const parser = DOMParser.fromSchema(schema);
114-
115-
const parentNode = parser.parse(
116-
(node as HTMLElement).querySelector("p") || node,
117-
{
118-
topNode: schema.nodes[this.name].create(),
119-
}
120-
);
121-
122-
return parentNode.content;
123-
},
107+
getContent: (node, schema) =>
108+
getListItemContent(node, schema, this.name),
124109
node: "bulletListItem",
125110
},
126111
];

packages/core/src/blocks/ListItemBlockContent/CheckListItemBlockContent/CheckListItemBlockContent.ts

+5-20
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,4 @@
11
import { InputRule } from "@tiptap/core";
2-
import { DOMParser } from "@tiptap/pm/model";
32
import { updateBlockCommand } from "../../../api/blockManipulation/commands/updateBlock/updateBlock.js";
43
import {
54
getBlockInfoFromSelection,
@@ -11,11 +10,9 @@ import {
1110
createStronglyTypedTiptapNode,
1211
propsToAttributes,
1312
} from "../../../schema/index.js";
14-
import {
15-
createDefaultBlockDOMOutputSpec,
16-
mergeParagraphs,
17-
} from "../../defaultBlockHelpers.js";
13+
import { createDefaultBlockDOMOutputSpec } from "../../defaultBlockHelpers.js";
1814
import { defaultProps } from "../../defaultProps.js";
15+
import { getListItemContent } from "../getListItemContent.js";
1916
import { handleEnter } from "../ListItemKeyboardShortcuts.js";
2017

2118
export const checkListItemPropSchema = {
@@ -154,7 +151,7 @@ const checkListItemBlockContent = createStronglyTypedTiptapNode({
154151

155152
if (
156153
parent.tagName === "UL" ||
157-
(parent.tagName === "DIV" && parent.parentElement!.tagName === "UL")
154+
(parent.tagName === "DIV" && parent.parentElement?.tagName === "UL")
158155
) {
159156
const checkbox =
160157
(element.querySelector(
@@ -172,20 +169,8 @@ const checkListItemBlockContent = createStronglyTypedTiptapNode({
172169
},
173170
// As `li` elements can contain multiple paragraphs, we need to merge their contents
174171
// into a single one so that ProseMirror can parse everything correctly.
175-
getContent: (node, schema) => {
176-
mergeParagraphs(node as HTMLElement);
177-
178-
const parser = DOMParser.fromSchema(schema);
179-
180-
const parentNode = parser.parse(
181-
(node as HTMLElement).querySelector("p") || node,
182-
{
183-
topNode: schema.nodes[this.name].create(),
184-
}
185-
);
186-
187-
return parentNode.content;
188-
},
172+
getContent: (node, schema) =>
173+
getListItemContent(node, schema, this.name),
189174
node: "checkListItem",
190175
},
191176
];

packages/core/src/blocks/ListItemBlockContent/NumberedListItemBlockContent/NumberedListItemBlockContent.ts

+6-20
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,4 @@
11
import { InputRule } from "@tiptap/core";
2-
import { DOMParser } from "@tiptap/pm/model";
32
import { updateBlockCommand } from "../../../api/blockManipulation/commands/updateBlock/updateBlock.js";
43
import { getBlockInfoFromSelection } from "../../../api/getBlockInfoFromPos.js";
54
import {
@@ -8,11 +7,9 @@ import {
87
createStronglyTypedTiptapNode,
98
propsToAttributes,
109
} from "../../../schema/index.js";
11-
import {
12-
createDefaultBlockDOMOutputSpec,
13-
mergeParagraphs,
14-
} from "../../defaultBlockHelpers.js";
10+
import { createDefaultBlockDOMOutputSpec } from "../../defaultBlockHelpers.js";
1511
import { defaultProps } from "../../defaultProps.js";
12+
import { getListItemContent } from "../getListItemContent.js";
1613
import { handleEnter } from "../ListItemKeyboardShortcuts.js";
1714
import { NumberedListIndexingPlugin } from "./NumberedListIndexingPlugin.js";
1815

@@ -126,7 +123,7 @@ const NumberedListItemBlockContent = createStronglyTypedTiptapNode({
126123

127124
if (
128125
parent.tagName === "OL" ||
129-
(parent.tagName === "DIV" && parent.parentElement!.tagName === "OL")
126+
(parent.tagName === "DIV" && parent.parentElement?.tagName === "OL")
130127
) {
131128
const startIndex =
132129
parseInt(parent.getAttribute("start") || "1") || 1;
@@ -144,20 +141,9 @@ const NumberedListItemBlockContent = createStronglyTypedTiptapNode({
144141
},
145142
// As `li` elements can contain multiple paragraphs, we need to merge their contents
146143
// into a single one so that ProseMirror can parse everything correctly.
147-
getContent: (node, schema) => {
148-
mergeParagraphs(node as HTMLElement);
149-
150-
const parser = DOMParser.fromSchema(schema);
151-
152-
const parentNode = parser.parse(
153-
(node as HTMLElement).querySelector("p") || node,
154-
{
155-
topNode: schema.nodes[this.name].create(),
156-
}
157-
);
158-
159-
return parentNode.content;
160-
},
144+
getContent: (node, schema) =>
145+
getListItemContent(node, schema, this.name),
146+
priority: 300,
161147
node: "numberedListItem",
162148
},
163149
];
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,117 @@
1+
import { DOMParser, Fragment, Schema } from "prosemirror-model";
2+
3+
/**
 * Parses the content of an external HTML `li` element into content for a list item node.
 *
 * The approach is similar to Notion's and follows two rules:
 *
 * - If the first child of the `li` parses to a text block, its content is flattened into the list item.
 *   Subsequent siblings are carried over as is, as children of the list item.
 *   - e.g. `<li><h1>Hello</h1><p>World</p></li> -> <li>Hello<blockGroup><blockContainer><p>World</p></blockContainer></blockGroup></li>`
 * - Otherwise, the content is inserted as children instead.
 *   - e.g. `<li><img src="url" /></li> -> <li><p></p><blockGroup><blockContainer><img src="url" /></blockContainer></blockGroup></li>`
 *
 * The implementation leans on ProseMirror's "fitting" algorithm: parsed content which does not fit
 * into the current node is moved into the parent node. That lets us return more than the list item
 * itself could hold and have the excess "thrown" into the list item's children.
 *
 * @param node The `li` element to parse.
 * @param schema The schema to use for parsing.
 * @param name The name of the list item node.
 * @returns A `Fragment` with the list item's own content first, followed (when there is any) by the
 * children wrapped in a `blockGroup` node:
 * ```
 * Fragment<[Node<Text>, Node<BlockGroup<Node<BlockContainer<any>>>>]>
 * ```
 * When the first parsed child is not a text block, the fragment holds only the `blockGroup`.
 * @throws Error when `node` is not an `HTMLElement`.
 */
export function getListItemContent(
  node: Node,
  schema: Schema,
  name: string
): Fragment {
  const parser = DOMParser.fromSchema(schema);

  // TODO: This will be unnecessary in the future: https://github.com/ProseMirror/prosemirror-model/commit/166188d4f9db96eb86fb7de62e72049c86c9dd79
  if (!(node instanceof HTMLElement)) {
    throw new Error("Node is not an HTMLElement");
  }

  // Parsing the `li` itself would re-trigger list item parsing, so (as a hacky workaround) its
  // children are deep-cloned into a stand-in `div` which is tagged as a `blockGroup`.
  const wrapper = document.createElement("div");
  wrapper.setAttribute("data-node-type", "blockGroup");
  Array.from(node.childNodes).forEach((child) => {
    wrapper.appendChild(child.cloneNode(true));
  });

  // The result has the shape `blockGroup<blockContainer<any>[]>`, which is also the shape of a
  // list item's children. That makes it easy to split "own content" from "child blocks" below.
  let parsedGroup = parser.parse(wrapper, {
    topNode: schema.nodes.blockGroup.create(),
  });

  // Edge case: an `<input>` inside the list item can get recognized as a `checkListItem`.
  // That leading block is cut away and the algorithm continues with whatever follows it.
  const leadingBlock = parsedGroup.firstChild?.firstChild;
  if (leadingBlock?.type.name === "checkListItem") {
    parsedGroup = parsedGroup.copy(
      parsedGroup.content.cut(leadingBlock.nodeSize + 2)
    );
  }

  // Look at the content of the first `blockContainer` (re-read, as the group may have changed).
  const firstBlock = parsedGroup.firstChild?.firstChild;

  // No text block up front means its first child is not compatible with the list item node:
  // nothing goes into the list item itself and everything found is returned as its children.
  if (firstBlock == null || !firstBlock.isTextblock) {
    return Fragment.from(parsedGroup);
  }

  // A text block only holds inline content, so that content becomes the list item's content.
  const inlineContent = schema.nodes[name].create({}, firstBlock.content).content;

  // Drop the first block from the group, keeping `<...blockContainer<any>[]>`.
  // The cut position is relative to the group's content, so `+2` covers the opening and closing
  // tokens of the node wrapping `firstBlock`.
  // NOTE(review): this matches the wrapper's full size only if it holds nothing besides
  // `firstBlock` — confirm that the parser never nests more inside the first wrapper.
  const trailingBlocks = parsedGroup.content.cut(firstBlock.nodeSize + 2);

  if (trailingBlocks.size === 0) {
    // Nothing besides the list item's own content.
    return inlineContent;
  }

  // Re-wrap what is left as `blockGroup<...blockContainer<any>[]>` and append it after the list
  // item's content, to be lifted out by ProseMirror's "fitting" algorithm.
  return inlineContent.addToEnd(parsedGroup.copy(trailingBlocks));
}

packages/core/src/blocks/TableBlockContent/TableBlockContent.ts

+42-33
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
import { TableCell } from "@tiptap/extension-table-cell";
22
import { TableHeader } from "@tiptap/extension-table-header";
33
import { TableRow } from "@tiptap/extension-table-row";
4-
import { Node as PMNode, DOMParser } from "prosemirror-model";
4+
import { DOMParser, Fragment, Node as PMNode, Schema } from "prosemirror-model";
55
import { TableView } from "prosemirror-tables";
66

77
import { NodeView } from "prosemirror-view";
@@ -10,10 +10,7 @@ import {
1010
createStronglyTypedTiptapNode,
1111
} from "../../schema/index.js";
1212
import { mergeCSSClasses } from "../../util/browser.js";
13-
import {
14-
createDefaultBlockDOMOutputSpec,
15-
mergeParagraphs,
16-
} from "../defaultBlockHelpers.js";
13+
import { createDefaultBlockDOMOutputSpec } from "../defaultBlockHelpers.js";
1714
import { defaultProps } from "../defaultProps.js";
1815
import { EMPTY_CELL_WIDTH, TableExtension } from "./TableExtension.js";
1916

@@ -154,6 +151,42 @@ const TableParagraph = createStronglyTypedTiptapNode({
154151
},
155152
});
156153

154+
/**
 * Flattens an element's content so that it fits into a table cell's paragraph.
 *
 * The element is parsed as though it were a `blockGroup`, and only the inline nodes found
 * anywhere in the parsed result are kept, in document order.
 *
 * @param node The element whose content should be flattened.
 * @param schema The schema to use for parsing.
 * @returns A fragment built from the collected inline nodes.
 */
function parseTableContent(node: HTMLElement, schema: Schema) {
  // Parsing with a `blockGroup` as the top node results in a structure like:
  // <blockGroup>
  //   <blockContainer>
  //     <p>Hello</p>
  //   </blockContainer>
  //   <blockContainer>
  //     <p>Hello</p>
  //   </blockContainer>
  // </blockGroup>
  const parsedGroup = DOMParser.fromSchema(schema).parse(node, {
    topNode: schema.nodes.blockGroup.create(),
  });

  const inlineNodes: PMNode[] = [];

  // Walk everything inside the parsed group and pick up the inline nodes.
  parsedGroup.content.descendants((descendant) => {
    if (!descendant.isInline) {
      // Not inline itself: keep walking into its children.
      return true;
    }

    // Inline node: keep it as-is and do not descend into it.
    inlineNodes.push(descendant);
    return false;
  });

  return Fragment.fromArray(inlineNodes);
}
189+
157190
export const Table = createBlockSpecFromStronglyTypedTiptapNode(
158191
TableBlockContent,
159192
tablePropSchema,
@@ -175,20 +208,8 @@ export const Table = createBlockSpecFromStronglyTypedTiptapNode(
175208
tag: "th",
176209
// As `th` elements can contain multiple paragraphs, we need to merge their contents
177210
// into a single one so that ProseMirror can parse everything correctly.
178-
getContent: (node, schema) => {
179-
mergeParagraphs(node as HTMLElement);
180-
181-
const parser = DOMParser.fromSchema(schema);
182-
183-
const parentNode = parser.parse(
184-
(node as HTMLElement).querySelector("p") || node,
185-
{
186-
topNode: schema.nodes[this.name].create(),
187-
}
188-
);
189-
190-
return parentNode.content;
191-
},
211+
getContent: (node, schema) =>
212+
parseTableContent(node as HTMLElement, schema),
192213
},
193214
];
194215
},
@@ -201,20 +222,8 @@ export const Table = createBlockSpecFromStronglyTypedTiptapNode(
201222
tag: "td",
202223
// As `td` elements can contain multiple paragraphs, we need to merge their contents
203224
// into a single one so that ProseMirror can parse everything correctly.
204-
getContent: (node, schema) => {
205-
mergeParagraphs(node as HTMLElement);
206-
207-
const parser = DOMParser.fromSchema(schema);
208-
209-
const parentNode = parser.parse(
210-
(node as HTMLElement).querySelector("p") || node,
211-
{
212-
topNode: schema.nodes[this.name].create(),
213-
}
214-
);
215-
216-
return parentNode.content;
217-
},
225+
getContent: (node, schema) =>
226+
parseTableContent(node as HTMLElement, schema),
218227
},
219228
];
220229
},

0 commit comments

Comments
 (0)