Skip to content

Commit 467f699

Browse files
Will-Sommers and pokey authored
Introduce tree-sitter queries for syntactic scopes (#629)
- Partially addresses #616 - Partially addresses #436 - Depends on #1396 ## Todo - [x] **[DISCUSS]** What to do about fallback `iterationScope`? That's the only thing that is a regression here. - [x] File issues for FIXMEs - [x] File issue for defining iteration scopes. Can probably reuse most of the code from the regular scope handler other than creating the target - [x] File issue to add unit tests for scope handlers - [x] File issue to add some Python scope types where multiple can end at the same point (due to lack of closing brackets) - [x] Add test that checks no scope types are duplicated between legacy and new definition, or file issue to add test - [x] File PR for my 7783da6 (Add support for domain, leading, trailing, interior) #1427 - [x] Look through comments on this thread for anything worth filing / doing - [x] Open as new PR? - [x] Remove extraneous test cases - [x] Double check #629 (comment); a lot of those tests we already have for the generic modifier code - [x] Make sure changes to parse-tree-extension are shipped - [x] Close #785 if we fix that - [x] Comment on #484 saying the process has started and providing link to example - [x] Close #797 if we fix that --------- Co-authored-by: Pokey Rule <[email protected]>
1 parent 5d6fbc2 commit 467f699

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

50 files changed

+1159
-25
lines changed

packages/cursorless-engine/src/cursorlessEngine.ts

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ import { MarkStageFactoryImpl } from "./processTargets/MarkStageFactoryImpl";
99
import { ModifierStageFactoryImpl } from "./processTargets/ModifierStageFactoryImpl";
1010
import { ScopeHandlerFactoryImpl } from "./processTargets/modifiers/scopeHandlers";
1111
import { injectIde } from "./singletons/ide.singleton";
12+
import { LanguageDefinitions } from "./languages/LanguageDefinitions";
1213

1314
export function createCursorlessEngine(
1415
treeSitter: TreeSitter,
@@ -36,7 +37,8 @@ export function createCursorlessEngine(
3637

3738
const testCaseRecorder = new TestCaseRecorder(hatTokenMap);
3839

39-
const scopeHandlerFactory = new ScopeHandlerFactoryImpl();
40+
const languageDefinitions = new LanguageDefinitions(treeSitter);
41+
const scopeHandlerFactory = new ScopeHandlerFactoryImpl(languageDefinitions);
4042
const markStageFactory = new MarkStageFactoryImpl();
4143
const modifierStageFactory = new ModifierStageFactoryImpl(
4244
scopeHandlerFactory,
Lines changed: 69 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,69 @@
1+
import { ScopeType, SimpleScopeType } from "@cursorless/common";
2+
import { Query } from "web-tree-sitter";
3+
import { ide } from "../singletons/ide.singleton";
4+
import { join } from "path";
5+
import { TreeSitterScopeHandler } from "../processTargets/modifiers/scopeHandlers";
6+
import { TreeSitter } from "../typings/TreeSitter";
7+
import { existsSync, readFileSync } from "fs";
8+
import { LanguageId } from "./constants";
9+
10+
/**
 * Represents a language definition for a single language, including the
 * tree-sitter query used to extract scopes for the given language
 */
export class LanguageDefinition {
  private constructor(
    private readonly treeSitter: TreeSitter,
    /**
     * The tree-sitter query used to extract scopes for the given language.
     * Note that this query contains patterns for all scope types that the
     * language supports using new-style tree-sitter queries
     */
    private readonly query: Query,
  ) {}

  /**
   * Construct a language definition for the given language id, if the language
   * has a new-style query definition, or return undefined if the language
   * doesn't have one.
   *
   * The query definition is looked up as `queries/<languageId>.scm` under the
   * ide's assets root; a missing file means "no new-style definition".
   *
   * @param treeSitter The tree-sitter instance to use for parsing
   * @param languageId The language id for which to create a language definition
   * @returns A language definition for the given language id, or undefined if
   * the given language id doesn't have a new-style query definition
   * @throws Error if a query file exists for the language but tree-sitter has
   * no language object for it
   */
  static create(
    treeSitter: TreeSitter,
    languageId: LanguageId,
  ): LanguageDefinition | undefined {
    const queryPath = join(ide().assetsRoot, "queries", `${languageId}.scm`);

    if (!existsSync(queryPath)) {
      return undefined;
    }

    const rawLanguageQueryString = readFileSync(queryPath, "utf8");

    // Guard explicitly rather than using a non-null assertion, so that a
    // missing grammar surfaces as a clear error instead of an opaque
    // "cannot read properties of undefined" TypeError.
    const language = treeSitter.getLanguage(languageId);
    if (language == null) {
      throw new Error(
        `Found query definition "${queryPath}" but tree-sitter has no language for "${languageId}"`,
      );
    }

    return new LanguageDefinition(
      treeSitter,
      language.query(rawLanguageQueryString),
    );
  }

  /**
   * @param scopeType The scope type for which to get a scope handler
   * @returns A scope handler for the given scope type and language id, or
   * undefined if the given scope type / language id combination is still using
   * legacy pathways, ie the query has no capture named after the scope type
   */
  getScopeHandler(scopeType: ScopeType): TreeSitterScopeHandler | undefined {
    if (!this.query.captureNames.includes(scopeType.type)) {
      return undefined;
    }

    return new TreeSitterScopeHandler(
      this.treeSitter,
      this.query,
      scopeType as SimpleScopeType,
    );
  }
}
Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,54 @@
1+
import { TreeSitter } from "..";
2+
import { LanguageDefinition } from "./LanguageDefinition";
3+
import { LanguageId } from "./constants";
4+
5+
/**
 * Marker stored in the cache for a language that has been looked up and found
 * to have no new-style query definition file
 */
const LANGUAGE_UNDEFINED = Symbol("LANGUAGE_UNDEFINED");

/**
 * Lazily builds and caches one {@link LanguageDefinition} per language id
 */
export class LanguageDefinitions {
  /**
   * Cache keyed by language id. A value is either the constructed
   * {@link LanguageDefinition} or {@link LANGUAGE_UNDEFINED}.
   *
   * The sentinel lets us tell "never looked up" apart from "looked up, and
   * there is no new-style definition": {@link Map.get} yields `undefined` for
   * the former, whereas the latter is stored explicitly as the sentinel.
   */
  private languageDefinitions: Map<
    string,
    LanguageDefinition | typeof LANGUAGE_UNDEFINED
  > = new Map();

  constructor(private treeSitter: TreeSitter) {}

  /**
   * Look up the language definition for a language id, constructing and
   * caching it on first request.
   *
   * @param languageId The language id for which to get a language definition
   * @returns The language definition for the given language id, or undefined
   * if that language has no new-style query definition
   */
  get(languageId: string): LanguageDefinition | undefined {
    const cached = this.languageDefinitions.get(languageId);

    if (cached != null) {
      // Already resolved earlier; translate the sentinel back to undefined
      return cached === LANGUAGE_UNDEFINED ? undefined : cached;
    }

    const created = LanguageDefinition.create(
      this.treeSitter,
      languageId as LanguageId,
    );

    // Remember negative results too, so we don't repeat the lookup
    this.languageDefinitions.set(languageId, created ?? LANGUAGE_UNDEFINED);

    return created;
  }
}

packages/cursorless-engine/src/languages/index.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,5 +3,5 @@ import { SupportedLanguageId, supportedLanguageIds } from "./constants";
33
export function isLanguageSupported(
44
languageId: string,
55
): languageId is SupportedLanguageId {
6-
return languageId in supportedLanguageIds;
6+
return supportedLanguageIds.includes(languageId as SupportedLanguageId);
77
}

packages/cursorless-engine/src/languages/ruby.ts

Lines changed: 1 addition & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -151,8 +151,6 @@ function blockFinder(node: SyntaxNode) {
151151
const nodeMatchers: Partial<
152152
Record<SimpleScopeTypeType, NodeMatcherAlternative>
153153
> = {
154-
map: mapTypes,
155-
list: listTypes,
156154
statement: cascadingMatcher(
157155
patternMatcher(...STATEMENT_TYPES),
158156
ancestorChainNodeMatcher(
@@ -164,26 +162,20 @@ const nodeMatchers: Partial<
164162
),
165163
),
166164
string: "string",
167-
ifStatement: "if",
168-
functionCall: "call",
169-
comment: "comment",
170-
namedFunction: ["method", "singleton_method"],
171165
functionName: ["method[name]", "singleton_method[name]"],
172166
anonymousFunction: cascadingMatcher(
173167
patternMatcher("lambda", "do_block"),
174168
matcher(blockFinder),
175169
),
176-
regularExpression: "regex",
177170
condition: conditionMatcher("*[condition]"),
178171
argumentOrParameter: argumentMatcher(
179172
"lambda_parameters",
180173
"method_parameters",
181174
"block_parameters",
182175
"argument_list",
183176
),
184-
class: "class",
185-
className: "class[name]",
186177
collectionKey: trailingMatcher(["pair[key]"], [":"]),
178+
className: "class[name]",
187179
name: [
188180
"assignment[left]",
189181
"operator_assignment[left]",

packages/cursorless-engine/src/processTargets/modifiers/scopeHandlers/ScopeHandlerFactoryImpl.ts

Lines changed: 9 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,17 @@
1+
import type { ScopeType } from "@cursorless/common";
12
import {
23
CharacterScopeHandler,
34
DocumentScopeHandler,
45
IdentifierScopeHandler,
56
LineScopeHandler,
6-
TokenScopeHandler,
7-
WordScopeHandler,
87
OneOfScopeHandler,
98
ParagraphScopeHandler,
9+
TokenScopeHandler,
10+
WordScopeHandler,
1011
} from ".";
11-
import type { ScopeType } from "@cursorless/common";
12-
import type { ScopeHandler } from "./scopeHandler.types";
12+
import { LanguageDefinitions } from "../../../languages/LanguageDefinitions";
1313
import { ScopeHandlerFactory } from "./ScopeHandlerFactory";
14+
import type { ScopeHandler } from "./scopeHandler.types";
1415

1516
/**
1617
* Returns a scope handler for the given scope type and language id, or
@@ -30,7 +31,7 @@ import { ScopeHandlerFactory } from "./ScopeHandlerFactory";
3031
* legacy pathways
3132
*/
3233
export class ScopeHandlerFactoryImpl implements ScopeHandlerFactory {
33-
constructor() {
34+
constructor(private languageDefinitions: LanguageDefinitions) {
3435
this.create = this.create.bind(this);
3536
}
3637

@@ -53,7 +54,9 @@ export class ScopeHandlerFactoryImpl implements ScopeHandlerFactory {
5354
case "paragraph":
5455
return new ParagraphScopeHandler(scopeType, languageId);
5556
default:
56-
return undefined;
57+
return this.languageDefinitions
58+
.get(languageId)
59+
?.getScopeHandler(scopeType);
5760
}
5861
}
5962
}
Lines changed: 141 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,141 @@
1+
import {
2+
Direction,
3+
Position,
4+
ScopeType,
5+
SimpleScopeType,
6+
TextDocument,
7+
TextEditor,
8+
} from "@cursorless/common";
9+
10+
import { Point, Query, QueryMatch } from "web-tree-sitter";
11+
import { TreeSitter } from "../../..";
12+
import { getNodeRange } from "../../../util/nodeSelectors";
13+
import ScopeTypeTarget from "../../targets/ScopeTypeTarget";
14+
import BaseScopeHandler from "./BaseScopeHandler";
15+
import { compareTargetScopes } from "./compareTargetScopes";
16+
import { TargetScope } from "./scope.types";
17+
import { ScopeIteratorRequirements } from "./scopeHandler.types";
18+
19+
/**
 * Scope handler for scope types whose scopes are produced by a tree-sitter
 * query: a scope is a query match containing a capture named after the scope
 * type.
 */
export class TreeSitterScopeHandler extends BaseScopeHandler {
  protected isHierarchical: boolean = true;

  constructor(
    private treeSitter: TreeSitter,
    private query: Query,
    public scopeType: SimpleScopeType,
  ) {
    super();
  }

  /** Iteration scopes are not supported by this handler yet; always throws. */
  public get iterationScopeType(): ScopeType {
    throw Error("Not implemented");
  }

  /**
   * Runs the query over a narrowed region around `position` and yields one
   * scope per match that has a capture for this handler's scope type, ordered
   * by {@link compareTargetScopes}.
   */
  *generateScopeCandidates(
    editor: TextEditor,
    position: Position,
    direction: Direction,
    hints: ScopeIteratorRequirements,
  ): Iterable<TargetScope> {
    const { document } = editor;

    // Restrict the region tree-sitter has to search, for performance
    const searchRange = getQueryRange(document, position, direction, hints);

    const queryMatches = this.query.matches(
      this.treeSitter.getTree(document).rootNode,
      positionToPoint(searchRange.start),
      positionToPoint(searchRange.end),
    );

    const scopes: TargetScope[] = [];

    for (const queryMatch of queryMatches) {
      const hasScopeCapture = queryMatch.captures.some(
        ({ name }) => name === this.scopeType.type,
      );

      if (hasScopeCapture) {
        scopes.push(this.matchToScope(editor, queryMatch));
      }
    }

    scopes.sort((left, right) =>
      compareTargetScopes(direction, position, left, right),
    );

    yield* scopes;
  }

  /**
   * Builds a scope from a query match, using the range of the first capture
   * named after this handler's scope type as the content range.
   */
  private matchToScope(editor: TextEditor, match: QueryMatch): TargetScope {
    const scopeCapture = match.captures.find(
      ({ name }) => name === this.scopeType.type,
    )!;
    const contentRange = getNodeRange(scopeCapture.node);

    const getTarget = (isReversed: boolean) =>
      new ScopeTypeTarget({
        scopeTypeType: this.scopeType.type,
        editor,
        isReversed,
        contentRange,
        // FIXME: Actually get removalRange
        removalRange: contentRange,
        // FIXME: Other fields here
      });

    return {
      editor,
      // FIXME: Actually get domain
      domain: contentRange,
      getTarget,
    };
  }
}
84+
85+
/**
 * Constructs a range to pass to {@link Query.matches} to find scopes. Note
 * that {@link Query.matches} will only return scopes that have non-empty
 * intersection with this range. Also note that the base
 * {@link BaseScopeHandler.generateScopes} will filter out any extra scopes
 * that we yield, so we don't need to be totally precise.
 *
 * Every offset that has been shifted by a character is clamped to the
 * document bounds before being converted back to a position, so that a
 * position at the very start or end of the document never results in an
 * out-of-range offset being passed to `document.positionAt`.
 *
 * @param document The document being searched
 * @param position The position from which the search starts
 * @param direction The direction in which to search from `position`
 * @param requirements Only `containment` and `distalPosition` are consulted
 * @returns Range to pass to {@link Query.matches}
 */
function getQueryRange(
  document: TextDocument,
  position: Position,
  direction: Direction,
  { containment, distalPosition }: ScopeIteratorRequirements,
): { start: Position; end: Position } {
  const documentEndOffset = document.offsetAt(document.range.end);

  /** Converts an offset to a position after clamping it into the document */
  const clampedPositionAt = (rawOffset: number): Position =>
    document.positionAt(Math.min(Math.max(rawOffset, 0), documentEndOffset));

  const offset = document.offsetAt(position);
  const distalOffset =
    distalPosition == null ? null : document.offsetAt(distalPosition);

  if (containment === "required") {
    // If containment is required, we smear the position left and right by one
    // character so that we have a non-empty intersection with any scope that
    // touches position
    return {
      start: clampedPositionAt(offset - 1),
      end: clampedPositionAt(offset + 1),
    };
  }

  // If containment is disallowed, we can shift the position forward by a character to avoid
  // matching scopes that touch position. Otherwise, we shift the position backward by a
  // character to ensure we get scopes that touch position.
  const proximalShift = containment === "disallowed" ? 1 : -1;

  // FIXME: Don't go all the way to end of document when there is no distalPosition?
  // Seems wasteful to query all the way to end of document for something like "next funk"
  // Might be better to start smaller and exponentially grow
  return direction === "forward"
    ? {
        start: clampedPositionAt(offset + proximalShift),
        end:
          distalOffset == null
            ? document.range.end
            : clampedPositionAt(distalOffset + 1),
      }
    : {
        start:
          distalOffset == null
            ? document.range.start
            : clampedPositionAt(distalOffset - 1),
        end: clampedPositionAt(offset - proximalShift),
      };
}
138+
139+
/**
 * Converts an editor position into a tree-sitter point by mapping `line` to
 * `row` and `character` to `column`.
 *
 * @param start The position to convert; despite the name, this is used for
 * both ends of a range
 * @returns The equivalent tree-sitter point; never undefined
 */
function positionToPoint(start: Position): Point {
  return { row: start.line, column: start.character };
}

packages/cursorless-engine/src/processTargets/modifiers/scopeHandlers/index.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ export * from "./TokenScopeHandler";
1212
export { default as TokenScopeHandler } from "./TokenScopeHandler";
1313
export * from "./DocumentScopeHandler";
1414
export { default as DocumentScopeHandler } from "./DocumentScopeHandler";
15+
export * from "./TreeSitterScopeHandler";
1516
export * from "./OneOfScopeHandler";
1617
export { default as OneOfScopeHandler } from "./OneOfScopeHandler";
1718
export * from "./ParagraphScopeHandler";

0 commit comments

Comments
 (0)