diff --git a/packages/cursorless-engine/src/cursorlessEngine.ts b/packages/cursorless-engine/src/cursorlessEngine.ts index 064405b29c..0311317457 100644 --- a/packages/cursorless-engine/src/cursorlessEngine.ts +++ b/packages/cursorless-engine/src/cursorlessEngine.ts @@ -9,7 +9,7 @@ import { MarkStageFactoryImpl } from "./processTargets/MarkStageFactoryImpl"; import { ModifierStageFactoryImpl } from "./processTargets/ModifierStageFactoryImpl"; import { ScopeHandlerFactoryImpl } from "./processTargets/modifiers/scopeHandlers"; import { injectIde } from "./singletons/ide.singleton"; -import { LanguageDefinitionsImpl } from "./languages/LanguageDefinitionsImpl"; +import { LanguageDefinitions } from "./languages/LanguageDefinitions"; export function createCursorlessEngine( treeSitter: TreeSitter, @@ -37,7 +37,7 @@ export function createCursorlessEngine( const testCaseRecorder = new TestCaseRecorder(hatTokenMap); - const languageDefinitions = new LanguageDefinitionsImpl(treeSitter); + const languageDefinitions = new LanguageDefinitions(treeSitter); const scopeHandlerFactory = new ScopeHandlerFactoryImpl(languageDefinitions); const markStageFactory = new MarkStageFactoryImpl(); const modifierStageFactory = new ModifierStageFactoryImpl( diff --git a/packages/cursorless-engine/src/languages/LanguageDefinition.ts b/packages/cursorless-engine/src/languages/LanguageDefinition.ts new file mode 100644 index 0000000000..974d3e1710 --- /dev/null +++ b/packages/cursorless-engine/src/languages/LanguageDefinition.ts @@ -0,0 +1,69 @@ +import { ScopeType, SimpleScopeType } from "@cursorless/common"; +import { Query } from "web-tree-sitter"; +import { ide } from "../singletons/ide.singleton"; +import { join } from "path"; +import { TreeSitterScopeHandler } from "../processTargets/modifiers/scopeHandlers"; +import { TreeSitter } from "../typings/TreeSitter"; +import { existsSync, readFileSync } from "fs"; +import { LanguageId } from "./constants"; + +/** + * Represents a language definition for a single language, including the + * tree-sitter query used to extract scopes for the given language + */ +export class LanguageDefinition { + private constructor( + private treeSitter: TreeSitter, + /** + * The tree-sitter query used to extract scopes for the given language. + * Note that this query contains patterns for all scope types that the + * language supports using new-style tree-sitter queries + */ + private query: Query, + ) {} + + /** + * Construct a language definition for the given language id, if the language + * has a new-style query definition, or return undefined if the language doesn't + * + * @param treeSitter The tree-sitter instance to use for parsing + * @param languageId The language id for which to create a language definition + * @returns A language definition for the given language id, or undefined if the given language + * id doesn't have a new-style query definition + */ + static create( + treeSitter: TreeSitter, + languageId: LanguageId, + ): LanguageDefinition | undefined { + const queryPath = join(ide().assetsRoot, "queries", `${languageId}.scm`); + + if (!existsSync(queryPath)) { + return undefined; + } + + const rawLanguageQueryString = readFileSync(queryPath, "utf8"); + + return new LanguageDefinition( + treeSitter, + treeSitter.getLanguage(languageId)!.query(rawLanguageQueryString), + ); + } + + /** + * @param scopeType The scope type for which to get a scope handler + * @returns A scope handler for the given scope type and language id, or + * undefined if the given scope type / language id combination is still using + * legacy pathways + */ + getScopeHandler(scopeType: ScopeType) { + if (!this.query.captureNames.includes(scopeType.type)) { + return undefined; + } + + return new TreeSitterScopeHandler( + this.treeSitter, + this.query, + scopeType as SimpleScopeType, + ); + } +} diff --git a/packages/cursorless-engine/src/languages/LanguageDefinitionImpl.ts b/packages/cursorless-engine/src/languages/LanguageDefinitionImpl.ts deleted file mode 100644 index 35e86245cd..0000000000 --- a/packages/cursorless-engine/src/languages/LanguageDefinitionImpl.ts +++ /dev/null @@ -1,38 +0,0 @@ -import { ScopeType, SimpleScopeType } from "@cursorless/common"; -import { Query } from "web-tree-sitter"; -import { LanguageDefinition } from "./LanguageDefinitions"; -import { LanguageId } from "./constants"; -import { ide } from "../singletons/ide.singleton"; -import { join } from "path"; -import { TreeSitterScopeHandler } from "../processTargets/modifiers/scopeHandlers"; -import { TreeSitter } from "../typings/TreeSitter"; -import { readFileSync } from "fs"; - -export class LanguageDefinitionImpl implements LanguageDefinition { - private query!: Query; - - constructor(private treeSitter: TreeSitter, private languageId: LanguageId) {} - - init() { - const rawLanguageQueryString = readFileSync( - join(ide().assetsRoot, "queries", `${this.languageId}.scm`), - "utf8", - ); - - this.query = this.treeSitter - .getLanguage(this.languageId)! - .query(rawLanguageQueryString); - } - - maybeGetLanguageScopeHandler(scopeType: ScopeType) { - if (!this.query.captureNames.includes(scopeType.type)) { - return undefined; - } - - return new TreeSitterScopeHandler( - this.treeSitter, - this.query, - scopeType as SimpleScopeType, - ); - } -} diff --git a/packages/cursorless-engine/src/languages/LanguageDefinitions.ts b/packages/cursorless-engine/src/languages/LanguageDefinitions.ts index 3910332da7..6a3bc22735 100644 --- a/packages/cursorless-engine/src/languages/LanguageDefinitions.ts +++ b/packages/cursorless-engine/src/languages/LanguageDefinitions.ts @@ -1,12 +1,54 @@ -import { ScopeType } from "@cursorless/common"; -import { ScopeHandler } from "../processTargets/modifiers/scopeHandlers/scopeHandler.types"; +import { TreeSitter } from ".."; +import { LanguageDefinition } from "./LanguageDefinition"; +import { LanguageId } from "./constants"; -export interface LanguageDefinitions { - get(languageId: string): LanguageDefinition | undefined; -} +/** + * Sentinel value to indicate that a language doesn't have + * a new-style query definition file + */ +const LANGUAGE_UNDEFINED = Symbol("LANGUAGE_UNDEFINED"); + +/** + * Keeps a map from language ids to {@link LanguageDefinition} instances, + * constructing them as necessary + */ +export class LanguageDefinitions { + /** + * Maps from language id to {@link LanguageDefinition} or + * {@link LANGUAGE_UNDEFINED} if language doesn't have new-style definitions. + * We use a sentinel value instead of undefined so that we can distinguish + * between a situation where we haven't yet checked whether a language has a + * new-style query definition and a situation where we've checked and found + * that it doesn't. The former case is represented by `undefined` (due to the + * semantics of {@link Map.get}), while the latter is represented by the + * sentinel value. + */ + private languageDefinitions: Map< + string, + LanguageDefinition | typeof LANGUAGE_UNDEFINED + > = new Map(); + + constructor(private treeSitter: TreeSitter) {} + + /** + * Get a language definition for the given language id, if the language + * has a new-style query definition, or return undefined if the language doesn't + * + * @param languageId The language id for which to get a language definition + * @returns A language definition for the given language id, or undefined if + * the given language id doesn't have a new-style query definition + */ + get(languageId: string): LanguageDefinition | undefined { + let definition = this.languageDefinitions.get(languageId); + + if (definition == null) { + definition = + LanguageDefinition.create(this.treeSitter, languageId as LanguageId) ?? + LANGUAGE_UNDEFINED; + + this.languageDefinitions.set(languageId, definition); + } -export interface LanguageDefinition { - maybeGetLanguageScopeHandler: ( - scopeType: ScopeType, - ) => ScopeHandler | undefined; + return definition === LANGUAGE_UNDEFINED ? undefined : definition; + } } diff --git a/packages/cursorless-engine/src/languages/LanguageDefinitionsImpl.ts b/packages/cursorless-engine/src/languages/LanguageDefinitionsImpl.ts deleted file mode 100644 index 72c41156b4..0000000000 --- a/packages/cursorless-engine/src/languages/LanguageDefinitionsImpl.ts +++ /dev/null @@ -1,37 +0,0 @@ -import { TreeSitter } from ".."; -import { LanguageDefinition, LanguageDefinitions } from "./LanguageDefinitions"; -import { LanguageId } from "./constants"; -import { LanguageDefinitionImpl } from "./LanguageDefinitionImpl"; - -export class LanguageDefinitionsImpl implements LanguageDefinitions { - private languageDefinitions: Map = - new Map(); - - constructor(private treeSitter: TreeSitter) {} - - get(languageId: LanguageId): LanguageDefinition | undefined { - if (!languages.includes(languageId)) { - return undefined; - } - - let definition = this.languageDefinitions.get(languageId); - - if (definition == null) { - definition = new LanguageDefinitionImpl(this.treeSitter, languageId); - definition.init(); - this.languageDefinitions.set(languageId, definition); - } - - return definition; - } -} - -/** - * A list of languages which have query definitions. Note that it's possible - * for a language to have some of its scope types defined via queries and the - * rest via legacy `nodeMatcher` definitions. The - * {@link LanguageDefinitionImpl} will return `undefined` for any scope types - * which are not defined via queries, which will cause the modifier stage to - * fall back to the legacy `nodeMatcher` definitions. - */ -const languages: LanguageId[] = ["ruby"]; diff --git a/packages/cursorless-engine/src/languages/index.ts b/packages/cursorless-engine/src/languages/index.ts index eb3b766e4c..f5f79c357c 100644 --- a/packages/cursorless-engine/src/languages/index.ts +++ b/packages/cursorless-engine/src/languages/index.ts @@ -3,5 +3,5 @@ import { SupportedLanguageId, supportedLanguageIds } from "./constants"; export function isLanguageSupported( languageId: string, ): languageId is SupportedLanguageId { - return languageId in supportedLanguageIds; + return supportedLanguageIds.includes(languageId as SupportedLanguageId); } diff --git a/packages/cursorless-engine/src/languages/ruby.ts b/packages/cursorless-engine/src/languages/ruby.ts index 2b3c89ce0e..9bf1c87158 100644 --- a/packages/cursorless-engine/src/languages/ruby.ts +++ b/packages/cursorless-engine/src/languages/ruby.ts @@ -175,7 +175,6 @@ const nodeMatchers: Partial< "argument_list", ), collectionKey: trailingMatcher(["pair[key]"], [":"]), - className: "class[name]", name: [ "assignment[left]", "operator_assignment[left]", diff --git a/packages/cursorless-engine/src/processTargets/modifiers/scopeHandlers/ScopeHandlerFactoryImpl.ts b/packages/cursorless-engine/src/processTargets/modifiers/scopeHandlers/ScopeHandlerFactoryImpl.ts index cb4e5dfc91..e11acfbb63 100644 --- a/packages/cursorless-engine/src/processTargets/modifiers/scopeHandlers/ScopeHandlerFactoryImpl.ts +++ b/packages/cursorless-engine/src/processTargets/modifiers/scopeHandlers/ScopeHandlerFactoryImpl.ts @@ -56,7 +56,7 @@ export class ScopeHandlerFactoryImpl implements ScopeHandlerFactory { default: return this.languageDefinitions .get(languageId) - ?.maybeGetLanguageScopeHandler(scopeType); + ?.getScopeHandler(scopeType); } } } diff --git a/packages/cursorless-engine/src/processTargets/modifiers/scopeHandlers/TreeSitterScopeHandler.ts b/packages/cursorless-engine/src/processTargets/modifiers/scopeHandlers/TreeSitterScopeHandler.ts index 823d228d20..70991186f6 100644 --- a/packages/cursorless-engine/src/processTargets/modifiers/scopeHandlers/TreeSitterScopeHandler.ts +++ b/packages/cursorless-engine/src/processTargets/modifiers/scopeHandlers/TreeSitterScopeHandler.ts @@ -42,12 +42,8 @@ export class TreeSitterScopeHandler extends BaseScopeHandler { ): Iterable { const { document } = editor; - const { start, end } = this.getQueryRange( - document, - position, - direction, - hints, - ); + /** Narrow the range within which tree-sitter searches, for performance */ + const { start, end } = getQueryRange(document, position, direction, hints); yield* this.query .matches( @@ -62,84 +58,128 @@ export class TreeSitterScopeHandler extends BaseScopeHandler { .sort((a, b) => compareTargetScopes(direction, position, a, b)); } - /** - * Constructs a range to pass to {@link Query.matches} to find scopes. Note - * that {@link Query.matches} will only return scopes that have non-empty - * intersection with this range. Also note that the base - * {@link BaseScopeHandler.generateScopes} will filter out any extra scopes - * that we yield, so we don't need to be totally precise. - * - * @returns Range to pass to {@link Query.matches} - */ - private getQueryRange( - document: TextDocument, - position: Position, - direction: Direction, - { containment, distalPosition }: ScopeIteratorRequirements, - ) { - const offset = document.offsetAt(position); - const distalOffset = - distalPosition == null ? null : document.offsetAt(distalPosition); - - if (containment === "required") { - // If containment is required, we smear the position left and right by one - // character so that we have a non-empty intersection with any scope that - // touches position - return { - start: document.positionAt(offset - 1), - end: document.positionAt(offset + 1), - }; - } - - // If containment is disallowed, we can shift the position forward by a character to avoid - // matching scopes that touch position. Otherwise, we shift the position backward by a - // character to ensure we get scopes that touch position. - const proximalShift = containment === "disallowed" ? 1 : -1; - - // FIXME: Don't go all the way to end of document when there is no distalPosition? - // Seems wasteful to query all the way to end of document for something like "next funk" - // Might be better to start smaller and exponentially grow - return direction === "forward" - ? { - start: document.positionAt(offset + proximalShift), - end: - distalOffset == null - ? document.range.end - : document.positionAt(distalOffset + 1), - } - : { - start: - distalOffset == null - ? document.range.start - : document.positionAt(distalOffset - 1), - end: document.positionAt(offset - proximalShift), - }; - } - private matchToScope(editor: TextEditor, match: QueryMatch): TargetScope { + const scopeTypeType = this.scopeType.type; + const contentRange = getNodeRange( - match.captures.find((capture) => capture.name === this.scopeType.type)! - .node, + match.captures.find((capture) => capture.name === scopeTypeType)!.node, ); + const domain = + getRelatedRange(match, scopeTypeType, "domain") ?? contentRange; + + const removalRange = getRelatedRange(match, scopeTypeType, "removal"); + + const leadingDelimiterRange = getRelatedRange( + match, + scopeTypeType, + "leading", + ); + + const trailingDelimiterRange = getRelatedRange( + match, + scopeTypeType, + "trailing", + ); + + const interiorRange = getRelatedRange(match, scopeTypeType, "interior"); + return { editor, - // FIXME: Actually get domain - domain: contentRange, + domain, getTarget: (isReversed) => new ScopeTypeTarget({ - scopeTypeType: this.scopeType.type, + scopeTypeType, editor, isReversed, contentRange, - // FIXME: Actually get removalRange - removalRange: contentRange, - // FIXME: Other fields here + removalRange, + leadingDelimiterRange, + trailingDelimiterRange, + interiorRange, + // FIXME: Add delimiter text }), }; } } +/** + * Constructs a range to pass to {@link Query.matches} to find scopes. Note + * that {@link Query.matches} will only return scopes that have non-empty + * intersection with this range. Also note that the base + * {@link BaseScopeHandler.generateScopes} will filter out any extra scopes + * that we yield, so we don't need to be totally precise. + * + * @returns Range to pass to {@link Query.matches} + */ +function getQueryRange( + document: TextDocument, + position: Position, + direction: Direction, + { containment, distalPosition }: ScopeIteratorRequirements, +) { + const offset = document.offsetAt(position); + const distalOffset = + distalPosition == null ? null : document.offsetAt(distalPosition); + + if (containment === "required") { + // If containment is required, we smear the position left and right by one + // character so that we have a non-empty intersection with any scope that + // touches position + return { + start: document.positionAt(offset - 1), + end: document.positionAt(offset + 1), + }; + } + + // If containment is disallowed, we can shift the position forward by a character to avoid + // matching scopes that touch position. Otherwise, we shift the position backward by a + // character to ensure we get scopes that touch position. + const proximalShift = containment === "disallowed" ? 1 : -1; + + // FIXME: Don't go all the way to end of document when there is no distalPosition? + // Seems wasteful to query all the way to end of document for something like "next funk" + // Might be better to start smaller and exponentially grow + return direction === "forward" + ? { + start: document.positionAt(offset + proximalShift), + end: + distalOffset == null + ? document.range.end + : document.positionAt(distalOffset + 1), + } + : { + start: + distalOffset == null + ? document.range.start + : document.positionAt(distalOffset - 1), + end: document.positionAt(offset - proximalShift), + }; +} + +/** + * Gets the range of a node that is related to the scope. For example, if the + * scope is "class name", the `domain` node would be the containing class. + * + * @param match The match to get the range from + * @param scopeTypeType The type of the scope + * @param relationship The relationship to get the range for, eg "domain", or "removal" + * @returns A range or undefined if no range was found + */ +function getRelatedRange( + match: QueryMatch, + scopeTypeType: string, + relationship: string, +) { + const relatedNode = match.captures.find( + (capture) => + capture.name === `${scopeTypeType}.${relationship}` || + capture.name === relationship, + )?.node; + + return relatedNode == null ? undefined : getNodeRange(relatedNode); +} + function positionToPoint(start: Position): Point | undefined { return { row: start.line, column: start.character }; } diff --git a/queries/ruby.scm b/queries/ruby.scm index 19773d755b..812f651270 100644 --- a/queries/ruby.scm +++ b/queries/ruby.scm @@ -13,3 +13,4 @@ (regex) @regularExpression (class) @class +(class name: (_) @className) @domain