From 7c5b186c7132294b8cd610a644119fc8e353e7eb Mon Sep 17 00:00:00 2001
From: Nick Rolfe
Date: Mon, 30 Jun 2025 10:41:34 +0100
Subject: [PATCH] Ruby/QL: add discard predicates for locations

---
 .../src/codeql_ql/ast/internal/TreeSitter.qll | 14 +++
 .../codeql/ruby/ast/internal/TreeSitter.qll   | 14 +++
 .../src/generator/mod.rs                      |  8 +-
 .../src/generator/ql_gen.rs                   | 85 +++++++++++++++++++
 4 files changed, 118 insertions(+), 3 deletions(-)

diff --git a/ql/ql/src/codeql_ql/ast/internal/TreeSitter.qll b/ql/ql/src/codeql_ql/ast/internal/TreeSitter.qll
index 4eb2c86defd0..2d4a9d654652 100644
--- a/ql/ql/src/codeql_ql/ast/internal/TreeSitter.qll
+++ b/ql/ql/src/codeql_ql/ast/internal/TreeSitter.qll
@@ -9,6 +9,20 @@ import codeql.Locations as L
 overlay[local]
 private predicate isOverlay() { databaseMetadata("isOverlay", "true") }
 
+/** Holds if `loc` is in the `file` and is part of the overlay base database. */
+overlay[local]
+private predicate discardableLocation(@file file, @location_default loc) {
+  not isOverlay() and locations_default(loc, file, _, _, _, _)
+}
+
+/** Holds if `loc` should be discarded, because it is part of the overlay base and is in a file that was also extracted as part of the overlay database. */
+overlay[discard_entity]
+private predicate discardLocation(@location_default loc) {
+  exists(@file file, string path | files(file, path) |
+    discardableLocation(file, loc) and overlayChangedFiles(path)
+  )
+}
+
 module QL {
   /** The base class for all AST nodes */
   class AstNode extends @ql_ast_node {
diff --git a/ruby/ql/lib/codeql/ruby/ast/internal/TreeSitter.qll b/ruby/ql/lib/codeql/ruby/ast/internal/TreeSitter.qll
index 62e51c613989..4d9cd901f237 100644
--- a/ruby/ql/lib/codeql/ruby/ast/internal/TreeSitter.qll
+++ b/ruby/ql/lib/codeql/ruby/ast/internal/TreeSitter.qll
@@ -9,6 +9,20 @@ import codeql.Locations as L
 overlay[local]
 private predicate isOverlay() { databaseMetadata("isOverlay", "true") }
 
+/** Holds if `loc` is in the `file` and is part of the overlay base database. */
+overlay[local]
+private predicate discardableLocation(@file file, @location_default loc) {
+  not isOverlay() and locations_default(loc, file, _, _, _, _)
+}
+
+/** Holds if `loc` should be discarded, because it is part of the overlay base and is in a file that was also extracted as part of the overlay database. */
+overlay[discard_entity]
+private predicate discardLocation(@location_default loc) {
+  exists(@file file, string path | files(file, path) |
+    discardableLocation(file, loc) and overlayChangedFiles(path)
+  )
+}
+
 module Ruby {
   /** The base class for all AST nodes */
   class AstNode extends @ruby_ast_node {
diff --git a/shared/tree-sitter-extractor/src/generator/mod.rs b/shared/tree-sitter-extractor/src/generator/mod.rs
index 3e36a3e4f8ae..d9e6e00a1211 100644
--- a/shared/tree-sitter-extractor/src/generator/mod.rs
+++ b/shared/tree-sitter-extractor/src/generator/mod.rs
@@ -51,9 +51,11 @@ pub fn generate(
 
     ql::write(
         &mut ql_writer,
-        &[ql::TopLevel::Predicate(
-            ql_gen::create_is_overlay_predicate(),
-        )],
+        &[
+            ql::TopLevel::Predicate(ql_gen::create_is_overlay_predicate()),
+            ql::TopLevel::Predicate(ql_gen::create_discardable_location_predicate()),
+            ql::TopLevel::Predicate(ql_gen::create_discard_location_predicate()),
+        ],
     )?;
 
     for language in languages {
diff --git a/shared/tree-sitter-extractor/src/generator/ql_gen.rs b/shared/tree-sitter-extractor/src/generator/ql_gen.rs
index 84d55e751ce4..8b6c9c18c6f8 100644
--- a/shared/tree-sitter-extractor/src/generator/ql_gen.rs
+++ b/shared/tree-sitter-extractor/src/generator/ql_gen.rs
@@ -396,6 +396,91 @@ pub fn create_discard_ast_node_predicate(ast_node_name: &str) -> ql::Predicate {
     }
 }
 
+pub fn create_discardable_location_predicate() -> ql::Predicate<'static> {
+    ql::Predicate {
+        name: "discardableLocation",
+        qldoc: Some(String::from(
+            "Holds if `loc` is in the `file` and is part of the overlay base database.",
+        )),
+        overridden: false,
+        is_private: true,
+        is_final: false,
+        overlay: Some(ql::OverlayAnnotation::Local),
+        return_type: None,
+        formal_parameters: vec![
+            ql::FormalParameter {
+                name: "file",
+                param_type: ql::Type::At("file"),
+            },
+            ql::FormalParameter {
+                name: "loc",
+                param_type: ql::Type::At("location_default"),
+            },
+        ],
+        body: ql::Expression::And(vec![
+            ql::Expression::Negation(Box::new(ql::Expression::Pred("isOverlay", vec![]))),
+            ql::Expression::Pred(
+                "locations_default",
+                vec![
+                    ql::Expression::Var("loc"),
+                    ql::Expression::Var("file"),
+                    ql::Expression::Var("_"),
+                    ql::Expression::Var("_"),
+                    ql::Expression::Var("_"),
+                    ql::Expression::Var("_"),
+                ],
+            ),
+        ]),
+    }
+}
+
+/// Creates a discard predicate for `@location_default` entities. This is necessary because the
+/// tree-sitter extractors use `*` IDs for locations, which means that locations don't get shared
+/// between the base and overlay databases.
+pub fn create_discard_location_predicate() -> ql::Predicate<'static> {
+    ql::Predicate {
+        name: "discardLocation",
+        qldoc: Some(String::from(
+            "Holds if `loc` should be discarded, because it is part of the overlay base \
+            and is in a file that was also extracted as part of the overlay database.",
+        )),
+        overridden: false,
+        is_private: true,
+        is_final: false,
+        overlay: Some(ql::OverlayAnnotation::DiscardEntity),
+        return_type: None,
+        formal_parameters: vec![ql::FormalParameter {
+            name: "loc",
+            param_type: ql::Type::At("location_default"),
+        }],
+        body: ql::Expression::Aggregate {
+            name: "exists",
+            vars: vec![
+                ql::FormalParameter {
+                    name: "file",
+                    param_type: ql::Type::At("file"),
+                },
+                ql::FormalParameter {
+                    name: "path",
+                    param_type: ql::Type::String,
+                },
+            ],
+            range: Some(Box::new(ql::Expression::Pred(
+                "files",
+                vec![ql::Expression::Var("file"), ql::Expression::Var("path")],
+            ))),
+            expr: Box::new(ql::Expression::And(vec![
+                ql::Expression::Pred(
+                    "discardableLocation",
+                    vec![ql::Expression::Var("file"), ql::Expression::Var("loc")],
+                ),
+                ql::Expression::Pred("overlayChangedFiles", vec![ql::Expression::Var("path")]),
+            ])),
+            second_expr: None,
+        },
+    }
+}
+
 /// Returns an expression to get a field that's defined as a column in the parent's table.
 ///
 /// # Arguments