From afb064b2d7d6e630001c4d482c4ad560941df72d Mon Sep 17 00:00:00 2001 From: blyxyas Date: Tue, 22 Nov 2022 17:20:28 +0100 Subject: [PATCH 01/17] Direct Method Call Lint Implementation (Draft) --- CHANGELOG.md | 1 + clippy_lints/src/declared_lints.rs | 1 + clippy_lints/src/direct_method_call.rs | 88 ++++++++++++++++++++++++++ clippy_lints/src/lib.rs | 2 + tests/ui/direct_method_call.rs | 24 +++++++ tests/ui/direct_method_call.stderr | 17 +++++ 6 files changed, 133 insertions(+) create mode 100644 clippy_lints/src/direct_method_call.rs create mode 100644 tests/ui/direct_method_call.rs create mode 100644 tests/ui/direct_method_call.stderr diff --git a/CHANGELOG.md b/CHANGELOG.md index 6f1f73c1fd2f..d1b5d6b173c4 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3966,6 +3966,7 @@ Released 2018-09-13 [`derive_hash_xor_eq`]: https://rust-lang.github.io/rust-clippy/master/index.html#derive_hash_xor_eq [`derive_ord_xor_partial_ord`]: https://rust-lang.github.io/rust-clippy/master/index.html#derive_ord_xor_partial_ord [`derive_partial_eq_without_eq`]: https://rust-lang.github.io/rust-clippy/master/index.html#derive_partial_eq_without_eq +[`direct_method_call`]: https://rust-lang.github.io/rust-clippy/master/index.html#direct_method_call [`disallowed_macros`]: https://rust-lang.github.io/rust-clippy/master/index.html#disallowed_macros [`disallowed_method`]: https://rust-lang.github.io/rust-clippy/master/index.html#disallowed_method [`disallowed_methods`]: https://rust-lang.github.io/rust-clippy/master/index.html#disallowed_methods diff --git a/clippy_lints/src/declared_lints.rs b/clippy_lints/src/declared_lints.rs index 0d3fc43a6443..9fbf448befdd 100644 --- a/clippy_lints/src/declared_lints.rs +++ b/clippy_lints/src/declared_lints.rs @@ -116,6 +116,7 @@ pub(crate) static LINTS: &[&crate::LintInfo] = &[ crate::derive::DERIVE_PARTIAL_EQ_WITHOUT_EQ_INFO, crate::derive::EXPL_IMPL_CLONE_ON_COPY_INFO, crate::derive::UNSAFE_DERIVE_DESERIALIZE_INFO, + crate::direct_method_call::DIRECT_METHOD_CALL_INFO, crate::disallowed_macros::DISALLOWED_MACROS_INFO, crate::disallowed_methods::DISALLOWED_METHODS_INFO, crate::disallowed_names::DISALLOWED_NAMES_INFO, diff --git a/clippy_lints/src/direct_method_call.rs b/clippy_lints/src/direct_method_call.rs new file mode 100644 index 000000000000..73c2d9632f49 --- /dev/null +++ b/clippy_lints/src/direct_method_call.rs @@ -0,0 +1,88 @@ +use clippy_utils::diagnostics::span_lint_and_sugg; +use clippy_utils::fn_def_id; +use clippy_utils::source::snippet_with_context; +use rustc_errors::Applicability; +use rustc_hir::{Expr}; +use rustc_lint::{LateContext, LateLintPass}; +use rustc_session::{declare_lint_pass, declare_tool_lint}; +use std::borrow::Cow; + +declare_clippy_lint! { + /// ### What it does + /// Suggests a better pattern of code, when a trait is used explicitly. + /// ### Why is this bad? + /// It's poorly readable. + /// ### Example + /// ```rust + /// f32::floor(4.5f32); + /// ``` + /// Use instead: + /// ```rust + /// (4.5f32).floor(); + /// ``` + #[clippy::version = "1.66.0"] + pub DIRECT_METHOD_CALL, + complexity, + "direct method call" +} + +declare_lint_pass!(DirectMethodCall => [DIRECT_METHOD_CALL]); + +// 'X::Y(Z) -> Z.Y()' When Z implements X + +impl LateLintPass<'_> for DirectMethodCall { + fn check_expr(&mut self, cx: &LateContext<'_>, expr: &Expr<'_>) { + if let Some(fnid) = fn_def_id(cx, expr) { + match fnid.as_local() { + None => { + dbg!("TODO: IMPLEMENT FUNCTION THAT AREN'T LOCAL"); + return () + }, + Some(_) => {}, + } + let hir = cx.tcx.hir(); + if let Some(fndecl) = hir.fn_decl_by_hir_id(hir.local_def_id_to_hir_id(fnid.as_local().unwrap())) { + if fndecl.implicit_self.has_implicit_self() && + // Discard if it's already a method x.y() + !cx.typeck_results().is_method_call(expr) + { + let snippet_raw = snippet_with_context( + cx, + expr.span, + expr.span.ctxt(), + "..", + &mut Applicability::MaybeIncorrect, + ) + .0; + let snippet_formatted = format_snippet(&snippet_raw); + span_lint_and_sugg( + cx, + DIRECT_METHOD_CALL, + expr.span, + "This is poorly readable", + "did you mean", + snippet_formatted, + Applicability::MaybeIncorrect, + ); + } + } + } + } +} + +fn format_snippet<'a>(snippet_raw: &'a Cow<'a, str>) -> String { + // W::X::Y(Z, ...N) = Y.Z(...N) + let mut segments = snippet_raw.split("::").collect::>(); + segments.remove(segments.len() - 2); + + let binding = segments.join("::").to_string(); + let no_trait = binding.split('(').collect::>(); + let method_name = no_trait[0]; + let mut args = no_trait[1].split(')').collect::>()[0].to_owned(); + // Remove whitespace + args.retain(|c| !c.is_whitespace()); + + let args = args.split(',').collect::>(); + let to_return = format!("({}).{}({})", args[0], method_name, args[1..].join(",").to_string()); + return to_return; +} diff --git a/clippy_lints/src/lib.rs b/clippy_lints/src/lib.rs index 4e640d7ebf65..f0b99218a34f 100644 --- a/clippy_lints/src/lib.rs +++ b/clippy_lints/src/lib.rs @@ -100,6 +100,7 @@ mod default_union_representation; mod dereference; mod derivable_impls; mod derive; +mod direct_method_call; mod disallowed_macros; mod disallowed_methods; mod disallowed_names; @@ -921,6 +922,7 @@ pub fn register_plugins(store: &mut rustc_lint::LintStore, sess: &Session, conf: store.register_late_pass(|_| Box::new(from_raw_with_void_ptr::FromRawWithVoidPtr)); store.register_late_pass(|_| Box::new(suspicious_xor_used_as_pow::ConfusingXorAndPow)); store.register_late_pass(move |_| Box::new(manual_is_ascii_check::ManualIsAsciiCheck::new(msrv))); + store.register_late_pass(|_| Box::new(direct_method_call::DirectMethodCall)); // add lints here, do not remove this comment, it's used in `new_lint` } diff --git a/tests/ui/direct_method_call.rs b/tests/ui/direct_method_call.rs new file mode 100644 index 000000000000..17878cd28285 --- /dev/null +++ b/tests/ui/direct_method_call.rs @@ -0,0 +1,24 @@ +#![allow(unused)] +#![warn(clippy::direct_method_call)] +#![allow(clippy::let_unit_value)] + +trait Xa { + fn hi(self) -> u8; + fn hoi(self, x: u32) -> u8; +} + +impl Xa for f32 { + fn hi(self) -> u8 { + 3 + } + fn hoi(self, x: u32) -> u8 { + 2 + } +} + +fn main() { + let _ = Xa::hi(4.5f32); + let _ = (4.5f32).hi(); + let _ = Xa::hoi(4.5f32, 4); + let _ = f32::floor(4.5f32); +} diff --git a/tests/ui/direct_method_call.stderr b/tests/ui/direct_method_call.stderr new file mode 100644 index 000000000000..595323de1004 --- /dev/null +++ b/tests/ui/direct_method_call.stderr @@ -0,0 +1,17 @@ +error: This is poorly readable + --> $DIR/direct_method_call.rs:20:13 + | +LL | let _ = Xa::hi(4.5f32); + | ^^^^^^^^^^^^^^ help: did you mean: `(4.5f32).hi()` + | + = note: `-D clippy::direct-method-call` implied by `-D warnings` + +error: This is poorly readable + --> $DIR/direct_method_call.rs:22:13 + | +LL | let _ = Xa::hoi(4.5f32, 4); + | ^^^^^^^^^^^^^^^^^^ help: did you mean: `(4.5f32).hoi(4)` + +[clippy_lints/src/direct_method_call.rs:38] "TODO: IMPLEMENT FUNCTION THAT AREN'T LOCAL" = "TODO: IMPLEMENT FUNCTION THAT AREN'T LOCAL" +error: aborting due to 2 previous errors + From 6e615093e69eec99893c4551e70d52c27f82ee5f Mon Sep 17 00:00:00 2001 From: blyxyas Date: Tue, 22 Nov 2022 17:20:28 +0100 Subject: [PATCH 02/17] Direct Method Call Lint Implementation (Draft) --- CHANGELOG.md | 1 + clippy_lints/src/declared_lints.rs | 1 + clippy_lints/src/direct_method_call.rs | 88 ++++++++++++++++++++++++++ clippy_lints/src/lib.rs | 4 +- tests/ui/direct_method_call.rs | 24 +++++++ tests/ui/direct_method_call.stderr | 17 +++++ 6 files changed, 134 insertions(+), 1 deletion(-) create mode 100644 clippy_lints/src/direct_method_call.rs create mode 100644 tests/ui/direct_method_call.rs create mode 100644 tests/ui/direct_method_call.stderr diff --git a/CHANGELOG.md b/CHANGELOG.md index 5b6b12c623af..2a19202f08d6 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3966,6 +3966,7 @@ Released 2018-09-13 [`derive_hash_xor_eq`]: https://rust-lang.github.io/rust-clippy/master/index.html#derive_hash_xor_eq [`derive_ord_xor_partial_ord`]: https://rust-lang.github.io/rust-clippy/master/index.html#derive_ord_xor_partial_ord [`derive_partial_eq_without_eq`]: https://rust-lang.github.io/rust-clippy/master/index.html#derive_partial_eq_without_eq +[`direct_method_call`]: https://rust-lang.github.io/rust-clippy/master/index.html#direct_method_call [`disallowed_macros`]: https://rust-lang.github.io/rust-clippy/master/index.html#disallowed_macros [`disallowed_method`]: https://rust-lang.github.io/rust-clippy/master/index.html#disallowed_method [`disallowed_methods`]: https://rust-lang.github.io/rust-clippy/master/index.html#disallowed_methods diff --git a/clippy_lints/src/declared_lints.rs b/clippy_lints/src/declared_lints.rs index eb3210946f11..b33e377dc0f9 100644 --- a/clippy_lints/src/declared_lints.rs +++ b/clippy_lints/src/declared_lints.rs @@ -116,6 +116,7 @@ pub(crate) static LINTS: &[&crate::LintInfo] = &[ crate::derive::DERIVE_PARTIAL_EQ_WITHOUT_EQ_INFO, crate::derive::EXPL_IMPL_CLONE_ON_COPY_INFO, crate::derive::UNSAFE_DERIVE_DESERIALIZE_INFO, + crate::direct_method_call::DIRECT_METHOD_CALL_INFO, crate::disallowed_macros::DISALLOWED_MACROS_INFO, crate::disallowed_methods::DISALLOWED_METHODS_INFO, crate::disallowed_names::DISALLOWED_NAMES_INFO, diff --git a/clippy_lints/src/direct_method_call.rs b/clippy_lints/src/direct_method_call.rs new file mode 100644 index 000000000000..73c2d9632f49 --- /dev/null +++ b/clippy_lints/src/direct_method_call.rs @@ -0,0 +1,88 @@ +use clippy_utils::diagnostics::span_lint_and_sugg; +use clippy_utils::fn_def_id; +use clippy_utils::source::snippet_with_context; +use rustc_errors::Applicability; +use rustc_hir::{Expr}; +use rustc_lint::{LateContext, LateLintPass}; +use rustc_session::{declare_lint_pass, declare_tool_lint}; +use std::borrow::Cow; + +declare_clippy_lint! { + /// ### What it does + /// Suggests a better pattern of code, when a trait is used explicitly. + /// ### Why is this bad? + /// It's poorly readable. + /// ### Example + /// ```rust + /// f32::floor(4.5f32); + /// ``` + /// Use instead: + /// ```rust + /// (4.5f32).floor(); + /// ``` + #[clippy::version = "1.66.0"] + pub DIRECT_METHOD_CALL, + complexity, + "direct method call" +} + +declare_lint_pass!(DirectMethodCall => [DIRECT_METHOD_CALL]); + +// 'X::Y(Z) -> Z.Y()' When Z implements X + +impl LateLintPass<'_> for DirectMethodCall { + fn check_expr(&mut self, cx: &LateContext<'_>, expr: &Expr<'_>) { + if let Some(fnid) = fn_def_id(cx, expr) { + match fnid.as_local() { + None => { + dbg!("TODO: IMPLEMENT FUNCTION THAT AREN'T LOCAL"); + return () + }, + Some(_) => {}, + } + let hir = cx.tcx.hir(); + if let Some(fndecl) = hir.fn_decl_by_hir_id(hir.local_def_id_to_hir_id(fnid.as_local().unwrap())) { + if fndecl.implicit_self.has_implicit_self() && + // Discard if it's already a method x.y() + !cx.typeck_results().is_method_call(expr) + { + let snippet_raw = snippet_with_context( + cx, + expr.span, + expr.span.ctxt(), + "..", + &mut Applicability::MaybeIncorrect, + ) + .0; + let snippet_formatted = format_snippet(&snippet_raw); + span_lint_and_sugg( + cx, + DIRECT_METHOD_CALL, + expr.span, + "This is poorly readable", + "did you mean", + snippet_formatted, + Applicability::MaybeIncorrect, + ); + } + } + } + } +} + +fn format_snippet<'a>(snippet_raw: &'a Cow<'a, str>) -> String { + // W::X::Y(Z, ...N) = Y.Z(...N) + let mut segments = snippet_raw.split("::").collect::>(); + segments.remove(segments.len() - 2); + + let binding = segments.join("::").to_string(); + let no_trait = binding.split('(').collect::>(); + let method_name = no_trait[0]; + let mut args = no_trait[1].split(')').collect::>()[0].to_owned(); + // Remove whitespace + args.retain(|c| !c.is_whitespace()); + + let args = args.split(',').collect::>(); + let to_return = format!("({}).{}({})", args[0], method_name, args[1..].join(",").to_string()); + return to_return; +} diff --git a/clippy_lints/src/lib.rs b/clippy_lints/src/lib.rs index 17dbd983b6c0..3e633d5a1ced 100644 --- a/clippy_lints/src/lib.rs +++ b/clippy_lints/src/lib.rs @@ -100,6 +100,7 @@ mod default_union_representation; mod dereference; mod derivable_impls; mod derive; +mod direct_method_call; mod disallowed_macros; mod disallowed_methods; mod disallowed_names; @@ -883,7 +884,8 @@ pub fn register_plugins(store: &mut rustc_lint::LintStore, sess: &Session, conf: store.register_late_pass(|_| Box::new(missing_trait_methods::MissingTraitMethods)); store.register_late_pass(|_| Box::new(from_raw_with_void_ptr::FromRawWithVoidPtr)); store.register_late_pass(|_| Box::new(suspicious_xor_used_as_pow::ConfusingXorAndPow)); - store.register_late_pass(move |_| Box::new(manual_is_ascii_check::ManualIsAsciiCheck::new(msrv()))); + store.register_late_pass(move |_| Box::new(manual_is_ascii_check::ManualIsAsciiCheck::new(msrv))); + store.register_late_pass(|_| Box::new(direct_method_call::DirectMethodCall)); // add lints here, do not remove this comment, it's used in `new_lint` } diff --git a/tests/ui/direct_method_call.rs b/tests/ui/direct_method_call.rs new file mode 100644 index 000000000000..17878cd28285 --- /dev/null +++ b/tests/ui/direct_method_call.rs @@ -0,0 +1,24 @@ +#![allow(unused)] +#![warn(clippy::direct_method_call)] +#![allow(clippy::let_unit_value)] + +trait Xa { + fn hi(self) -> u8; + fn hoi(self, x: u32) -> u8; +} + +impl Xa for f32 { + fn hi(self) -> u8 { + 3 + } + fn hoi(self, x: u32) -> u8 { + 2 + } +} + +fn main() { + let _ = Xa::hi(4.5f32); + let _ = (4.5f32).hi(); + let _ = Xa::hoi(4.5f32, 4); + let _ = f32::floor(4.5f32); +} diff --git a/tests/ui/direct_method_call.stderr b/tests/ui/direct_method_call.stderr new file mode 100644 index 000000000000..595323de1004 --- /dev/null +++ b/tests/ui/direct_method_call.stderr @@ -0,0 +1,17 @@ +error: This is poorly readable + --> $DIR/direct_method_call.rs:20:13 + | +LL | let _ = Xa::hi(4.5f32); + | ^^^^^^^^^^^^^^ help: did you mean: `(4.5f32).hi()` + | + = note: `-D clippy::direct-method-call` implied by `-D warnings` + +error: This is poorly readable + --> $DIR/direct_method_call.rs:22:13 + | +LL | let _ = Xa::hoi(4.5f32, 4); + | ^^^^^^^^^^^^^^^^^^ help: did you mean: `(4.5f32).hoi(4)` + +[clippy_lints/src/direct_method_call.rs:38] "TODO: IMPLEMENT FUNCTION THAT AREN'T LOCAL" = "TODO: IMPLEMENT FUNCTION THAT AREN'T LOCAL" +error: aborting due to 2 previous errors + From bb619ad1c695255eb1c73c16532e2c526868dedb Mon Sep 17 00:00:00 2001 From: blyxyas Date: Wed, 23 Nov 2022 00:11:15 +0100 Subject: [PATCH 03/17] Minor fixes (formatting, dogfood) --- clippy_lints/src/direct_method_call.rs | 21 +++++++++++---------- clippy_lints/src/lib.rs | 2 +- tests/ui/direct_method_call.stderr | 5 ++--- 3 files changed, 14 insertions(+), 14 deletions(-) diff --git a/clippy_lints/src/direct_method_call.rs b/clippy_lints/src/direct_method_call.rs index 73c2d9632f49..65aadf675ed4 100644 --- a/clippy_lints/src/direct_method_call.rs +++ b/clippy_lints/src/direct_method_call.rs @@ -1,11 +1,12 @@ +#![allow(clippy::redundant_clone)] + use clippy_utils::diagnostics::span_lint_and_sugg; use clippy_utils::fn_def_id; use clippy_utils::source::snippet_with_context; use rustc_errors::Applicability; -use rustc_hir::{Expr}; +use rustc_hir::Expr; use rustc_lint::{LateContext, LateLintPass}; use rustc_session::{declare_lint_pass, declare_tool_lint}; -use std::borrow::Cow; declare_clippy_lint! { /// ### What it does @@ -23,7 +24,7 @@ declare_clippy_lint! { #[clippy::version = "1.66.0"] pub DIRECT_METHOD_CALL, complexity, - "direct method call" + "Needlessly using explicit traits" } declare_lint_pass!(DirectMethodCall => [DIRECT_METHOD_CALL]); @@ -35,8 +36,8 @@ impl LateLintPass<'_> for DirectMethodCall { if let Some(fnid) = fn_def_id(cx, expr) { match fnid.as_local() { None => { - dbg!("TODO: IMPLEMENT FUNCTION THAT AREN'T LOCAL"); - return () + // TODO: IMPLEMENT FUNCTION THAT AREN'T LOCAL + return; }, Some(_) => {}, } @@ -59,7 +60,7 @@ impl LateLintPass<'_> for DirectMethodCall { cx, DIRECT_METHOD_CALL, expr.span, - "This is poorly readable", + "this is poorly readable", "did you mean", snippet_formatted, Applicability::MaybeIncorrect, @@ -70,12 +71,12 @@ impl LateLintPass<'_> for DirectMethodCall { } } -fn format_snippet<'a>(snippet_raw: &'a Cow<'a, str>) -> String { +fn format_snippet(snippet_raw: &str) -> String { // W::X::Y(Z, ...N) = Y.Z(...N) let mut segments = snippet_raw.split("::").collect::>(); segments.remove(segments.len() - 2); - let binding = segments.join("::").to_string(); + let binding = segments.join("::"); let no_trait = binding.split('(').collect::>(); let method_name = no_trait[0]; let mut args = no_trait[1].split(')').collect::>()[0].to_owned(); @@ -83,6 +84,6 @@ fn format_snippet<'a>(snippet_raw: &'a Cow<'a, str>) -> String { args.retain(|c| !c.is_whitespace()); let args = args.split(',').collect::>(); - let to_return = format!("({}).{}({})", args[0], method_name, args[1..].join(",").to_string()); - return to_return; + let to_return = format!("({}).{method_name}({})", args[0], args[1..].join(",")); + to_return } diff --git a/clippy_lints/src/lib.rs b/clippy_lints/src/lib.rs index 3e633d5a1ced..4ff5cf2a5ab5 100644 --- a/clippy_lints/src/lib.rs +++ b/clippy_lints/src/lib.rs @@ -884,7 +884,7 @@ pub fn register_plugins(store: &mut rustc_lint::LintStore, sess: &Session, conf: store.register_late_pass(|_| Box::new(missing_trait_methods::MissingTraitMethods)); store.register_late_pass(|_| Box::new(from_raw_with_void_ptr::FromRawWithVoidPtr)); store.register_late_pass(|_| Box::new(suspicious_xor_used_as_pow::ConfusingXorAndPow)); - store.register_late_pass(move |_| Box::new(manual_is_ascii_check::ManualIsAsciiCheck::new(msrv))); + store.register_late_pass(move |_| Box::new(manual_is_ascii_check::ManualIsAsciiCheck::new(msrv()))); store.register_late_pass(|_| Box::new(direct_method_call::DirectMethodCall)); // add lints here, do not remove this comment, it's used in `new_lint` } diff --git a/tests/ui/direct_method_call.stderr b/tests/ui/direct_method_call.stderr index 595323de1004..7ae7e71f2e52 100644 --- a/tests/ui/direct_method_call.stderr +++ b/tests/ui/direct_method_call.stderr @@ -1,4 +1,4 @@ -error: This is poorly readable +error: this is poorly readable --> $DIR/direct_method_call.rs:20:13 | LL | let _ = Xa::hi(4.5f32); @@ -6,12 +6,11 @@ LL | let _ = Xa::hi(4.5f32); | = note: `-D clippy::direct-method-call` implied by `-D warnings` -error: This is poorly readable +error: this is poorly readable --> $DIR/direct_method_call.rs:22:13 | LL | let _ = Xa::hoi(4.5f32, 4); | ^^^^^^^^^^^^^^^^^^ help: did you mean: `(4.5f32).hoi(4)` -[clippy_lints/src/direct_method_call.rs:38] "TODO: IMPLEMENT FUNCTION THAT AREN'T LOCAL" = "TODO: IMPLEMENT FUNCTION THAT AREN'T LOCAL" error: aborting due to 2 previous errors From a40934314f8a74d02146278a729eb20a920161b0 Mon Sep 17 00:00:00 2001 From: blyxyas Date: Fri, 25 Nov 2022 00:11:35 +0100 Subject: [PATCH 04/17] Implement lint for non-local functions --- clippy_lints/src/direct_method_call.rs | 14 +++----------- tests/ui/direct_method_call.rs | 24 ++++++++++++++---------- tests/ui/direct_method_call.stderr | 20 +++++++++++++------- 3 files changed, 30 insertions(+), 28 deletions(-) diff --git a/clippy_lints/src/direct_method_call.rs b/clippy_lints/src/direct_method_call.rs index 65aadf675ed4..5dd9eb295786 100644 --- a/clippy_lints/src/direct_method_call.rs +++ b/clippy_lints/src/direct_method_call.rs @@ -24,7 +24,7 @@ declare_clippy_lint! { #[clippy::version = "1.66.0"] pub DIRECT_METHOD_CALL, complexity, - "Needlessly using explicit traits" + "Needlessly using explicit trait" } declare_lint_pass!(DirectMethodCall => [DIRECT_METHOD_CALL]); @@ -34,16 +34,8 @@ declare_lint_pass!(DirectMethodCall => [DIRECT_METHOD_CALL]); impl LateLintPass<'_> for DirectMethodCall { fn check_expr(&mut self, cx: &LateContext<'_>, expr: &Expr<'_>) { if let Some(fnid) = fn_def_id(cx, expr) { - match fnid.as_local() { - None => { - // TODO: IMPLEMENT FUNCTION THAT AREN'T LOCAL - return; - }, - Some(_) => {}, - } - let hir = cx.tcx.hir(); - if let Some(fndecl) = hir.fn_decl_by_hir_id(hir.local_def_id_to_hir_id(fnid.as_local().unwrap())) { - if fndecl.implicit_self.has_implicit_self() && + if let Some(fnsig) = cx.tcx.opt_associated_item(fnid) { + if fnsig.fn_has_self_parameter && // Discard if it's already a method x.y() !cx.typeck_results().is_method_call(expr) { diff --git a/tests/ui/direct_method_call.rs b/tests/ui/direct_method_call.rs index 17878cd28285..6407dfedbbce 100644 --- a/tests/ui/direct_method_call.rs +++ b/tests/ui/direct_method_call.rs @@ -1,24 +1,28 @@ #![allow(unused)] #![warn(clippy::direct_method_call)] -#![allow(clippy::let_unit_value)] -trait Xa { - fn hi(self) -> u8; - fn hoi(self, x: u32) -> u8; +trait MyTrait { + fn function(self) -> u8; + fn function_with_args(self, x: u32) -> u8; } -impl Xa for f32 { - fn hi(self) -> u8 { +impl MyTrait for f32 { + fn function(self) -> u8 { 3 } - fn hoi(self, x: u32) -> u8 { + fn function_with_args(self, x: u32) -> u8 { 2 } } fn main() { - let _ = Xa::hi(4.5f32); - let _ = (4.5f32).hi(); - let _ = Xa::hoi(4.5f32, 4); + // Should warn + let _ = MyTrait::function(4.5f32); + let _ = MyTrait::function_with_args(4.5f32, 4); let _ = f32::floor(4.5f32); + + // Should not warn + let _ = (4.5f32).function(); + let _ = (4.5f32).function_with_args(4); + let _ = (4.5f32).floor(); } diff --git a/tests/ui/direct_method_call.stderr b/tests/ui/direct_method_call.stderr index 7ae7e71f2e52..4232d91b809d 100644 --- a/tests/ui/direct_method_call.stderr +++ b/tests/ui/direct_method_call.stderr @@ -1,16 +1,22 @@ error: this is poorly readable - --> $DIR/direct_method_call.rs:20:13 + --> $DIR/direct_method_call.rs:19:13 | -LL | let _ = Xa::hi(4.5f32); - | ^^^^^^^^^^^^^^ help: did you mean: `(4.5f32).hi()` +LL | let _ = MyTrait::function(4.5f32); + | ^^^^^^^^^^^^^^^^^^^^^^^^^ help: did you mean: `(4.5f32).function()` | = note: `-D clippy::direct-method-call` implied by `-D warnings` error: this is poorly readable - --> $DIR/direct_method_call.rs:22:13 + --> $DIR/direct_method_call.rs:21:13 | -LL | let _ = Xa::hoi(4.5f32, 4); - | ^^^^^^^^^^^^^^^^^^ help: did you mean: `(4.5f32).hoi(4)` +LL | let _ = MyTrait::function_with_args(4.5f32, 4); + | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ help: did you mean: `(4.5f32).function_with_args(4)` -error: aborting due to 2 previous errors +error: this is poorly readable + --> $DIR/direct_method_call.rs:23:13 + | +LL | let _ = f32::floor(4.5f32); + | ^^^^^^^^^^^^^^^^^^ help: did you mean: `(4.5f32).floor()` + +error: aborting due to 3 previous errors From 395b8d3cac5e8d609aad6fcc23e2c88d7f7af0a8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Felix=20Kohlgr=C3=BCber?= Date: Tue, 12 Mar 2019 13:38:46 +0100 Subject: [PATCH 05/17] add syntax-tree-patterns RFC --- rfcs/0001-syntax-tree-patterns.md | 797 ++++++++++++++++++++++++++++++ 1 file changed, 797 insertions(+) create mode 100644 rfcs/0001-syntax-tree-patterns.md diff --git a/rfcs/0001-syntax-tree-patterns.md b/rfcs/0001-syntax-tree-patterns.md new file mode 100644 index 000000000000..8d285b2ef44c --- /dev/null +++ b/rfcs/0001-syntax-tree-patterns.md @@ -0,0 +1,797 @@ +- Feature Name: syntax-tree-patterns +- Start Date: 2019-03-12 +- RFC PR: (leave this empty) +- Rust Issue: (leave this empty) + +> Note: This project is part of my Master's Thesis (supervised by [@oli-obk](https://github.com/oli-obk)) + +# Summary +[summary]: #summary + +Introduce a domain-specific language (similar to regular expressions) that allows to describe lints using *syntax tree patterns*. + + +# Motivation +[motivation]: #motivation + + +Finding parts of a syntax tree (AST, HIR, ...) that have certain properties (e.g. "*an if that has a block as its condition*") is a major task when writing lints. For non-trivial lints, it often requires nested pattern matching of AST / HIR nodes. For example, testing that an expression is a boolean literal requires the following checks: + +``` +if let ast::ExprKind::Lit(lit) = &expr.node { + if let ast::LitKind::Bool(_) = &lit.node { + ... + } +} +``` + +Writing this kind of matching code quickly becomes a complex task and the resulting code is often hard to comprehend. The code below shows a simplified version of the pattern matching required by the `collapsible_if` lint: + +``` +// simplified version of the collapsible_if lint +if let ast::ExprKind::If(check, then, None) = &expr.node { + if then.stmts.len() == 1 { + if let ast::StmtKind::Expr(inner) | ast::StmtKind::Semi(inner) = &then.stmts[0].node { + if let ast::ExprKind::If(check_inner, content, None) = &inner.node { + ... + } + } + } +} +``` + +The `if_chain` macro can improve readability by flattening the nested if statements, but the resulting code is still quite hard to read: + +``` +// simplified version of the collapsible_if lint +if_chain! { + if let ast::ExprKind::If(check, then, None) = &expr.node; + if then.stmts.len() == 1; + if let ast::StmtKind::Expr(inner) | ast::StmtKind::Semi(inner) = &then.stmts[0].node; + if let ast::ExprKind::If(check_inner, content, None) = &inner.node; + then { + ... + } +} +``` + +The code above matches if expressions that contain only another if expression (where both ifs don't have an else branch). While it's easy to explain what the lint does, it's hard to see that from looking at the code samples above. + +Following the motivation above, the first goal this RFC is to **simplify writing and reading lints**. + +The second part of the motivation is clippy's dependence on unstable compiler-internal data structures. Clippy lints are currently written against the compiler's AST / HIR which means that even small changes in these data structures might break a lot of lints. The second goal of this RFC is to **make lints independant of the compiler's AST / HIR data structures**. + +# Approach + +A lot of complexity in writing lints currently seems to come from having to manually implement the matching logic (see code samples above). It's an imparative style that describes *how* to match a syntax tree node instead of specifying *what* should be matched against declaratively. In other areas, it's common to use declarative patterns to describe desired information and let the implementation do the actual matching. A well-known example of this approach are [regular expressions](https://en.wikipedia.org/wiki/Regular_expression). Instead of writing code that detects certain character sequences, one can describe a search pattern using a domain-specific language and search for matches using that pattern. The advantage of using a declarative domain-specific language is that its limited domain (e.g. matching character sequences in the case of regular expressions) allows to express entities in that domain in a very natural and expressive way. + +While regular expressions are very useful when searching for patterns in flat character sequences, they cannot easily be applied to hierarchical data structures like syntax trees. This RFC therefore proposes a pattern matching system that is inspired by regular expressions and designed for hierarchical syntax trees. + +# Guide-level explanation +[guide-level-explanation]: #guide-level-explanation + +This proposal adds a `pattern!` macro that can be used to specify a syntax tree pattern to search for. A simple pattern is shown below: + +``` +pattern!{ + my_pattern: Expr = + Lit(Bool(false)) +} +``` + +This macro call defines a pattern named `my_pattern` that can be matched against an `Expr` syntax tree node. The actual pattern (`Lit(Bool(false))` in this case) defines which syntax trees should match the pattern. This pattern matches expressions that are boolean literals with value `false`. + +The pattern can then be used to implement lints in the following way: + +``` +... + +impl EarlyLintPass for MyAwesomeLint { + fn check_expr(&mut self, cx: &EarlyContext, expr: &syntax::ast::Expr) { + + if my_pattern(expr).is_some() { + cx.span_lint( + MY_AWESOME_LINT, + expr.span, + "This is a match for a simple pattern. Well done!", + ); + } + + } +} +``` + +The `pattern!` macro call expands to a function `my_pattern` that expects a syntax tree expression as its argument and returns an `Option` that indicates whether the pattern matched. + +> Note: The result type is explained in more detail in [a later section](#the-result-type). For now, it's enough to know that the result is `Some` if the pattern matched and `None` otherwise. + +## Pattern syntax + +The following examples demonstate the pattern syntax: + + +#### Any (`_`) + +The simplest pattern is the any pattern. It matches anything and is therefore similar to regex's `*`. + +``` +pattern!{ + // matches any expression + my_pattern: Expr = + _ +} +``` + +#### Node (`()`) + +Nodes are used to match a specific variant of an AST node. A node has a name and a number of arguments that depends on the node type. For example, the `Lit` node has a single argument that describes the type of the literal. As another example, the `If` node has three arguments describing the if's condition, then block and else block. + +``` +pattern!{ + // matches any expression that is a literal + my_pattern: Expr = + Lit(_) +} + +pattern!{ + // matches any expression that is a boolean literal + my_pattern: Expr = + Lit(Bool(_)) +} + +pattern!{ + // matches if expressions that have a boolean literal in their condition + // Note: The `_?` syntax here means that the else branch is optional and can be anything. + // This is discussed in more detail in the section `Repetition`. + my_pattern: Expr = + If( Lit(Bool(_)) , _, _?) +} +``` + + +#### Literal (``) + +A pattern can also contain Rust literals. These literals match themselves. + +``` +pattern!{ + // matches the boolean literal false + my_pattern: Expr = + Lit(Bool(false)) +} + +pattern!{ + // matches the character literal 'x' + my_pattern: Expr = + Lit(Char('x')) +} +``` + +#### Alternations (`a | b`) + +``` +pattern!{ + // matches if the literal is a boolean or integer literal + my_pattern: Lit = + Bool(_) | Int(_) +} + +pattern!{ + // matches if the expression is a char literal with value 'x' or 'y' + my_pattern: Expr = + Lit( Char('x' | 'y') ) +} +``` + +#### Empty (`()`) + +The empty pattern represents an empty sequence or the `None` variant of an optional. + +``` +pattern!{ + // matches if the expression is an empty array + my_pattern: Expr = + Array( () ) +} + +pattern!{ + // matches if expressions that don't have an else clause + my_pattern: Expr = + If(_, _, ()) +} +``` + +#### Sequence (` `) + +``` +pattern!{ + // matches the array [true, false] + my_pattern: Expr = + Array( Lit(Bool(true)) Lit(Bool(false)) ) +} +``` + +#### Repetition (`*`, `+`, `?`, `{n}`, `{n,m}`, `{n,}`) + +Elements may be repeated. The syntax for specifying repetitions is identical to [regex's syntax](https://docs.rs/regex/1.1.2/regex/#repetitions). + +``` +pattern!{ + // matches arrays that contain 2 'x's as their last or second-last elements + // Examples: + // ['x', 'x'] match + // ['x', 'x', 'y'] match + // ['a', 'b', 'c', 'x', 'x', 'y'] match + // ['x', 'x', 'y', 'z'] no match + my_pattern: Expr = + Array( _* Lit(Char('x')){2} _? ) +} + +pattern!{ + // matches if expressions that **may or may not** have an else block + // Attn: `If(_, _, _)` matches only ifs that **have** an else block + // + // | if with else block | if witout else block + // If(_, _, _) | match | no match + // If(_, _, _?) | match | match + // If(_, _, ()) | no match | match + my_pattern: Expr = + If(_, _, _?) +} +``` + +#### Named submatch (`#`) + +``` +pattern!{ + // matches character literals and gives the literal the name foo + my_pattern: Expr = + Lit(Char(_)#foo) +} + +pattern!{ + // matches character literals and gives the char the name bar + my_pattern: Expr = + Lit(Char(_#bar)) +} + +pattern!{ + // matches character literals and gives the expression the name baz + my_pattern: Expr = + Lit(Char(_))#baz +} +``` + +The reason for using named submatches is described in the section [The result type](#the-result-type). + +### Summary + +The following table gives an summary of the pattern syntax: + +| Syntax | Concept | Examples | +|-------------------------|------------------|--------------------------------------------| +|`_` | Any | `_` | +|`()` | Node | `Lit(Bool(true))`, `If(_, _, _)` | +|`` | Literal | `'x'`, `false`, `101` | +|` \| ` | Alternation | `Char(_) \| Bool(_)` | +|`()` | Empty | `Array( () )` | +|` ` | Sequence | `Tuple( Lit(Bool(_)) Lit(Int(_)) Lit(_) )` | +|`*`
`
+`
`
?`
`
{n}`
`
{n,m}`
`
{n,}` | Repetition





| `Array( _* )`,
`Block( Semi(_)+ )`,
`If(_, _, Block(_)?)`,
`Array( Lit(_){10} )`,
`Lit(_){5,10}`,
`Lit(Bool(_)){10,}` | +|`
#` | Named submatch | `Lit(Int(_))#foo` `Lit(Int(_#bar))` | + + +## The result type +[the-result-type]: #the-result-type + +A lot of lints require checks that go beyond what the pattern syntax described above can express. For example, a lint might want to check whether a node was created as part of a macro expansion or whether there's no comment above a node. Another example would be a lint that wants to match two nodes that have the same value (as needed by lints like `almost_swapped`). Instead of allowing users to write these checks into the pattern directly (which might make patterns hard to read), the proposed solution allows users to assign names to parts of a pattern expression. When matching a pattern against a syntax tree node, the return value will contain references to all nodes that were matched by these named subpatterns. This is similar to capture groups in regular expressions. + +For example, given the following pattern + +``` +pattern!{ + // matches character literals + my_pattern: Expr = + Lit(Char(_#val_inner)#val)#val_outer +} +``` + +one could get references to the nodes that matched the subpatterns in the following way: + +``` +... +fn check_expr(expr: &syntax::ast::Expr) { + if let Some(result) = my_pattern(expr) { + result.val_inner // type: &char + result.val // type: &syntax::ast::Lit + result.val_outer // type: &syntax::ast::Expr + } +} +``` + +The types in the `result` struct depend on the pattern. For example, the following pattern + +``` +pattern!{ + // matches arrays of character literals + my_pattern_seq: Expr = + Array( Lit(_)*#foo ) +} +``` + +matches arrays that consist of any number of literal expressions. Because those expressions are named `foo`, the result struct contains a `foo` attribute which is a vector of expressions: + +``` +... +if let Some(result) = my_pattern_seq(expr) { + result.foo // type: Vec<&syntax::ast::Expr> +} +``` + +Another result type occurs when a name is only defined in one branch of an alternation: + +``` +pattern!{ + // matches if expression is a boolean or integer literal + my_pattern_alt: Expr = + Lit( Bool(_#bar) | Int(_) ) +} +``` + +In the pattern above, the `bar` name is only defined if the pattern matches a boolean literal. If it matches an integer literal, the name isn't set. To account fot this, the result struct's `bar` attribute is an option type: + +``` +... +if let Some(result) = my_pattern_alt(expr) { + result.bar // type: Option<&bool> +} +``` + +It's also possible to use a name in multiple alternation branches if they have compatible types: + +``` +pattern!{ + // matches if expression is a boolean or integer literal + my_pattern_mult: Expr = + Lit(_#baz) | Array( Lit(_#baz) ) +} +... +if let Some(result) = my_pattern_mult(expr) { + result.baz // type: &syntax::ast::Lit +} +``` + +Named submatches are a **flat** namespace and this is intended. In the example above, two different sub-structures are assigned to a flat name. I expect that for most lints, a flat namespace is sufficient and easier to work with than a hierarchical one. + +#### Two stages + +Using named subpatterns, users can write lints in two stages. First, a coarse selection of possible matches is produced by the pattern syntax. In the second stage, the named subpattern references can be used to do additional tests like asserting that a node hasn't been created as part of a macro expansion. + +## Implementing clippy lints using patterns + +As a "real-world" example, I re-implemented the `collapsible_if` lint using patterns. The code can be found [here](https://github.com/fkohlgrueber/rust-clippy-pattern/blob/039b07ecccaf96d6aa7504f5126720d2c9cceddd/clippy_lints/src/collapsible_if.rs#L88-L163). The pattern-based version passes all test cases that were written for `collapsible_if`. + + +# Reference-level explanation +[reference-level-explanation]: #reference-level-explanation + +## Overview + +The following diagram shows the dependencies between the main parts of the proposed solution: + +``` + Pattern syntax + | + | parsing / lowering + v + PatternTree + ^ + | + | + IsMatch trait + | + | + +---------------+-----------+---------+ + | | | | + v v v v + syntax::ast rustc::hir syn ... +``` + +The pattern syntax described in the previous section is parsed / lowered into the so-called *PatternTree* data structure that represents a valid syntax tree pattern. Matching a *PatternTree* against an actual syntax tree (e.g. rust ast / hir or the syn ast, ...) is done using the *IsMatch* trait. + +The *PatternTree* and the *IsMatch* trait are introduced in more detail in the following sections. + +## PatternTree + +The core data structure of this RFC is the **PatternTree**. + +It's a data structure similar to rust's AST / HIR, but with the following differences: + +- The PatternTree doesn't contain parsing information like `Span`s +- The PatternTree can represent alternatives, sequences and optionals + +The code below shows a simplified version of the current PatternTree: + +> Note: The current implementation can be found [here](https://github.com/fkohlgrueber/pattern-matching/blob/dfb3bc9fbab69cec7c91e72564a63ebaa2ede638/pattern-match/src/pattern_tree.rs#L50-L96). + + +``` +pub enum Expr { + Lit(Alt), + Array(Seq), + Block_(Alt), + If(Alt, Alt, Opt), + IfLet( + Alt, + Opt, + ), +} + +pub enum Lit { + Char(Alt), + Bool(Alt), + Int(Alt), +} + +pub enum Stmt { + Expr(Alt), + Semi(Alt), +} + +pub enum BlockType { + Block(Seq), +} +``` + +The `Alt`, `Seq` and `Opt` structs look like these: + +> Note: The current implementation can be found [here](https://github.com/fkohlgrueber/pattern-matching/blob/dfb3bc9fbab69cec7c91e72564a63ebaa2ede638/pattern-match/src/matchers.rs#L35-L60). + +``` +pub enum Alt { + Any, + Elmt(Box), + Alt(Box, Box), + Named(Box, ...) +} + +pub enum Opt { + Any, // anything, but not None + Elmt(Box), + None, + Alt(Box, Box), + Named(Box, ...) +} + +pub enum Seq { + Any, + Empty, + Elmt(Box), + Repeat(Box, RepeatRange), + Seq(Box, Box), + Alt(Box, Box), + Named(Box, ...) +} + +pub struct RepeatRange { + pub start: usize, + pub end: Option // exclusive +} +``` + +## Parsing / Lowering + +The input of a `pattern!` macro call is parsed into a `ParseTree` first and then lowered to a `PatternTree`. + +Valid patterns depend on the *PatternTree* definitions. For example, the pattern `Lit(Bool(_)*)` isn't valid because the parameter type of the `Lit` variant of the `Expr` enum is `Any` and therefore doesn't support repetition (`*`). As another example, `Array( Lit(_)* )` is a valid pattern because the parameter of `Array` is of type `Seq` which allows sequences and repetitions. + +> Note: names in the pattern syntax correspond to *PatternTree* enum **variants**. For example, the `Lit` in the pattern above refers to the `Lit` variant of the `Expr` enum (`Expr::Lit`), not the `Lit` enum. + +## The IsMatch Trait + +The pattern syntax and the *PatternTree* are independant of specific syntax tree implementations (rust ast / hir, syn, ...). When looking at the different pattern examples in the previous sections, it can be seen that the patterns don't contain any information specific to a certain syntax tree implementation. In contrast, clippy lints currently match against ast / hir syntax tree nodes and therefore directly depend on their implementation. + +The connection between the *PatternTree* and specific syntax tree implementations is the `IsMatch` trait. It defines how to match *PatternTree* nodes against specific syntax tree nodes. A simplified implementation of the `IsMatch` trait is shown below: + +``` +pub trait IsMatch { + fn is_match(&self, other: &'o O) -> bool; +} +``` + +This trait needs to be implemented on each enum of the *PatternTree* (for the corresponding syntax tree types). For example, the `IsMatch` implementation for matching `ast::LitKind` against the *PatternTree's* `Lit` enum might look like this: + +``` +impl IsMatch for Lit { + fn is_match(&self, other: &ast::LitKind) -> bool { + match (self, other) { + (Lit::Char(i), ast::LitKind::Char(j)) => i.is_match(j), + (Lit::Bool(i), ast::LitKind::Bool(j)) => i.is_match(j), + (Lit::Int(i), ast::LitKind::Int(j, _)) => i.is_match(j), + _ => false, + } + } +} +``` + +All `IsMatch` implementations for matching the current *PatternTree* against `syntax::ast` can be found [here](https://github.com/fkohlgrueber/pattern-matching/blob/dfb3bc9fbab69cec7c91e72564a63ebaa2ede638/pattern-match/src/ast_match.rs). + + +# Drawbacks +[drawbacks]: #drawbacks + +#### Performance + +The pattern matching code is currently not optimized for performance, so it might be slower than hand-written matching code. +Additionally, the two-stage approach (matching against the coarse pattern first and checking for additional properties later) might be slower than the current practice of checking for structure and additional properties in one pass. For example, the following lint + +``` +pattern!{ + pat_if_without_else: Expr = + If( + _, + Block( + Expr( If(_, _, ())#inner ) + | Semi( If(_, _, ())#inner ) + )#then, + () + ) +} +... +fn check_expr(&mut self, cx: &EarlyContext<'_>, expr: &ast::Expr) { + if let Some(result) = pat_if_without_else(expr) { + if !block_starts_with_comment(cx, result.then) { + ... + } +} +``` + +first matches against the pattern and then checks that the `then` block doesn't start with a comment. Using clippy's current approach, it's possible to check for these conditions earlier: + +``` +fn check_expr(&mut self, cx: &EarlyContext<'_>, expr: &ast::Expr) { + if_chain! { + if let ast::ExprKind::If(ref check, ref then, None) = expr.node; + if !block_starts_with_comment(cx, then); + if let Some(inner) = expr_block(then); + if let ast::ExprKind::If(ref check_inner, ref content, None) = inner.node; + then { + ... + } + } +} +``` + +Whether or not this causes performance regressions depends on actual patterns. If it turns out to be a problem, the pattern matching algorithms could be extended to allow "early filtering" (see the [Early Filtering](#early-filtering) section in Future Possibilities). + +That being said, I don't see any conceptual limitations regarding pattern matching performance. + +#### Applicability + +Even though I'd expect that a lot of lints can be written using the proposed pattern syntax, it's unlikely that all lints can be expressed using patterns. I suspect that there will still be lints that need to be implemented by writing custom pattern matching code. This would lead to mix within clippy's codebase where some lints are implemented using patterns and others aren't. This inconsistency might be considered a drawback. + + +# Rationale and alternatives +[rationale-and-alternatives]: #rationale-and-alternatives + +Specifying lints using syntax tree patterns has a couple of advantages compared to the current approach of manually writing matching code. First, syntax tree patterns allow users to describe patterns in a simple and expressive way. This makes it easier to write new lints for both novices and experts and also makes reading / modifying existing lints simpler. + +Another advantage is that lints are independent of specific syntax tree implementations (e.g. AST / HIR, ...). When these syntax tree implementations change, only the `IsMatch` trait implementations need to be adapted and existing lints can remain unchanged. This also means that if the `IsMatch` trait implementations were integrated into the compiler, updating the `IsMatch` implementations would be required for the compiler to compile successfully. This could reduce the number of times clippy breaks because of changes in the compiler. Another advantage of the pattern's independence is that converting an `EarlyLintPass` lint into a `LatePassLint` wouldn't require rewriting the whole pattern matching code. In fact, the pattern might work just fine without any adaptions. + + + +## Alternatives + +### Rust-like pattern syntax + +The proposed pattern syntax requires users to know the structure of the `PatternTree` (which is very similar to the AST's / HIR's structure) and also the pattern syntax. An alternative would be to introduce a pattern syntax that is similar to actual Rust syntax (probably like the `quote!` macro). For example, a pattern that matches `if` expressions that have `false` in their condition could look like this: + +``` +if false { + #[*] +} +``` + +#### Problems + +Extending Rust syntax (which is quite complex by itself) with additional syntax needed for specifying patterns (alternations, sequences, repetisions, named submatches, ...) might become difficult to read and really hard to parse properly. + +For example, a pattern that matches a binary operation that has `0` on both sides might look like this: + +``` +0 #[*:BinOpKind] 0 +``` + +Now consider this slightly more complex example: + +``` +1 + 0 #[*:BinOpKind] 0 +``` + +The parser would need to know the precedence of `#[*:BinOpKind]` because it affects the structure of the resulting AST. `1 + 0 + 0` is parsed as `(1 + 0) + 0` while `1 + 0 * 0` is parsed as `1 + (0 * 0)`. Since the pattern could be any `BinOpKind`, the precedence cannot be known in advance. + +Another example of a problem would be named submatches. Take a look at this pattern: + +``` +fn test() { + 1 #foo +} +``` + +Which node is `#foo` referring to? `int`, `ast::Lit`, `ast::Expr`, `ast::Stmt`? Naming subpatterns in a rust-like syntax is difficult because a lot of AST nodes don't have a syntactic element that can be used to put the name tag on. In these situations, the only sensible option would be to assign the name tag to the outermost node (`ast::Stmt` in the example above), because the information of all child nodes can be retrieved through the outermost node. The problem with this then would be that accessing inner nodes (like `ast::Lit`) would again require manual pattern matching. + +In general, Rust syntax contains a lot of code structure implicitly. This structure is reconstructed during parsing (e.g. binary operations are reconstructed using operator precedence and left-to-right) and is one of the reasons why parsing is a complex task. The advantage of this approach is that writing code is simpler for users. + +When writing *syntax tree patterns*, each element of the hierarchy might have alternatives, repetitions, etc.. Respecting that while still allowing human-friendly syntax that contains structure implicitly seems to be really complex, if not impossible. + +Developing such a syntax would also require to maintain a custom parser that is at least as complex as the Rust parser itself. Additionally, future changes in the Rust syntax might be incompatible with such a syntax. + +In summary, I think that developing such a syntax would introduce a lot of complexity to solve a relatively minor problem. + +The issue of users not knowing about the *PatternTree* structure could be solved by a tool that, given a rust program, generates a pattern that matches only this program (similar to the clippy author lint). + +For some simple cases (like the first example above), it might be possible to successfully mix Rust and pattern syntax. This space could be further explored in a future extension. + +# Prior art +[prior-art]: #prior-art + +The pattern syntax is heavily inspired by regular expressions (repetitions, alternatives, sequences, ...). + +From what I've seen until now, other linters also implement lints that directly work on syntax tree data structures, just like clippy does currently. I would therefore consider the pattern syntax to be *new*, but please correct me if I'm wrong. + +# Unresolved questions +[unresolved-questions]: #unresolved-questions + +#### How to handle multiple matches? + +When matching a syntax tree node against a pattern, there are possibly multiple ways in which the pattern can be matched. A simple example of this would be the following pattern: + +``` +pattern!{ + my_pattern: Expr = + Array( _* Lit(_)+#literals) +} +``` + +This pattern matches arrays that end with at least one literal. Now given the array `[x, 1, 2]`, should `1` be matched as part of the `_*` or the `Lit(_)+` part of the pattern? The difference is important because the named submatch `#literals` would contain 1 or 2 elements depending how the pattern is matched. In regular expressions, this problem is solved by matching "greedy" by default and "non-greedy" optionally. + +I haven't looked much into this yet because I don't know how relevant it is for most lints. The current implementation simply returns the first match it finds. + +# Future possibilities +[future-possibilities]: #future-possibilities + +#### Implement rest of Rust Syntax + +The current project only implements a small part of the Rust syntax. In the future, this should incrementally be extended to more syntax to allow implementing more lints. Implementing more of the Rust syntax requires extending the `PatternTree` and `IsMatch` implementations, but should be relatively straight-forward. + +#### Early filtering + +As described in the *Drawbacks/Performance* section, allowing additional checks during the pattern matching might be beneficial. + +The pattern below shows how this could look like: + +``` +pattern!{ + pat_if_without_else: Expr = + If( + _, + Block( + Expr( If(_, _, ())#inner ) + | Semi( If(_, _, ())#inner ) + )#then, + () + ) + where + !in_macro(#then.span); +} +``` + +The difference compared to the currently proposed two-stage filtering is that using early filtering, the condition (`!in_macro(#then.span)` in this case) would be evaluated as soon as the `Block(_)#then` was matched. + +Another idea in this area would be to introduce a syntax for backreferences. They could be used to require that multiple parts of a pattern should match the same value. For example, the `assign_op_pattern` lint that searches for `a = a op b` and recommends changing it to `a op= b` requires that both occurrances of `a` are the same. Using `=#...` as syntax for backreferences, the lint could be implemented like this: + +``` +pattern!{ + assign_op_pattern: Expr = + Assign(_#target, Binary(_, =#target, _) +} +``` + +#### Match descendant + +A lot of lints currently implement custom visitors that check whether any subtree (which might not be a direct descendant) of the current node matches some properties. This cannot be expressed with the proposed pattern syntax. Extending the pattern syntax to allow patterns like "a function that contains at least two return statements" could be a practical addition. + +#### Negation operator for alternatives + +For patterns like "a literal that is not a boolean literal" one currently needs to list all alternatives except the boolean case. Introducing a negation operator that allows to write `Lit(!Bool(_))` might be a good idea. This pattern would be eqivalent to `Lit( Char(_) | Int(_) )` (given that currently only three literal types are implemented). + +#### Functional composition + +Patterns currently don't have any concept of composition. This leads to repetitions within patterns. For example, one of the collapsible-if patterns currently has to be written like this: + +``` +pattern!{ + pat_if_else: Expr = + If( + _, + _, + Block_( + Block( + Expr((If(_, _, _?) | IfLet(_, _?))#else_) | + Semi((If(_, _, _?) | IfLet(_, _?))#else_) + )#block_inner + )#block + ) | + IfLet( + _, + Block_( + Block( + Expr((If(_, _, _?) | IfLet(_, _?))#else_) | + Semi((If(_, _, _?) | IfLet(_, _?))#else_) + )#block_inner + )#block + ) +} +``` + +If patterns supported defining functions of subpatterns, the code could be simplified as follows: + +``` +pattern!{ + fn expr_or_semi(expr: Expr) -> Stmt { + Expr(expr) | Semi(expr) + } + fn if_or_if_let(then: Block, else: Opt) -> Expr { + If(_, then, else) | IfLet(then, else) + } + pat_if_else: Expr = + if_or_if_let( + _, + Block_( + Block( + expr_or_semi( if_or_if_let(_, _?)#else_ ) + )#block_inner + )#block + ) +} +``` + +Additionally, common patterns like `expr_or_semi` could be shared between different lints. + +#### Clippy Pattern Author + +Another improvement could be to create a tool that, given some valid Rust syntax, generates a pattern that matches this syntax exactly. This would make starting to write a pattern easier. A user could take a look at the patterns generated for a couple of Rust code examples and use that information to write a pattern that matches all of them. + +This is similar to clippy's author lint. + +#### Supporting other syntaxes + +Most of the proposed system is language-agnostic. For example, the pattern syntax could also be used to describe patterns for other programming languages. + +In order to support other languages' syntaxes, one would need to implement another `PatternTree` that sufficiently describes the languages' AST and implement `IsMatch` for this `PatternTree` and the languages' AST. + +One aspect of this is that it would even be possible to write lints that work on the pattern syntax itself. For example, when writing the following pattern + + +``` +pattern!{ + my_pattern: Expr = + Array( Lit(Bool(false)) Lit(Bool(false)) ) +} +``` + +a lint that works on the pattern syntax's AST could suggest using this pattern instead: + +``` +pattern!{ + my_pattern: Expr = + Array( Lit(Bool(false)){2} ) +} +``` + +In the future, clippy could use this system to also provide lints for custom syntaxes like those found in macros. + +# Final Note + +This is the first RFC I've ever written and it might be a little rough around the edges. + +I'm looking forward to hearing your feedback and discussing the proposal. From 8c6ae1e348ee10ac4209c5b7b6962903ab7118e7 Mon Sep 17 00:00:00 2001 From: blyxyas Date: Fri, 25 Nov 2022 23:25:05 +0100 Subject: [PATCH 06/17] Refactor + Better snippeting function --- clippy_lints/src/direct_method_call.rs | 97 ++++++++++++++++---------- tests/ui/direct_method_call.rs | 2 +- tests/ui/direct_method_call.stderr | 4 +- 3 files changed, 64 insertions(+), 39 deletions(-) diff --git a/clippy_lints/src/direct_method_call.rs b/clippy_lints/src/direct_method_call.rs index 5dd9eb295786..cdd51c8ac1cc 100644 --- a/clippy_lints/src/direct_method_call.rs +++ b/clippy_lints/src/direct_method_call.rs @@ -5,7 +5,9 @@ use clippy_utils::fn_def_id; use clippy_utils::source::snippet_with_context; use rustc_errors::Applicability; use rustc_hir::Expr; +use rustc_lint::LintContext; use rustc_lint::{LateContext, LateLintPass}; +use rustc_middle::lint::in_external_macro; use rustc_session::{declare_lint_pass, declare_tool_lint}; declare_clippy_lint! { @@ -33,49 +35,72 @@ declare_lint_pass!(DirectMethodCall => [DIRECT_METHOD_CALL]); impl LateLintPass<'_> for DirectMethodCall { fn check_expr(&mut self, cx: &LateContext<'_>, expr: &Expr<'_>) { - if let Some(fnid) = fn_def_id(cx, expr) { - if let Some(fnsig) = cx.tcx.opt_associated_item(fnid) { - if fnsig.fn_has_self_parameter && - // Discard if it's already a method x.y() - !cx.typeck_results().is_method_call(expr) - { - let snippet_raw = snippet_with_context( - cx, - expr.span, - expr.span.ctxt(), - "..", - &mut Applicability::MaybeIncorrect, - ) - .0; - let snippet_formatted = format_snippet(&snippet_raw); - span_lint_and_sugg( - cx, - DIRECT_METHOD_CALL, - expr.span, - "this is poorly readable", - "did you mean", - snippet_formatted, - Applicability::MaybeIncorrect, - ); + if !in_external_macro(cx.sess(), expr.span) { + if let Some(fnid) = fn_def_id(cx, expr) { + if let Some(fnsig) = cx.tcx.opt_associated_item(fnid) { + if fnsig.fn_has_self_parameter && + // Discard if it's already a method x.y() + !cx.typeck_results().is_method_call(expr) + { + let snippet_raw = snippet_with_context( + cx, + expr.span, + expr.span.ctxt(), + "..", + &mut Applicability::MaybeIncorrect, + ) + .0; + let snippet_formatted = format_snippet(&snippet_raw); + if let Some(snip) = snippet_formatted { + span_lint_and_sugg( + cx, + DIRECT_METHOD_CALL, + expr.span, + "this is poorly readable", + "did you mean", + snip, + Applicability::MaybeIncorrect, + ); + } + } } } } } } -fn format_snippet(snippet_raw: &str) -> String { +// This is an expensive function. +fn format_snippet(snippet_raw: &str) -> Option { // W::X::Y(Z, ...N) = Y.Z(...N) - let mut segments = snippet_raw.split("::").collect::>(); - segments.remove(segments.len() - 2); + let segments = snippet_raw.split("(").collect::>(); + let mut args: Vec = Vec::new(); + { + let mut args_len; + let raw_args = segments[1].split(')').collect::>()[0] + .split(',') + .collect::>(); + // Convert raw_args to String (args); + for raw_arg in raw_args { + args.push(raw_arg.to_owned()); + args_len = args.len(); + args[args_len - 1].retain(|c| !c.is_whitespace()); + } + } - let binding = segments.join("::"); - let no_trait = binding.split('(').collect::>(); - let method_name = no_trait[0]; - let mut args = no_trait[1].split(')').collect::>()[0].to_owned(); - // Remove whitespace - args.retain(|c| !c.is_whitespace()); + let mut ident = segments[0]; + let mut deconstructed_ident = ident.split("::").collect::>(); + if deconstructed_ident.len() >= 2 { + // W::X::Y(Z, ...N) + // 1 2 ---- 3 ---- + // Remove 2 + deconstructed_ident.remove(deconstructed_ident.len() - 2); + } else { + return None; + } - let args = args.split(',').collect::>(); - let to_return = format!("({}).{method_name}({})", args[0], args[1..].join(",")); - to_return + let binding = deconstructed_ident.join("::"); + ident = &binding; + // Remove whitespace + let to_return = format!("({}).{ident}({})", args[0], args[1..].join(",")); + Some(to_return) } diff --git a/tests/ui/direct_method_call.rs b/tests/ui/direct_method_call.rs index 6407dfedbbce..6a38afebac71 100644 --- a/tests/ui/direct_method_call.rs +++ b/tests/ui/direct_method_call.rs @@ -20,7 +20,7 @@ fn main() { let _ = MyTrait::function(4.5f32); let _ = MyTrait::function_with_args(4.5f32, 4); let _ = f32::floor(4.5f32); - + // Should not warn let _ = (4.5f32).function(); let _ = (4.5f32).function_with_args(4); diff --git a/tests/ui/direct_method_call.stderr b/tests/ui/direct_method_call.stderr index 4232d91b809d..dd6cd1de6d66 100644 --- a/tests/ui/direct_method_call.stderr +++ b/tests/ui/direct_method_call.stderr @@ -1,5 +1,5 @@ error: this is poorly readable - --> $DIR/direct_method_call.rs:19:13 + --> $DIR/direct_method_call.rs:20:13 | LL | let _ = MyTrait::function(4.5f32); | ^^^^^^^^^^^^^^^^^^^^^^^^^ help: did you mean: `(4.5f32).function()` @@ -13,7 +13,7 @@ LL | let _ = MyTrait::function_with_args(4.5f32, 4); | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ help: did you mean: `(4.5f32).function_with_args(4)` error: this is poorly readable - --> $DIR/direct_method_call.rs:23:13 + --> $DIR/direct_method_call.rs:22:13 | LL | let _ = f32::floor(4.5f32); | ^^^^^^^^^^^^^^^^^^ help: did you mean: `(4.5f32).floor()` From 490899e1b4c8de4e55b9a659080a07668244e984 Mon Sep 17 00:00:00 2001 From: blyxyas Date: Sat, 26 Nov 2022 17:58:22 +0100 Subject: [PATCH 07/17] Add lint config + ui-toml test + bugfixes (desc.) `allowed-explicit-modules = [...]` now can be used in `clippy.toml` All the modules that are in that array will be ignored by the lint --- clippy_lints/src/direct_method_call.rs | 36 ++++++++++++++++--- clippy_lints/src/lib.rs | 7 +++- clippy_lints/src/utils/conf.rs | 4 +++ tests/ui-toml/direct_method_call/clippy.toml | 1 + .../direct_method_call/direct_method_call.rs | 29 +++++++++++++++ 5 files changed, 71 insertions(+), 6 deletions(-) create mode 100644 tests/ui-toml/direct_method_call/clippy.toml create mode 100644 tests/ui-toml/direct_method_call/direct_method_call.rs diff --git a/clippy_lints/src/direct_method_call.rs b/clippy_lints/src/direct_method_call.rs index cdd51c8ac1cc..fb4311f19767 100644 --- a/clippy_lints/src/direct_method_call.rs +++ b/clippy_lints/src/direct_method_call.rs @@ -8,7 +8,7 @@ use rustc_hir::Expr; use rustc_lint::LintContext; use rustc_lint::{LateContext, LateLintPass}; use rustc_middle::lint::in_external_macro; -use rustc_session::{declare_lint_pass, declare_tool_lint}; +use rustc_session::{declare_tool_lint, impl_lint_pass}; declare_clippy_lint! { /// ### What it does @@ -29,7 +29,20 @@ declare_clippy_lint! { "Needlessly using explicit trait" } -declare_lint_pass!(DirectMethodCall => [DIRECT_METHOD_CALL]); +impl_lint_pass!(DirectMethodCall => [DIRECT_METHOD_CALL]); + +pub struct DirectMethodCall { + allowed_explicit_modules: Vec, +} + +impl DirectMethodCall { + #[must_use] + pub fn new(allowed_explicit_modules: Vec) -> Self { + Self { + allowed_explicit_modules, + } + } +} // 'X::Y(Z) -> Z.Y()' When Z implements X @@ -50,7 +63,7 @@ impl LateLintPass<'_> for DirectMethodCall { &mut Applicability::MaybeIncorrect, ) .0; - let snippet_formatted = format_snippet(&snippet_raw); + let snippet_formatted = format_snippet(&snippet_raw, &self.allowed_explicit_modules); if let Some(snip) = snippet_formatted { span_lint_and_sugg( cx, @@ -70,9 +83,14 @@ impl LateLintPass<'_> for DirectMethodCall { } // This is an expensive function. -fn format_snippet(snippet_raw: &str) -> Option { +fn format_snippet(snippet_raw: &str, allowed_explicit_modules: &Vec) -> Option { // W::X::Y(Z, ...N) = Y.Z(...N) let segments = snippet_raw.split("(").collect::>(); + if segments.len() <= 1 { + return None; + } + let binding = segments[1].split(')').collect::>(); + let suffixes = binding[binding.len() - 1]; let mut args: Vec = Vec::new(); { let mut args_len; @@ -87,8 +105,16 @@ fn format_snippet(snippet_raw: &str) -> Option { } } + // Ignore if module name is in conf.allowed_explicit_modules + let mut ident = segments[0]; let mut deconstructed_ident = ident.split("::").collect::>(); + for &ident in &deconstructed_ident { + if allowed_explicit_modules.contains(&ident.to_owned()) { + return None; + } + } + if deconstructed_ident.len() >= 2 { // W::X::Y(Z, ...N) // 1 2 ---- 3 ---- @@ -101,6 +127,6 @@ fn format_snippet(snippet_raw: &str) -> Option { let binding = deconstructed_ident.join("::"); ident = &binding; // Remove whitespace - let to_return = format!("({}).{ident}({})", args[0], args[1..].join(",")); + let to_return = format!("({}).{ident}({}){suffixes}", args[0], args[1..].join(",")); Some(to_return) } diff --git a/clippy_lints/src/lib.rs b/clippy_lints/src/lib.rs index 4ff5cf2a5ab5..01d70a8a605c 100644 --- a/clippy_lints/src/lib.rs +++ b/clippy_lints/src/lib.rs @@ -885,7 +885,12 @@ pub fn register_plugins(store: &mut rustc_lint::LintStore, sess: &Session, conf: store.register_late_pass(|_| Box::new(from_raw_with_void_ptr::FromRawWithVoidPtr)); store.register_late_pass(|_| Box::new(suspicious_xor_used_as_pow::ConfusingXorAndPow)); store.register_late_pass(move |_| Box::new(manual_is_ascii_check::ManualIsAsciiCheck::new(msrv()))); - store.register_late_pass(|_| Box::new(direct_method_call::DirectMethodCall)); + let allowed_explicit_modules = conf.allowed_explicit_modules.clone(); + store.register_late_pass(move |_| { + Box::new(direct_method_call::DirectMethodCall::new( + allowed_explicit_modules.clone(), + )) + }); // add lints here, do not remove this comment, it's used in `new_lint` } diff --git a/clippy_lints/src/utils/conf.rs b/clippy_lints/src/utils/conf.rs index b37d4239477e..facce21ab514 100644 --- a/clippy_lints/src/utils/conf.rs +++ b/clippy_lints/src/utils/conf.rs @@ -402,6 +402,10 @@ define_Conf! { /// A list of paths to types that should be treated like `Arc`, i.e. ignored but /// for the generic parameters for determining interior mutability (ignore_interior_mutability: Vec = Vec::from(["bytes::Bytes".into()])), + /// Lint: DIRECT_METHOD_CALL + /// + /// A list of modules that are allowed to be explicitly used (e.g. std::fs) + (allowed_explicit_modules: Vec = vec!["Arc".into(), "Rc".into(), "Box".into()]), } /// Search for the configuration file. diff --git a/tests/ui-toml/direct_method_call/clippy.toml b/tests/ui-toml/direct_method_call/clippy.toml new file mode 100644 index 000000000000..5fe1543fdb4d --- /dev/null +++ b/tests/ui-toml/direct_method_call/clippy.toml @@ -0,0 +1 @@ +allowed-explicit-modules = ["f64"] \ No newline at end of file diff --git a/tests/ui-toml/direct_method_call/direct_method_call.rs b/tests/ui-toml/direct_method_call/direct_method_call.rs new file mode 100644 index 000000000000..a5669e4bbdb2 --- /dev/null +++ b/tests/ui-toml/direct_method_call/direct_method_call.rs @@ -0,0 +1,29 @@ +#![allow(unused)] +#![warn(clippy::direct_method_call)] + +trait MyTrait { + fn function(self) -> u8; + fn function_with_args(self, x: u32) -> u8; +} + +impl MyTrait for f32 { + fn function(self) -> u8 { + 3 + } + fn function_with_args(self, x: u32) -> u8 { + 2 + } +} + +fn main() { + // Should warn + let _ = MyTrait::function(4.5f32); + let _ = MyTrait::function_with_args(4.5f32, 4); + let _ = f32::floor(4.5f32); + + // Should not warn + let _ = f64::floor(4.5f64); // f64 is allowed in clippy.toml + let _ = (4.5f32).function(); + let _ = (4.5f32).function_with_args(4); + let _ = (4.5f32).floor(); +} From 60c4b025ffb041df766d65f355be2b9b5efbf5e4 Mon Sep 17 00:00:00 2001 From: Alex Macleod Date: Thu, 24 Nov 2022 21:33:25 +0000 Subject: [PATCH 08/17] Fix remark for `rfcs/0001-syntax-tree-patterns.md` --- rfcs/0001-syntax-tree-patterns.md | 12 ++---------- 1 file changed, 2 insertions(+), 10 deletions(-) diff --git a/rfcs/0001-syntax-tree-patterns.md b/rfcs/0001-syntax-tree-patterns.md index 8d285b2ef44c..9161986a7b77 100644 --- a/rfcs/0001-syntax-tree-patterns.md +++ b/rfcs/0001-syntax-tree-patterns.md @@ -1,3 +1,5 @@ + + - Feature Name: syntax-tree-patterns - Start Date: 2019-03-12 - RFC PR: (leave this empty) @@ -6,13 +8,11 @@ > Note: This project is part of my Master's Thesis (supervised by [@oli-obk](https://github.com/oli-obk)) # Summary -[summary]: #summary Introduce a domain-specific language (similar to regular expressions) that allows to describe lints using *syntax tree patterns*. # Motivation -[motivation]: #motivation Finding parts of a syntax tree (AST, HIR, ...) that have certain properties (e.g. "*an if that has a block as its condition*") is a major task when writing lints. For non-trivial lints, it often requires nested pattern matching of AST / HIR nodes. For example, testing that an expression is a boolean literal requires the following checks: @@ -68,7 +68,6 @@ A lot of complexity in writing lints currently seems to come from having to manu While regular expressions are very useful when searching for patterns in flat character sequences, they cannot easily be applied to hierarchical data structures like syntax trees. This RFC therefore proposes a pattern matching system that is inspired by regular expressions and designed for hierarchical syntax trees. # Guide-level explanation -[guide-level-explanation]: #guide-level-explanation This proposal adds a `pattern!` macro that can be used to specify a syntax tree pattern to search for. A simple pattern is shown below: @@ -281,7 +280,6 @@ The following table gives an summary of the pattern syntax: ## The result type -[the-result-type]: #the-result-type A lot of lints require checks that go beyond what the pattern syntax described above can express. For example, a lint might want to check whether a node was created as part of a macro expansion or whether there's no comment above a node. Another example would be a lint that wants to match two nodes that have the same value (as needed by lints like `almost_swapped`). Instead of allowing users to write these checks into the pattern directly (which might make patterns hard to read), the proposed solution allows users to assign names to parts of a pattern expression. When matching a pattern against a syntax tree node, the return value will contain references to all nodes that were matched by these named subpatterns. This is similar to capture groups in regular expressions. @@ -372,7 +370,6 @@ As a "real-world" example, I re-implemented the `collapsible_if` lint using patt # Reference-level explanation -[reference-level-explanation]: #reference-level-explanation ## Overview @@ -517,7 +514,6 @@ All `IsMatch` implementations for matching the current *PatternTree* against `sy # Drawbacks -[drawbacks]: #drawbacks #### Performance @@ -571,7 +567,6 @@ Even though I'd expect that a lot of lints can be written using the proposed pat # Rationale and alternatives -[rationale-and-alternatives]: #rationale-and-alternatives Specifying lints using syntax tree patterns has a couple of advantages compared to the current approach of manually writing matching code. First, syntax tree patterns allow users to describe patterns in a simple and expressive way. This makes it easier to write new lints for both novices and experts and also makes reading / modifying existing lints simpler. @@ -632,14 +627,12 @@ The issue of users not knowing about the *PatternTree* structure could be solved For some simple cases (like the first example above), it might be possible to successfully mix Rust and pattern syntax. This space could be further explored in a future extension. # Prior art -[prior-art]: #prior-art The pattern syntax is heavily inspired by regular expressions (repetitions, alternatives, sequences, ...). From what I've seen until now, other linters also implement lints that directly work on syntax tree data structures, just like clippy does currently. I would therefore consider the pattern syntax to be *new*, but please correct me if I'm wrong. # Unresolved questions -[unresolved-questions]: #unresolved-questions #### How to handle multiple matches? @@ -657,7 +650,6 @@ This pattern matches arrays that end with at least one literal. Now given the ar I haven't looked much into this yet because I don't know how relevant it is for most lints. The current implementation simply returns the first match it finds. # Future possibilities -[future-possibilities]: #future-possibilities #### Implement rest of Rust Syntax From 203ee71585e86b6f9cea2d9e0baec59bf638c92d Mon Sep 17 00:00:00 2001 From: Lukas Wirth Date: Mon, 14 Nov 2022 13:42:47 +0100 Subject: [PATCH 09/17] Lint unnecessary safety comments on items --- CHANGELOG.md | 1 + clippy_lints/src/declared_lints.rs | 1 + .../src/undocumented_unsafe_blocks.rs | 240 +++++++++++++----- tests/ui/undocumented_unsafe_blocks.rs | 2 +- tests/ui/undocumented_unsafe_blocks.stderr | 15 +- 5 files changed, 200 insertions(+), 59 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 2a19202f08d6..e4480c61c3ce 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4452,6 +4452,7 @@ Released 2018-09-13 [`unnecessary_mut_passed`]: https://rust-lang.github.io/rust-clippy/master/index.html#unnecessary_mut_passed [`unnecessary_operation`]: https://rust-lang.github.io/rust-clippy/master/index.html#unnecessary_operation [`unnecessary_owned_empty_strings`]: https://rust-lang.github.io/rust-clippy/master/index.html#unnecessary_owned_empty_strings +[`unnecessary_safety_comment`]: https://rust-lang.github.io/rust-clippy/master/index.html#unnecessary_safety_comment [`unnecessary_safety_doc`]: https://rust-lang.github.io/rust-clippy/master/index.html#unnecessary_safety_doc [`unnecessary_self_imports`]: https://rust-lang.github.io/rust-clippy/master/index.html#unnecessary_self_imports [`unnecessary_sort_by`]: https://rust-lang.github.io/rust-clippy/master/index.html#unnecessary_sort_by diff --git a/clippy_lints/src/declared_lints.rs b/clippy_lints/src/declared_lints.rs index b33e377dc0f9..060e2f8e8ecf 100644 --- a/clippy_lints/src/declared_lints.rs +++ b/clippy_lints/src/declared_lints.rs @@ -585,6 +585,7 @@ pub(crate) static LINTS: &[&crate::LintInfo] = &[ crate::types::TYPE_COMPLEXITY_INFO, crate::types::VEC_BOX_INFO, crate::undocumented_unsafe_blocks::UNDOCUMENTED_UNSAFE_BLOCKS_INFO, + crate::undocumented_unsafe_blocks::UNNECESSARY_SAFETY_COMMENT_INFO, crate::unicode::INVISIBLE_CHARACTERS_INFO, crate::unicode::NON_ASCII_LITERAL_INFO, crate::unicode::UNICODE_NOT_NFC_INFO, diff --git a/clippy_lints/src/undocumented_unsafe_blocks.rs b/clippy_lints/src/undocumented_unsafe_blocks.rs index e8f15a444735..57ee2735aa03 100644 --- a/clippy_lints/src/undocumented_unsafe_blocks.rs +++ b/clippy_lints/src/undocumented_unsafe_blocks.rs @@ -59,8 +59,36 @@ declare_clippy_lint! { restriction, "creating an unsafe block without explaining why it is safe" } +declare_clippy_lint! { + /// ### What it does + /// Checks for `// SAFETY: ` comments on safe code. + /// + /// ### Why is this bad? + /// Safe code has no safety requirements, so there is no need to + /// describe safety invariants. + /// + /// ### Example + /// ```rust + /// use std::ptr::NonNull; + /// let a = &mut 42; + /// + /// // SAFETY: references are guaranteed to be non-null. + /// let ptr = NonNull::new(a).unwrap(); + /// ``` + /// Use instead: + /// ```rust + /// use std::ptr::NonNull; + /// let a = &mut 42; + /// + /// let ptr = NonNull::new(a).unwrap(); + /// ``` + #[clippy::version = "1.66.0"] + pub UNNECESSARY_SAFETY_COMMENT, + restriction, + "creating an unsafe block without explaining why it is safe" +} -declare_lint_pass!(UndocumentedUnsafeBlocks => [UNDOCUMENTED_UNSAFE_BLOCKS]); +declare_lint_pass!(UndocumentedUnsafeBlocks => [UNDOCUMENTED_UNSAFE_BLOCKS, UNNECESSARY_SAFETY_COMMENT]); impl LateLintPass<'_> for UndocumentedUnsafeBlocks { fn check_block(&mut self, cx: &LateContext<'_>, block: &'_ Block<'_>) { @@ -90,28 +118,95 @@ impl LateLintPass<'_> for UndocumentedUnsafeBlocks { } fn check_item(&mut self, cx: &LateContext<'_>, item: &hir::Item<'_>) { - if let hir::ItemKind::Impl(imple) = item.kind - && imple.unsafety == hir::Unsafety::Unsafe - && !in_external_macro(cx.tcx.sess, item.span) - && !is_lint_allowed(cx, UNDOCUMENTED_UNSAFE_BLOCKS, item.hir_id()) - && !is_unsafe_from_proc_macro(cx, item.span) - && !item_has_safety_comment(cx, item) - { + if in_external_macro(cx.tcx.sess, item.span) { + return; + } + + let mk_spans = |pos: BytePos| { let source_map = cx.tcx.sess.source_map(); + let span = Span::new(pos, pos, SyntaxContext::root(), None); + let help_span = source_map.span_extend_to_next_char(span, '\n', true); let span = if source_map.is_multiline(item.span) { source_map.span_until_char(item.span, '\n') } else { item.span }; + (span, help_span) + }; - span_lint_and_help( - cx, - UNDOCUMENTED_UNSAFE_BLOCKS, - span, - "unsafe impl missing a safety comment", - None, - "consider adding a safety comment on the preceding line", - ); + let item_has_safety_comment = item_has_safety_comment(cx, item); + match (&item.kind, item_has_safety_comment) { + (hir::ItemKind::Impl(impl_), HasSafetyComment::No) if impl_.unsafety == hir::Unsafety::Unsafe => { + if !is_lint_allowed(cx, UNDOCUMENTED_UNSAFE_BLOCKS, item.hir_id()) + && !is_unsafe_from_proc_macro(cx, item.span) + { + let source_map = cx.tcx.sess.source_map(); + let span = if source_map.is_multiline(item.span) { + source_map.span_until_char(item.span, '\n') + } else { + item.span + }; + + span_lint_and_help( + cx, + UNDOCUMENTED_UNSAFE_BLOCKS, + span, + "unsafe impl missing a safety comment", + None, + "consider adding a safety comment on the preceding line", + ); + } + }, + (hir::ItemKind::Impl(impl_), HasSafetyComment::Yes(pos)) if impl_.unsafety == hir::Unsafety::Normal => { + if !is_lint_allowed(cx, UNNECESSARY_SAFETY_COMMENT, item.hir_id()) { + let (span, help_span) = mk_spans(pos); + + span_lint_and_help( + cx, + UNNECESSARY_SAFETY_COMMENT, + span, + "impl has unnecessary safety comment", + Some(help_span), + "consider removing the safety comment", + ); + } + }, + (hir::ItemKind::Impl(_), _) => {}, + (&hir::ItemKind::Const(.., body) | &hir::ItemKind::Static(.., body), HasSafetyComment::Yes(pos)) => { + if !is_lint_allowed(cx, UNNECESSARY_SAFETY_COMMENT, body.hir_id) { + let body = cx.tcx.hir().body(body); + if !matches!( + body.value.kind, hir::ExprKind::Block(block, _) + if block.rules == BlockCheckMode::UnsafeBlock(UnsafeSource::UserProvided) + ) { + let (span, help_span) = mk_spans(pos); + + span_lint_and_help( + cx, + UNNECESSARY_SAFETY_COMMENT, + span, + &format!("{} has unnecessary safety comment", item.kind.descr()), + Some(help_span), + "consider removing the safety comment", + ); + } + } + }, + (_, HasSafetyComment::Yes(pos)) => { + if !is_lint_allowed(cx, UNNECESSARY_SAFETY_COMMENT, item.hir_id()) { + let (span, help_span) = mk_spans(pos); + + span_lint_and_help( + cx, + UNNECESSARY_SAFETY_COMMENT, + span, + &format!("{} has unnecessary safety comment", item.kind.descr()), + Some(help_span), + "consider removing the safety comment", + ); + } + }, + _ => (), } } } @@ -170,28 +265,38 @@ fn block_has_safety_comment(cx: &LateContext<'_>, span: Span) -> bool { // won't work. This is to avoid dealing with where such a comment should be place relative to // attributes and doc comments. - span_from_macro_expansion_has_safety_comment(cx, span) || span_in_body_has_safety_comment(cx, span) + matches!( + span_from_macro_expansion_has_safety_comment(cx, span), + HasSafetyComment::Yes(_) + ) || span_in_body_has_safety_comment(cx, span) +} + +enum HasSafetyComment { + Yes(BytePos), + No, + Maybe, } /// Checks if the lines immediately preceding the item contain a safety comment. #[allow(clippy::collapsible_match)] -fn item_has_safety_comment(cx: &LateContext<'_>, item: &hir::Item<'_>) -> bool { - if span_from_macro_expansion_has_safety_comment(cx, item.span) { - return true; +fn item_has_safety_comment(cx: &LateContext<'_>, item: &hir::Item<'_>) -> HasSafetyComment { + match span_from_macro_expansion_has_safety_comment(cx, item.span) { + HasSafetyComment::Maybe => (), + has_safety_comment => return has_safety_comment, } if item.span.ctxt() == SyntaxContext::root() { if let Some(parent_node) = get_parent_node(cx.tcx, item.hir_id()) { let comment_start = match parent_node { Node::Crate(parent_mod) => { - comment_start_before_impl_in_mod(cx, parent_mod, parent_mod.spans.inner_span, item) + comment_start_before_item_in_mod(cx, parent_mod, parent_mod.spans.inner_span, item) }, Node::Item(parent_item) => { if let ItemKind::Mod(parent_mod) = &parent_item.kind { - comment_start_before_impl_in_mod(cx, parent_mod, parent_item.span, item) + comment_start_before_item_in_mod(cx, parent_mod, parent_item.span, item) } else { // Doesn't support impls in this position. Pretend a comment was found. - return true; + return HasSafetyComment::Maybe; } }, Node::Stmt(stmt) => { @@ -200,17 +305,17 @@ fn item_has_safety_comment(cx: &LateContext<'_>, item: &hir::Item<'_>) -> bool { Node::Block(block) => walk_span_to_context(block.span, SyntaxContext::root()).map(Span::lo), _ => { // Doesn't support impls in this position. Pretend a comment was found. - return true; + return HasSafetyComment::Maybe; }, } } else { // Problem getting the parent node. Pretend a comment was found. - return true; + return HasSafetyComment::Maybe; } }, _ => { // Doesn't support impls in this position. Pretend a comment was found. - return true; + return HasSafetyComment::Maybe; }, }; @@ -222,33 +327,40 @@ fn item_has_safety_comment(cx: &LateContext<'_>, item: &hir::Item<'_>) -> bool { && let Some(src) = unsafe_line.sf.src.as_deref() { unsafe_line.sf.lines(|lines| { - comment_start_line.line < unsafe_line.line && text_has_safety_comment( - src, - &lines[comment_start_line.line + 1..=unsafe_line.line], - unsafe_line.sf.start_pos.to_usize(), - ) + if comment_start_line.line >= unsafe_line.line { + HasSafetyComment::No + } else { + match text_has_safety_comment( + src, + &lines[comment_start_line.line + 1..=unsafe_line.line], + unsafe_line.sf.start_pos.to_usize(), + ) { + Some(b) => HasSafetyComment::Yes(b), + None => HasSafetyComment::No, + } + } }) } else { // Problem getting source text. Pretend a comment was found. - true + HasSafetyComment::Maybe } } else { // No parent node. Pretend a comment was found. - true + HasSafetyComment::Maybe } } else { - false + HasSafetyComment::No } } -fn comment_start_before_impl_in_mod( +fn comment_start_before_item_in_mod( cx: &LateContext<'_>, parent_mod: &hir::Mod<'_>, parent_mod_span: Span, - imple: &hir::Item<'_>, + item: &hir::Item<'_>, ) -> Option { parent_mod.item_ids.iter().enumerate().find_map(|(idx, item_id)| { - if *item_id == imple.item_id() { + if *item_id == item.item_id() { if idx == 0 { // mod A { /* comment */ unsafe impl T {} ... } // ^------------------------------------------^ returns the start of this span @@ -270,11 +382,11 @@ fn comment_start_before_impl_in_mod( }) } -fn span_from_macro_expansion_has_safety_comment(cx: &LateContext<'_>, span: Span) -> bool { +fn span_from_macro_expansion_has_safety_comment(cx: &LateContext<'_>, span: Span) -> HasSafetyComment { let source_map = cx.sess().source_map(); let ctxt = span.ctxt(); if ctxt == SyntaxContext::root() { - false + HasSafetyComment::Maybe } else { // From a macro expansion. Get the text from the start of the macro declaration to start of the // unsafe block. @@ -286,15 +398,22 @@ fn span_from_macro_expansion_has_safety_comment(cx: &LateContext<'_>, span: Span && let Some(src) = unsafe_line.sf.src.as_deref() { unsafe_line.sf.lines(|lines| { - macro_line.line < unsafe_line.line && text_has_safety_comment( - src, - &lines[macro_line.line + 1..=unsafe_line.line], - unsafe_line.sf.start_pos.to_usize(), - ) + if macro_line.line < unsafe_line.line { + match text_has_safety_comment( + src, + &lines[macro_line.line + 1..=unsafe_line.line], + unsafe_line.sf.start_pos.to_usize(), + ) { + Some(b) => HasSafetyComment::Yes(b), + None => HasSafetyComment::No, + } + } else { + HasSafetyComment::No + } }) } else { // Problem getting source text. Pretend a comment was found. - true + HasSafetyComment::Maybe } } } @@ -333,7 +452,7 @@ fn span_in_body_has_safety_comment(cx: &LateContext<'_>, span: Span) -> bool { src, &lines[body_line.line + 1..=unsafe_line.line], unsafe_line.sf.start_pos.to_usize(), - ) + ).is_some() }) } else { // Problem getting source text. Pretend a comment was found. @@ -345,30 +464,34 @@ fn span_in_body_has_safety_comment(cx: &LateContext<'_>, span: Span) -> bool { } /// Checks if the given text has a safety comment for the immediately proceeding line. -fn text_has_safety_comment(src: &str, line_starts: &[BytePos], offset: usize) -> bool { +fn text_has_safety_comment(src: &str, line_starts: &[BytePos], offset: usize) -> Option { let mut lines = line_starts .array_windows::<2>() .rev() .map_while(|[start, end]| { let start = start.to_usize() - offset; let end = end.to_usize() - offset; - src.get(start..end).map(|text| (start, text.trim_start())) + let text = src.get(start..end)?; + let trimmed = text.trim_start(); + Some((start + (text.len() - trimmed.len()), trimmed)) }) .filter(|(_, text)| !text.is_empty()); let Some((line_start, line)) = lines.next() else { - return false; + return None; }; // Check for a sequence of line comments. if line.starts_with("//") { - let mut line = line; + let (mut line, mut line_start) = (line, line_start); loop { if line.to_ascii_uppercase().contains("SAFETY:") { - return true; + return Some(BytePos( + u32::try_from(line_start).unwrap() + u32::try_from(offset).unwrap(), + )); } match lines.next() { - Some((_, x)) if x.starts_with("//") => line = x, - _ => return false, + Some((s, x)) if x.starts_with("//") => (line, line_start) = (x, s), + _ => return None, } } } @@ -377,16 +500,19 @@ fn text_has_safety_comment(src: &str, line_starts: &[BytePos], offset: usize) -> let (mut line_start, mut line) = (line_start, line); loop { if line.starts_with("/*") { - let src = src[line_start..line_starts.last().unwrap().to_usize() - offset].trim_start(); + let src = &src[line_start..line_starts.last().unwrap().to_usize() - offset]; let mut tokens = tokenize(src); - return src[..tokens.next().unwrap().len as usize] + return (src[..tokens.next().unwrap().len as usize] .to_ascii_uppercase() .contains("SAFETY:") - && tokens.all(|t| t.kind == TokenKind::Whitespace); + && tokens.all(|t| t.kind == TokenKind::Whitespace)) + .then_some(BytePos( + u32::try_from(line_start).unwrap() + u32::try_from(offset).unwrap(), + )); } match lines.next() { Some(x) => (line_start, line) = x, - None => return false, + None => return None, } } } diff --git a/tests/ui/undocumented_unsafe_blocks.rs b/tests/ui/undocumented_unsafe_blocks.rs index cbc6768033ec..c05eb447b2eb 100644 --- a/tests/ui/undocumented_unsafe_blocks.rs +++ b/tests/ui/undocumented_unsafe_blocks.rs @@ -1,6 +1,6 @@ // aux-build:proc_macro_unsafe.rs -#![warn(clippy::undocumented_unsafe_blocks)] +#![warn(clippy::undocumented_unsafe_blocks, clippy::unnecessary_safety_comment)] #![allow(clippy::let_unit_value, clippy::missing_safety_doc)] extern crate proc_macro_unsafe; diff --git a/tests/ui/undocumented_unsafe_blocks.stderr b/tests/ui/undocumented_unsafe_blocks.stderr index ba4de9806d17..4c6f6cd18c51 100644 --- a/tests/ui/undocumented_unsafe_blocks.stderr +++ b/tests/ui/undocumented_unsafe_blocks.stderr @@ -239,6 +239,19 @@ LL | unsafe impl TrailingComment for () {} // SAFETY: | = help: consider adding a safety comment on the preceding line +error: constant item has unnecessary safety comment + --> $DIR/undocumented_unsafe_blocks.rs:471:5 + | +LL | const BIG_NUMBER: i32 = 1000000; + | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + | +help: consider removing the safety comment + --> $DIR/undocumented_unsafe_blocks.rs:470:5 + | +LL | // SAFETY: + | ^^^^^^^^^^ + = note: `-D clippy::unnecessary-safety-comment` implied by `-D warnings` + error: unsafe impl missing a safety comment --> $DIR/undocumented_unsafe_blocks.rs:472:5 | @@ -287,5 +300,5 @@ LL | let bar = unsafe {}; | = help: consider adding a safety comment on the preceding line -error: aborting due to 34 previous errors +error: aborting due to 35 previous errors From f6a12fbdf25abbbf2cf19337c6e27d8725b24ab4 Mon Sep 17 00:00:00 2001 From: Lukas Wirth Date: Thu, 17 Nov 2022 11:00:51 +0100 Subject: [PATCH 10/17] Add some more test cases for undocumented_unsafe_blocks --- .../src/undocumented_unsafe_blocks.rs | 7 +- tests/ui/undocumented_unsafe_blocks.rs | 13 ++++ tests/ui/undocumented_unsafe_blocks.stderr | 72 +++++++++++++++++-- 3 files changed, 85 insertions(+), 7 deletions(-) diff --git a/clippy_lints/src/undocumented_unsafe_blocks.rs b/clippy_lints/src/undocumented_unsafe_blocks.rs index 57ee2735aa03..080a481e87f5 100644 --- a/clippy_lints/src/undocumented_unsafe_blocks.rs +++ b/clippy_lints/src/undocumented_unsafe_blocks.rs @@ -82,7 +82,7 @@ declare_clippy_lint! { /// /// let ptr = NonNull::new(a).unwrap(); /// ``` - #[clippy::version = "1.66.0"] + #[clippy::version = "1.67.0"] pub UNNECESSARY_SAFETY_COMMENT, restriction, "creating an unsafe block without explaining why it is safe" @@ -136,6 +136,7 @@ impl LateLintPass<'_> for UndocumentedUnsafeBlocks { let item_has_safety_comment = item_has_safety_comment(cx, item); match (&item.kind, item_has_safety_comment) { + // lint unsafe impl without safety comment (hir::ItemKind::Impl(impl_), HasSafetyComment::No) if impl_.unsafety == hir::Unsafety::Unsafe => { if !is_lint_allowed(cx, UNDOCUMENTED_UNSAFE_BLOCKS, item.hir_id()) && !is_unsafe_from_proc_macro(cx, item.span) @@ -157,6 +158,7 @@ impl LateLintPass<'_> for UndocumentedUnsafeBlocks { ); } }, + // lint safe impl with unnecessary safety comment (hir::ItemKind::Impl(impl_), HasSafetyComment::Yes(pos)) if impl_.unsafety == hir::Unsafety::Normal => { if !is_lint_allowed(cx, UNNECESSARY_SAFETY_COMMENT, item.hir_id()) { let (span, help_span) = mk_spans(pos); @@ -172,6 +174,7 @@ impl LateLintPass<'_> for UndocumentedUnsafeBlocks { } }, (hir::ItemKind::Impl(_), _) => {}, + // const and static items only need a safety comment if their body is an unsafe block, lint otherwise (&hir::ItemKind::Const(.., body) | &hir::ItemKind::Static(.., body), HasSafetyComment::Yes(pos)) => { if !is_lint_allowed(cx, UNNECESSARY_SAFETY_COMMENT, body.hir_id) { let body = cx.tcx.hir().body(body); @@ -192,6 +195,8 @@ impl LateLintPass<'_> for UndocumentedUnsafeBlocks { } } }, + // Aside from unsafe impls and consts/statics with an unsafe block, items in general + // do not have safety invariants that need to be documented, so lint those. (_, HasSafetyComment::Yes(pos)) => { if !is_lint_allowed(cx, UNNECESSARY_SAFETY_COMMENT, item.hir_id()) { let (span, help_span) = mk_spans(pos); diff --git a/tests/ui/undocumented_unsafe_blocks.rs b/tests/ui/undocumented_unsafe_blocks.rs index c05eb447b2eb..f68e0b4915ea 100644 --- a/tests/ui/undocumented_unsafe_blocks.rs +++ b/tests/ui/undocumented_unsafe_blocks.rs @@ -472,6 +472,19 @@ mod unsafe_impl_invalid_comment { unsafe impl Interference for () {} } +mod unsafe_items_invalid_comment { + // SAFETY: + const CONST: u32 = 0; + // SAFETY: + static STATIC: u32 = 0; + // SAFETY: + struct Struct; + // SAFETY: + enum Enum {} + // SAFETY: + mod module {} +} + unsafe trait ImplInFn {} fn impl_in_fn() { diff --git a/tests/ui/undocumented_unsafe_blocks.stderr b/tests/ui/undocumented_unsafe_blocks.stderr index 4c6f6cd18c51..becad4f61a92 100644 --- a/tests/ui/undocumented_unsafe_blocks.stderr +++ b/tests/ui/undocumented_unsafe_blocks.stderr @@ -260,16 +260,76 @@ LL | unsafe impl Interference for () {} | = help: consider adding a safety comment on the preceding line -error: unsafe impl missing a safety comment +error: constant item has unnecessary safety comment + --> $DIR/undocumented_unsafe_blocks.rs:477:5 + | +LL | const CONST: u32 = 0; + | ^^^^^^^^^^^^^^^^^^^^^ + | +help: consider removing the safety comment + --> $DIR/undocumented_unsafe_blocks.rs:476:5 + | +LL | // SAFETY: + | ^^^^^^^^^^ + +error: static item has unnecessary safety comment --> $DIR/undocumented_unsafe_blocks.rs:479:5 | +LL | static STATIC: u32 = 0; + | ^^^^^^^^^^^^^^^^^^^^^^^ + | +help: consider removing the safety comment + --> $DIR/undocumented_unsafe_blocks.rs:478:5 + | +LL | // SAFETY: + | ^^^^^^^^^^ + +error: struct has unnecessary safety comment + --> $DIR/undocumented_unsafe_blocks.rs:481:5 + | +LL | struct Struct; + | ^^^^^^^^^^^^^^ + | +help: consider removing the safety comment + --> $DIR/undocumented_unsafe_blocks.rs:480:5 + | +LL | // SAFETY: + | ^^^^^^^^^^ + +error: enum has unnecessary safety comment + --> $DIR/undocumented_unsafe_blocks.rs:483:5 + | +LL | enum Enum {} + | ^^^^^^^^^^^^ + | +help: consider removing the safety comment + --> $DIR/undocumented_unsafe_blocks.rs:482:5 + | +LL | // SAFETY: + | ^^^^^^^^^^ + +error: module has unnecessary safety comment + --> $DIR/undocumented_unsafe_blocks.rs:485:5 + | +LL | mod module {} + | ^^^^^^^^^^^^^ + | +help: consider removing the safety comment + --> $DIR/undocumented_unsafe_blocks.rs:484:5 + | +LL | // SAFETY: + | ^^^^^^^^^^ + +error: unsafe impl missing a safety comment + --> $DIR/undocumented_unsafe_blocks.rs:492:5 + | LL | unsafe impl ImplInFn for () {} | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ | = help: consider adding a safety comment on the preceding line error: unsafe impl missing a safety comment - --> $DIR/undocumented_unsafe_blocks.rs:488:1 + --> $DIR/undocumented_unsafe_blocks.rs:501:1 | LL | unsafe impl CrateRoot for () {} | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -277,7 +337,7 @@ LL | unsafe impl CrateRoot for () {} = help: consider adding a safety comment on the preceding line error: unsafe block missing a safety comment - --> $DIR/undocumented_unsafe_blocks.rs:498:9 + --> $DIR/undocumented_unsafe_blocks.rs:511:9 | LL | unsafe {}; | ^^^^^^^^^ @@ -285,7 +345,7 @@ LL | unsafe {}; = help: consider adding a safety comment on the preceding line error: unsafe block missing a safety comment - --> $DIR/undocumented_unsafe_blocks.rs:502:12 + --> $DIR/undocumented_unsafe_blocks.rs:515:12 | LL | if unsafe { true } { | ^^^^^^^^^^^^^^^ @@ -293,12 +353,12 @@ LL | if unsafe { true } { = help: consider adding a safety comment on the preceding line error: unsafe block missing a safety comment - --> $DIR/undocumented_unsafe_blocks.rs:505:23 + --> $DIR/undocumented_unsafe_blocks.rs:518:23 | LL | let bar = unsafe {}; | ^^^^^^^^^ | = help: consider adding a safety comment on the preceding line -error: aborting due to 35 previous errors +error: aborting due to 40 previous errors From 062b58b54656f88e92d75dd234c2ff76a85309a2 Mon Sep 17 00:00:00 2001 From: Lukas Wirth Date: Thu, 17 Nov 2022 15:14:03 +0100 Subject: [PATCH 11/17] Lint unnecessary safety comments on statements and block tail expressions --- .../src/undocumented_unsafe_blocks.rs | 126 +++++++++++++++++- clippy_utils/src/visitors.rs | 12 +- tests/ui/undocumented_unsafe_blocks.rs | 31 +++++ tests/ui/undocumented_unsafe_blocks.stderr | 60 ++++++++- 4 files changed, 220 insertions(+), 9 deletions(-) diff --git a/clippy_lints/src/undocumented_unsafe_blocks.rs b/clippy_lints/src/undocumented_unsafe_blocks.rs index 080a481e87f5..c60144df757c 100644 --- a/clippy_lints/src/undocumented_unsafe_blocks.rs +++ b/clippy_lints/src/undocumented_unsafe_blocks.rs @@ -1,6 +1,10 @@ +use std::ops::ControlFlow; + use clippy_utils::diagnostics::span_lint_and_help; use clippy_utils::source::walk_span_to_context; +use clippy_utils::visitors::{for_each_expr_with_closures, Descend}; use clippy_utils::{get_parent_node, is_lint_allowed}; +use hir::HirId; use rustc_data_structures::sync::Lrc; use rustc_hir as hir; use rustc_hir::{Block, BlockCheckMode, ItemKind, Node, UnsafeSource}; @@ -90,8 +94,8 @@ declare_clippy_lint! { declare_lint_pass!(UndocumentedUnsafeBlocks => [UNDOCUMENTED_UNSAFE_BLOCKS, UNNECESSARY_SAFETY_COMMENT]); -impl LateLintPass<'_> for UndocumentedUnsafeBlocks { - fn check_block(&mut self, cx: &LateContext<'_>, block: &'_ Block<'_>) { +impl<'tcx> LateLintPass<'tcx> for UndocumentedUnsafeBlocks { + fn check_block(&mut self, cx: &LateContext<'tcx>, block: &'tcx Block<'tcx>) { if block.rules == BlockCheckMode::UnsafeBlock(UnsafeSource::UserProvided) && !in_external_macro(cx.tcx.sess, block.span) && !is_lint_allowed(cx, UNDOCUMENTED_UNSAFE_BLOCKS, block.hir_id) @@ -115,6 +119,45 @@ impl LateLintPass<'_> for UndocumentedUnsafeBlocks { "consider adding a safety comment on the preceding line", ); } + + if let Some(tail) = block.expr + && !is_lint_allowed(cx, UNNECESSARY_SAFETY_COMMENT, tail.hir_id) + && !in_external_macro(cx.tcx.sess, tail.span) + && let HasSafetyComment::Yes(pos) = stmt_has_safety_comment(cx, tail.span, tail.hir_id) + && let Some(help_span) = expr_has_unnecessary_safety_comment(cx, tail, pos) + { + span_lint_and_help( + cx, + UNNECESSARY_SAFETY_COMMENT, + tail.span, + "expression has unnecessary safety comment", + Some(help_span), + "consider removing the safety comment", + ); + } + } + + fn check_stmt(&mut self, cx: &LateContext<'tcx>, stmt: &hir::Stmt<'tcx>) { + let expr = match stmt.kind { + hir::StmtKind::Local(&hir::Local { init: Some(expr), .. }) + | hir::StmtKind::Expr(expr) + | hir::StmtKind::Semi(expr) => expr, + _ => return, + }; + if !is_lint_allowed(cx, UNNECESSARY_SAFETY_COMMENT, stmt.hir_id) + && !in_external_macro(cx.tcx.sess, stmt.span) + && let HasSafetyComment::Yes(pos) = stmt_has_safety_comment(cx, stmt.span, stmt.hir_id) + && let Some(help_span) = expr_has_unnecessary_safety_comment(cx, expr, pos) + { + span_lint_and_help( + cx, + UNNECESSARY_SAFETY_COMMENT, + stmt.span, + "statement has unnecessary safety comment", + Some(help_span), + "consider removing the safety comment", + ); + } } fn check_item(&mut self, cx: &LateContext<'_>, item: &hir::Item<'_>) { @@ -216,6 +259,36 @@ impl LateLintPass<'_> for UndocumentedUnsafeBlocks { } } +fn expr_has_unnecessary_safety_comment<'tcx>( + cx: &LateContext<'tcx>, + expr: &'tcx hir::Expr<'tcx>, + comment_pos: BytePos, +) -> Option { + // this should roughly be the reverse of `block_parents_have_safety_comment` + if for_each_expr_with_closures(cx, expr, |expr| match expr.kind { + hir::ExprKind::Block( + Block { + rules: BlockCheckMode::UnsafeBlock(UnsafeSource::UserProvided), + .. + }, + _, + ) => ControlFlow::Break(()), + // statements will be handled by check_stmt itself again + hir::ExprKind::Block(..) => ControlFlow::Continue(Descend::No), + _ => ControlFlow::Continue(Descend::Yes), + }) + .is_some() + { + return None; + } + + let source_map = cx.tcx.sess.source_map(); + let span = Span::new(comment_pos, comment_pos, SyntaxContext::root(), None); + let help_span = source_map.span_extend_to_next_char(span, '\n', true); + + Some(help_span) +} + fn is_unsafe_from_proc_macro(cx: &LateContext<'_>, span: Span) -> bool { let source_map = cx.sess().source_map(); let file_pos = source_map.lookup_byte_offset(span.lo()); @@ -358,6 +431,55 @@ fn item_has_safety_comment(cx: &LateContext<'_>, item: &hir::Item<'_>) -> HasSaf } } +/// Checks if the lines immediately preceding the item contain a safety comment. +#[allow(clippy::collapsible_match)] +fn stmt_has_safety_comment(cx: &LateContext<'_>, span: Span, hir_id: HirId) -> HasSafetyComment { + match span_from_macro_expansion_has_safety_comment(cx, span) { + HasSafetyComment::Maybe => (), + has_safety_comment => return has_safety_comment, + } + + if span.ctxt() == SyntaxContext::root() { + if let Some(parent_node) = get_parent_node(cx.tcx, hir_id) { + let comment_start = match parent_node { + Node::Block(block) => walk_span_to_context(block.span, SyntaxContext::root()).map(Span::lo), + _ => return HasSafetyComment::Maybe, + }; + + let source_map = cx.sess().source_map(); + if let Some(comment_start) = comment_start + && let Ok(unsafe_line) = source_map.lookup_line(span.lo()) + && let Ok(comment_start_line) = source_map.lookup_line(comment_start) + && Lrc::ptr_eq(&unsafe_line.sf, &comment_start_line.sf) + && let Some(src) = unsafe_line.sf.src.as_deref() + { + unsafe_line.sf.lines(|lines| { + if comment_start_line.line >= unsafe_line.line { + HasSafetyComment::No + } else { + match text_has_safety_comment( + src, + &lines[comment_start_line.line + 1..=unsafe_line.line], + unsafe_line.sf.start_pos.to_usize(), + ) { + Some(b) => HasSafetyComment::Yes(b), + None => HasSafetyComment::No, + } + } + }) + } else { + // Problem getting source text. Pretend a comment was found. + HasSafetyComment::Maybe + } + } else { + // No parent node. Pretend a comment was found. + HasSafetyComment::Maybe + } + } else { + HasSafetyComment::No + } +} + fn comment_start_before_item_in_mod( cx: &LateContext<'_>, parent_mod: &hir::Mod<'_>, diff --git a/clippy_utils/src/visitors.rs b/clippy_utils/src/visitors.rs index d4294f18fd50..863fb60fcfca 100644 --- a/clippy_utils/src/visitors.rs +++ b/clippy_utils/src/visitors.rs @@ -170,22 +170,22 @@ where cb: F, } - struct WithStmtGuarg<'a, F> { + struct WithStmtGuard<'a, F> { val: &'a mut RetFinder, prev_in_stmt: bool, } impl RetFinder { - fn inside_stmt(&mut self, in_stmt: bool) -> WithStmtGuarg<'_, F> { + fn inside_stmt(&mut self, in_stmt: bool) -> WithStmtGuard<'_, F> { let prev_in_stmt = std::mem::replace(&mut self.in_stmt, in_stmt); - WithStmtGuarg { + WithStmtGuard { val: self, prev_in_stmt, } } } - impl std::ops::Deref for WithStmtGuarg<'_, F> { + impl std::ops::Deref for WithStmtGuard<'_, F> { type Target = RetFinder; fn deref(&self) -> &Self::Target { @@ -193,13 +193,13 @@ where } } - impl std::ops::DerefMut for WithStmtGuarg<'_, F> { + impl std::ops::DerefMut for WithStmtGuard<'_, F> { fn deref_mut(&mut self) -> &mut Self::Target { self.val } } - impl Drop for WithStmtGuarg<'_, F> { + impl Drop for WithStmtGuard<'_, F> { fn drop(&mut self) { self.val.in_stmt = self.prev_in_stmt; } diff --git a/tests/ui/undocumented_unsafe_blocks.rs b/tests/ui/undocumented_unsafe_blocks.rs index f68e0b4915ea..cb99ce0d4214 100644 --- a/tests/ui/undocumented_unsafe_blocks.rs +++ b/tests/ui/undocumented_unsafe_blocks.rs @@ -522,4 +522,35 @@ fn issue_9142() { }; } +mod unnecessary_from_macro { + trait T {} + + macro_rules! no_safety_comment { + ($t:ty) => { + impl T for $t {} + }; + } + + // FIXME: This is not caught + // Safety: unnecessary + no_safety_comment!(()); + + macro_rules! with_safety_comment { + ($t:ty) => { + // Safety: unnecessary + impl T for $t {} + }; + } + + with_safety_comment!(i32); +} + +fn unnecessary_on_stmt_and_expr() -> u32 { + // SAFETY: unnecessary + let num = 42; + + // SAFETY: unnecessary + 24 +} + fn main() {} diff --git a/tests/ui/undocumented_unsafe_blocks.stderr b/tests/ui/undocumented_unsafe_blocks.stderr index becad4f61a92..919fd51351cb 100644 --- a/tests/ui/undocumented_unsafe_blocks.stderr +++ b/tests/ui/undocumented_unsafe_blocks.stderr @@ -344,6 +344,24 @@ LL | unsafe {}; | = help: consider adding a safety comment on the preceding line +error: statement has unnecessary safety comment + --> $DIR/undocumented_unsafe_blocks.rs:514:5 + | +LL | / let _ = { +LL | | if unsafe { true } { +LL | | todo!(); +LL | | } else { +... | +LL | | } +LL | | }; + | |______^ + | +help: consider removing the safety comment + --> $DIR/undocumented_unsafe_blocks.rs:513:5 + | +LL | // SAFETY: this is more than one level away, so it should warn + | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + error: unsafe block missing a safety comment --> $DIR/undocumented_unsafe_blocks.rs:515:12 | @@ -360,5 +378,45 @@ LL | let bar = unsafe {}; | = help: consider adding a safety comment on the preceding line -error: aborting due to 40 previous errors +error: impl has unnecessary safety comment + --> $DIR/undocumented_unsafe_blocks.rs:541:13 + | +LL | impl T for $t {} + | ^^^^^^^^^^^^^^^^ +... +LL | with_safety_comment!(i32); + | ------------------------- in this macro invocation + | +help: consider removing the safety comment + --> $DIR/undocumented_unsafe_blocks.rs:540:13 + | +LL | // Safety: unnecessary + | ^^^^^^^^^^^^^^^^^^^^^^ + = note: this error originates in the macro `with_safety_comment` (in Nightly builds, run with -Z macro-backtrace for more info) + +error: expression has unnecessary safety comment + --> $DIR/undocumented_unsafe_blocks.rs:553:5 + | +LL | 24 + | ^^ + | +help: consider removing the safety comment + --> $DIR/undocumented_unsafe_blocks.rs:552:5 + | +LL | // SAFETY: unnecessary + | ^^^^^^^^^^^^^^^^^^^^^^ + +error: statement has unnecessary safety comment + --> $DIR/undocumented_unsafe_blocks.rs:550:5 + | +LL | let num = 42; + | ^^^^^^^^^^^^^ + | +help: consider removing the safety comment + --> $DIR/undocumented_unsafe_blocks.rs:549:5 + | +LL | // SAFETY: unnecessary + | ^^^^^^^^^^^^^^^^^^^^^^ + +error: aborting due to 44 previous errors From dea8bee8af1046ecfaee80c9911db0b747c86b01 Mon Sep 17 00:00:00 2001 From: Lukas Wirth Date: Thu, 17 Nov 2022 15:17:28 +0100 Subject: [PATCH 12/17] Simplify --- .../src/undocumented_unsafe_blocks.rs | 173 ++++++++---------- 1 file changed, 78 insertions(+), 95 deletions(-) diff --git a/clippy_lints/src/undocumented_unsafe_blocks.rs b/clippy_lints/src/undocumented_unsafe_blocks.rs index c60144df757c..d7e483423064 100644 --- a/clippy_lints/src/undocumented_unsafe_blocks.rs +++ b/clippy_lints/src/undocumented_unsafe_blocks.rs @@ -363,72 +363,60 @@ fn item_has_safety_comment(cx: &LateContext<'_>, item: &hir::Item<'_>) -> HasSaf has_safety_comment => return has_safety_comment, } - if item.span.ctxt() == SyntaxContext::root() { - if let Some(parent_node) = get_parent_node(cx.tcx, item.hir_id()) { - let comment_start = match parent_node { - Node::Crate(parent_mod) => { - comment_start_before_item_in_mod(cx, parent_mod, parent_mod.spans.inner_span, item) - }, - Node::Item(parent_item) => { - if let ItemKind::Mod(parent_mod) = &parent_item.kind { - comment_start_before_item_in_mod(cx, parent_mod, parent_item.span, item) - } else { - // Doesn't support impls in this position. Pretend a comment was found. - return HasSafetyComment::Maybe; - } - }, - Node::Stmt(stmt) => { - if let Some(stmt_parent) = get_parent_node(cx.tcx, stmt.hir_id) { - match stmt_parent { - Node::Block(block) => walk_span_to_context(block.span, SyntaxContext::root()).map(Span::lo), - _ => { - // Doesn't support impls in this position. Pretend a comment was found. - return HasSafetyComment::Maybe; - }, - } - } else { - // Problem getting the parent node. Pretend a comment was found. - return HasSafetyComment::Maybe; - } - }, - _ => { + if item.span.ctxt() != SyntaxContext::root() { + return HasSafetyComment::No; + } + if let Some(parent_node) = get_parent_node(cx.tcx, item.hir_id()) { + let comment_start = match parent_node { + Node::Crate(parent_mod) => { + comment_start_before_item_in_mod(cx, parent_mod, parent_mod.spans.inner_span, item) + }, + Node::Item(parent_item) => { + if let ItemKind::Mod(parent_mod) = &parent_item.kind { + comment_start_before_item_in_mod(cx, parent_mod, parent_item.span, item) + } else { // Doesn't support impls in this position. Pretend a comment was found. return HasSafetyComment::Maybe; - }, - }; + } + }, + Node::Stmt(stmt) => { + if let Some(Node::Block(block)) = get_parent_node(cx.tcx, stmt.hir_id) { + walk_span_to_context(block.span, SyntaxContext::root()).map(Span::lo) + } else { + // Problem getting the parent node. Pretend a comment was found. + return HasSafetyComment::Maybe; + } + }, + _ => { + // Doesn't support impls in this position. Pretend a comment was found. + return HasSafetyComment::Maybe; + }, + }; - let source_map = cx.sess().source_map(); - if let Some(comment_start) = comment_start - && let Ok(unsafe_line) = source_map.lookup_line(item.span.lo()) - && let Ok(comment_start_line) = source_map.lookup_line(comment_start) - && Lrc::ptr_eq(&unsafe_line.sf, &comment_start_line.sf) - && let Some(src) = unsafe_line.sf.src.as_deref() - { - unsafe_line.sf.lines(|lines| { - if comment_start_line.line >= unsafe_line.line { - HasSafetyComment::No - } else { - match text_has_safety_comment( - src, - &lines[comment_start_line.line + 1..=unsafe_line.line], - unsafe_line.sf.start_pos.to_usize(), - ) { - Some(b) => HasSafetyComment::Yes(b), - None => HasSafetyComment::No, - } + let source_map = cx.sess().source_map(); + if let Some(comment_start) = comment_start + && let Ok(unsafe_line) = source_map.lookup_line(item.span.lo()) + && let Ok(comment_start_line) = source_map.lookup_line(comment_start) + && Lrc::ptr_eq(&unsafe_line.sf, &comment_start_line.sf) + && let Some(src) = unsafe_line.sf.src.as_deref() + { + return unsafe_line.sf.lines(|lines| { + if comment_start_line.line >= unsafe_line.line { + HasSafetyComment::No + } else { + match text_has_safety_comment( + src, + &lines[comment_start_line.line + 1..=unsafe_line.line], + unsafe_line.sf.start_pos.to_usize(), + ) { + Some(b) => HasSafetyComment::Yes(b), + None => HasSafetyComment::No, } - }) - } else { - // Problem getting source text. Pretend a comment was found. - HasSafetyComment::Maybe - } - } else { - // No parent node. Pretend a comment was found. - HasSafetyComment::Maybe + } + }); } - } else { - HasSafetyComment::No } + HasSafetyComment::Maybe } /// Checks if the lines immediately preceding the item contain a safety comment. @@ -439,45 +427,40 @@ fn stmt_has_safety_comment(cx: &LateContext<'_>, span: Span, hir_id: HirId) -> H has_safety_comment => return has_safety_comment, } - if span.ctxt() == SyntaxContext::root() { - if let Some(parent_node) = get_parent_node(cx.tcx, hir_id) { - let comment_start = match parent_node { - Node::Block(block) => walk_span_to_context(block.span, SyntaxContext::root()).map(Span::lo), - _ => return HasSafetyComment::Maybe, - }; + if span.ctxt() != SyntaxContext::root() { + return HasSafetyComment::No; + } - let source_map = cx.sess().source_map(); - if let Some(comment_start) = comment_start - && let Ok(unsafe_line) = source_map.lookup_line(span.lo()) - && let Ok(comment_start_line) = source_map.lookup_line(comment_start) - && Lrc::ptr_eq(&unsafe_line.sf, &comment_start_line.sf) - && let Some(src) = unsafe_line.sf.src.as_deref() - { - unsafe_line.sf.lines(|lines| { - if comment_start_line.line >= unsafe_line.line { - HasSafetyComment::No - } else { - match text_has_safety_comment( - src, - &lines[comment_start_line.line + 1..=unsafe_line.line], - unsafe_line.sf.start_pos.to_usize(), - ) { - Some(b) => HasSafetyComment::Yes(b), - None => HasSafetyComment::No, - } + if let Some(parent_node) = get_parent_node(cx.tcx, hir_id) { + let comment_start = match parent_node { + Node::Block(block) => walk_span_to_context(block.span, SyntaxContext::root()).map(Span::lo), + _ => return HasSafetyComment::Maybe, + }; + + let source_map = cx.sess().source_map(); + if let Some(comment_start) = comment_start + && let Ok(unsafe_line) = source_map.lookup_line(span.lo()) + && let Ok(comment_start_line) = source_map.lookup_line(comment_start) + && Lrc::ptr_eq(&unsafe_line.sf, &comment_start_line.sf) + && let Some(src) = unsafe_line.sf.src.as_deref() + { + return unsafe_line.sf.lines(|lines| { + if comment_start_line.line >= unsafe_line.line { + HasSafetyComment::No + } else { + match text_has_safety_comment( + src, + &lines[comment_start_line.line + 1..=unsafe_line.line], + unsafe_line.sf.start_pos.to_usize(), + ) { + Some(b) => HasSafetyComment::Yes(b), + None => HasSafetyComment::No, } - }) - } else { - // Problem getting source text. Pretend a comment was found. - HasSafetyComment::Maybe - } - } else { - // No parent node. Pretend a comment was found. - HasSafetyComment::Maybe + } + }); } - } else { - HasSafetyComment::No } + HasSafetyComment::Maybe } fn comment_start_before_item_in_mod( From e14a584466e51f5e9d87cbe7846084c60c0157fc Mon Sep 17 00:00:00 2001 From: Lukas Wirth Date: Thu, 24 Nov 2022 09:47:50 +0100 Subject: [PATCH 13/17] Address reviews --- .../src/undocumented_unsafe_blocks.rs | 9 +- tests/ui/undocumented_unsafe_blocks.rs | 44 ------- tests/ui/undocumented_unsafe_blocks.stderr | 116 ++---------------- tests/ui/unnecessary_safety_comment.rs | 51 ++++++++ tests/ui/unnecessary_safety_comment.stderr | 115 +++++++++++++++++ 5 files changed, 178 insertions(+), 157 deletions(-) create mode 100644 tests/ui/unnecessary_safety_comment.rs create mode 100644 tests/ui/unnecessary_safety_comment.stderr diff --git a/clippy_lints/src/undocumented_unsafe_blocks.rs b/clippy_lints/src/undocumented_unsafe_blocks.rs index d7e483423064..2e1b6d8d4ea7 100644 --- a/clippy_lints/src/undocumented_unsafe_blocks.rs +++ b/clippy_lints/src/undocumented_unsafe_blocks.rs @@ -89,7 +89,7 @@ declare_clippy_lint! { #[clippy::version = "1.67.0"] pub UNNECESSARY_SAFETY_COMMENT, restriction, - "creating an unsafe block without explaining why it is safe" + "annotating safe code with a safety comment" } declare_lint_pass!(UndocumentedUnsafeBlocks => [UNDOCUMENTED_UNSAFE_BLOCKS, UNNECESSARY_SAFETY_COMMENT]); @@ -138,12 +138,11 @@ impl<'tcx> LateLintPass<'tcx> for UndocumentedUnsafeBlocks { } fn check_stmt(&mut self, cx: &LateContext<'tcx>, stmt: &hir::Stmt<'tcx>) { - let expr = match stmt.kind { + let ( hir::StmtKind::Local(&hir::Local { init: Some(expr), .. }) | hir::StmtKind::Expr(expr) - | hir::StmtKind::Semi(expr) => expr, - _ => return, - }; + | hir::StmtKind::Semi(expr) + ) = stmt.kind else { return }; if !is_lint_allowed(cx, UNNECESSARY_SAFETY_COMMENT, stmt.hir_id) && !in_external_macro(cx.tcx.sess, stmt.span) && let HasSafetyComment::Yes(pos) = stmt_has_safety_comment(cx, stmt.span, stmt.hir_id) diff --git a/tests/ui/undocumented_unsafe_blocks.rs b/tests/ui/undocumented_unsafe_blocks.rs index cb99ce0d4214..c05eb447b2eb 100644 --- a/tests/ui/undocumented_unsafe_blocks.rs +++ b/tests/ui/undocumented_unsafe_blocks.rs @@ -472,19 +472,6 @@ mod unsafe_impl_invalid_comment { unsafe impl Interference for () {} } -mod unsafe_items_invalid_comment { - // SAFETY: - const CONST: u32 = 0; - // SAFETY: - static STATIC: u32 = 0; - // SAFETY: - struct Struct; - // SAFETY: - enum Enum {} - // SAFETY: - mod module {} -} - unsafe trait ImplInFn {} fn impl_in_fn() { @@ -522,35 +509,4 @@ fn issue_9142() { }; } -mod unnecessary_from_macro { - trait T {} - - macro_rules! no_safety_comment { - ($t:ty) => { - impl T for $t {} - }; - } - - // FIXME: This is not caught - // Safety: unnecessary - no_safety_comment!(()); - - macro_rules! with_safety_comment { - ($t:ty) => { - // Safety: unnecessary - impl T for $t {} - }; - } - - with_safety_comment!(i32); -} - -fn unnecessary_on_stmt_and_expr() -> u32 { - // SAFETY: unnecessary - let num = 42; - - // SAFETY: unnecessary - 24 -} - fn main() {} diff --git a/tests/ui/undocumented_unsafe_blocks.stderr b/tests/ui/undocumented_unsafe_blocks.stderr index 919fd51351cb..d1c1bb5ffeac 100644 --- a/tests/ui/undocumented_unsafe_blocks.stderr +++ b/tests/ui/undocumented_unsafe_blocks.stderr @@ -260,68 +260,8 @@ LL | unsafe impl Interference for () {} | = help: consider adding a safety comment on the preceding line -error: constant item has unnecessary safety comment - --> $DIR/undocumented_unsafe_blocks.rs:477:5 - | -LL | const CONST: u32 = 0; - | ^^^^^^^^^^^^^^^^^^^^^ - | -help: consider removing the safety comment - --> $DIR/undocumented_unsafe_blocks.rs:476:5 - | -LL | // SAFETY: - | ^^^^^^^^^^ - -error: static item has unnecessary safety comment - --> $DIR/undocumented_unsafe_blocks.rs:479:5 - | -LL | static STATIC: u32 = 0; - | ^^^^^^^^^^^^^^^^^^^^^^^ - | -help: consider removing the safety comment - --> $DIR/undocumented_unsafe_blocks.rs:478:5 - | -LL | // SAFETY: - | ^^^^^^^^^^ - -error: struct has unnecessary safety comment - --> $DIR/undocumented_unsafe_blocks.rs:481:5 - | -LL | struct Struct; - | ^^^^^^^^^^^^^^ - | -help: consider removing the safety comment - --> $DIR/undocumented_unsafe_blocks.rs:480:5 - | -LL | // SAFETY: - | ^^^^^^^^^^ - -error: enum has unnecessary safety comment - --> $DIR/undocumented_unsafe_blocks.rs:483:5 - | -LL | enum Enum {} - | ^^^^^^^^^^^^ - | -help: consider removing the safety comment - --> $DIR/undocumented_unsafe_blocks.rs:482:5 - | -LL | // SAFETY: - | ^^^^^^^^^^ - -error: module has unnecessary safety comment - --> $DIR/undocumented_unsafe_blocks.rs:485:5 - | -LL | mod module {} - | ^^^^^^^^^^^^^ - | -help: consider removing the safety comment - --> $DIR/undocumented_unsafe_blocks.rs:484:5 - | -LL | // SAFETY: - | ^^^^^^^^^^ - error: unsafe impl missing a safety comment - --> $DIR/undocumented_unsafe_blocks.rs:492:5 + --> $DIR/undocumented_unsafe_blocks.rs:479:5 | LL | unsafe impl ImplInFn for () {} | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -329,7 +269,7 @@ LL | unsafe impl ImplInFn for () {} = help: consider adding a safety comment on the preceding line error: unsafe impl missing a safety comment - --> $DIR/undocumented_unsafe_blocks.rs:501:1 + --> $DIR/undocumented_unsafe_blocks.rs:488:1 | LL | unsafe impl CrateRoot for () {} | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -337,7 +277,7 @@ LL | unsafe impl CrateRoot for () {} = help: consider adding a safety comment on the preceding line error: unsafe block missing a safety comment - --> $DIR/undocumented_unsafe_blocks.rs:511:9 + --> $DIR/undocumented_unsafe_blocks.rs:498:9 | LL | unsafe {}; | ^^^^^^^^^ @@ -345,7 +285,7 @@ LL | unsafe {}; = help: consider adding a safety comment on the preceding line error: statement has unnecessary safety comment - --> $DIR/undocumented_unsafe_blocks.rs:514:5 + --> $DIR/undocumented_unsafe_blocks.rs:501:5 | LL | / let _ = { LL | | if unsafe { true } { @@ -357,13 +297,13 @@ LL | | }; | |______^ | help: consider removing the safety comment - --> $DIR/undocumented_unsafe_blocks.rs:513:5 + --> $DIR/undocumented_unsafe_blocks.rs:500:5 | LL | // SAFETY: this is more than one level away, so it should warn | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ error: unsafe block missing a safety comment - --> $DIR/undocumented_unsafe_blocks.rs:515:12 + --> $DIR/undocumented_unsafe_blocks.rs:502:12 | LL | if unsafe { true } { | ^^^^^^^^^^^^^^^ @@ -371,52 +311,12 @@ LL | if unsafe { true } { = help: consider adding a safety comment on the preceding line error: unsafe block missing a safety comment - --> $DIR/undocumented_unsafe_blocks.rs:518:23 + --> $DIR/undocumented_unsafe_blocks.rs:505:23 | LL | let bar = unsafe {}; | ^^^^^^^^^ | = help: consider adding a safety comment on the preceding line -error: impl has unnecessary safety comment - --> $DIR/undocumented_unsafe_blocks.rs:541:13 - | -LL | impl T for $t {} - | ^^^^^^^^^^^^^^^^ -... -LL | with_safety_comment!(i32); - | ------------------------- in this macro invocation - | -help: consider removing the safety comment - --> $DIR/undocumented_unsafe_blocks.rs:540:13 - | -LL | // Safety: unnecessary - | ^^^^^^^^^^^^^^^^^^^^^^ - = note: this error originates in the macro `with_safety_comment` (in Nightly builds, run with -Z macro-backtrace for more info) - -error: expression has unnecessary safety comment - --> $DIR/undocumented_unsafe_blocks.rs:553:5 - | -LL | 24 - | ^^ - | -help: consider removing the safety comment - --> $DIR/undocumented_unsafe_blocks.rs:552:5 - | -LL | // SAFETY: unnecessary - | ^^^^^^^^^^^^^^^^^^^^^^ - -error: statement has unnecessary safety comment - --> $DIR/undocumented_unsafe_blocks.rs:550:5 - | -LL | let num = 42; - | ^^^^^^^^^^^^^ - | -help: consider removing the safety comment - --> $DIR/undocumented_unsafe_blocks.rs:549:5 - | -LL | // SAFETY: unnecessary - | ^^^^^^^^^^^^^^^^^^^^^^ - -error: aborting due to 44 previous errors +error: aborting due to 36 previous errors diff --git a/tests/ui/unnecessary_safety_comment.rs b/tests/ui/unnecessary_safety_comment.rs new file mode 100644 index 000000000000..7fefea7051d6 --- /dev/null +++ b/tests/ui/unnecessary_safety_comment.rs @@ -0,0 +1,51 @@ +#![warn(clippy::undocumented_unsafe_blocks, clippy::unnecessary_safety_comment)] +#![allow(clippy::let_unit_value, clippy::missing_safety_doc)] + +mod unsafe_items_invalid_comment { + // SAFETY: + const CONST: u32 = 0; + // SAFETY: + static STATIC: u32 = 0; + // SAFETY: + struct Struct; + // SAFETY: + enum Enum {} + // SAFETY: + mod module {} +} + +mod unnecessary_from_macro { + trait T {} + + macro_rules! no_safety_comment { + ($t:ty) => { + impl T for $t {} + }; + } + + // FIXME: This is not caught + // Safety: unnecessary + no_safety_comment!(()); + + macro_rules! with_safety_comment { + ($t:ty) => { + // Safety: unnecessary + impl T for $t {} + }; + } + + with_safety_comment!(i32); +} + +fn unnecessary_on_stmt_and_expr() -> u32 { + // SAFETY: unnecessary + let num = 42; + + // SAFETY: unnecessary + if num > 24 {} + + // SAFETY: unnecessary + 24 +} + +fn main() {} diff --git a/tests/ui/unnecessary_safety_comment.stderr b/tests/ui/unnecessary_safety_comment.stderr new file mode 100644 index 000000000000..7b2af67d64c7 --- /dev/null +++ b/tests/ui/unnecessary_safety_comment.stderr @@ -0,0 +1,115 @@ +error: constant item has unnecessary safety comment + --> $DIR/unnecessary_safety_comment.rs:6:5 + | +LL | const CONST: u32 = 0; + | ^^^^^^^^^^^^^^^^^^^^^ + | +help: consider removing the safety comment + --> $DIR/unnecessary_safety_comment.rs:5:5 + | +LL | // SAFETY: + | ^^^^^^^^^^ + = note: `-D clippy::unnecessary-safety-comment` implied by `-D warnings` + +error: static item has unnecessary safety comment + --> $DIR/unnecessary_safety_comment.rs:8:5 + | +LL | static STATIC: u32 = 0; + | ^^^^^^^^^^^^^^^^^^^^^^^ + | +help: consider removing the safety comment + --> $DIR/unnecessary_safety_comment.rs:7:5 + | +LL | // SAFETY: + | ^^^^^^^^^^ + +error: struct has unnecessary safety comment + --> $DIR/unnecessary_safety_comment.rs:10:5 + | +LL | struct Struct; + | ^^^^^^^^^^^^^^ + | +help: consider removing the safety comment + --> $DIR/unnecessary_safety_comment.rs:9:5 + | +LL | // SAFETY: + | ^^^^^^^^^^ + +error: enum has unnecessary safety comment + --> $DIR/unnecessary_safety_comment.rs:12:5 + | +LL | enum Enum {} + | ^^^^^^^^^^^^ + | +help: consider removing the safety comment + --> $DIR/unnecessary_safety_comment.rs:11:5 + | +LL | // SAFETY: + | ^^^^^^^^^^ + +error: module has unnecessary safety comment + --> $DIR/unnecessary_safety_comment.rs:14:5 + | +LL | mod module {} + | ^^^^^^^^^^^^^ + | +help: consider removing the safety comment + --> $DIR/unnecessary_safety_comment.rs:13:5 + | +LL | // SAFETY: + | ^^^^^^^^^^ + +error: impl has unnecessary safety comment + --> $DIR/unnecessary_safety_comment.rs:33:13 + | +LL | impl T for $t {} + | ^^^^^^^^^^^^^^^^ +... +LL | with_safety_comment!(i32); + | ------------------------- in this macro invocation + | +help: consider removing the safety comment + --> $DIR/unnecessary_safety_comment.rs:32:13 + | +LL | // Safety: unnecessary + | ^^^^^^^^^^^^^^^^^^^^^^ + = note: this error originates in the macro `with_safety_comment` (in Nightly builds, run with -Z macro-backtrace for more info) + +error: expression has unnecessary safety comment + --> $DIR/unnecessary_safety_comment.rs:48:5 + | +LL | 24 + | ^^ + | +help: consider removing the safety comment + --> $DIR/unnecessary_safety_comment.rs:47:5 + | +LL | // SAFETY: unnecessary + | ^^^^^^^^^^^^^^^^^^^^^^ + +error: statement has unnecessary safety comment + --> $DIR/unnecessary_safety_comment.rs:42:5 + | +LL | let num = 42; + | ^^^^^^^^^^^^^ + | +help: consider removing the safety comment + --> $DIR/unnecessary_safety_comment.rs:41:5 + | +LL | // SAFETY: unnecessary + | ^^^^^^^^^^^^^^^^^^^^^^ + +error: statement has unnecessary safety comment + --> $DIR/unnecessary_safety_comment.rs:45:5 + | +LL | if num > 24 {} + | ^^^^^^^^^^^^^^ + | +help: consider removing the safety comment + --> $DIR/unnecessary_safety_comment.rs:44:5 + | +LL | // SAFETY: unnecessary + | ^^^^^^^^^^^^^^^^^^^^^^ + +error: aborting due to 9 previous errors + From 5977a4f8db59fe0d949782e730d1efb6328e529a Mon Sep 17 00:00:00 2001 From: Philipp Krones Date: Fri, 25 Nov 2022 11:39:04 +0100 Subject: [PATCH 14/17] Move syntax tree patterns RFC to the book --- book/src/SUMMARY.md | 1 + .../src/development/proposals/syntax-tree-patterns.md | 0 2 files changed, 1 insertion(+) rename rfcs/0001-syntax-tree-patterns.md => book/src/development/proposals/syntax-tree-patterns.md (100%) diff --git a/book/src/SUMMARY.md b/book/src/SUMMARY.md index 0b945faf9b78..1f0b8db28a15 100644 --- a/book/src/SUMMARY.md +++ b/book/src/SUMMARY.md @@ -21,3 +21,4 @@ - [The Clippy Book](development/infrastructure/book.md) - [Proposals](development/proposals/README.md) - [Roadmap 2021](development/proposals/roadmap-2021.md) + - [Syntax Tree Patterns](development/proposals/syntax-tree-patterns.md) diff --git a/rfcs/0001-syntax-tree-patterns.md b/book/src/development/proposals/syntax-tree-patterns.md similarity index 100% rename from rfcs/0001-syntax-tree-patterns.md rename to book/src/development/proposals/syntax-tree-patterns.md From 6c0d1107f19612a7095cf5a3b3f3a78c260a3d5c Mon Sep 17 00:00:00 2001 From: Philipp Krones Date: Fri, 25 Nov 2022 11:39:36 +0100 Subject: [PATCH 15/17] Book: Format syntax tree pattern proposal --- .../proposals/syntax-tree-patterns.md | 575 ++++++++++++------ 1 file changed, 386 insertions(+), 189 deletions(-) diff --git a/book/src/development/proposals/syntax-tree-patterns.md b/book/src/development/proposals/syntax-tree-patterns.md index 9161986a7b77..c5587c4bf908 100644 --- a/book/src/development/proposals/syntax-tree-patterns.md +++ b/book/src/development/proposals/syntax-tree-patterns.md @@ -1,23 +1,22 @@ - - - Feature Name: syntax-tree-patterns - Start Date: 2019-03-12 -- RFC PR: (leave this empty) -- Rust Issue: (leave this empty) - -> Note: This project is part of my Master's Thesis (supervised by [@oli-obk](https://github.com/oli-obk)) +- RFC PR: [#3875](https://github.com/rust-lang/rust-clippy/pull/3875) # Summary -Introduce a domain-specific language (similar to regular expressions) that allows to describe lints using *syntax tree patterns*. +Introduce a domain-specific language (similar to regular expressions) that +allows to describe lints using *syntax tree patterns*. # Motivation +Finding parts of a syntax tree (AST, HIR, ...) that have certain properties +(e.g. "*an if that has a block as its condition*") is a major task when writing +lints. For non-trivial lints, it often requires nested pattern matching of AST / +HIR nodes. For example, testing that an expression is a boolean literal requires +the following checks: -Finding parts of a syntax tree (AST, HIR, ...) that have certain properties (e.g. "*an if that has a block as its condition*") is a major task when writing lints. For non-trivial lints, it often requires nested pattern matching of AST / HIR nodes. For example, testing that an expression is a boolean literal requires the following checks: - -``` +```rust if let ast::ExprKind::Lit(lit) = &expr.node { if let ast::LitKind::Bool(_) = &lit.node { ... @@ -25,9 +24,11 @@ if let ast::ExprKind::Lit(lit) = &expr.node { } ``` -Writing this kind of matching code quickly becomes a complex task and the resulting code is often hard to comprehend. The code below shows a simplified version of the pattern matching required by the `collapsible_if` lint: +Writing this kind of matching code quickly becomes a complex task and the +resulting code is often hard to comprehend. The code below shows a simplified +version of the pattern matching required by the `collapsible_if` lint: -``` +```rust // simplified version of the collapsible_if lint if let ast::ExprKind::If(check, then, None) = &expr.node { if then.stmts.len() == 1 { @@ -40,9 +41,10 @@ if let ast::ExprKind::If(check, then, None) = &expr.node { } ``` -The `if_chain` macro can improve readability by flattening the nested if statements, but the resulting code is still quite hard to read: +The `if_chain` macro can improve readability by flattening the nested if +statements, but the resulting code is still quite hard to read: -``` +```rust // simplified version of the collapsible_if lint if_chain! { if let ast::ExprKind::If(check, then, None) = &expr.node; @@ -55,39 +57,66 @@ if_chain! { } ``` -The code above matches if expressions that contain only another if expression (where both ifs don't have an else branch). While it's easy to explain what the lint does, it's hard to see that from looking at the code samples above. +The code above matches if expressions that contain only another if expression +(where both ifs don't have an else branch). While it's easy to explain what the +lint does, it's hard to see that from looking at the code samples above. -Following the motivation above, the first goal this RFC is to **simplify writing and reading lints**. +Following the motivation above, the first goal this RFC is to **simplify writing +and reading lints**. -The second part of the motivation is clippy's dependence on unstable compiler-internal data structures. Clippy lints are currently written against the compiler's AST / HIR which means that even small changes in these data structures might break a lot of lints. The second goal of this RFC is to **make lints independant of the compiler's AST / HIR data structures**. +The second part of the motivation is clippy's dependence on unstable +compiler-internal data structures. Clippy lints are currently written against +the compiler's AST / HIR which means that even small changes in these data +structures might break a lot of lints. The second goal of this RFC is to **make +lints independant of the compiler's AST / HIR data structures**. # Approach -A lot of complexity in writing lints currently seems to come from having to manually implement the matching logic (see code samples above). It's an imparative style that describes *how* to match a syntax tree node instead of specifying *what* should be matched against declaratively. In other areas, it's common to use declarative patterns to describe desired information and let the implementation do the actual matching. A well-known example of this approach are [regular expressions](https://en.wikipedia.org/wiki/Regular_expression). Instead of writing code that detects certain character sequences, one can describe a search pattern using a domain-specific language and search for matches using that pattern. The advantage of using a declarative domain-specific language is that its limited domain (e.g. matching character sequences in the case of regular expressions) allows to express entities in that domain in a very natural and expressive way. - -While regular expressions are very useful when searching for patterns in flat character sequences, they cannot easily be applied to hierarchical data structures like syntax trees. This RFC therefore proposes a pattern matching system that is inspired by regular expressions and designed for hierarchical syntax trees. +A lot of complexity in writing lints currently seems to come from having to +manually implement the matching logic (see code samples above). It's an +imparative style that describes *how* to match a syntax tree node instead of +specifying *what* should be matched against declaratively. In other areas, it's +common to use declarative patterns to describe desired information and let the +implementation do the actual matching. A well-known example of this approach are +[regular expressions](https://en.wikipedia.org/wiki/Regular_expression). Instead +of writing code that detects certain character sequences, one can describe a +search pattern using a domain-specific language and search for matches using +that pattern. The advantage of using a declarative domain-specific language is +that its limited domain (e.g. matching character sequences in the case of +regular expressions) allows to express entities in that domain in a very natural +and expressive way. + +While regular expressions are very useful when searching for patterns in flat +character sequences, they cannot easily be applied to hierarchical data +structures like syntax trees. This RFC therefore proposes a pattern matching +system that is inspired by regular expressions and designed for hierarchical +syntax trees. # Guide-level explanation -This proposal adds a `pattern!` macro that can be used to specify a syntax tree pattern to search for. A simple pattern is shown below: +This proposal adds a `pattern!` macro that can be used to specify a syntax tree +pattern to search for. A simple pattern is shown below: -``` +```rust pattern!{ - my_pattern: Expr = + my_pattern: Expr = Lit(Bool(false)) } ``` -This macro call defines a pattern named `my_pattern` that can be matched against an `Expr` syntax tree node. The actual pattern (`Lit(Bool(false))` in this case) defines which syntax trees should match the pattern. This pattern matches expressions that are boolean literals with value `false`. +This macro call defines a pattern named `my_pattern` that can be matched against +an `Expr` syntax tree node. The actual pattern (`Lit(Bool(false))` in this case) +defines which syntax trees should match the pattern. This pattern matches +expressions that are boolean literals with value `false`. The pattern can then be used to implement lints in the following way: -``` +```rust ... impl EarlyLintPass for MyAwesomeLint { fn check_expr(&mut self, cx: &EarlyContext, expr: &syntax::ast::Expr) { - + if my_pattern(expr).is_some() { cx.span_lint( MY_AWESOME_LINT, @@ -95,14 +124,18 @@ impl EarlyLintPass for MyAwesomeLint { "This is a match for a simple pattern. Well done!", ); } - + } } ``` -The `pattern!` macro call expands to a function `my_pattern` that expects a syntax tree expression as its argument and returns an `Option` that indicates whether the pattern matched. +The `pattern!` macro call expands to a function `my_pattern` that expects a +syntax tree expression as its argument and returns an `Option` that indicates +whether the pattern matched. -> Note: The result type is explained in more detail in [a later section](#the-result-type). For now, it's enough to know that the result is `Some` if the pattern matched and `None` otherwise. +> Note: The result type is explained in more detail in [a later +> section](#the-result-type). For now, it's enough to know that the result is +> `Some` if the pattern matched and `None` otherwise. ## Pattern syntax @@ -111,38 +144,43 @@ The following examples demonstate the pattern syntax: #### Any (`_`) -The simplest pattern is the any pattern. It matches anything and is therefore similar to regex's `*`. +The simplest pattern is the any pattern. It matches anything and is therefore +similar to regex's `*`. -``` +```rust pattern!{ // matches any expression - my_pattern: Expr = + my_pattern: Expr = _ } ``` #### Node (`()`) -Nodes are used to match a specific variant of an AST node. A node has a name and a number of arguments that depends on the node type. For example, the `Lit` node has a single argument that describes the type of the literal. As another example, the `If` node has three arguments describing the if's condition, then block and else block. +Nodes are used to match a specific variant of an AST node. A node has a name and +a number of arguments that depends on the node type. For example, the `Lit` node +has a single argument that describes the type of the literal. As another +example, the `If` node has three arguments describing the if's condition, then +block and else block. -``` +```rust pattern!{ // matches any expression that is a literal - my_pattern: Expr = + my_pattern: Expr = Lit(_) } pattern!{ // matches any expression that is a boolean literal - my_pattern: Expr = + my_pattern: Expr = Lit(Bool(_)) } pattern!{ // matches if expressions that have a boolean literal in their condition - // Note: The `_?` syntax here means that the else branch is optional and can be anything. + // Note: The `_?` syntax here means that the else branch is optional and can be anything. // This is discussed in more detail in the section `Repetition`. - my_pattern: Expr = + my_pattern: Expr = If( Lit(Bool(_)) , _, _?) } ``` @@ -152,69 +190,71 @@ pattern!{ A pattern can also contain Rust literals. These literals match themselves. -``` +```rust pattern!{ // matches the boolean literal false - my_pattern: Expr = + my_pattern: Expr = Lit(Bool(false)) } pattern!{ // matches the character literal 'x' - my_pattern: Expr = + my_pattern: Expr = Lit(Char('x')) } ``` #### Alternations (`a | b`) -``` +```rust pattern!{ // matches if the literal is a boolean or integer literal - my_pattern: Lit = + my_pattern: Lit = Bool(_) | Int(_) } pattern!{ // matches if the expression is a char literal with value 'x' or 'y' - my_pattern: Expr = + my_pattern: Expr = Lit( Char('x' | 'y') ) } ``` #### Empty (`()`) -The empty pattern represents an empty sequence or the `None` variant of an optional. +The empty pattern represents an empty sequence or the `None` variant of an +optional. -``` +```rust pattern!{ // matches if the expression is an empty array - my_pattern: Expr = + my_pattern: Expr = Array( () ) } pattern!{ // matches if expressions that don't have an else clause - my_pattern: Expr = + my_pattern: Expr = If(_, _, ()) } ``` #### Sequence (` `) -``` +```rust pattern!{ // matches the array [true, false] - my_pattern: Expr = + my_pattern: Expr = Array( Lit(Bool(true)) Lit(Bool(false)) ) } ``` #### Repetition (`*`, `+`, `?`, `{n}`, `{n,m}`, `{n,}`) -Elements may be repeated. The syntax for specifying repetitions is identical to [regex's syntax](https://docs.rs/regex/1.1.2/regex/#repetitions). +Elements may be repeated. The syntax for specifying repetitions is identical to +[regex's syntax](https://docs.rs/regex/1.1.2/regex/#repetitions). -``` +```rust pattern!{ // matches arrays that contain 2 'x's as their last or second-last elements // Examples: @@ -222,7 +262,7 @@ pattern!{ // ['x', 'x', 'y'] match // ['a', 'b', 'c', 'x', 'x', 'y'] match // ['x', 'x', 'y', 'z'] no match - my_pattern: Expr = + my_pattern: Expr = Array( _* Lit(Char('x')){2} _? ) } @@ -234,34 +274,35 @@ pattern!{ // If(_, _, _) | match | no match // If(_, _, _?) | match | match // If(_, _, ()) | no match | match - my_pattern: Expr = + my_pattern: Expr = If(_, _, _?) } ``` #### Named submatch (`#`) -``` +```rust pattern!{ // matches character literals and gives the literal the name foo - my_pattern: Expr = + my_pattern: Expr = Lit(Char(_)#foo) } pattern!{ // matches character literals and gives the char the name bar - my_pattern: Expr = + my_pattern: Expr = Lit(Char(_#bar)) } pattern!{ // matches character literals and gives the expression the name baz - my_pattern: Expr = + my_pattern: Expr = Lit(Char(_))#baz } ``` -The reason for using named submatches is described in the section [The result type](#the-result-type). +The reason for using named submatches is described in the section [The result +type](#the-result-type). ### Summary @@ -281,21 +322,31 @@ The following table gives an summary of the pattern syntax: ## The result type -A lot of lints require checks that go beyond what the pattern syntax described above can express. For example, a lint might want to check whether a node was created as part of a macro expansion or whether there's no comment above a node. Another example would be a lint that wants to match two nodes that have the same value (as needed by lints like `almost_swapped`). Instead of allowing users to write these checks into the pattern directly (which might make patterns hard to read), the proposed solution allows users to assign names to parts of a pattern expression. When matching a pattern against a syntax tree node, the return value will contain references to all nodes that were matched by these named subpatterns. This is similar to capture groups in regular expressions. +A lot of lints require checks that go beyond what the pattern syntax described +above can express. For example, a lint might want to check whether a node was +created as part of a macro expansion or whether there's no comment above a node. +Another example would be a lint that wants to match two nodes that have the same +value (as needed by lints like `almost_swapped`). Instead of allowing users to +write these checks into the pattern directly (which might make patterns hard to +read), the proposed solution allows users to assign names to parts of a pattern +expression. When matching a pattern against a syntax tree node, the return value +will contain references to all nodes that were matched by these named +subpatterns. This is similar to capture groups in regular expressions. For example, given the following pattern -``` +```rust pattern!{ // matches character literals - my_pattern: Expr = + my_pattern: Expr = Lit(Char(_#val_inner)#val)#val_outer } ``` -one could get references to the nodes that matched the subpatterns in the following way: +one could get references to the nodes that matched the subpatterns in the +following way: -``` +```rust ... fn check_expr(expr: &syntax::ast::Expr) { if let Some(result) = my_pattern(expr) { @@ -306,50 +357,57 @@ fn check_expr(expr: &syntax::ast::Expr) { } ``` -The types in the `result` struct depend on the pattern. For example, the following pattern +The types in the `result` struct depend on the pattern. For example, the +following pattern -``` +```rust pattern!{ // matches arrays of character literals - my_pattern_seq: Expr = + my_pattern_seq: Expr = Array( Lit(_)*#foo ) } ``` -matches arrays that consist of any number of literal expressions. Because those expressions are named `foo`, the result struct contains a `foo` attribute which is a vector of expressions: +matches arrays that consist of any number of literal expressions. Because those +expressions are named `foo`, the result struct contains a `foo` attribute which +is a vector of expressions: -``` +```rust ... if let Some(result) = my_pattern_seq(expr) { result.foo // type: Vec<&syntax::ast::Expr> } ``` -Another result type occurs when a name is only defined in one branch of an alternation: +Another result type occurs when a name is only defined in one branch of an +alternation: -``` +```rust pattern!{ // matches if expression is a boolean or integer literal - my_pattern_alt: Expr = + my_pattern_alt: Expr = Lit( Bool(_#bar) | Int(_) ) } ``` -In the pattern above, the `bar` name is only defined if the pattern matches a boolean literal. If it matches an integer literal, the name isn't set. To account fot this, the result struct's `bar` attribute is an option type: +In the pattern above, the `bar` name is only defined if the pattern matches a +boolean literal. If it matches an integer literal, the name isn't set. To +account for this, the result struct's `bar` attribute is an option type: -``` +```rust ... if let Some(result) = my_pattern_alt(expr) { result.bar // type: Option<&bool> } ``` -It's also possible to use a name in multiple alternation branches if they have compatible types: +It's also possible to use a name in multiple alternation branches if they have +compatible types: -``` +```rust pattern!{ // matches if expression is a boolean or integer literal - my_pattern_mult: Expr = + my_pattern_mult: Expr = Lit(_#baz) | Array( Lit(_#baz) ) } ... @@ -358,60 +416,77 @@ if let Some(result) = my_pattern_mult(expr) { } ``` -Named submatches are a **flat** namespace and this is intended. In the example above, two different sub-structures are assigned to a flat name. I expect that for most lints, a flat namespace is sufficient and easier to work with than a hierarchical one. +Named submatches are a **flat** namespace and this is intended. In the example +above, two different sub-structures are assigned to a flat name. I expect that +for most lints, a flat namespace is sufficient and easier to work with than a +hierarchical one. #### Two stages -Using named subpatterns, users can write lints in two stages. First, a coarse selection of possible matches is produced by the pattern syntax. In the second stage, the named subpattern references can be used to do additional tests like asserting that a node hasn't been created as part of a macro expansion. +Using named subpatterns, users can write lints in two stages. First, a coarse +selection of possible matches is produced by the pattern syntax. In the second +stage, the named subpattern references can be used to do additional tests like +asserting that a node hasn't been created as part of a macro expansion. ## Implementing clippy lints using patterns -As a "real-world" example, I re-implemented the `collapsible_if` lint using patterns. The code can be found [here](https://github.com/fkohlgrueber/rust-clippy-pattern/blob/039b07ecccaf96d6aa7504f5126720d2c9cceddd/clippy_lints/src/collapsible_if.rs#L88-L163). The pattern-based version passes all test cases that were written for `collapsible_if`. +As a "real-world" example, I re-implemented the `collapsible_if` lint using +patterns. The code can be found +[here](https://github.com/fkohlgrueber/rust-clippy-pattern/blob/039b07ecccaf96d6aa7504f5126720d2c9cceddd/clippy_lints/src/collapsible_if.rs#L88-L163). +The pattern-based version passes all test cases that were written for +`collapsible_if`. # Reference-level explanation ## Overview -The following diagram shows the dependencies between the main parts of the proposed solution: +The following diagram shows the dependencies between the main parts of the +proposed solution: ``` - Pattern syntax - | - | parsing / lowering - v - PatternTree - ^ - | - | - IsMatch trait - | - | - +---------------+-----------+---------+ - | | | | - v v v v - syntax::ast rustc::hir syn ... + Pattern syntax + | + | parsing / lowering + v + PatternTree + ^ + | + | + IsMatch trait + | + | + +---------------+-----------+---------+ + | | | | + v v v v + syntax::ast rustc::hir syn ... ``` -The pattern syntax described in the previous section is parsed / lowered into the so-called *PatternTree* data structure that represents a valid syntax tree pattern. Matching a *PatternTree* against an actual syntax tree (e.g. rust ast / hir or the syn ast, ...) is done using the *IsMatch* trait. +The pattern syntax described in the previous section is parsed / lowered into +the so-called *PatternTree* data structure that represents a valid syntax tree +pattern. Matching a *PatternTree* against an actual syntax tree (e.g. rust ast / +hir or the syn ast, ...) is done using the *IsMatch* trait. -The *PatternTree* and the *IsMatch* trait are introduced in more detail in the following sections. +The *PatternTree* and the *IsMatch* trait are introduced in more detail in the +following sections. ## PatternTree -The core data structure of this RFC is the **PatternTree**. +The core data structure of this RFC is the **PatternTree**. -It's a data structure similar to rust's AST / HIR, but with the following differences: +It's a data structure similar to rust's AST / HIR, but with the following +differences: - The PatternTree doesn't contain parsing information like `Span`s - The PatternTree can represent alternatives, sequences and optionals The code below shows a simplified version of the current PatternTree: -> Note: The current implementation can be found [here](https://github.com/fkohlgrueber/pattern-matching/blob/dfb3bc9fbab69cec7c91e72564a63ebaa2ede638/pattern-match/src/pattern_tree.rs#L50-L96). +> Note: The current implementation can be found +> [here](https://github.com/fkohlgrueber/pattern-matching/blob/dfb3bc9fbab69cec7c91e72564a63ebaa2ede638/pattern-match/src/pattern_tree.rs#L50-L96). -``` +```rust pub enum Expr { Lit(Alt), Array(Seq), @@ -441,9 +516,10 @@ pub enum BlockType { The `Alt`, `Seq` and `Opt` structs look like these: -> Note: The current implementation can be found [here](https://github.com/fkohlgrueber/pattern-matching/blob/dfb3bc9fbab69cec7c91e72564a63ebaa2ede638/pattern-match/src/matchers.rs#L35-L60). +> Note: The current implementation can be found +> [here](https://github.com/fkohlgrueber/pattern-matching/blob/dfb3bc9fbab69cec7c91e72564a63ebaa2ede638/pattern-match/src/matchers.rs#L35-L60). -``` +```rust pub enum Alt { Any, Elmt(Box), @@ -477,27 +553,45 @@ pub struct RepeatRange { ## Parsing / Lowering -The input of a `pattern!` macro call is parsed into a `ParseTree` first and then lowered to a `PatternTree`. +The input of a `pattern!` macro call is parsed into a `ParseTree` first and then +lowered to a `PatternTree`. -Valid patterns depend on the *PatternTree* definitions. For example, the pattern `Lit(Bool(_)*)` isn't valid because the parameter type of the `Lit` variant of the `Expr` enum is `Any` and therefore doesn't support repetition (`*`). As another example, `Array( Lit(_)* )` is a valid pattern because the parameter of `Array` is of type `Seq` which allows sequences and repetitions. +Valid patterns depend on the *PatternTree* definitions. For example, the pattern +`Lit(Bool(_)*)` isn't valid because the parameter type of the `Lit` variant of +the `Expr` enum is `Any` and therefore doesn't support repetition (`*`). As +another example, `Array( Lit(_)* )` is a valid pattern because the parameter of +`Array` is of type `Seq` which allows sequences and repetitions. -> Note: names in the pattern syntax correspond to *PatternTree* enum **variants**. For example, the `Lit` in the pattern above refers to the `Lit` variant of the `Expr` enum (`Expr::Lit`), not the `Lit` enum. +> Note: names in the pattern syntax correspond to *PatternTree* enum +> **variants**. For example, the `Lit` in the pattern above refers to the `Lit` +> variant of the `Expr` enum (`Expr::Lit`), not the `Lit` enum. ## The IsMatch Trait -The pattern syntax and the *PatternTree* are independant of specific syntax tree implementations (rust ast / hir, syn, ...). When looking at the different pattern examples in the previous sections, it can be seen that the patterns don't contain any information specific to a certain syntax tree implementation. In contrast, clippy lints currently match against ast / hir syntax tree nodes and therefore directly depend on their implementation. +The pattern syntax and the *PatternTree* are independant of specific syntax tree +implementations (rust ast / hir, syn, ...). When looking at the different +pattern examples in the previous sections, it can be seen that the patterns +don't contain any information specific to a certain syntax tree implementation. +In contrast, clippy lints currently match against ast / hir syntax tree nodes +and therefore directly depend on their implementation. -The connection between the *PatternTree* and specific syntax tree implementations is the `IsMatch` trait. It defines how to match *PatternTree* nodes against specific syntax tree nodes. A simplified implementation of the `IsMatch` trait is shown below: +The connection between the *PatternTree* and specific syntax tree +implementations is the `IsMatch` trait. It defines how to match *PatternTree* +nodes against specific syntax tree nodes. A simplified implementation of the +`IsMatch` trait is shown below: -``` +```rust pub trait IsMatch { fn is_match(&self, other: &'o O) -> bool; } ``` -This trait needs to be implemented on each enum of the *PatternTree* (for the corresponding syntax tree types). For example, the `IsMatch` implementation for matching `ast::LitKind` against the *PatternTree's* `Lit` enum might look like this: +This trait needs to be implemented on each enum of the *PatternTree* (for the +corresponding syntax tree types). For example, the `IsMatch` implementation for +matching `ast::LitKind` against the *PatternTree's* `Lit` enum might look like +this: -``` +```rust impl IsMatch for Lit { fn is_match(&self, other: &ast::LitKind) -> bool { match (self, other) { @@ -510,25 +604,30 @@ impl IsMatch for Lit { } ``` -All `IsMatch` implementations for matching the current *PatternTree* against `syntax::ast` can be found [here](https://github.com/fkohlgrueber/pattern-matching/blob/dfb3bc9fbab69cec7c91e72564a63ebaa2ede638/pattern-match/src/ast_match.rs). +All `IsMatch` implementations for matching the current *PatternTree* against +`syntax::ast` can be found +[here](https://github.com/fkohlgrueber/pattern-matching/blob/dfb3bc9fbab69cec7c91e72564a63ebaa2ede638/pattern-match/src/ast_match.rs). # Drawbacks #### Performance -The pattern matching code is currently not optimized for performance, so it might be slower than hand-written matching code. -Additionally, the two-stage approach (matching against the coarse pattern first and checking for additional properties later) might be slower than the current practice of checking for structure and additional properties in one pass. For example, the following lint +The pattern matching code is currently not optimized for performance, so it +might be slower than hand-written matching code. Additionally, the two-stage +approach (matching against the coarse pattern first and checking for additional +properties later) might be slower than the current practice of checking for +structure and additional properties in one pass. For example, the following lint -``` +```rust pattern!{ - pat_if_without_else: Expr = + pat_if_without_else: Expr = If( _, Block( Expr( If(_, _, ())#inner ) - | Semi( If(_, _, ())#inner ) - )#then, + | Semi( If(_, _, ())#inner ) + )#then, () ) } @@ -541,9 +640,11 @@ fn check_expr(&mut self, cx: &EarlyContext<'_>, expr: &ast::Expr) { } ``` -first matches against the pattern and then checks that the `then` block doesn't start with a comment. Using clippy's current approach, it's possible to check for these conditions earlier: +first matches against the pattern and then checks that the `then` block doesn't +start with a comment. Using clippy's current approach, it's possible to check +for these conditions earlier: -``` +```rust fn check_expr(&mut self, cx: &EarlyContext<'_>, expr: &ast::Expr) { if_chain! { if let ast::ExprKind::If(ref check, ref then, None) = expr.node; @@ -557,30 +658,57 @@ fn check_expr(&mut self, cx: &EarlyContext<'_>, expr: &ast::Expr) { } ``` -Whether or not this causes performance regressions depends on actual patterns. If it turns out to be a problem, the pattern matching algorithms could be extended to allow "early filtering" (see the [Early Filtering](#early-filtering) section in Future Possibilities). +Whether or not this causes performance regressions depends on actual patterns. +If it turns out to be a problem, the pattern matching algorithms could be +extended to allow "early filtering" (see the [Early Filtering](#early-filtering) +section in Future Possibilities). -That being said, I don't see any conceptual limitations regarding pattern matching performance. +That being said, I don't see any conceptual limitations regarding pattern +matching performance. #### Applicability -Even though I'd expect that a lot of lints can be written using the proposed pattern syntax, it's unlikely that all lints can be expressed using patterns. I suspect that there will still be lints that need to be implemented by writing custom pattern matching code. This would lead to mix within clippy's codebase where some lints are implemented using patterns and others aren't. This inconsistency might be considered a drawback. +Even though I'd expect that a lot of lints can be written using the proposed +pattern syntax, it's unlikely that all lints can be expressed using patterns. I +suspect that there will still be lints that need to be implemented by writing +custom pattern matching code. This would lead to mix within clippy's codebase +where some lints are implemented using patterns and others aren't. This +inconsistency might be considered a drawback. # Rationale and alternatives -Specifying lints using syntax tree patterns has a couple of advantages compared to the current approach of manually writing matching code. First, syntax tree patterns allow users to describe patterns in a simple and expressive way. This makes it easier to write new lints for both novices and experts and also makes reading / modifying existing lints simpler. - -Another advantage is that lints are independent of specific syntax tree implementations (e.g. AST / HIR, ...). When these syntax tree implementations change, only the `IsMatch` trait implementations need to be adapted and existing lints can remain unchanged. This also means that if the `IsMatch` trait implementations were integrated into the compiler, updating the `IsMatch` implementations would be required for the compiler to compile successfully. This could reduce the number of times clippy breaks because of changes in the compiler. Another advantage of the pattern's independence is that converting an `EarlyLintPass` lint into a `LatePassLint` wouldn't require rewriting the whole pattern matching code. In fact, the pattern might work just fine without any adaptions. - +Specifying lints using syntax tree patterns has a couple of advantages compared +to the current approach of manually writing matching code. First, syntax tree +patterns allow users to describe patterns in a simple and expressive way. This +makes it easier to write new lints for both novices and experts and also makes +reading / modifying existing lints simpler. + +Another advantage is that lints are independent of specific syntax tree +implementations (e.g. AST / HIR, ...). When these syntax tree implementations +change, only the `IsMatch` trait implementations need to be adapted and existing +lints can remain unchanged. This also means that if the `IsMatch` trait +implementations were integrated into the compiler, updating the `IsMatch` +implementations would be required for the compiler to compile successfully. This +could reduce the number of times clippy breaks because of changes in the +compiler. Another advantage of the pattern's independence is that converting an +`EarlyLintPass` lint into a `LatePassLint` wouldn't require rewriting the whole +pattern matching code. In fact, the pattern might work just fine without any +adaptions. ## Alternatives ### Rust-like pattern syntax -The proposed pattern syntax requires users to know the structure of the `PatternTree` (which is very similar to the AST's / HIR's structure) and also the pattern syntax. An alternative would be to introduce a pattern syntax that is similar to actual Rust syntax (probably like the `quote!` macro). For example, a pattern that matches `if` expressions that have `false` in their condition could look like this: +The proposed pattern syntax requires users to know the structure of the +`PatternTree` (which is very similar to the AST's / HIR's structure) and also +the pattern syntax. An alternative would be to introduce a pattern syntax that +is similar to actual Rust syntax (probably like the `quote!` macro). For +example, a pattern that matches `if` expressions that have `false` in their +condition could look like this: -``` +```rust if false { #[*] } @@ -588,9 +716,13 @@ if false { #### Problems -Extending Rust syntax (which is quite complex by itself) with additional syntax needed for specifying patterns (alternations, sequences, repetisions, named submatches, ...) might become difficult to read and really hard to parse properly. +Extending Rust syntax (which is quite complex by itself) with additional syntax +needed for specifying patterns (alternations, sequences, repetisions, named +submatches, ...) might become difficult to read and really hard to parse +properly. -For example, a pattern that matches a binary operation that has `0` on both sides might look like this: +For example, a pattern that matches a binary operation that has `0` on both +sides might look like this: ``` 0 #[*:BinOpKind] 0 @@ -602,74 +734,116 @@ Now consider this slightly more complex example: 1 + 0 #[*:BinOpKind] 0 ``` -The parser would need to know the precedence of `#[*:BinOpKind]` because it affects the structure of the resulting AST. `1 + 0 + 0` is parsed as `(1 + 0) + 0` while `1 + 0 * 0` is parsed as `1 + (0 * 0)`. Since the pattern could be any `BinOpKind`, the precedence cannot be known in advance. +The parser would need to know the precedence of `#[*:BinOpKind]` because it +affects the structure of the resulting AST. `1 + 0 + 0` is parsed as `(1 + 0) + +0` while `1 + 0 * 0` is parsed as `1 + (0 * 0)`. Since the pattern could be any +`BinOpKind`, the precedence cannot be known in advance. -Another example of a problem would be named submatches. Take a look at this pattern: +Another example of a problem would be named submatches. Take a look at this +pattern: -``` +```rust fn test() { 1 #foo } ``` -Which node is `#foo` referring to? `int`, `ast::Lit`, `ast::Expr`, `ast::Stmt`? Naming subpatterns in a rust-like syntax is difficult because a lot of AST nodes don't have a syntactic element that can be used to put the name tag on. In these situations, the only sensible option would be to assign the name tag to the outermost node (`ast::Stmt` in the example above), because the information of all child nodes can be retrieved through the outermost node. The problem with this then would be that accessing inner nodes (like `ast::Lit`) would again require manual pattern matching. +Which node is `#foo` referring to? `int`, `ast::Lit`, `ast::Expr`, `ast::Stmt`? +Naming subpatterns in a rust-like syntax is difficult because a lot of AST nodes +don't have a syntactic element that can be used to put the name tag on. In these +situations, the only sensible option would be to assign the name tag to the +outermost node (`ast::Stmt` in the example above), because the information of +all child nodes can be retrieved through the outermost node. The problem with +this then would be that accessing inner nodes (like `ast::Lit`) would again +require manual pattern matching. -In general, Rust syntax contains a lot of code structure implicitly. This structure is reconstructed during parsing (e.g. binary operations are reconstructed using operator precedence and left-to-right) and is one of the reasons why parsing is a complex task. The advantage of this approach is that writing code is simpler for users. +In general, Rust syntax contains a lot of code structure implicitly. This +structure is reconstructed during parsing (e.g. binary operations are +reconstructed using operator precedence and left-to-right) and is one of the +reasons why parsing is a complex task. The advantage of this approach is that +writing code is simpler for users. -When writing *syntax tree patterns*, each element of the hierarchy might have alternatives, repetitions, etc.. Respecting that while still allowing human-friendly syntax that contains structure implicitly seems to be really complex, if not impossible. +When writing *syntax tree patterns*, each element of the hierarchy might have +alternatives, repetitions, etc.. Respecting that while still allowing +human-friendly syntax that contains structure implicitly seems to be really +complex, if not impossible. -Developing such a syntax would also require to maintain a custom parser that is at least as complex as the Rust parser itself. Additionally, future changes in the Rust syntax might be incompatible with such a syntax. +Developing such a syntax would also require to maintain a custom parser that is +at least as complex as the Rust parser itself. Additionally, future changes in +the Rust syntax might be incompatible with such a syntax. -In summary, I think that developing such a syntax would introduce a lot of complexity to solve a relatively minor problem. +In summary, I think that developing such a syntax would introduce a lot of +complexity to solve a relatively minor problem. -The issue of users not knowing about the *PatternTree* structure could be solved by a tool that, given a rust program, generates a pattern that matches only this program (similar to the clippy author lint). +The issue of users not knowing about the *PatternTree* structure could be solved +by a tool that, given a rust program, generates a pattern that matches only this +program (similar to the clippy author lint). -For some simple cases (like the first example above), it might be possible to successfully mix Rust and pattern syntax. This space could be further explored in a future extension. +For some simple cases (like the first example above), it might be possible to +successfully mix Rust and pattern syntax. This space could be further explored +in a future extension. # Prior art -The pattern syntax is heavily inspired by regular expressions (repetitions, alternatives, sequences, ...). +The pattern syntax is heavily inspired by regular expressions (repetitions, +alternatives, sequences, ...). -From what I've seen until now, other linters also implement lints that directly work on syntax tree data structures, just like clippy does currently. I would therefore consider the pattern syntax to be *new*, but please correct me if I'm wrong. +From what I've seen until now, other linters also implement lints that directly +work on syntax tree data structures, just like clippy does currently. I would +therefore consider the pattern syntax to be *new*, but please correct me if I'm +wrong. # Unresolved questions #### How to handle multiple matches? -When matching a syntax tree node against a pattern, there are possibly multiple ways in which the pattern can be matched. A simple example of this would be the following pattern: +When matching a syntax tree node against a pattern, there are possibly multiple +ways in which the pattern can be matched. A simple example of this would be the +following pattern: -``` +```rust pattern!{ - my_pattern: Expr = + my_pattern: Expr = Array( _* Lit(_)+#literals) } ``` -This pattern matches arrays that end with at least one literal. Now given the array `[x, 1, 2]`, should `1` be matched as part of the `_*` or the `Lit(_)+` part of the pattern? The difference is important because the named submatch `#literals` would contain 1 or 2 elements depending how the pattern is matched. In regular expressions, this problem is solved by matching "greedy" by default and "non-greedy" optionally. +This pattern matches arrays that end with at least one literal. Now given the +array `[x, 1, 2]`, should `1` be matched as part of the `_*` or the `Lit(_)+` +part of the pattern? The difference is important because the named submatch +`#literals` would contain 1 or 2 elements depending how the pattern is matched. +In regular expressions, this problem is solved by matching "greedy" by default +and "non-greedy" optionally. -I haven't looked much into this yet because I don't know how relevant it is for most lints. The current implementation simply returns the first match it finds. +I haven't looked much into this yet because I don't know how relevant it is for +most lints. The current implementation simply returns the first match it finds. # Future possibilities #### Implement rest of Rust Syntax -The current project only implements a small part of the Rust syntax. In the future, this should incrementally be extended to more syntax to allow implementing more lints. Implementing more of the Rust syntax requires extending the `PatternTree` and `IsMatch` implementations, but should be relatively straight-forward. +The current project only implements a small part of the Rust syntax. In the +future, this should incrementally be extended to more syntax to allow +implementing more lints. Implementing more of the Rust syntax requires extending +the `PatternTree` and `IsMatch` implementations, but should be relatively +straight-forward. #### Early filtering -As described in the *Drawbacks/Performance* section, allowing additional checks during the pattern matching might be beneficial. +As described in the *Drawbacks/Performance* section, allowing additional checks +during the pattern matching might be beneficial. The pattern below shows how this could look like: -``` +```rust pattern!{ - pat_if_without_else: Expr = + pat_if_without_else: Expr = If( _, Block( Expr( If(_, _, ())#inner ) - | Semi( If(_, _, ())#inner ) - )#then, + | Semi( If(_, _, ())#inner ) + )#then, () ) where @@ -677,47 +851,64 @@ pattern!{ } ``` -The difference compared to the currently proposed two-stage filtering is that using early filtering, the condition (`!in_macro(#then.span)` in this case) would be evaluated as soon as the `Block(_)#then` was matched. +The difference compared to the currently proposed two-stage filtering is that +using early filtering, the condition (`!in_macro(#then.span)` in this case) +would be evaluated as soon as the `Block(_)#then` was matched. -Another idea in this area would be to introduce a syntax for backreferences. They could be used to require that multiple parts of a pattern should match the same value. For example, the `assign_op_pattern` lint that searches for `a = a op b` and recommends changing it to `a op= b` requires that both occurrances of `a` are the same. Using `=#...` as syntax for backreferences, the lint could be implemented like this: +Another idea in this area would be to introduce a syntax for backreferences. +They could be used to require that multiple parts of a pattern should match the +same value. For example, the `assign_op_pattern` lint that searches for `a = a +op b` and recommends changing it to `a op= b` requires that both occurrances of +`a` are the same. Using `=#...` as syntax for backreferences, the lint could be +implemented like this: -``` +```rust pattern!{ - assign_op_pattern: Expr = + assign_op_pattern: Expr = Assign(_#target, Binary(_, =#target, _) } ``` #### Match descendant -A lot of lints currently implement custom visitors that check whether any subtree (which might not be a direct descendant) of the current node matches some properties. This cannot be expressed with the proposed pattern syntax. Extending the pattern syntax to allow patterns like "a function that contains at least two return statements" could be a practical addition. +A lot of lints currently implement custom visitors that check whether any +subtree (which might not be a direct descendant) of the current node matches +some properties. This cannot be expressed with the proposed pattern syntax. +Extending the pattern syntax to allow patterns like "a function that contains at +least two return statements" could be a practical addition. #### Negation operator for alternatives -For patterns like "a literal that is not a boolean literal" one currently needs to list all alternatives except the boolean case. Introducing a negation operator that allows to write `Lit(!Bool(_))` might be a good idea. This pattern would be eqivalent to `Lit( Char(_) | Int(_) )` (given that currently only three literal types are implemented). +For patterns like "a literal that is not a boolean literal" one currently needs +to list all alternatives except the boolean case. Introducing a negation +operator that allows to write `Lit(!Bool(_))` might be a good idea. This pattern +would be eqivalent to `Lit( Char(_) | Int(_) )` (given that currently only three +literal types are implemented). #### Functional composition -Patterns currently don't have any concept of composition. This leads to repetitions within patterns. For example, one of the collapsible-if patterns currently has to be written like this: +Patterns currently don't have any concept of composition. This leads to +repetitions within patterns. For example, one of the collapsible-if patterns +currently has to be written like this: -``` +```rust pattern!{ - pat_if_else: Expr = + pat_if_else: Expr = If( - _, - _, + _, + _, Block_( Block( - Expr((If(_, _, _?) | IfLet(_, _?))#else_) | + Expr((If(_, _, _?) | IfLet(_, _?))#else_) | Semi((If(_, _, _?) | IfLet(_, _?))#else_) )#block_inner )#block ) | IfLet( - _, + _, Block_( Block( - Expr((If(_, _, _?) | IfLet(_, _?))#else_) | + Expr((If(_, _, _?) | IfLet(_, _?))#else_) | Semi((If(_, _, _?) | IfLet(_, _?))#else_) )#block_inner )#block @@ -725,9 +916,10 @@ pattern!{ } ``` -If patterns supported defining functions of subpatterns, the code could be simplified as follows: +If patterns supported defining functions of subpatterns, the code could be +simplified as follows: -``` +```rust pattern!{ fn expr_or_semi(expr: Expr) -> Stmt { Expr(expr) | Semi(expr) @@ -735,7 +927,7 @@ pattern!{ fn if_or_if_let(then: Block, else: Opt) -> Expr { If(_, then, else) | IfLet(then, else) } - pat_if_else: Expr = + pat_if_else: Expr = if_or_if_let( _, Block_( @@ -747,43 +939,48 @@ pattern!{ } ``` -Additionally, common patterns like `expr_or_semi` could be shared between different lints. +Additionally, common patterns like `expr_or_semi` could be shared between +different lints. #### Clippy Pattern Author -Another improvement could be to create a tool that, given some valid Rust syntax, generates a pattern that matches this syntax exactly. This would make starting to write a pattern easier. A user could take a look at the patterns generated for a couple of Rust code examples and use that information to write a pattern that matches all of them. +Another improvement could be to create a tool that, given some valid Rust +syntax, generates a pattern that matches this syntax exactly. This would make +starting to write a pattern easier. A user could take a look at the patterns +generated for a couple of Rust code examples and use that information to write a +pattern that matches all of them. This is similar to clippy's author lint. #### Supporting other syntaxes -Most of the proposed system is language-agnostic. For example, the pattern syntax could also be used to describe patterns for other programming languages. +Most of the proposed system is language-agnostic. For example, the pattern +syntax could also be used to describe patterns for other programming languages. -In order to support other languages' syntaxes, one would need to implement another `PatternTree` that sufficiently describes the languages' AST and implement `IsMatch` for this `PatternTree` and the languages' AST. +In order to support other languages' syntaxes, one would need to implement +another `PatternTree` that sufficiently describes the languages' AST and +implement `IsMatch` for this `PatternTree` and the languages' AST. -One aspect of this is that it would even be possible to write lints that work on the pattern syntax itself. For example, when writing the following pattern +One aspect of this is that it would even be possible to write lints that work on +the pattern syntax itself. For example, when writing the following pattern -``` +```rust pattern!{ - my_pattern: Expr = + my_pattern: Expr = Array( Lit(Bool(false)) Lit(Bool(false)) ) } ``` -a lint that works on the pattern syntax's AST could suggest using this pattern instead: +a lint that works on the pattern syntax's AST could suggest using this pattern +instead: -``` +```rust pattern!{ - my_pattern: Expr = + my_pattern: Expr = Array( Lit(Bool(false)){2} ) } ``` -In the future, clippy could use this system to also provide lints for custom syntaxes like those found in macros. - -# Final Note - -This is the first RFC I've ever written and it might be a little rough around the edges. - -I'm looking forward to hearing your feedback and discussing the proposal. +In the future, clippy could use this system to also provide lints for custom +syntaxes like those found in macros. From 8961815ef63e78137dab9a4d550736b97b3b040b Mon Sep 17 00:00:00 2001 From: kraktus Date: Fri, 25 Nov 2022 16:36:22 +0100 Subject: [PATCH 16/17] Re-enable `uninlined_format_args` on multiline `format!` But do not display the code suggestion which can be sometimes completely broken (fortunately when applied it's valid) --- clippy_lints/src/format_args.rs | 19 ++++++++++------ tests/ui/uninlined_format_args.fixed | 11 +++------- tests/ui/uninlined_format_args.stderr | 31 ++++++++++++++++++++++++++- 3 files changed, 46 insertions(+), 15 deletions(-) diff --git a/clippy_lints/src/format_args.rs b/clippy_lints/src/format_args.rs index ec45be558f14..fd3ce2f3d6cd 100644 --- a/clippy_lints/src/format_args.rs +++ b/clippy_lints/src/format_args.rs @@ -9,7 +9,10 @@ use clippy_utils::source::snippet_opt; use clippy_utils::ty::{implements_trait, is_type_diagnostic_item}; use if_chain::if_chain; use itertools::Itertools; -use rustc_errors::Applicability; +use rustc_errors::{ + Applicability, + SuggestionStyle::{CompletelyHidden, ShowCode}, +}; use rustc_hir::{Expr, ExprKind, HirId, QPath}; use rustc_lint::{LateContext, LateLintPass, LintContext}; use rustc_middle::ty::adjustment::{Adjust, Adjustment}; @@ -286,10 +289,9 @@ fn check_uninlined_args(cx: &LateContext<'_>, args: &FormatArgsExpn<'_>, call_si return; } - // Temporarily ignore multiline spans: https://github.com/rust-lang/rust/pull/102729#discussion_r988704308 - if fixes.iter().any(|(span, _)| cx.sess().source_map().is_multiline(*span)) { - return; - } + // multiline span display suggestion is sometimes broken: https://github.com/rust-lang/rust/pull/102729#discussion_r988704308 + // in those cases, make the code suggestion hidden + let multiline_fix = fixes.iter().any(|(span, _)| cx.sess().source_map().is_multiline(*span)); span_lint_and_then( cx, @@ -297,7 +299,12 @@ fn check_uninlined_args(cx: &LateContext<'_>, args: &FormatArgsExpn<'_>, call_si call_site, "variables can be used directly in the `format!` string", |diag| { - diag.multipart_suggestion("change this to", fixes, Applicability::MachineApplicable); + diag.multipart_suggestion_with_style( + "change this to", + fixes, + Applicability::MachineApplicable, + if multiline_fix { CompletelyHidden } else { ShowCode }, + ); }, ); } diff --git a/tests/ui/uninlined_format_args.fixed b/tests/ui/uninlined_format_args.fixed index 106274479751..ca56c95c23f4 100644 --- a/tests/ui/uninlined_format_args.fixed +++ b/tests/ui/uninlined_format_args.fixed @@ -44,9 +44,7 @@ fn tester(fn_arg: i32) { println!("val='{local_i32}'"); // space+tab println!("val='{local_i32}'"); // tab+space println!( - "val='{ - }'", - local_i32 + "val='{local_i32}'" ); println!("{local_i32}"); println!("{fn_arg}"); @@ -110,8 +108,7 @@ fn tester(fn_arg: i32) { println!("{local_f64:width$.prec$}"); println!("{local_f64:width$.prec$} {local_f64} {width} {prec}"); println!( - "{0:1$.2$} {0:2$.1$} {1:0$.2$} {1:2$.0$} {2:0$.1$} {2:1$.0$}", - local_i32, width, prec, + "{local_i32:width$.prec$} {local_i32:prec$.width$} {width:local_i32$.prec$} {width:prec$.local_i32$} {prec:local_i32$.width$} {prec:width$.local_i32$}", ); println!( "{0:1$.2$} {0:2$.1$} {1:0$.2$} {1:2$.0$} {2:0$.1$} {2:1$.0$} {3}", @@ -142,9 +139,7 @@ fn tester(fn_arg: i32) { println!(no_param_str!(), local_i32); println!( - "{}", - // comment with a comma , in it - val, + "{val}", ); println!("{val}"); diff --git a/tests/ui/uninlined_format_args.stderr b/tests/ui/uninlined_format_args.stderr index 2ce3b7fa960c..1182d57ce9b7 100644 --- a/tests/ui/uninlined_format_args.stderr +++ b/tests/ui/uninlined_format_args.stderr @@ -59,6 +59,16 @@ LL - println!("val='{ }'", local_i32); // tab+space LL + println!("val='{local_i32}'"); // tab+space | +error: variables can be used directly in the `format!` string + --> $DIR/uninlined_format_args.rs:46:5 + | +LL | / println!( +LL | | "val='{ +LL | | }'", +LL | | local_i32 +LL | | ); + | |_____^ + error: variables can be used directly in the `format!` string --> $DIR/uninlined_format_args.rs:51:5 | @@ -767,6 +777,15 @@ LL - println!("{:1$.2$} {0} {1} {2}", local_f64, width, prec); LL + println!("{local_f64:width$.prec$} {local_f64} {width} {prec}"); | +error: variables can be used directly in the `format!` string + --> $DIR/uninlined_format_args.rs:112:5 + | +LL | / println!( +LL | | "{0:1$.2$} {0:2$.1$} {1:0$.2$} {1:2$.0$} {2:0$.1$} {2:1$.0$}", +LL | | local_i32, width, prec, +LL | | ); + | |_____^ + error: variables can be used directly in the `format!` string --> $DIR/uninlined_format_args.rs:123:5 | @@ -815,6 +834,16 @@ LL - println!("{}", format!("{}", local_i32)); LL + println!("{}", format!("{local_i32}")); | +error: variables can be used directly in the `format!` string + --> $DIR/uninlined_format_args.rs:144:5 + | +LL | / println!( +LL | | "{}", +LL | | // comment with a comma , in it +LL | | val, +LL | | ); + | |_____^ + error: variables can be used directly in the `format!` string --> $DIR/uninlined_format_args.rs:149:5 | @@ -875,5 +904,5 @@ LL - println!("expand='{}'", local_i32); LL + println!("expand='{local_i32}'"); | -error: aborting due to 73 previous errors +error: aborting due to 76 previous errors From 54dba4852cb7458c96641c20c4eef9ddb2eadddb Mon Sep 17 00:00:00 2001 From: kraktus Date: Fri, 25 Nov 2022 16:41:08 +0100 Subject: [PATCH 17/17] dogfood with expanded `uninlined_format_args` --- clippy_utils/src/ty.rs | 15 +++++---------- lintcheck/src/main.rs | 15 +++++++-------- 2 files changed, 12 insertions(+), 18 deletions(-) diff --git a/clippy_utils/src/ty.rs b/clippy_utils/src/ty.rs index 897edfc5495f..09967f317f89 100644 --- a/clippy_utils/src/ty.rs +++ b/clippy_utils/src/ty.rs @@ -1005,14 +1005,12 @@ pub fn make_projection<'tcx>( debug_assert!( generic_count == substs.len(), - "wrong number of substs for `{:?}`: found `{}` expected `{}`.\n\ + "wrong number of substs for `{:?}`: found `{}` expected `{generic_count}`.\n\ note: the expected parameters are: {:#?}\n\ - the given arguments are: `{:#?}`", + the given arguments are: `{substs:#?}`", assoc_item.def_id, substs.len(), - generic_count, params.map(GenericParamDefKind::descr).collect::>(), - substs, ); if let Some((idx, (param, arg))) = params @@ -1030,14 +1028,11 @@ pub fn make_projection<'tcx>( { debug_assert!( false, - "mismatched subst type at index {}: expected a {}, found `{:?}`\n\ + "mismatched subst type at index {idx}: expected a {}, found `{arg:?}`\n\ note: the expected parameters are {:#?}\n\ - the given arguments are {:#?}", - idx, + the given arguments are {substs:#?}", param.descr(), - arg, - params.map(GenericParamDefKind::descr).collect::>(), - substs, + params.map(GenericParamDefKind::descr).collect::>() ); } } diff --git a/lintcheck/src/main.rs b/lintcheck/src/main.rs index ee8ab7c1d7cb..bd49f0960726 100644 --- a/lintcheck/src/main.rs +++ b/lintcheck/src/main.rs @@ -120,8 +120,8 @@ impl ClippyWarning { format!("$CARGO_HOME/{}", stripped.display()) } else { format!( - "target/lintcheck/sources/{}-{}/{}", - crate_name, crate_version, span.file_name + "target/lintcheck/sources/{crate_name}-{crate_version}/{}", + span.file_name ) }; @@ -322,13 +322,13 @@ impl Crate { if config.max_jobs == 1 { println!( - "{}/{} {}% Linting {} {}", - index, total_crates_to_lint, perc, &self.name, &self.version + "{index}/{total_crates_to_lint} {perc}% Linting {} {}", + &self.name, &self.version ); } else { println!( - "{}/{} {}% Linting {} {} in target dir {:?}", - index, total_crates_to_lint, perc, &self.name, &self.version, thread_index + "{index}/{total_crates_to_lint} {perc}% Linting {} {} in target dir {thread_index:?}", + &self.name, &self.version ); } @@ -398,8 +398,7 @@ impl Crate { .output() .unwrap_or_else(|error| { panic!( - "Encountered error:\n{:?}\ncargo_clippy_path: {}\ncrate path:{}\n", - error, + "Encountered error:\n{error:?}\ncargo_clippy_path: {}\ncrate path:{}\n", &cargo_clippy_path.display(), &self.path.display() );