Skip to content
This repository was archived by the owner on May 28, 2025. It is now read-only.
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.

Commit ea291e5

Browse files
committed Dec 31, 2024
Account for format_args in HiddenUnicodeCodepoints lint
1 parent c6afe82 commit ea291e5

File tree

9 files changed

+82
-34
lines changed

9 files changed

+82
-34
lines changed
 

‎compiler/rustc_ast/src/format.rs

Lines changed: 5 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -4,6 +4,7 @@ use rustc_span::{Ident, Span, Symbol};
44

55
use crate::Expr;
66
use crate::ptr::P;
7+
use crate::token::LitKind;
78

89
// Definitions:
910
//
@@ -45,6 +46,10 @@ pub struct FormatArgs {
4546
pub span: Span,
4647
pub template: Vec<FormatArgsPiece>,
4748
pub arguments: FormatArguments,
49+
/// The raw, un-split format string literal, with no escaping or processing.
50+
///
51+
/// Generally only useful for lints that care about the raw bytes the user wrote.
52+
pub uncooked_fmt_str: (LitKind, Symbol),
4853
}
4954

5055
/// A piece of a format template string.

‎compiler/rustc_ast/src/mut_visit.rs

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1596,7 +1596,7 @@ fn walk_inline_asm_sym<T: MutVisitor>(
15961596

15971597
fn walk_format_args<T: MutVisitor>(vis: &mut T, fmt: &mut FormatArgs) {
15981598
// FIXME: visit the template exhaustively.
1599-
let FormatArgs { span, template: _, arguments } = fmt;
1599+
let FormatArgs { span, template: _, arguments, uncooked_fmt_str: _ } = fmt;
16001600
for FormatArgument { kind, expr } in arguments.all_args_mut() {
16011601
match kind {
16021602
FormatArgumentKind::Named(ident) | FormatArgumentKind::Captured(ident) => {

‎compiler/rustc_ast/src/visit.rs

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1061,7 +1061,7 @@ pub fn walk_inline_asm_sym<'a, V: Visitor<'a>>(
10611061
}
10621062

10631063
pub fn walk_format_args<'a, V: Visitor<'a>>(visitor: &mut V, fmt: &'a FormatArgs) -> V::Result {
1064-
let FormatArgs { span: _, template: _, arguments } = fmt;
1064+
let FormatArgs { span: _, template: _, arguments, uncooked_fmt_str: _ } = fmt;
10651065
for FormatArgument { kind, expr } in arguments.all_args() {
10661066
match kind {
10671067
FormatArgumentKind::Named(ident) | FormatArgumentKind::Captured(ident) => {

‎compiler/rustc_builtin_macros/src/asm.rs

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -531,6 +531,7 @@ fn expand_preparsed_asm(
531531
symbol: template_str,
532532
style: template_style,
533533
span: template_span,
534+
..
534535
} = {
535536
let ExpandResult::Ready(mac) = expr_to_spanned_string(ecx, template_expr, msg) else {
536537
return ExpandResult::Retry(());

‎compiler/rustc_builtin_macros/src/format.rs

Lines changed: 12 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -166,7 +166,12 @@ fn make_format_args(
166166

167167
let MacroInput { fmtstr: efmt, mut args, is_direct_literal } = input;
168168

169-
let ExprToSpannedString { symbol: fmt_str, span: fmt_span, style: fmt_style } = {
169+
let ExprToSpannedString {
170+
symbol: fmt_str,
171+
span: fmt_span,
172+
style: fmt_style,
173+
uncooked_symbol: uncooked_fmt_str,
174+
} = {
170175
let ExpandResult::Ready(mac) = expr_to_spanned_string(ecx, efmt.clone(), msg) else {
171176
return ExpandResult::Retry(());
172177
};
@@ -584,7 +589,12 @@ fn make_format_args(
584589
}
585590
}
586591

587-
ExpandResult::Ready(Ok(FormatArgs { span: fmt_span, template, arguments: args }))
592+
ExpandResult::Ready(Ok(FormatArgs {
593+
span: fmt_span,
594+
template,
595+
arguments: args,
596+
uncooked_fmt_str,
597+
}))
588598
}
589599

590600
fn invalid_placeholder_type_error(

‎compiler/rustc_builtin_macros/src/util.rs

Lines changed: 5 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -63,6 +63,10 @@ pub(crate) struct ExprToSpannedString {
6363
pub symbol: Symbol,
6464
pub style: ast::StrStyle,
6565
pub span: Span,
66+
/// The raw string literal, with no escaping or processing.
67+
///
68+
/// Generally only useful for lints that care about the raw bytes the user wrote.
69+
pub uncooked_symbol: (ast::token::LitKind, Symbol),
6670
}
6771

6872
/// - `Ok` is returned when the conversion to a string literal is unsuccessful,
@@ -100,6 +104,7 @@ pub(crate) fn expr_to_spanned_string<'a>(
100104
symbol: s,
101105
style,
102106
span: expr.span,
107+
uncooked_symbol: (token_lit.kind, token_lit.symbol),
103108
}));
104109
}
105110
Ok(ast::LitKind::ByteStr(..)) => {

‎compiler/rustc_lint/src/hidden_unicode_codepoints.rs

Lines changed: 34 additions & 26 deletions
Original file line number | Diff line number | Diff line change
@@ -82,7 +82,36 @@ impl HiddenUnicodeCodepoints {
8282
sub,
8383
});
8484
}
85+
86+
fn check_literal(
87+
&mut self,
88+
cx: &EarlyContext<'_>,
89+
text: Symbol,
90+
lit_kind: ast::token::LitKind,
91+
span: Span,
92+
label: &'static str,
93+
) {
94+
if !contains_text_flow_control_chars(text.as_str()) {
95+
return;
96+
}
97+
let (padding, point_at_inner_spans) = match lit_kind {
98+
// account for `"` or `'`
99+
ast::token::LitKind::Str | ast::token::LitKind::Char => (1, true),
100+
// account for `c"`
101+
ast::token::LitKind::CStr => (2, true),
102+
// account for `r###"`
103+
ast::token::LitKind::StrRaw(n) => (n as u32 + 2, true),
104+
// account for `cr###"`
105+
ast::token::LitKind::CStrRaw(n) => (n as u32 + 3, true),
106+
// suppress bad literals.
107+
ast::token::LitKind::Err(_) => return,
108+
// Be conservative just in case new literals do support these.
109+
_ => (0, false),
110+
};
111+
self.lint_text_direction_codepoint(cx, text, span, padding, point_at_inner_spans, label);
112+
}
85113
}
114+
86115
impl EarlyLintPass for HiddenUnicodeCodepoints {
87116
fn check_attribute(&mut self, cx: &EarlyContext<'_>, attr: &ast::Attribute) {
88117
if let ast::AttrKind::DocComment(_, comment) = attr.kind {
@@ -97,32 +126,11 @@ impl EarlyLintPass for HiddenUnicodeCodepoints {
97126
// byte strings are already handled well enough by `EscapeError::NonAsciiCharInByteString`
98127
match &expr.kind {
99128
ast::ExprKind::Lit(token_lit) => {
100-
let text = token_lit.symbol;
101-
if !contains_text_flow_control_chars(text.as_str()) {
102-
return;
103-
}
104-
let (padding, point_at_inner_spans) = match token_lit.kind {
105-
// account for `"` or `'`
106-
ast::token::LitKind::Str | ast::token::LitKind::Char => (1, true),
107-
// account for `c"`
108-
ast::token::LitKind::CStr => (2, true),
109-
// account for `r###"`
110-
ast::token::LitKind::StrRaw(n) => (n as u32 + 2, true),
111-
// account for `cr###"`
112-
ast::token::LitKind::CStrRaw(n) => (n as u32 + 3, true),
113-
// suppress bad literals.
114-
ast::token::LitKind::Err(_) => return,
115-
// Be conservative just in case new literals do support these.
116-
_ => (0, false),
117-
};
118-
self.lint_text_direction_codepoint(
119-
cx,
120-
text,
121-
expr.span,
122-
padding,
123-
point_at_inner_spans,
124-
"literal",
125-
);
129+
self.check_literal(cx, token_lit.symbol, token_lit.kind, expr.span, "literal");
130+
}
131+
ast::ExprKind::FormatArgs(args) => {
132+
let (lit_kind, text) = args.uncooked_fmt_str;
133+
self.check_literal(cx, text, lit_kind, args.span, "format string");
126134
}
127135
_ => {}
128136
};

‎tests/ui/parser/unicode-control-codepoints.rs

Lines changed: 3 additions & 0 deletions
This file contains bidirectional or hidden Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change
@@ -32,6 +32,9 @@ fn main() {
3232
//~^ ERROR unicode codepoint changing visible direction of text present in literal
3333
let _ = cr#"‮"#;
3434
//~^ ERROR unicode codepoint changing visible direction of text present in literal
35+
36+
println!("{{‮}}");
37+
//~^ ERROR unicode codepoint changing visible direction of text present in format string
3538
}
3639

3740
//"/*‮ } ⁦if isAdmin⁩ ⁦ begin admins only */"

‎tests/ui/parser/unicode-control-codepoints.stderr

Lines changed: 20 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -97,7 +97,7 @@ LL | // if access_level != "us�e�r" { // Check if admin
9797
= help: if their presence wasn't intentional, you can remove them
9898

9999
error: unicode codepoint changing visible direction of text present in comment
100-
--> $DIR/unicode-control-codepoints.rs:37:1
100+
--> $DIR/unicode-control-codepoints.rs:40:1
101101
|
102102
LL | //"/*� } �if isAdmin� � begin admins only */"
103103
| ^^^^^-^^^-^^^^^^^^^^-^-^^^^^^^^^^^^^^^^^^^^^^
@@ -198,8 +198,24 @@ help: if you want to keep them but make them visible in your source code, you ca
198198
LL | let _ = cr#"\u{202e}"#;
199199
| ~~~~~~~~
200200

201+
error: unicode codepoint changing visible direction of text present in format string
202+
--> $DIR/unicode-control-codepoints.rs:36:14
203+
|
204+
LL | println!("{{�}}");
205+
| ^^^-^^^
206+
| | |
207+
| | '\u{202e}'
208+
| this format string contains an invisible unicode text flow control codepoint
209+
|
210+
= note: these kind of unicode codepoints change the way text flows on applications that support them, but can cause confusion because they change the order of characters on the screen
211+
= help: if their presence wasn't intentional, you can remove them
212+
help: if you want to keep them but make them visible in your source code, you can escape them
213+
|
214+
LL | println!("{{\u{202e}}}");
215+
| ~~~~~~~~
216+
201217
error: unicode codepoint changing visible direction of text present in doc comment
202-
--> $DIR/unicode-control-codepoints.rs:40:1
218+
--> $DIR/unicode-control-codepoints.rs:43:1
203219
|
204220
LL | /** '�'); */fn foo() {}
205221
| ^^^^^^^^^^^^^ this doc comment contains an invisible unicode text flow control codepoint
@@ -209,7 +225,7 @@ LL | /** '�'); */fn foo() {}
209225
= note: if you want to keep them but make them visible in your source code, you can escape them: '\u{202e}'
210226

211227
error: unicode codepoint changing visible direction of text present in doc comment
212-
--> $DIR/unicode-control-codepoints.rs:43:1
228+
--> $DIR/unicode-control-codepoints.rs:46:1
213229
|
214230
LL | / /**
215231
LL | | *
@@ -220,5 +236,5 @@ LL | | * '�'); */fn bar() {}
220236
= note: if their presence wasn't intentional, you can remove them
221237
= note: if you want to keep them but make them visible in your source code, you can escape them: '\u{202e}'
222238

223-
error: aborting due to 19 previous errors
239+
error: aborting due to 20 previous errors
224240

0 commit comments

Comments
 (0)
This repository has been archived.