From 8a11462dd907b874f561dd898b109b9214129886 Mon Sep 17 00:00:00 2001 From: Darren Kulp Date: Fri, 22 May 2020 16:22:01 -0700 Subject: [PATCH 01/54] Introduce extent to ClangToken --- src/clang.rs | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/src/clang.rs b/src/clang.rs index 12ac46cebd..9995cf0c90 100644 --- a/src/clang.rs +++ b/src/clang.rs @@ -793,12 +793,13 @@ impl<'a> Drop for RawTokens<'a> { } } -/// A raw clang token, that exposes only the kind and spelling. This is a +/// A raw clang token, that exposes only kind, spelling, and extent. This is a /// slightly more convenient version of `CXToken` which owns the spelling -/// string. +/// string and extent. #[derive(Debug)] pub struct ClangToken { spelling: CXString, + extent: CXSourceRange, /// The kind of token, this is the same as the relevant member from /// `CXToken`. pub kind: CXTokenKind, @@ -834,7 +835,12 @@ impl<'a> Iterator for ClangTokenIterator<'a> { unsafe { let kind = clang_getTokenKind(*raw); let spelling = clang_getTokenSpelling(self.tu, *raw); - Some(ClangToken { kind, spelling }) + let extent = clang_getTokenExtent(self.tu, *raw); + Some(ClangToken { + kind, + extent, + spelling, + }) } } } From 1efcfd93c5494a2ebb847fb95d36cb5549fc4eb6 Mon Sep 17 00:00:00 2001 From: Darren Kulp Date: Fri, 22 May 2020 16:29:44 -0700 Subject: [PATCH 02/54] Introduce ClangToken::is_abutting --- src/clang.rs | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/src/clang.rs b/src/clang.rs index 9995cf0c90..12c89af728 100644 --- a/src/clang.rs +++ b/src/clang.rs @@ -813,6 +813,16 @@ impl ClangToken { }; c_str.to_bytes() } + + /// Tests whether a token's right boundary (CXSourceLocation) is the same as + /// the next token's left boundary, meaning they have no intervening + /// whitespace. + pub fn is_abutting(&self, next: &ClangToken) -> bool { + // This comparison makes an narrow assumption about the internal data + // format of CXSourceLocation. A more explicit and costly check might + // use clang_getSpellingLocation(). + self.extent.end_int_data == next.extent.begin_int_data + } } impl Drop for ClangToken { From cd10e21b3ce0dde4a05fbae23361f154500f7cd1 Mon Sep 17 00:00:00 2001 From: Darren Kulp Date: Fri, 22 May 2020 16:43:54 -0700 Subject: [PATCH 03/54] Check for function-like macros and bail out --- src/ir/var.rs | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/src/ir/var.rs b/src/ir/var.rs index 0f05a3eecf..977f0418bb 100644 --- a/src/ir/var.rs +++ b/src/ir/var.rs @@ -320,6 +320,21 @@ fn parse_macro( ) -> Option<(Vec, cexpr::expr::EvalResult)> { use cexpr::expr; + // cexpr explicitly does not handle function-like macros, except for a + // degenerate case in which it behaves in a non-conformant way. We prevent + // cexpr from ever seeing function-like macros by checking for a parenthesis + // token immediately adjacent to (that is, abutting) the first token in the + // macro definition. + let tokens = cursor.tokens(); + let mut tokens = tokens.iter().fuse(); + match (tokens.next(), tokens.next()) { + // TODO check second macro's type and content + (Some(first), Some(second)) if first.is_abutting(&second) => { + return None; + }, + _ => {}, + } + let mut cexpr_tokens = cursor.cexpr_tokens(); let parser = expr::IdentifierParser::new(ctx.parsed_macros()); From c9c9cfd200f544af74c624b27420a06bf378af5b Mon Sep 17 00:00:00 2001 From: Darren Kulp Date: Fri, 22 May 2020 16:50:38 -0700 Subject: [PATCH 04/54] Introduce func_macro to ParseCallbacks trait --- src/callbacks.rs | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/callbacks.rs b/src/callbacks.rs index 1b5445e642..6c4c0cb6e1 100644 --- a/src/callbacks.rs +++ b/src/callbacks.rs @@ -39,6 +39,11 @@ pub trait ParseCallbacks: fmt::Debug + UnwindSafe { /// treatment of the macro, but may use the value to generate additional code or configuration. fn str_macro(&self, _name: &str, _value: &[u8]) {} + /// This will be run on every function-like macro. The callback can not + /// influence the further treatment of the macro, but may use the value to + /// generate additional code or configuration. + fn func_macro(&self, _name: &str, _value: &str) {} + /// This function should return whether, given an enum variant /// name, and value, this enum variant will forcibly be a constant. fn enum_variant_behavior( From 89f36d632907463feada191bda6039a609db6ee7 Mon Sep 17 00:00:00 2001 From: Darren Kulp Date: Fri, 22 May 2020 17:13:46 -0700 Subject: [PATCH 05/54] Generate func_macro callbacks --- src/ir/var.rs | 49 ++++++++++++++++++++++++++++++++++--------------- 1 file changed, 34 insertions(+), 15 deletions(-) diff --git a/src/ir/var.rs b/src/ir/var.rs index 977f0418bb..f0f612d769 100644 --- a/src/ir/var.rs +++ b/src/ir/var.rs @@ -149,6 +149,40 @@ impl ClangSubItemParser for Var { } } + // cexpr explicitly does not handle function-like macros, except + // for a degenerate case in which it behaves in a non-conformant + // way. We prevent cexpr from ever seeing function-like macros + // by checking for a parenthesis token immediately adjacent to + // (that is, abutting) the first token in the macro definition. + let tokens: Vec<_> = cursor.tokens().iter().collect(); + match tokens.get(0..2) { + // TODO check second macro's type and content + Some([first, second]) if first.is_abutting(&second) => { + if let Some(callbacks) = ctx.parse_callbacks() { + let joined: Vec<_> = tokens + .into_iter() + .map(|t| t.spelling().to_owned()) + .collect(); + let boundary = joined + .iter() + .position(|s| s == b")") + .map(|n| n + 1) + .unwrap_or(joined.len()); + let (left, right) = joined.split_at(boundary); + let left = String::from_utf8(left.concat()); + let right = String::from_utf8(right.join(&b' ')); + match (left, right) { + (Ok(left), Ok(right)) => { + callbacks.func_macro(&left, &right) + } + _ => {} // TODO handle utf8 failure ? + } + } + return Err(ParseError::Continue); + } + _ => {} + } + let value = parse_macro(ctx, &cursor); let (id, value) = match value { @@ -320,21 +354,6 @@ fn parse_macro( ) -> Option<(Vec, cexpr::expr::EvalResult)> { use cexpr::expr; - // cexpr explicitly does not handle function-like macros, except for a - // degenerate case in which it behaves in a non-conformant way. We prevent - // cexpr from ever seeing function-like macros by checking for a parenthesis - // token immediately adjacent to (that is, abutting) the first token in the - // macro definition. - let tokens = cursor.tokens(); - let mut tokens = tokens.iter().fuse(); - match (tokens.next(), tokens.next()) { - // TODO check second macro's type and content - (Some(first), Some(second)) if first.is_abutting(&second) => { - return None; - }, - _ => {}, - } - let mut cexpr_tokens = cursor.cexpr_tokens(); let parser = expr::IdentifierParser::new(ctx.parsed_macros()); From 99a9f0a436852a91a8ddbbde61f2ee4f74667692 Mon Sep 17 00:00:00 2001 From: Darren Kulp Date: Fri, 22 May 2020 20:34:47 -0700 Subject: [PATCH 06/54] Hoist function to handle function macros --- src/ir/var.rs | 76 +++++++++++++++++++++++++++++---------------------- 1 file changed, 43 insertions(+), 33 deletions(-) diff --git a/src/ir/var.rs b/src/ir/var.rs index f0f612d769..7642652ceb 100644 --- a/src/ir/var.rs +++ b/src/ir/var.rs @@ -130,6 +130,48 @@ fn default_macro_constant_type(value: i64) -> IntKind { } } +/// Determines whether a cursor that is pointing into a CXCursor_MacroDefinition +/// is pointing into a function-like macro. If so, calls the func_macro callback +/// and returns Err(ParseError::Continue) to signal to skip further processing. +fn handle_function_macro( + ctx: &BindgenContext, + cursor: &clang::Cursor, +) -> Result<(), ParseError> { + // cexpr explicitly does not handle function-like macros, except for a + // degenerate case in which it behaves in a non-conformant way. We prevent + // cexpr from ever seeing function-like macros by checking for a parenthesis + // token immediately adjacent to (that is, abutting) the first token in the + // macro definition. + let tokens: Vec<_> = cursor.tokens().iter().collect(); + match tokens.get(0..2) { + // TODO check second macro's type and content + Some([first, second]) if first.is_abutting(&second) => { + if let Some(callbacks) = ctx.parse_callbacks() { + let joined: Vec<_> = tokens + .into_iter() + .map(|t| t.spelling().to_owned()) + .collect(); + let boundary = joined + .iter() + .position(|s| s == b")") + .map(|n| n + 1) // skip past ')' + .unwrap_or(joined.len()); + let (left, right) = joined.split_at(boundary); + let left = String::from_utf8(left.concat()); + let right = String::from_utf8(right.join(&b' ')); + match (left, right) { + (Ok(left), Ok(right)) => { + callbacks.func_macro(&left, &right) + } + _ => {} // TODO handle utf8 failure ? + } + } + Err(ParseError::Continue) + } + _ => Ok(()), + } +} + impl ClangSubItemParser for Var { fn parse( cursor: clang::Cursor, @@ -149,39 +191,7 @@ impl ClangSubItemParser for Var { } } - // cexpr explicitly does not handle function-like macros, except - // for a degenerate case in which it behaves in a non-conformant - // way. We prevent cexpr from ever seeing function-like macros - // by checking for a parenthesis token immediately adjacent to - // (that is, abutting) the first token in the macro definition. - let tokens: Vec<_> = cursor.tokens().iter().collect(); - match tokens.get(0..2) { - // TODO check second macro's type and content - Some([first, second]) if first.is_abutting(&second) => { - if let Some(callbacks) = ctx.parse_callbacks() { - let joined: Vec<_> = tokens - .into_iter() - .map(|t| t.spelling().to_owned()) - .collect(); - let boundary = joined - .iter() - .position(|s| s == b")") - .map(|n| n + 1) - .unwrap_or(joined.len()); - let (left, right) = joined.split_at(boundary); - let left = String::from_utf8(left.concat()); - let right = String::from_utf8(right.join(&b' ')); - match (left, right) { - (Ok(left), Ok(right)) => { - callbacks.func_macro(&left, &right) - } - _ => {} // TODO handle utf8 failure ? - } - } - return Err(ParseError::Continue); - } - _ => {} - } + handle_function_macro(ctx, &cursor)?; let value = parse_macro(ctx, &cursor); From cc84e7a83582ca118e7077ec4733802b04c635a6 Mon Sep 17 00:00:00 2001 From: Darren Kulp Date: Fri, 22 May 2020 20:55:42 -0700 Subject: [PATCH 07/54] Refactoring hoisted function interface --- src/ir/var.rs | 42 ++++++++++++++++++++---------------------- 1 file changed, 20 insertions(+), 22 deletions(-) diff --git a/src/ir/var.rs b/src/ir/var.rs index 7642652ceb..83d0a354ab 100644 --- a/src/ir/var.rs +++ b/src/ir/var.rs @@ -134,8 +134,8 @@ fn default_macro_constant_type(value: i64) -> IntKind { /// is pointing into a function-like macro. If so, calls the func_macro callback /// and returns Err(ParseError::Continue) to signal to skip further processing. fn handle_function_macro( - ctx: &BindgenContext, cursor: &clang::Cursor, + func_macro: impl FnOnce(&str, &str), ) -> Result<(), ParseError> { // cexpr explicitly does not handle function-like macros, except for a // degenerate case in which it behaves in a non-conformant way. We prevent @@ -146,25 +146,21 @@ fn handle_function_macro( match tokens.get(0..2) { // TODO check second macro's type and content Some([first, second]) if first.is_abutting(&second) => { - if let Some(callbacks) = ctx.parse_callbacks() { - let joined: Vec<_> = tokens - .into_iter() - .map(|t| t.spelling().to_owned()) - .collect(); - let boundary = joined - .iter() - .position(|s| s == b")") - .map(|n| n + 1) // skip past ')' - .unwrap_or(joined.len()); - let (left, right) = joined.split_at(boundary); - let left = String::from_utf8(left.concat()); - let right = String::from_utf8(right.join(&b' ')); - match (left, right) { - (Ok(left), Ok(right)) => { - callbacks.func_macro(&left, &right) - } - _ => {} // TODO handle utf8 failure ? - } + let joined: Vec<_> = tokens + .into_iter() + .map(|t| t.spelling().to_owned()) + .collect(); + let boundary = joined + .iter() + .position(|s| s == b")") + .map(|n| n + 1) // skip past ')' + .unwrap_or(joined.len()); + let (left, right) = joined.split_at(boundary); + let left = String::from_utf8(left.concat()); + let right = String::from_utf8(right.join(&b' ')); + match (left, right) { + (Ok(left), Ok(right)) => func_macro(&left, &right), + _ => {} // TODO handle utf8 failure ? } Err(ParseError::Continue) } @@ -189,9 +185,11 @@ impl ClangSubItemParser for Var { } MacroParsingBehavior::Default => {} } - } - handle_function_macro(ctx, &cursor)?; + handle_function_macro(&cursor, |left, right| { + callbacks.func_macro(left, right) + })?; + } let value = parse_macro(ctx, &cursor); From 8babf13330372011b412b7129e835f32bfa5b36b Mon Sep 17 00:00:00 2001 From: Darren Kulp Date: Fri, 22 May 2020 20:59:12 -0700 Subject: [PATCH 08/54] Make hoisted function a bit prettier --- src/ir/var.rs | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/src/ir/var.rs b/src/ir/var.rs index 83d0a354ab..31be8052d7 100644 --- a/src/ir/var.rs +++ b/src/ir/var.rs @@ -155,13 +155,17 @@ fn handle_function_macro( .position(|s| s == b")") .map(|n| n + 1) // skip past ')' .unwrap_or(joined.len()); + let (left, right) = joined.split_at(boundary); let left = String::from_utf8(left.concat()); let right = String::from_utf8(right.join(&b' ')); - match (left, right) { - (Ok(left), Ok(right)) => func_macro(&left, &right), - _ => {} // TODO handle utf8 failure ? + if let (Ok(left), Ok(right)) = (left, right) { + func_macro(&left, &right) + } else { + // TODO handle utf8 failure ? } + + // We handled the macro, skip future macro processing. Err(ParseError::Continue) } _ => Ok(()), From 5b411c108aae4eeb1db0f52ecd6cb502f3a02257 Mon Sep 17 00:00:00 2001 From: Darren Kulp Date: Sat, 23 May 2020 07:14:09 -0700 Subject: [PATCH 09/54] Prepare to check func_macro call count --- bindgen-integration/build.rs | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/bindgen-integration/build.rs b/bindgen-integration/build.rs index f042fe9380..dcbd9b68e3 100644 --- a/bindgen-integration/build.rs +++ b/bindgen-integration/build.rs @@ -12,6 +12,7 @@ use std::sync::{Arc, Mutex, RwLock}; struct MacroCallback { macros: Arc>>, seen_hellos: Mutex, + seen_funcs: Mutex, } impl ParseCallbacks for MacroCallback { @@ -78,7 +79,12 @@ impl Drop for MacroCallback { *self.seen_hellos.lock().unwrap(), 2, "str_macro handle was not called once for all relevant macros" - ) + ); + assert_eq!( + *self.seen_funcs.lock().unwrap(), + 0, + "func_macro handle was not called once for all relevant macros" + ); } } @@ -102,6 +108,7 @@ fn main() { .parse_callbacks(Box::new(MacroCallback { macros: macros.clone(), seen_hellos: Mutex::new(0), + seen_funcs: Mutex::new(0), })) .blacklist_function("my_prefixed_function_to_remove") .generate() From 1231d170d5846376b5325e5a2d5769a6bcc0af1e Mon Sep 17 00:00:00 2001 From: Darren Kulp Date: Sat, 23 May 2020 07:02:05 -0700 Subject: [PATCH 10/54] Add negative tests for functional macros --- bindgen-integration/build.rs | 15 ++++++++++++++- bindgen-integration/cpp/Test.h | 7 +++++++ 2 files changed, 21 insertions(+), 1 deletion(-) diff --git a/bindgen-integration/build.rs b/bindgen-integration/build.rs index dcbd9b68e3..7de09c3f7e 100644 --- a/bindgen-integration/build.rs +++ b/bindgen-integration/build.rs @@ -46,6 +46,10 @@ impl ParseCallbacks for MacroCallback { fn str_macro(&self, name: &str, value: &[u8]) { match name { + "TESTMACRO_STRING_EXPR" => { + assert_eq!(value, b"string"); + *self.seen_hellos.lock().unwrap() += 1; + } "TESTMACRO_STRING_EXPANDED" | "TESTMACRO_STRING" | "TESTMACRO_INTEGER" => { @@ -71,13 +75,22 @@ impl ParseCallbacks for MacroCallback { _ => None, } } + + fn func_macro(&self, name: &str, value: &str) { + match name { + "TESTMACRO_NONFUNCTIONAL" => { + panic!("func_macro was called for a non-functional macro"); + } + _ => {} // The system might provide lots of functional macros + } + } } impl Drop for MacroCallback { fn drop(&mut self) { assert_eq!( *self.seen_hellos.lock().unwrap(), - 2, + 3, "str_macro handle was not called once for all relevant macros" ); assert_eq!( diff --git a/bindgen-integration/cpp/Test.h b/bindgen-integration/cpp/Test.h index a20cf4b7ad..9b9fe2b9e8 100644 --- a/bindgen-integration/cpp/Test.h +++ b/bindgen-integration/cpp/Test.h @@ -7,6 +7,13 @@ #define TESTMACRO_STRING_EXPANDED TESTMACRO_STRING #define TESTMACRO_CUSTOMINTKIND_PATH 123 +// The following two macros are parsed the same by cexpr, but are semantically +// different. +#define TESTMACRO_NONFUNCTIONAL (TESTMACRO_INTEGER) +#define TESTMACRO_FUNCTIONAL_EMPTY(TESTMACRO_INTEGER) +//#define TESTMACRO_INVALID("string") // A conforming preprocessor rejects this +#define TESTMACRO_STRING_EXPR ("string") + #include enum { From 9043b55a81664229ebb54bd686de2c750168a2ac Mon Sep 17 00:00:00 2001 From: Darren Kulp Date: Sat, 23 May 2020 07:06:35 -0700 Subject: [PATCH 11/54] Add positive tests for func_macro --- bindgen-integration/build.rs | 15 ++++++++++++++- bindgen-integration/cpp/Test.h | 1 + 2 files changed, 15 insertions(+), 1 deletion(-) diff --git a/bindgen-integration/build.rs b/bindgen-integration/build.rs index 7de09c3f7e..303e8d4321 100644 --- a/bindgen-integration/build.rs +++ b/bindgen-integration/build.rs @@ -81,6 +81,19 @@ impl ParseCallbacks for MacroCallback { "TESTMACRO_NONFUNCTIONAL" => { panic!("func_macro was called for a non-functional macro"); } + "TESTMACRO_FUNCTIONAL_NONEMPTY(TESTMACRO_INTEGER)" => { + // Spaces are inserted into the right-hand side of a functional + // macro during reconstruction from the tokenization. This might + // change in the future, but it is safe by the definition of a + // token in C, whereas leaving the spaces out could change + // tokenization. + assert_eq!(value, "- TESTMACRO_INTEGER"); + *self.seen_funcs.lock().unwrap() += 1; + } + "TESTMACRO_FUNCTIONAL_EMPTY(TESTMACRO_INTEGER)" => { + assert_eq!(value, ""); + *self.seen_funcs.lock().unwrap() += 1; + } _ => {} // The system might provide lots of functional macros } } @@ -95,7 +108,7 @@ impl Drop for MacroCallback { ); assert_eq!( *self.seen_funcs.lock().unwrap(), - 0, + 2, "func_macro handle was not called once for all relevant macros" ); } diff --git a/bindgen-integration/cpp/Test.h b/bindgen-integration/cpp/Test.h index 9b9fe2b9e8..918f7007c0 100644 --- a/bindgen-integration/cpp/Test.h +++ b/bindgen-integration/cpp/Test.h @@ -11,6 +11,7 @@ // different. #define TESTMACRO_NONFUNCTIONAL (TESTMACRO_INTEGER) #define TESTMACRO_FUNCTIONAL_EMPTY(TESTMACRO_INTEGER) +#define TESTMACRO_FUNCTIONAL_NONEMPTY(TESTMACRO_INTEGER)-TESTMACRO_INTEGER //#define TESTMACRO_INVALID("string") // A conforming preprocessor rejects this #define TESTMACRO_STRING_EXPR ("string") From 5f3f39f0642a8df01d64f3a698afec533826656b Mon Sep 17 00:00:00 2001 From: Darren Kulp Date: Sat, 23 May 2020 07:28:42 -0700 Subject: [PATCH 12/54] Demonstrate retokenization --- bindgen-integration/build.rs | 6 +++++- bindgen-integration/cpp/Test.h | 3 +++ 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/bindgen-integration/build.rs b/bindgen-integration/build.rs index 303e8d4321..92d967b2b3 100644 --- a/bindgen-integration/build.rs +++ b/bindgen-integration/build.rs @@ -94,6 +94,10 @@ impl ParseCallbacks for MacroCallback { assert_eq!(value, ""); *self.seen_funcs.lock().unwrap() += 1; } + "TESTMACRO_FUNCTIONAL_TOKENIZED(a,b,c,d,e)" => { + assert_eq!(value, "a / b c d ## e"); + *self.seen_funcs.lock().unwrap() += 1; + } _ => {} // The system might provide lots of functional macros } } @@ -108,7 +112,7 @@ impl Drop for MacroCallback { ); assert_eq!( *self.seen_funcs.lock().unwrap(), - 2, + 3, "func_macro handle was not called once for all relevant macros" ); } diff --git a/bindgen-integration/cpp/Test.h b/bindgen-integration/cpp/Test.h index 918f7007c0..35545530c1 100644 --- a/bindgen-integration/cpp/Test.h +++ b/bindgen-integration/cpp/Test.h @@ -12,6 +12,9 @@ #define TESTMACRO_NONFUNCTIONAL (TESTMACRO_INTEGER) #define TESTMACRO_FUNCTIONAL_EMPTY(TESTMACRO_INTEGER) #define TESTMACRO_FUNCTIONAL_NONEMPTY(TESTMACRO_INTEGER)-TESTMACRO_INTEGER +// Unusual spacing in the following macro is intentional, to demonstrate +// reformatting after tokenization. +#define TESTMACRO_FUNCTIONAL_TOKENIZED( a, b ,c,d,e ) a/b c d ## e //#define TESTMACRO_INVALID("string") // A conforming preprocessor rejects this #define TESTMACRO_STRING_EXPR ("string") From 977e3da5328da00ffdab409eeb0a1099215917ba Mon Sep 17 00:00:00 2001 From: Darren Kulp Date: Sat, 23 May 2020 07:33:59 -0700 Subject: [PATCH 13/54] Use "cannot" for "can not" --- src/callbacks.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/callbacks.rs b/src/callbacks.rs index 6c4c0cb6e1..24c2e3d4aa 100644 --- a/src/callbacks.rs +++ b/src/callbacks.rs @@ -35,11 +35,11 @@ pub trait ParseCallbacks: fmt::Debug + UnwindSafe { None } - /// This will be run on every string macro. The callback can not influence the further + /// This will be run on every string macro. The callback cannot influence the further /// treatment of the macro, but may use the value to generate additional code or configuration. fn str_macro(&self, _name: &str, _value: &[u8]) {} - /// This will be run on every function-like macro. The callback can not + /// This will be run on every function-like macro. The callback cannot /// influence the further treatment of the macro, but may use the value to /// generate additional code or configuration. fn func_macro(&self, _name: &str, _value: &str) {} From 558ce72c31bd4a7ec055783234d8430e831b1531 Mon Sep 17 00:00:00 2001 From: Darren Kulp Date: Sat, 23 May 2020 08:32:15 -0700 Subject: [PATCH 14/54] Explain arguments --- src/callbacks.rs | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/callbacks.rs b/src/callbacks.rs index 24c2e3d4aa..051c1160a3 100644 --- a/src/callbacks.rs +++ b/src/callbacks.rs @@ -42,6 +42,12 @@ pub trait ParseCallbacks: fmt::Debug + UnwindSafe { /// This will be run on every function-like macro. The callback cannot /// influence the further treatment of the macro, but may use the value to /// generate additional code or configuration. + /// + /// The first parameter represents the name and argument list (including the + /// parentheses) of the function-like macro. The second parameter represents + /// the expansion of the macro. It is not guaranteed that the whitespace of + /// the original is preserved, but it is guaranteed that tokenization will + /// not be changed. fn func_macro(&self, _name: &str, _value: &str) {} /// This function should return whether, given an enum variant From b8c7173898b963834e2ce98bdd67b7c3ac969df7 Mon Sep 17 00:00:00 2001 From: Darren Kulp Date: Sat, 23 May 2020 08:35:21 -0700 Subject: [PATCH 15/54] Demonstrate newlines within macros --- bindgen-integration/build.rs | 6 +++++- bindgen-integration/cpp/Test.h | 3 +++ 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/bindgen-integration/build.rs b/bindgen-integration/build.rs index 92d967b2b3..72b3f64f14 100644 --- a/bindgen-integration/build.rs +++ b/bindgen-integration/build.rs @@ -98,6 +98,10 @@ impl ParseCallbacks for MacroCallback { assert_eq!(value, "a / b c d ## e"); *self.seen_funcs.lock().unwrap() += 1; } + "TESTMACRO_FUNCTIONAL_SPLIT(a,b)" => { + assert_eq!(value, "b , a"); + *self.seen_funcs.lock().unwrap() += 1; + } _ => {} // The system might provide lots of functional macros } } @@ -112,7 +116,7 @@ impl Drop for MacroCallback { ); assert_eq!( *self.seen_funcs.lock().unwrap(), - 3, + 4, "func_macro handle was not called once for all relevant macros" ); } diff --git a/bindgen-integration/cpp/Test.h b/bindgen-integration/cpp/Test.h index 35545530c1..e387219dc9 100644 --- a/bindgen-integration/cpp/Test.h +++ b/bindgen-integration/cpp/Test.h @@ -15,6 +15,9 @@ // Unusual spacing in the following macro is intentional, to demonstrate // reformatting after tokenization. #define TESTMACRO_FUNCTIONAL_TOKENIZED( a, b ,c,d,e ) a/b c d ## e +#define TESTMACRO_FUNCTIONAL_SPLIT( a, \ + b) b,\ + a //#define TESTMACRO_INVALID("string") // A conforming preprocessor rejects this #define TESTMACRO_STRING_EXPR ("string") From e98f9a108bcd73fdd4c8938d9ced5921a9ab1084 Mon Sep 17 00:00:00 2001 From: Darren Kulp Date: Sat, 23 May 2020 08:38:55 -0700 Subject: [PATCH 16/54] Make default case more robust --- bindgen-integration/build.rs | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/bindgen-integration/build.rs b/bindgen-integration/build.rs index 72b3f64f14..8bcbed106f 100644 --- a/bindgen-integration/build.rs +++ b/bindgen-integration/build.rs @@ -102,7 +102,11 @@ impl ParseCallbacks for MacroCallback { assert_eq!(value, "b , a"); *self.seen_funcs.lock().unwrap() += 1; } - _ => {} // The system might provide lots of functional macros + _ => { + // The system might provide lots of functional macros. + // Ensure we did not miss handling one that we meant to handle. + assert!(!name.starts_with("TESTMACRO_"), "name = {}", name); + } } } } From 614a31a6bb4918e95510448baa924e119a21bd7f Mon Sep 17 00:00:00 2001 From: Darren Kulp Date: Sat, 23 May 2020 12:29:12 -0700 Subject: [PATCH 17/54] Stop guaranteeing UTF-8 for expansion of macro A macro expansion could contain a string constant, which might not be UTF-8. Also, using `&[u8]` for the expansion keeps the new API closer to the precedent set by `str_macro`. --- bindgen-integration/build.rs | 10 +++++----- src/callbacks.rs | 2 +- src/ir/var.rs | 17 ++++++++--------- 3 files changed, 14 insertions(+), 15 deletions(-) diff --git a/bindgen-integration/build.rs b/bindgen-integration/build.rs index 8bcbed106f..536853589b 100644 --- a/bindgen-integration/build.rs +++ b/bindgen-integration/build.rs @@ -76,7 +76,7 @@ impl ParseCallbacks for MacroCallback { } } - fn func_macro(&self, name: &str, value: &str) { + fn func_macro(&self, name: &str, value: &[u8]) { match name { "TESTMACRO_NONFUNCTIONAL" => { panic!("func_macro was called for a non-functional macro"); @@ -87,19 +87,19 @@ impl ParseCallbacks for MacroCallback { // change in the future, but it is safe by the definition of a // token in C, whereas leaving the spaces out could change // tokenization. - assert_eq!(value, "- TESTMACRO_INTEGER"); + assert_eq!(value, b"- TESTMACRO_INTEGER"); *self.seen_funcs.lock().unwrap() += 1; } "TESTMACRO_FUNCTIONAL_EMPTY(TESTMACRO_INTEGER)" => { - assert_eq!(value, ""); + assert_eq!(value, b""); *self.seen_funcs.lock().unwrap() += 1; } "TESTMACRO_FUNCTIONAL_TOKENIZED(a,b,c,d,e)" => { - assert_eq!(value, "a / b c d ## e"); + assert_eq!(value, b"a / b c d ## e"); *self.seen_funcs.lock().unwrap() += 1; } "TESTMACRO_FUNCTIONAL_SPLIT(a,b)" => { - assert_eq!(value, "b , a"); + assert_eq!(value, b"b , a"); *self.seen_funcs.lock().unwrap() += 1; } _ => { diff --git a/src/callbacks.rs b/src/callbacks.rs index 051c1160a3..93c00775f5 100644 --- a/src/callbacks.rs +++ b/src/callbacks.rs @@ -48,7 +48,7 @@ pub trait ParseCallbacks: fmt::Debug + UnwindSafe { /// the expansion of the macro. It is not guaranteed that the whitespace of /// the original is preserved, but it is guaranteed that tokenization will /// not be changed. - fn func_macro(&self, _name: &str, _value: &str) {} + fn func_macro(&self, _name: &str, _value: &[u8]) {} /// This function should return whether, given an enum variant /// name, and value, this enum variant will forcibly be a constant. diff --git a/src/ir/var.rs b/src/ir/var.rs index 31be8052d7..d2b744dd6e 100644 --- a/src/ir/var.rs +++ b/src/ir/var.rs @@ -132,10 +132,12 @@ fn default_macro_constant_type(value: i64) -> IntKind { /// Determines whether a cursor that is pointing into a CXCursor_MacroDefinition /// is pointing into a function-like macro. If so, calls the func_macro callback -/// and returns Err(ParseError::Continue) to signal to skip further processing. +/// and returns `Err(ParseError::Continue)` to signal to skip further +/// processing. If version to UTF-8 fails (it is performed only where it should +/// be infallible), then `Err(ParseError::Continue)` is returned as well. fn handle_function_macro( cursor: &clang::Cursor, - func_macro: impl FnOnce(&str, &str), + func_macro: impl FnOnce(&str, &[u8]), ) -> Result<(), ParseError> { // cexpr explicitly does not handle function-like macros, except for a // degenerate case in which it behaves in a non-conformant way. We prevent @@ -157,13 +159,10 @@ fn handle_function_macro( .unwrap_or(joined.len()); let (left, right) = joined.split_at(boundary); - let left = String::from_utf8(left.concat()); - let right = String::from_utf8(right.join(&b' ')); - if let (Ok(left), Ok(right)) = (left, right) { - func_macro(&left, &right) - } else { - // TODO handle utf8 failure ? - } + let left = String::from_utf8(left.concat()) + .map_err(|_| ParseError::Continue)?; + let right = right.join(&b' '); + func_macro(&left, &right); // We handled the macro, skip future macro processing. Err(ParseError::Continue) From a2966ea2b186e45494e9fb5ecb0f2cab042fb496 Mon Sep 17 00:00:00 2001 From: Darren Kulp Date: Sat, 6 Jun 2020 11:17:13 -0700 Subject: [PATCH 18/54] Provide a list of tokens If we cannot reproduce the original whitespace anyway, then we should let the callee decide what to do with the tokens, rather than smashing them together in a "safe" way. --- bindgen-integration/build.rs | 20 +++++++++++++++----- src/callbacks.rs | 6 ++---- src/ir/var.rs | 4 ++-- 3 files changed, 19 insertions(+), 11 deletions(-) diff --git a/bindgen-integration/build.rs b/bindgen-integration/build.rs index 536853589b..d80e412cf3 100644 --- a/bindgen-integration/build.rs +++ b/bindgen-integration/build.rs @@ -76,7 +76,7 @@ impl ParseCallbacks for MacroCallback { } } - fn func_macro(&self, name: &str, value: &[u8]) { + fn func_macro(&self, name: &str, value: &[&[u8]]) { match name { "TESTMACRO_NONFUNCTIONAL" => { panic!("func_macro was called for a non-functional macro"); @@ -87,19 +87,29 @@ impl ParseCallbacks for MacroCallback { // change in the future, but it is safe by the definition of a // token in C, whereas leaving the spaces out could change // tokenization. - assert_eq!(value, b"- TESTMACRO_INTEGER"); + assert_eq!( + value, + &["-".as_bytes(), "TESTMACRO_INTEGER".as_bytes()] + ); *self.seen_funcs.lock().unwrap() += 1; } "TESTMACRO_FUNCTIONAL_EMPTY(TESTMACRO_INTEGER)" => { - assert_eq!(value, b""); + assert_eq!(value, &[] as &[&[u8]]); *self.seen_funcs.lock().unwrap() += 1; } "TESTMACRO_FUNCTIONAL_TOKENIZED(a,b,c,d,e)" => { - assert_eq!(value, b"a / b c d ## e"); + assert_eq!( + value, + ["a", "/", "b", "c", "d", "##", "e"] + .iter() + .map(|s| s.as_bytes()) + .collect::>() + .as_slice() + ); *self.seen_funcs.lock().unwrap() += 1; } "TESTMACRO_FUNCTIONAL_SPLIT(a,b)" => { - assert_eq!(value, b"b , a"); + assert_eq!(value, &[b"b", b",", b"a"]); *self.seen_funcs.lock().unwrap() += 1; } _ => { diff --git a/src/callbacks.rs b/src/callbacks.rs index 93c00775f5..1cd7f37b07 100644 --- a/src/callbacks.rs +++ b/src/callbacks.rs @@ -45,10 +45,8 @@ pub trait ParseCallbacks: fmt::Debug + UnwindSafe { /// /// The first parameter represents the name and argument list (including the /// parentheses) of the function-like macro. The second parameter represents - /// the expansion of the macro. It is not guaranteed that the whitespace of - /// the original is preserved, but it is guaranteed that tokenization will - /// not be changed. - fn func_macro(&self, _name: &str, _value: &[u8]) {} + /// the expansion of the macro as a sequence of tokens. + fn func_macro(&self, _name: &str, _value: &[&[u8]]) {} /// This function should return whether, given an enum variant /// name, and value, this enum variant will forcibly be a constant. diff --git a/src/ir/var.rs b/src/ir/var.rs index d2b744dd6e..315a1c404f 100644 --- a/src/ir/var.rs +++ b/src/ir/var.rs @@ -137,7 +137,7 @@ fn default_macro_constant_type(value: i64) -> IntKind { /// be infallible), then `Err(ParseError::Continue)` is returned as well. fn handle_function_macro( cursor: &clang::Cursor, - func_macro: impl FnOnce(&str, &[u8]), + func_macro: impl FnOnce(&str, &[&[u8]]), ) -> Result<(), ParseError> { // cexpr explicitly does not handle function-like macros, except for a // degenerate case in which it behaves in a non-conformant way. We prevent @@ -161,7 +161,7 @@ fn handle_function_macro( let (left, right) = joined.split_at(boundary); let left = String::from_utf8(left.concat()) .map_err(|_| ParseError::Continue)?; - let right = right.join(&b' '); + let right: Vec<_> = right.iter().map(Vec::as_slice).collect(); func_macro(&left, &right); // We handled the macro, skip future macro processing. From e678631861038af61e125abcc9843cbed9c451c6 Mon Sep 17 00:00:00 2001 From: Darren Kulp Date: Sat, 6 Jun 2020 17:17:30 -0700 Subject: [PATCH 19/54] Drop last token under LLVM < 4.0 --- src/ir/var.rs | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/src/ir/var.rs b/src/ir/var.rs index 315a1c404f..d3b9057152 100644 --- a/src/ir/var.rs +++ b/src/ir/var.rs @@ -162,7 +162,15 @@ fn handle_function_macro( let left = String::from_utf8(left.concat()) .map_err(|_| ParseError::Continue)?; let right: Vec<_> = right.iter().map(Vec::as_slice).collect(); - func_macro(&left, &right); + // Drop last token with LLVM < 4.0, due to an LLVM bug. + // + // See: + // https://bugs.llvm.org//show_bug.cgi?id=9069 + let right = match (right.len(), crate::clang_version().parsed) { + (len, Some((v, _))) if len > 0 && v < 4 => &right[..len - 1], + _ => &right[..], + }; + func_macro(&left, right); // We handled the macro, skip future macro processing. Err(ParseError::Continue) From a6f4cad86e15a6a13a437e1c1cb9c134fbccf214 Mon Sep 17 00:00:00 2001 From: Darren Kulp Date: Sun, 7 Jun 2020 14:55:54 -0700 Subject: [PATCH 20/54] Remove comment obsoleted by new API --- bindgen-integration/cpp/Test.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/bindgen-integration/cpp/Test.h b/bindgen-integration/cpp/Test.h index e387219dc9..e91c570b6e 100644 --- a/bindgen-integration/cpp/Test.h +++ b/bindgen-integration/cpp/Test.h @@ -12,8 +12,6 @@ #define TESTMACRO_NONFUNCTIONAL (TESTMACRO_INTEGER) #define TESTMACRO_FUNCTIONAL_EMPTY(TESTMACRO_INTEGER) #define TESTMACRO_FUNCTIONAL_NONEMPTY(TESTMACRO_INTEGER)-TESTMACRO_INTEGER -// Unusual spacing in the following macro is intentional, to demonstrate -// reformatting after tokenization. #define TESTMACRO_FUNCTIONAL_TOKENIZED( a, b ,c,d,e ) a/b c d ## e #define TESTMACRO_FUNCTIONAL_SPLIT( a, \ b) b,\ From 4bc3e92ee21b398b081a44ce8cb8d01397465e35 Mon Sep 17 00:00:00 2001 From: Darren Kulp Date: Sun, 7 Jun 2020 14:58:10 -0700 Subject: [PATCH 21/54] Explain why a TODO is not necessary --- src/ir/var.rs | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/ir/var.rs b/src/ir/var.rs index d3b9057152..a0593b0015 100644 --- a/src/ir/var.rs +++ b/src/ir/var.rs @@ -146,8 +146,13 @@ fn handle_function_macro( // macro definition. let tokens: Vec<_> = cursor.tokens().iter().collect(); match tokens.get(0..2) { - // TODO check second macro's type and content Some([first, second]) if first.is_abutting(&second) => { + // To be doubly sure we are in a function macro definition, we could + // check the second macro's type and content, but in order for the + // first two macros after `#define` to be adjacent with no + // intervening space, the second one has to be an open parenthesis + // anyway, so the fact that this code is executing at all implies + // that we are defining a function macro. let joined: Vec<_> = tokens .into_iter() .map(|t| t.spelling().to_owned()) From 755d20869255ca7f4a1dceb5b69f234eaabed943 Mon Sep 17 00:00:00 2001 From: Darren Kulp Date: Tue, 9 Jun 2020 02:00:32 -0700 Subject: [PATCH 22/54] Simplify syntax for byte-slice assertions --- bindgen-integration/build.rs | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) diff --git a/bindgen-integration/build.rs b/bindgen-integration/build.rs index d80e412cf3..c40b672da7 100644 --- a/bindgen-integration/build.rs +++ b/bindgen-integration/build.rs @@ -87,10 +87,7 @@ impl ParseCallbacks for MacroCallback { // change in the future, but it is safe by the definition of a // token in C, whereas leaving the spaces out could change // tokenization. - assert_eq!( - value, - &["-".as_bytes(), "TESTMACRO_INTEGER".as_bytes()] - ); + assert_eq!(value, &[b"-" as &[u8], b"TESTMACRO_INTEGER"]); *self.seen_funcs.lock().unwrap() += 1; } "TESTMACRO_FUNCTIONAL_EMPTY(TESTMACRO_INTEGER)" => { @@ -100,11 +97,7 @@ impl ParseCallbacks for MacroCallback { "TESTMACRO_FUNCTIONAL_TOKENIZED(a,b,c,d,e)" => { assert_eq!( value, - ["a", "/", "b", "c", "d", "##", "e"] - .iter() - .map(|s| s.as_bytes()) - .collect::>() - .as_slice() + &[b"a" as &[u8], b"/", b"b", b"c", b"d", b"##", b"e"] ); *self.seen_funcs.lock().unwrap() += 1; } From ae78ae4566352b125dd061a6d4db1105f2b9469e Mon Sep 17 00:00:00 2001 From: Darren Kulp Date: Mon, 8 Jun 2020 18:02:49 -0700 Subject: [PATCH 23/54] Derive PartialEq & Eq for clang::File --- src/clang.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/src/clang.rs b/src/clang.rs index 12c89af728..a95c11914e 100644 --- a/src/clang.rs +++ b/src/clang.rs @@ -1492,6 +1492,7 @@ impl Iterator for CommentAttributesIterator { } /// A source file. +#[derive(PartialEq, Eq)] pub struct File { x: CXFile, } From 0edc1d3e4b1038956df529cf8a3785fc4913630b Mon Sep 17 00:00:00 2001 From: Darren Kulp Date: Mon, 8 Jun 2020 18:03:22 -0700 Subject: [PATCH 24/54] Avoid depending on implementation detail of extents --- src/clang.rs | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/src/clang.rs b/src/clang.rs index a95c11914e..37f40b7e03 100644 --- a/src/clang.rs +++ b/src/clang.rs @@ -818,10 +818,13 @@ impl ClangToken { /// the next token's left boundary, meaning they have no intervening /// whitespace. pub fn is_abutting(&self, next: &ClangToken) -> bool { - // This comparison makes an narrow assumption about the internal data - // format of CXSourceLocation. A more explicit and costly check might - // use clang_getSpellingLocation(). - self.extent.end_int_data == next.extent.begin_int_data + let a = unsafe { clang_getRangeEnd(self.extent) }; + let b = unsafe { clang_getRangeStart(next.extent) }; + + let a = SourceLocation { x: a }; + let b = SourceLocation { x: b }; + + a.location() == b.location() } } From 9fbb86c8857ca96e5b1eeb2f7c851be71a3a67ed Mon Sep 17 00:00:00 2001 From: Darren Kulp Date: Mon, 8 Jun 2020 18:06:59 -0700 Subject: [PATCH 25/54] Hoist tokenization from handle_function_macro --- src/ir/var.rs | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/ir/var.rs b/src/ir/var.rs index a0593b0015..693eb5ba6c 100644 --- a/src/ir/var.rs +++ b/src/ir/var.rs @@ -130,13 +130,13 @@ fn default_macro_constant_type(value: i64) -> IntKind { } } -/// Determines whether a cursor that is pointing into a CXCursor_MacroDefinition -/// is pointing into a function-like macro. If so, calls the func_macro callback +/// Determines whether a set of tokens from a CXCursor_MacroDefinition +/// represent a function-like macro. If so, calls the func_macro callback /// and returns `Err(ParseError::Continue)` to signal to skip further /// processing. If version to UTF-8 fails (it is performed only where it should /// be infallible), then `Err(ParseError::Continue)` is returned as well. fn handle_function_macro( - cursor: &clang::Cursor, + tokens: &[clang::ClangToken], func_macro: impl FnOnce(&str, &[&[u8]]), ) -> Result<(), ParseError> { // cexpr explicitly does not handle function-like macros, except for a @@ -144,7 +144,6 @@ fn handle_function_macro( // cexpr from ever seeing function-like macros by checking for a parenthesis // token immediately adjacent to (that is, abutting) the first token in the // macro definition. - let tokens: Vec<_> = cursor.tokens().iter().collect(); match tokens.get(0..2) { Some([first, second]) if first.is_abutting(&second) => { // To be doubly sure we are in a function macro definition, we could @@ -202,7 +201,8 @@ impl ClangSubItemParser for Var { MacroParsingBehavior::Default => {} } - handle_function_macro(&cursor, |left, right| { + let tokens: Vec<_> = cursor.tokens().iter().collect(); + handle_function_macro(&tokens, |left, right| { callbacks.func_macro(left, right) })?; } From 4287515440c26c98aa468f8f9e6f18053d281fca Mon Sep 17 00:00:00 2001 From: Darren Kulp Date: Mon, 8 Jun 2020 18:13:24 -0700 Subject: [PATCH 26/54] Make parse_macro take cexpr tokens Also switch to an explicit version check in `parse_macro` --- src/ir/var.rs | 22 ++++++++-------------- 1 file changed, 8 insertions(+), 14 deletions(-) diff --git a/src/ir/var.rs b/src/ir/var.rs index 693eb5ba6c..bd6c4f4be7 100644 --- a/src/ir/var.rs +++ b/src/ir/var.rs @@ -207,7 +207,7 @@ impl ClangSubItemParser for Var { })?; } - let value = parse_macro(ctx, &cursor); + let value = parse_macro(ctx, &cursor.cexpr_tokens()); let (id, value) = match value { Some(v) => v, @@ -374,28 +374,22 @@ impl ClangSubItemParser for Var { /// Try and parse a macro using all the macros parsed until now. fn parse_macro( ctx: &BindgenContext, - cursor: &clang::Cursor, + cexpr_tokens: &[cexpr::token::Token], ) -> Option<(Vec, cexpr::expr::EvalResult)> { use cexpr::expr; - let mut cexpr_tokens = cursor.cexpr_tokens(); - let parser = expr::IdentifierParser::new(ctx.parsed_macros()); - match parser.macro_definition(&cexpr_tokens) { - Ok((_, (id, val))) => { - return Some((id.into(), val)); - } - _ => {} - } - - // Try without the last token, to workaround a libclang bug in versions - // previous to 4.0. + // Drop last token with LLVM < 4.0, due to an LLVM bug. // // See: // https://bugs.llvm.org//show_bug.cgi?id=9069 // https://reviews.llvm.org/D26446 - cexpr_tokens.pop()?; + let cexpr_tokens = match (cexpr_tokens.len(), crate::clang_version().parsed) + { + (len, Some((v, _))) if len > 0 && v < 4 => &cexpr_tokens[..len - 1], + _ => &cexpr_tokens[..], + }; match parser.macro_definition(&cexpr_tokens) { Ok((_, (id, val))) => Some((id.into(), val)), From 11ccf26da77aacb2908a66b2b942b72d6a03ffaa Mon Sep 17 00:00:00 2001 From: Darren Kulp Date: Mon, 8 Jun 2020 18:21:25 -0700 Subject: [PATCH 27/54] Hoist as_cexpr_token for cexpr tokens --- src/clang.rs | 47 +++++++++++++++++++++++++---------------------- 1 file changed, 25 insertions(+), 22 deletions(-) diff --git a/src/clang.rs b/src/clang.rs index 37f40b7e03..4689a4bea3 100644 --- a/src/clang.rs +++ b/src/clang.rs @@ -698,30 +698,9 @@ impl Cursor { /// Gets the tokens that correspond to that cursor as `cexpr` tokens. pub fn cexpr_tokens(self) -> Vec { - use cexpr::token; - self.tokens() .iter() - .filter_map(|token| { - let kind = match token.kind { - CXToken_Punctuation => token::Kind::Punctuation, - CXToken_Literal => token::Kind::Literal, - CXToken_Identifier => token::Kind::Identifier, - CXToken_Keyword => token::Kind::Keyword, - // NB: cexpr is not too happy about comments inside - // expressions, so we strip them down here. - CXToken_Comment => return None, - _ => { - error!("Found unexpected token kind: {:?}", token); - return None; - } - }; - - Some(token::Token { - kind, - raw: token.spelling().to_vec().into_boxed_slice(), - }) - }) + .filter_map(|token| token.as_cexpr_token()) .collect() } @@ -826,6 +805,30 @@ impl ClangToken { a.location() == b.location() } + + /// Converts a ClangToken to a `cexpr` token if possible. + pub fn as_cexpr_token(&self) -> Option { + use cexpr::token; + + let kind = match self.kind { + CXToken_Punctuation => token::Kind::Punctuation, + CXToken_Literal => token::Kind::Literal, + CXToken_Identifier => token::Kind::Identifier, + CXToken_Keyword => token::Kind::Keyword, + // NB: cexpr is not too happy about comments inside + // expressions, so we strip them down here. + CXToken_Comment => return None, + _ => { + error!("Found unexpected token kind: {:?}", self); + return None; + } + }; + + Some(token::Token { + kind, + raw: self.spelling().to_vec().into_boxed_slice(), + }) + } } impl Drop for ClangToken { From d42833824fda35097a10fa062d14da5eedab9c30 Mon Sep 17 00:00:00 2001 From: Darren Kulp Date: Mon, 8 Jun 2020 19:48:15 -0700 Subject: [PATCH 28/54] Avoid parsing tokens twice --- src/ir/var.rs | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/src/ir/var.rs b/src/ir/var.rs index bd6c4f4be7..c1ea9a3370 100644 --- a/src/ir/var.rs +++ b/src/ir/var.rs @@ -193,6 +193,8 @@ impl ClangSubItemParser for Var { use clang_sys::*; match cursor.kind() { CXCursor_MacroDefinition => { + let tokens: Vec<_> = cursor.tokens().iter().collect(); + if let Some(callbacks) = ctx.parse_callbacks() { match callbacks.will_parse_macro(&cursor.spelling()) { MacroParsingBehavior::Ignore => { @@ -201,13 +203,12 @@ impl ClangSubItemParser for Var { MacroParsingBehavior::Default => {} } - let tokens: Vec<_> = cursor.tokens().iter().collect(); handle_function_macro(&tokens, |left, right| { callbacks.func_macro(left, right) })?; } - let value = parse_macro(ctx, &cursor.cexpr_tokens()); + let value = parse_macro(ctx, &tokens); let (id, value) = match value { Some(v) => v, @@ -374,7 +375,7 @@ impl ClangSubItemParser for Var { /// Try and parse a macro using all the macros parsed until now. fn parse_macro( ctx: &BindgenContext, - cexpr_tokens: &[cexpr::token::Token], + tokens: &[clang::ClangToken], ) -> Option<(Vec, cexpr::expr::EvalResult)> { use cexpr::expr; @@ -385,12 +386,15 @@ fn parse_macro( // See: // https://bugs.llvm.org//show_bug.cgi?id=9069 // https://reviews.llvm.org/D26446 - let cexpr_tokens = match (cexpr_tokens.len(), crate::clang_version().parsed) - { - (len, Some((v, _))) if len > 0 && v < 4 => &cexpr_tokens[..len - 1], - _ => &cexpr_tokens[..], + let tokens = match (tokens.len(), crate::clang_version().parsed) { + (len, Some((v, _))) if len > 0 && v < 4 => &tokens[..len - 1], + _ => &tokens[..], }; + let cexpr_tokens: Vec<_> = tokens + .iter() + .filter_map(|token| token.as_cexpr_token()) + .collect(); match parser.macro_definition(&cexpr_tokens) { Ok((_, (id, val))) => Some((id.into(), val)), _ => None, From 8e4815e02a56fe475af52dacc079e4b84e1c2707 Mon Sep 17 00:00:00 2001 From: Darren Kulp Date: Tue, 9 Jun 2020 19:48:47 -0700 Subject: [PATCH 29/54] Inline cexpr_tokens completely --- src/clang.rs | 8 -------- src/ir/var.rs | 6 +++++- 2 files changed, 5 insertions(+), 9 deletions(-) diff --git a/src/clang.rs b/src/clang.rs index 4689a4bea3..9ab8f55863 100644 --- a/src/clang.rs +++ b/src/clang.rs @@ -696,14 +696,6 @@ impl Cursor { RawTokens::new(self) } - /// Gets the tokens that correspond to that cursor as `cexpr` tokens. - pub fn cexpr_tokens(self) -> Vec { - self.tokens() - .iter() - .filter_map(|token| token.as_cexpr_token()) - .collect() - } - /// Obtain the real path name of a cursor of InclusionDirective kind. /// /// Returns None if the cursor does not include a file, otherwise the file's full name diff --git a/src/ir/var.rs b/src/ir/var.rs index c1ea9a3370..24120fc0c4 100644 --- a/src/ir/var.rs +++ b/src/ir/var.rs @@ -405,7 +405,11 @@ fn parse_int_literal_tokens(cursor: &clang::Cursor) -> Option { use cexpr::expr; use cexpr::expr::EvalResult; - let cexpr_tokens = cursor.cexpr_tokens(); + let cexpr_tokens: Vec<_> = cursor + .tokens() + .iter() + .filter_map(|t| t.as_cexpr_token()) + .collect(); // TODO(emilio): We can try to parse other kinds of literals. match expr::expr(&cexpr_tokens) { From cdb105c89f5bb3280eae1e119881cead8ac8c8b7 Mon Sep 17 00:00:00 2001 From: Darren Kulp Date: Tue, 9 Jun 2020 19:59:30 -0700 Subject: [PATCH 30/54] Pass callbacks instead of using FnOnce --- src/ir/var.rs | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/src/ir/var.rs b/src/ir/var.rs index 24120fc0c4..3e4688b3e7 100644 --- a/src/ir/var.rs +++ b/src/ir/var.rs @@ -137,7 +137,7 @@ fn default_macro_constant_type(value: i64) -> IntKind { /// be infallible), then `Err(ParseError::Continue)` is returned as well. fn handle_function_macro( tokens: &[clang::ClangToken], - func_macro: impl FnOnce(&str, &[&[u8]]), + callbacks: &dyn crate::callbacks::ParseCallbacks, ) -> Result<(), ParseError> { // cexpr explicitly does not handle function-like macros, except for a // degenerate case in which it behaves in a non-conformant way. We prevent @@ -174,7 +174,7 @@ fn handle_function_macro( (len, Some((v, _))) if len > 0 && v < 4 => &right[..len - 1], _ => &right[..], }; - func_macro(&left, right); + callbacks.func_macro(&left, right); // We handled the macro, skip future macro processing. Err(ParseError::Continue) @@ -203,9 +203,7 @@ impl ClangSubItemParser for Var { MacroParsingBehavior::Default => {} } - handle_function_macro(&tokens, |left, right| { - callbacks.func_macro(left, right) - })?; + handle_function_macro(&tokens, callbacks)?; } let value = parse_macro(ctx, &tokens); From 8c4ff651ba7857128138ad3bcafa00afbf82df62 Mon Sep 17 00:00:00 2001 From: Darren Kulp Date: Tue, 9 Jun 2020 20:43:33 -0700 Subject: [PATCH 31/54] Restore previous fallback behavior in parse_macro --- src/ir/var.rs | 24 +++++++++++++++--------- 1 file changed, 15 insertions(+), 9 deletions(-) diff --git a/src/ir/var.rs b/src/ir/var.rs index 3e4688b3e7..6ad829e749 100644 --- a/src/ir/var.rs +++ b/src/ir/var.rs @@ -377,22 +377,28 @@ fn parse_macro( ) -> Option<(Vec, cexpr::expr::EvalResult)> { use cexpr::expr; + let mut cexpr_tokens: Vec<_> = tokens + .iter() + .filter_map(|t| t.as_cexpr_token()) + .collect(); + let parser = expr::IdentifierParser::new(ctx.parsed_macros()); - // Drop last token with LLVM < 4.0, due to an LLVM bug. + match parser.macro_definition(&cexpr_tokens) { + Ok((_, (id, val))) => { + return Some((id.into(), val)); + } + _ => {} + } + + // Try without the last token, to workaround a libclang bug in versions + // previous to 4.0. // // See: // https://bugs.llvm.org//show_bug.cgi?id=9069 // https://reviews.llvm.org/D26446 - let tokens = match (tokens.len(), crate::clang_version().parsed) { - (len, Some((v, _))) if len > 0 && v < 4 => &tokens[..len - 1], - _ => &tokens[..], - }; + cexpr_tokens.pop()?; - let cexpr_tokens: Vec<_> = tokens - .iter() - .filter_map(|token| token.as_cexpr_token()) - .collect(); match parser.macro_definition(&cexpr_tokens) { Ok((_, (id, val))) => Some((id.into(), val)), _ => None, From f94bd54839ac89dea53edf23f3bd26b76e94f4df Mon Sep 17 00:00:00 2001 From: Darren Kulp Date: Wed, 10 Jun 2020 04:53:05 -0700 Subject: [PATCH 32/54] Run cargo +nightly fmt again --- src/ir/var.rs | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/src/ir/var.rs b/src/ir/var.rs index 6ad829e749..a49c0b7fc2 100644 --- a/src/ir/var.rs +++ b/src/ir/var.rs @@ -377,10 +377,8 @@ fn parse_macro( ) -> Option<(Vec, cexpr::expr::EvalResult)> { use cexpr::expr; - let mut cexpr_tokens: Vec<_> = tokens - .iter() - .filter_map(|t| t.as_cexpr_token()) - .collect(); + let mut cexpr_tokens: Vec<_> = + tokens.iter().filter_map(|t| t.as_cexpr_token()).collect(); let parser = expr::IdentifierParser::new(ctx.parsed_macros()); From dc144e4341df4c7c33f84e919785393ae136c5af Mon Sep 17 00:00:00 2001 From: Darren Kulp Date: Wed, 17 Jun 2020 17:35:34 -0700 Subject: [PATCH 33/54] Correct a comment typo --- src/ir/var.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/ir/var.rs b/src/ir/var.rs index a49c0b7fc2..22b93121ad 100644 --- a/src/ir/var.rs +++ b/src/ir/var.rs @@ -133,8 +133,8 @@ fn default_macro_constant_type(value: i64) -> IntKind { /// Determines whether a set of tokens from a CXCursor_MacroDefinition /// represent a function-like macro. If so, calls the func_macro callback /// and returns `Err(ParseError::Continue)` to signal to skip further -/// processing. If version to UTF-8 fails (it is performed only where it should -/// be infallible), then `Err(ParseError::Continue)` is returned as well. +/// processing. If conversion to UTF-8 fails (it is performed only where it +/// should be infallible), then `Err(ParseError::Continue)` is returned as well. fn handle_function_macro( tokens: &[clang::ClangToken], callbacks: &dyn crate::callbacks::ParseCallbacks, From 2b76ea80857c4b0bf7d2f2f3e3a8d2a1f36b5f9c Mon Sep 17 00:00:00 2001 From: Darren Kulp Date: Thu, 18 Jun 2020 05:18:04 -0700 Subject: [PATCH 34/54] Revert "Inline cexpr_tokens completely" This reverts commit 72dd6a87d5f5cba68a9398a4f9669eeb380462ac. --- src/clang.rs | 8 ++++++++ src/ir/var.rs | 6 +----- 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/src/clang.rs b/src/clang.rs index 9ab8f55863..4689a4bea3 100644 --- a/src/clang.rs +++ b/src/clang.rs @@ -696,6 +696,14 @@ impl Cursor { RawTokens::new(self) } + /// Gets the tokens that correspond to that cursor as `cexpr` tokens. + pub fn cexpr_tokens(self) -> Vec { + self.tokens() + .iter() + .filter_map(|token| token.as_cexpr_token()) + .collect() + } + /// Obtain the real path name of a cursor of InclusionDirective kind. /// /// Returns None if the cursor does not include a file, otherwise the file's full name diff --git a/src/ir/var.rs b/src/ir/var.rs index 22b93121ad..3b5a59a1a7 100644 --- a/src/ir/var.rs +++ b/src/ir/var.rs @@ -407,11 +407,7 @@ fn parse_int_literal_tokens(cursor: &clang::Cursor) -> Option { use cexpr::expr; use cexpr::expr::EvalResult; - let cexpr_tokens: Vec<_> = cursor - .tokens() - .iter() - .filter_map(|t| t.as_cexpr_token()) - .collect(); + let cexpr_tokens = cursor.cexpr_tokens(); // TODO(emilio): We can try to parse other kinds of literals. match expr::expr(&cexpr_tokens) { From b6819eaaa4111e8411f9f1667cab9b5e6eedf25f Mon Sep 17 00:00:00 2001 From: Darren Kulp Date: Thu, 18 Jun 2020 05:25:25 -0700 Subject: [PATCH 35/54] Check that second token is a parenthesis --- src/ir/var.rs | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/src/ir/var.rs b/src/ir/var.rs index 3b5a59a1a7..a4d156db26 100644 --- a/src/ir/var.rs +++ b/src/ir/var.rs @@ -145,13 +145,9 @@ fn handle_function_macro( // token immediately adjacent to (that is, abutting) the first token in the // macro definition. match tokens.get(0..2) { - Some([first, second]) if first.is_abutting(&second) => { - // To be doubly sure we are in a function macro definition, we could - // check the second macro's type and content, but in order for the - // first two macros after `#define` to be adjacent with no - // intervening space, the second one has to be an open parenthesis - // anyway, so the fact that this code is executing at all implies - // that we are defining a function macro. + Some([first, second]) + if first.is_abutting(&second) && second.spelling() == b"(" => + { let joined: Vec<_> = tokens .into_iter() .map(|t| t.spelling().to_owned()) From 1d02ef3d69a4866e5746761e4e7e5c03a39532f7 Mon Sep 17 00:00:00 2001 From: Darren Kulp Date: Thu, 18 Jun 2020 05:28:09 -0700 Subject: [PATCH 36/54] Avoid making spellings owned --- src/ir/var.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/ir/var.rs b/src/ir/var.rs index a4d156db26..b716e29a19 100644 --- a/src/ir/var.rs +++ b/src/ir/var.rs @@ -150,7 +150,7 @@ fn handle_function_macro( { let joined: Vec<_> = tokens .into_iter() - .map(|t| t.spelling().to_owned()) + .map(clang::ClangToken::spelling) .collect(); let boundary = joined .iter() @@ -161,7 +161,7 @@ fn handle_function_macro( let (left, right) = joined.split_at(boundary); let left = String::from_utf8(left.concat()) .map_err(|_| ParseError::Continue)?; - let right: Vec<_> = right.iter().map(Vec::as_slice).collect(); + let right: Vec<_> = right.iter().cloned().collect(); // Drop last token with LLVM < 4.0, due to an LLVM bug. // // See: From 5bfe475aec47d45bc4f1ba75616f634388c7162b Mon Sep 17 00:00:00 2001 From: Darren Kulp Date: Thu, 18 Jun 2020 05:31:06 -0700 Subject: [PATCH 37/54] Defer .collect() call till later --- src/ir/var.rs | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/src/ir/var.rs b/src/ir/var.rs index b716e29a19..83d59eecd3 100644 --- a/src/ir/var.rs +++ b/src/ir/var.rs @@ -161,16 +161,17 @@ fn handle_function_macro( let (left, right) = joined.split_at(boundary); let left = String::from_utf8(left.concat()) .map_err(|_| ParseError::Continue)?; - let right: Vec<_> = right.iter().cloned().collect(); + let right = right.iter().cloned(); // Drop last token with LLVM < 4.0, due to an LLVM bug. // // See: // https://bugs.llvm.org//show_bug.cgi?id=9069 - let right = match (right.len(), crate::clang_version().parsed) { - (len, Some((v, _))) if len > 0 && v < 4 => &right[..len - 1], - _ => &right[..], + let len = match (right.len(), crate::clang_version().parsed) { + (len, Some((v, _))) if len > 0 && v < 4 => len - 1, + (len, _) => len, }; - callbacks.func_macro(&left, right); + let right: Vec<_> = right.take(len).collect(); + callbacks.func_macro(&left, &right); // We handled the macro, skip future macro processing. Err(ParseError::Continue) From 60ffe7841ff8c479aa7114609ebdac4d65e484e2 Mon Sep 17 00:00:00 2001 From: Darren Kulp Date: Thu, 18 Jun 2020 05:40:55 -0700 Subject: [PATCH 38/54] Obviate more Vec<_> collects --- src/ir/var.rs | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/src/ir/var.rs b/src/ir/var.rs index 83d59eecd3..333c6bd64e 100644 --- a/src/ir/var.rs +++ b/src/ir/var.rs @@ -148,20 +148,19 @@ fn handle_function_macro( Some([first, second]) if first.is_abutting(&second) && second.spelling() == b"(" => { - let joined: Vec<_> = tokens - .into_iter() - .map(clang::ClangToken::spelling) - .collect(); + let mut joined = + tokens.into_iter().map(clang::ClangToken::spelling); let boundary = joined - .iter() + .clone() // clone the iterator to avoid consuming elements .position(|s| s == b")") .map(|n| n + 1) // skip past ')' .unwrap_or(joined.len()); - let (left, right) = joined.split_at(boundary); - let left = String::from_utf8(left.concat()) - .map_err(|_| ParseError::Continue)?; - let right = right.iter().cloned(); + let left = joined.by_ref().take(boundary); + let left = left.collect::>().concat(); + let left = + String::from_utf8(left).map_err(|_| ParseError::Continue)?; + let right = joined; // Drop last token with LLVM < 4.0, due to an LLVM bug. // // See: From 41995f919df3c9fde8f701466d1309556cd20c85 Mon Sep 17 00:00:00 2001 From: Darren Kulp Date: Thu, 18 Jun 2020 05:51:19 -0700 Subject: [PATCH 39/54] Hoist base-adjustment --- src/ir/var.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/ir/var.rs b/src/ir/var.rs index 333c6bd64e..dd97ddaaaf 100644 --- a/src/ir/var.rs +++ b/src/ir/var.rs @@ -153,10 +153,10 @@ fn handle_function_macro( let boundary = joined .clone() // clone the iterator to avoid consuming elements .position(|s| s == b")") - .map(|n| n + 1) // skip past ')' .unwrap_or(joined.len()); - let left = joined.by_ref().take(boundary); + // Add 1, to convert index to length. + let left = joined.by_ref().take(boundary + 1); let left = left.collect::>().concat(); let left = String::from_utf8(left).map_err(|_| ParseError::Continue)?; From d5d2367a4155eca1f8abbc01e17e9c7d5ada359c Mon Sep 17 00:00:00 2001 From: Darren Kulp Date: Thu, 18 Jun 2020 05:53:21 -0700 Subject: [PATCH 40/54] Rename joined -> spelled --- src/ir/var.rs | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/ir/var.rs b/src/ir/var.rs index dd97ddaaaf..d371e7d48b 100644 --- a/src/ir/var.rs +++ b/src/ir/var.rs @@ -148,19 +148,19 @@ fn handle_function_macro( Some([first, second]) if first.is_abutting(&second) && second.spelling() == b"(" => { - let mut joined = + let mut spelled = tokens.into_iter().map(clang::ClangToken::spelling); - let boundary = joined + let boundary = spelled .clone() // clone the iterator to avoid consuming elements .position(|s| s == b")") - .unwrap_or(joined.len()); + .unwrap_or(spelled.len()); // Add 1, to convert index to length. - let left = joined.by_ref().take(boundary + 1); + let left = spelled.by_ref().take(boundary + 1); let left = left.collect::>().concat(); let left = String::from_utf8(left).map_err(|_| ParseError::Continue)?; - let right = joined; + let right = spelled; // Drop last token with LLVM < 4.0, due to an LLVM bug. // // See: From 3370218738f63f103e4c543f155761ec603087c0 Mon Sep 17 00:00:00 2001 From: Darren Kulp Date: Thu, 18 Jun 2020 05:59:07 -0700 Subject: [PATCH 41/54] Bail if we find no closing parenthesis --- src/ir/var.rs | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/src/ir/var.rs b/src/ir/var.rs index d371e7d48b..9e2bb86eef 100644 --- a/src/ir/var.rs +++ b/src/ir/var.rs @@ -139,22 +139,20 @@ fn handle_function_macro( tokens: &[clang::ClangToken], callbacks: &dyn crate::callbacks::ParseCallbacks, ) -> Result<(), ParseError> { + let mut spelled = tokens.into_iter().map(clang::ClangToken::spelling); + let boundary = spelled + .clone() // clone the iterator to avoid consuming elements + .position(|s| s == b")"); + // cexpr explicitly does not handle function-like macros, except for a // degenerate case in which it behaves in a non-conformant way. We prevent // cexpr from ever seeing function-like macros by checking for a parenthesis // token immediately adjacent to (that is, abutting) the first token in the // macro definition. - match tokens.get(0..2) { - Some([first, second]) + match (tokens.get(0..2), boundary) { + (Some([first, second]), Some(boundary)) if first.is_abutting(&second) && second.spelling() == b"(" => { - let mut spelled = - tokens.into_iter().map(clang::ClangToken::spelling); - let boundary = spelled - .clone() // clone the iterator to avoid consuming elements - .position(|s| s == b")") - .unwrap_or(spelled.len()); - // Add 1, to convert index to length. let left = spelled.by_ref().take(boundary + 1); let left = left.collect::>().concat(); From 4e1ac246029737891f27ab1f62061336396e3e9c Mon Sep 17 00:00:00 2001 From: Darren Kulp Date: Thu, 18 Jun 2020 21:42:59 -0700 Subject: [PATCH 42/54] Introduce an import of ClangToken --- src/ir/var.rs | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/src/ir/var.rs b/src/ir/var.rs index 9e2bb86eef..575f55af2c 100644 --- a/src/ir/var.rs +++ b/src/ir/var.rs @@ -8,6 +8,7 @@ use super::item::Item; use super::ty::{FloatKind, TypeKind}; use crate::callbacks::MacroParsingBehavior; use crate::clang; +use crate::clang::ClangToken; use crate::parse::{ ClangItemParser, ClangSubItemParser, ParseError, ParseResult, }; @@ -136,10 +137,10 @@ fn default_macro_constant_type(value: i64) -> IntKind { /// processing. If conversion to UTF-8 fails (it is performed only where it /// should be infallible), then `Err(ParseError::Continue)` is returned as well. fn handle_function_macro( - tokens: &[clang::ClangToken], + tokens: &[ClangToken], callbacks: &dyn crate::callbacks::ParseCallbacks, ) -> Result<(), ParseError> { - let mut spelled = tokens.into_iter().map(clang::ClangToken::spelling); + let mut spelled = tokens.into_iter().map(ClangToken::spelling); let boundary = spelled .clone() // clone the iterator to avoid consuming elements .position(|s| s == b")"); @@ -367,12 +368,14 @@ impl ClangSubItemParser for Var { /// Try and parse a macro using all the macros parsed until now. fn parse_macro( ctx: &BindgenContext, - tokens: &[clang::ClangToken], + tokens: &[ClangToken], ) -> Option<(Vec, cexpr::expr::EvalResult)> { use cexpr::expr; - let mut cexpr_tokens: Vec<_> = - tokens.iter().filter_map(|t| t.as_cexpr_token()).collect(); + let mut cexpr_tokens: Vec<_> = tokens + .iter() + .filter_map(clang::ClangToken::as_cexpr_token) + .collect(); let parser = expr::IdentifierParser::new(ctx.parsed_macros()); From 8699782cdd537d382bfb941b92e99473d86c11e0 Mon Sep 17 00:00:00 2001 From: Darren Kulp Date: Thu, 18 Jun 2020 21:44:14 -0700 Subject: [PATCH 43/54] Avoid explicit closure --- src/ir/var.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/ir/var.rs b/src/ir/var.rs index 575f55af2c..7f53bf7111 100644 --- a/src/ir/var.rs +++ b/src/ir/var.rs @@ -374,7 +374,7 @@ fn parse_macro( let mut cexpr_tokens: Vec<_> = tokens .iter() - .filter_map(clang::ClangToken::as_cexpr_token) + .filter_map(ClangToken::as_cexpr_token) .collect(); let parser = expr::IdentifierParser::new(ctx.parsed_macros()); From ef6498d11819c8c5e1881eea8490df43189340c0 Mon Sep 17 00:00:00 2001 From: Darren Kulp Date: Sat, 20 Jun 2020 09:47:30 -0700 Subject: [PATCH 44/54] Introduce is_macro_function_like --- src/clang.rs | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/src/clang.rs b/src/clang.rs index 4689a4bea3..655dcfaf78 100644 --- a/src/clang.rs +++ b/src/clang.rs @@ -239,6 +239,17 @@ impl Cursor { } } + /// Is this Cursor pointing to a function-like macro definition? + /// Returns None if this cannot be determined with the available libclang + /// (it requires 3.9 or greater). + pub fn is_macro_function_like(&self) -> Option { + if clang_Cursor_isMacroFunctionLike::is_loaded() { + Some(unsafe { clang_Cursor_isMacroFunctionLike(self.x) != 0 }) + } else { + None + } + } + /// Get the kind of referent this cursor is pointing to. pub fn kind(&self) -> CXCursorKind { self.x.kind From 2ffe97bebf46192f74c1ff4764085e1d7dbd65b9 Mon Sep 17 00:00:00 2001 From: Darren Kulp Date: Sat, 20 Jun 2020 09:58:32 -0700 Subject: [PATCH 45/54] Use is_macro_function_like --- src/ir/var.rs | 73 +++++++++++++++++++++++++++++---------------------- 1 file changed, 42 insertions(+), 31 deletions(-) diff --git a/src/ir/var.rs b/src/ir/var.rs index 7f53bf7111..8b18c46785 100644 --- a/src/ir/var.rs +++ b/src/ir/var.rs @@ -137,6 +137,7 @@ fn default_macro_constant_type(value: i64) -> IntKind { /// processing. If conversion to UTF-8 fails (it is performed only where it /// should be infallible), then `Err(ParseError::Continue)` is returned as well. fn handle_function_macro( + cursor: &clang::Cursor, tokens: &[ClangToken], callbacks: &dyn crate::callbacks::ParseCallbacks, ) -> Result<(), ParseError> { @@ -145,37 +146,47 @@ fn handle_function_macro( .clone() // clone the iterator to avoid consuming elements .position(|s| s == b")"); - // cexpr explicitly does not handle function-like macros, except for a - // degenerate case in which it behaves in a non-conformant way. We prevent - // cexpr from ever seeing function-like macros by checking for a parenthesis - // token immediately adjacent to (that is, abutting) the first token in the - // macro definition. - match (tokens.get(0..2), boundary) { - (Some([first, second]), Some(boundary)) - if first.is_abutting(&second) && second.spelling() == b"(" => - { - // Add 1, to convert index to length. - let left = spelled.by_ref().take(boundary + 1); - let left = left.collect::>().concat(); - let left = - String::from_utf8(left).map_err(|_| ParseError::Continue)?; - let right = spelled; - // Drop last token with LLVM < 4.0, due to an LLVM bug. - // - // See: - // https://bugs.llvm.org//show_bug.cgi?id=9069 - let len = match (right.len(), crate::clang_version().parsed) { - (len, Some((v, _))) if len > 0 && v < 4 => len - 1, - (len, _) => len, - }; - let right: Vec<_> = right.take(len).collect(); - callbacks.func_macro(&left, &right); - - // We handled the macro, skip future macro processing. - Err(ParseError::Continue) - } - _ => Ok(()), + let is_functional_macro = + // If we have libclang >= 3.9, we can use `is_macro_function_like()` and + // avoid checking for abutting tokens ourselves. + cursor.is_macro_function_like().unwrap_or_else(|| { + // cexpr explicitly does not handle function-like macros, except for + // a degenerate case in which it behaves in a non-conformant way. We + // prevent cexpr from ever seeing function-like macros by checking + // for a parenthesis token immediately adjacent to (that is, + // abutting) the first token in the macro definition. + match tokens.get(0..2) { + Some([a, b]) if a.is_abutting(&b) && b.spelling() == b"(" => { + true + } + _ => false, + } + }); + + if !is_functional_macro { + return Ok(()); } + + // Add 1, to convert index to length. + let left = spelled + .by_ref() + .take(boundary.ok_or(ParseError::Continue)? + 1); + let left = left.collect::>().concat(); + let left = String::from_utf8(left).map_err(|_| ParseError::Continue)?; + let right = spelled; + // Drop last token with LLVM < 4.0, due to an LLVM bug. + // + // See: + // https://bugs.llvm.org//show_bug.cgi?id=9069 + let len = match (right.len(), crate::clang_version().parsed) { + (len, Some((v, _))) if len > 0 && v < 4 => len - 1, + (len, _) => len, + }; + let right: Vec<_> = right.take(len).collect(); + callbacks.func_macro(&left, &right); + + // We handled the macro, skip future macro processing. + Err(ParseError::Continue) } impl ClangSubItemParser for Var { @@ -198,7 +209,7 @@ impl ClangSubItemParser for Var { MacroParsingBehavior::Default => {} } - handle_function_macro(&tokens, callbacks)?; + handle_function_macro(&cursor, &tokens, callbacks)?; } let value = parse_macro(ctx, &tokens); From 3f1c783a432b551add8e24195fc4d21281e6e8b3 Mon Sep 17 00:00:00 2001 From: Darren Kulp Date: Sat, 20 Jun 2020 11:12:42 -0700 Subject: [PATCH 46/54] Use clang_equalLocations --- src/clang.rs | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/src/clang.rs b/src/clang.rs index 655dcfaf78..3014bde8e7 100644 --- a/src/clang.rs +++ b/src/clang.rs @@ -808,13 +808,12 @@ impl ClangToken { /// the next token's left boundary, meaning they have no intervening /// whitespace. pub fn is_abutting(&self, next: &ClangToken) -> bool { - let a = unsafe { clang_getRangeEnd(self.extent) }; - let b = unsafe { clang_getRangeStart(next.extent) }; - - let a = SourceLocation { x: a }; - let b = SourceLocation { x: b }; - - a.location() == b.location() + unsafe { + clang_equalLocations( + clang_getRangeEnd(self.extent), + clang_getRangeStart(next.extent), + ) != 0 + } } /// Converts a ClangToken to a `cexpr` token if possible. From 8988ce610fedf94e70b43c19dc9e93e4d4389704 Mon Sep 17 00:00:00 2001 From: Darren Kulp Date: Sat, 20 Jun 2020 11:12:49 -0700 Subject: [PATCH 47/54] Revert "Derive PartialEq & Eq for clang::File" This reverts commit f451f60445c70adce65b39b232b1affaa44001a4. --- src/clang.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/src/clang.rs b/src/clang.rs index 3014bde8e7..199bab25a7 100644 --- a/src/clang.rs +++ b/src/clang.rs @@ -1508,7 +1508,6 @@ impl Iterator for CommentAttributesIterator { } /// A source file. -#[derive(PartialEq, Eq)] pub struct File { x: CXFile, } From bbd1b1ac2d63ab0a0d12a283408ebbfe046ec787 Mon Sep 17 00:00:00 2001 From: Darren Kulp Date: Sat, 20 Jun 2020 11:14:40 -0700 Subject: [PATCH 48/54] Avoid exposing is_abutting --- src/clang.rs | 14 +------------- src/ir/var.rs | 11 ++++++++++- 2 files changed, 11 insertions(+), 14 deletions(-) diff --git a/src/clang.rs b/src/clang.rs index 199bab25a7..2004100ceb 100644 --- a/src/clang.rs +++ b/src/clang.rs @@ -789,7 +789,7 @@ impl<'a> Drop for RawTokens<'a> { #[derive(Debug)] pub struct ClangToken { spelling: CXString, - extent: CXSourceRange, + pub extent: CXSourceRange, /// The kind of token, this is the same as the relevant member from /// `CXToken`. pub kind: CXTokenKind, @@ -804,18 +804,6 @@ impl ClangToken { c_str.to_bytes() } - /// Tests whether a token's right boundary (CXSourceLocation) is the same as - /// the next token's left boundary, meaning they have no intervening - /// whitespace. - pub fn is_abutting(&self, next: &ClangToken) -> bool { - unsafe { - clang_equalLocations( - clang_getRangeEnd(self.extent), - clang_getRangeStart(next.extent), - ) != 0 - } - } - /// Converts a ClangToken to a `cexpr` token if possible. pub fn as_cexpr_token(&self) -> Option { use cexpr::token; diff --git a/src/ir/var.rs b/src/ir/var.rs index 8b18c46785..73d7952d46 100644 --- a/src/ir/var.rs +++ b/src/ir/var.rs @@ -146,6 +146,15 @@ fn handle_function_macro( .clone() // clone the iterator to avoid consuming elements .position(|s| s == b")"); + fn is_abutting(a: &ClangToken, b: &ClangToken) -> bool { + unsafe { + clang_sys::clang_equalLocations( + clang_sys::clang_getRangeEnd(a.extent), + clang_sys::clang_getRangeStart(b.extent), + ) != 0 + } + } + let is_functional_macro = // If we have libclang >= 3.9, we can use `is_macro_function_like()` and // avoid checking for abutting tokens ourselves. @@ -156,7 +165,7 @@ fn handle_function_macro( // for a parenthesis token immediately adjacent to (that is, // abutting) the first token in the macro definition. match tokens.get(0..2) { - Some([a, b]) if a.is_abutting(&b) && b.spelling() == b"(" => { + Some([a, b]) if is_abutting(&a, &b) && b.spelling() == b"(" => { true } _ => false, From 777d211e035735fb3e3a713641cdd5c34333cc90 Mon Sep 17 00:00:00 2001 From: Darren Kulp Date: Sat, 20 Jun 2020 12:40:56 -0700 Subject: [PATCH 49/54] Prefer iter() over into_iter() --- src/ir/var.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/ir/var.rs b/src/ir/var.rs index 73d7952d46..bdac502f30 100644 --- a/src/ir/var.rs +++ b/src/ir/var.rs @@ -141,7 +141,7 @@ fn handle_function_macro( tokens: &[ClangToken], callbacks: &dyn crate::callbacks::ParseCallbacks, ) -> Result<(), ParseError> { - let mut spelled = tokens.into_iter().map(ClangToken::spelling); + let mut spelled = tokens.iter().map(ClangToken::spelling); let boundary = spelled .clone() // clone the iterator to avoid consuming elements .position(|s| s == b")"); From 83f908830f11206ae3e79edc6d02c171911110cf Mon Sep 17 00:00:00 2001 From: Darren Kulp Date: Sat, 20 Jun 2020 12:54:18 -0700 Subject: [PATCH 50/54] Defer costly spelling expansion --- src/ir/var.rs | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/src/ir/var.rs b/src/ir/var.rs index bdac502f30..2a5495b53d 100644 --- a/src/ir/var.rs +++ b/src/ir/var.rs @@ -141,11 +141,6 @@ fn handle_function_macro( tokens: &[ClangToken], callbacks: &dyn crate::callbacks::ParseCallbacks, ) -> Result<(), ParseError> { - let mut spelled = tokens.iter().map(ClangToken::spelling); - let boundary = spelled - .clone() // clone the iterator to avoid consuming elements - .position(|s| s == b")"); - fn is_abutting(a: &ClangToken, b: &ClangToken) -> bool { unsafe { clang_sys::clang_equalLocations( @@ -176,6 +171,13 @@ fn handle_function_macro( return Ok(()); } + let is_closing_paren = |t: &ClangToken| { + // Test cheap token kind before comparing exact spellings. + t.kind == clang_sys::CXToken_Punctuation && t.spelling() == b")" + }; + let boundary = tokens.iter().position(is_closing_paren); + + let mut spelled = tokens.iter().map(ClangToken::spelling); // Add 1, to convert index to length. let left = spelled .by_ref() From f6427412eb142e0682c9f4b8970860e87791dab0 Mon Sep 17 00:00:00 2001 From: Darren Kulp Date: Sat, 20 Jun 2020 13:14:37 -0700 Subject: [PATCH 51/54] Rewrite match arm --- src/ir/var.rs | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/ir/var.rs b/src/ir/var.rs index 2a5495b53d..de16e8e4e9 100644 --- a/src/ir/var.rs +++ b/src/ir/var.rs @@ -160,9 +160,7 @@ fn handle_function_macro( // for a parenthesis token immediately adjacent to (that is, // abutting) the first token in the macro definition. match tokens.get(0..2) { - Some([a, b]) if is_abutting(&a, &b) && b.spelling() == b"(" => { - true - } + Some([a, b]) => is_abutting(&a, &b) && b.spelling() == b"(", _ => false, } }); From ba3aeb08aff00f016ae97e9afacbad55a69144e0 Mon Sep 17 00:00:00 2001 From: Darren Kulp Date: Sat, 20 Jun 2020 13:18:17 -0700 Subject: [PATCH 52/54] Tweak comments slightly --- src/ir/var.rs | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/src/ir/var.rs b/src/ir/var.rs index de16e8e4e9..e6da7c2519 100644 --- a/src/ir/var.rs +++ b/src/ir/var.rs @@ -134,7 +134,7 @@ fn default_macro_constant_type(value: i64) -> IntKind { /// Determines whether a set of tokens from a CXCursor_MacroDefinition /// represent a function-like macro. If so, calls the func_macro callback /// and returns `Err(ParseError::Continue)` to signal to skip further -/// processing. If conversion to UTF-8 fails (it is performed only where it +/// processing. If conversion to UTF-8 fails (it is performed only where it /// should be infallible), then `Err(ParseError::Continue)` is returned as well. fn handle_function_macro( cursor: &clang::Cursor, @@ -154,9 +154,7 @@ fn handle_function_macro( // If we have libclang >= 3.9, we can use `is_macro_function_like()` and // avoid checking for abutting tokens ourselves. cursor.is_macro_function_like().unwrap_or_else(|| { - // cexpr explicitly does not handle function-like macros, except for - // a degenerate case in which it behaves in a non-conformant way. We - // prevent cexpr from ever seeing function-like macros by checking + // If we cannot get a definitive answer from clang, we instead check // for a parenthesis token immediately adjacent to (that is, // abutting) the first token in the macro definition. match tokens.get(0..2) { From 40346bce47e2e082af3a26a85237193c3e1ea4e9 Mon Sep 17 00:00:00 2001 From: Darren Kulp Date: Sat, 20 Jun 2020 13:52:07 -0700 Subject: [PATCH 53/54] Demonstrate support for non-UTF8 macro contents --- bindgen-integration/build.rs | 9 ++++++++- bindgen-integration/cpp/Test.h | 1 + 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/bindgen-integration/build.rs b/bindgen-integration/build.rs index c40b672da7..0a28e8f56b 100644 --- a/bindgen-integration/build.rs +++ b/bindgen-integration/build.rs @@ -105,6 +105,13 @@ impl ParseCallbacks for MacroCallback { assert_eq!(value, &[b"b", b",", b"a"]); *self.seen_funcs.lock().unwrap() += 1; } + "TESTMACRO_STRING_FUNC_NON_UTF8(x)" => { + assert_eq!( + value, + &[b"(" as &[u8], b"x", b"\"\xff\xff\"", b")"] + ); + *self.seen_funcs.lock().unwrap() += 1; + } _ => { // The system might provide lots of functional macros. // Ensure we did not miss handling one that we meant to handle. @@ -123,7 +130,7 @@ impl Drop for MacroCallback { ); assert_eq!( *self.seen_funcs.lock().unwrap(), - 4, + 5, "func_macro handle was not called once for all relevant macros" ); } diff --git a/bindgen-integration/cpp/Test.h b/bindgen-integration/cpp/Test.h index e91c570b6e..f8b2263f6d 100644 --- a/bindgen-integration/cpp/Test.h +++ b/bindgen-integration/cpp/Test.h @@ -18,6 +18,7 @@ a //#define TESTMACRO_INVALID("string") // A conforming preprocessor rejects this #define TESTMACRO_STRING_EXPR ("string") +#define TESTMACRO_STRING_FUNC_NON_UTF8(x) (x "ÿÿ") /* invalid UTF-8 on purpose */ #include From 08201224b2a529a5a514858df613576a8be9add4 Mon Sep 17 00:00:00 2001 From: Darren Kulp Date: Sat, 20 Jun 2020 13:55:50 -0700 Subject: [PATCH 54/54] Document newly public `extent` member --- src/clang.rs | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/clang.rs b/src/clang.rs index 2004100ceb..3ddf99dc8b 100644 --- a/src/clang.rs +++ b/src/clang.rs @@ -789,8 +789,10 @@ impl<'a> Drop for RawTokens<'a> { #[derive(Debug)] pub struct ClangToken { spelling: CXString, + /// The extent of the token. This is the same as the relevant member from + /// `CXToken`. pub extent: CXSourceRange, - /// The kind of token, this is the same as the relevant member from + /// The kind of the token. This is the same as the relevant member from /// `CXToken`. pub kind: CXTokenKind, }