Skip to content

Unsquash rust-lang/rust-bindgen#1793 for personal review #2

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 54 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
54 commits
Select commit Hold shift + click to select a range
8a11462
Introduce extent to ClangToken
kulp May 22, 2020
1efcfd9
Introduce ClangToken::is_abutting
kulp May 22, 2020
cd10e21
Check for function-like macros and bail out
kulp May 22, 2020
c9c9cfd
Introduce func_macro to ParseCallbacks trait
kulp May 22, 2020
89f36d6
Generate func_macro callbacks
kulp May 23, 2020
99a9f0a
Hoist function to handle function macros
kulp May 23, 2020
cc84e7a
Refactoring hoisted function interface
kulp May 23, 2020
8babf13
Make hoisted function a bit prettier
kulp May 23, 2020
5b411c1
Prepare to check func_macro call count
kulp May 23, 2020
1231d17
Add negative tests for functional macros
kulp May 23, 2020
9043b55
Add positive tests for func_macro
kulp May 23, 2020
5f3f39f
Demonstrate retokenization
kulp May 23, 2020
977e3da
Use "cannot" for "can not"
kulp May 23, 2020
558ce72
Explain arguments
kulp May 23, 2020
b8c7173
Demonstrate newlines within macros
kulp May 23, 2020
e98f9a1
Make default case more robust
kulp May 23, 2020
614a31a
Stop guaranteeing UTF-8 for expansion of macro
kulp May 23, 2020
a2966ea
Provide a list of tokens
kulp Jun 6, 2020
e678631
Drop last token under LLVM < 4.0
kulp Jun 7, 2020
a6f4cad
Remove comment obsoleted by new API
kulp Jun 7, 2020
4bc3e92
Explain why a TODO is not necessary
kulp Jun 7, 2020
755d208
Simplify syntax for byte-slice assertions
kulp Jun 9, 2020
ae78ae4
Derive PartialEq & Eq for clang::File
kulp Jun 9, 2020
0edc1d3
Avoid depending on implementation detail of extents
kulp Jun 9, 2020
9fbb86c
Hoist tokenization from handle_function_macro
kulp Jun 9, 2020
4287515
Make parse_macro take cexpr tokens
kulp Jun 9, 2020
11ccf26
Hoist as_cexpr_token for cexpr tokens
kulp Jun 9, 2020
d428338
Avoid parsing tokens twice
kulp Jun 9, 2020
8e4815e
Inline cexpr_tokens completely
kulp Jun 10, 2020
cdb105c
Pass callbacks instead of using FnOnce
kulp Jun 10, 2020
8c4ff65
Restore previous fallback behavior in parse_macro
kulp Jun 10, 2020
f94bd54
Run cargo +nightly fmt again
kulp Jun 10, 2020
dc144e4
Correct a comment typo
kulp Jun 18, 2020
2b76ea8
Revert "Inline cexpr_tokens completely"
kulp Jun 18, 2020
b6819ea
Check that second token is a parenthesis
kulp Jun 18, 2020
1d02ef3
Avoid making spellings owned
kulp Jun 18, 2020
5bfe475
Defer .collect() call till later
kulp Jun 18, 2020
60ffe78
Obviate more Vec<_> collects
kulp Jun 18, 2020
41995f9
Hoist base-adjustment
kulp Jun 18, 2020
d5d2367
Rename joined -> spelled
kulp Jun 18, 2020
3370218
Bail if we find no closing parenthesis
kulp Jun 18, 2020
4e1ac24
Introduce an import of ClangToken
kulp Jun 19, 2020
8699782
Avoid explicit closure
kulp Jun 19, 2020
ef6498d
Introduce is_macro_function_like
kulp Jun 20, 2020
2ffe97b
Use is_macro_function_like
kulp Jun 20, 2020
3f1c783
Use clang_equalLocations
kulp Jun 20, 2020
8988ce6
Revert "Derive PartialEq & Eq for clang::File"
kulp Jun 20, 2020
bbd1b1a
Avoid exposing is_abutting
kulp Jun 20, 2020
777d211
Prefer iter() over into_iter()
kulp Jun 20, 2020
83f9088
Defer costly spelling expansion
kulp Jun 20, 2020
f642741
Rewrite match arm
kulp Jun 20, 2020
ba3aeb0
Tweak comments slightly
kulp Jun 20, 2020
40346bc
Demonstrate support for non-UTF8 macro contents
kulp Jun 20, 2020
0820122
Document newly public `extent` member
kulp Jun 20, 2020
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
59 changes: 57 additions & 2 deletions bindgen-integration/build.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ use std::sync::{Arc, Mutex, RwLock};
/// State carried by the `ParseCallbacks` implementation used by this build
/// script to check which macro callbacks bindgen invokes.
struct MacroCallback {
/// Shared set of macro names; `main` hands in a clone of its own handle.
/// NOTE(review): how the set is filled is outside this view — confirm.
macros: Arc<RwLock<HashSet<String>>>,
/// Number of expected `str_macro` invocations seen so far; verified when
/// the callback object is dropped.
seen_hellos: Mutex<u32>,
/// Number of expected `func_macro` invocations seen so far; verified when
/// the callback object is dropped.
seen_funcs: Mutex<u32>,
}

impl ParseCallbacks for MacroCallback {
Expand Down Expand Up @@ -45,6 +46,10 @@ impl ParseCallbacks for MacroCallback {

fn str_macro(&self, name: &str, value: &[u8]) {
match name {
"TESTMACRO_STRING_EXPR" => {
assert_eq!(value, b"string");
*self.seen_hellos.lock().unwrap() += 1;
}
"TESTMACRO_STRING_EXPANDED" |
"TESTMACRO_STRING" |
"TESTMACRO_INTEGER" => {
Expand All @@ -70,15 +75,64 @@ impl ParseCallbacks for MacroCallback {
_ => None,
}
}

fn func_macro(&self, name: &str, value: &[&[u8]]) {
    // Every recognised function-like test macro is checked the same way:
    // compare the reported expansion tokens with the expected ones, then
    // record that the callback fired.
    let expect_tokens = |expected: &[&[u8]]| {
        assert_eq!(value, expected);
        *self.seen_funcs.lock().unwrap() += 1;
    };

    match name {
        "TESTMACRO_NONFUNCTIONAL" => {
            panic!("func_macro was called for a non-functional macro");
        }
        "TESTMACRO_FUNCTIONAL_NONEMPTY(TESTMACRO_INTEGER)" => {
            // The expansion arrives as separate tokens, so `-` and the
            // identifier that follows it are two distinct entries.
            expect_tokens(&[b"-" as &[u8], b"TESTMACRO_INTEGER"]);
        }
        "TESTMACRO_FUNCTIONAL_EMPTY(TESTMACRO_INTEGER)" => {
            expect_tokens(&[] as &[&[u8]]);
        }
        "TESTMACRO_FUNCTIONAL_TOKENIZED(a,b,c,d,e)" => {
            expect_tokens(&[
                b"a" as &[u8],
                b"/",
                b"b",
                b"c",
                b"d",
                b"##",
                b"e",
            ]);
        }
        "TESTMACRO_FUNCTIONAL_SPLIT(a,b)" => {
            expect_tokens(&[b"b" as &[u8], b",", b"a"]);
        }
        "TESTMACRO_STRING_FUNC_NON_UTF8(x)" => {
            expect_tokens(&[b"(" as &[u8], b"x", b"\"\xff\xff\"", b")"]);
        }
        _ => {
            // The system might provide lots of functional macros; make sure
            // none of our own test macros slipped through unhandled.
            assert!(!name.starts_with("TESTMACRO_"), "name = {}", name);
        }
    }
}
}

impl Drop for MacroCallback {
fn drop(&mut self) {
assert_eq!(
*self.seen_hellos.lock().unwrap(),
2,
3,
"str_macro handle was not called once for all relevant macros"
)
);
assert_eq!(
*self.seen_funcs.lock().unwrap(),
5,
"func_macro handle was not called once for all relevant macros"
);
}
}

Expand All @@ -102,6 +156,7 @@ fn main() {
.parse_callbacks(Box::new(MacroCallback {
macros: macros.clone(),
seen_hellos: Mutex::new(0),
seen_funcs: Mutex::new(0),
}))
.blacklist_function("my_prefixed_function_to_remove")
.generate()
Expand Down
13 changes: 13 additions & 0 deletions bindgen-integration/cpp/Test.h
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,19 @@
#define TESTMACRO_STRING_EXPANDED TESTMACRO_STRING
#define TESTMACRO_CUSTOMINTKIND_PATH 123

// The following two macros are parsed the same by cexpr, but are semantically
// different.
// Object-like: the space before the parenthesis means this takes no arguments,
// so the build script treats a func_macro callback for it as an error.
#define TESTMACRO_NONFUNCTIONAL (TESTMACRO_INTEGER)
// Function-like with an empty expansion.
#define TESTMACRO_FUNCTIONAL_EMPTY(TESTMACRO_INTEGER)
// Function-like; the expansion starts immediately after the closing
// parenthesis and is expected to be reported as the tokens `-` and
// `TESTMACRO_INTEGER`.
#define TESTMACRO_FUNCTIONAL_NONEMPTY(TESTMACRO_INTEGER)-TESTMACRO_INTEGER
// Irregular whitespace on purpose: the build script expects the name to be
// reported as `TESTMACRO_FUNCTIONAL_TOKENIZED(a,b,c,d,e)` and the expansion as
// the tokens `a`, `/`, `b`, `c`, `d`, `##`, `e`.
#define TESTMACRO_FUNCTIONAL_TOKENIZED( a, b ,c,d,e ) a/b c d ## e
// Line continuations inside both the parameter list and the expansion; the
// expected expansion tokens are `b`, `,`, `a`.
#define TESTMACRO_FUNCTIONAL_SPLIT( a, \
b) b,\
a
//#define TESTMACRO_INVALID("string") // A conforming preprocessor rejects this
// Object-like string macro; the build script expects str_macro to see `string`.
#define TESTMACRO_STRING_EXPR ("string")
#define TESTMACRO_STRING_FUNC_NON_UTF8(x) (x "��") /* invalid UTF-8 on purpose */

#include <cwchar>

enum {
Expand Down
11 changes: 10 additions & 1 deletion src/callbacks.rs
Original file line number Diff line number Diff line change
Expand Up @@ -35,10 +35,19 @@ pub trait ParseCallbacks: fmt::Debug + UnwindSafe {
None
}

/// This will be run on every string macro. The callback can not influence the further
/// This will be run on every string macro. The callback cannot influence the further
/// treatment of the macro, but may use the value to generate additional code or configuration.
fn str_macro(&self, _name: &str, _value: &[u8]) {}

/// This will be run on every function-like macro. The callback cannot
/// influence the further treatment of the macro, but may use the value to
/// generate additional code or configuration.
///
/// The first parameter represents the name and argument list (including the
/// parentheses) of the function-like macro. The second parameter represents
/// the expansion of the macro as a sequence of tokens.
///
/// The name is built by joining the token spellings with no separator, so
/// `#define F( a, b ) a/b` is reported as the name `F(a,b)` with the
/// expansion tokens `a`, `/` and `b`. Expansion tokens are raw bytes and are
/// not guaranteed to be valid UTF-8. The callback is not invoked when the
/// name and argument list cannot be converted to UTF-8.
///
/// The default implementation does nothing.
fn func_macro(&self, _name: &str, _value: &[&[u8]]) {}

/// This function should return whether, given an enum variant
/// name, and value, this enum variant will forcibly be a constant.
fn enum_variant_behavior(
Expand Down
74 changes: 48 additions & 26 deletions src/clang.rs
Original file line number Diff line number Diff line change
Expand Up @@ -239,6 +239,17 @@ impl Cursor {
}
}

/// Reports whether this cursor refers to a function-like macro definition.
///
/// The answer comes straight from libclang. `None` is returned when the
/// loaded libclang does not provide `clang_Cursor_isMacroFunctionLike`
/// (libclang 3.9 or greater is required), so callers can fall back to
/// their own detection.
pub fn is_macro_function_like(&self) -> Option<bool> {
    if !clang_Cursor_isMacroFunctionLike::is_loaded() {
        return None;
    }

    // SAFETY: the symbol was just confirmed to be loaded, and `self.x` is
    // the cursor value this wrapper holds.
    let function_like = unsafe { clang_Cursor_isMacroFunctionLike(self.x) } != 0;
    Some(function_like)
}

/// Get the kind of referent this cursor is pointing to.
pub fn kind(&self) -> CXCursorKind {
self.x.kind
Expand Down Expand Up @@ -698,30 +709,9 @@ impl Cursor {

/// Gets the tokens that correspond to that cursor as `cexpr` tokens.
pub fn cexpr_tokens(self) -> Vec<cexpr::token::Token> {
use cexpr::token;

self.tokens()
.iter()
.filter_map(|token| {
let kind = match token.kind {
CXToken_Punctuation => token::Kind::Punctuation,
CXToken_Literal => token::Kind::Literal,
CXToken_Identifier => token::Kind::Identifier,
CXToken_Keyword => token::Kind::Keyword,
// NB: cexpr is not too happy about comments inside
// expressions, so we strip them down here.
CXToken_Comment => return None,
_ => {
error!("Found unexpected token kind: {:?}", token);
return None;
}
};

Some(token::Token {
kind,
raw: token.spelling().to_vec().into_boxed_slice(),
})
})
.filter_map(|token| token.as_cexpr_token())
.collect()
}

Expand Down Expand Up @@ -793,13 +783,16 @@ impl<'a> Drop for RawTokens<'a> {
}
}

/// A raw clang token, that exposes only the kind and spelling. This is a
/// A raw clang token, that exposes only kind, spelling, and extent. This is a
/// slightly more convenient version of `CXToken` which owns the spelling
/// string.
/// string and extent.
#[derive(Debug)]
pub struct ClangToken {
spelling: CXString,
/// The kind of token, this is the same as the relevant member from
/// The extent of the token. This is the same as the relevant member from
/// `CXToken`.
pub extent: CXSourceRange,
/// The kind of the token. This is the same as the relevant member from
/// `CXToken`.
pub kind: CXTokenKind,
}
Expand All @@ -812,6 +805,30 @@ impl ClangToken {
};
c_str.to_bytes()
}

/// Builds the `cexpr` token corresponding to this token, when one exists.
///
/// Punctuation, literals, identifiers and keywords are translated to the
/// matching `cexpr` kind, and the spelling is copied into the new token.
/// Comments yield `None`; any other token kind is logged through `error!`
/// and also yields `None`.
pub fn as_cexpr_token(&self) -> Option<cexpr::token::Token> {
    use cexpr::token;

    let cexpr_kind = match self.kind {
        // NB: cexpr is not too happy about comments inside expressions,
        // so they are dropped here.
        CXToken_Comment => None,
        CXToken_Punctuation => Some(token::Kind::Punctuation),
        CXToken_Literal => Some(token::Kind::Literal),
        CXToken_Identifier => Some(token::Kind::Identifier),
        CXToken_Keyword => Some(token::Kind::Keyword),
        _ => {
            error!("Found unexpected token kind: {:?}", self);
            None
        }
    };

    cexpr_kind.map(|kind| token::Token {
        kind,
        raw: self.spelling().to_vec().into_boxed_slice(),
    })
}
}

impl Drop for ClangToken {
Expand All @@ -834,7 +851,12 @@ impl<'a> Iterator for ClangTokenIterator<'a> {
unsafe {
let kind = clang_getTokenKind(*raw);
let spelling = clang_getTokenSpelling(self.tu, *raw);
Some(ClangToken { kind, spelling })
let extent = clang_getTokenExtent(self.tu, *raw);
Some(ClangToken {
kind,
extent,
spelling,
})
}
}
}
Expand Down
79 changes: 76 additions & 3 deletions src/ir/var.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ use super::item::Item;
use super::ty::{FloatKind, TypeKind};
use crate::callbacks::MacroParsingBehavior;
use crate::clang;
use crate::clang::ClangToken;
use crate::parse::{
ClangItemParser, ClangSubItemParser, ParseError, ParseResult,
};
Expand Down Expand Up @@ -130,6 +131,71 @@ fn default_macro_constant_type(value: i64) -> IntKind {
}
}

/// Detects function-like macros and reports them through the `func_macro`
/// callback.
///
/// `tokens` are the tokens of a `CXCursor_MacroDefinition` cursor. When the
/// macro is not function-like, `Ok(())` is returned so that the caller can
/// carry on with ordinary macro parsing. Otherwise the result is always
/// `Err(ParseError::Continue)`, which tells the caller to skip further
/// processing: either the callback was invoked, or no closing parenthesis
/// was found, or the name and argument list was not valid UTF-8 (a
/// conversion that is only attempted where it should be infallible).
fn handle_function_macro(
    cursor: &clang::Cursor,
    tokens: &[ClangToken],
    callbacks: &dyn crate::callbacks::ParseCallbacks,
) -> Result<(), ParseError> {
    let is_functional_macro = match cursor.is_macro_function_like() {
        // With libclang >= 3.9 clang answers the question for us.
        Some(answer) => answer,
        // Otherwise look for an opening parenthesis that starts exactly where
        // the first token (the macro name) ends, i.e. abuts it.
        None => match (tokens.get(0), tokens.get(1)) {
            (Some(name), Some(next)) => {
                let abutting = unsafe {
                    clang_sys::clang_equalLocations(
                        clang_sys::clang_getRangeEnd(name.extent),
                        clang_sys::clang_getRangeStart(next.extent),
                    ) != 0
                };
                abutting && next.spelling() == b"("
            }
            _ => false,
        },
    };

    if !is_functional_macro {
        return Ok(());
    }

    // The first `)` ends the parameter list. The cheap kind comparison runs
    // before the spelling comparison.
    let close_paren = tokens
        .iter()
        .position(|t| {
            t.kind == clang_sys::CXToken_Punctuation && t.spelling() == b")"
        })
        .ok_or(ParseError::Continue)?;

    // Everything up to and including the `)` is the name and argument list;
    // whatever follows is the expansion.
    let (signature, expansion) = tokens.split_at(close_paren + 1);

    let mut left = Vec::new();
    for token in signature {
        left.extend_from_slice(token.spelling());
    }
    let left = match String::from_utf8(left) {
        Ok(text) => text,
        Err(_) => return Err(ParseError::Continue),
    };

    // Drop the last token with LLVM < 4.0, due to an LLVM bug.
    //
    // See:
    //   https://bugs.llvm.org/show_bug.cgi?id=9069
    let drop_last_token = match crate::clang_version().parsed {
        Some((major, _)) => major < 4,
        None => false,
    };
    let kept = if drop_last_token {
        expansion.len().saturating_sub(1)
    } else {
        expansion.len()
    };
    let right: Vec<&[u8]> = expansion
        .iter()
        .take(kept)
        .map(ClangToken::spelling)
        .collect();

    callbacks.func_macro(&left, &right);

    // The macro has been handled; skip any further macro processing.
    Err(ParseError::Continue)
}

impl ClangSubItemParser for Var {
fn parse(
cursor: clang::Cursor,
Expand All @@ -140,16 +206,20 @@ impl ClangSubItemParser for Var {
use clang_sys::*;
match cursor.kind() {
CXCursor_MacroDefinition => {
let tokens: Vec<_> = cursor.tokens().iter().collect();

if let Some(callbacks) = ctx.parse_callbacks() {
match callbacks.will_parse_macro(&cursor.spelling()) {
MacroParsingBehavior::Ignore => {
return Err(ParseError::Continue);
}
MacroParsingBehavior::Default => {}
}

handle_function_macro(&cursor, &tokens, callbacks)?;
}

let value = parse_macro(ctx, &cursor);
let value = parse_macro(ctx, &tokens);

let (id, value) = match value {
Some(v) => v,
Expand Down Expand Up @@ -316,11 +386,14 @@ impl ClangSubItemParser for Var {
/// Try and parse a macro using all the macros parsed until now.
fn parse_macro(
ctx: &BindgenContext,
cursor: &clang::Cursor,
tokens: &[ClangToken],
) -> Option<(Vec<u8>, cexpr::expr::EvalResult)> {
use cexpr::expr;

let mut cexpr_tokens = cursor.cexpr_tokens();
let mut cexpr_tokens: Vec<_> = tokens
.iter()
.filter_map(ClangToken::as_cexpr_token)
.collect();

let parser = expr::IdentifierParser::new(ctx.parsed_macros());

Expand Down