diff --git a/CHANGELOG.md b/CHANGELOG.md index fad6ccff794..201b3fcc14f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,7 @@ - Implements basic inbound filters for logs. ([#5011](https://github.com/getsentry/relay/pull/5011)) - Always emit a span usage metric, independent of span feature flags. ([#4976](https://github.com/getsentry/relay/pull/4976)) - Improve PII scrubbing for `logentry.formatted` by ensuring only sensitive data is redacted, rather than replacing the entire field value. ([#4985](https://github.com/getsentry/relay/pull/4985)) +- Add negated prefix to glob matching. ([#5040](https://github.com/getsentry/relay/pull/5040)) **Bug Fixes**: diff --git a/relay-pattern/src/lib.rs b/relay-pattern/src/lib.rs index b89fc83a4ef..ee2a60299f6 100644 --- a/relay-pattern/src/lib.rs +++ b/relay-pattern/src/lib.rs @@ -25,6 +25,8 @@ //! * `[!a-z]` matches one character that is not in the given range. //! * `{a,b}` matches any pattern within the alternation group. //! * `\` escapes any of the above special characters and treats it as a literal. +//! * `(a)` matches the literal `a` at the current position. +//! * `(!a)` asserts that the input does not continue with the literal `a`, without consuming any input. //! //! # Complexity //! @@ -59,6 +61,10 @@ enum ErrorKind { InvalidRange(char, char), /// Unbalanced character class. The pattern contains unbalanced `[`, `]` characters. UnbalancedCharacterClass, + /// Unbalanced group. The pattern contains unbalanced `(`, `)` characters. + UnbalancedGroup, + /// Groups may not be nested. + InvalidNestedGroup, /// Character class is invalid and cannot be parsed. InvalidCharacterClass, /// Nested alternates are not valid. 
@@ -84,6 +90,8 @@ impl fmt::Display for Error { write!(f, "Invalid character range `{start}-{end}`") } ErrorKind::UnbalancedCharacterClass => write!(f, "Unbalanced character class"), + ErrorKind::UnbalancedGroup => write!(f, "Unbalanced group"), + ErrorKind::InvalidNestedGroup => write!(f, "Nested grouping is not permitted"), ErrorKind::InvalidCharacterClass => write!(f, "Invalid character class"), ErrorKind::NestedAlternates => write!(f, "Nested alternates"), ErrorKind::UnbalancedAlternates => write!(f, "Unbalanced alternates"), @@ -504,6 +512,8 @@ impl<'a> Parser<'a> { match c { '?' => self.push_token(Token::Any(NonZeroUsize::MIN)), '*' => self.push_token(Token::Wildcard), + '(' => self.parse_group()?, + ')' => return Err(ErrorKind::UnbalancedGroup), '[' => self.parse_class()?, ']' => return Err(ErrorKind::UnbalancedCharacterClass), '{' => self.start_alternates()?, @@ -555,6 +565,33 @@ impl<'a> Parser<'a> { Ok(()) } + fn parse_group(&mut self) -> Result<(), ErrorKind> { + let negated = self.advance_if(|c| c == '!'); + let mut literal = String::new(); + + loop { + let Some(c) = self.advance() else { + return Err(ErrorKind::UnbalancedGroup); + }; + + match c { + '(' => return Err(ErrorKind::InvalidNestedGroup), + ')' => break, + c => literal.push(match c { + '\\' => self.advance().ok_or(ErrorKind::DanglingEscape)?, + c => c, + }), + } + } + + self.push_token(Token::Group { + negated, + literal: Literal::new(literal, self.options), + }); + + Ok(()) + } + fn parse_class(&mut self) -> Result<(), ErrorKind> { let negated = self.advance_if(|c| c == '!'); @@ -763,6 +800,8 @@ enum Token { Wildcard, /// A class token `[abc]` or its negated variant `[!abc]`. Class { negated: bool, ranges: Ranges }, + /// A group token `(abc)` or its negated variant `(!abc)`. + Group { negated: bool, literal: Literal }, /// A list of nested alternate tokens `{a,b}`. Alternates(Vec), /// A list of optional tokens. 
@@ -1936,4 +1975,18 @@ mod tests { assert!(!patterns.is_match("foo")); assert!(patterns.is_match("bar")); } + + #[test] + fn test_patterns_inverted() { + // We want to match anything that is not prefixed with foo@ or bar@ + let mut builder = Patterns::builder().add("(!foo@)(!bar@)*").unwrap(); + + let patterns = builder.take(); + assert!(!patterns.is_match("foo@1.1")); + assert!(!patterns.is_match("bar@1.1")); + assert!(patterns.is_match("baz@1.1")); + assert!(patterns.is_match("foobar@1.1")); + assert!(patterns.is_match("barbaz@1.1")); + assert!(patterns.is_match("barbaz@1.1")); + } } diff --git a/relay-pattern/src/wildmatch.rs b/relay-pattern/src/wildmatch.rs index 6f3482db780..9197ee7d0b5 100644 --- a/relay-pattern/src/wildmatch.rs +++ b/relay-pattern/src/wildmatch.rs @@ -61,6 +61,29 @@ where // no match here. None => false, }, + // A negated Token::Group never advances the haystack cursor. It only checks that + // the haystack is not prefixed with the literal: if it is, the match fails, otherwise + // the next token is matched from the exact position the group started at. A + // non-negated Token::Group behaves like a literal and advances past the matched prefix. + Token::Group { negated, literal } => match M::is_prefix(h_current, literal) { + Some(n) => { + if *negated { + // We matched the literal but the negated operator was specified. The + // match fails and we do not advance the pointer. + false + } else { + // The haystack cursor is advanced because we matched the prefix. This + // is identical behavior to normal literal matching behavior. + advance!(n) + } + } + // We did not match the prefix literal. If the negated operator was + // specified we return true indicating a match but we do not increment + // the haystack pointer. We've only determined that the haystack is not + // prefixed with some value. We haven't made a definitive conclusion about + // _what_ the prefix actually is. The next token will perform that operation. 
+ None => *negated, + }, Token::Any(n) => { advance!(match n_chars_to_bytes(*n, h_current) { Some(n) => n,