Skip to content

Commit 3c17903

Browse files
committed
auto merge of #9131 : Dretch/rust/glob-range-patterns, r=alexcrichton
This feature was overlooked in the original pull request (#8914). r? @alexcrichton
2 parents 0a2d3c5 + ef08b23 commit 3c17903

File tree

1 file changed

+126
-14
lines changed

1 file changed

+126
-14
lines changed

src/libextra/glob.rs

+126-14
Original file line number | Diff line number | Diff line change
@@ -147,8 +147,14 @@ enum PatternToken {
147147
Char(char),
148148
AnyChar,
149149
AnySequence,
150-
AnyWithin(~[char]),
151-
AnyExcept(~[char])
150+
AnyWithin(~[CharSpecifier]),
151+
AnyExcept(~[CharSpecifier])
152+
}
153+
154+
/// One entry of a bracketed character set (`[...]` / `[!...]`) in a compiled pattern.
/// Stored inside the `AnyWithin` and `AnyExcept` pattern tokens.
#[deriving(Clone, Eq, TotalEq, Ord, TotalOrd, IterBytes)]
155+
enum CharSpecifier {
156+
/// A single character listed in the set.
SingleChar(char),
157+
/// A range written as `start-end`; the matcher compares with both ends inclusive.
CharRange(char, char)
152158
}
153159
154160
#[deriving(Eq)]
@@ -164,12 +170,15 @@ impl Pattern {
164170
* This function compiles Unix shell style patterns: `?` matches any single character,
165171
* `*` matches any (possibly empty) sequence of characters and `[...]` matches any character
166172
* inside the brackets, unless the first character is `!` in which case it matches any
167-
* character except those between the `!` and the `]`.
173+
* character except those between the `!` and the `]`. Character sequences can also specify
174+
* ranges of characters, as ordered by Unicode, so e.g. `[0-9]` specifies any character
175+
* between 0 and 9 inclusive.
168176
*
169177
* The metacharacters `?`, `*`, `[`, `]` can be matched by using brackets (e.g. `[?]`).
170178
* When a `]` occurs immediately following `[` or `[!` then it is interpreted as
171179
* being part of, rather than ending, the character set, so `]` and NOT `]` can be
172-
* matched by `[]]` and `[!]]` respectively.
180+
* matched by `[]]` and `[!]]` respectively. The `-` character can be specified inside a
181+
* character sequence pattern by placing it at the start or the end, e.g. `[abc-]`.
173182
*
174183
* When a `[` does not have a closing `]` before the end of the string then the `[` will
175184
* be treated literally.
@@ -199,7 +208,8 @@ impl Pattern {
199208
match chars.slice_from(i + 3).position_elem(&']') {
200209
None => (),
201210
Some(j) => {
202-
tokens.push(AnyExcept(chars.slice(i + 2, i + 3 + j).to_owned()));
211+
let cs = parse_char_specifiers(chars.slice(i + 2, i + 3 + j));
212+
tokens.push(AnyExcept(cs));
203213
i += j + 4;
204214
loop;
205215
}
@@ -209,7 +219,8 @@ impl Pattern {
209219
match chars.slice_from(i + 2).position_elem(&']') {
210220
None => (),
211221
Some(j) => {
212-
tokens.push(AnyWithin(chars.slice(i + 1, i + 2 + j).to_owned()));
222+
let cs = parse_char_specifiers(chars.slice(i + 1, i + 2 + j));
223+
tokens.push(AnyWithin(cs));
213224
i += j + 3;
214225
loop;
215226
}
@@ -335,15 +346,11 @@ impl Pattern {
335346
AnyChar => {
336347
!require_literal(c)
337348
}
338-
AnyWithin(ref chars) => {
339-
!require_literal(c) &&
340-
chars.iter()
341-
.rposition(|&e| chars_eq(e, c, options.case_sensitive)).is_some()
349+
AnyWithin(ref specifiers) => {
350+
!require_literal(c) && in_char_specifiers(*specifiers, c, options)
342351
}
343-
AnyExcept(ref chars) => {
344-
!require_literal(c) &&
345-
chars.iter()
346-
.rposition(|&e| chars_eq(e, c, options.case_sensitive)).is_none()
352+
AnyExcept(ref specifiers) => {
353+
!require_literal(c) && !in_char_specifiers(*specifiers, c, options)
347354
}
348355
Char(c2) => {
349356
chars_eq(c, c2, options.case_sensitive)
@@ -370,6 +377,63 @@ impl Pattern {
370377
371378
}
372379
380+
/// Converts the characters found between the brackets of a character set into a
/// list of `CharSpecifier`s.
///
/// The slice is scanned left to right. Whenever at least three characters remain
/// and the middle one is `-`, those three are consumed as a single
/// `CharRange(first, third)`; otherwise the current character is pushed as a
/// `SingleChar` and the scan advances by one. For example, in `abc-` and `-abc`
/// the `-` is not in the middle of a three-character window, so it becomes
/// `SingleChar('-')`.
///
/// No validation is performed on the range bounds (e.g. `2-1` is stored as given).
fn parse_char_specifiers(s: &[char]) -> ~[CharSpecifier] {
381+
let mut cs = ~[];
382+
let mut i = 0;
383+
while i < s.len() {
384+
// a range needs three characters: start, '-', end
if i + 3 <= s.len() && s[i + 1] == '-' {
385+
cs.push(CharRange(s[i], s[i + 2]));
386+
i += 3;
387+
} else {
388+
cs.push(SingleChar(s[i]));
389+
i += 1;
390+
}
391+
}
392+
cs
393+
}
394+
395+
/// Returns `true` if `c` is matched by at least one entry of `specifiers`,
/// and `false` if none match (including when `specifiers` is empty).
///
/// * `SingleChar` entries are compared with `chars_eq`, passing along
///   `options.case_sensitive`.
/// * `CharRange` entries match when `c >= start && c <= end`. In addition, when
///   matching case-insensitively and `c`, `start` and `end` are all ASCII with both
///   bounds being letters, the same inclusive comparison is tried with all three
///   values lowercased.
fn in_char_specifiers(specifiers: &[CharSpecifier], c: char, options: MatchOptions) -> bool {
396+
397+
for &specifier in specifiers.iter() {
398+
match specifier {
399+
SingleChar(sc) => {
400+
if chars_eq(c, sc, options.case_sensitive) {
401+
return true;
402+
}
403+
}
404+
CharRange(start, end) => {
405+
406+
// FIXME: work with non-ascii chars properly (issue #1347)
407+
if !options.case_sensitive && c.is_ascii() && start.is_ascii() && end.is_ascii() {
408+
409+
// shadow the bounds with their lowercase ASCII forms for this branch only
let start = start.to_ascii().to_lower();
410+
let end = end.to_ascii().to_lower();
411+
412+
let start_up = start.to_upper();
413+
let end_up = end.to_upper();
414+
415+
// only allow case insensitive matching when
416+
// both start and end are within a-z or A-Z
417+
// (a lowercased value that differs from its uppercase form is a letter)
if start != start_up && end != end_up {
418+
let start = start.to_char();
419+
let end = end.to_char();
420+
let c = c.to_ascii().to_lower().to_char();
421+
if c >= start && c <= end {
422+
return true;
423+
}
424+
}
425+
}
426+
427+
// plain inclusive comparison against the bounds as written in the pattern;
// the lowercased shadows above are out of scope here
if c >= start && c <= end {
428+
return true;
429+
}
430+
}
431+
}
432+
}
433+
434+
false
435+
}
436+
373437
/// A helper function to determine if two chars are (possibly case-insensitively) equal.
374438
fn chars_eq(a: char, b: char, case_sensitive: bool) -> bool {
375439
if cfg!(windows) && path::windows::is_sep(a) && path::windows::is_sep(b) {
@@ -672,6 +736,54 @@ mod test {
672736
glob("/*/*/*/*").skip(10000).next();
673737
}
674738

739+
// Exercises character ranges inside bracket patterns: plain and negated ranges,
// ranges mixed with single characters, case-insensitive matching, literal `-`
// placement, and a reversed range.
#[test]
740+
fn test_range_pattern() {
741+
742+
// a digit range matches every digit and nothing else
let pat = Pattern::new("a[0-9]b");
743+
for i in range(0, 10) {
744+
assert!(pat.matches(fmt!("a%db", i)));
745+
}
746+
assert!(!pat.matches("a_b"));
747+
748+
// the negated form inverts the result
let pat = Pattern::new("a[!0-9]b");
749+
for i in range(0, 10) {
750+
assert!(!pat.matches(fmt!("a%db", i)));
751+
}
752+
assert!(pat.matches("a_b"));
753+
754+
// a range may appear before, between or after single characters
let pats = ["[a-z123]", "[1a-z23]", "[123a-z]"];
755+
for &p in pats.iter() {
756+
let pat = Pattern::new(p);
757+
for c in "abcdefghijklmnopqrstuvwxyz".iter() {
758+
assert!(pat.matches(c.to_str()));
759+
}
760+
// uppercase letters match the lowercase range when case sensitivity is off
for c in "ABCDEFGHIJKLMNOPQRSTUVWXYZ".iter() {
761+
let options = MatchOptions {case_sensitive: false, .. MatchOptions::new()};
762+
assert!(pat.matches_with(c.to_str(), options));
763+
}
764+
assert!(pat.matches("1"));
765+
assert!(pat.matches("2"));
766+
assert!(pat.matches("3"));
767+
}
768+
769+
// a `-` at the start or end of the set is a literal character
let pats = ["[abc-]", "[-abc]", "[a-c-]"];
770+
for &p in pats.iter() {
771+
let pat = Pattern::new(p);
772+
assert!(pat.matches("a"));
773+
assert!(pat.matches("b"));
774+
assert!(pat.matches("c"));
775+
assert!(pat.matches("-"));
776+
assert!(!pat.matches("d"));
777+
}
778+
779+
// a reversed range matches neither of its endpoints
let pat = Pattern::new("[2-1]");
780+
assert!(!pat.matches("1"));
781+
assert!(!pat.matches("2"));
782+
783+
// a lone `-` is a literal, in both the plain and the negated set
assert!(Pattern::new("[-]").matches("-"));
784+
assert!(!Pattern::new("[!-]").matches("-"));
785+
}
786+
675787
#[test]
676788
fn test_unclosed_bracket() {
677789
// unclosed `[` should be treated literally

0 commit comments

Comments
 (0)