@@ -147,8 +147,14 @@ enum PatternToken {
147
147
Char(char),
148
148
AnyChar,
149
149
AnySequence,
150
- AnyWithin(~[char]),
151
- AnyExcept(~[char])
150
+ AnyWithin(~[CharSpecifier]),
151
+ AnyExcept(~[CharSpecifier])
152
+ }
153
+
154
// One entry of a bracketed character set (`[...]` / `[!...]`) in a glob pattern.
+ #[deriving(Clone, Eq, TotalEq, Ord, TotalOrd, IterBytes)]
155
+ enum CharSpecifier {
156
// A single literal character from the set.
+ SingleChar(char),
157
// A range written as `start-end`; both endpoints are included when matching.
+ CharRange(char, char)
152
158
}
153
159
154
160
#[deriving(Eq)]
@@ -164,12 +170,15 @@ impl Pattern {
164
170
* This function compiles Unix shell style patterns: `?` matches any single character,
165
171
* `*` matches any (possibly empty) sequence of characters and `[...]` matches any character
166
172
* inside the brackets, unless the first character is `!` in which case it matches any
167
- * character except those between the `!` and the `]`.
173
+ * character except those between the `!` and the `]`. Character sequences can also specify
174
+ * ranges of characters, as ordered by Unicode, so e.g. `[0-9]` specifies any character
175
+ * between 0 and 9 inclusive.
168
176
*
169
177
* The metacharacters `?`, `*`, `[`, `]` can be matched by using brackets (e.g. `[?]`).
170
178
* When a `]` occurs immediately following `[` or `[!` then it is interpreted as
171
179
* being part of, rather than ending, the character set, so `]` and NOT `]` can be
172
- * matched by `[]]` and `[!]]` respectively.
180
+ * matched by `[]]` and `[!]]` respectively. The `-` character can be specified inside a
181
+ * character sequence pattern by placing it at the start or the end, e.g. `[abc-]`.
173
182
*
174
183
* When a `[` does not have a closing `]` before the end of the string then the `[` will
175
184
* be treated literally.
@@ -199,7 +208,8 @@ impl Pattern {
199
208
match chars.slice_from(i + 3).position_elem(&']') {
200
209
None => (),
201
210
Some(j) => {
202
- tokens.push(AnyExcept(chars.slice(i + 2, i + 3 + j).to_owned()));
211
+ let cs = parse_char_specifiers(chars.slice(i + 2, i + 3 + j));
212
+ tokens.push(AnyExcept(cs));
203
213
i += j + 4;
204
214
loop;
205
215
}
@@ -209,7 +219,8 @@ impl Pattern {
209
219
match chars.slice_from(i + 2).position_elem(&']') {
210
220
None => (),
211
221
Some(j) => {
212
- tokens.push(AnyWithin(chars.slice(i + 1, i + 2 + j).to_owned()));
222
+ let cs = parse_char_specifiers(chars.slice(i + 1, i + 2 + j));
223
+ tokens.push(AnyWithin(cs));
213
224
i += j + 3;
214
225
loop;
215
226
}
@@ -335,15 +346,11 @@ impl Pattern {
335
346
AnyChar => {
336
347
!require_literal(c)
337
348
}
338
- AnyWithin(ref chars) => {
339
- !require_literal(c) &&
340
- chars.iter()
341
- .rposition(|&e| chars_eq(e, c, options.case_sensitive)).is_some()
349
+ AnyWithin(ref specifiers) => {
350
+ !require_literal(c) && in_char_specifiers(*specifiers, c, options)
342
351
}
343
- AnyExcept(ref chars) => {
344
- !require_literal(c) &&
345
- chars.iter()
346
- .rposition(|&e| chars_eq(e, c, options.case_sensitive)).is_none()
352
+ AnyExcept(ref specifiers) => {
353
+ !require_literal(c) && !in_char_specifiers(*specifiers, c, options)
347
354
}
348
355
Char(c2) => {
349
356
chars_eq(c, c2, options.case_sensitive)
@@ -370,6 +377,63 @@ impl Pattern {
370
377
371
378
}
372
379
380
/// Turns the characters of a bracketed set into a list of `CharSpecifier`s.
///
/// `s` is scanned left to right. If at least three characters remain and the
/// character right after the current one is `-`, those three characters become a
/// single `CharRange(first, third)`; otherwise the current character becomes a
/// `SingleChar`. As a result, sets such as `[abc-]` and `[-abc]` yield a literal
/// `-`, because that `-` is never the middle of a three-character window.
+ fn parse_char_specifiers(s: &[char]) -> ~[CharSpecifier] {
381
+ let mut cs = ~[];
382
+ let mut i = 0;
383
+ while i < s.len() {
384
// `i + 3 <= s.len()` guarantees that s[i + 1] and s[i + 2] are in bounds
+ if i + 3 <= s.len() && s[i + 1] == '-' {
385
+ cs.push(CharRange(s[i], s[i + 2]));
386
// skip the start character, the `-` and the end character
+ i += 3;
387
+ } else {
388
+ cs.push(SingleChar(s[i]));
389
+ i += 1;
390
+ }
391
+ }
392
+ cs
393
+ }
394
+
395
/// Returns `true` as soon as `c` is matched by one of `specifiers`, `false` otherwise.
///
/// A `SingleChar` is compared with `chars_eq`, honouring `options.case_sensitive`.
/// A `CharRange` matches when `c` lies between its endpoints (inclusive). When
/// matching is case-insensitive and `c` and both endpoints are ASCII, an extra
/// lower-cased comparison is attempted first, but only if both endpoints are letters.
+ fn in_char_specifiers(specifiers: &[CharSpecifier], c: char, options: MatchOptions) -> bool {
396
+
397
+ for &specifier in specifiers.iter() {
398
+ match specifier {
399
+ SingleChar(sc) => {
400
+ if chars_eq(c, sc, options.case_sensitive) {
401
+ return true;
402
+ }
403
+ }
404
+ CharRange(start, end) => {
405
+
406
+ // FIXME: work with non-ascii chars properly (issue #1347)
407
+ if !options.case_sensitive && c.is_ascii() && start.is_ascii() && end.is_ascii() {
408
+
409
// lower-cased endpoints; these bindings shadow `start`/`end` only inside this `if`
+ let start = start.to_ascii().to_lower();
410
+ let end = end.to_ascii().to_lower();
411
+
412
+ let start_up = start.to_upper();
413
+ let end_up = end.to_upper();
414
+
415
+ // only allow case insensitive matching when
416
+ // both start and end are within a-z or A-Z
417
// (an endpoint whose lower and upper forms are equal is not a letter)
+ if start != start_up && end != end_up {
418
+ let start = start.to_char();
419
+ let end = end.to_char();
420
+ let c = c.to_ascii().to_lower().to_char();
421
+ if c >= start && c <= end {
422
+ return true;
423
+ }
424
+ }
425
+ }
426
+
427
// plain comparison against the original (unshadowed) endpoints; always attempted
+ if c >= start && c <= end {
428
+ return true;
429
+ }
430
+ }
431
+ }
432
+ }
433
+
434
// no specifier matched
+ false
435
+ }
436
+
373
437
/// A helper function to determine if two chars are (possibly case-insensitively) equal.
374
438
fn chars_eq(a: char, b: char, case_sensitive: bool) -> bool {
375
439
if cfg!(windows) && path::windows::is_sep(a) && path::windows::is_sep(b) {
@@ -672,6 +736,54 @@ mod test {
672
736
glob ( "/*/*/*/*" ) . skip ( 10000 ) . next ( ) ;
673
737
}
674
738
739
// Exercises character ranges inside bracketed sets, including negation,
// case-insensitive matching and literal `-` handling.
+ #[ test]
740
+ fn test_range_pattern ( ) {
741
+
742
// a digit range matches each of 0..9 but not `_`
+ let pat = Pattern :: new ( "a[0-9]b" ) ;
743
+ for i in range ( 0 , 10 ) {
744
+ assert ! ( pat. matches( fmt!( "a%db" , i) ) ) ;
745
+ }
746
+ assert ! ( !pat. matches( "a_b" ) ) ;
747
+
748
// the negated form rejects each digit and accepts `_`
+ let pat = Pattern :: new ( "a[!0-9]b" ) ;
749
+ for i in range ( 0 , 10 ) {
750
+ assert ! ( !pat. matches( fmt!( "a%db" , i) ) ) ;
751
+ }
752
+ assert ! ( pat. matches( "a_b" ) ) ;
753
+
754
// a range may appear before, between or after single characters in the set
+ let pats = [ "[a-z123]" , "[1a-z23]" , "[123a-z]" ] ;
755
+ for & p in pats. iter ( ) {
756
+ let pat = Pattern :: new ( p) ;
757
+ for c in "abcdefghijklmnopqrstuvwxyz" . iter ( ) {
758
+ assert ! ( pat. matches( c. to_str( ) ) ) ;
759
+ }
760
// upper-case letters match the lower-case range when case sensitivity is off
+ for c in "ABCDEFGHIJKLMNOPQRSTUVWXYZ" . iter ( ) {
761
+ let options = MatchOptions { case_sensitive : false , .. MatchOptions :: new ( ) } ;
762
+ assert ! ( pat. matches_with( c. to_str( ) , options) ) ;
763
+ }
764
+ assert ! ( pat. matches( "1" ) ) ;
765
+ assert ! ( pat. matches( "2" ) ) ;
766
+ assert ! ( pat. matches( "3" ) ) ;
767
+ }
768
+
769
// a `-` at the start or end of the set is a literal character
+ let pats = [ "[abc-]" , "[-abc]" , "[a-c-]" ] ;
770
+ for & p in pats. iter ( ) {
771
+ let pat = Pattern :: new ( p) ;
772
+ assert ! ( pat. matches( "a" ) ) ;
773
+ assert ! ( pat. matches( "b" ) ) ;
774
+ assert ! ( pat. matches( "c" ) ) ;
775
+ assert ! ( pat. matches( "-" ) ) ;
776
+ assert ! ( !pat. matches( "d" ) ) ;
777
+ }
778
+
779
// a reversed range matches neither of its endpoints
+ let pat = Pattern :: new ( "[2-1]" ) ;
780
+ assert ! ( !pat. matches( "1" ) ) ;
781
+ assert ! ( !pat. matches( "2" ) ) ;
782
+
783
// a set consisting only of `-` matches (or, negated, rejects) a literal `-`
+ assert ! ( Pattern :: new( "[-]" ) . matches( "-" ) ) ;
784
+ assert ! ( !Pattern :: new( "[!-]" ) . matches( "-" ) ) ;
785
+ }
786
+
675
787
#[ test]
676
788
fn test_unclosed_bracket ( ) {
677
789
// unclosed `[` should be treated literally
0 commit comments