globset/
glob.rs

1use std::fmt::Write;
2use std::path::{is_separator, Path};
3
4use regex_automata::meta::Regex;
5
6use crate::{new_regex, Candidate, Error, ErrorKind};
7
/// Describes a matching strategy for a particular pattern.
///
/// This provides a way to more quickly determine whether a pattern matches
/// a particular file path in a way that scales with a large number of
/// patterns. For example, if many patterns are of the form `*.ext`, then it's
/// possible to test whether any of those patterns matches by looking up a
/// file path's extension in a hash table.
#[derive(Clone, Debug, Eq, PartialEq)]
pub(crate) enum MatchStrategy {
    /// A pattern matches if and only if the entire file path matches this
    /// literal string.
    Literal(String),
    /// A pattern matches if and only if the file path's basename matches this
    /// literal string.
    BasenameLiteral(String),
    /// A pattern matches if and only if the file path's extension matches this
    /// literal string.
    Extension(String),
    /// A pattern matches if and only if this prefix literal is a prefix of the
    /// candidate file path.
    Prefix(String),
    /// A pattern matches if and only if this suffix literal is a suffix of the
    /// candidate file path.
    ///
    /// An exception: if `component` is true, then `suffix` must appear at the
    /// beginning of a file path or immediately following a `/`. In that case
    /// the stored literal starts with `/`, and a path equal to the literal
    /// with that leading `/` removed is also a match.
    Suffix {
        /// The actual suffix.
        suffix: String,
        /// Whether this must start at the beginning of a path component.
        component: bool,
    },
    /// A pattern matches only if the given extension matches the file path's
    /// extension. Note that this is a necessary but NOT sufficient criterion.
    /// Namely, if the extension matches, then a full regex search is still
    /// required.
    RequiredExtension(String),
    /// A regex needs to be used for matching.
    Regex,
}
48
49impl MatchStrategy {
50    /// Returns a matching strategy for the given pattern.
51    pub(crate) fn new(pat: &Glob) -> MatchStrategy {
52        if let Some(lit) = pat.basename_literal() {
53            MatchStrategy::BasenameLiteral(lit)
54        } else if let Some(lit) = pat.literal() {
55            MatchStrategy::Literal(lit)
56        } else if let Some(ext) = pat.ext() {
57            MatchStrategy::Extension(ext)
58        } else if let Some(prefix) = pat.prefix() {
59            MatchStrategy::Prefix(prefix)
60        } else if let Some((suffix, component)) = pat.suffix() {
61            MatchStrategy::Suffix { suffix, component }
62        } else if let Some(ext) = pat.required_ext() {
63            MatchStrategy::RequiredExtension(ext)
64        } else {
65            MatchStrategy::Regex
66        }
67    }
68}
69
70/// Glob represents a successfully parsed shell glob pattern.
71///
72/// It cannot be used directly to match file paths, but it can be converted
73/// to a regular expression string or a matcher.
74#[derive(Clone, Debug, Eq)]
75pub struct Glob {
76    glob: String,
77    re: String,
78    opts: GlobOptions,
79    tokens: Tokens,
80}
81
82impl PartialEq for Glob {
83    fn eq(&self, other: &Glob) -> bool {
84        self.glob == other.glob && self.opts == other.opts
85    }
86}
87
88impl std::hash::Hash for Glob {
89    fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
90        self.glob.hash(state);
91        self.opts.hash(state);
92    }
93}
94
95impl std::fmt::Display for Glob {
96    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
97        self.glob.fmt(f)
98    }
99}
100
101impl std::str::FromStr for Glob {
102    type Err = Error;
103
104    fn from_str(glob: &str) -> Result<Self, Self::Err> {
105        Self::new(glob)
106    }
107}
108
109/// A matcher for a single pattern.
110#[derive(Clone, Debug)]
111pub struct GlobMatcher {
112    /// The underlying pattern.
113    pat: Glob,
114    /// The pattern, as a compiled regex.
115    re: Regex,
116}
117
118impl GlobMatcher {
119    /// Tests whether the given path matches this pattern or not.
120    pub fn is_match<P: AsRef<Path>>(&self, path: P) -> bool {
121        self.is_match_candidate(&Candidate::new(path.as_ref()))
122    }
123
124    /// Tests whether the given path matches this pattern or not.
125    pub fn is_match_candidate(&self, path: &Candidate<'_>) -> bool {
126        self.re.is_match(&path.path)
127    }
128
129    /// Returns the `Glob` used to compile this matcher.
130    pub fn glob(&self) -> &Glob {
131        &self.pat
132    }
133}
134
/// A test-only matcher for a single pattern that dispatches on a
/// `MatchStrategy` instead of always running the regex.
#[cfg(test)]
#[derive(Clone, Debug)]
struct GlobStrategic {
    /// The strategy chosen for the pattern.
    strategy: MatchStrategy,
    /// The compiled regex, used by the strategies that still need one.
    re: Regex,
}
144
#[cfg(test)]
impl GlobStrategic {
    /// Reports whether `path` matches this pattern.
    fn is_match<P: AsRef<Path>>(&self, path: P) -> bool {
        let candidate = Candidate::new(path.as_ref());
        self.is_match_candidate(&candidate)
    }

    /// Reports whether the prepared candidate matches, using the cheapest
    /// check the stored strategy allows.
    fn is_match_candidate(&self, candidate: &Candidate<'_>) -> bool {
        let path = &*candidate.path;

        match &self.strategy {
            MatchStrategy::Literal(lit) => lit.as_bytes() == path,
            MatchStrategy::BasenameLiteral(lit) => {
                lit.as_bytes() == &*candidate.basename
            }
            MatchStrategy::Extension(ext) => {
                ext.as_bytes() == &*candidate.ext
            }
            MatchStrategy::Prefix(pre) => starts_with(pre.as_bytes(), path),
            MatchStrategy::Suffix { suffix, component } => {
                let suffix = suffix.as_bytes();
                // A component suffix begins with `/`; the whole path may
                // also equal the suffix with that separator stripped.
                (*component && path == &suffix[1..])
                    || ends_with(suffix, path)
            }
            MatchStrategy::RequiredExtension(ext) => {
                // The extension is only a pre-filter; the regex decides.
                &*candidate.ext == ext.as_bytes() && self.re.is_match(path)
            }
            MatchStrategy::Regex => self.re.is_match(path),
        }
    }
}
181
182/// A builder for a pattern.
183///
184/// This builder enables configuring the match semantics of a pattern. For
185/// example, one can make matching case insensitive.
186///
187/// The lifetime `'a` refers to the lifetime of the pattern string.
188#[derive(Clone, Debug)]
189pub struct GlobBuilder<'a> {
190    /// The glob pattern to compile.
191    glob: &'a str,
192    /// Options for the pattern.
193    opts: GlobOptions,
194}
195
/// The set of switches that control how a glob pattern is parsed and how it
/// is translated to a regular expression.
#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)]
struct GlobOptions {
    /// Whether to match case insensitively.
    case_insensitive: bool,
    /// Whether to require a literal separator to match a separator in a file
    /// path. e.g., when enabled, `*` won't match `/`.
    literal_separator: bool,
    /// Whether or not to use `\` to escape special characters.
    /// e.g., when enabled, `\*` will match a literal `*`.
    backslash_escape: bool,
    /// Whether or not an empty case in an alternate is kept as a branch
    /// that matches the empty string. When disabled, empty cases are
    /// dropped from the alternation.
    /// e.g., when enabled, `{,a}` will match "" and "a".
    empty_alternates: bool,
}
210
211impl GlobOptions {
212    fn default() -> GlobOptions {
213        GlobOptions {
214            case_insensitive: false,
215            literal_separator: false,
216            backslash_escape: !is_separator('\\'),
217            empty_alternates: false,
218        }
219    }
220}
221
222#[derive(Clone, Debug, Default, Eq, PartialEq)]
223struct Tokens(Vec<Token>);
224
225impl std::ops::Deref for Tokens {
226    type Target = Vec<Token>;
227    fn deref(&self) -> &Vec<Token> {
228        &self.0
229    }
230}
231
232impl std::ops::DerefMut for Tokens {
233    fn deref_mut(&mut self) -> &mut Vec<Token> {
234        &mut self.0
235    }
236}
237
238#[derive(Clone, Debug, Eq, PartialEq)]
239enum Token {
240    Literal(char),
241    Any,
242    ZeroOrMore,
243    RecursivePrefix,
244    RecursiveSuffix,
245    RecursiveZeroOrMore,
246    Class { negated: bool, ranges: Vec<(char, char)> },
247    Alternates(Vec<Tokens>),
248}
249
250impl Glob {
251    /// Builds a new pattern with default options.
252    pub fn new(glob: &str) -> Result<Glob, Error> {
253        GlobBuilder::new(glob).build()
254    }
255
256    /// Returns a matcher for this pattern.
257    pub fn compile_matcher(&self) -> GlobMatcher {
258        let re =
259            new_regex(&self.re).expect("regex compilation shouldn't fail");
260        GlobMatcher { pat: self.clone(), re }
261    }
262
263    /// Returns a strategic matcher.
264    ///
265    /// This isn't exposed because it's not clear whether it's actually
266    /// faster than just running a regex for a *single* pattern. If it
267    /// is faster, then GlobMatcher should do it automatically.
268    #[cfg(test)]
269    fn compile_strategic_matcher(&self) -> GlobStrategic {
270        let strategy = MatchStrategy::new(self);
271        let re =
272            new_regex(&self.re).expect("regex compilation shouldn't fail");
273        GlobStrategic { strategy, re }
274    }
275
276    /// Returns the original glob pattern used to build this pattern.
277    pub fn glob(&self) -> &str {
278        &self.glob
279    }
280
281    /// Returns the regular expression string for this glob.
282    ///
283    /// Note that regular expressions for globs are intended to be matched on
284    /// arbitrary bytes (`&[u8]`) instead of Unicode strings (`&str`). In
285    /// particular, globs are frequently used on file paths, where there is no
286    /// general guarantee that file paths are themselves valid UTF-8. As a
287    /// result, callers will need to ensure that they are using a regex API
288    /// that can match on arbitrary bytes. For example, the
289    /// [`regex`](https://crates.io/regex)
290    /// crate's
291    /// [`Regex`](https://docs.rs/regex/*/regex/struct.Regex.html)
292    /// API is not suitable for this since it matches on `&str`, but its
293    /// [`bytes::Regex`](https://docs.rs/regex/*/regex/bytes/struct.Regex.html)
294    /// API is suitable for this.
295    pub fn regex(&self) -> &str {
296        &self.re
297    }
298
299    /// Returns the pattern as a literal if and only if the pattern must match
300    /// an entire path exactly.
301    ///
302    /// The basic format of these patterns is `{literal}`.
303    fn literal(&self) -> Option<String> {
304        if self.opts.case_insensitive {
305            return None;
306        }
307        let mut lit = String::new();
308        for t in &*self.tokens {
309            let Token::Literal(c) = *t else { return None };
310            lit.push(c);
311        }
312        if lit.is_empty() {
313            None
314        } else {
315            Some(lit)
316        }
317    }
318
319    /// Returns an extension if this pattern matches a file path if and only
320    /// if the file path has the extension returned.
321    ///
322    /// Note that this extension returned differs from the extension that
323    /// std::path::Path::extension returns. Namely, this extension includes
324    /// the '.'. Also, paths like `.rs` are considered to have an extension
325    /// of `.rs`.
326    fn ext(&self) -> Option<String> {
327        if self.opts.case_insensitive {
328            return None;
329        }
330        let start = match *self.tokens.get(0)? {
331            Token::RecursivePrefix => 1,
332            _ => 0,
333        };
334        match *self.tokens.get(start)? {
335            Token::ZeroOrMore => {
336                // If there was no recursive prefix, then we only permit
337                // `*` if `*` can match a `/`. For example, if `*` can't
338                // match `/`, then `*.c` doesn't match `foo/bar.c`.
339                if start == 0 && self.opts.literal_separator {
340                    return None;
341                }
342            }
343            _ => return None,
344        }
345        match *self.tokens.get(start + 1)? {
346            Token::Literal('.') => {}
347            _ => return None,
348        }
349        let mut lit = ".".to_string();
350        for t in self.tokens[start + 2..].iter() {
351            match *t {
352                Token::Literal('.') | Token::Literal('/') => return None,
353                Token::Literal(c) => lit.push(c),
354                _ => return None,
355            }
356        }
357        if lit.is_empty() {
358            None
359        } else {
360            Some(lit)
361        }
362    }
363
364    /// This is like `ext`, but returns an extension even if it isn't sufficient
365    /// to imply a match. Namely, if an extension is returned, then it is
366    /// necessary but not sufficient for a match.
367    fn required_ext(&self) -> Option<String> {
368        if self.opts.case_insensitive {
369            return None;
370        }
371        // We don't care at all about the beginning of this pattern. All we
372        // need to check for is if it ends with a literal of the form `.ext`.
373        let mut ext: Vec<char> = vec![]; // built in reverse
374        for t in self.tokens.iter().rev() {
375            match *t {
376                Token::Literal('/') => return None,
377                Token::Literal(c) => {
378                    ext.push(c);
379                    if c == '.' {
380                        break;
381                    }
382                }
383                _ => return None,
384            }
385        }
386        if ext.last() != Some(&'.') {
387            None
388        } else {
389            ext.reverse();
390            Some(ext.into_iter().collect())
391        }
392    }
393
394    /// Returns a literal prefix of this pattern if the entire pattern matches
395    /// if the literal prefix matches.
396    fn prefix(&self) -> Option<String> {
397        if self.opts.case_insensitive {
398            return None;
399        }
400        let (end, need_sep) = match *self.tokens.last()? {
401            Token::ZeroOrMore => {
402                if self.opts.literal_separator {
403                    // If a trailing `*` can't match a `/`, then we can't
404                    // assume a match of the prefix corresponds to a match
405                    // of the overall pattern. e.g., `foo/*` with
406                    // `literal_separator` enabled matches `foo/bar` but not
407                    // `foo/bar/baz`, even though `foo/bar/baz` has a `foo/`
408                    // literal prefix.
409                    return None;
410                }
411                (self.tokens.len() - 1, false)
412            }
413            Token::RecursiveSuffix => (self.tokens.len() - 1, true),
414            _ => (self.tokens.len(), false),
415        };
416        let mut lit = String::new();
417        for t in &self.tokens[0..end] {
418            let Token::Literal(c) = *t else { return None };
419            lit.push(c);
420        }
421        if need_sep {
422            lit.push('/');
423        }
424        if lit.is_empty() {
425            None
426        } else {
427            Some(lit)
428        }
429    }
430
431    /// Returns a literal suffix of this pattern if the entire pattern matches
432    /// if the literal suffix matches.
433    ///
434    /// If a literal suffix is returned and it must match either the entire
435    /// file path or be preceded by a `/`, then also return true. This happens
436    /// with a pattern like `**/foo/bar`. Namely, this pattern matches
437    /// `foo/bar` and `baz/foo/bar`, but not `foofoo/bar`. In this case, the
438    /// suffix returned is `/foo/bar` (but should match the entire path
439    /// `foo/bar`).
440    ///
441    /// When this returns true, the suffix literal is guaranteed to start with
442    /// a `/`.
443    fn suffix(&self) -> Option<(String, bool)> {
444        if self.opts.case_insensitive {
445            return None;
446        }
447        let mut lit = String::new();
448        let (start, entire) = match *self.tokens.get(0)? {
449            Token::RecursivePrefix => {
450                // We only care if this follows a path component if the next
451                // token is a literal.
452                if let Some(&Token::Literal(_)) = self.tokens.get(1) {
453                    lit.push('/');
454                    (1, true)
455                } else {
456                    (1, false)
457                }
458            }
459            _ => (0, false),
460        };
461        let start = match *self.tokens.get(start)? {
462            Token::ZeroOrMore => {
463                // If literal_separator is enabled, then a `*` can't
464                // necessarily match everything, so reporting a suffix match
465                // as a match of the pattern would be a false positive.
466                if self.opts.literal_separator {
467                    return None;
468                }
469                start + 1
470            }
471            _ => start,
472        };
473        for t in &self.tokens[start..] {
474            let Token::Literal(c) = *t else { return None };
475            lit.push(c);
476        }
477        if lit.is_empty() || lit == "/" {
478            None
479        } else {
480            Some((lit, entire))
481        }
482    }
483
484    /// If this pattern only needs to inspect the basename of a file path,
485    /// then the tokens corresponding to only the basename match are returned.
486    ///
487    /// For example, given a pattern of `**/*.foo`, only the tokens
488    /// corresponding to `*.foo` are returned.
489    ///
490    /// Note that this will return None if any match of the basename tokens
491    /// doesn't correspond to a match of the entire pattern. For example, the
492    /// glob `foo` only matches when a file path has a basename of `foo`, but
493    /// doesn't *always* match when a file path has a basename of `foo`. e.g.,
494    /// `foo` doesn't match `abc/foo`.
495    fn basename_tokens(&self) -> Option<&[Token]> {
496        if self.opts.case_insensitive {
497            return None;
498        }
499        let start = match *self.tokens.get(0)? {
500            Token::RecursivePrefix => 1,
501            _ => {
502                // With nothing to gobble up the parent portion of a path,
503                // we can't assume that matching on only the basename is
504                // correct.
505                return None;
506            }
507        };
508        if self.tokens[start..].is_empty() {
509            return None;
510        }
511        for t in self.tokens[start..].iter() {
512            match *t {
513                Token::Literal('/') => return None,
514                Token::Literal(_) => {} // OK
515                Token::Any | Token::ZeroOrMore => {
516                    if !self.opts.literal_separator {
517                        // In this case, `*` and `?` can match a path
518                        // separator, which means this could reach outside
519                        // the basename.
520                        return None;
521                    }
522                }
523                Token::RecursivePrefix
524                | Token::RecursiveSuffix
525                | Token::RecursiveZeroOrMore => {
526                    return None;
527                }
528                Token::Class { .. } | Token::Alternates(..) => {
529                    // We *could* be a little smarter here, but either one
530                    // of these is going to prevent our literal optimizations
531                    // anyway, so give up.
532                    return None;
533                }
534            }
535        }
536        Some(&self.tokens[start..])
537    }
538
539    /// Returns the pattern as a literal if and only if the pattern exclusively
540    /// matches the basename of a file path *and* is a literal.
541    ///
542    /// The basic format of these patterns is `**/{literal}`, where `{literal}`
543    /// does not contain a path separator.
544    fn basename_literal(&self) -> Option<String> {
545        let tokens = self.basename_tokens()?;
546        let mut lit = String::new();
547        for t in tokens {
548            let Token::Literal(c) = *t else { return None };
549            lit.push(c);
550        }
551        Some(lit)
552    }
553}
554
555impl<'a> GlobBuilder<'a> {
556    /// Create a new builder for the pattern given.
557    ///
558    /// The pattern is not compiled until `build` is called.
559    pub fn new(glob: &'a str) -> GlobBuilder<'a> {
560        GlobBuilder { glob, opts: GlobOptions::default() }
561    }
562
563    /// Parses and builds the pattern.
564    pub fn build(&self) -> Result<Glob, Error> {
565        let mut p = Parser {
566            glob: &self.glob,
567            stack: vec![Tokens::default()],
568            chars: self.glob.chars().peekable(),
569            prev: None,
570            cur: None,
571            opts: &self.opts,
572        };
573        p.parse()?;
574        if p.stack.is_empty() {
575            Err(Error {
576                glob: Some(self.glob.to_string()),
577                kind: ErrorKind::UnopenedAlternates,
578            })
579        } else if p.stack.len() > 1 {
580            Err(Error {
581                glob: Some(self.glob.to_string()),
582                kind: ErrorKind::UnclosedAlternates,
583            })
584        } else {
585            let tokens = p.stack.pop().unwrap();
586            Ok(Glob {
587                glob: self.glob.to_string(),
588                re: tokens.to_regex_with(&self.opts),
589                opts: self.opts,
590                tokens,
591            })
592        }
593    }
594
595    /// Toggle whether the pattern matches case insensitively or not.
596    ///
597    /// This is disabled by default.
598    pub fn case_insensitive(&mut self, yes: bool) -> &mut GlobBuilder<'a> {
599        self.opts.case_insensitive = yes;
600        self
601    }
602
603    /// Toggle whether a literal `/` is required to match a path separator.
604    ///
605    /// By default this is false: `*` and `?` will match `/`.
606    pub fn literal_separator(&mut self, yes: bool) -> &mut GlobBuilder<'a> {
607        self.opts.literal_separator = yes;
608        self
609    }
610
611    /// When enabled, a back slash (`\`) may be used to escape
612    /// special characters in a glob pattern. Additionally, this will
613    /// prevent `\` from being interpreted as a path separator on all
614    /// platforms.
615    ///
616    /// This is enabled by default on platforms where `\` is not a
617    /// path separator and disabled by default on platforms where `\`
618    /// is a path separator.
619    pub fn backslash_escape(&mut self, yes: bool) -> &mut GlobBuilder<'a> {
620        self.opts.backslash_escape = yes;
621        self
622    }
623
624    /// Toggle whether an empty pattern in a list of alternates is accepted.
625    ///
626    /// For example, if this is set then the glob `foo{,.txt}` will match both
627    /// `foo` and `foo.txt`.
628    ///
629    /// By default this is false.
630    pub fn empty_alternates(&mut self, yes: bool) -> &mut GlobBuilder<'a> {
631        self.opts.empty_alternates = yes;
632        self
633    }
634}
635
636impl Tokens {
637    /// Convert this pattern to a string that is guaranteed to be a valid
638    /// regular expression and will represent the matching semantics of this
639    /// glob pattern and the options given.
640    fn to_regex_with(&self, options: &GlobOptions) -> String {
641        let mut re = String::new();
642        re.push_str("(?-u)");
643        if options.case_insensitive {
644            re.push_str("(?i)");
645        }
646        re.push('^');
647        // Special case. If the entire glob is just `**`, then it should match
648        // everything.
649        if self.len() == 1 && self[0] == Token::RecursivePrefix {
650            re.push_str(".*");
651            re.push('$');
652            return re;
653        }
654        self.tokens_to_regex(options, &self, &mut re);
655        re.push('$');
656        re
657    }
658
659    fn tokens_to_regex(
660        &self,
661        options: &GlobOptions,
662        tokens: &[Token],
663        re: &mut String,
664    ) {
665        for tok in tokens.iter() {
666            match *tok {
667                Token::Literal(c) => {
668                    re.push_str(&char_to_escaped_literal(c));
669                }
670                Token::Any => {
671                    if options.literal_separator {
672                        re.push_str("[^/]");
673                    } else {
674                        re.push_str(".");
675                    }
676                }
677                Token::ZeroOrMore => {
678                    if options.literal_separator {
679                        re.push_str("[^/]*");
680                    } else {
681                        re.push_str(".*");
682                    }
683                }
684                Token::RecursivePrefix => {
685                    re.push_str("(?:/?|.*/)");
686                }
687                Token::RecursiveSuffix => {
688                    re.push_str("/.*");
689                }
690                Token::RecursiveZeroOrMore => {
691                    re.push_str("(?:/|/.*/)");
692                }
693                Token::Class { negated, ref ranges } => {
694                    re.push('[');
695                    if negated {
696                        re.push('^');
697                    }
698                    for r in ranges {
699                        if r.0 == r.1 {
700                            // Not strictly necessary, but nicer to look at.
701                            re.push_str(&char_to_escaped_literal(r.0));
702                        } else {
703                            re.push_str(&char_to_escaped_literal(r.0));
704                            re.push('-');
705                            re.push_str(&char_to_escaped_literal(r.1));
706                        }
707                    }
708                    re.push(']');
709                }
710                Token::Alternates(ref patterns) => {
711                    let mut parts = vec![];
712                    for pat in patterns {
713                        let mut altre = String::new();
714                        self.tokens_to_regex(options, &pat, &mut altre);
715                        if !altre.is_empty() || options.empty_alternates {
716                            parts.push(altre);
717                        }
718                    }
719
720                    // It is possible to have an empty set in which case the
721                    // resulting alternation '()' would be an error.
722                    if !parts.is_empty() {
723                        re.push_str("(?:");
724                        re.push_str(&parts.join("|"));
725                        re.push(')');
726                    }
727                }
728            }
729        }
730    }
731}
732
733/// Convert a Unicode scalar value to an escaped string suitable for use as
734/// a literal in a non-Unicode regex.
735fn char_to_escaped_literal(c: char) -> String {
736    let mut buf = [0; 4];
737    let bytes = c.encode_utf8(&mut buf).as_bytes();
738    bytes_to_escaped_literal(bytes)
739}
740
741/// Converts an arbitrary sequence of bytes to a UTF-8 string. All non-ASCII
742/// code units are converted to their escaped form.
743fn bytes_to_escaped_literal(bs: &[u8]) -> String {
744    let mut s = String::with_capacity(bs.len());
745    for &b in bs {
746        if b <= 0x7F {
747            regex_syntax::escape_into(
748                char::from(b).encode_utf8(&mut [0; 4]),
749                &mut s,
750            );
751        } else {
752            write!(&mut s, "\\x{:02x}", b).unwrap();
753        }
754    }
755    s
756}
757
758struct Parser<'a> {
759    glob: &'a str,
760    stack: Vec<Tokens>,
761    chars: std::iter::Peekable<std::str::Chars<'a>>,
762    prev: Option<char>,
763    cur: Option<char>,
764    opts: &'a GlobOptions,
765}
766
767impl<'a> Parser<'a> {
768    fn error(&self, kind: ErrorKind) -> Error {
769        Error { glob: Some(self.glob.to_string()), kind }
770    }
771
772    fn parse(&mut self) -> Result<(), Error> {
773        while let Some(c) = self.bump() {
774            match c {
775                '?' => self.push_token(Token::Any)?,
776                '*' => self.parse_star()?,
777                '[' => self.parse_class()?,
778                '{' => self.push_alternate()?,
779                '}' => self.pop_alternate()?,
780                ',' => self.parse_comma()?,
781                '\\' => self.parse_backslash()?,
782                c => self.push_token(Token::Literal(c))?,
783            }
784        }
785        Ok(())
786    }
787
788    fn push_alternate(&mut self) -> Result<(), Error> {
789        if self.stack.len() > 1 {
790            return Err(self.error(ErrorKind::NestedAlternates));
791        }
792        Ok(self.stack.push(Tokens::default()))
793    }
794
795    fn pop_alternate(&mut self) -> Result<(), Error> {
796        let mut alts = vec![];
797        while self.stack.len() >= 2 {
798            alts.push(self.stack.pop().unwrap());
799        }
800        self.push_token(Token::Alternates(alts))
801    }
802
803    fn push_token(&mut self, tok: Token) -> Result<(), Error> {
804        if let Some(ref mut pat) = self.stack.last_mut() {
805            return Ok(pat.push(tok));
806        }
807        Err(self.error(ErrorKind::UnopenedAlternates))
808    }
809
810    fn pop_token(&mut self) -> Result<Token, Error> {
811        if let Some(ref mut pat) = self.stack.last_mut() {
812            return Ok(pat.pop().unwrap());
813        }
814        Err(self.error(ErrorKind::UnopenedAlternates))
815    }
816
817    fn have_tokens(&self) -> Result<bool, Error> {
818        match self.stack.last() {
819            None => Err(self.error(ErrorKind::UnopenedAlternates)),
820            Some(ref pat) => Ok(!pat.is_empty()),
821        }
822    }
823
824    fn parse_comma(&mut self) -> Result<(), Error> {
825        // If we aren't inside a group alternation, then don't
826        // treat commas specially. Otherwise, we need to start
827        // a new alternate.
828        if self.stack.len() <= 1 {
829            self.push_token(Token::Literal(','))
830        } else {
831            Ok(self.stack.push(Tokens::default()))
832        }
833    }
834
835    fn parse_backslash(&mut self) -> Result<(), Error> {
836        if self.opts.backslash_escape {
837            match self.bump() {
838                None => Err(self.error(ErrorKind::DanglingEscape)),
839                Some(c) => self.push_token(Token::Literal(c)),
840            }
841        } else if is_separator('\\') {
842            // Normalize all patterns to use / as a separator.
843            self.push_token(Token::Literal('/'))
844        } else {
845            self.push_token(Token::Literal('\\'))
846        }
847    }
848
849    fn parse_star(&mut self) -> Result<(), Error> {
850        let prev = self.prev;
851        if self.peek() != Some('*') {
852            self.push_token(Token::ZeroOrMore)?;
853            return Ok(());
854        }
855        assert!(self.bump() == Some('*'));
856        if !self.have_tokens()? {
857            if !self.peek().map_or(true, is_separator) {
858                self.push_token(Token::ZeroOrMore)?;
859                self.push_token(Token::ZeroOrMore)?;
860            } else {
861                self.push_token(Token::RecursivePrefix)?;
862                assert!(self.bump().map_or(true, is_separator));
863            }
864            return Ok(());
865        }
866
867        if !prev.map(is_separator).unwrap_or(false) {
868            if self.stack.len() <= 1
869                || (prev != Some(',') && prev != Some('{'))
870            {
871                self.push_token(Token::ZeroOrMore)?;
872                self.push_token(Token::ZeroOrMore)?;
873                return Ok(());
874            }
875        }
876        let is_suffix = match self.peek() {
877            None => {
878                assert!(self.bump().is_none());
879                true
880            }
881            Some(',') | Some('}') if self.stack.len() >= 2 => true,
882            Some(c) if is_separator(c) => {
883                assert!(self.bump().map(is_separator).unwrap_or(false));
884                false
885            }
886            _ => {
887                self.push_token(Token::ZeroOrMore)?;
888                self.push_token(Token::ZeroOrMore)?;
889                return Ok(());
890            }
891        };
892        match self.pop_token()? {
893            Token::RecursivePrefix => {
894                self.push_token(Token::RecursivePrefix)?;
895            }
896            Token::RecursiveSuffix => {
897                self.push_token(Token::RecursiveSuffix)?;
898            }
899            _ => {
900                if is_suffix {
901                    self.push_token(Token::RecursiveSuffix)?;
902                } else {
903                    self.push_token(Token::RecursiveZeroOrMore)?;
904                }
905            }
906        }
907        Ok(())
908    }
909
910    fn parse_class(&mut self) -> Result<(), Error> {
911        fn add_to_last_range(
912            glob: &str,
913            r: &mut (char, char),
914            add: char,
915        ) -> Result<(), Error> {
916            r.1 = add;
917            if r.1 < r.0 {
918                Err(Error {
919                    glob: Some(glob.to_string()),
920                    kind: ErrorKind::InvalidRange(r.0, r.1),
921                })
922            } else {
923                Ok(())
924            }
925        }
926        let mut ranges = vec![];
927        let negated = match self.chars.peek() {
928            Some(&'!') | Some(&'^') => {
929                let bump = self.bump();
930                assert!(bump == Some('!') || bump == Some('^'));
931                true
932            }
933            _ => false,
934        };
935        let mut first = true;
936        let mut in_range = false;
937        loop {
938            let c = match self.bump() {
939                Some(c) => c,
940                // The only way to successfully break this loop is to observe
941                // a ']'.
942                None => return Err(self.error(ErrorKind::UnclosedClass)),
943            };
944            match c {
945                ']' => {
946                    if first {
947                        ranges.push((']', ']'));
948                    } else {
949                        break;
950                    }
951                }
952                '-' => {
953                    if first {
954                        ranges.push(('-', '-'));
955                    } else if in_range {
956                        // invariant: in_range is only set when there is
957                        // already at least one character seen.
958                        let r = ranges.last_mut().unwrap();
959                        add_to_last_range(&self.glob, r, '-')?;
960                        in_range = false;
961                    } else {
962                        assert!(!ranges.is_empty());
963                        in_range = true;
964                    }
965                }
966                c => {
967                    if in_range {
968                        // invariant: in_range is only set when there is
969                        // already at least one character seen.
970                        add_to_last_range(
971                            &self.glob,
972                            ranges.last_mut().unwrap(),
973                            c,
974                        )?;
975                    } else {
976                        ranges.push((c, c));
977                    }
978                    in_range = false;
979                }
980            }
981            first = false;
982        }
983        if in_range {
984            // Means that the last character in the class was a '-', so add
985            // it as a literal.
986            ranges.push(('-', '-'));
987        }
988        self.push_token(Token::Class { negated, ranges })
989    }
990
991    fn bump(&mut self) -> Option<char> {
992        self.prev = self.cur;
993        self.cur = self.chars.next();
994        self.cur
995    }
996
997    fn peek(&mut self) -> Option<char> {
998        self.chars.peek().map(|&ch| ch)
999    }
1000}
1001
/// Returns true if `haystack` begins with `needle`.
///
/// An empty `needle` is a prefix of every haystack.
#[cfg(test)]
fn starts_with(needle: &[u8], haystack: &[u8]) -> bool {
    haystack.starts_with(needle)
}
1006
/// Returns true if `haystack` ends with `needle`.
///
/// An empty `needle` is a suffix of every haystack.
#[cfg(test)]
fn ends_with(needle: &[u8], haystack: &[u8]) -> bool {
    haystack.ends_with(needle)
}
1014
1015#[cfg(test)]
1016mod tests {
1017    use super::Token::*;
1018    use super::{Glob, GlobBuilder, Token};
1019    use crate::{ErrorKind, GlobSetBuilder};
1020
    /// Optional `GlobBuilder` settings used by the test macros in this
    /// module. A field left as `None` means the corresponding builder method
    /// is not called at all.
    #[derive(Clone, Copy, Debug, Default)]
    struct Options {
        /// Passed to `GlobBuilder::case_insensitive` when set.
        casei: Option<bool>,
        /// Passed to `GlobBuilder::literal_separator` when set.
        litsep: Option<bool>,
        /// Passed to `GlobBuilder::backslash_escape` when set.
        bsesc: Option<bool>,
        /// Passed to `GlobBuilder::empty_alternates` when set.
        ealtre: Option<bool>,
    }
1028
1029    macro_rules! syntax {
1030        ($name:ident, $pat:expr, $tokens:expr) => {
1031            #[test]
1032            fn $name() {
1033                let pat = Glob::new($pat).unwrap();
1034                assert_eq!($tokens, pat.tokens.0);
1035            }
1036        };
1037    }
1038
1039    macro_rules! syntaxerr {
1040        ($name:ident, $pat:expr, $err:expr) => {
1041            #[test]
1042            fn $name() {
1043                let err = Glob::new($pat).unwrap_err();
1044                assert_eq!(&$err, err.kind());
1045            }
1046        };
1047    }
1048
1049    macro_rules! toregex {
1050        ($name:ident, $pat:expr, $re:expr) => {
1051            toregex!($name, $pat, $re, Options::default());
1052        };
1053        ($name:ident, $pat:expr, $re:expr, $options:expr) => {
1054            #[test]
1055            fn $name() {
1056                let mut builder = GlobBuilder::new($pat);
1057                if let Some(casei) = $options.casei {
1058                    builder.case_insensitive(casei);
1059                }
1060                if let Some(litsep) = $options.litsep {
1061                    builder.literal_separator(litsep);
1062                }
1063                if let Some(bsesc) = $options.bsesc {
1064                    builder.backslash_escape(bsesc);
1065                }
1066                if let Some(ealtre) = $options.ealtre {
1067                    builder.empty_alternates(ealtre);
1068                }
1069                let pat = builder.build().unwrap();
1070                assert_eq!(format!("(?-u){}", $re), pat.regex());
1071            }
1072        };
1073    }
1074
1075    macro_rules! matches {
1076        ($name:ident, $pat:expr, $path:expr) => {
1077            matches!($name, $pat, $path, Options::default());
1078        };
1079        ($name:ident, $pat:expr, $path:expr, $options:expr) => {
1080            #[test]
1081            fn $name() {
1082                let mut builder = GlobBuilder::new($pat);
1083                if let Some(casei) = $options.casei {
1084                    builder.case_insensitive(casei);
1085                }
1086                if let Some(litsep) = $options.litsep {
1087                    builder.literal_separator(litsep);
1088                }
1089                if let Some(bsesc) = $options.bsesc {
1090                    builder.backslash_escape(bsesc);
1091                }
1092                if let Some(ealtre) = $options.ealtre {
1093                    builder.empty_alternates(ealtre);
1094                }
1095                let pat = builder.build().unwrap();
1096                let matcher = pat.compile_matcher();
1097                let strategic = pat.compile_strategic_matcher();
1098                let set = GlobSetBuilder::new().add(pat).build().unwrap();
1099                assert!(matcher.is_match($path));
1100                assert!(strategic.is_match($path));
1101                assert!(set.is_match($path));
1102            }
1103        };
1104    }
1105
1106    macro_rules! nmatches {
1107        ($name:ident, $pat:expr, $path:expr) => {
1108            nmatches!($name, $pat, $path, Options::default());
1109        };
1110        ($name:ident, $pat:expr, $path:expr, $options:expr) => {
1111            #[test]
1112            fn $name() {
1113                let mut builder = GlobBuilder::new($pat);
1114                if let Some(casei) = $options.casei {
1115                    builder.case_insensitive(casei);
1116                }
1117                if let Some(litsep) = $options.litsep {
1118                    builder.literal_separator(litsep);
1119                }
1120                if let Some(bsesc) = $options.bsesc {
1121                    builder.backslash_escape(bsesc);
1122                }
1123                if let Some(ealtre) = $options.ealtre {
1124                    builder.empty_alternates(ealtre);
1125                }
1126                let pat = builder.build().unwrap();
1127                let matcher = pat.compile_matcher();
1128                let strategic = pat.compile_strategic_matcher();
1129                let set = GlobSetBuilder::new().add(pat).build().unwrap();
1130                assert!(!matcher.is_match($path));
1131                assert!(!strategic.is_match($path));
1132                assert!(!set.is_match($path));
1133            }
1134        };
1135    }
1136
1137    fn s(string: &str) -> String {
1138        string.to_string()
1139    }
1140
1141    fn class(s: char, e: char) -> Token {
1142        Class { negated: false, ranges: vec![(s, e)] }
1143    }
1144
1145    fn classn(s: char, e: char) -> Token {
1146        Class { negated: true, ranges: vec![(s, e)] }
1147    }
1148
1149    fn rclass(ranges: &[(char, char)]) -> Token {
1150        Class { negated: false, ranges: ranges.to_vec() }
1151    }
1152
1153    fn rclassn(ranges: &[(char, char)]) -> Token {
1154        Class { negated: true, ranges: ranges.to_vec() }
1155    }
1156
    // Token-level parsing. Each case parses the pattern with `Glob::new` and
    // checks the exact token sequence produced.

    // Literals and the single-character wildcard.
    syntax!(literal1, "a", vec![Literal('a')]);
    syntax!(literal2, "ab", vec![Literal('a'), Literal('b')]);
    syntax!(any1, "?", vec![Any]);
    syntax!(any2, "a?b", vec![Literal('a'), Any, Literal('b')]);
    // Single stars.
    syntax!(seq1, "*", vec![ZeroOrMore]);
    syntax!(seq2, "a*b", vec![Literal('a'), ZeroOrMore, Literal('b')]);
    syntax!(
        seq3,
        "*a*b*",
        vec![ZeroOrMore, Literal('a'), ZeroOrMore, Literal('b'), ZeroOrMore,]
    );
    // Double stars in prefix, suffix and infix position. Note that the
    // separators adjacent to the double star do not show up as literals.
    syntax!(rseq1, "**", vec![RecursivePrefix]);
    syntax!(rseq2, "**/", vec![RecursivePrefix]);
    syntax!(rseq3, "/**", vec![RecursiveSuffix]);
    syntax!(rseq4, "/**/", vec![RecursiveZeroOrMore]);
    syntax!(
        rseq5,
        "a/**/b",
        vec![Literal('a'), RecursiveZeroOrMore, Literal('b'),]
    );
    // Character classes: negation with `!`, literal `-`, `]`, `*` and `!`
    // inside a class, and `-` at the start or end of a class.
    syntax!(cls1, "[a]", vec![class('a', 'a')]);
    syntax!(cls2, "[!a]", vec![classn('a', 'a')]);
    syntax!(cls3, "[a-z]", vec![class('a', 'z')]);
    syntax!(cls4, "[!a-z]", vec![classn('a', 'z')]);
    syntax!(cls5, "[-]", vec![class('-', '-')]);
    syntax!(cls6, "[]]", vec![class(']', ']')]);
    syntax!(cls7, "[*]", vec![class('*', '*')]);
    syntax!(cls8, "[!!]", vec![classn('!', '!')]);
    syntax!(cls9, "[a-]", vec![rclass(&[('a', 'a'), ('-', '-')])]);
    syntax!(cls10, "[-a-z]", vec![rclass(&[('-', '-'), ('a', 'z')])]);
    syntax!(cls11, "[a-z-]", vec![rclass(&[('a', 'z'), ('-', '-')])]);
    syntax!(
        cls12,
        "[-a-z-]",
        vec![rclass(&[('-', '-'), ('a', 'z'), ('-', '-')]),]
    );
    // `]` and `-` used as range endpoints.
    syntax!(cls13, "[]-z]", vec![class(']', 'z')]);
    syntax!(cls14, "[--z]", vec![class('-', 'z')]);
    syntax!(cls15, "[ --]", vec![class(' ', '-')]);
    // Multiple ranges are kept in pattern order.
    syntax!(cls16, "[0-9a-z]", vec![rclass(&[('0', '9'), ('a', 'z')])]);
    syntax!(cls17, "[a-z0-9]", vec![rclass(&[('a', 'z'), ('0', '9')])]);
    syntax!(cls18, "[!0-9a-z]", vec![rclassn(&[('0', '9'), ('a', 'z')])]);
    syntax!(cls19, "[!a-z0-9]", vec![rclassn(&[('a', 'z'), ('0', '9')])]);
    // `^` negates a class just like `!`.
    syntax!(cls20, "[^a]", vec![classn('a', 'a')]);
    syntax!(cls21, "[^a-z]", vec![classn('a', 'z')]);
1202
    // Malformed character classes. A `]` in first position (after optional
    // negation) is a literal, so `[]` and `[!]` count as unclosed.
    syntaxerr!(err_unclosed1, "[", ErrorKind::UnclosedClass);
    syntaxerr!(err_unclosed2, "[]", ErrorKind::UnclosedClass);
    syntaxerr!(err_unclosed3, "[!", ErrorKind::UnclosedClass);
    syntaxerr!(err_unclosed4, "[!]", ErrorKind::UnclosedClass);
    // Ranges whose end sorts before their start.
    syntaxerr!(err_range1, "[z-a]", ErrorKind::InvalidRange('z', 'a'));
    syntaxerr!(err_range2, "[z--]", ErrorKind::InvalidRange('z', '-'));
1209
    // Option presets shared by the tests below. Fields left as `None` are
    // not applied to the builder.

    // Case-insensitive matching enabled.
    const CASEI: Options =
        Options { casei: Some(true), litsep: None, bsesc: None, ealtre: None };
    // Literal-separator mode enabled.
    const SLASHLIT: Options =
        Options { casei: None, litsep: Some(true), bsesc: None, ealtre: None };
    // Backslash escaping explicitly disabled.
    const NOBSESC: Options = Options {
        casei: None,
        litsep: None,
        bsesc: Some(false),
        ealtre: None,
    };
    // Backslash escaping explicitly enabled.
    const BSESC: Options =
        Options { casei: None, litsep: None, bsesc: Some(true), ealtre: None };
    // Empty alternates enabled; this preset also enables backslash escaping.
    const EALTRE: Options = Options {
        casei: None,
        litsep: None,
        bsesc: Some(true),
        ealtre: Some(true),
    };
1228
    // Glob-to-regex translation. The expected string is compared against the
    // glob's regex after the macro prepends `(?-u)`.

    // Case-insensitive globs carry an `(?i)` flag.
    toregex!(re_casei, "a", "(?i)^a$", &CASEI);

    // In literal-separator mode wildcards exclude `/`.
    toregex!(re_slash1, "?", r"^[^/]$", SLASHLIT);
    toregex!(re_slash2, "*", r"^[^/]*$", SLASHLIT);

    // Literals, `?`, `*` and classes; regex metacharacters are escaped and
    // non-ASCII characters are emitted as escaped UTF-8 bytes.
    toregex!(re1, "a", "^a$");
    toregex!(re2, "?", "^.$");
    toregex!(re3, "*", "^.*$");
    toregex!(re4, "a?", "^a.$");
    toregex!(re5, "?a", "^.a$");
    toregex!(re6, "a*", "^a.*$");
    toregex!(re7, "*a", "^.*a$");
    toregex!(re8, "[*]", r"^[\*]$");
    toregex!(re9, "[+]", r"^[\+]$");
    toregex!(re10, "+", r"^\+$");
    toregex!(re11, "☃", r"^\xe2\x98\x83$");
    // Recursive double stars; repeating them yields the same regex as a
    // single occurrence.
    toregex!(re12, "**", r"^.*$");
    toregex!(re13, "**/", r"^.*$");
    toregex!(re14, "**/*", r"^(?:/?|.*/).*$");
    toregex!(re15, "**/**", r"^.*$");
    toregex!(re16, "**/**/*", r"^(?:/?|.*/).*$");
    toregex!(re17, "**/**/**", r"^.*$");
    toregex!(re18, "**/**/**/*", r"^(?:/?|.*/).*$");
    toregex!(re19, "a/**", r"^a/.*$");
    toregex!(re20, "a/**/**", r"^a/.*$");
    toregex!(re21, "a/**/**/**", r"^a/.*$");
    toregex!(re22, "a/**/b", r"^a(?:/|/.*/)b$");
    toregex!(re23, "a/**/**/b", r"^a(?:/|/.*/)b$");
    toregex!(re24, "a/**/**/**/b", r"^a(?:/|/.*/)b$");
    toregex!(re25, "**/b", r"^(?:/?|.*/)b$");
    toregex!(re26, "**/**/b", r"^(?:/?|.*/)b$");
    toregex!(re27, "**/**/**/b", r"^(?:/?|.*/)b$");
    // A double star that is not a whole path component is just two `*`.
    toregex!(re28, "a**", r"^a.*.*$");
    toregex!(re29, "**a", r"^.*.*a$");
    toregex!(re30, "a**b", r"^a.*.*b$");
    toregex!(re31, "***", r"^.*.*.*$");
    toregex!(re32, "/a**", r"^/a.*.*$");
    toregex!(re33, "/**a", r"^/.*.*a$");
    toregex!(re34, "/a**b", r"^/a.*.*b$");
    // Alternates appear in the regex in reverse pattern order.
    toregex!(re35, "{a,b}", r"^(?:b|a)$");
1269
    // Positive matches for literals, single stars and classes. Each case is
    // checked against the plain matcher, the strategic matcher and a GlobSet.
    matches!(match1, "a", "a");
    matches!(match2, "a*b", "a_b");
    matches!(match3, "a*b*c", "abc");
    matches!(match4, "a*b*c", "a_b_c");
    matches!(match5, "a*b*c", "a___b___c");
    matches!(match6, "abc*abc*abc", "abcabcabcabcabcabcabc");
    matches!(match7, "a*a*a*a*a*a*a*a*a", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa");
    matches!(match8, "a*b[xyz]c*d", "abxcdbxcddd");
    // `*` may match the empty string.
    matches!(match9, "*.rs", ".rs");
    matches!(match10, "☃", "☃");
1280
    // Positive matches for recursive double-star patterns.

    // An infix double star matches zero or more directories.
    matches!(matchrec1, "some/**/needle.txt", "some/needle.txt");
    matches!(matchrec2, "some/**/needle.txt", "some/one/needle.txt");
    matches!(matchrec3, "some/**/needle.txt", "some/one/two/needle.txt");
    matches!(matchrec4, "some/**/needle.txt", "some/other/needle.txt");
    // A bare double star matches anything, including the empty path.
    matches!(matchrec5, "**", "abcde");
    matches!(matchrec6, "**", "");
    matches!(matchrec7, "**", ".asdf");
    matches!(matchrec8, "**", "/x/.asdf");
    // Repeated double stars behave like a single one.
    matches!(matchrec9, "some/**/**/needle.txt", "some/needle.txt");
    matches!(matchrec10, "some/**/**/needle.txt", "some/one/needle.txt");
    matches!(matchrec11, "some/**/**/needle.txt", "some/one/two/needle.txt");
    matches!(matchrec12, "some/**/**/needle.txt", "some/other/needle.txt");
    // A leading double star, with and without a root separator before it.
    matches!(matchrec13, "**/test", "one/two/test");
    matches!(matchrec14, "**/test", "one/test");
    matches!(matchrec15, "**/test", "test");
    matches!(matchrec16, "/**/test", "/one/two/test");
    matches!(matchrec17, "/**/test", "/one/test");
    matches!(matchrec18, "/**/test", "/test");
    matches!(matchrec19, "**/.*", ".abc");
    matches!(matchrec20, "**/.*", "abc/.abc");
    matches!(matchrec21, "**/foo/bar", "foo/bar");
    // A trailing double star.
    matches!(matchrec22, ".*/**", ".abc/abc");
    matches!(matchrec23, "test/**", "test/");
    matches!(matchrec24, "test/**", "test/one");
    matches!(matchrec25, "test/**", "test/one/two");
    // A single star between separators.
    matches!(matchrec26, "some/*/needle.txt", "some/one/needle.txt");
1307
    // Positive matches for character classes: ranges, negation with `!` and
    // `^`, mixed ranges and single characters, and literal `-` placement.
    matches!(matchrange1, "a[0-9]b", "a0b");
    matches!(matchrange2, "a[0-9]b", "a9b");
    matches!(matchrange3, "a[!0-9]b", "a_b");
    matches!(matchrange4, "[a-z123]", "1");
    matches!(matchrange5, "[1a-z23]", "1");
    matches!(matchrange6, "[123a-z]", "1");
    matches!(matchrange7, "[abc-]", "-");
    matches!(matchrange8, "[-abc]", "-");
    matches!(matchrange9, "[-a-c]", "b");
    matches!(matchrange10, "[a-c-]", "b");
    matches!(matchrange11, "[-]", "-");
    matches!(matchrange12, "a[^0-9]b", "a_b");
1320
    // With default options a single `*` also matches across path
    // separators, so a leading `*` matches any directory prefix.
    matches!(matchpat1, "*hello.txt", "hello.txt");
    matches!(matchpat2, "*hello.txt", "gareth_says_hello.txt");
    matches!(matchpat3, "*hello.txt", "some/path/to/hello.txt");
    matches!(matchpat4, "*hello.txt", "some\\path\\to\\hello.txt");
    matches!(matchpat5, "*hello.txt", "/an/absolute/path/to/hello.txt");
    matches!(matchpat6, "*some/path/to/hello.txt", "some/path/to/hello.txt");
    matches!(
        matchpat7,
        "*some/path/to/hello.txt",
        "a/bigger/some/path/to/hello.txt"
    );

    // Glob metacharacters can be matched literally by wrapping them in a
    // class; `!` outside a class is an ordinary character.
    matches!(matchescape, "_[[]_[]]_[?]_[*]_!_", "_[_]_?_*_!_");

    // Case-insensitive matching accepts any mix of upper and lower case.
    matches!(matchcasei1, "aBcDeFg", "aBcDeFg", CASEI);
    matches!(matchcasei2, "aBcDeFg", "abcdefg", CASEI);
    matches!(matchcasei3, "aBcDeFg", "ABCDEFG", CASEI);
    matches!(matchcasei4, "aBcDeFg", "AbCdEfG", CASEI);
1339
    // Alternation. Outside of braces a comma is an ordinary literal.
    matches!(matchalt1, "a,b", "a,b");
    matches!(matchalt2, ",", ",");
    matches!(matchalt3, "{a,b}", "a");
    matches!(matchalt4, "{a,b}", "b");
    matches!(matchalt5, "{**/src/**,foo}", "abc/src/bar");
    matches!(matchalt6, "{**/src/**,foo}", "foo");
    // A `}` inside a class does not close the group.
    matches!(matchalt7, "{[}],foo}", "}");
    matches!(matchalt8, "{foo}", "foo");
    matches!(matchalt9, "{}", "");
    matches!(matchalt10, "{,}", "");
    matches!(matchalt11, "{*.foo,*.bar,*.wat}", "test.foo");
    matches!(matchalt12, "{*.foo,*.bar,*.wat}", "test.bar");
    matches!(matchalt13, "{*.foo,*.bar,*.wat}", "test.wat");
    // An empty alternate only matches the empty string when the
    // `empty_alternates` option is enabled (EALTRE); by default it does not.
    matches!(matchalt14, "foo{,.txt}", "foo.txt");
    nmatches!(matchalt15, "foo{,.txt}", "foo");
    matches!(matchalt16, "foo{,.txt}", "foo", EALTRE);
1356
    // Literal-separator mode: `?` and `*` must not match a path separator,
    // while an explicit separator (also inside a class) still does.
    matches!(matchslash1, "abc/def", "abc/def", SLASHLIT);
    #[cfg(unix)]
    nmatches!(matchslash2, "abc?def", "abc/def", SLASHLIT);
    #[cfg(not(unix))]
    nmatches!(matchslash2, "abc?def", "abc\\def", SLASHLIT);
    nmatches!(matchslash3, "abc*def", "abc/def", SLASHLIT);
    matches!(matchslash4, "abc[/]def", "abc/def", SLASHLIT); // differs
    // A backslash in the pattern is expected to match `/` only on non-Unix
    // targets.
    #[cfg(unix)]
    nmatches!(matchslash5, "abc\\def", "abc/def", SLASHLIT);
    #[cfg(not(unix))]
    matches!(matchslash5, "abc\\def", "abc/def", SLASHLIT);

    // With backslash escaping enabled, `\` makes the next character literal.
    matches!(matchbackslash1, "\\[", "[", BSESC);
    matches!(matchbackslash2, "\\?", "?", BSESC);
    matches!(matchbackslash3, "\\*", "*", BSESC);
    // With it disabled, `\` is kept in the pattern and the following
    // character retains its glob meaning.
    matches!(matchbackslash4, "\\[a-z]", "\\a", NOBSESC);
    matches!(matchbackslash5, "\\?", "\\a", NOBSESC);
    matches!(matchbackslash6, "\\*", "\\\\", NOBSESC);
    // With default options the expected result depends on the platform.
    #[cfg(unix)]
    matches!(matchbackslash7, "\\a", "a");
    #[cfg(not(unix))]
    matches!(matchbackslash8, "\\a", "/a");
1379
    // Negative cases: none of the three matchers may accept these paths.

    // Globs are anchored at both ends.
    nmatches!(matchnot1, "a*b*c", "abcd");
    nmatches!(matchnot2, "abc*abc*abc", "abcabcabcabcabcabcabca");
    // Recursive patterns still require the literal parts to match.
    nmatches!(matchnot3, "some/**/needle.txt", "some/other/notthis.txt");
    nmatches!(matchnot4, "some/**/**/needle.txt", "some/other/notthis.txt");
    nmatches!(matchnot5, "/**/test", "test");
    nmatches!(matchnot6, "/**/test", "/one/notthis");
    nmatches!(matchnot7, "/**/test", "/notthis");
    nmatches!(matchnot8, "**/.*", "ab.c");
    nmatches!(matchnot9, "**/.*", "abc/ab.c");
    nmatches!(matchnot10, ".*/**", "a.bc");
    nmatches!(matchnot11, ".*/**", "abc/a.bc");
    // Character classes and negated classes.
    nmatches!(matchnot12, "a[0-9]b", "a_b");
    nmatches!(matchnot13, "a[!0-9]b", "a0b");
    nmatches!(matchnot14, "a[!0-9]b", "a9b");
    nmatches!(matchnot15, "[!-]", "-");
    // A leading `*` does not relax the anchor at the end of the path.
    nmatches!(matchnot16, "*hello.txt", "hello.txt-and-then-some");
    nmatches!(matchnot17, "*hello.txt", "goodbye.txt");
    nmatches!(
        matchnot18,
        "*some/path/to/hello.txt",
        "some/path/to/hello.txt-and-then-some"
    );
    nmatches!(
        matchnot19,
        "*some/path/to/hello.txt",
        "some/other/path/to/hello.txt"
    );
    // A plain literal must match the whole path, not just a component.
    nmatches!(matchnot20, "a", "foo/a");
    nmatches!(matchnot21, "./foo", "foo");
    // A leading double star only matches at a component boundary.
    nmatches!(matchnot22, "**/foo", "foofoo");
    nmatches!(matchnot23, "**/foo/bar", "foofoo/bar");
    nmatches!(matchnot24, "/*.c", "mozilla-sha1/sha1.c");
    nmatches!(matchnot25, "*.c", "mozilla-sha1/sha1.c", SLASHLIT);
    nmatches!(
        matchnot26,
        "**/m4/ltoptions.m4",
        "csharp/src/packages/repositories.config",
        SLASHLIT
    );
    nmatches!(matchnot27, "a[^0-9]b", "a0b");
    nmatches!(matchnot28, "a[^0-9]b", "a9b");
    nmatches!(matchnot29, "[^-]", "-");
    // A single star between separators needs exactly one component when
    // separators are literal.
    nmatches!(matchnot30, "some/*/needle.txt", "some/needle.txt");
    nmatches!(
        matchrec31,
        "some/*/needle.txt",
        "some/one/two/needle.txt",
        SLASHLIT
    );
    nmatches!(
        matchrec32,
        "some/*/needle.txt",
        "some/one/two/three/needle.txt",
        SLASHLIT
    );
    // A trailing double star requires the separator before it to be present.
    nmatches!(matchrec33, ".*/**", ".abc");
    nmatches!(matchrec34, "foo/**", "foo");
1437
1438    macro_rules! extract {
1439        ($which:ident, $name:ident, $pat:expr, $expect:expr) => {
1440            extract!($which, $name, $pat, $expect, Options::default());
1441        };
1442        ($which:ident, $name:ident, $pat:expr, $expect:expr, $options:expr) => {
1443            #[test]
1444            fn $name() {
1445                let mut builder = GlobBuilder::new($pat);
1446                if let Some(casei) = $options.casei {
1447                    builder.case_insensitive(casei);
1448                }
1449                if let Some(litsep) = $options.litsep {
1450                    builder.literal_separator(litsep);
1451                }
1452                if let Some(bsesc) = $options.bsesc {
1453                    builder.backslash_escape(bsesc);
1454                }
1455                if let Some(ealtre) = $options.ealtre {
1456                    builder.empty_alternates(ealtre);
1457                }
1458                let pat = builder.build().unwrap();
1459                assert_eq!($expect, pat.$which());
1460            }
1461        };
1462    }
1463
    // Thin wrappers around `extract!`, one per accessor on the built glob.
    // Each forwards its arguments unchanged and only fixes the accessor name.

    // Checks the result of `literal()`.
    macro_rules! literal {
        ($($tt:tt)*) => { extract!(literal, $($tt)*); }
    }

    // Checks the result of `basename_tokens()`.
    macro_rules! basetokens {
        ($($tt:tt)*) => { extract!(basename_tokens, $($tt)*); }
    }

    // Checks the result of `ext()`.
    macro_rules! ext {
        ($($tt:tt)*) => { extract!(ext, $($tt)*); }
    }

    // Checks the result of `required_ext()`.
    macro_rules! required_ext {
        ($($tt:tt)*) => { extract!(required_ext, $($tt)*); }
    }

    // Checks the result of `prefix()`.
    macro_rules! prefix {
        ($($tt:tt)*) => { extract!(prefix, $($tt)*); }
    }

    // Checks the result of `suffix()`.
    macro_rules! suffix {
        ($($tt:tt)*) => { extract!(suffix, $($tt)*); }
    }

    // Checks the result of `basename_literal()`.
    macro_rules! baseliteral {
        ($($tt:tt)*) => { extract!(basename_literal, $($tt)*); }
    }
1491
    // Whole-path literal extraction: only wildcard-free, case-sensitive
    // patterns yield a literal.
    literal!(extract_lit1, "foo", Some(s("foo")));
    literal!(extract_lit2, "foo", None, CASEI);
    literal!(extract_lit3, "/foo", Some(s("/foo")));
    literal!(extract_lit4, "/foo/", Some(s("/foo/")));
    literal!(extract_lit5, "/foo/bar", Some(s("/foo/bar")));
    literal!(extract_lit6, "*.foo", None);
    literal!(extract_lit7, "foo/bar", Some(s("foo/bar")));
    literal!(extract_lit8, "**/foo/bar", None);

    // Basename token extraction: the tokens after a leading double-star
    // prefix. A basename containing `*` is only extracted in
    // literal-separator mode.
    basetokens!(
        extract_basetoks1,
        "**/foo",
        Some(&*vec![Literal('f'), Literal('o'), Literal('o'),])
    );
    basetokens!(extract_basetoks2, "**/foo", None, CASEI);
    basetokens!(
        extract_basetoks3,
        "**/foo",
        Some(&*vec![Literal('f'), Literal('o'), Literal('o'),]),
        SLASHLIT
    );
    basetokens!(extract_basetoks4, "*foo", None, SLASHLIT);
    basetokens!(extract_basetoks5, "*foo", None);
    basetokens!(extract_basetoks6, "**/fo*o", None);
    basetokens!(
        extract_basetoks7,
        "**/fo*o",
        Some(&*vec![Literal('f'), Literal('o'), ZeroOrMore, Literal('o'),]),
        SLASHLIT
    );

    // Extension extraction: the returned extension includes the leading dot.
    ext!(extract_ext1, "**/*.rs", Some(s(".rs")));
    ext!(extract_ext2, "**/*.rs.bak", None);
    ext!(extract_ext3, "*.rs", Some(s(".rs")));
    ext!(extract_ext4, "a*.rs", None);
    ext!(extract_ext5, "/*.c", None);
    ext!(extract_ext6, "*.c", None, SLASHLIT);
    ext!(extract_ext7, "*.c", Some(s(".c")));

    // Required-extension extraction.
    required_ext!(extract_req_ext1, "*.rs", Some(s(".rs")));
    required_ext!(extract_req_ext2, "/foo/bar/*.rs", Some(s(".rs")));
    required_ext!(extract_req_ext3, "/foo/bar/*.rs", Some(s(".rs")));
    required_ext!(extract_req_ext4, "/foo/bar/.rs", Some(s(".rs")));
    required_ext!(extract_req_ext5, ".rs", Some(s(".rs")));
    required_ext!(extract_req_ext6, "./rs", None);
    required_ext!(extract_req_ext7, "foo", None);
    required_ext!(extract_req_ext8, ".foo/", None);
    required_ext!(extract_req_ext9, "foo/", None);

    // Literal prefix extraction.
    prefix!(extract_prefix1, "/foo", Some(s("/foo")));
    prefix!(extract_prefix2, "/foo/*", Some(s("/foo/")));
    prefix!(extract_prefix3, "**/foo", None);
    prefix!(extract_prefix4, "foo/**", Some(s("foo/")));

    // Literal suffix extraction. The boolean is true only for the pattern
    // that starts with a double-star prefix followed by a literal path.
    suffix!(extract_suffix1, "**/foo/bar", Some((s("/foo/bar"), true)));
    suffix!(extract_suffix2, "*/foo/bar", Some((s("/foo/bar"), false)));
    suffix!(extract_suffix3, "*/foo/bar", None, SLASHLIT);
    suffix!(extract_suffix4, "foo/bar", Some((s("foo/bar"), false)));
    suffix!(extract_suffix5, "*.foo", Some((s(".foo"), false)));
    suffix!(extract_suffix6, "*.foo", None, SLASHLIT);
    suffix!(extract_suffix7, "**/*_test", Some((s("_test"), false)));

    // Basename literal extraction: only a double-star prefix followed by a
    // plain literal basename qualifies.
    baseliteral!(extract_baselit1, "**/foo", Some(s("foo")));
    baseliteral!(extract_baselit2, "foo", None);
    baseliteral!(extract_baselit3, "*foo", None);
    baseliteral!(extract_baselit4, "*/foo", None);
1558}
globset/glob.rs

globset/
glob.rs