globset/
lib.rs

1/*!
2The globset crate provides cross platform single glob and glob set matching.
3
4Glob set matching is the process of matching one or more glob patterns against
5a single candidate path simultaneously, and returning all of the globs that
6matched. For example, given this set of globs:
7
8* `*.rs`
9* `src/lib.rs`
10* `src/**/foo.rs`
11
12and a path `src/bar/baz/foo.rs`, then the set would report the first and third
13globs as matching.
14
15# Example: one glob
16
17This example shows how to match a single glob against a single file path.
18
19```
20use globset::Glob;
21
22let glob = Glob::new("*.rs")?.compile_matcher();
23
24assert!(glob.is_match("foo.rs"));
25assert!(glob.is_match("foo/bar.rs"));
26assert!(!glob.is_match("Cargo.toml"));
27# Ok::<(), Box<dyn std::error::Error>>(())
28```
29
30# Example: configuring a glob matcher
31
32This example shows how to use a `GlobBuilder` to configure aspects of match
33semantics. In this example, we prevent wildcards from matching path separators.
34
35```
36use globset::GlobBuilder;
37
38let glob = GlobBuilder::new("*.rs")
39    .literal_separator(true).build()?.compile_matcher();
40
41assert!(glob.is_match("foo.rs"));
42assert!(!glob.is_match("foo/bar.rs")); // no longer matches
43assert!(!glob.is_match("Cargo.toml"));
44# Ok::<(), Box<dyn std::error::Error>>(())
45```
46
47# Example: match multiple globs at once
48
49This example shows how to match multiple glob patterns at once.
50
51```
52use globset::{Glob, GlobSetBuilder};
53
54let mut builder = GlobSetBuilder::new();
55// A GlobBuilder can be used to configure each glob's match semantics
56// independently.
57builder.add(Glob::new("*.rs")?);
58builder.add(Glob::new("src/lib.rs")?);
59builder.add(Glob::new("src/**/foo.rs")?);
60let set = builder.build()?;
61
62assert_eq!(set.matches("src/bar/baz/foo.rs"), vec![0, 2]);
63# Ok::<(), Box<dyn std::error::Error>>(())
64```
65
66# Syntax
67
68Standard Unix-style glob syntax is supported:
69
70* `?` matches any single character. (If the `literal_separator` option is
71  enabled, then `?` can never match a path separator.)
72* `*` matches zero or more characters. (If the `literal_separator` option is
73  enabled, then `*` can never match a path separator.)
74* `**` recursively matches directories but are only legal in three situations.
75  First, if the glob starts with <code>\*\*&#x2F;</code>, then it matches
76  all directories. For example, <code>\*\*&#x2F;foo</code> matches `foo`
77  and `bar/foo` but not `foo/bar`. Secondly, if the glob ends with
78  <code>&#x2F;\*\*</code>, then it matches all sub-entries. For example,
79  <code>foo&#x2F;\*\*</code> matches `foo/a` and `foo/a/b`, but not `foo`.
80  Thirdly, if the glob contains <code>&#x2F;\*\*&#x2F;</code> anywhere within
81  the pattern, then it matches zero or more directories. Using `**` anywhere
82  else is illegal (N.B. the glob `**` is allowed and means "match everything").
83* `{a,b}` matches `a` or `b` where `a` and `b` are arbitrary glob patterns.
84  (N.B. Nesting `{...}` is not currently allowed.)
85* `[ab]` matches `a` or `b` where `a` and `b` are characters. Use
86  `[!ab]` to match any character except for `a` and `b`.
87* Metacharacters such as `*` and `?` can be escaped with character class
88  notation. e.g., `[*]` matches `*`.
89* When backslash escapes are enabled, a backslash (`\`) will escape all meta
90  characters in a glob. If it precedes a non-meta character, then the slash is
91  ignored. A `\\` will match a literal `\\`. Note that this mode is only
92  enabled on Unix platforms by default, but can be enabled on any platform
93  via the `backslash_escape` setting on `Glob`.
94
95A `GlobBuilder` can be used to prevent wildcards from matching path separators,
96or to enable case insensitive matching.
97*/
98
99#![deny(missing_docs)]
100
101use std::{
102    borrow::Cow,
103    panic::{RefUnwindSafe, UnwindSafe},
104    path::Path,
105    sync::Arc,
106};
107
108use {
109    aho_corasick::AhoCorasick,
110    bstr::{ByteSlice, ByteVec, B},
111    regex_automata::{
112        meta::Regex,
113        util::pool::{Pool, PoolGuard},
114        PatternSet,
115    },
116};
117
118use crate::{
119    glob::MatchStrategy,
120    pathutil::{file_name, file_name_ext, normalize_path},
121};
122
123pub use crate::glob::{Glob, GlobBuilder, GlobMatcher};
124
125mod fnv;
126mod glob;
127mod pathutil;
128
129#[cfg(feature = "serde1")]
130mod serde_impl;
131
132#[cfg(feature = "log")]
133macro_rules! debug {
134    ($($token:tt)*) => (::log::debug!($($token)*);)
135}
136
137#[cfg(not(feature = "log"))]
138macro_rules! debug {
139    ($($token:tt)*) => {};
140}
141
142/// Represents an error that can occur when parsing a glob pattern.
143#[derive(Clone, Debug, Eq, PartialEq)]
144pub struct Error {
145    /// The original glob provided by the caller.
146    glob: Option<String>,
147    /// The kind of error.
148    kind: ErrorKind,
149}
150
151/// The kind of error that can occur when parsing a glob pattern.
152#[derive(Clone, Debug, Eq, PartialEq)]
153pub enum ErrorKind {
154    /// **DEPRECATED**.
155    ///
156    /// This error used to occur for consistency with git's glob specification,
157    /// but the specification now accepts all uses of `**`. When `**` does not
158    /// appear adjacent to a path separator or at the beginning/end of a glob,
159    /// it is now treated as two consecutive `*` patterns. As such, this error
160    /// is no longer used.
161    InvalidRecursive,
162    /// Occurs when a character class (e.g., `[abc]`) is not closed.
163    UnclosedClass,
164    /// Occurs when a range in a character (e.g., `[a-z]`) is invalid. For
165    /// example, if the range starts with a lexicographically larger character
166    /// than it ends with.
167    InvalidRange(char, char),
168    /// Occurs when a `}` is found without a matching `{`.
169    UnopenedAlternates,
170    /// Occurs when a `{` is found without a matching `}`.
171    UnclosedAlternates,
172    /// Occurs when an alternating group is nested inside another alternating
173    /// group, e.g., `{{a,b},{c,d}}`.
174    NestedAlternates,
175    /// Occurs when an unescaped '\' is found at the end of a glob.
176    DanglingEscape,
177    /// An error associated with parsing or compiling a regex.
178    Regex(String),
179    /// Hints that destructuring should not be exhaustive.
180    ///
181    /// This enum may grow additional variants, so this makes sure clients
182    /// don't count on exhaustive matching. (Otherwise, adding a new variant
183    /// could break existing code.)
184    #[doc(hidden)]
185    __Nonexhaustive,
186}
187
188impl std::error::Error for Error {
189    fn description(&self) -> &str {
190        self.kind.description()
191    }
192}
193
194impl Error {
195    /// Return the glob that caused this error, if one exists.
196    pub fn glob(&self) -> Option<&str> {
197        self.glob.as_ref().map(|s| &**s)
198    }
199
200    /// Return the kind of this error.
201    pub fn kind(&self) -> &ErrorKind {
202        &self.kind
203    }
204}
205
206impl ErrorKind {
207    fn description(&self) -> &str {
208        match *self {
209            ErrorKind::InvalidRecursive => {
210                "invalid use of **; must be one path component"
211            }
212            ErrorKind::UnclosedClass => {
213                "unclosed character class; missing ']'"
214            }
215            ErrorKind::InvalidRange(_, _) => "invalid character range",
216            ErrorKind::UnopenedAlternates => {
217                "unopened alternate group; missing '{' \
218                (maybe escape '}' with '[}]'?)"
219            }
220            ErrorKind::UnclosedAlternates => {
221                "unclosed alternate group; missing '}' \
222                (maybe escape '{' with '[{]'?)"
223            }
224            ErrorKind::NestedAlternates => {
225                "nested alternate groups are not allowed"
226            }
227            ErrorKind::DanglingEscape => "dangling '\\'",
228            ErrorKind::Regex(ref err) => err,
229            ErrorKind::__Nonexhaustive => unreachable!(),
230        }
231    }
232}
233
234impl std::fmt::Display for Error {
235    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
236        match self.glob {
237            None => self.kind.fmt(f),
238            Some(ref glob) => {
239                write!(f, "error parsing glob '{}': {}", glob, self.kind)
240            }
241        }
242    }
243}
244
245impl std::fmt::Display for ErrorKind {
246    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
247        match *self {
248            ErrorKind::InvalidRecursive
249            | ErrorKind::UnclosedClass
250            | ErrorKind::UnopenedAlternates
251            | ErrorKind::UnclosedAlternates
252            | ErrorKind::NestedAlternates
253            | ErrorKind::DanglingEscape
254            | ErrorKind::Regex(_) => write!(f, "{}", self.description()),
255            ErrorKind::InvalidRange(s, e) => {
256                write!(f, "invalid range; '{}' > '{}'", s, e)
257            }
258            ErrorKind::__Nonexhaustive => unreachable!(),
259        }
260    }
261}
262
263fn new_regex(pat: &str) -> Result<Regex, Error> {
264    let syntax = regex_automata::util::syntax::Config::new()
265        .utf8(false)
266        .dot_matches_new_line(true);
267    let config = Regex::config()
268        .utf8_empty(false)
269        .nfa_size_limit(Some(10 * (1 << 20)))
270        .hybrid_cache_capacity(10 * (1 << 20));
271    Regex::builder().syntax(syntax).configure(config).build(pat).map_err(
272        |err| Error {
273            glob: Some(pat.to_string()),
274            kind: ErrorKind::Regex(err.to_string()),
275        },
276    )
277}
278
279fn new_regex_set(pats: Vec<String>) -> Result<Regex, Error> {
280    let syntax = regex_automata::util::syntax::Config::new()
281        .utf8(false)
282        .dot_matches_new_line(true);
283    let config = Regex::config()
284        .match_kind(regex_automata::MatchKind::All)
285        .utf8_empty(false)
286        .nfa_size_limit(Some(10 * (1 << 20)))
287        .hybrid_cache_capacity(10 * (1 << 20));
288    Regex::builder()
289        .syntax(syntax)
290        .configure(config)
291        .build_many(&pats)
292        .map_err(|err| Error {
293            glob: None,
294            kind: ErrorKind::Regex(err.to_string()),
295        })
296}
297
298/// GlobSet represents a group of globs that can be matched together in a
299/// single pass.
300#[derive(Clone, Debug)]
301pub struct GlobSet {
302    len: usize,
303    strats: Vec<GlobSetMatchStrategy>,
304}
305
306impl GlobSet {
307    /// Create a new [`GlobSetBuilder`]. A `GlobSetBuilder` can be used to add
308    /// new patterns. Once all patterns have been added, `build` should be
309    /// called to produce a `GlobSet`, which can then be used for matching.
310    #[inline]
311    pub fn builder() -> GlobSetBuilder {
312        GlobSetBuilder::new()
313    }
314
315    /// Create an empty `GlobSet`. An empty set matches nothing.
316    #[inline]
317    pub fn empty() -> GlobSet {
318        GlobSet { len: 0, strats: vec![] }
319    }
320
321    /// Returns true if this set is empty, and therefore matches nothing.
322    #[inline]
323    pub fn is_empty(&self) -> bool {
324        self.len == 0
325    }
326
327    /// Returns the number of globs in this set.
328    #[inline]
329    pub fn len(&self) -> usize {
330        self.len
331    }
332
333    /// Returns true if any glob in this set matches the path given.
334    pub fn is_match<P: AsRef<Path>>(&self, path: P) -> bool {
335        self.is_match_candidate(&Candidate::new(path.as_ref()))
336    }
337
338    /// Returns true if any glob in this set matches the path given.
339    ///
340    /// This takes a Candidate as input, which can be used to amortize the
341    /// cost of preparing a path for matching.
342    pub fn is_match_candidate(&self, path: &Candidate<'_>) -> bool {
343        if self.is_empty() {
344            return false;
345        }
346        for strat in &self.strats {
347            if strat.is_match(path) {
348                return true;
349            }
350        }
351        false
352    }
353
354    /// Returns the sequence number of every glob pattern that matches the
355    /// given path.
356    pub fn matches<P: AsRef<Path>>(&self, path: P) -> Vec<usize> {
357        self.matches_candidate(&Candidate::new(path.as_ref()))
358    }
359
360    /// Returns the sequence number of every glob pattern that matches the
361    /// given path.
362    ///
363    /// This takes a Candidate as input, which can be used to amortize the
364    /// cost of preparing a path for matching.
365    pub fn matches_candidate(&self, path: &Candidate<'_>) -> Vec<usize> {
366        let mut into = vec![];
367        if self.is_empty() {
368            return into;
369        }
370        self.matches_candidate_into(path, &mut into);
371        into
372    }
373
374    /// Adds the sequence number of every glob pattern that matches the given
375    /// path to the vec given.
376    ///
377    /// `into` is cleared before matching begins, and contains the set of
378    /// sequence numbers (in ascending order) after matching ends. If no globs
379    /// were matched, then `into` will be empty.
380    pub fn matches_into<P: AsRef<Path>>(
381        &self,
382        path: P,
383        into: &mut Vec<usize>,
384    ) {
385        self.matches_candidate_into(&Candidate::new(path.as_ref()), into);
386    }
387
388    /// Adds the sequence number of every glob pattern that matches the given
389    /// path to the vec given.
390    ///
391    /// `into` is cleared before matching begins, and contains the set of
392    /// sequence numbers (in ascending order) after matching ends. If no globs
393    /// were matched, then `into` will be empty.
394    ///
395    /// This takes a Candidate as input, which can be used to amortize the
396    /// cost of preparing a path for matching.
397    pub fn matches_candidate_into(
398        &self,
399        path: &Candidate<'_>,
400        into: &mut Vec<usize>,
401    ) {
402        into.clear();
403        if self.is_empty() {
404            return;
405        }
406        for strat in &self.strats {
407            strat.matches_into(path, into);
408        }
409        into.sort();
410        into.dedup();
411    }
412
413    fn new(pats: &[Glob]) -> Result<GlobSet, Error> {
414        if pats.is_empty() {
415            return Ok(GlobSet { len: 0, strats: vec![] });
416        }
417        let mut lits = LiteralStrategy::new();
418        let mut base_lits = BasenameLiteralStrategy::new();
419        let mut exts = ExtensionStrategy::new();
420        let mut prefixes = MultiStrategyBuilder::new();
421        let mut suffixes = MultiStrategyBuilder::new();
422        let mut required_exts = RequiredExtensionStrategyBuilder::new();
423        let mut regexes = MultiStrategyBuilder::new();
424        for (i, p) in pats.iter().enumerate() {
425            match MatchStrategy::new(p) {
426                MatchStrategy::Literal(lit) => {
427                    lits.add(i, lit);
428                }
429                MatchStrategy::BasenameLiteral(lit) => {
430                    base_lits.add(i, lit);
431                }
432                MatchStrategy::Extension(ext) => {
433                    exts.add(i, ext);
434                }
435                MatchStrategy::Prefix(prefix) => {
436                    prefixes.add(i, prefix);
437                }
438                MatchStrategy::Suffix { suffix, component } => {
439                    if component {
440                        lits.add(i, suffix[1..].to_string());
441                    }
442                    suffixes.add(i, suffix);
443                }
444                MatchStrategy::RequiredExtension(ext) => {
445                    required_exts.add(i, ext, p.regex().to_owned());
446                }
447                MatchStrategy::Regex => {
448                    debug!("glob converted to regex: {:?}", p);
449                    regexes.add(i, p.regex().to_owned());
450                }
451            }
452        }
453        debug!(
454            "built glob set; {} literals, {} basenames, {} extensions, \
455                {} prefixes, {} suffixes, {} required extensions, {} regexes",
456            lits.0.len(),
457            base_lits.0.len(),
458            exts.0.len(),
459            prefixes.literals.len(),
460            suffixes.literals.len(),
461            required_exts.0.len(),
462            regexes.literals.len()
463        );
464        Ok(GlobSet {
465            len: pats.len(),
466            strats: vec![
467                GlobSetMatchStrategy::Extension(exts),
468                GlobSetMatchStrategy::BasenameLiteral(base_lits),
469                GlobSetMatchStrategy::Literal(lits),
470                GlobSetMatchStrategy::Suffix(suffixes.suffix()),
471                GlobSetMatchStrategy::Prefix(prefixes.prefix()),
472                GlobSetMatchStrategy::RequiredExtension(
473                    required_exts.build()?,
474                ),
475                GlobSetMatchStrategy::Regex(regexes.regex_set()?),
476            ],
477        })
478    }
479}
480
481impl Default for GlobSet {
482    /// Create a default empty GlobSet.
483    fn default() -> Self {
484        GlobSet::empty()
485    }
486}
487
488/// GlobSetBuilder builds a group of patterns that can be used to
489/// simultaneously match a file path.
490#[derive(Clone, Debug)]
491pub struct GlobSetBuilder {
492    pats: Vec<Glob>,
493}
494
495impl GlobSetBuilder {
496    /// Create a new `GlobSetBuilder`. A `GlobSetBuilder` can be used to add new
497    /// patterns. Once all patterns have been added, `build` should be called
498    /// to produce a [`GlobSet`], which can then be used for matching.
499    pub fn new() -> GlobSetBuilder {
500        GlobSetBuilder { pats: vec![] }
501    }
502
503    /// Builds a new matcher from all of the glob patterns added so far.
504    ///
505    /// Once a matcher is built, no new patterns can be added to it.
506    pub fn build(&self) -> Result<GlobSet, Error> {
507        GlobSet::new(&self.pats)
508    }
509
510    /// Add a new pattern to this set.
511    pub fn add(&mut self, pat: Glob) -> &mut GlobSetBuilder {
512        self.pats.push(pat);
513        self
514    }
515}
516
517/// A candidate path for matching.
518///
519/// All glob matching in this crate operates on `Candidate` values.
520/// Constructing candidates has a very small cost associated with it, so
521/// callers may find it beneficial to amortize that cost when matching a single
522/// path against multiple globs or sets of globs.
523#[derive(Clone)]
524pub struct Candidate<'a> {
525    path: Cow<'a, [u8]>,
526    basename: Cow<'a, [u8]>,
527    ext: Cow<'a, [u8]>,
528}
529
530impl<'a> std::fmt::Debug for Candidate<'a> {
531    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
532        f.debug_struct("Candidate")
533            .field("path", &self.path.as_bstr())
534            .field("basename", &self.basename.as_bstr())
535            .field("ext", &self.ext.as_bstr())
536            .finish()
537    }
538}
539
540impl<'a> Candidate<'a> {
541    /// Create a new candidate for matching from the given path.
542    pub fn new<P: AsRef<Path> + ?Sized>(path: &'a P) -> Candidate<'a> {
543        let path = normalize_path(Vec::from_path_lossy(path.as_ref()));
544        let basename = file_name(&path).unwrap_or(Cow::Borrowed(B("")));
545        let ext = file_name_ext(&basename).unwrap_or(Cow::Borrowed(B("")));
546        Candidate { path, basename, ext }
547    }
548
549    fn path_prefix(&self, max: usize) -> &[u8] {
550        if self.path.len() <= max {
551            &*self.path
552        } else {
553            &self.path[..max]
554        }
555    }
556
557    fn path_suffix(&self, max: usize) -> &[u8] {
558        if self.path.len() <= max {
559            &*self.path
560        } else {
561            &self.path[self.path.len() - max..]
562        }
563    }
564}
565
566#[derive(Clone, Debug)]
567enum GlobSetMatchStrategy {
568    Literal(LiteralStrategy),
569    BasenameLiteral(BasenameLiteralStrategy),
570    Extension(ExtensionStrategy),
571    Prefix(PrefixStrategy),
572    Suffix(SuffixStrategy),
573    RequiredExtension(RequiredExtensionStrategy),
574    Regex(RegexSetStrategy),
575}
576
577impl GlobSetMatchStrategy {
578    fn is_match(&self, candidate: &Candidate<'_>) -> bool {
579        use self::GlobSetMatchStrategy::*;
580        match *self {
581            Literal(ref s) => s.is_match(candidate),
582            BasenameLiteral(ref s) => s.is_match(candidate),
583            Extension(ref s) => s.is_match(candidate),
584            Prefix(ref s) => s.is_match(candidate),
585            Suffix(ref s) => s.is_match(candidate),
586            RequiredExtension(ref s) => s.is_match(candidate),
587            Regex(ref s) => s.is_match(candidate),
588        }
589    }
590
591    fn matches_into(
592        &self,
593        candidate: &Candidate<'_>,
594        matches: &mut Vec<usize>,
595    ) {
596        use self::GlobSetMatchStrategy::*;
597        match *self {
598            Literal(ref s) => s.matches_into(candidate, matches),
599            BasenameLiteral(ref s) => s.matches_into(candidate, matches),
600            Extension(ref s) => s.matches_into(candidate, matches),
601            Prefix(ref s) => s.matches_into(candidate, matches),
602            Suffix(ref s) => s.matches_into(candidate, matches),
603            RequiredExtension(ref s) => s.matches_into(candidate, matches),
604            Regex(ref s) => s.matches_into(candidate, matches),
605        }
606    }
607}
608
609#[derive(Clone, Debug)]
610struct LiteralStrategy(fnv::HashMap<Vec<u8>, Vec<usize>>);
611
612impl LiteralStrategy {
613    fn new() -> LiteralStrategy {
614        LiteralStrategy(fnv::HashMap::default())
615    }
616
617    fn add(&mut self, global_index: usize, lit: String) {
618        self.0.entry(lit.into_bytes()).or_insert(vec![]).push(global_index);
619    }
620
621    fn is_match(&self, candidate: &Candidate<'_>) -> bool {
622        self.0.contains_key(candidate.path.as_bytes())
623    }
624
625    #[inline(never)]
626    fn matches_into(
627        &self,
628        candidate: &Candidate<'_>,
629        matches: &mut Vec<usize>,
630    ) {
631        if let Some(hits) = self.0.get(candidate.path.as_bytes()) {
632            matches.extend(hits);
633        }
634    }
635}
636
637#[derive(Clone, Debug)]
638struct BasenameLiteralStrategy(fnv::HashMap<Vec<u8>, Vec<usize>>);
639
640impl BasenameLiteralStrategy {
641    fn new() -> BasenameLiteralStrategy {
642        BasenameLiteralStrategy(fnv::HashMap::default())
643    }
644
645    fn add(&mut self, global_index: usize, lit: String) {
646        self.0.entry(lit.into_bytes()).or_insert(vec![]).push(global_index);
647    }
648
649    fn is_match(&self, candidate: &Candidate<'_>) -> bool {
650        if candidate.basename.is_empty() {
651            return false;
652        }
653        self.0.contains_key(candidate.basename.as_bytes())
654    }
655
656    #[inline(never)]
657    fn matches_into(
658        &self,
659        candidate: &Candidate<'_>,
660        matches: &mut Vec<usize>,
661    ) {
662        if candidate.basename.is_empty() {
663            return;
664        }
665        if let Some(hits) = self.0.get(candidate.basename.as_bytes()) {
666            matches.extend(hits);
667        }
668    }
669}
670
671#[derive(Clone, Debug)]
672struct ExtensionStrategy(fnv::HashMap<Vec<u8>, Vec<usize>>);
673
674impl ExtensionStrategy {
675    fn new() -> ExtensionStrategy {
676        ExtensionStrategy(fnv::HashMap::default())
677    }
678
679    fn add(&mut self, global_index: usize, ext: String) {
680        self.0.entry(ext.into_bytes()).or_insert(vec![]).push(global_index);
681    }
682
683    fn is_match(&self, candidate: &Candidate<'_>) -> bool {
684        if candidate.ext.is_empty() {
685            return false;
686        }
687        self.0.contains_key(candidate.ext.as_bytes())
688    }
689
690    #[inline(never)]
691    fn matches_into(
692        &self,
693        candidate: &Candidate<'_>,
694        matches: &mut Vec<usize>,
695    ) {
696        if candidate.ext.is_empty() {
697            return;
698        }
699        if let Some(hits) = self.0.get(candidate.ext.as_bytes()) {
700            matches.extend(hits);
701        }
702    }
703}
704
705#[derive(Clone, Debug)]
706struct PrefixStrategy {
707    matcher: AhoCorasick,
708    map: Vec<usize>,
709    longest: usize,
710}
711
712impl PrefixStrategy {
713    fn is_match(&self, candidate: &Candidate<'_>) -> bool {
714        let path = candidate.path_prefix(self.longest);
715        for m in self.matcher.find_overlapping_iter(path) {
716            if m.start() == 0 {
717                return true;
718            }
719        }
720        false
721    }
722
723    fn matches_into(
724        &self,
725        candidate: &Candidate<'_>,
726        matches: &mut Vec<usize>,
727    ) {
728        let path = candidate.path_prefix(self.longest);
729        for m in self.matcher.find_overlapping_iter(path) {
730            if m.start() == 0 {
731                matches.push(self.map[m.pattern()]);
732            }
733        }
734    }
735}
736
737#[derive(Clone, Debug)]
738struct SuffixStrategy {
739    matcher: AhoCorasick,
740    map: Vec<usize>,
741    longest: usize,
742}
743
744impl SuffixStrategy {
745    fn is_match(&self, candidate: &Candidate<'_>) -> bool {
746        let path = candidate.path_suffix(self.longest);
747        for m in self.matcher.find_overlapping_iter(path) {
748            if m.end() == path.len() {
749                return true;
750            }
751        }
752        false
753    }
754
755    fn matches_into(
756        &self,
757        candidate: &Candidate<'_>,
758        matches: &mut Vec<usize>,
759    ) {
760        let path = candidate.path_suffix(self.longest);
761        for m in self.matcher.find_overlapping_iter(path) {
762            if m.end() == path.len() {
763                matches.push(self.map[m.pattern()]);
764            }
765        }
766    }
767}
768
769#[derive(Clone, Debug)]
770struct RequiredExtensionStrategy(fnv::HashMap<Vec<u8>, Vec<(usize, Regex)>>);
771
772impl RequiredExtensionStrategy {
773    fn is_match(&self, candidate: &Candidate<'_>) -> bool {
774        if candidate.ext.is_empty() {
775            return false;
776        }
777        match self.0.get(candidate.ext.as_bytes()) {
778            None => false,
779            Some(regexes) => {
780                for &(_, ref re) in regexes {
781                    if re.is_match(candidate.path.as_bytes()) {
782                        return true;
783                    }
784                }
785                false
786            }
787        }
788    }
789
790    #[inline(never)]
791    fn matches_into(
792        &self,
793        candidate: &Candidate<'_>,
794        matches: &mut Vec<usize>,
795    ) {
796        if candidate.ext.is_empty() {
797            return;
798        }
799        if let Some(regexes) = self.0.get(candidate.ext.as_bytes()) {
800            for &(global_index, ref re) in regexes {
801                if re.is_match(candidate.path.as_bytes()) {
802                    matches.push(global_index);
803                }
804            }
805        }
806    }
807}
808
809#[derive(Clone, Debug)]
810struct RegexSetStrategy {
811    matcher: Regex,
812    map: Vec<usize>,
813    // We use a pool of PatternSets to hopefully allocating a fresh one on each
814    // call.
815    //
816    // TODO: In the next semver breaking release, we should drop this pool and
817    // expose an opaque type that wraps PatternSet. Then callers can provide
818    // it to `matches_into` directly. Callers might still want to use a pool
819    // or similar to amortize allocation, but that matches the status quo and
820    // absolves us of needing to do it here.
821    patset: Arc<Pool<PatternSet, PatternSetPoolFn>>,
822}
823
824type PatternSetPoolFn =
825    Box<dyn Fn() -> PatternSet + Send + Sync + UnwindSafe + RefUnwindSafe>;
826
827impl RegexSetStrategy {
828    fn is_match(&self, candidate: &Candidate<'_>) -> bool {
829        self.matcher.is_match(candidate.path.as_bytes())
830    }
831
832    fn matches_into(
833        &self,
834        candidate: &Candidate<'_>,
835        matches: &mut Vec<usize>,
836    ) {
837        let input = regex_automata::Input::new(candidate.path.as_bytes());
838        let mut patset = self.patset.get();
839        patset.clear();
840        self.matcher.which_overlapping_matches(&input, &mut patset);
841        for i in patset.iter() {
842            matches.push(self.map[i]);
843        }
844        PoolGuard::put(patset);
845    }
846}
847
848#[derive(Clone, Debug)]
849struct MultiStrategyBuilder {
850    literals: Vec<String>,
851    map: Vec<usize>,
852    longest: usize,
853}
854
855impl MultiStrategyBuilder {
856    fn new() -> MultiStrategyBuilder {
857        MultiStrategyBuilder { literals: vec![], map: vec![], longest: 0 }
858    }
859
860    fn add(&mut self, global_index: usize, literal: String) {
861        if literal.len() > self.longest {
862            self.longest = literal.len();
863        }
864        self.map.push(global_index);
865        self.literals.push(literal);
866    }
867
868    fn prefix(self) -> PrefixStrategy {
869        PrefixStrategy {
870            matcher: AhoCorasick::new(&self.literals).unwrap(),
871            map: self.map,
872            longest: self.longest,
873        }
874    }
875
876    fn suffix(self) -> SuffixStrategy {
877        SuffixStrategy {
878            matcher: AhoCorasick::new(&self.literals).unwrap(),
879            map: self.map,
880            longest: self.longest,
881        }
882    }
883
884    fn regex_set(self) -> Result<RegexSetStrategy, Error> {
885        let matcher = new_regex_set(self.literals)?;
886        let pattern_len = matcher.pattern_len();
887        let create: PatternSetPoolFn =
888            Box::new(move || PatternSet::new(pattern_len));
889        Ok(RegexSetStrategy {
890            matcher,
891            map: self.map,
892            patset: Arc::new(Pool::new(create)),
893        })
894    }
895}
896
897#[derive(Clone, Debug)]
898struct RequiredExtensionStrategyBuilder(
899    fnv::HashMap<Vec<u8>, Vec<(usize, String)>>,
900);
901
902impl RequiredExtensionStrategyBuilder {
903    fn new() -> RequiredExtensionStrategyBuilder {
904        RequiredExtensionStrategyBuilder(fnv::HashMap::default())
905    }
906
907    fn add(&mut self, global_index: usize, ext: String, regex: String) {
908        self.0
909            .entry(ext.into_bytes())
910            .or_insert(vec![])
911            .push((global_index, regex));
912    }
913
914    fn build(self) -> Result<RequiredExtensionStrategy, Error> {
915        let mut exts = fnv::HashMap::default();
916        for (ext, regexes) in self.0.into_iter() {
917            exts.insert(ext.clone(), vec![]);
918            for (global_index, regex) in regexes {
919                let compiled = new_regex(&regex)?;
920                exts.get_mut(&ext).unwrap().push((global_index, compiled));
921            }
922        }
923        Ok(RequiredExtensionStrategy(exts))
924    }
925}
926
927/// Escape meta-characters within the given glob pattern.
928///
929/// The escaping works by surrounding meta-characters with brackets. For
930/// example, `*` becomes `[*]`.
931pub fn escape(s: &str) -> String {
932    let mut escaped = String::with_capacity(s.len());
933    for c in s.chars() {
934        match c {
935            // note that ! does not need escaping because it is only special
936            // inside brackets
937            '?' | '*' | '[' | ']' => {
938                escaped.push('[');
939                escaped.push(c);
940                escaped.push(']');
941            }
942            c => {
943                escaped.push(c);
944            }
945        }
946    }
947    escaped
948}
949
950#[cfg(test)]
951mod tests {
952    use crate::glob::Glob;
953
954    use super::{GlobSet, GlobSetBuilder};
955
956    #[test]
957    fn set_works() {
958        let mut builder = GlobSetBuilder::new();
959        builder.add(Glob::new("src/**/*.rs").unwrap());
960        builder.add(Glob::new("*.c").unwrap());
961        builder.add(Glob::new("src/lib.rs").unwrap());
962        let set = builder.build().unwrap();
963
964        assert!(set.is_match("foo.c"));
965        assert!(set.is_match("src/foo.c"));
966        assert!(!set.is_match("foo.rs"));
967        assert!(!set.is_match("tests/foo.rs"));
968        assert!(set.is_match("src/foo.rs"));
969        assert!(set.is_match("src/grep/src/main.rs"));
970
971        let matches = set.matches("src/lib.rs");
972        assert_eq!(2, matches.len());
973        assert_eq!(0, matches[0]);
974        assert_eq!(2, matches[1]);
975    }
976
977    #[test]
978    fn empty_set_works() {
979        let set = GlobSetBuilder::new().build().unwrap();
980        assert!(!set.is_match(""));
981        assert!(!set.is_match("a"));
982    }
983
984    #[test]
985    fn default_set_is_empty_works() {
986        let set: GlobSet = Default::default();
987        assert!(!set.is_match(""));
988        assert!(!set.is_match("a"));
989    }
990
991    #[test]
992    fn escape() {
993        use super::escape;
994        assert_eq!("foo", escape("foo"));
995        assert_eq!("foo[*]", escape("foo*"));
996        assert_eq!("[[][]]", escape("[]"));
997        assert_eq!("[*][?]", escape("*?"));
998        assert_eq!("src/[*][*]/[*].rs", escape("src/**/*.rs"));
999        assert_eq!("bar[[]ab[]]baz", escape("bar[ab]baz"));
1000        assert_eq!("bar[[]!![]]!baz", escape("bar[!!]!baz"));
1001    }
1002
1003    // This tests that regex matching doesn't "remember" the results of
1004    // previous searches. That is, if any memory is reused from a previous
1005    // search, then it should be cleared first.
1006    #[test]
1007    fn set_does_not_remember() {
1008        let mut builder = GlobSetBuilder::new();
1009        builder.add(Glob::new("*foo*").unwrap());
1010        builder.add(Glob::new("*bar*").unwrap());
1011        builder.add(Glob::new("*quux*").unwrap());
1012        let set = builder.build().unwrap();
1013
1014        let matches = set.matches("ZfooZquuxZ");
1015        assert_eq!(2, matches.len());
1016        assert_eq!(0, matches[0]);
1017        assert_eq!(2, matches[1]);
1018
1019        let matches = set.matches("nada");
1020        assert_eq!(0, matches.len());
1021    }
1022}