rayon/
str.rs

1//! Parallel iterator types for [strings][std::str]
2//!
3//! You will rarely need to interact with this module directly unless you need
4//! to name one of the iterator types.
5//!
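//! Note: [`ParallelString::par_split()`], [`par_split_terminator()`], and the
//! other pattern-based methods (`par_split_inclusive()`, `par_matches()`,
//! `par_match_indices()`) reference a `Pattern` trait which is not visible
//! outside this crate.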
8//! This trait is intentionally kept private, for use only by Rayon itself.
9//! It is implemented for `char`, `&[char]`, `[char; N]`, `&[char; N]`,
10//! and any function or closure `F: Fn(char) -> bool + Sync + Send`.
11//!
12//! [`ParallelString::par_split()`]: trait.ParallelString.html#method.par_split
13//! [`par_split_terminator()`]: trait.ParallelString.html#method.par_split_terminator
14//!
15//! [std::str]: https://doc.rust-lang.org/stable/std/str/
16
17use crate::iter::plumbing::*;
18use crate::iter::*;
19use crate::split_producer::*;
20
/// Reports whether `b` can be the first byte of a UTF-8 encoded character.
/// (the same byte-level test that `str::is_char_boundary` performs)
#[inline]
fn is_char_boundary(b: u8) -> bool {
    // Only continuation bytes (`0b10xx_xxxx`, i.e. 128..=191) are *not*
    // boundaries, so this is equivalent to: b < 128 || b >= 192
    (b & 0xC0) != 0x80
}
28
29/// Find the index of a character boundary near the midpoint.
30#[inline]
31fn find_char_midpoint(chars: &str) -> usize {
32    let mid = chars.len() / 2;
33
34    // We want to split near the midpoint, but we need to find an actual
35    // character boundary.  So we look at the raw bytes, first scanning
36    // forward from the midpoint for a boundary, then trying backward.
37    let (left, right) = chars.as_bytes().split_at(mid);
38    match right.iter().copied().position(is_char_boundary) {
39        Some(i) => mid + i,
40        None => left
41            .iter()
42            .copied()
43            .rposition(is_char_boundary)
44            .unwrap_or(0),
45    }
46}
47
48/// Try to split a string near the midpoint.
49#[inline]
50fn split(chars: &str) -> Option<(&str, &str)> {
51    let index = find_char_midpoint(chars);
52    if index > 0 {
53        Some(chars.split_at(index))
54    } else {
55        None
56    }
57}
58
59/// Parallel extensions for strings.
60pub trait ParallelString {
61    /// Returns a plain string slice, which is used to implement the rest of
62    /// the parallel methods.
63    fn as_parallel_string(&self) -> &str;
64
65    /// Returns a parallel iterator over the characters of a string.
66    ///
67    /// # Examples
68    ///
69    /// ```
70    /// use rayon::prelude::*;
71    /// let max = "hello".par_chars().max_by_key(|c| *c as i32);
72    /// assert_eq!(Some('o'), max);
73    /// ```
74    fn par_chars(&self) -> Chars<'_> {
75        Chars {
76            chars: self.as_parallel_string(),
77        }
78    }
79
80    /// Returns a parallel iterator over the characters of a string, with their positions.
81    ///
82    /// # Examples
83    ///
84    /// ```
85    /// use rayon::prelude::*;
86    /// let min = "hello".par_char_indices().min_by_key(|&(_i, c)| c as i32);
87    /// assert_eq!(Some((1, 'e')), min);
88    /// ```
89    fn par_char_indices(&self) -> CharIndices<'_> {
90        CharIndices {
91            chars: self.as_parallel_string(),
92        }
93    }
94
95    /// Returns a parallel iterator over the bytes of a string.
96    ///
97    /// Note that multi-byte sequences (for code points greater than `U+007F`)
98    /// are produced as separate items, but will not be split across threads.
99    /// If you would prefer an indexed iterator without that guarantee, consider
100    /// `string.as_bytes().par_iter().copied()` instead.
101    ///
102    /// # Examples
103    ///
104    /// ```
105    /// use rayon::prelude::*;
106    /// let max = "hello".par_bytes().max();
107    /// assert_eq!(Some(b'o'), max);
108    /// ```
109    fn par_bytes(&self) -> Bytes<'_> {
110        Bytes {
111            chars: self.as_parallel_string(),
112        }
113    }
114
115    /// Returns a parallel iterator over a string encoded as UTF-16.
116    ///
117    /// Note that surrogate pairs (for code points greater than `U+FFFF`) are
118    /// produced as separate items, but will not be split across threads.
119    ///
120    /// # Examples
121    ///
122    /// ```
123    /// use rayon::prelude::*;
124    ///
125    /// let max = "hello".par_encode_utf16().max();
126    /// assert_eq!(Some(b'o' as u16), max);
127    ///
128    /// let text = "Zażółć gęślą jaźń";
129    /// let utf8_len = text.len();
130    /// let utf16_len = text.par_encode_utf16().count();
131    /// assert!(utf16_len <= utf8_len);
132    /// ```
133    fn par_encode_utf16(&self) -> EncodeUtf16<'_> {
134        EncodeUtf16 {
135            chars: self.as_parallel_string(),
136        }
137    }
138
139    /// Returns a parallel iterator over substrings separated by a
140    /// given character or predicate, similar to `str::split`.
141    ///
142    /// Note: the `Pattern` trait is private, for use only by Rayon itself.
143    /// It is implemented for `char`, `&[char]`, `[char; N]`, `&[char; N]`,
144    /// and any function or closure `F: Fn(char) -> bool + Sync + Send`.
145    ///
146    /// # Examples
147    ///
148    /// ```
149    /// use rayon::prelude::*;
150    /// let total = "1, 2, buckle, 3, 4, door"
151    ///    .par_split(',')
152    ///    .filter_map(|s| s.trim().parse::<i32>().ok())
153    ///    .sum();
154    /// assert_eq!(10, total);
155    /// ```
156    fn par_split<P: Pattern>(&self, separator: P) -> Split<'_, P> {
157        Split::new(self.as_parallel_string(), separator)
158    }
159
160    /// Returns a parallel iterator over substrings separated by a
161    /// given character or predicate, keeping the matched part as a terminator
162    /// of the substring similar to `str::split_inclusive`.
163    ///
164    /// Note: the `Pattern` trait is private, for use only by Rayon itself.
165    /// It is implemented for `char`, `&[char]`, `[char; N]`, `&[char; N]`,
166    /// and any function or closure `F: Fn(char) -> bool + Sync + Send`.
167    ///
168    /// # Examples
169    ///
170    /// ```
171    /// use rayon::prelude::*;
172    /// let lines: Vec<_> = "Mary had a little lamb\nlittle lamb\nlittle lamb."
173    ///    .par_split_inclusive('\n')
174    ///    .collect();
175    /// assert_eq!(lines, ["Mary had a little lamb\n", "little lamb\n", "little lamb."]);
176    /// ```
177    fn par_split_inclusive<P: Pattern>(&self, separator: P) -> SplitInclusive<'_, P> {
178        SplitInclusive::new(self.as_parallel_string(), separator)
179    }
180
181    /// Returns a parallel iterator over substrings terminated by a
182    /// given character or predicate, similar to `str::split_terminator`.
183    /// It's equivalent to `par_split`, except it doesn't produce an empty
184    /// substring after a trailing terminator.
185    ///
186    /// Note: the `Pattern` trait is private, for use only by Rayon itself.
187    /// It is implemented for `char`, `&[char]`, `[char; N]`, `&[char; N]`,
188    /// and any function or closure `F: Fn(char) -> bool + Sync + Send`.
189    ///
190    /// # Examples
191    ///
192    /// ```
193    /// use rayon::prelude::*;
194    /// let parts: Vec<_> = "((1 + 3) * 2)"
195    ///     .par_split_terminator(|c| c == '(' || c == ')')
196    ///     .collect();
197    /// assert_eq!(vec!["", "", "1 + 3", " * 2"], parts);
198    /// ```
199    fn par_split_terminator<P: Pattern>(&self, terminator: P) -> SplitTerminator<'_, P> {
200        SplitTerminator::new(self.as_parallel_string(), terminator)
201    }
202
203    /// Returns a parallel iterator over the lines of a string, ending with an
204    /// optional carriage return and with a newline (`\r\n` or just `\n`).
205    /// The final line ending is optional, and line endings are not included in
206    /// the output strings.
207    ///
208    /// # Examples
209    ///
210    /// ```
211    /// use rayon::prelude::*;
212    /// let lengths: Vec<_> = "hello world\nfizbuzz"
213    ///     .par_lines()
214    ///     .map(|l| l.len())
215    ///     .collect();
216    /// assert_eq!(vec![11, 7], lengths);
217    /// ```
218    fn par_lines(&self) -> Lines<'_> {
219        Lines(self.as_parallel_string())
220    }
221
222    /// Returns a parallel iterator over the sub-slices of a string that are
223    /// separated by any amount of whitespace.
224    ///
225    /// As with `str::split_whitespace`, 'whitespace' is defined according to
226    /// the terms of the Unicode Derived Core Property `White_Space`.
227    /// If you only want to split on ASCII whitespace instead, use
228    /// [`par_split_ascii_whitespace`][`ParallelString::par_split_ascii_whitespace`].
229    ///
230    /// # Examples
231    ///
232    /// ```
233    /// use rayon::prelude::*;
234    /// let longest = "which is the longest word?"
235    ///     .par_split_whitespace()
236    ///     .max_by_key(|word| word.len());
237    /// assert_eq!(Some("longest"), longest);
238    /// ```
239    ///
240    /// All kinds of whitespace are considered:
241    ///
242    /// ```
243    /// use rayon::prelude::*;
244    /// let words: Vec<&str> = " Mary   had\ta\u{2009}little  \n\t lamb"
245    ///     .par_split_whitespace()
246    ///     .collect();
247    /// assert_eq!(words, ["Mary", "had", "a", "little", "lamb"]);
248    /// ```
249    ///
250    /// If the string is empty or all whitespace, the iterator yields no string slices:
251    ///
252    /// ```
253    /// use rayon::prelude::*;
254    /// assert_eq!("".par_split_whitespace().count(), 0);
255    /// assert_eq!("   ".par_split_whitespace().count(), 0);
256    /// ```
257    fn par_split_whitespace(&self) -> SplitWhitespace<'_> {
258        SplitWhitespace(self.as_parallel_string())
259    }
260
261    /// Returns a parallel iterator over the sub-slices of a string that are
262    /// separated by any amount of ASCII whitespace.
263    ///
264    /// To split by Unicode `White_Space` instead, use
265    /// [`par_split_whitespace`][`ParallelString::par_split_whitespace`].
266    ///
267    /// # Examples
268    ///
269    /// ```
270    /// use rayon::prelude::*;
271    /// let longest = "which is the longest word?"
272    ///     .par_split_ascii_whitespace()
273    ///     .max_by_key(|word| word.len());
274    /// assert_eq!(Some("longest"), longest);
275    /// ```
276    ///
277    /// All kinds of ASCII whitespace are considered, but not Unicode `White_Space`:
278    ///
279    /// ```
280    /// use rayon::prelude::*;
281    /// let words: Vec<&str> = " Mary   had\ta\u{2009}little  \n\t lamb"
282    ///     .par_split_ascii_whitespace()
283    ///     .collect();
284    /// assert_eq!(words, ["Mary", "had", "a\u{2009}little", "lamb"]);
285    /// ```
286    ///
287    /// If the string is empty or all ASCII whitespace, the iterator yields no string slices:
288    ///
289    /// ```
290    /// use rayon::prelude::*;
291    /// assert_eq!("".par_split_whitespace().count(), 0);
292    /// assert_eq!("   ".par_split_whitespace().count(), 0);
293    /// ```
294    fn par_split_ascii_whitespace(&self) -> SplitAsciiWhitespace<'_> {
295        SplitAsciiWhitespace(self.as_parallel_string())
296    }
297
298    /// Returns a parallel iterator over substrings that match a
299    /// given character or predicate, similar to `str::matches`.
300    ///
301    /// Note: the `Pattern` trait is private, for use only by Rayon itself.
302    /// It is implemented for `char`, `&[char]`, `[char; N]`, `&[char; N]`,
303    /// and any function or closure `F: Fn(char) -> bool + Sync + Send`.
304    ///
305    /// # Examples
306    ///
307    /// ```
308    /// use rayon::prelude::*;
309    /// let total = "1, 2, buckle, 3, 4, door"
310    ///    .par_matches(char::is_numeric)
311    ///    .map(|s| s.parse::<i32>().expect("digit"))
312    ///    .sum();
313    /// assert_eq!(10, total);
314    /// ```
315    fn par_matches<P: Pattern>(&self, pattern: P) -> Matches<'_, P> {
316        Matches {
317            chars: self.as_parallel_string(),
318            pattern,
319        }
320    }
321
322    /// Returns a parallel iterator over substrings that match a given character
323    /// or predicate, with their positions, similar to `str::match_indices`.
324    ///
325    /// Note: the `Pattern` trait is private, for use only by Rayon itself.
326    /// It is implemented for `char`, `&[char]`, `[char; N]`, `&[char; N]`,
327    /// and any function or closure `F: Fn(char) -> bool + Sync + Send`.
328    ///
329    /// # Examples
330    ///
331    /// ```
332    /// use rayon::prelude::*;
333    /// let digits: Vec<_> = "1, 2, buckle, 3, 4, door"
334    ///    .par_match_indices(char::is_numeric)
335    ///    .collect();
336    /// assert_eq!(digits, vec![(0, "1"), (3, "2"), (14, "3"), (17, "4")]);
337    /// ```
338    fn par_match_indices<P: Pattern>(&self, pattern: P) -> MatchIndices<'_, P> {
339        MatchIndices {
340            chars: self.as_parallel_string(),
341            pattern,
342        }
343    }
344}
345
// `str` is already a string slice, so it hands itself to the provided
// `par_*` methods unchanged.
impl ParallelString for str {
    #[inline]
    fn as_parallel_string(&self) -> &str {
        self
    }
}
352
353// /////////////////////////////////////////////////////////////////////////
354
355/// We hide the `Pattern` trait in a private module, as its API is not meant
356/// for general consumption.  If we could have privacy on trait items, then it
357/// would be nicer to have its basic existence and implementors public while
358/// keeping all of the methods private.
359mod private {
360    use crate::iter::plumbing::Folder;
361
362    /// Pattern-matching trait for `ParallelString`, somewhat like a mix of
363    /// `std::str::pattern::{Pattern, Searcher}`.
364    ///
365    /// Implementing this trait is not permitted outside of `rayon`.
366    pub trait Pattern: Sized + Sync + Send {
367        private_decl! {}
368        fn find_in(&self, haystack: &str) -> Option<usize>;
369        fn rfind_in(&self, haystack: &str) -> Option<usize>;
370        fn is_suffix_of(&self, haystack: &str) -> bool;
371        fn fold_splits<'ch, F>(&self, haystack: &'ch str, folder: F, skip_last: bool) -> F
372        where
373            F: Folder<&'ch str>;
374        fn fold_inclusive_splits<'ch, F>(&self, haystack: &'ch str, folder: F) -> F
375        where
376            F: Folder<&'ch str>;
377        fn fold_matches<'ch, F>(&self, haystack: &'ch str, folder: F) -> F
378        where
379            F: Folder<&'ch str>;
380        fn fold_match_indices<'ch, F>(&self, haystack: &'ch str, folder: F, base: usize) -> F
381        where
382            F: Folder<(usize, &'ch str)>;
383    }
384}
385use self::private::Pattern;
386
/// Builds a closure that shifts the index of an `(index, item)` pair forward
/// by `base`, leaving the item untouched.
#[inline]
fn offset<T>(base: usize) -> impl Fn((usize, T)) -> (usize, T) {
    move |pair: (usize, T)| (base + pair.0, pair.1)
}
391
// Expands to the full set of `Pattern` methods, each one forwarding to the
// corresponding `str` API with `$pattern` (an expression built from `self`)
// as the pattern argument.
macro_rules! impl_pattern {
    (&$self:ident => $pattern:expr) => {
        private_impl! {}

        #[inline]
        fn find_in(&$self, haystack: &str) -> Option<usize> {
            haystack.find($pattern)
        }

        #[inline]
        fn rfind_in(&$self, haystack: &str) -> Option<usize> {
            haystack.rfind($pattern)
        }

        #[inline]
        fn is_suffix_of(&$self, haystack: &str) -> bool {
            haystack.ends_with($pattern)
        }

        fn fold_splits<'ch, F>(&$self, haystack: &'ch str, folder: F, skip_last: bool) -> F
        where
            F: Folder<&'ch str>,
        {
            let mut pieces = haystack.split($pattern);
            if skip_last {
                // Drop the final piece before handing the rest to the folder.
                pieces.next_back();
            }
            folder.consume_iter(pieces)
        }

        fn fold_inclusive_splits<'ch, F>(&$self, haystack: &'ch str, folder: F) -> F
        where
            F: Folder<&'ch str>,
        {
            let pieces = haystack.split_inclusive($pattern);
            folder.consume_iter(pieces)
        }

        fn fold_matches<'ch, F>(&$self, haystack: &'ch str, folder: F) -> F
        where
            F: Folder<&'ch str>,
        {
            let found = haystack.matches($pattern);
            folder.consume_iter(found)
        }

        fn fold_match_indices<'ch, F>(&$self, haystack: &'ch str, folder: F, base: usize) -> F
        where
            F: Folder<(usize, &'ch str)>,
        {
            // Positions are relative to `haystack`; shift them by `base`.
            let found = haystack.match_indices($pattern).map(offset(base));
            folder.consume_iter(found)
        }
    }
}
444
445impl Pattern for char {
446    impl_pattern!(&self => *self);
447}
448
449impl Pattern for &[char] {
450    impl_pattern!(&self => *self);
451}
452
453// TODO (MSRV 1.75): use `*self` for array patterns too.
454// - Needs `DoubleEndedSearcher` so `split.next_back()` works.
455
456impl<const N: usize> Pattern for [char; N] {
457    impl_pattern!(&self => self.as_slice());
458}
459
460impl<const N: usize> Pattern for &[char; N] {
461    impl_pattern!(&self => self.as_slice());
462}
463
464impl<FN: Sync + Send + Fn(char) -> bool> Pattern for FN {
465    impl_pattern!(&self => self);
466}
467
468// /////////////////////////////////////////////////////////////////////////
469
470/// Parallel iterator over the characters of a string
471#[derive(Debug, Clone)]
472pub struct Chars<'ch> {
473    chars: &'ch str,
474}
475
476struct CharsProducer<'ch> {
477    chars: &'ch str,
478}
479
480impl<'ch> ParallelIterator for Chars<'ch> {
481    type Item = char;
482
483    fn drive_unindexed<C>(self, consumer: C) -> C::Result
484    where
485        C: UnindexedConsumer<Self::Item>,
486    {
487        bridge_unindexed(CharsProducer { chars: self.chars }, consumer)
488    }
489}
490
491impl<'ch> UnindexedProducer for CharsProducer<'ch> {
492    type Item = char;
493
494    fn split(self) -> (Self, Option<Self>) {
495        match split(self.chars) {
496            Some((left, right)) => (
497                CharsProducer { chars: left },
498                Some(CharsProducer { chars: right }),
499            ),
500            None => (self, None),
501        }
502    }
503
504    fn fold_with<F>(self, folder: F) -> F
505    where
506        F: Folder<Self::Item>,
507    {
508        folder.consume_iter(self.chars.chars())
509    }
510}
511
512// /////////////////////////////////////////////////////////////////////////
513
514/// Parallel iterator over the characters of a string, with their positions
515#[derive(Debug, Clone)]
516pub struct CharIndices<'ch> {
517    chars: &'ch str,
518}
519
520struct CharIndicesProducer<'ch> {
521    index: usize,
522    chars: &'ch str,
523}
524
525impl<'ch> ParallelIterator for CharIndices<'ch> {
526    type Item = (usize, char);
527
528    fn drive_unindexed<C>(self, consumer: C) -> C::Result
529    where
530        C: UnindexedConsumer<Self::Item>,
531    {
532        let producer = CharIndicesProducer {
533            index: 0,
534            chars: self.chars,
535        };
536        bridge_unindexed(producer, consumer)
537    }
538}
539
540impl<'ch> UnindexedProducer for CharIndicesProducer<'ch> {
541    type Item = (usize, char);
542
543    fn split(self) -> (Self, Option<Self>) {
544        match split(self.chars) {
545            Some((left, right)) => (
546                CharIndicesProducer {
547                    chars: left,
548                    ..self
549                },
550                Some(CharIndicesProducer {
551                    chars: right,
552                    index: self.index + left.len(),
553                }),
554            ),
555            None => (self, None),
556        }
557    }
558
559    fn fold_with<F>(self, folder: F) -> F
560    where
561        F: Folder<Self::Item>,
562    {
563        let base = self.index;
564        folder.consume_iter(self.chars.char_indices().map(offset(base)))
565    }
566}
567
568// /////////////////////////////////////////////////////////////////////////
569
570/// Parallel iterator over the bytes of a string
571#[derive(Debug, Clone)]
572pub struct Bytes<'ch> {
573    chars: &'ch str,
574}
575
576struct BytesProducer<'ch> {
577    chars: &'ch str,
578}
579
580impl<'ch> ParallelIterator for Bytes<'ch> {
581    type Item = u8;
582
583    fn drive_unindexed<C>(self, consumer: C) -> C::Result
584    where
585        C: UnindexedConsumer<Self::Item>,
586    {
587        bridge_unindexed(BytesProducer { chars: self.chars }, consumer)
588    }
589}
590
591impl<'ch> UnindexedProducer for BytesProducer<'ch> {
592    type Item = u8;
593
594    fn split(self) -> (Self, Option<Self>) {
595        match split(self.chars) {
596            Some((left, right)) => (
597                BytesProducer { chars: left },
598                Some(BytesProducer { chars: right }),
599            ),
600            None => (self, None),
601        }
602    }
603
604    fn fold_with<F>(self, folder: F) -> F
605    where
606        F: Folder<Self::Item>,
607    {
608        folder.consume_iter(self.chars.bytes())
609    }
610}
611
612// /////////////////////////////////////////////////////////////////////////
613
614/// Parallel iterator over a string encoded as UTF-16
615#[derive(Debug, Clone)]
616pub struct EncodeUtf16<'ch> {
617    chars: &'ch str,
618}
619
620struct EncodeUtf16Producer<'ch> {
621    chars: &'ch str,
622}
623
624impl<'ch> ParallelIterator for EncodeUtf16<'ch> {
625    type Item = u16;
626
627    fn drive_unindexed<C>(self, consumer: C) -> C::Result
628    where
629        C: UnindexedConsumer<Self::Item>,
630    {
631        bridge_unindexed(EncodeUtf16Producer { chars: self.chars }, consumer)
632    }
633}
634
635impl<'ch> UnindexedProducer for EncodeUtf16Producer<'ch> {
636    type Item = u16;
637
638    fn split(self) -> (Self, Option<Self>) {
639        match split(self.chars) {
640            Some((left, right)) => (
641                EncodeUtf16Producer { chars: left },
642                Some(EncodeUtf16Producer { chars: right }),
643            ),
644            None => (self, None),
645        }
646    }
647
648    fn fold_with<F>(self, folder: F) -> F
649    where
650        F: Folder<Self::Item>,
651    {
652        folder.consume_iter(self.chars.encode_utf16())
653    }
654}
655
656// /////////////////////////////////////////////////////////////////////////
657
658/// Parallel iterator over substrings separated by a pattern
659#[derive(Debug, Clone)]
660pub struct Split<'ch, P: Pattern> {
661    chars: &'ch str,
662    separator: P,
663}
664
665impl<'ch, P: Pattern> Split<'ch, P> {
666    fn new(chars: &'ch str, separator: P) -> Self {
667        Split { chars, separator }
668    }
669}
670
671impl<'ch, P: Pattern> ParallelIterator for Split<'ch, P> {
672    type Item = &'ch str;
673
674    fn drive_unindexed<C>(self, consumer: C) -> C::Result
675    where
676        C: UnindexedConsumer<Self::Item>,
677    {
678        let producer = SplitProducer::new(self.chars, &self.separator);
679        bridge_unindexed(producer, consumer)
680    }
681}
682
683/// Implement support for `SplitProducer`.
684impl<'ch, P: Pattern> Fissile<P> for &'ch str {
685    fn length(&self) -> usize {
686        self.len()
687    }
688
689    fn midpoint(&self, end: usize) -> usize {
690        // First find a suitable UTF-8 boundary.
691        find_char_midpoint(&self[..end])
692    }
693
694    fn find(&self, separator: &P, start: usize, end: usize) -> Option<usize> {
695        separator.find_in(&self[start..end])
696    }
697
698    fn rfind(&self, separator: &P, end: usize) -> Option<usize> {
699        separator.rfind_in(&self[..end])
700    }
701
702    fn split_once<const INCL: bool>(self, index: usize) -> (Self, Self) {
703        if INCL {
704            // include the separator in the left side
705            let separator = self[index..].chars().next().unwrap();
706            self.split_at(index + separator.len_utf8())
707        } else {
708            let (left, right) = self.split_at(index);
709            let mut right_iter = right.chars();
710            right_iter.next(); // skip the separator
711            (left, right_iter.as_str())
712        }
713    }
714
715    fn fold_splits<F, const INCL: bool>(self, separator: &P, folder: F, skip_last: bool) -> F
716    where
717        F: Folder<Self>,
718    {
719        if INCL {
720            debug_assert!(!skip_last);
721            separator.fold_inclusive_splits(self, folder)
722        } else {
723            separator.fold_splits(self, folder, skip_last)
724        }
725    }
726}
727
728// /////////////////////////////////////////////////////////////////////////
729
730/// Parallel iterator over substrings separated by a pattern
731#[derive(Debug, Clone)]
732pub struct SplitInclusive<'ch, P: Pattern> {
733    chars: &'ch str,
734    separator: P,
735}
736
737impl<'ch, P: Pattern> SplitInclusive<'ch, P> {
738    fn new(chars: &'ch str, separator: P) -> Self {
739        SplitInclusive { chars, separator }
740    }
741}
742
743impl<'ch, P: Pattern> ParallelIterator for SplitInclusive<'ch, P> {
744    type Item = &'ch str;
745
746    fn drive_unindexed<C>(self, consumer: C) -> C::Result
747    where
748        C: UnindexedConsumer<Self::Item>,
749    {
750        let producer = SplitInclusiveProducer::new_incl(self.chars, &self.separator);
751        bridge_unindexed(producer, consumer)
752    }
753}
754
755// /////////////////////////////////////////////////////////////////////////
756
757/// Parallel iterator over substrings separated by a terminator pattern
758#[derive(Debug, Clone)]
759pub struct SplitTerminator<'ch, P: Pattern> {
760    chars: &'ch str,
761    terminator: P,
762}
763
764struct SplitTerminatorProducer<'ch, 'sep, P: Pattern> {
765    splitter: SplitProducer<'sep, P, &'ch str>,
766    skip_last: bool,
767}
768
769impl<'ch, P: Pattern> SplitTerminator<'ch, P> {
770    fn new(chars: &'ch str, terminator: P) -> Self {
771        SplitTerminator { chars, terminator }
772    }
773}
774
775impl<'ch, 'sep, P: Pattern + 'sep> SplitTerminatorProducer<'ch, 'sep, P> {
776    fn new(chars: &'ch str, terminator: &'sep P) -> Self {
777        SplitTerminatorProducer {
778            splitter: SplitProducer::new(chars, terminator),
779            skip_last: chars.is_empty() || terminator.is_suffix_of(chars),
780        }
781    }
782}
783
784impl<'ch, P: Pattern> ParallelIterator for SplitTerminator<'ch, P> {
785    type Item = &'ch str;
786
787    fn drive_unindexed<C>(self, consumer: C) -> C::Result
788    where
789        C: UnindexedConsumer<Self::Item>,
790    {
791        let producer = SplitTerminatorProducer::new(self.chars, &self.terminator);
792        bridge_unindexed(producer, consumer)
793    }
794}
795
796impl<'ch, 'sep, P: Pattern + 'sep> UnindexedProducer for SplitTerminatorProducer<'ch, 'sep, P> {
797    type Item = &'ch str;
798
799    fn split(mut self) -> (Self, Option<Self>) {
800        let (left, right) = self.splitter.split();
801        self.splitter = left;
802        let right = right.map(|right| {
803            let skip_last = self.skip_last;
804            self.skip_last = false;
805            SplitTerminatorProducer {
806                splitter: right,
807                skip_last,
808            }
809        });
810        (self, right)
811    }
812
813    fn fold_with<F>(self, folder: F) -> F
814    where
815        F: Folder<Self::Item>,
816    {
817        self.splitter.fold_with(folder, self.skip_last)
818    }
819}
820
821// /////////////////////////////////////////////////////////////////////////
822
/// Parallel iterator over lines in a string
///
/// Wraps the string slice whose lines are produced; line endings are not
/// part of the yielded items.
#[derive(Debug, Clone)]
pub struct Lines<'ch>(&'ch str);
826
/// Removes one trailing `'\r'` from `line`, if present; otherwise returns
/// `line` unchanged.
#[inline]
fn no_carriage_return(line: &str) -> &str {
    match line.strip_suffix('\r') {
        Some(stripped) => stripped,
        None => line,
    }
}
831
832impl<'ch> ParallelIterator for Lines<'ch> {
833    type Item = &'ch str;
834
835    fn drive_unindexed<C>(self, consumer: C) -> C::Result
836    where
837        C: UnindexedConsumer<Self::Item>,
838    {
839        self.0
840            .par_split_terminator('\n')
841            .map(no_carriage_return)
842            .drive_unindexed(consumer)
843    }
844}
845
846// /////////////////////////////////////////////////////////////////////////
847
/// Parallel iterator over substrings separated by whitespace
///
/// Wraps the string slice to split; empty substrings are never yielded.
#[derive(Debug, Clone)]
pub struct SplitWhitespace<'ch>(&'ch str);
851
/// Filter predicate that keeps only non-empty string slices.
#[inline]
fn not_empty(s: &&str) -> bool {
    let text: &str = s;
    !text.is_empty()
}
856
857impl<'ch> ParallelIterator for SplitWhitespace<'ch> {
858    type Item = &'ch str;
859
860    fn drive_unindexed<C>(self, consumer: C) -> C::Result
861    where
862        C: UnindexedConsumer<Self::Item>,
863    {
864        self.0
865            .par_split(char::is_whitespace)
866            .filter(not_empty)
867            .drive_unindexed(consumer)
868    }
869}
870
871// /////////////////////////////////////////////////////////////////////////
872
/// Parallel iterator over substrings separated by ASCII whitespace
///
/// Wraps the string slice to split; empty substrings are never yielded.
#[derive(Debug, Clone)]
pub struct SplitAsciiWhitespace<'ch>(&'ch str);
876
/// By-value adapter for `char::is_ascii_whitespace`, which takes `&self`
/// and so cannot be passed directly where a `Fn(char) -> bool` is expected.
#[inline]
fn is_ascii_whitespace(c: char) -> bool {
    char::is_ascii_whitespace(&c)
}
881
882impl<'ch> ParallelIterator for SplitAsciiWhitespace<'ch> {
883    type Item = &'ch str;
884
885    fn drive_unindexed<C>(self, consumer: C) -> C::Result
886    where
887        C: UnindexedConsumer<Self::Item>,
888    {
889        self.0
890            .par_split(is_ascii_whitespace)
891            .filter(not_empty)
892            .drive_unindexed(consumer)
893    }
894}
895
896// /////////////////////////////////////////////////////////////////////////
897
898/// Parallel iterator over substrings that match a pattern
899#[derive(Debug, Clone)]
900pub struct Matches<'ch, P: Pattern> {
901    chars: &'ch str,
902    pattern: P,
903}
904
905struct MatchesProducer<'ch, 'pat, P: Pattern> {
906    chars: &'ch str,
907    pattern: &'pat P,
908}
909
910impl<'ch, P: Pattern> ParallelIterator for Matches<'ch, P> {
911    type Item = &'ch str;
912
913    fn drive_unindexed<C>(self, consumer: C) -> C::Result
914    where
915        C: UnindexedConsumer<Self::Item>,
916    {
917        let producer = MatchesProducer {
918            chars: self.chars,
919            pattern: &self.pattern,
920        };
921        bridge_unindexed(producer, consumer)
922    }
923}
924
925impl<'ch, 'pat, P: Pattern> UnindexedProducer for MatchesProducer<'ch, 'pat, P> {
926    type Item = &'ch str;
927
928    fn split(self) -> (Self, Option<Self>) {
929        match split(self.chars) {
930            Some((left, right)) => (
931                MatchesProducer {
932                    chars: left,
933                    ..self
934                },
935                Some(MatchesProducer {
936                    chars: right,
937                    ..self
938                }),
939            ),
940            None => (self, None),
941        }
942    }
943
944    fn fold_with<F>(self, folder: F) -> F
945    where
946        F: Folder<Self::Item>,
947    {
948        self.pattern.fold_matches(self.chars, folder)
949    }
950}
951
952// /////////////////////////////////////////////////////////////////////////
953
954/// Parallel iterator over substrings that match a pattern, with their positions
955#[derive(Debug, Clone)]
956pub struct MatchIndices<'ch, P: Pattern> {
957    chars: &'ch str,
958    pattern: P,
959}
960
961struct MatchIndicesProducer<'ch, 'pat, P: Pattern> {
962    index: usize,
963    chars: &'ch str,
964    pattern: &'pat P,
965}
966
967impl<'ch, P: Pattern> ParallelIterator for MatchIndices<'ch, P> {
968    type Item = (usize, &'ch str);
969
970    fn drive_unindexed<C>(self, consumer: C) -> C::Result
971    where
972        C: UnindexedConsumer<Self::Item>,
973    {
974        let producer = MatchIndicesProducer {
975            index: 0,
976            chars: self.chars,
977            pattern: &self.pattern,
978        };
979        bridge_unindexed(producer, consumer)
980    }
981}
982
983impl<'ch, 'pat, P: Pattern> UnindexedProducer for MatchIndicesProducer<'ch, 'pat, P> {
984    type Item = (usize, &'ch str);
985
986    fn split(self) -> (Self, Option<Self>) {
987        match split(self.chars) {
988            Some((left, right)) => (
989                MatchIndicesProducer {
990                    chars: left,
991                    ..self
992                },
993                Some(MatchIndicesProducer {
994                    chars: right,
995                    index: self.index + left.len(),
996                    ..self
997                }),
998            ),
999            None => (self, None),
1000        }
1001    }
1002
1003    fn fold_with<F>(self, folder: F) -> F
1004    where
1005        F: Folder<Self::Item>,
1006    {
1007        self.pattern
1008            .fold_match_indices(self.chars, folder, self.index)
1009    }
1010}