1use crate::iter::plumbing::*;
18use crate::iter::*;
19use crate::split_producer::*;
20
/// Reports whether `b` may start a UTF-8 encoded character.
///
/// Only continuation bytes (bit pattern `0b10xx_xxxx`) yield `false`.
#[inline]
fn is_char_boundary(b: u8) -> bool {
    // Keep the top two bits; `10` marks a continuation byte.
    (b & 0xC0) != 0x80
}
28
29#[inline]
31fn find_char_midpoint(chars: &str) -> usize {
32 let mid = chars.len() / 2;
33
34 let (left, right) = chars.as_bytes().split_at(mid);
38 match right.iter().copied().position(is_char_boundary) {
39 Some(i) => mid + i,
40 None => left
41 .iter()
42 .copied()
43 .rposition(is_char_boundary)
44 .unwrap_or(0),
45 }
46}
47
48#[inline]
50fn split(chars: &str) -> Option<(&str, &str)> {
51 let index = find_char_midpoint(chars);
52 if index > 0 {
53 Some(chars.split_at(index))
54 } else {
55 None
56 }
57}
58
59pub trait ParallelString {
61 fn as_parallel_string(&self) -> &str;
64
65 fn par_chars(&self) -> Chars<'_> {
75 Chars {
76 chars: self.as_parallel_string(),
77 }
78 }
79
80 fn par_char_indices(&self) -> CharIndices<'_> {
90 CharIndices {
91 chars: self.as_parallel_string(),
92 }
93 }
94
95 fn par_bytes(&self) -> Bytes<'_> {
110 Bytes {
111 chars: self.as_parallel_string(),
112 }
113 }
114
115 fn par_encode_utf16(&self) -> EncodeUtf16<'_> {
134 EncodeUtf16 {
135 chars: self.as_parallel_string(),
136 }
137 }
138
139 fn par_split<P: Pattern>(&self, separator: P) -> Split<'_, P> {
157 Split::new(self.as_parallel_string(), separator)
158 }
159
160 fn par_split_inclusive<P: Pattern>(&self, separator: P) -> SplitInclusive<'_, P> {
178 SplitInclusive::new(self.as_parallel_string(), separator)
179 }
180
181 fn par_split_terminator<P: Pattern>(&self, terminator: P) -> SplitTerminator<'_, P> {
200 SplitTerminator::new(self.as_parallel_string(), terminator)
201 }
202
203 fn par_lines(&self) -> Lines<'_> {
219 Lines(self.as_parallel_string())
220 }
221
222 fn par_split_whitespace(&self) -> SplitWhitespace<'_> {
258 SplitWhitespace(self.as_parallel_string())
259 }
260
261 fn par_split_ascii_whitespace(&self) -> SplitAsciiWhitespace<'_> {
295 SplitAsciiWhitespace(self.as_parallel_string())
296 }
297
298 fn par_matches<P: Pattern>(&self, pattern: P) -> Matches<'_, P> {
316 Matches {
317 chars: self.as_parallel_string(),
318 pattern,
319 }
320 }
321
322 fn par_match_indices<P: Pattern>(&self, pattern: P) -> MatchIndices<'_, P> {
339 MatchIndices {
340 chars: self.as_parallel_string(),
341 pattern,
342 }
343 }
344}
345
346impl ParallelString for str {
347 #[inline]
348 fn as_parallel_string(&self) -> &str {
349 self
350 }
351}
352
353mod private {
360 use crate::iter::plumbing::Folder;
361
362 pub trait Pattern: Sized + Sync + Send {
367 private_decl! {}
368 fn find_in(&self, haystack: &str) -> Option<usize>;
369 fn rfind_in(&self, haystack: &str) -> Option<usize>;
370 fn is_suffix_of(&self, haystack: &str) -> bool;
371 fn fold_splits<'ch, F>(&self, haystack: &'ch str, folder: F, skip_last: bool) -> F
372 where
373 F: Folder<&'ch str>;
374 fn fold_inclusive_splits<'ch, F>(&self, haystack: &'ch str, folder: F) -> F
375 where
376 F: Folder<&'ch str>;
377 fn fold_matches<'ch, F>(&self, haystack: &'ch str, folder: F) -> F
378 where
379 F: Folder<&'ch str>;
380 fn fold_match_indices<'ch, F>(&self, haystack: &'ch str, folder: F, base: usize) -> F
381 where
382 F: Folder<(usize, &'ch str)>;
383 }
384}
385use self::private::Pattern;
386
/// Builds a closure that shifts the index of an `(index, item)` pair by
/// `base` and leaves the item untouched.
#[inline]
fn offset<T>(base: usize) -> impl Fn((usize, T)) -> (usize, T) {
    move |pair: (usize, T)| {
        let (index, item) = pair;
        (base + index, item)
    }
}
391
// Expands to the body of a `Pattern` impl. Each generated method forwards to
// the `str` method of the same purpose, using `$pattern` as the std pattern.
// The caller passes `self` in as `$self` so that `$pattern` can refer to it.
macro_rules! impl_pattern {
    (&$self:ident => $pattern:expr) => {
        private_impl! {}

        #[inline]
        fn find_in(&$self, haystack: &str) -> Option<usize> {
            haystack.find($pattern)
        }

        #[inline]
        fn rfind_in(&$self, haystack: &str) -> Option<usize> {
            haystack.rfind($pattern)
        }

        #[inline]
        fn is_suffix_of(&$self, haystack: &str) -> bool {
            haystack.ends_with($pattern)
        }

        fn fold_splits<'ch, F: Folder<&'ch str>>(
            &$self,
            haystack: &'ch str,
            folder: F,
            skip_last: bool,
        ) -> F {
            let mut pieces = haystack.split($pattern);
            if skip_last {
                // Throw away the final piece before folding the rest.
                pieces.next_back();
            }
            folder.consume_iter(pieces)
        }

        fn fold_inclusive_splits<'ch, F: Folder<&'ch str>>(
            &$self,
            haystack: &'ch str,
            folder: F,
        ) -> F {
            let pieces = haystack.split_inclusive($pattern);
            folder.consume_iter(pieces)
        }

        fn fold_matches<'ch, F: Folder<&'ch str>>(&$self, haystack: &'ch str, folder: F) -> F {
            let found = haystack.matches($pattern);
            folder.consume_iter(found)
        }

        fn fold_match_indices<'ch, F: Folder<(usize, &'ch str)>>(
            &$self,
            haystack: &'ch str,
            folder: F,
            base: usize,
        ) -> F {
            // Shift every local index by `base`.
            let found = haystack.match_indices($pattern).map(offset(base));
            folder.consume_iter(found)
        }
    };
}
444
445impl Pattern for char {
446 impl_pattern!(&self => *self);
447}
448
449impl Pattern for &[char] {
450 impl_pattern!(&self => *self);
451}
452
453impl<const N: usize> Pattern for [char; N] {
457 impl_pattern!(&self => self.as_slice());
458}
459
460impl<const N: usize> Pattern for &[char; N] {
461 impl_pattern!(&self => self.as_slice());
462}
463
464impl<FN: Sync + Send + Fn(char) -> bool> Pattern for FN {
465 impl_pattern!(&self => self);
466}
467
468#[derive(Debug, Clone)]
472pub struct Chars<'ch> {
473 chars: &'ch str,
474}
475
476struct CharsProducer<'ch> {
477 chars: &'ch str,
478}
479
480impl<'ch> ParallelIterator for Chars<'ch> {
481 type Item = char;
482
483 fn drive_unindexed<C>(self, consumer: C) -> C::Result
484 where
485 C: UnindexedConsumer<Self::Item>,
486 {
487 bridge_unindexed(CharsProducer { chars: self.chars }, consumer)
488 }
489}
490
491impl<'ch> UnindexedProducer for CharsProducer<'ch> {
492 type Item = char;
493
494 fn split(self) -> (Self, Option<Self>) {
495 match split(self.chars) {
496 Some((left, right)) => (
497 CharsProducer { chars: left },
498 Some(CharsProducer { chars: right }),
499 ),
500 None => (self, None),
501 }
502 }
503
504 fn fold_with<F>(self, folder: F) -> F
505 where
506 F: Folder<Self::Item>,
507 {
508 folder.consume_iter(self.chars.chars())
509 }
510}
511
512#[derive(Debug, Clone)]
516pub struct CharIndices<'ch> {
517 chars: &'ch str,
518}
519
520struct CharIndicesProducer<'ch> {
521 index: usize,
522 chars: &'ch str,
523}
524
525impl<'ch> ParallelIterator for CharIndices<'ch> {
526 type Item = (usize, char);
527
528 fn drive_unindexed<C>(self, consumer: C) -> C::Result
529 where
530 C: UnindexedConsumer<Self::Item>,
531 {
532 let producer = CharIndicesProducer {
533 index: 0,
534 chars: self.chars,
535 };
536 bridge_unindexed(producer, consumer)
537 }
538}
539
540impl<'ch> UnindexedProducer for CharIndicesProducer<'ch> {
541 type Item = (usize, char);
542
543 fn split(self) -> (Self, Option<Self>) {
544 match split(self.chars) {
545 Some((left, right)) => (
546 CharIndicesProducer {
547 chars: left,
548 ..self
549 },
550 Some(CharIndicesProducer {
551 chars: right,
552 index: self.index + left.len(),
553 }),
554 ),
555 None => (self, None),
556 }
557 }
558
559 fn fold_with<F>(self, folder: F) -> F
560 where
561 F: Folder<Self::Item>,
562 {
563 let base = self.index;
564 folder.consume_iter(self.chars.char_indices().map(offset(base)))
565 }
566}
567
568#[derive(Debug, Clone)]
572pub struct Bytes<'ch> {
573 chars: &'ch str,
574}
575
576struct BytesProducer<'ch> {
577 chars: &'ch str,
578}
579
580impl<'ch> ParallelIterator for Bytes<'ch> {
581 type Item = u8;
582
583 fn drive_unindexed<C>(self, consumer: C) -> C::Result
584 where
585 C: UnindexedConsumer<Self::Item>,
586 {
587 bridge_unindexed(BytesProducer { chars: self.chars }, consumer)
588 }
589}
590
591impl<'ch> UnindexedProducer for BytesProducer<'ch> {
592 type Item = u8;
593
594 fn split(self) -> (Self, Option<Self>) {
595 match split(self.chars) {
596 Some((left, right)) => (
597 BytesProducer { chars: left },
598 Some(BytesProducer { chars: right }),
599 ),
600 None => (self, None),
601 }
602 }
603
604 fn fold_with<F>(self, folder: F) -> F
605 where
606 F: Folder<Self::Item>,
607 {
608 folder.consume_iter(self.chars.bytes())
609 }
610}
611
612#[derive(Debug, Clone)]
616pub struct EncodeUtf16<'ch> {
617 chars: &'ch str,
618}
619
620struct EncodeUtf16Producer<'ch> {
621 chars: &'ch str,
622}
623
624impl<'ch> ParallelIterator for EncodeUtf16<'ch> {
625 type Item = u16;
626
627 fn drive_unindexed<C>(self, consumer: C) -> C::Result
628 where
629 C: UnindexedConsumer<Self::Item>,
630 {
631 bridge_unindexed(EncodeUtf16Producer { chars: self.chars }, consumer)
632 }
633}
634
635impl<'ch> UnindexedProducer for EncodeUtf16Producer<'ch> {
636 type Item = u16;
637
638 fn split(self) -> (Self, Option<Self>) {
639 match split(self.chars) {
640 Some((left, right)) => (
641 EncodeUtf16Producer { chars: left },
642 Some(EncodeUtf16Producer { chars: right }),
643 ),
644 None => (self, None),
645 }
646 }
647
648 fn fold_with<F>(self, folder: F) -> F
649 where
650 F: Folder<Self::Item>,
651 {
652 folder.consume_iter(self.chars.encode_utf16())
653 }
654}
655
656#[derive(Debug, Clone)]
660pub struct Split<'ch, P: Pattern> {
661 chars: &'ch str,
662 separator: P,
663}
664
665impl<'ch, P: Pattern> Split<'ch, P> {
666 fn new(chars: &'ch str, separator: P) -> Self {
667 Split { chars, separator }
668 }
669}
670
671impl<'ch, P: Pattern> ParallelIterator for Split<'ch, P> {
672 type Item = &'ch str;
673
674 fn drive_unindexed<C>(self, consumer: C) -> C::Result
675 where
676 C: UnindexedConsumer<Self::Item>,
677 {
678 let producer = SplitProducer::new(self.chars, &self.separator);
679 bridge_unindexed(producer, consumer)
680 }
681}
682
683impl<'ch, P: Pattern> Fissile<P> for &'ch str {
685 fn length(&self) -> usize {
686 self.len()
687 }
688
689 fn midpoint(&self, end: usize) -> usize {
690 find_char_midpoint(&self[..end])
692 }
693
694 fn find(&self, separator: &P, start: usize, end: usize) -> Option<usize> {
695 separator.find_in(&self[start..end])
696 }
697
698 fn rfind(&self, separator: &P, end: usize) -> Option<usize> {
699 separator.rfind_in(&self[..end])
700 }
701
702 fn split_once<const INCL: bool>(self, index: usize) -> (Self, Self) {
703 if INCL {
704 let separator = self[index..].chars().next().unwrap();
706 self.split_at(index + separator.len_utf8())
707 } else {
708 let (left, right) = self.split_at(index);
709 let mut right_iter = right.chars();
710 right_iter.next(); (left, right_iter.as_str())
712 }
713 }
714
715 fn fold_splits<F, const INCL: bool>(self, separator: &P, folder: F, skip_last: bool) -> F
716 where
717 F: Folder<Self>,
718 {
719 if INCL {
720 debug_assert!(!skip_last);
721 separator.fold_inclusive_splits(self, folder)
722 } else {
723 separator.fold_splits(self, folder, skip_last)
724 }
725 }
726}
727
728#[derive(Debug, Clone)]
732pub struct SplitInclusive<'ch, P: Pattern> {
733 chars: &'ch str,
734 separator: P,
735}
736
737impl<'ch, P: Pattern> SplitInclusive<'ch, P> {
738 fn new(chars: &'ch str, separator: P) -> Self {
739 SplitInclusive { chars, separator }
740 }
741}
742
743impl<'ch, P: Pattern> ParallelIterator for SplitInclusive<'ch, P> {
744 type Item = &'ch str;
745
746 fn drive_unindexed<C>(self, consumer: C) -> C::Result
747 where
748 C: UnindexedConsumer<Self::Item>,
749 {
750 let producer = SplitInclusiveProducer::new_incl(self.chars, &self.separator);
751 bridge_unindexed(producer, consumer)
752 }
753}
754
755#[derive(Debug, Clone)]
759pub struct SplitTerminator<'ch, P: Pattern> {
760 chars: &'ch str,
761 terminator: P,
762}
763
764struct SplitTerminatorProducer<'ch, 'sep, P: Pattern> {
765 splitter: SplitProducer<'sep, P, &'ch str>,
766 skip_last: bool,
767}
768
769impl<'ch, P: Pattern> SplitTerminator<'ch, P> {
770 fn new(chars: &'ch str, terminator: P) -> Self {
771 SplitTerminator { chars, terminator }
772 }
773}
774
775impl<'ch, 'sep, P: Pattern + 'sep> SplitTerminatorProducer<'ch, 'sep, P> {
776 fn new(chars: &'ch str, terminator: &'sep P) -> Self {
777 SplitTerminatorProducer {
778 splitter: SplitProducer::new(chars, terminator),
779 skip_last: chars.is_empty() || terminator.is_suffix_of(chars),
780 }
781 }
782}
783
784impl<'ch, P: Pattern> ParallelIterator for SplitTerminator<'ch, P> {
785 type Item = &'ch str;
786
787 fn drive_unindexed<C>(self, consumer: C) -> C::Result
788 where
789 C: UnindexedConsumer<Self::Item>,
790 {
791 let producer = SplitTerminatorProducer::new(self.chars, &self.terminator);
792 bridge_unindexed(producer, consumer)
793 }
794}
795
796impl<'ch, 'sep, P: Pattern + 'sep> UnindexedProducer for SplitTerminatorProducer<'ch, 'sep, P> {
797 type Item = &'ch str;
798
799 fn split(mut self) -> (Self, Option<Self>) {
800 let (left, right) = self.splitter.split();
801 self.splitter = left;
802 let right = right.map(|right| {
803 let skip_last = self.skip_last;
804 self.skip_last = false;
805 SplitTerminatorProducer {
806 splitter: right,
807 skip_last,
808 }
809 });
810 (self, right)
811 }
812
813 fn fold_with<F>(self, folder: F) -> F
814 where
815 F: Folder<Self::Item>,
816 {
817 self.splitter.fold_with(folder, self.skip_last)
818 }
819}
820
/// Parallel iterator over the lines of a string.
#[derive(Clone, Debug)]
pub struct Lines<'ch>(&'ch str);
826
/// Removes a single trailing `'\r'` from `line`, if there is one.
#[inline]
fn no_carriage_return(line: &str) -> &str {
    match line.strip_suffix('\r') {
        Some(trimmed) => trimmed,
        None => line,
    }
}
831
832impl<'ch> ParallelIterator for Lines<'ch> {
833 type Item = &'ch str;
834
835 fn drive_unindexed<C>(self, consumer: C) -> C::Result
836 where
837 C: UnindexedConsumer<Self::Item>,
838 {
839 self.0
840 .par_split_terminator('\n')
841 .map(no_carriage_return)
842 .drive_unindexed(consumer)
843 }
844}
845
/// Parallel iterator over the substrings separated by whitespace.
#[derive(Clone, Debug)]
pub struct SplitWhitespace<'ch>(&'ch str);
851
/// Filter predicate that keeps only non-empty string slices.
#[inline]
fn not_empty(s: &&str) -> bool {
    let text: &str = s;
    !text.is_empty()
}
856
857impl<'ch> ParallelIterator for SplitWhitespace<'ch> {
858 type Item = &'ch str;
859
860 fn drive_unindexed<C>(self, consumer: C) -> C::Result
861 where
862 C: UnindexedConsumer<Self::Item>,
863 {
864 self.0
865 .par_split(char::is_whitespace)
866 .filter(not_empty)
867 .drive_unindexed(consumer)
868 }
869}
870
/// Parallel iterator over the substrings separated by ASCII whitespace.
#[derive(Clone, Debug)]
pub struct SplitAsciiWhitespace<'ch>(&'ch str);
876
/// By-value form of `char::is_ascii_whitespace`, so it can be used as a
/// `Fn(char) -> bool` pattern.
///
/// Accepts space, tab, line feed, form feed and carriage return.
#[inline]
fn is_ascii_whitespace(c: char) -> bool {
    matches!(c, ' ' | '\t' | '\n' | '\x0C' | '\r')
}
881
882impl<'ch> ParallelIterator for SplitAsciiWhitespace<'ch> {
883 type Item = &'ch str;
884
885 fn drive_unindexed<C>(self, consumer: C) -> C::Result
886 where
887 C: UnindexedConsumer<Self::Item>,
888 {
889 self.0
890 .par_split(is_ascii_whitespace)
891 .filter(not_empty)
892 .drive_unindexed(consumer)
893 }
894}
895
896#[derive(Debug, Clone)]
900pub struct Matches<'ch, P: Pattern> {
901 chars: &'ch str,
902 pattern: P,
903}
904
905struct MatchesProducer<'ch, 'pat, P: Pattern> {
906 chars: &'ch str,
907 pattern: &'pat P,
908}
909
910impl<'ch, P: Pattern> ParallelIterator for Matches<'ch, P> {
911 type Item = &'ch str;
912
913 fn drive_unindexed<C>(self, consumer: C) -> C::Result
914 where
915 C: UnindexedConsumer<Self::Item>,
916 {
917 let producer = MatchesProducer {
918 chars: self.chars,
919 pattern: &self.pattern,
920 };
921 bridge_unindexed(producer, consumer)
922 }
923}
924
925impl<'ch, 'pat, P: Pattern> UnindexedProducer for MatchesProducer<'ch, 'pat, P> {
926 type Item = &'ch str;
927
928 fn split(self) -> (Self, Option<Self>) {
929 match split(self.chars) {
930 Some((left, right)) => (
931 MatchesProducer {
932 chars: left,
933 ..self
934 },
935 Some(MatchesProducer {
936 chars: right,
937 ..self
938 }),
939 ),
940 None => (self, None),
941 }
942 }
943
944 fn fold_with<F>(self, folder: F) -> F
945 where
946 F: Folder<Self::Item>,
947 {
948 self.pattern.fold_matches(self.chars, folder)
949 }
950}
951
952#[derive(Debug, Clone)]
956pub struct MatchIndices<'ch, P: Pattern> {
957 chars: &'ch str,
958 pattern: P,
959}
960
961struct MatchIndicesProducer<'ch, 'pat, P: Pattern> {
962 index: usize,
963 chars: &'ch str,
964 pattern: &'pat P,
965}
966
967impl<'ch, P: Pattern> ParallelIterator for MatchIndices<'ch, P> {
968 type Item = (usize, &'ch str);
969
970 fn drive_unindexed<C>(self, consumer: C) -> C::Result
971 where
972 C: UnindexedConsumer<Self::Item>,
973 {
974 let producer = MatchIndicesProducer {
975 index: 0,
976 chars: self.chars,
977 pattern: &self.pattern,
978 };
979 bridge_unindexed(producer, consumer)
980 }
981}
982
983impl<'ch, 'pat, P: Pattern> UnindexedProducer for MatchIndicesProducer<'ch, 'pat, P> {
984 type Item = (usize, &'ch str);
985
986 fn split(self) -> (Self, Option<Self>) {
987 match split(self.chars) {
988 Some((left, right)) => (
989 MatchIndicesProducer {
990 chars: left,
991 ..self
992 },
993 Some(MatchIndicesProducer {
994 chars: right,
995 index: self.index + left.len(),
996 ..self
997 }),
998 ),
999 None => (self, None),
1000 }
1001 }
1002
1003 fn fold_with<F>(self, folder: F) -> F
1004 where
1005 F: Folder<Self::Item>,
1006 {
1007 self.pattern
1008 .fold_match_indices(self.chars, folder, self.index)
1009 }
1010}