rayon/slice/mod.rs

//! Parallel iterator types for [slices][std::slice]
//!
//! You will rarely need to interact with this module directly unless you need
//! to name one of the iterator types.
//!
//! [std::slice]: https://doc.rust-lang.org/stable/std/slice/
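//!
//! For example, one situation where a type from this module must be named is a
//! helper function that takes a slice's parallel iterator as an argument. The
//! `count_evens` function below is purely an illustrative sketch, not an item
//! of this crate:
//!
//! ```
//! use rayon::prelude::*;
//! use rayon::slice::Iter;
//!
//! // `count_evens` is a hypothetical helper used only for this example.
//! fn count_evens(iter: Iter<'_, i32>) -> usize {
//!     iter.filter(|&&x| x % 2 == 0).count()
//! }
//!
//! let v = [1, 2, 3, 4];
//! let slice: &[i32] = &v;
//! assert_eq!(count_evens(slice.par_iter()), 2);
//! ```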

mod chunk_by;
mod chunks;
mod mergesort;
mod quicksort;
mod rchunks;

mod test;

use self::mergesort::par_mergesort;
use self::quicksort::par_quicksort;
use crate::iter::plumbing::*;
use crate::iter::*;
use crate::split_producer::*;

use std::cmp::Ordering;
use std::fmt::{self, Debug};
use std::mem;

pub use self::chunk_by::{ChunkBy, ChunkByMut};
pub use self::chunks::{Chunks, ChunksExact, ChunksExactMut, ChunksMut};
pub use self::rchunks::{RChunks, RChunksExact, RChunksExactMut, RChunksMut};

/// Parallel extensions for slices.
pub trait ParallelSlice<T: Sync> {
    /// Returns a plain slice, which is used to implement the rest of the
    /// parallel methods.
    fn as_parallel_slice(&self) -> &[T];

    /// Returns a parallel iterator over subslices separated by elements that
    /// match the separator.
    ///
    /// # Examples
    ///
    /// ```
    /// use rayon::prelude::*;
    /// let products: Vec<_> = [1, 2, 3, 0, 2, 4, 8, 0, 3, 6, 9]
    ///     .par_split(|i| *i == 0)
    ///     .map(|numbers| numbers.iter().product::<i32>())
    ///     .collect();
    /// assert_eq!(products, [6, 64, 162]);
    /// ```
    fn par_split<P>(&self, separator: P) -> Split<'_, T, P>
    where
        P: Fn(&T) -> bool + Sync + Send,
    {
        Split {
            slice: self.as_parallel_slice(),
            separator,
        }
    }

    /// Returns a parallel iterator over subslices separated by elements that
    /// match the separator, including the matched part as a terminator.
    ///
    /// # Examples
    ///
    /// ```
    /// use rayon::prelude::*;
    /// let lengths: Vec<_> = [1, 2, 3, 0, 2, 4, 8, 0, 3, 6, 9]
    ///     .par_split_inclusive(|i| *i == 0)
    ///     .map(|numbers| numbers.len())
    ///     .collect();
    /// assert_eq!(lengths, [4, 4, 3]);
    /// ```
    fn par_split_inclusive<P>(&self, separator: P) -> SplitInclusive<'_, T, P>
    where
        P: Fn(&T) -> bool + Sync + Send,
    {
        SplitInclusive {
            slice: self.as_parallel_slice(),
            separator,
        }
    }

    /// Returns a parallel iterator over all contiguous windows of length
    /// `window_size`. The windows overlap.
    ///
    /// # Examples
    ///
    /// ```
    /// use rayon::prelude::*;
    /// let windows: Vec<_> = [1, 2, 3].par_windows(2).collect();
    /// assert_eq!(vec![[1, 2], [2, 3]], windows);
    /// ```
    fn par_windows(&self, window_size: usize) -> Windows<'_, T> {
        Windows {
            window_size,
            slice: self.as_parallel_slice(),
        }
    }

    /// Returns a parallel iterator over at most `chunk_size` elements of
    /// `self` at a time. The chunks do not overlap.
    ///
    /// If the number of elements in the iterator is not divisible by
    /// `chunk_size`, the last chunk may be shorter than `chunk_size`.  All
    /// other chunks will have that exact length.
    ///
    /// # Examples
    ///
    /// ```
    /// use rayon::prelude::*;
    /// let chunks: Vec<_> = [1, 2, 3, 4, 5].par_chunks(2).collect();
    /// assert_eq!(chunks, vec![&[1, 2][..], &[3, 4], &[5]]);
    /// ```
    #[track_caller]
    fn par_chunks(&self, chunk_size: usize) -> Chunks<'_, T> {
        assert!(chunk_size != 0, "chunk_size must not be zero");
        Chunks::new(chunk_size, self.as_parallel_slice())
    }

    /// Returns a parallel iterator over `chunk_size` elements of
    /// `self` at a time. The chunks do not overlap.
    ///
    /// If `chunk_size` does not divide the length of the slice, then the
    /// last up to `chunk_size-1` elements will be omitted and can be
    /// retrieved from the remainder function of the iterator.
    ///
    /// # Examples
    ///
    /// ```
    /// use rayon::prelude::*;
    /// let chunks: Vec<_> = [1, 2, 3, 4, 5].par_chunks_exact(2).collect();
    /// assert_eq!(chunks, vec![&[1, 2][..], &[3, 4]]);
    /// ```
    #[track_caller]
    fn par_chunks_exact(&self, chunk_size: usize) -> ChunksExact<'_, T> {
        assert!(chunk_size != 0, "chunk_size must not be zero");
        ChunksExact::new(chunk_size, self.as_parallel_slice())
    }

    /// Returns a parallel iterator over at most `chunk_size` elements of `self` at a time,
    /// starting at the end. The chunks do not overlap.
    ///
    /// If the number of elements in the iterator is not divisible by
    /// `chunk_size`, the last chunk may be shorter than `chunk_size`.  All
    /// other chunks will have that exact length.
    ///
    /// # Examples
    ///
    /// ```
    /// use rayon::prelude::*;
    /// let chunks: Vec<_> = [1, 2, 3, 4, 5].par_rchunks(2).collect();
    /// assert_eq!(chunks, vec![&[4, 5][..], &[2, 3], &[1]]);
    /// ```
    #[track_caller]
    fn par_rchunks(&self, chunk_size: usize) -> RChunks<'_, T> {
        assert!(chunk_size != 0, "chunk_size must not be zero");
        RChunks::new(chunk_size, self.as_parallel_slice())
    }

    /// Returns a parallel iterator over `chunk_size` elements of `self` at a time,
    /// starting at the end. The chunks do not overlap.
    ///
    /// If `chunk_size` does not divide the length of the slice, then the
    /// last up to `chunk_size-1` elements will be omitted and can be
    /// retrieved from the remainder function of the iterator.
    ///
    /// # Examples
    ///
    /// ```
    /// use rayon::prelude::*;
    /// let chunks: Vec<_> = [1, 2, 3, 4, 5].par_rchunks_exact(2).collect();
    /// assert_eq!(chunks, vec![&[4, 5][..], &[2, 3]]);
    /// ```
    #[track_caller]
    fn par_rchunks_exact(&self, chunk_size: usize) -> RChunksExact<'_, T> {
        assert!(chunk_size != 0, "chunk_size must not be zero");
        RChunksExact::new(chunk_size, self.as_parallel_slice())
    }

    /// Returns a parallel iterator over the slice producing non-overlapping runs
    /// of elements using the predicate to separate them.
    ///
    /// The predicate is called for every pair of consecutive elements,
    /// meaning that it is called on `slice[0]` and `slice[1]`,
    /// followed by `slice[1]` and `slice[2]`, and so on.
    ///
    /// # Examples
    ///
    /// ```
    /// use rayon::prelude::*;
    /// let chunks: Vec<_> = [1, 2, 2, 3, 3, 3].par_chunk_by(|&x, &y| x == y).collect();
    /// assert_eq!(chunks[0], &[1]);
    /// assert_eq!(chunks[1], &[2, 2]);
    /// assert_eq!(chunks[2], &[3, 3, 3]);
    /// ```
    fn par_chunk_by<F>(&self, pred: F) -> ChunkBy<'_, T, F>
    where
        F: Fn(&T, &T) -> bool + Send + Sync,
    {
        ChunkBy::new(self.as_parallel_slice(), pred)
    }
}

impl<T: Sync> ParallelSlice<T> for [T] {
    #[inline]
    fn as_parallel_slice(&self) -> &[T] {
        self
    }
}

/// Parallel extensions for mutable slices.
pub trait ParallelSliceMut<T: Send> {
    /// Returns a plain mutable slice, which is used to implement the rest of
    /// the parallel methods.
    fn as_parallel_slice_mut(&mut self) -> &mut [T];

    /// Returns a parallel iterator over mutable subslices separated by
    /// elements that match the separator.
    ///
    /// # Examples
    ///
    /// ```
    /// use rayon::prelude::*;
    /// let mut array = [1, 2, 3, 0, 2, 4, 8, 0, 3, 6, 9];
    /// array.par_split_mut(|i| *i == 0)
    ///      .for_each(|slice| slice.reverse());
    /// assert_eq!(array, [3, 2, 1, 0, 8, 4, 2, 0, 9, 6, 3]);
    /// ```
    fn par_split_mut<P>(&mut self, separator: P) -> SplitMut<'_, T, P>
    where
        P: Fn(&T) -> bool + Sync + Send,
    {
        SplitMut {
            slice: self.as_parallel_slice_mut(),
            separator,
        }
    }

    /// Returns a parallel iterator over mutable subslices separated by elements
    /// that match the separator, including the matched part as a terminator.
    ///
    /// # Examples
    ///
    /// ```
    /// use rayon::prelude::*;
    /// let mut array = [1, 2, 3, 0, 2, 4, 8, 0, 3, 6, 9];
    /// array.par_split_inclusive_mut(|i| *i == 0)
    ///      .for_each(|slice| slice.reverse());
    /// assert_eq!(array, [0, 3, 2, 1, 0, 8, 4, 2, 9, 6, 3]);
    /// ```
    fn par_split_inclusive_mut<P>(&mut self, separator: P) -> SplitInclusiveMut<'_, T, P>
    where
        P: Fn(&T) -> bool + Sync + Send,
    {
        SplitInclusiveMut {
            slice: self.as_parallel_slice_mut(),
            separator,
        }
    }

    /// Returns a parallel iterator over at most `chunk_size` elements of
    /// `self` at a time. The chunks are mutable and do not overlap.
    ///
    /// If the number of elements in the iterator is not divisible by
    /// `chunk_size`, the last chunk may be shorter than `chunk_size`.  All
    /// other chunks will have that exact length.
    ///
    /// # Examples
    ///
    /// ```
    /// use rayon::prelude::*;
    /// let mut array = [1, 2, 3, 4, 5];
    /// array.par_chunks_mut(2)
    ///      .for_each(|slice| slice.reverse());
    /// assert_eq!(array, [2, 1, 4, 3, 5]);
    /// ```
    #[track_caller]
    fn par_chunks_mut(&mut self, chunk_size: usize) -> ChunksMut<'_, T> {
        assert!(chunk_size != 0, "chunk_size must not be zero");
        ChunksMut::new(chunk_size, self.as_parallel_slice_mut())
    }

    /// Returns a parallel iterator over `chunk_size` elements of
    /// `self` at a time. The chunks are mutable and do not overlap.
    ///
    /// If `chunk_size` does not divide the length of the slice, then the
    /// last up to `chunk_size-1` elements will be omitted and can be
    /// retrieved from the remainder function of the iterator.
    ///
    /// # Examples
    ///
    /// ```
    /// use rayon::prelude::*;
    /// let mut array = [1, 2, 3, 4, 5];
    /// array.par_chunks_exact_mut(3)
    ///      .for_each(|slice| slice.reverse());
    /// assert_eq!(array, [3, 2, 1, 4, 5]);
    /// ```
    #[track_caller]
    fn par_chunks_exact_mut(&mut self, chunk_size: usize) -> ChunksExactMut<'_, T> {
        assert!(chunk_size != 0, "chunk_size must not be zero");
        ChunksExactMut::new(chunk_size, self.as_parallel_slice_mut())
    }

    /// Returns a parallel iterator over at most `chunk_size` elements of `self` at a time,
    /// starting at the end. The chunks are mutable and do not overlap.
    ///
    /// If the number of elements in the iterator is not divisible by
    /// `chunk_size`, the last chunk may be shorter than `chunk_size`.  All
    /// other chunks will have that exact length.
    ///
    /// # Examples
    ///
    /// ```
    /// use rayon::prelude::*;
    /// let mut array = [1, 2, 3, 4, 5];
    /// array.par_rchunks_mut(2)
    ///      .for_each(|slice| slice.reverse());
    /// assert_eq!(array, [1, 3, 2, 5, 4]);
    /// ```
    #[track_caller]
    fn par_rchunks_mut(&mut self, chunk_size: usize) -> RChunksMut<'_, T> {
        assert!(chunk_size != 0, "chunk_size must not be zero");
        RChunksMut::new(chunk_size, self.as_parallel_slice_mut())
    }

    /// Returns a parallel iterator over `chunk_size` elements of `self` at a time,
    /// starting at the end. The chunks are mutable and do not overlap.
    ///
    /// If `chunk_size` does not divide the length of the slice, then the
    /// last up to `chunk_size-1` elements will be omitted and can be
    /// retrieved from the remainder function of the iterator.
    ///
    /// # Examples
    ///
    /// ```
    /// use rayon::prelude::*;
    /// let mut array = [1, 2, 3, 4, 5];
    /// array.par_rchunks_exact_mut(3)
    ///      .for_each(|slice| slice.reverse());
    /// assert_eq!(array, [1, 2, 5, 4, 3]);
    /// ```
    #[track_caller]
    fn par_rchunks_exact_mut(&mut self, chunk_size: usize) -> RChunksExactMut<'_, T> {
        assert!(chunk_size != 0, "chunk_size must not be zero");
        RChunksExactMut::new(chunk_size, self.as_parallel_slice_mut())
    }

    /// Sorts the slice in parallel.
    ///
    /// This sort is stable (i.e., does not reorder equal elements) and *O*(*n* \* log(*n*)) worst-case.
    ///
    /// When applicable, unstable sorting is preferred because it is generally faster than stable
    /// sorting and it doesn't allocate auxiliary memory.
    /// See [`par_sort_unstable`](#method.par_sort_unstable).
    ///
    /// # Current implementation
    ///
    /// The current algorithm is an adaptive merge sort inspired by
    /// [timsort](https://en.wikipedia.org/wiki/Timsort).
    /// It is designed to be very fast in cases where the slice is nearly sorted, or consists of
    /// two or more sorted sequences concatenated one after another.
    ///
    /// Also, it allocates temporary storage the same size as `self`, but for very short slices a
    /// non-allocating insertion sort is used instead.
    ///
    /// In order to sort the slice in parallel, the slice is first divided into smaller chunks and
    /// all chunks are sorted in parallel. Then, adjacent chunks that together form non-descending
    /// or descending runs are concatenated. Finally, the remaining chunks are merged together using
    /// parallel subdivision of chunks and parallel merge operation.
    ///
    /// # Examples
    ///
    /// ```
    /// use rayon::prelude::*;
    ///
    /// let mut v = [-5, 4, 1, -3, 2];
    ///
    /// v.par_sort();
    /// assert_eq!(v, [-5, -3, 1, 2, 4]);
    /// ```
    fn par_sort(&mut self)
    where
        T: Ord,
    {
        par_mergesort(self.as_parallel_slice_mut(), T::lt);
    }

    /// Sorts the slice in parallel with a comparator function.
    ///
    /// This sort is stable (i.e., does not reorder equal elements) and *O*(*n* \* log(*n*)) worst-case.
    ///
    /// The comparator function must define a total ordering for the elements in the slice. If
    /// the ordering is not total, the order of the elements is unspecified. An order is a
    /// total order if it is (for all `a`, `b` and `c`):
    ///
    /// * total and antisymmetric: exactly one of `a < b`, `a == b` or `a > b` is true, and
    /// * transitive, `a < b` and `b < c` implies `a < c`. The same must hold for both `==` and `>`.
    ///
    /// For example, while [`f64`] doesn't implement [`Ord`] because `NaN != NaN`, we can use
    /// `partial_cmp` as our sort function when we know the slice doesn't contain a `NaN`.
    ///
    /// ```
    /// use rayon::prelude::*;
    ///
    /// let mut floats = [5f64, 4.0, 1.0, 3.0, 2.0];
    /// floats.par_sort_by(|a, b| a.partial_cmp(b).unwrap());
    /// assert_eq!(floats, [1.0, 2.0, 3.0, 4.0, 5.0]);
    /// ```
    ///
    /// When applicable, unstable sorting is preferred because it is generally faster than stable
    /// sorting and it doesn't allocate auxiliary memory.
    /// See [`par_sort_unstable_by`](#method.par_sort_unstable_by).
    ///
    /// # Current implementation
    ///
    /// The current algorithm is an adaptive merge sort inspired by
    /// [timsort](https://en.wikipedia.org/wiki/Timsort).
    /// It is designed to be very fast in cases where the slice is nearly sorted, or consists of
    /// two or more sorted sequences concatenated one after another.
    ///
    /// Also, it allocates temporary storage the same size as `self`, but for very short slices a
    /// non-allocating insertion sort is used instead.
    ///
    /// In order to sort the slice in parallel, the slice is first divided into smaller chunks and
    /// all chunks are sorted in parallel. Then, adjacent chunks that together form non-descending
    /// or descending runs are concatenated. Finally, the remaining chunks are merged together using
    /// parallel subdivision of chunks and parallel merge operation.
    ///
    /// # Examples
    ///
    /// ```
    /// use rayon::prelude::*;
    ///
    /// let mut v = [5, 4, 1, 3, 2];
    /// v.par_sort_by(|a, b| a.cmp(b));
    /// assert_eq!(v, [1, 2, 3, 4, 5]);
    ///
    /// // reverse sorting
    /// v.par_sort_by(|a, b| b.cmp(a));
    /// assert_eq!(v, [5, 4, 3, 2, 1]);
    /// ```
    fn par_sort_by<F>(&mut self, compare: F)
    where
        F: Fn(&T, &T) -> Ordering + Sync,
    {
        par_mergesort(self.as_parallel_slice_mut(), |a, b| {
            compare(a, b) == Ordering::Less
        });
    }

    /// Sorts the slice in parallel with a key extraction function.
    ///
    /// This sort is stable (i.e., does not reorder equal elements) and *O*(*m* \* *n* \* log(*n*))
    /// worst-case, where the key function is *O*(*m*).
    ///
    /// For expensive key functions (e.g. functions that are not simple property accesses or
    /// basic operations), [`par_sort_by_cached_key`](#method.par_sort_by_cached_key) is likely to
    /// be significantly faster, as it does not recompute element keys.
    ///
    /// When applicable, unstable sorting is preferred because it is generally faster than stable
    /// sorting and it doesn't allocate auxiliary memory.
    /// See [`par_sort_unstable_by_key`](#method.par_sort_unstable_by_key).
    ///
    /// # Current implementation
    ///
    /// The current algorithm is an adaptive merge sort inspired by
    /// [timsort](https://en.wikipedia.org/wiki/Timsort).
    /// It is designed to be very fast in cases where the slice is nearly sorted, or consists of
    /// two or more sorted sequences concatenated one after another.
    ///
    /// Also, it allocates temporary storage the same size as `self`, but for very short slices a
    /// non-allocating insertion sort is used instead.
    ///
    /// In order to sort the slice in parallel, the slice is first divided into smaller chunks and
    /// all chunks are sorted in parallel. Then, adjacent chunks that together form non-descending
    /// or descending runs are concatenated. Finally, the remaining chunks are merged together using
    /// parallel subdivision of chunks and parallel merge operation.
    ///
    /// # Examples
    ///
    /// ```
    /// use rayon::prelude::*;
    ///
    /// let mut v = [-5i32, 4, 1, -3, 2];
    ///
    /// v.par_sort_by_key(|k| k.abs());
    /// assert_eq!(v, [1, 2, -3, 4, -5]);
    /// ```
    fn par_sort_by_key<K, F>(&mut self, f: F)
    where
        K: Ord,
        F: Fn(&T) -> K + Sync,
    {
        par_mergesort(self.as_parallel_slice_mut(), |a, b| f(a).lt(&f(b)));
    }

    /// Sorts the slice in parallel with a key extraction function.
    ///
    /// During sorting, the key function is called at most once per element, by using
    /// temporary storage to remember the results of key evaluation.
    /// The key function is called in parallel, so the order of calls is completely unspecified.
    ///
    /// This sort is stable (i.e., does not reorder equal elements) and *O*(*m* \* *n* + *n* \* log(*n*))
    /// worst-case, where the key function is *O*(*m*).
    ///
    /// For simple key functions (e.g., functions that are property accesses or
    /// basic operations), [`par_sort_by_key`](#method.par_sort_by_key) is likely to be
    /// faster.
    ///
    /// # Current implementation
    ///
    /// The current algorithm is based on [pattern-defeating quicksort][pdqsort] by Orson Peters,
    /// which combines the fast average case of randomized quicksort with the fast worst case of
    /// heapsort, while achieving linear time on slices with certain patterns. It uses some
    /// randomization to avoid degenerate cases, but with a fixed seed to always provide
    /// deterministic behavior.
    ///
    /// In the worst case, the algorithm allocates temporary storage in a `Vec<(K, usize)>` the
    /// length of the slice.
    ///
    /// All quicksorts work in two stages: partitioning into two halves followed by recursive
    /// calls. The partitioning phase is sequential, but the two recursive calls are performed in
    /// parallel. Finally, after sorting the cached keys, the item positions are updated sequentially.
    ///
    /// [pdqsort]: https://github.com/orlp/pdqsort
    ///
    /// # Examples
    ///
    /// ```
    /// use rayon::prelude::*;
    ///
    /// let mut v = [-5i32, 4, 32, -3, 2];
    ///
    /// v.par_sort_by_cached_key(|k| k.to_string());
    /// assert!(v == [-3, -5, 2, 32, 4]);
    /// ```
    fn par_sort_by_cached_key<K, F>(&mut self, f: F)
    where
        F: Fn(&T) -> K + Sync,
        K: Ord + Send,
    {
        let slice = self.as_parallel_slice_mut();
        let len = slice.len();
        if len < 2 {
            return;
        }

        // Helper macro for indexing our vector by the smallest possible type, to reduce allocation.
        macro_rules! sort_by_key {
            ($t:ty) => {{
                let mut indices: Vec<_> = slice
                    .par_iter_mut()
                    .enumerate()
                    .map(|(i, x)| (f(&*x), i as $t))
                    .collect();
                // The elements of `indices` are unique, as they are indexed, so any sort will be
                // stable with respect to the original slice. We use `sort_unstable` here because
                // it requires less memory allocation.
                indices.par_sort_unstable();
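                // Apply the sorted permutation by swapping elements into their final
                // positions, using the same chain-following scheme as the standard
                // library's `sort_by_cached_key`: `indices[i].1` is where the element
                // for slot `i` started out, and each already-processed entry records
                // where its displaced element was swapped to, so the loop follows that
                // chain until it reaches the element's current position.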
                for i in 0..len {
                    let mut index = indices[i].1;
                    while (index as usize) < i {
                        index = indices[index as usize].1;
                    }
                    indices[i].1 = index;
                    slice.swap(i, index as usize);
                }
            }};
        }

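        // Pick the narrowest index type that can address the slice, but only when the
        // `(key, index)` pair is actually smaller with it; alignment padding can make
        // e.g. `(K, u8)` occupy as much space as `(K, u16)`, in which case the narrower
        // index type saves nothing.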
        let sz_u8 = mem::size_of::<(K, u8)>();
        let sz_u16 = mem::size_of::<(K, u16)>();
        let sz_u32 = mem::size_of::<(K, u32)>();
        let sz_usize = mem::size_of::<(K, usize)>();

        if sz_u8 < sz_u16 && len <= (std::u8::MAX as usize) {
            return sort_by_key!(u8);
        }
        if sz_u16 < sz_u32 && len <= (std::u16::MAX as usize) {
            return sort_by_key!(u16);
        }
        if sz_u32 < sz_usize && len <= (std::u32::MAX as usize) {
            return sort_by_key!(u32);
        }
        sort_by_key!(usize)
    }

    /// Sorts the slice in parallel, but might not preserve the order of equal elements.
    ///
    /// This sort is unstable (i.e., may reorder equal elements), in-place
    /// (i.e., does not allocate), and *O*(*n* \* log(*n*)) worst-case.
    ///
    /// # Current implementation
    ///
    /// The current algorithm is based on [pattern-defeating quicksort][pdqsort] by Orson Peters,
    /// which combines the fast average case of randomized quicksort with the fast worst case of
    /// heapsort, while achieving linear time on slices with certain patterns. It uses some
    /// randomization to avoid degenerate cases, but with a fixed seed to always provide
    /// deterministic behavior.
    ///
    /// It is typically faster than stable sorting, except in a few special cases, e.g., when the
    /// slice consists of several concatenated sorted sequences.
    ///
    /// All quicksorts work in two stages: partitioning into two halves followed by recursive
    /// calls. The partitioning phase is sequential, but the two recursive calls are performed in
    /// parallel.
    ///
    /// [pdqsort]: https://github.com/orlp/pdqsort
    ///
    /// # Examples
    ///
    /// ```
    /// use rayon::prelude::*;
    ///
    /// let mut v = [-5, 4, 1, -3, 2];
    ///
    /// v.par_sort_unstable();
    /// assert_eq!(v, [-5, -3, 1, 2, 4]);
    /// ```
    fn par_sort_unstable(&mut self)
    where
        T: Ord,
    {
        par_quicksort(self.as_parallel_slice_mut(), T::lt);
    }

    /// Sorts the slice in parallel with a comparator function, but might not preserve the order of
    /// equal elements.
    ///
    /// This sort is unstable (i.e., may reorder equal elements), in-place
    /// (i.e., does not allocate), and *O*(*n* \* log(*n*)) worst-case.
    ///
    /// The comparator function must define a total ordering for the elements in the slice. If
    /// the ordering is not total, the order of the elements is unspecified. An order is a
    /// total order if it is (for all `a`, `b` and `c`):
    ///
    /// * total and antisymmetric: exactly one of `a < b`, `a == b` or `a > b` is true, and
    /// * transitive, `a < b` and `b < c` implies `a < c`. The same must hold for both `==` and `>`.
    ///
    /// For example, while [`f64`] doesn't implement [`Ord`] because `NaN != NaN`, we can use
    /// `partial_cmp` as our sort function when we know the slice doesn't contain a `NaN`.
    ///
    /// ```
    /// use rayon::prelude::*;
    ///
    /// let mut floats = [5f64, 4.0, 1.0, 3.0, 2.0];
    /// floats.par_sort_unstable_by(|a, b| a.partial_cmp(b).unwrap());
    /// assert_eq!(floats, [1.0, 2.0, 3.0, 4.0, 5.0]);
    /// ```
    ///
    /// # Current implementation
    ///
    /// The current algorithm is based on [pattern-defeating quicksort][pdqsort] by Orson Peters,
    /// which combines the fast average case of randomized quicksort with the fast worst case of
    /// heapsort, while achieving linear time on slices with certain patterns. It uses some
    /// randomization to avoid degenerate cases, but with a fixed seed to always provide
    /// deterministic behavior.
    ///
    /// It is typically faster than stable sorting, except in a few special cases, e.g., when the
    /// slice consists of several concatenated sorted sequences.
    ///
    /// All quicksorts work in two stages: partitioning into two halves followed by recursive
    /// calls. The partitioning phase is sequential, but the two recursive calls are performed in
    /// parallel.
    ///
    /// [pdqsort]: https://github.com/orlp/pdqsort
    ///
    /// # Examples
    ///
    /// ```
    /// use rayon::prelude::*;
    ///
    /// let mut v = [5, 4, 1, 3, 2];
    /// v.par_sort_unstable_by(|a, b| a.cmp(b));
    /// assert_eq!(v, [1, 2, 3, 4, 5]);
    ///
    /// // reverse sorting
    /// v.par_sort_unstable_by(|a, b| b.cmp(a));
    /// assert_eq!(v, [5, 4, 3, 2, 1]);
    /// ```
    fn par_sort_unstable_by<F>(&mut self, compare: F)
    where
        F: Fn(&T, &T) -> Ordering + Sync,
    {
        par_quicksort(self.as_parallel_slice_mut(), |a, b| {
            compare(a, b) == Ordering::Less
        });
    }

    /// Sorts the slice in parallel with a key extraction function, but might not preserve the order
    /// of equal elements.
    ///
    /// This sort is unstable (i.e., may reorder equal elements), in-place
    /// (i.e., does not allocate), and *O*(*m* \* *n* \* log(*n*)) worst-case,
    /// where the key function is *O*(*m*).
    ///
    /// # Current implementation
    ///
    /// The current algorithm is based on [pattern-defeating quicksort][pdqsort] by Orson Peters,
    /// which combines the fast average case of randomized quicksort with the fast worst case of
    /// heapsort, while achieving linear time on slices with certain patterns. It uses some
    /// randomization to avoid degenerate cases, but with a fixed seed to always provide
    /// deterministic behavior.
    ///
    /// Due to its key calling strategy, `par_sort_unstable_by_key` is likely to be slower than
    /// [`par_sort_by_cached_key`](#method.par_sort_by_cached_key) in cases where the key function
    /// is expensive.
    ///
    /// All quicksorts work in two stages: partitioning into two halves followed by recursive
    /// calls. The partitioning phase is sequential, but the two recursive calls are performed in
    /// parallel.
    ///
    /// [pdqsort]: https://github.com/orlp/pdqsort
    ///
    /// # Examples
    ///
    /// ```
    /// use rayon::prelude::*;
    ///
    /// let mut v = [-5i32, 4, 1, -3, 2];
    ///
    /// v.par_sort_unstable_by_key(|k| k.abs());
    /// assert_eq!(v, [1, 2, -3, 4, -5]);
    /// ```
    fn par_sort_unstable_by_key<K, F>(&mut self, f: F)
    where
        K: Ord,
        F: Fn(&T) -> K + Sync,
    {
        par_quicksort(self.as_parallel_slice_mut(), |a, b| f(a).lt(&f(b)));
    }

    /// Returns a parallel iterator over the slice producing non-overlapping mutable
    /// runs of elements using the predicate to separate them.
    ///
    /// The predicate is called for every pair of consecutive elements,
    /// meaning that it is called on `slice[0]` and `slice[1]`,
    /// followed by `slice[1]` and `slice[2]`, and so on.
    ///
    /// # Examples
    ///
    /// ```
    /// use rayon::prelude::*;
    /// let mut xs = [1, 2, 2, 3, 3, 3];
    /// let chunks: Vec<_> = xs.par_chunk_by_mut(|&x, &y| x == y).collect();
    /// assert_eq!(chunks[0], &mut [1]);
    /// assert_eq!(chunks[1], &mut [2, 2]);
    /// assert_eq!(chunks[2], &mut [3, 3, 3]);
    /// ```
    fn par_chunk_by_mut<F>(&mut self, pred: F) -> ChunkByMut<'_, T, F>
    where
        F: Fn(&T, &T) -> bool + Send + Sync,
    {
        ChunkByMut::new(self.as_parallel_slice_mut(), pred)
    }
}

impl<T: Send> ParallelSliceMut<T> for [T] {
    #[inline]
    fn as_parallel_slice_mut(&mut self) -> &mut [T] {
        self
    }
}

impl<'data, T: Sync + 'data> IntoParallelIterator for &'data [T] {
    type Item = &'data T;
    type Iter = Iter<'data, T>;

    fn into_par_iter(self) -> Self::Iter {
        Iter { slice: self }
    }
}

impl<'data, T: Send + 'data> IntoParallelIterator for &'data mut [T] {
    type Item = &'data mut T;
    type Iter = IterMut<'data, T>;

    fn into_par_iter(self) -> Self::Iter {
        IterMut { slice: self }
    }
}

/// Parallel iterator over immutable items in a slice
#[derive(Debug)]
pub struct Iter<'data, T: Sync> {
    slice: &'data [T],
}

impl<'data, T: Sync> Clone for Iter<'data, T> {
    fn clone(&self) -> Self {
        Iter { ..*self }
    }
}

impl<'data, T: Sync + 'data> ParallelIterator for Iter<'data, T> {
    type Item = &'data T;

    fn drive_unindexed<C>(self, consumer: C) -> C::Result
    where
        C: UnindexedConsumer<Self::Item>,
    {
        bridge(self, consumer)
    }

    fn opt_len(&self) -> Option<usize> {
        Some(self.len())
    }
}

impl<'data, T: Sync + 'data> IndexedParallelIterator for Iter<'data, T> {
    fn drive<C>(self, consumer: C) -> C::Result
    where
        C: Consumer<Self::Item>,
    {
        bridge(self, consumer)
    }

    fn len(&self) -> usize {
        self.slice.len()
    }

    fn with_producer<CB>(self, callback: CB) -> CB::Output
    where
        CB: ProducerCallback<Self::Item>,
    {
        callback.callback(IterProducer { slice: self.slice })
    }
}

struct IterProducer<'data, T: Sync> {
    slice: &'data [T],
}

impl<'data, T: 'data + Sync> Producer for IterProducer<'data, T> {
    type Item = &'data T;
    type IntoIter = ::std::slice::Iter<'data, T>;

    fn into_iter(self) -> Self::IntoIter {
        self.slice.iter()
    }

    fn split_at(self, index: usize) -> (Self, Self) {
        let (left, right) = self.slice.split_at(index);
        (IterProducer { slice: left }, IterProducer { slice: right })
    }
}

/// Parallel iterator over immutable overlapping windows of a slice
#[derive(Debug)]
pub struct Windows<'data, T: Sync> {
    window_size: usize,
    slice: &'data [T],
}

impl<'data, T: Sync> Clone for Windows<'data, T> {
    fn clone(&self) -> Self {
        Windows { ..*self }
    }
}

impl<'data, T: Sync + 'data> ParallelIterator for Windows<'data, T> {
    type Item = &'data [T];

    fn drive_unindexed<C>(self, consumer: C) -> C::Result
    where
        C: UnindexedConsumer<Self::Item>,
    {
        bridge(self, consumer)
    }

    fn opt_len(&self) -> Option<usize> {
        Some(self.len())
    }
}

impl<'data, T: Sync + 'data> IndexedParallelIterator for Windows<'data, T> {
    fn drive<C>(self, consumer: C) -> C::Result
    where
        C: Consumer<Self::Item>,
    {
        bridge(self, consumer)
    }

    fn len(&self) -> usize {
        assert!(self.window_size >= 1);
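        // A slice of length `n` contains `n - window_size + 1` complete windows;
        // saturate at zero for slices shorter than a single window.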
        self.slice.len().saturating_sub(self.window_size - 1)
    }

    fn with_producer<CB>(self, callback: CB) -> CB::Output
    where
        CB: ProducerCallback<Self::Item>,
    {
        callback.callback(WindowsProducer {
            window_size: self.window_size,
            slice: self.slice,
        })
    }
}

struct WindowsProducer<'data, T: Sync> {
    window_size: usize,
    slice: &'data [T],
}

impl<'data, T: 'data + Sync> Producer for WindowsProducer<'data, T> {
    type Item = &'data [T];
    type IntoIter = ::std::slice::Windows<'data, T>;

    fn into_iter(self) -> Self::IntoIter {
        self.slice.windows(self.window_size)
    }

    fn split_at(self, index: usize) -> (Self, Self) {
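        // Give the left half `window_size - 1` extra elements beyond the split point so
        // that every window starting before `index` is complete on the left, while the
        // right half produces the windows starting at `index` and later; no window is
        // lost or duplicated.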
        let left_index = Ord::min(self.slice.len(), index + (self.window_size - 1));
        let left = &self.slice[..left_index];
        let right = &self.slice[index..];
        (
            WindowsProducer {
                window_size: self.window_size,
                slice: left,
            },
            WindowsProducer {
                window_size: self.window_size,
                slice: right,
            },
        )
    }
}

/// Parallel iterator over mutable items in a slice
#[derive(Debug)]
pub struct IterMut<'data, T: Send> {
    slice: &'data mut [T],
}

impl<'data, T: Send + 'data> ParallelIterator for IterMut<'data, T> {
    type Item = &'data mut T;

    fn drive_unindexed<C>(self, consumer: C) -> C::Result
    where
        C: UnindexedConsumer<Self::Item>,
    {
        bridge(self, consumer)
    }

    fn opt_len(&self) -> Option<usize> {
        Some(self.len())
    }
}

impl<'data, T: Send + 'data> IndexedParallelIterator for IterMut<'data, T> {
    fn drive<C>(self, consumer: C) -> C::Result
    where
        C: Consumer<Self::Item>,
    {
        bridge(self, consumer)
    }

    fn len(&self) -> usize {
        self.slice.len()
    }

    fn with_producer<CB>(self, callback: CB) -> CB::Output
    where
        CB: ProducerCallback<Self::Item>,
    {
        callback.callback(IterMutProducer { slice: self.slice })
    }
}

struct IterMutProducer<'data, T: Send> {
    slice: &'data mut [T],
}

impl<'data, T: 'data + Send> Producer for IterMutProducer<'data, T> {
    type Item = &'data mut T;
    type IntoIter = ::std::slice::IterMut<'data, T>;

    fn into_iter(self) -> Self::IntoIter {
        self.slice.iter_mut()
    }

    fn split_at(self, index: usize) -> (Self, Self) {
        let (left, right) = self.slice.split_at_mut(index);
        (
            IterMutProducer { slice: left },
            IterMutProducer { slice: right },
        )
    }
}

/// Parallel iterator over slices separated by a predicate
pub struct Split<'data, T, P> {
    slice: &'data [T],
    separator: P,
}

impl<'data, T, P: Clone> Clone for Split<'data, T, P> {
    fn clone(&self) -> Self {
        Split {
            separator: self.separator.clone(),
            ..*self
        }
    }
}

impl<'data, T: Debug, P> Debug for Split<'data, T, P> {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        f.debug_struct("Split").field("slice", &self.slice).finish()
    }
}

impl<'data, T, P> ParallelIterator for Split<'data, T, P>
where
    P: Fn(&T) -> bool + Sync + Send,
    T: Sync,
{
    type Item = &'data [T];

    fn drive_unindexed<C>(self, consumer: C) -> C::Result
    where
        C: UnindexedConsumer<Self::Item>,
    {
        let producer = SplitProducer::new(self.slice, &self.separator);
        bridge_unindexed(producer, consumer)
    }
}

/// Parallel iterator over slices separated by a predicate,
/// including the matched part as a terminator.
pub struct SplitInclusive<'data, T, P> {
    slice: &'data [T],
    separator: P,
}

impl<'data, T, P: Clone> Clone for SplitInclusive<'data, T, P> {
    fn clone(&self) -> Self {
        SplitInclusive {
            separator: self.separator.clone(),
            ..*self
        }
    }
}

impl<'data, T: Debug, P> Debug for SplitInclusive<'data, T, P> {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        f.debug_struct("SplitInclusive")
            .field("slice", &self.slice)
            .finish()
    }
}

impl<'data, T, P> ParallelIterator for SplitInclusive<'data, T, P>
where
    P: Fn(&T) -> bool + Sync + Send,
    T: Sync,
{
    type Item = &'data [T];

    fn drive_unindexed<C>(self, consumer: C) -> C::Result
    where
        C: UnindexedConsumer<Self::Item>,
    {
        let producer = SplitInclusiveProducer::new_incl(self.slice, &self.separator);
        bridge_unindexed(producer, consumer)
    }
}

/// Implement support for `SplitProducer`.
impl<'data, T, P> Fissile<P> for &'data [T]
where
    P: Fn(&T) -> bool,
{
    fn length(&self) -> usize {
        self.len()
    }

    fn midpoint(&self, end: usize) -> usize {
        end / 2
    }

    fn find(&self, separator: &P, start: usize, end: usize) -> Option<usize> {
        self[start..end].iter().position(separator)
    }

    fn rfind(&self, separator: &P, end: usize) -> Option<usize> {
        self[..end].iter().rposition(separator)
    }

    fn split_once<const INCL: bool>(self, index: usize) -> (Self, Self) {
        if INCL {
            // include the separator in the left side
            self.split_at(index + 1)
        } else {
            let (left, right) = self.split_at(index);
            (left, &right[1..]) // skip the separator
        }
    }

    fn fold_splits<F, const INCL: bool>(self, separator: &P, folder: F, skip_last: bool) -> F
    where
        F: Folder<Self>,
        Self: Send,
    {
        if INCL {
            debug_assert!(!skip_last);
            folder.consume_iter(self.split_inclusive(separator))
        } else {
            let mut split = self.split(separator);
            if skip_last {
                split.next_back();
            }
            folder.consume_iter(split)
        }
    }
}

/// Parallel iterator over mutable slices separated by a predicate
pub struct SplitMut<'data, T, P> {
    slice: &'data mut [T],
    separator: P,
}

impl<'data, T: Debug, P> Debug for SplitMut<'data, T, P> {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        f.debug_struct("SplitMut")
            .field("slice", &self.slice)
            .finish()
    }
}

impl<'data, T, P> ParallelIterator for SplitMut<'data, T, P>
where
    P: Fn(&T) -> bool + Sync + Send,
    T: Send,
{
    type Item = &'data mut [T];

    fn drive_unindexed<C>(self, consumer: C) -> C::Result
    where
        C: UnindexedConsumer<Self::Item>,
    {
        let producer = SplitProducer::new(self.slice, &self.separator);
        bridge_unindexed(producer, consumer)
    }
}

/// Parallel iterator over mutable slices separated by a predicate,
/// including the matched part as a terminator.
pub struct SplitInclusiveMut<'data, T, P> {
    slice: &'data mut [T],
    separator: P,
}

impl<'data, T: Debug, P> Debug for SplitInclusiveMut<'data, T, P> {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        f.debug_struct("SplitInclusiveMut")
            .field("slice", &self.slice)
            .finish()
    }
}

impl<'data, T, P> ParallelIterator for SplitInclusiveMut<'data, T, P>
where
    P: Fn(&T) -> bool + Sync + Send,
    T: Send,
{
    type Item = &'data mut [T];

    fn drive_unindexed<C>(self, consumer: C) -> C::Result
    where
        C: UnindexedConsumer<Self::Item>,
    {
        let producer = SplitInclusiveProducer::new_incl(self.slice, &self.separator);
        bridge_unindexed(producer, consumer)
    }
}

/// Implement support for `SplitProducer`.
impl<'data, T, P> Fissile<P> for &'data mut [T]
where
    P: Fn(&T) -> bool,
{
    fn length(&self) -> usize {
        self.len()
    }

    fn midpoint(&self, end: usize) -> usize {
        end / 2
    }

    fn find(&self, separator: &P, start: usize, end: usize) -> Option<usize> {
        self[start..end].iter().position(separator)
    }

    fn rfind(&self, separator: &P, end: usize) -> Option<usize> {
        self[..end].iter().rposition(separator)
    }

    fn split_once<const INCL: bool>(self, index: usize) -> (Self, Self) {
        if INCL {
            // include the separator in the left side
            self.split_at_mut(index + 1)
        } else {
            let (left, right) = self.split_at_mut(index);
            (left, &mut right[1..]) // skip the separator
        }
    }

    fn fold_splits<F, const INCL: bool>(self, separator: &P, folder: F, skip_last: bool) -> F
    where
        F: Folder<Self>,
        Self: Send,
    {
        if INCL {
            debug_assert!(!skip_last);
            folder.consume_iter(self.split_inclusive_mut(separator))
        } else {
            let mut split = self.split_mut(separator);
            if skip_last {
                split.next_back();
            }
            folder.consume_iter(split)
        }
    }
}