rayon/slice/mod.rs
1//! Parallel iterator types for [slices][std::slice]
2//!
3//! You will rarely need to interact with this module directly unless you need
4//! to name one of the iterator types.
5//!
6//! [std::slice]: https://doc.rust-lang.org/stable/std/slice/
7
8mod chunk_by;
9mod chunks;
10mod mergesort;
11mod quicksort;
12mod rchunks;
13
14mod test;
15
16use self::mergesort::par_mergesort;
17use self::quicksort::par_quicksort;
18use crate::iter::plumbing::*;
19use crate::iter::*;
20use crate::split_producer::*;
21
22use std::cmp::Ordering;
23use std::fmt::{self, Debug};
24use std::mem;
25
26pub use self::chunk_by::{ChunkBy, ChunkByMut};
27pub use self::chunks::{Chunks, ChunksExact, ChunksExactMut, ChunksMut};
28pub use self::rchunks::{RChunks, RChunksExact, RChunksExactMut, RChunksMut};
29
30/// Parallel extensions for slices.
31pub trait ParallelSlice<T: Sync> {
32 /// Returns a plain slice, which is used to implement the rest of the
33 /// parallel methods.
34 fn as_parallel_slice(&self) -> &[T];
35
36 /// Returns a parallel iterator over subslices separated by elements that
37 /// match the separator.
38 ///
39 /// # Examples
40 ///
41 /// ```
42 /// use rayon::prelude::*;
43 /// let products: Vec<_> = [1, 2, 3, 0, 2, 4, 8, 0, 3, 6, 9]
44 /// .par_split(|i| *i == 0)
45 /// .map(|numbers| numbers.iter().product::<i32>())
46 /// .collect();
47 /// assert_eq!(products, [6, 64, 162]);
48 /// ```
49 fn par_split<P>(&self, separator: P) -> Split<'_, T, P>
50 where
51 P: Fn(&T) -> bool + Sync + Send,
52 {
53 Split {
54 slice: self.as_parallel_slice(),
55 separator,
56 }
57 }
58
59 /// Returns a parallel iterator over subslices separated by elements that
60 /// match the separator, including the matched part as a terminator.
61 ///
62 /// # Examples
63 ///
64 /// ```
65 /// use rayon::prelude::*;
66 /// let lengths: Vec<_> = [1, 2, 3, 0, 2, 4, 8, 0, 3, 6, 9]
67 /// .par_split_inclusive(|i| *i == 0)
68 /// .map(|numbers| numbers.len())
69 /// .collect();
70 /// assert_eq!(lengths, [4, 4, 3]);
71 /// ```
72 fn par_split_inclusive<P>(&self, separator: P) -> SplitInclusive<'_, T, P>
73 where
74 P: Fn(&T) -> bool + Sync + Send,
75 {
76 SplitInclusive {
77 slice: self.as_parallel_slice(),
78 separator,
79 }
80 }
81
82 /// Returns a parallel iterator over all contiguous windows of length
83 /// `window_size`. The windows overlap.
84 ///
85 /// # Examples
86 ///
87 /// ```
88 /// use rayon::prelude::*;
89 /// let windows: Vec<_> = [1, 2, 3].par_windows(2).collect();
90 /// assert_eq!(vec![[1, 2], [2, 3]], windows);
91 /// ```
92 fn par_windows(&self, window_size: usize) -> Windows<'_, T> {
93 Windows {
94 window_size,
95 slice: self.as_parallel_slice(),
96 }
97 }
98
99 /// Returns a parallel iterator over at most `chunk_size` elements of
100 /// `self` at a time. The chunks do not overlap.
101 ///
102 /// If the number of elements in the iterator is not divisible by
103 /// `chunk_size`, the last chunk may be shorter than `chunk_size`. All
104 /// other chunks will have that exact length.
105 ///
106 /// # Examples
107 ///
108 /// ```
109 /// use rayon::prelude::*;
110 /// let chunks: Vec<_> = [1, 2, 3, 4, 5].par_chunks(2).collect();
111 /// assert_eq!(chunks, vec![&[1, 2][..], &[3, 4], &[5]]);
112 /// ```
113 #[track_caller]
114 fn par_chunks(&self, chunk_size: usize) -> Chunks<'_, T> {
115 assert!(chunk_size != 0, "chunk_size must not be zero");
116 Chunks::new(chunk_size, self.as_parallel_slice())
117 }
118
119 /// Returns a parallel iterator over `chunk_size` elements of
120 /// `self` at a time. The chunks do not overlap.
121 ///
122 /// If `chunk_size` does not divide the length of the slice, then the
123 /// last up to `chunk_size-1` elements will be omitted and can be
124 /// retrieved from the remainder function of the iterator.
125 ///
126 /// # Examples
127 ///
128 /// ```
129 /// use rayon::prelude::*;
130 /// let chunks: Vec<_> = [1, 2, 3, 4, 5].par_chunks_exact(2).collect();
131 /// assert_eq!(chunks, vec![&[1, 2][..], &[3, 4]]);
132 /// ```
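///
/// A sketch of inspecting the leftover elements as well, assuming the
/// iterator's `remainder` method mirrors the standard library's
/// `ChunksExact::remainder`:
///
/// ```
/// use rayon::prelude::*;
/// let slice = [1, 2, 3, 4, 5];
/// let chunks = slice.par_chunks_exact(2);
/// // The element that does not fill a complete chunk is reported separately.
/// assert_eq!(chunks.remainder(), &[5]);
/// let sums: Vec<i32> = chunks.map(|c| c.iter().sum()).collect();
/// assert_eq!(sums, [3, 7]);
/// ```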
133 #[track_caller]
134 fn par_chunks_exact(&self, chunk_size: usize) -> ChunksExact<'_, T> {
135 assert!(chunk_size != 0, "chunk_size must not be zero");
136 ChunksExact::new(chunk_size, self.as_parallel_slice())
137 }
138
139 /// Returns a parallel iterator over at most `chunk_size` elements of `self` at a time,
140 /// starting at the end. The chunks do not overlap.
141 ///
142 /// If the number of elements in the iterator is not divisible by
143 /// `chunk_size`, the last chunk may be shorter than `chunk_size`. All
144 /// other chunks will have that exact length.
145 ///
146 /// # Examples
147 ///
148 /// ```
149 /// use rayon::prelude::*;
150 /// let chunks: Vec<_> = [1, 2, 3, 4, 5].par_rchunks(2).collect();
151 /// assert_eq!(chunks, vec![&[4, 5][..], &[2, 3], &[1]]);
152 /// ```
153 #[track_caller]
154 fn par_rchunks(&self, chunk_size: usize) -> RChunks<'_, T> {
155 assert!(chunk_size != 0, "chunk_size must not be zero");
156 RChunks::new(chunk_size, self.as_parallel_slice())
157 }
158
159 /// Returns a parallel iterator over `chunk_size` elements of `self` at a time,
160 /// starting at the end. The chunks do not overlap.
161 ///
162 /// If `chunk_size` does not divide the length of the slice, then the
163 /// last up to `chunk_size-1` elements will be omitted and can be
164 /// retrieved from the remainder function of the iterator.
165 ///
166 /// # Examples
167 ///
168 /// ```
169 /// use rayon::prelude::*;
170 /// let chunks: Vec<_> = [1, 2, 3, 4, 5].par_rchunks_exact(2).collect();
171 /// assert_eq!(chunks, vec![&[4, 5][..], &[2, 3]]);
172 /// ```
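///
/// Because chunking starts from the end, the leftover elements are the ones at
/// the front of the slice. A sketch, assuming `remainder` here mirrors the
/// standard library's `RChunksExact::remainder`:
///
/// ```
/// use rayon::prelude::*;
/// let slice = [1, 2, 3, 4, 5];
/// let chunks = slice.par_rchunks_exact(2);
/// // The first element does not fill a complete chunk.
/// assert_eq!(chunks.remainder(), &[1]);
/// ```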
173 #[track_caller]
174 fn par_rchunks_exact(&self, chunk_size: usize) -> RChunksExact<'_, T> {
175 assert!(chunk_size != 0, "chunk_size must not be zero");
176 RChunksExact::new(chunk_size, self.as_parallel_slice())
177 }
178
179 /// Returns a parallel iterator over the slice producing non-overlapping runs
180 /// of elements using the predicate to separate them.
181 ///
182 /// The predicate is called on pairs of consecutive elements,
183 /// meaning it is called on `slice[0]` and `slice[1]`,
184 /// then on `slice[1]` and `slice[2]`, and so on.
185 ///
186 /// # Examples
187 ///
188 /// ```
189 /// use rayon::prelude::*;
190 /// let chunks: Vec<_> = [1, 2, 2, 3, 3, 3].par_chunk_by(|&x, &y| x == y).collect();
191 /// assert_eq!(chunks[0], &[1]);
192 /// assert_eq!(chunks[1], &[2, 2]);
193 /// assert_eq!(chunks[2], &[3, 3, 3]);
194 /// ```
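///
/// The predicate need not be an equality test; a new chunk starts wherever it
/// returns `false` for a pair of neighbors. As a sketch, splitting a slice into
/// non-decreasing runs:
///
/// ```
/// use rayon::prelude::*;
/// let runs: Vec<_> = [1, 2, 3, 2, 4].par_chunk_by(|&a, &b| a <= b).collect();
/// assert_eq!(runs, vec![&[1, 2, 3][..], &[2, 4]]);
/// ```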
195 fn par_chunk_by<F>(&self, pred: F) -> ChunkBy<'_, T, F>
196 where
197 F: Fn(&T, &T) -> bool + Send + Sync,
198 {
199 ChunkBy::new(self.as_parallel_slice(), pred)
200 }
201}
202
203impl<T: Sync> ParallelSlice<T> for [T] {
204 #[inline]
205 fn as_parallel_slice(&self) -> &[T] {
206 self
207 }
208}
209
210/// Parallel extensions for mutable slices.
211pub trait ParallelSliceMut<T: Send> {
212 /// Returns a plain mutable slice, which is used to implement the rest of
213 /// the parallel methods.
214 fn as_parallel_slice_mut(&mut self) -> &mut [T];
215
216 /// Returns a parallel iterator over mutable subslices separated by
217 /// elements that match the separator.
218 ///
219 /// # Examples
220 ///
221 /// ```
222 /// use rayon::prelude::*;
223 /// let mut array = [1, 2, 3, 0, 2, 4, 8, 0, 3, 6, 9];
224 /// array.par_split_mut(|i| *i == 0)
225 /// .for_each(|slice| slice.reverse());
226 /// assert_eq!(array, [3, 2, 1, 0, 8, 4, 2, 0, 9, 6, 3]);
227 /// ```
228 fn par_split_mut<P>(&mut self, separator: P) -> SplitMut<'_, T, P>
229 where
230 P: Fn(&T) -> bool + Sync + Send,
231 {
232 SplitMut {
233 slice: self.as_parallel_slice_mut(),
234 separator,
235 }
236 }
237
238 /// Returns a parallel iterator over mutable subslices separated by elements
239 /// that match the separator, including the matched part as a terminator.
240 ///
241 /// # Examples
242 ///
243 /// ```
244 /// use rayon::prelude::*;
245 /// let mut array = [1, 2, 3, 0, 2, 4, 8, 0, 3, 6, 9];
246 /// array.par_split_inclusive_mut(|i| *i == 0)
247 /// .for_each(|slice| slice.reverse());
248 /// assert_eq!(array, [0, 3, 2, 1, 0, 8, 4, 2, 9, 6, 3]);
249 /// ```
250 fn par_split_inclusive_mut<P>(&mut self, separator: P) -> SplitInclusiveMut<'_, T, P>
251 where
252 P: Fn(&T) -> bool + Sync + Send,
253 {
254 SplitInclusiveMut {
255 slice: self.as_parallel_slice_mut(),
256 separator,
257 }
258 }
259
260 /// Returns a parallel iterator over at most `chunk_size` elements of
261 /// `self` at a time. The chunks are mutable and do not overlap.
262 ///
263 /// If the number of elements in the iterator is not divisible by
264 /// `chunk_size`, the last chunk may be shorter than `chunk_size`. All
265 /// other chunks will have that exact length.
266 ///
267 /// # Examples
268 ///
269 /// ```
270 /// use rayon::prelude::*;
271 /// let mut array = [1, 2, 3, 4, 5];
272 /// array.par_chunks_mut(2)
273 /// .for_each(|slice| slice.reverse());
274 /// assert_eq!(array, [2, 1, 4, 3, 5]);
275 /// ```
276 #[track_caller]
277 fn par_chunks_mut(&mut self, chunk_size: usize) -> ChunksMut<'_, T> {
278 assert!(chunk_size != 0, "chunk_size must not be zero");
279 ChunksMut::new(chunk_size, self.as_parallel_slice_mut())
280 }
281
282 /// Returns a parallel iterator over `chunk_size` elements of
283 /// `self` at a time. The chunks are mutable and do not overlap.
284 ///
285 /// If `chunk_size` does not divide the length of the slice, then the
286 /// last up to `chunk_size-1` elements will be omitted and can be
287 /// retrieved from the remainder function of the iterator.
288 ///
289 /// # Examples
290 ///
291 /// ```
292 /// use rayon::prelude::*;
293 /// let mut array = [1, 2, 3, 4, 5];
294 /// array.par_chunks_exact_mut(3)
295 /// .for_each(|slice| slice.reverse());
296 /// assert_eq!(array, [3, 2, 1, 4, 5]);
297 /// ```
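///
/// A sketch of mutating the leftover elements as well, assuming an
/// `into_remainder` method analogous to the standard library's
/// `ChunksExactMut::into_remainder`:
///
/// ```
/// use rayon::prelude::*;
/// let mut array = [1, 2, 3, 4, 5];
/// // `[4, 5]` does not form a full chunk of 3, so it is only reachable
/// // through the remainder.
/// for x in array.par_chunks_exact_mut(3).into_remainder() {
///     *x = 0;
/// }
/// assert_eq!(array, [1, 2, 3, 0, 0]);
/// ```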
298 #[track_caller]
299 fn par_chunks_exact_mut(&mut self, chunk_size: usize) -> ChunksExactMut<'_, T> {
300 assert!(chunk_size != 0, "chunk_size must not be zero");
301 ChunksExactMut::new(chunk_size, self.as_parallel_slice_mut())
302 }
303
304 /// Returns a parallel iterator over at most `chunk_size` elements of `self` at a time,
305 /// starting at the end. The chunks are mutable and do not overlap.
306 ///
307 /// If the number of elements in the iterator is not divisible by
308 /// `chunk_size`, the last chunk may be shorter than `chunk_size`. All
309 /// other chunks will have that exact length.
310 ///
311 /// # Examples
312 ///
313 /// ```
314 /// use rayon::prelude::*;
315 /// let mut array = [1, 2, 3, 4, 5];
316 /// array.par_rchunks_mut(2)
317 /// .for_each(|slice| slice.reverse());
318 /// assert_eq!(array, [1, 3, 2, 5, 4]);
319 /// ```
320 #[track_caller]
321 fn par_rchunks_mut(&mut self, chunk_size: usize) -> RChunksMut<'_, T> {
322 assert!(chunk_size != 0, "chunk_size must not be zero");
323 RChunksMut::new(chunk_size, self.as_parallel_slice_mut())
324 }
325
326 /// Returns a parallel iterator over `chunk_size` elements of `self` at a time,
327 /// starting at the end. The chunks are mutable and do not overlap.
328 ///
329 /// If `chunk_size` does not divide the length of the slice, then the
330 /// last up to `chunk_size-1` elements will be omitted and can be
331 /// retrieved from the remainder function of the iterator.
332 ///
333 /// # Examples
334 ///
335 /// ```
336 /// use rayon::prelude::*;
337 /// let mut array = [1, 2, 3, 4, 5];
338 /// array.par_rchunks_exact_mut(3)
339 /// .for_each(|slice| slice.reverse());
340 /// assert_eq!(array, [1, 2, 5, 4, 3]);
341 /// ```
342 #[track_caller]
343 fn par_rchunks_exact_mut(&mut self, chunk_size: usize) -> RChunksExactMut<'_, T> {
344 assert!(chunk_size != 0, "chunk_size must not be zero");
345 RChunksExactMut::new(chunk_size, self.as_parallel_slice_mut())
346 }
347
348 /// Sorts the slice in parallel.
349 ///
350 /// This sort is stable (i.e., does not reorder equal elements) and *O*(*n* \* log(*n*)) worst-case.
351 ///
352 /// When applicable, unstable sorting is preferred because it is generally faster than stable
353 /// sorting and it doesn't allocate auxiliary memory.
354 /// See [`par_sort_unstable`](#method.par_sort_unstable).
355 ///
356 /// # Current implementation
357 ///
358 /// The current algorithm is an adaptive merge sort inspired by
359 /// [timsort](https://en.wikipedia.org/wiki/Timsort).
360 /// It is designed to be very fast in cases where the slice is nearly sorted, or consists of
361 /// two or more sorted sequences concatenated one after another.
362 ///
363 /// Also, it allocates temporary storage the same size as `self`, but for very short slices a
364 /// non-allocating insertion sort is used instead.
365 ///
366 /// In order to sort the slice in parallel, the slice is first divided into smaller chunks and
367 /// all chunks are sorted in parallel. Then, adjacent chunks that together form non-descending
368 /// or descending runs are concatenated. Finally, the remaining chunks are merged together using
369 /// parallel subdivision of chunks and a parallel merge operation.
370 ///
371 /// # Examples
372 ///
373 /// ```
374 /// use rayon::prelude::*;
375 ///
376 /// let mut v = [-5, 4, 1, -3, 2];
377 ///
378 /// v.par_sort();
379 /// assert_eq!(v, [-5, -3, 1, 2, 4]);
380 /// ```
381 fn par_sort(&mut self)
382 where
383 T: Ord,
384 {
385 par_mergesort(self.as_parallel_slice_mut(), T::lt);
386 }
387
388 /// Sorts the slice in parallel with a comparator function.
389 ///
390 /// This sort is stable (i.e., does not reorder equal elements) and *O*(*n* \* log(*n*)) worst-case.
391 ///
392 /// The comparator function must define a total ordering for the elements in the slice. If
393 /// the ordering is not total, the order of the elements is unspecified. An order is a
394 /// total order if it is (for all `a`, `b` and `c`):
395 ///
396 /// * total and antisymmetric: exactly one of `a < b`, `a == b` or `a > b` is true, and
397 /// * transitive, `a < b` and `b < c` implies `a < c`. The same must hold for both `==` and `>`.
398 ///
399 /// For example, while [`f64`] doesn't implement [`Ord`] because `NaN != NaN`, we can use
400 /// `partial_cmp` as our sort function when we know the slice doesn't contain a `NaN`.
401 ///
402 /// ```
403 /// use rayon::prelude::*;
404 ///
405 /// let mut floats = [5f64, 4.0, 1.0, 3.0, 2.0];
406 /// floats.par_sort_by(|a, b| a.partial_cmp(b).unwrap());
407 /// assert_eq!(floats, [1.0, 2.0, 3.0, 4.0, 5.0]);
408 /// ```
409 ///
410 /// When applicable, unstable sorting is preferred because it is generally faster than stable
411 /// sorting and it doesn't allocate auxiliary memory.
412 /// See [`par_sort_unstable_by`](#method.par_sort_unstable_by).
413 ///
414 /// # Current implementation
415 ///
416 /// The current algorithm is an adaptive merge sort inspired by
417 /// [timsort](https://en.wikipedia.org/wiki/Timsort).
418 /// It is designed to be very fast in cases where the slice is nearly sorted, or consists of
419 /// two or more sorted sequences concatenated one after another.
420 ///
421 /// Also, it allocates temporary storage the same size as `self`, but for very short slices a
422 /// non-allocating insertion sort is used instead.
423 ///
424 /// In order to sort the slice in parallel, the slice is first divided into smaller chunks and
425 /// all chunks are sorted in parallel. Then, adjacent chunks that together form non-descending
426 /// or descending runs are concatenated. Finally, the remaining chunks are merged together using
427 /// parallel subdivision of chunks and a parallel merge operation.
428 ///
429 /// # Examples
430 ///
431 /// ```
432 /// use rayon::prelude::*;
433 ///
434 /// let mut v = [5, 4, 1, 3, 2];
435 /// v.par_sort_by(|a, b| a.cmp(b));
436 /// assert_eq!(v, [1, 2, 3, 4, 5]);
437 ///
438 /// // reverse sorting
439 /// v.par_sort_by(|a, b| b.cmp(a));
440 /// assert_eq!(v, [5, 4, 3, 2, 1]);
441 /// ```
442 fn par_sort_by<F>(&mut self, compare: F)
443 where
444 F: Fn(&T, &T) -> Ordering + Sync,
445 {
446 par_mergesort(self.as_parallel_slice_mut(), |a, b| {
447 compare(a, b) == Ordering::Less
448 });
449 }
450
451 /// Sorts the slice in parallel with a key extraction function.
452 ///
453 /// This sort is stable (i.e., does not reorder equal elements) and *O*(*m* \* *n* \* log(*n*))
454 /// worst-case, where the key function is *O*(*m*).
455 ///
456 /// For expensive key functions (e.g. functions that are not simple property accesses or
457 /// basic operations), [`par_sort_by_cached_key`](#method.par_sort_by_cached_key) is likely to
458 /// be significantly faster, as it does not recompute element keys.
459 ///
460 /// When applicable, unstable sorting is preferred because it is generally faster than stable
461 /// sorting and it doesn't allocate auxiliary memory.
462 /// See [`par_sort_unstable_by_key`](#method.par_sort_unstable_by_key).
463 ///
464 /// # Current implementation
465 ///
466 /// The current algorithm is an adaptive merge sort inspired by
467 /// [timsort](https://en.wikipedia.org/wiki/Timsort).
468 /// It is designed to be very fast in cases where the slice is nearly sorted, or consists of
469 /// two or more sorted sequences concatenated one after another.
470 ///
471 /// Also, it allocates temporary storage the same size as `self`, but for very short slices a
472 /// non-allocating insertion sort is used instead.
473 ///
474 /// In order to sort the slice in parallel, the slice is first divided into smaller chunks and
475 /// all chunks are sorted in parallel. Then, adjacent chunks that together form non-descending
476 /// or descending runs are concatenated. Finally, the remaining chunks are merged together using
477 /// parallel subdivision of chunks and a parallel merge operation.
478 ///
479 /// # Examples
480 ///
481 /// ```
482 /// use rayon::prelude::*;
483 ///
484 /// let mut v = [-5i32, 4, 1, -3, 2];
485 ///
486 /// v.par_sort_by_key(|k| k.abs());
487 /// assert_eq!(v, [1, 2, -3, 4, -5]);
488 /// ```
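///
/// A sketch of the kind of key where caching pays off: the closure below
/// allocates a fresh `String` each time it is called, and this method may
/// evaluate it repeatedly per element, whereas
/// [`par_sort_by_cached_key`](#method.par_sort_by_cached_key) computes each
/// key only once.
///
/// ```
/// use rayon::prelude::*;
///
/// let mut words = ["hello", "Hi", "hey"];
///
/// // Works, but may recompute `to_lowercase` on every comparison.
/// words.par_sort_by_key(|s| s.to_lowercase());
/// assert_eq!(words, ["hello", "hey", "Hi"]);
/// ```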
489 fn par_sort_by_key<K, F>(&mut self, f: F)
490 where
491 K: Ord,
492 F: Fn(&T) -> K + Sync,
493 {
494 par_mergesort(self.as_parallel_slice_mut(), |a, b| f(a).lt(&f(b)));
495 }
496
497 /// Sorts the slice in parallel with a key extraction function.
498 ///
499 /// During sorting, the key function is called at most once per element, by using
500 /// temporary storage to remember the results of key evaluation.
501 /// The key function is called in parallel, so the order of calls is completely unspecified.
502 ///
503 /// This sort is stable (i.e., does not reorder equal elements) and *O*(*m* \* *n* + *n* \* log(*n*))
504 /// worst-case, where the key function is *O*(*m*).
505 ///
506 /// For simple key functions (e.g., functions that are property accesses or
507 /// basic operations), [`par_sort_by_key`](#method.par_sort_by_key) is likely to be
508 /// faster.
509 ///
510 /// # Current implementation
511 ///
512 /// The current algorithm is based on [pattern-defeating quicksort][pdqsort] by Orson Peters,
513 /// which combines the fast average case of randomized quicksort with the fast worst case of
514 /// heapsort, while achieving linear time on slices with certain patterns. It uses some
515 /// randomization to avoid degenerate cases, but with a fixed seed to always provide
516 /// deterministic behavior.
517 ///
518 /// In the worst case, the algorithm allocates temporary storage in a `Vec<(K, usize)>` the
519 /// length of the slice.
520 ///
521 /// All quicksorts work in two stages: partitioning into two halves followed by recursive
522 /// calls. The partitioning phase is sequential, but the two recursive calls are performed in
523 /// parallel. Finally, after sorting the cached keys, the item positions are updated sequentially.
524 ///
525 /// [pdqsort]: https://github.com/orlp/pdqsort
526 ///
527 /// # Examples
528 ///
529 /// ```
530 /// use rayon::prelude::*;
531 ///
532 /// let mut v = [-5i32, 4, 32, -3, 2];
533 ///
534 /// v.par_sort_by_cached_key(|k| k.to_string());
535 /// assert!(v == [-3, -5, 2, 32, 4]);
536 /// ```
537 fn par_sort_by_cached_key<K, F>(&mut self, f: F)
538 where
539 F: Fn(&T) -> K + Sync,
540 K: Ord + Send,
541 {
542 let slice = self.as_parallel_slice_mut();
543 let len = slice.len();
544 if len < 2 {
545 return;
546 }
547
548 // Helper macro for indexing our vector by the smallest possible type, to reduce allocation.
549 macro_rules! sort_by_key {
550 ($t:ty) => {{
551 let mut indices: Vec<_> = slice
552 .par_iter_mut()
553 .enumerate()
554 .map(|(i, x)| (f(&*x), i as $t))
555 .collect();
556 // The elements of `indices` are unique, as they are indexed, so any sort will be
557 // stable with respect to the original slice. We use `sort_unstable` here because
558 // it requires less memory allocation.
559 indices.par_sort_unstable();
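// Apply the sorted permutation to `slice` in place: for each position `i`,
// chase the chain of indices recorded by earlier iterations to find where
// the desired element currently lives, then swap it into place.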
560 for i in 0..len {
561 let mut index = indices[i].1;
562 while (index as usize) < i {
563 index = indices[index as usize].1;
564 }
565 indices[i].1 = index;
566 slice.swap(i, index as usize);
567 }
568 }};
569 }
570
571 let sz_u8 = mem::size_of::<(K, u8)>();
572 let sz_u16 = mem::size_of::<(K, u16)>();
573 let sz_u32 = mem::size_of::<(K, u32)>();
574 let sz_usize = mem::size_of::<(K, usize)>();
575
576 if sz_u8 < sz_u16 && len <= (u8::MAX as usize) {
577 return sort_by_key!(u8);
578 }
579 if sz_u16 < sz_u32 && len <= (u16::MAX as usize) {
580 return sort_by_key!(u16);
581 }
582 if sz_u32 < sz_usize && len <= (u32::MAX as usize) {
583 return sort_by_key!(u32);
584 }
585 sort_by_key!(usize)
586 }
587
588 /// Sorts the slice in parallel, but might not preserve the order of equal elements.
589 ///
590 /// This sort is unstable (i.e., may reorder equal elements), in-place
591 /// (i.e., does not allocate), and *O*(*n* \* log(*n*)) worst-case.
592 ///
593 /// # Current implementation
594 ///
595 /// The current algorithm is based on [pattern-defeating quicksort][pdqsort] by Orson Peters,
596 /// which combines the fast average case of randomized quicksort with the fast worst case of
597 /// heapsort, while achieving linear time on slices with certain patterns. It uses some
598 /// randomization to avoid degenerate cases, but with a fixed seed to always provide
599 /// deterministic behavior.
600 ///
601 /// It is typically faster than stable sorting, except in a few special cases, e.g., when the
602 /// slice consists of several concatenated sorted sequences.
603 ///
604 /// All quicksorts work in two stages: partitioning into two halves followed by recursive
605 /// calls. The partitioning phase is sequential, but the two recursive calls are performed in
606 /// parallel.
607 ///
608 /// [pdqsort]: https://github.com/orlp/pdqsort
609 ///
610 /// # Examples
611 ///
612 /// ```
613 /// use rayon::prelude::*;
614 ///
615 /// let mut v = [-5, 4, 1, -3, 2];
616 ///
617 /// v.par_sort_unstable();
618 /// assert_eq!(v, [-5, -3, 1, 2, 4]);
619 /// ```
620 fn par_sort_unstable(&mut self)
621 where
622 T: Ord,
623 {
624 par_quicksort(self.as_parallel_slice_mut(), T::lt);
625 }
626
627 /// Sorts the slice in parallel with a comparator function, but might not preserve the order of
628 /// equal elements.
629 ///
630 /// This sort is unstable (i.e., may reorder equal elements), in-place
631 /// (i.e., does not allocate), and *O*(*n* \* log(*n*)) worst-case.
632 ///
633 /// The comparator function must define a total ordering for the elements in the slice. If
634 /// the ordering is not total, the order of the elements is unspecified. An order is a
635 /// total order if it is (for all `a`, `b` and `c`):
636 ///
637 /// * total and antisymmetric: exactly one of `a < b`, `a == b` or `a > b` is true, and
638 /// * transitive, `a < b` and `b < c` implies `a < c`. The same must hold for both `==` and `>`.
639 ///
640 /// For example, while [`f64`] doesn't implement [`Ord`] because `NaN != NaN`, we can use
641 /// `partial_cmp` as our sort function when we know the slice doesn't contain a `NaN`.
642 ///
643 /// ```
644 /// use rayon::prelude::*;
645 ///
646 /// let mut floats = [5f64, 4.0, 1.0, 3.0, 2.0];
647 /// floats.par_sort_unstable_by(|a, b| a.partial_cmp(b).unwrap());
648 /// assert_eq!(floats, [1.0, 2.0, 3.0, 4.0, 5.0]);
649 /// ```
650 ///
651 /// # Current implementation
652 ///
653 /// The current algorithm is based on [pattern-defeating quicksort][pdqsort] by Orson Peters,
654 /// which combines the fast average case of randomized quicksort with the fast worst case of
655 /// heapsort, while achieving linear time on slices with certain patterns. It uses some
656 /// randomization to avoid degenerate cases, but with a fixed seed to always provide
657 /// deterministic behavior.
658 ///
659 /// It is typically faster than stable sorting, except in a few special cases, e.g., when the
660 /// slice consists of several concatenated sorted sequences.
661 ///
662 /// All quicksorts work in two stages: partitioning into two halves followed by recursive
663 /// calls. The partitioning phase is sequential, but the two recursive calls are performed in
664 /// parallel.
665 ///
666 /// [pdqsort]: https://github.com/orlp/pdqsort
667 ///
668 /// # Examples
669 ///
670 /// ```
671 /// use rayon::prelude::*;
672 ///
673 /// let mut v = [5, 4, 1, 3, 2];
674 /// v.par_sort_unstable_by(|a, b| a.cmp(b));
675 /// assert_eq!(v, [1, 2, 3, 4, 5]);
676 ///
677 /// // reverse sorting
678 /// v.par_sort_unstable_by(|a, b| b.cmp(a));
679 /// assert_eq!(v, [5, 4, 3, 2, 1]);
680 /// ```
681 fn par_sort_unstable_by<F>(&mut self, compare: F)
682 where
683 F: Fn(&T, &T) -> Ordering + Sync,
684 {
685 par_quicksort(self.as_parallel_slice_mut(), |a, b| {
686 compare(a, b) == Ordering::Less
687 });
688 }
689
690 /// Sorts the slice in parallel with a key extraction function, but might not preserve the order
691 /// of equal elements.
692 ///
693 /// This sort is unstable (i.e., may reorder equal elements), in-place
694 /// (i.e., does not allocate), and *O*(*m* \* *n* \* log(*n*)) worst-case,
695 /// where the key function is *O*(*m*).
696 ///
697 /// # Current implementation
698 ///
699 /// The current algorithm is based on [pattern-defeating quicksort][pdqsort] by Orson Peters,
700 /// which combines the fast average case of randomized quicksort with the fast worst case of
701 /// heapsort, while achieving linear time on slices with certain patterns. It uses some
702 /// randomization to avoid degenerate cases, but with a fixed seed to always provide
703 /// deterministic behavior.
704 ///
705 /// Due to its key calling strategy, `par_sort_unstable_by_key` is likely to be slower than
706 /// [`par_sort_by_cached_key`](#method.par_sort_by_cached_key) in cases where the key function
707 /// is expensive.
708 ///
709 /// All quicksorts work in two stages: partitioning into two halves followed by recursive
710 /// calls. The partitioning phase is sequential, but the two recursive calls are performed in
711 /// parallel.
712 ///
713 /// [pdqsort]: https://github.com/orlp/pdqsort
714 ///
715 /// # Examples
716 ///
717 /// ```
718 /// use rayon::prelude::*;
719 ///
720 /// let mut v = [-5i32, 4, 1, -3, 2];
721 ///
722 /// v.par_sort_unstable_by_key(|k| k.abs());
723 /// assert_eq!(v, [1, 2, -3, 4, -5]);
724 /// ```
725 fn par_sort_unstable_by_key<K, F>(&mut self, f: F)
726 where
727 K: Ord,
728 F: Fn(&T) -> K + Sync,
729 {
730 par_quicksort(self.as_parallel_slice_mut(), |a, b| f(a).lt(&f(b)));
731 }
732
733 /// Returns a parallel iterator over the slice producing non-overlapping mutable
734 /// runs of elements using the predicate to separate them.
735 ///
736 /// The predicate is called on pairs of consecutive elements,
737 /// meaning it is called on `slice[0]` and `slice[1]`,
738 /// then on `slice[1]` and `slice[2]`, and so on.
739 ///
740 /// # Examples
741 ///
742 /// ```
743 /// use rayon::prelude::*;
744 /// let mut xs = [1, 2, 2, 3, 3, 3];
745 /// let chunks: Vec<_> = xs.par_chunk_by_mut(|&x, &y| x == y).collect();
746 /// assert_eq!(chunks[0], &mut [1]);
747 /// assert_eq!(chunks[1], &mut [2, 2]);
748 /// assert_eq!(chunks[2], &mut [3, 3, 3]);
749 /// ```
750 fn par_chunk_by_mut<F>(&mut self, pred: F) -> ChunkByMut<'_, T, F>
751 where
752 F: Fn(&T, &T) -> bool + Send + Sync,
753 {
754 ChunkByMut::new(self.as_parallel_slice_mut(), pred)
755 }
756}
757
758impl<T: Send> ParallelSliceMut<T> for [T] {
759 #[inline]
760 fn as_parallel_slice_mut(&mut self) -> &mut [T] {
761 self
762 }
763}
764
765impl<'data, T: Sync + 'data> IntoParallelIterator for &'data [T] {
766 type Item = &'data T;
767 type Iter = Iter<'data, T>;
768
769 fn into_par_iter(self) -> Self::Iter {
770 Iter { slice: self }
771 }
772}
773
774impl<'data, T: Send + 'data> IntoParallelIterator for &'data mut [T] {
775 type Item = &'data mut T;
776 type Iter = IterMut<'data, T>;
777
778 fn into_par_iter(self) -> Self::Iter {
779 IterMut { slice: self }
780 }
781}
782
783/// Parallel iterator over immutable items in a slice
784#[derive(Debug)]
785pub struct Iter<'data, T: Sync> {
786 slice: &'data [T],
787}
788
789impl<'data, T: Sync> Clone for Iter<'data, T> {
790 fn clone(&self) -> Self {
791 Iter { ..*self }
792 }
793}
794
795impl<'data, T: Sync + 'data> ParallelIterator for Iter<'data, T> {
796 type Item = &'data T;
797
798 fn drive_unindexed<C>(self, consumer: C) -> C::Result
799 where
800 C: UnindexedConsumer<Self::Item>,
801 {
802 bridge(self, consumer)
803 }
804
805 fn opt_len(&self) -> Option<usize> {
806 Some(self.len())
807 }
808}
809
810impl<'data, T: Sync + 'data> IndexedParallelIterator for Iter<'data, T> {
811 fn drive<C>(self, consumer: C) -> C::Result
812 where
813 C: Consumer<Self::Item>,
814 {
815 bridge(self, consumer)
816 }
817
818 fn len(&self) -> usize {
819 self.slice.len()
820 }
821
822 fn with_producer<CB>(self, callback: CB) -> CB::Output
823 where
824 CB: ProducerCallback<Self::Item>,
825 {
826 callback.callback(IterProducer { slice: self.slice })
827 }
828}
829
830struct IterProducer<'data, T: Sync> {
831 slice: &'data [T],
832}
833
834impl<'data, T: 'data + Sync> Producer for IterProducer<'data, T> {
835 type Item = &'data T;
836 type IntoIter = ::std::slice::Iter<'data, T>;
837
838 fn into_iter(self) -> Self::IntoIter {
839 self.slice.iter()
840 }
841
842 fn split_at(self, index: usize) -> (Self, Self) {
843 let (left, right) = self.slice.split_at(index);
844 (IterProducer { slice: left }, IterProducer { slice: right })
845 }
846}
847
848/// Parallel iterator over immutable overlapping windows of a slice
849#[derive(Debug)]
850pub struct Windows<'data, T: Sync> {
851 window_size: usize,
852 slice: &'data [T],
853}
854
855impl<'data, T: Sync> Clone for Windows<'data, T> {
856 fn clone(&self) -> Self {
857 Windows { ..*self }
858 }
859}
860
861impl<'data, T: Sync + 'data> ParallelIterator for Windows<'data, T> {
862 type Item = &'data [T];
863
864 fn drive_unindexed<C>(self, consumer: C) -> C::Result
865 where
866 C: UnindexedConsumer<Self::Item>,
867 {
868 bridge(self, consumer)
869 }
870
871 fn opt_len(&self) -> Option<usize> {
872 Some(self.len())
873 }
874}
875
876impl<'data, T: Sync + 'data> IndexedParallelIterator for Windows<'data, T> {
877 fn drive<C>(self, consumer: C) -> C::Result
878 where
879 C: Consumer<Self::Item>,
880 {
881 bridge(self, consumer)
882 }
883
884 fn len(&self) -> usize {
885 assert!(self.window_size >= 1);
886 self.slice.len().saturating_sub(self.window_size - 1)
887 }
888
889 fn with_producer<CB>(self, callback: CB) -> CB::Output
890 where
891 CB: ProducerCallback<Self::Item>,
892 {
893 callback.callback(WindowsProducer {
894 window_size: self.window_size,
895 slice: self.slice,
896 })
897 }
898}
899
900struct WindowsProducer<'data, T: Sync> {
901 window_size: usize,
902 slice: &'data [T],
903}
904
905impl<'data, T: 'data + Sync> Producer for WindowsProducer<'data, T> {
906 type Item = &'data [T];
907 type IntoIter = ::std::slice::Windows<'data, T>;
908
909 fn into_iter(self) -> Self::IntoIter {
910 self.slice.windows(self.window_size)
911 }
912
913 fn split_at(self, index: usize) -> (Self, Self) {
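// The two halves overlap by `window_size - 1` elements, so the `index`
// windows starting before the split point are produced entirely by the
// left producer, and the right producer yields the windows starting at
// `index` and later.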
914 let left_index = Ord::min(self.slice.len(), index + (self.window_size - 1));
915 let left = &self.slice[..left_index];
916 let right = &self.slice[index..];
917 (
918 WindowsProducer {
919 window_size: self.window_size,
920 slice: left,
921 },
922 WindowsProducer {
923 window_size: self.window_size,
924 slice: right,
925 },
926 )
927 }
928}
929
930/// Parallel iterator over mutable items in a slice
931#[derive(Debug)]
932pub struct IterMut<'data, T: Send> {
933 slice: &'data mut [T],
934}
935
936impl<'data, T: Send + 'data> ParallelIterator for IterMut<'data, T> {
937 type Item = &'data mut T;
938
939 fn drive_unindexed<C>(self, consumer: C) -> C::Result
940 where
941 C: UnindexedConsumer<Self::Item>,
942 {
943 bridge(self, consumer)
944 }
945
946 fn opt_len(&self) -> Option<usize> {
947 Some(self.len())
948 }
949}
950
951impl<'data, T: Send + 'data> IndexedParallelIterator for IterMut<'data, T> {
952 fn drive<C>(self, consumer: C) -> C::Result
953 where
954 C: Consumer<Self::Item>,
955 {
956 bridge(self, consumer)
957 }
958
959 fn len(&self) -> usize {
960 self.slice.len()
961 }
962
963 fn with_producer<CB>(self, callback: CB) -> CB::Output
964 where
965 CB: ProducerCallback<Self::Item>,
966 {
967 callback.callback(IterMutProducer { slice: self.slice })
968 }
969}
970
971struct IterMutProducer<'data, T: Send> {
972 slice: &'data mut [T],
973}
974
975impl<'data, T: 'data + Send> Producer for IterMutProducer<'data, T> {
976 type Item = &'data mut T;
977 type IntoIter = ::std::slice::IterMut<'data, T>;
978
979 fn into_iter(self) -> Self::IntoIter {
980 self.slice.iter_mut()
981 }
982
983 fn split_at(self, index: usize) -> (Self, Self) {
984 let (left, right) = self.slice.split_at_mut(index);
985 (
986 IterMutProducer { slice: left },
987 IterMutProducer { slice: right },
988 )
989 }
990}
991
992/// Parallel iterator over slices separated by a predicate
993pub struct Split<'data, T, P> {
994 slice: &'data [T],
995 separator: P,
996}
997
998impl<'data, T, P: Clone> Clone for Split<'data, T, P> {
999 fn clone(&self) -> Self {
1000 Split {
1001 separator: self.separator.clone(),
1002 ..*self
1003 }
1004 }
1005}
1006
1007impl<'data, T: Debug, P> Debug for Split<'data, T, P> {
1008 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1009 f.debug_struct("Split").field("slice", &self.slice).finish()
1010 }
1011}
1012
1013impl<'data, T, P> ParallelIterator for Split<'data, T, P>
1014where
1015 P: Fn(&T) -> bool + Sync + Send,
1016 T: Sync,
1017{
1018 type Item = &'data [T];
1019
1020 fn drive_unindexed<C>(self, consumer: C) -> C::Result
1021 where
1022 C: UnindexedConsumer<Self::Item>,
1023 {
1024 let producer = SplitProducer::new(self.slice, &self.separator);
1025 bridge_unindexed(producer, consumer)
1026 }
1027}
1028
1029/// Parallel iterator over slices separated by a predicate,
1030/// including the matched part as a terminator.
1031pub struct SplitInclusive<'data, T, P> {
1032 slice: &'data [T],
1033 separator: P,
1034}
1035
1036impl<'data, T, P: Clone> Clone for SplitInclusive<'data, T, P> {
1037 fn clone(&self) -> Self {
1038 SplitInclusive {
1039 separator: self.separator.clone(),
1040 ..*self
1041 }
1042 }
1043}
1044
1045impl<'data, T: Debug, P> Debug for SplitInclusive<'data, T, P> {
1046 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1047 f.debug_struct("SplitInclusive")
1048 .field("slice", &self.slice)
1049 .finish()
1050 }
1051}
1052
1053impl<'data, T, P> ParallelIterator for SplitInclusive<'data, T, P>
1054where
1055 P: Fn(&T) -> bool + Sync + Send,
1056 T: Sync,
1057{
1058 type Item = &'data [T];
1059
1060 fn drive_unindexed<C>(self, consumer: C) -> C::Result
1061 where
1062 C: UnindexedConsumer<Self::Item>,
1063 {
1064 let producer = SplitInclusiveProducer::new_incl(self.slice, &self.separator);
1065 bridge_unindexed(producer, consumer)
1066 }
1067}
1068
1069/// Implement support for `SplitProducer`.
1070impl<'data, T, P> Fissile<P> for &'data [T]
1071where
1072 P: Fn(&T) -> bool,
1073{
1074 fn length(&self) -> usize {
1075 self.len()
1076 }
1077
1078 fn midpoint(&self, end: usize) -> usize {
1079 end / 2
1080 }
1081
1082 fn find(&self, separator: &P, start: usize, end: usize) -> Option<usize> {
1083 self[start..end].iter().position(separator)
1084 }
1085
1086 fn rfind(&self, separator: &P, end: usize) -> Option<usize> {
1087 self[..end].iter().rposition(separator)
1088 }
1089
1090 fn split_once<const INCL: bool>(self, index: usize) -> (Self, Self) {
1091 if INCL {
1092 // include the separator in the left side
1093 self.split_at(index + 1)
1094 } else {
1095 let (left, right) = self.split_at(index);
1096 (left, &right[1..]) // skip the separator
1097 }
1098 }
1099
1100 fn fold_splits<F, const INCL: bool>(self, separator: &P, folder: F, skip_last: bool) -> F
1101 where
1102 F: Folder<Self>,
1103 Self: Send,
1104 {
1105 if INCL {
1106 debug_assert!(!skip_last);
1107 folder.consume_iter(self.split_inclusive(separator))
1108 } else {
1109 let mut split = self.split(separator);
1110 if skip_last {
1111 split.next_back();
1112 }
1113 folder.consume_iter(split)
1114 }
1115 }
1116}
1117
1118/// Parallel iterator over mutable slices separated by a predicate
1119pub struct SplitMut<'data, T, P> {
1120 slice: &'data mut [T],
1121 separator: P,
1122}
1123
1124impl<'data, T: Debug, P> Debug for SplitMut<'data, T, P> {
1125 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1126 f.debug_struct("SplitMut")
1127 .field("slice", &self.slice)
1128 .finish()
1129 }
1130}
1131
1132impl<'data, T, P> ParallelIterator for SplitMut<'data, T, P>
1133where
1134 P: Fn(&T) -> bool + Sync + Send,
1135 T: Send,
1136{
1137 type Item = &'data mut [T];
1138
1139 fn drive_unindexed<C>(self, consumer: C) -> C::Result
1140 where
1141 C: UnindexedConsumer<Self::Item>,
1142 {
1143 let producer = SplitProducer::new(self.slice, &self.separator);
1144 bridge_unindexed(producer, consumer)
1145 }
1146}
1147
1148/// Parallel iterator over mutable slices separated by a predicate,
1149/// including the matched part as a terminator.
1150pub struct SplitInclusiveMut<'data, T, P> {
1151 slice: &'data mut [T],
1152 separator: P,
1153}
1154
1155impl<'data, T: Debug, P> Debug for SplitInclusiveMut<'data, T, P> {
1156 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1157 f.debug_struct("SplitInclusiveMut")
1158 .field("slice", &self.slice)
1159 .finish()
1160 }
1161}
1162
1163impl<'data, T, P> ParallelIterator for SplitInclusiveMut<'data, T, P>
1164where
1165 P: Fn(&T) -> bool + Sync + Send,
1166 T: Send,
1167{
1168 type Item = &'data mut [T];
1169
1170 fn drive_unindexed<C>(self, consumer: C) -> C::Result
1171 where
1172 C: UnindexedConsumer<Self::Item>,
1173 {
1174 let producer = SplitInclusiveProducer::new_incl(self.slice, &self.separator);
1175 bridge_unindexed(producer, consumer)
1176 }
1177}
1178
1179/// Implement support for `SplitProducer`.
1180impl<'data, T, P> Fissile<P> for &'data mut [T]
1181where
1182 P: Fn(&T) -> bool,
1183{
1184 fn length(&self) -> usize {
1185 self.len()
1186 }
1187
1188 fn midpoint(&self, end: usize) -> usize {
1189 end / 2
1190 }
1191
1192 fn find(&self, separator: &P, start: usize, end: usize) -> Option<usize> {
1193 self[start..end].iter().position(separator)
1194 }
1195
1196 fn rfind(&self, separator: &P, end: usize) -> Option<usize> {
1197 self[..end].iter().rposition(separator)
1198 }
1199
1200 fn split_once<const INCL: bool>(self, index: usize) -> (Self, Self) {
1201 if INCL {
1202 // include the separator in the left side
1203 self.split_at_mut(index + 1)
1204 } else {
1205 let (left, right) = self.split_at_mut(index);
1206 (left, &mut right[1..]) // skip the separator
1207 }
1208 }
1209
1210 fn fold_splits<F, const INCL: bool>(self, separator: &P, folder: F, skip_last: bool) -> F
1211 where
1212 F: Folder<Self>,
1213 Self: Send,
1214 {
1215 if INCL {
1216 debug_assert!(!skip_last);
1217 folder.consume_iter(self.split_inclusive_mut(separator))
1218 } else {
1219 let mut split = self.split_mut(separator);
1220 if skip_last {
1221 split.next_back();
1222 }
1223 folder.consume_iter(split)
1224 }
1225 }
1226}