rayon/slice/
mergesort.rs

1//! Parallel merge sort.
2//!
3//! This implementation is copied verbatim from `std::slice::sort` and then parallelized.
4//! The only difference from the original is that the sequential `mergesort` returns
5//! `MergesortResult` and leaves descending arrays intact.
6
7use crate::iter::*;
8use crate::slice::ParallelSliceMut;
9use crate::SendPtr;
10use std::mem;
11use std::mem::size_of;
12use std::ptr;
13use std::slice;
14
/// Returns the current value of `*ptr`, then advances `*ptr` one element forward.
///
/// # Safety
///
/// The caller must guarantee that advancing the pointer by one element stays within the bounds
/// (or one past the end) of the allocation it points into, as required by `pointer::add`.
unsafe fn get_and_increment<T>(ptr: &mut *mut T) -> *mut T {
    let old = *ptr;
    // `add(1)` is the idiomatic form of `offset(1)` for a known-non-negative step.
    *ptr = ptr.add(1);
    old
}
20
/// Moves `*ptr` one element backward, then returns the new value of `*ptr`.
///
/// # Safety
///
/// The caller must guarantee that moving the pointer one element backward stays within the
/// bounds of the allocation it points into, as required by `pointer::sub`.
unsafe fn decrement_and_get<T>(ptr: &mut *mut T) -> *mut T {
    // `sub(1)` is the idiomatic form of `offset(-1)` for a known-non-negative step.
    *ptr = ptr.sub(1);
    *ptr
}
25
/// When dropped, copies from `src` into `dest` a sequence of length `len`.
///
/// This is a panic-safety guard: while it is alive, the `len` elements starting at `src` are
/// guaranteed to end up at `dest` even if a comparison closure panics partway through a merge.
struct CopyOnDrop<T> {
    src: *const T,
    dest: *mut T,
    len: usize,
}

impl<T> Drop for CopyOnDrop<T> {
    fn drop(&mut self) {
        // SAFETY: whoever constructs the guard must ensure `src..src+len` is valid for reads,
        // `dest..dest+len` is valid for writes, and the two ranges do not overlap.
        unsafe {
            ptr::copy_nonoverlapping(self.src, self.dest, self.len);
        }
    }
}
40
/// Inserts `v[0]` into pre-sorted sequence `v[1..]` so that whole `v[..]` becomes sorted.
///
/// This is the integral subroutine of insertion sort.
///
/// If `is_less` panics mid-insertion, the `InsertionHole` guard below refills the hole so that
/// `v` still contains every element exactly once (though in an unspecified order).
fn insert_head<T, F>(v: &mut [T], is_less: &F)
where
    F: Fn(&T, &T) -> bool,
{
    if v.len() >= 2 && is_less(&v[1], &v[0]) {
        unsafe {
            // There are three ways to implement insertion here:
            //
            // 1. Swap adjacent elements until the first one gets to its final destination.
            //    However, this way we copy data around more than is necessary. If elements are big
            //    structures (costly to copy), this method will be slow.
            //
            // 2. Iterate until the right place for the first element is found. Then shift the
            //    elements succeeding it to make room for it and finally place it into the
            //    remaining hole. This is a good method.
            //
            // 3. Copy the first element into a temporary variable. Iterate until the right place
            //    for it is found. As we go along, copy every traversed element into the slot
            //    preceding it. Finally, copy data from the temporary variable into the remaining
            //    hole. This method is very good. Benchmarks demonstrated slightly better
            //    performance than with the 2nd method.
            //
            // All methods were benchmarked, and the 3rd showed best results. So we chose that one.
            let tmp = mem::ManuallyDrop::new(ptr::read(&v[0]));

            // Intermediate state of the insertion process is always tracked by `hole`, which
            // serves two purposes:
            // 1. Protects integrity of `v` from panics in `is_less`.
            // 2. Fills the remaining hole in `v` in the end.
            //
            // Panic safety:
            //
            // If `is_less` panics at any point during the process, `hole` will get dropped and
            // fill the hole in `v` with `tmp`, thus ensuring that `v` still holds every object it
            // initially held exactly once.
            let mut hole = InsertionHole {
                src: &*tmp,
                dest: &mut v[1],
            };
            // Move `v[1]` into the slot freed by reading `v[0]` into `tmp`; the hole is now at
            // index 1.
            ptr::copy_nonoverlapping(&v[1], &mut v[0], 1);

            // While elements keep comparing less than `tmp`, shift them one slot left and
            // advance the hole to the right.
            for i in 2..v.len() {
                if !is_less(&v[i], &*tmp) {
                    break;
                }
                ptr::copy_nonoverlapping(&v[i], &mut v[i - 1], 1);
                hole.dest = &mut v[i];
            }
            // `hole` gets dropped and thus copies `tmp` into the remaining hole in `v`.
        }
    }

    // When dropped, copies from `src` into `dest`.
    struct InsertionHole<T> {
        src: *const T,
        dest: *mut T,
    }

    impl<T> Drop for InsertionHole<T> {
        fn drop(&mut self) {
            // SAFETY: `src` points at the stashed `tmp` value and `dest` at the single
            // uninitialized hole inside `v`; the one-element ranges never overlap.
            unsafe {
                ptr::copy_nonoverlapping(self.src, self.dest, 1);
            }
        }
    }
}
110
/// Merges non-decreasing runs `v[..mid]` and `v[mid..]` using `buf` as temporary storage, and
/// stores the result into `v[..]`.
///
/// # Safety
///
/// The two slices must be non-empty and `mid` must be in bounds. Buffer `buf` must be long enough
/// to hold a copy of the shorter slice. Also, `T` must not be a zero-sized type.
unsafe fn merge<T, F>(v: &mut [T], mid: usize, buf: *mut T, is_less: &F)
where
    F: Fn(&T, &T) -> bool,
{
    let len = v.len();
    let v = v.as_mut_ptr();
    let v_mid = v.add(mid);
    let v_end = v.add(len);

    // The merge process first copies the shorter run into `buf`. Then it traces the newly copied
    // run and the longer run forwards (or backwards), comparing their next unconsumed elements and
    // copying the lesser (or greater) one into `v`.
    //
    // As soon as the shorter run is fully consumed, the process is done. If the longer run gets
    // consumed first, then we must copy whatever is left of the shorter run into the remaining
    // hole in `v`.
    //
    // Intermediate state of the process is always tracked by `hole`, which serves two purposes:
    // 1. Protects integrity of `v` from panics in `is_less`.
    // 2. Fills the remaining hole in `v` if the longer run gets consumed first.
    //
    // Panic safety:
    //
    // If `is_less` panics at any point during the process, `hole` will get dropped and fill the
    // hole in `v` with the unconsumed range in `buf`, thus ensuring that `v` still holds every
    // object it initially held exactly once.
    let mut hole;

    if mid <= len - mid {
        // The left run is shorter.
        ptr::copy_nonoverlapping(v, buf, mid);
        hole = MergeHole {
            start: buf,
            end: buf.add(mid),
            dest: v,
        };

        // Initially, these pointers point to the beginnings of their arrays.
        // `left` and `out` borrow fields of `hole` so that a panic in `is_less` leaves the
        // guard tracking exactly the unconsumed part of `buf` and the current hole position.
        let left = &mut hole.start;
        let mut right = v_mid;
        let out = &mut hole.dest;

        while *left < hole.end && right < v_end {
            // Consume the lesser side.
            // If equal, prefer the left run to maintain stability.
            let to_copy = if is_less(&*right, &**left) {
                get_and_increment(&mut right)
            } else {
                get_and_increment(left)
            };
            ptr::copy_nonoverlapping(to_copy, get_and_increment(out), 1);
        }
    } else {
        // The right run is shorter.
        ptr::copy_nonoverlapping(v_mid, buf, len - mid);
        hole = MergeHole {
            start: buf,
            end: buf.add(len - mid),
            dest: v_mid,
        };

        // Initially, these pointers point past the ends of their arrays.
        // This branch merges backwards, so `hole.dest` and `hole.end` shrink from the right.
        let left = &mut hole.dest;
        let right = &mut hole.end;
        let mut out = v_end;

        while v < *left && buf < *right {
            // Consume the greater side.
            // If equal, prefer the right run to maintain stability.
            let to_copy = if is_less(&*right.offset(-1), &*left.offset(-1)) {
                decrement_and_get(left)
            } else {
                decrement_and_get(right)
            };
            ptr::copy_nonoverlapping(to_copy, decrement_and_get(&mut out), 1);
        }
    }
    // Finally, `hole` gets dropped. If the shorter run was not fully consumed, whatever remains of
    // it will now be copied into the hole in `v`.

    // When dropped, copies the range `start..end` into `dest..`.
    struct MergeHole<T> {
        start: *mut T,
        end: *mut T,
        dest: *mut T,
    }

    impl<T> Drop for MergeHole<T> {
        fn drop(&mut self) {
            // `T` is not a zero-sized type, so it's okay to divide by its size.
            // SAFETY: `start..end` is the unconsumed part of the run copied into `buf`, and
            // `dest..` is the matching hole in `v` with room for exactly `len` elements.
            unsafe {
                let len = self.end.offset_from(self.start) as usize;
                ptr::copy_nonoverlapping(self.start, self.dest, len);
            }
        }
    }
}
215
/// The result of merge sort.
///
/// The sequential `mergesort` reports whether the slice was already ordered so that
/// `par_mergesort` can concatenate adjacent untouched chunks (reversing descending ones)
/// instead of merging them.
#[must_use]
#[derive(Clone, Copy, PartialEq, Eq)]
enum MergesortResult {
    /// The slice has already been sorted.
    NonDescending,
    /// The slice has been descending and therefore it was left intact.
    Descending,
    /// The slice was sorted.
    Sorted,
}
227
/// A sorted run that starts at index `start` and is of length `len`.
#[derive(Clone, Copy)]
struct Run {
    /// Index of the first element of the run.
    start: usize,
    /// Number of elements in the run.
    len: usize,
}
234
235/// Examines the stack of runs and identifies the next pair of runs to merge. More specifically,
236/// if `Some(r)` is returned, that means `runs[r]` and `runs[r + 1]` must be merged next. If the
237/// algorithm should continue building a new run instead, `None` is returned.
238///
239/// TimSort is infamous for its buggy implementations, as described here:
240/// http://envisage-project.eu/timsort-specification-and-verification/
241///
242/// The gist of the story is: we must enforce the invariants on the top four runs on the stack.
243/// Enforcing them on just top three is not sufficient to ensure that the invariants will still
244/// hold for *all* runs in the stack.
245///
246/// This function correctly checks invariants for the top four runs. Additionally, if the top
247/// run starts at index 0, it will always demand a merge operation until the stack is fully
248/// collapsed, in order to complete the sort.
249#[inline]
250fn collapse(runs: &[Run]) -> Option<usize> {
251    let n = runs.len();
252
253    if n >= 2
254        && (runs[n - 1].start == 0
255            || runs[n - 2].len <= runs[n - 1].len
256            || (n >= 3 && runs[n - 3].len <= runs[n - 2].len + runs[n - 1].len)
257            || (n >= 4 && runs[n - 4].len <= runs[n - 3].len + runs[n - 2].len))
258    {
259        if n >= 3 && runs[n - 3].len < runs[n - 1].len {
260            Some(n - 3)
261        } else {
262            Some(n - 2)
263        }
264    } else {
265        None
266    }
267}
268
/// Sorts a slice using merge sort, unless it is already in descending order.
///
/// This function doesn't modify the slice if it is already non-descending or descending.
/// Otherwise, it sorts the slice into non-descending order.
///
/// This merge sort borrows some (but not all) ideas from TimSort, which is described in detail
/// [here](https://github.com/python/cpython/blob/main/Objects/listsort.txt).
///
/// The algorithm identifies strictly descending and non-descending subsequences, which are called
/// natural runs. There is a stack of pending runs yet to be merged. Each newly found run is pushed
/// onto the stack, and then some pairs of adjacent runs are merged until these two invariants are
/// satisfied:
///
/// 1. for every `i` in `1..runs.len()`: `runs[i - 1].len > runs[i].len`
/// 2. for every `i` in `2..runs.len()`: `runs[i - 2].len > runs[i - 1].len + runs[i].len`
///
/// The invariants ensure that the total running time is *O*(*n* \* log(*n*)) worst-case.
///
/// # Safety
///
/// The argument `buf` is used as a temporary buffer and must be at least as long as `v`.
unsafe fn mergesort<T, F>(v: &mut [T], buf: *mut T, is_less: &F) -> MergesortResult
where
    T: Send,
    F: Fn(&T, &T) -> bool + Sync,
{
    // Very short runs are extended using insertion sort to span at least this many elements.
    const MIN_RUN: usize = 10;

    let len = v.len();

    // In order to identify natural runs in `v`, we traverse it backwards. That might seem like a
    // strange decision, but consider the fact that merges more often go in the opposite direction
    // (forwards). According to benchmarks, merging forwards is slightly faster than merging
    // backwards. To conclude, identifying runs by traversing backwards improves performance.
    let mut runs = vec![];
    let mut end = len;
    while end > 0 {
        // Find the next natural run, and reverse it if it's strictly descending.
        let mut start = end - 1;

        if start > 0 {
            start -= 1;

            // SAFETY: `start + 1 < end <= len`, so both indices are in bounds; the same holds
            // for the `start - 1` accesses below since they are guarded by `start > 0`.
            if is_less(v.get_unchecked(start + 1), v.get_unchecked(start)) {
                // Strictly descending pair: extend the descending run as far left as it goes.
                while start > 0 && is_less(v.get_unchecked(start), v.get_unchecked(start - 1)) {
                    start -= 1;
                }

                // If this descending run covers the whole slice, return immediately.
                if start == 0 && end == len {
                    return MergesortResult::Descending;
                } else {
                    v[start..end].reverse();
                }
            } else {
                // Non-descending pair: extend the non-descending run as far left as it goes.
                while start > 0 && !is_less(v.get_unchecked(start), v.get_unchecked(start - 1)) {
                    start -= 1;
                }

                // If this non-descending run covers the whole slice, return immediately.
                if end - start == len {
                    return MergesortResult::NonDescending;
                }
            }
        }

        // Insert some more elements into the run if it's too short. Insertion sort is faster than
        // merge sort on short sequences, so this significantly improves performance.
        while start > 0 && end - start < MIN_RUN {
            start -= 1;
            insert_head(&mut v[start..end], &is_less);
        }

        // Push this run onto the stack.
        runs.push(Run {
            start,
            len: end - start,
        });
        end = start;

        // Merge some pairs of adjacent runs to satisfy the invariants.
        // Runs are discovered back-to-front, so `runs[r + 1]` lies to the left of `runs[r]`.
        while let Some(r) = collapse(&runs) {
            let left = runs[r + 1];
            let right = runs[r];
            // SAFETY: the two runs are adjacent and non-empty, and `buf` is at least as long as
            // `v` (see this function's contract), hence long enough for the shorter run.
            merge(
                &mut v[left.start..right.start + right.len],
                left.len,
                buf,
                &is_less,
            );

            runs[r] = Run {
                start: left.start,
                len: left.len + right.len,
            };
            runs.remove(r + 1);
        }
    }

    // Finally, exactly one run must remain in the stack.
    debug_assert!(runs.len() == 1 && runs[0].start == 0 && runs[0].len == len);

    // The original order of the slice was neither non-descending nor descending.
    MergesortResult::Sorted
}
375
376////////////////////////////////////////////////////////////////////////////
377// Everything above this line is copied from `std::slice::sort` (with very minor tweaks).
378// Everything below this line is parallelization.
379////////////////////////////////////////////////////////////////////////////
380
/// Splits two sorted slices so that they can be merged in parallel.
///
/// Returns two indices `(a, b)` so that slices `left[..a]` and `right[..b]` come before
/// `left[a..]` and `right[b..]`.
///
/// The longer slice is bisected at its midpoint and the matching split point in the other slice
/// is found by binary search. Ties around the pivot are broken asymmetrically (`<` on one side,
/// `<=` on the other) so that the subsequent merges remain stable.
fn split_for_merge<T, F>(left: &[T], right: &[T], is_less: &F) -> (usize, usize)
where
    F: Fn(&T, &T) -> bool,
{
    let left_len = left.len();
    let right_len = right.len();

    if left_len >= right_len {
        let left_mid = left_len / 2;

        // Find the first element in `right` that is greater than or equal to `left[left_mid]`.
        // The pivot is indexed inside the closure so that empty `right` (possible only when
        // `left` is empty too) never touches `left[0]`.
        let right_mid = right.partition_point(|x| is_less(x, &left[left_mid]));
        (left_mid, right_mid)
    } else {
        let right_mid = right_len / 2;

        // Find the first element in `left` that is strictly greater than `right[right_mid]`,
        // i.e. the length of the prefix of `left` whose elements are `<=` the pivot.
        let left_mid = left.partition_point(|x| !is_less(&right[right_mid], x));
        (left_mid, right_mid)
    }
}
426
/// Merges slices `left` and `right` in parallel and stores the result into `dest`.
///
/// # Safety
///
/// The `dest` pointer must have enough space to store the result.
///
/// Even if `is_less` panics at any point during the merge process, this function will fully copy
/// all elements from `left` and `right` into `dest` (not necessarily in sorted order).
unsafe fn par_merge<T, F>(left: &mut [T], right: &mut [T], dest: *mut T, is_less: &F)
where
    T: Send,
    F: Fn(&T, &T) -> bool + Sync,
{
    // Slices whose lengths sum up to this value are merged sequentially. This number is slightly
    // larger than `CHUNK_LENGTH`, and the reason is that merging is faster than merge sorting, so
    // merging needs a bit coarser granularity in order to hide the overhead of Rayon's task
    // scheduling.
    const MAX_SEQUENTIAL: usize = 5000;

    let left_len = left.len();
    let right_len = right.len();

    // Intermediate state of the merge process, which serves two purposes:
    // 1. Protects integrity of `dest` from panics in `is_less`.
    // 2. Copies the remaining elements as soon as one of the two sides is exhausted.
    //
    // Panic safety:
    //
    // If `is_less` panics at any point during the merge process, `s` will get dropped and copy the
    // remaining parts of `left` and `right` into `dest`.
    let mut s = State {
        left_start: left.as_mut_ptr(),
        left_end: left.as_mut_ptr().add(left_len),
        right_start: right.as_mut_ptr(),
        right_end: right.as_mut_ptr().add(right_len),
        dest,
    };

    if left_len == 0 || right_len == 0 || left_len + right_len < MAX_SEQUENTIAL {
        // Sequential merge: consume both sides front-to-back directly through `s`.
        while s.left_start < s.left_end && s.right_start < s.right_end {
            // Consume the lesser side.
            // If equal, prefer the left run to maintain stability.
            let to_copy = if is_less(&*s.right_start, &*s.left_start) {
                get_and_increment(&mut s.right_start)
            } else {
                get_and_increment(&mut s.left_start)
            };
            ptr::copy_nonoverlapping(to_copy, get_and_increment(&mut s.dest), 1);
        }
    } else {
        // Function `split_for_merge` might panic. If that happens, `s` will get destructed and copy
        // the whole `left` and `right` into `dest`.
        let (left_mid, right_mid) = split_for_merge(left, right, is_less);
        let (left_l, left_r) = left.split_at_mut(left_mid);
        let (right_l, right_r) = right.split_at_mut(right_mid);

        // Prevent the destructor of `s` from running. Rayon will ensure that both calls to
        // `par_merge` happen. If one of the two calls panics, they will ensure that elements still
        // get copied into `dest_left` and `dest_right`.
        mem::forget(s);

        // Wrap pointers in SendPtr so that they can be sent to another thread
        // See the documentation of SendPtr for a full explanation
        let dest_l = SendPtr(dest);
        let dest_r = SendPtr(dest.add(left_l.len() + right_l.len()));
        rayon_core::join(
            move || par_merge(left_l, right_l, dest_l.get(), is_less),
            move || par_merge(left_r, right_r, dest_r.get(), is_less),
        );
    }
    // Finally, `s` gets dropped if we used sequential merge, thus copying the remaining elements
    // all at once.

    // When dropped, copies arrays `left_start..left_end` and `right_start..right_end` into `dest`,
    // in that order.
    struct State<T> {
        left_start: *mut T,
        left_end: *mut T,
        right_start: *mut T,
        right_end: *mut T,
        dest: *mut T,
    }

    impl<T> Drop for State<T> {
        fn drop(&mut self) {
            // Dividing by `size_of::<T>()` is fine here: `par_mergesort` returns early for
            // zero-sized `T`, so this code is never reached with `size == 0`.
            let size = size_of::<T>();
            let left_len = (self.left_end as usize - self.left_start as usize) / size;
            let right_len = (self.right_end as usize - self.right_start as usize) / size;

            // Copy array `left`, followed by `right`.
            // SAFETY: the unconsumed ranges of `left` and `right` are valid for reads, and
            // `dest` has room for both (guaranteed by this function's contract).
            unsafe {
                ptr::copy_nonoverlapping(self.left_start, self.dest, left_len);
                self.dest = self.dest.add(left_len);
                ptr::copy_nonoverlapping(self.right_start, self.dest, right_len);
            }
        }
    }
}
525
/// Recursively merges pre-sorted chunks inside `v`.
///
/// Chunks of `v` are stored in `chunks` as intervals (inclusive left and exclusive right bound).
/// Argument `buf` is an auxiliary buffer that will be used during the procedure.
/// If `into_buf` is true, the result will be stored into `buf`, otherwise it will be in `v`.
///
/// # Safety
///
/// The number of chunks must be positive and they must be adjacent: the right bound of each chunk
/// must equal the left bound of the following chunk.
///
/// The buffer must be at least as long as `v`.
unsafe fn recurse<T, F>(
    v: *mut T,
    buf: *mut T,
    chunks: &[(usize, usize)],
    into_buf: bool,
    is_less: &F,
) where
    T: Send,
    F: Fn(&T, &T) -> bool + Sync,
{
    let len = chunks.len();
    debug_assert!(len > 0);

    // Base case of the algorithm.
    // If only one chunk is remaining, there's no more work to split and merge.
    if len == 1 {
        if into_buf {
            // Copy the chunk from `v` into `buf`.
            let (start, end) = chunks[0];
            let src = v.add(start);
            let dest = buf.add(start);
            ptr::copy_nonoverlapping(src, dest, end - start);
        }
        return;
    }

    // Split the chunks into two halves. Because the chunks are adjacent (see the contract),
    // `(start, mid)` and `(mid, end)` exactly tile the interval `(start, end)`.
    let (start, _) = chunks[0];
    let (mid, _) = chunks[len / 2];
    let (_, end) = chunks[len - 1];
    let (left, right) = chunks.split_at(len / 2);

    // After recursive calls finish we'll have to merge chunks `(start, mid)` and `(mid, end)` from
    // `src` into `dest`. If the current invocation has to store the result into `buf`, we'll
    // merge chunks from `v` into `buf`, and vice versa.
    //
    // Recursive calls flip `into_buf` at each level of recursion. More concretely, `par_merge`
    // merges chunks from `buf` into `v` at the first level, from `v` into `buf` at the second
    // level etc.
    let (src, dest) = if into_buf { (v, buf) } else { (buf, v) };

    // Panic safety:
    //
    // If `is_less` panics at any point during the recursive calls, the destructor of `guard` will
    // be executed, thus copying everything from `src` into `dest`. This way we ensure that all
    // chunks are in fact copied into `dest`, even if the merge process doesn't finish.
    let guard = CopyOnDrop {
        src: src.add(start),
        dest: dest.add(start),
        len: end - start,
    };

    // Wrap pointers in SendPtr so that they can be sent to another thread
    // See the documentation of SendPtr for a full explanation
    let v = SendPtr(v);
    let buf = SendPtr(buf);
    rayon_core::join(
        move || recurse(v.get(), buf.get(), left, !into_buf, is_less),
        move || recurse(v.get(), buf.get(), right, !into_buf, is_less),
    );

    // Everything went all right - recursive calls didn't panic.
    // Forget the guard in order to prevent its destructor from running.
    mem::forget(guard);

    // Merge chunks `(start, mid)` and `(mid, end)` from `src` into `dest`.
    // SAFETY: the recursive calls above left both halves sorted in `src`, the two slices are
    // disjoint, and `dest.add(start)` has room for `end - start` elements.
    let src_left = slice::from_raw_parts_mut(src.add(start), mid - start);
    let src_right = slice::from_raw_parts_mut(src.add(mid), end - mid);
    par_merge(src_left, src_right, dest.add(start), is_less);
}
608
/// Sorts `v` using merge sort in parallel.
///
/// The algorithm is stable, allocates memory, and `O(n log n)` worst-case.
/// The allocated temporary buffer is of the same length as is `v`.
pub(super) fn par_mergesort<T, F>(v: &mut [T], is_less: F)
where
    T: Send,
    F: Fn(&T, &T) -> bool + Sync,
{
    // Slices of up to this length get sorted using insertion sort in order to avoid the cost of
    // buffer allocation.
    const MAX_INSERTION: usize = 20;
    // The length of initial chunks. This number is as small as possible but so that the overhead
    // of Rayon's task scheduling is still negligible.
    const CHUNK_LENGTH: usize = 2000;

    // Sorting has no meaningful behavior on zero-sized types.
    // (The pointer arithmetic below also relies on `size_of::<T>() != 0`.)
    if size_of::<T>() == 0 {
        return;
    }

    let len = v.len();

    // Short slices get sorted in-place via insertion sort to avoid allocations.
    if len <= MAX_INSERTION {
        if len >= 2 {
            // Working right-to-left, each step inserts `v[i]` into the already-sorted `v[i+1..]`.
            for i in (0..len - 1).rev() {
                insert_head(&mut v[i..], &is_less);
            }
        }
        return;
    }

    // Allocate a buffer to use as scratch memory. We keep the length 0 so we can keep in it
    // shallow copies of the contents of `v` without risking the dtors running on copies if
    // `is_less` panics.
    //
    // Note that the `Vec` is shadowed by its raw pointer but stays alive (and thus owns the
    // allocation) until the end of this function.
    let mut buf = Vec::<T>::with_capacity(len);
    let buf = buf.as_mut_ptr();

    // If the slice is not longer than one chunk would be, do sequential merge sort and return.
    if len <= CHUNK_LENGTH {
        // SAFETY: `buf` has capacity for `len` elements, as `mergesort` requires.
        let res = unsafe { mergesort(v, buf, &is_less) };
        if res == MergesortResult::Descending {
            v.reverse();
        }
        return;
    }

    // Split the slice into chunks and merge sort them in parallel.
    // However, descending chunks will not be sorted - they will be simply left intact.
    let mut iter = {
        // Wrap pointer in SendPtr so that it can be sent to another thread
        // See the documentation of SendPtr for a full explanation
        let buf = SendPtr(buf);
        let is_less = &is_less;

        v.par_chunks_mut(CHUNK_LENGTH)
            .with_max_len(1)
            .enumerate()
            .map(move |(i, chunk)| {
                // Each chunk sorts into its own disjoint region `buf[l..r]` of the scratch space.
                let l = CHUNK_LENGTH * i;
                let r = l + chunk.len();
                unsafe {
                    let buf = buf.get().add(l);
                    (l, r, mergesort(chunk, buf, is_less))
                }
            })
            .collect::<Vec<_>>()
            .into_iter()
            .peekable()
    };

    // Now attempt to concatenate adjacent chunks that were left intact.
    let mut chunks = Vec::with_capacity(iter.len());

    while let Some((a, mut b, res)) = iter.next() {
        // If this chunk was not modified by the sort procedure...
        if res != MergesortResult::Sorted {
            while let Some(&(x, y, r)) = iter.peek() {
                // If the following chunk is of the same type and can be concatenated...
                // (`v[x - 1]` is the last element of the current chunk, `v[x]` the first of the
                // next; the comparison checks that the boundary continues the same ordering.)
                if r == res && (r == MergesortResult::Descending) == is_less(&v[x], &v[x - 1]) {
                    // Concatenate them.
                    b = y;
                    iter.next();
                } else {
                    break;
                }
            }
        }

        // Descending chunks must be reversed.
        if res == MergesortResult::Descending {
            v[a..b].reverse();
        }

        chunks.push((a, b));
    }

    // All chunks are properly sorted.
    // Now we just have to merge them together.
    // SAFETY: the chunks are adjacent by construction and `buf` is as long as `v`.
    unsafe {
        recurse(v.as_mut_ptr(), buf, &chunks, false, &is_less);
    }
}
713
#[cfg(test)]
mod tests {
    use super::split_for_merge;
    use rand::distributions::Uniform;
    use rand::{thread_rng, Rng};

    #[test]
    fn test_split_for_merge() {
        // Verifies the contract of `split_for_merge`: every element of `left[..l]` may precede
        // every element of `right[r..]`, and every element of `right[..r]` is strictly less than
        // every element of `left[l..]` (the strictness keeps the eventual merge stable).
        fn check(left: &[u32], right: &[u32]) {
            let (l, r) = split_for_merge(left, right, &|&a, &b| a < b);
            assert!(left[..l]
                .iter()
                .all(|&x| right[r..].iter().all(|&y| x <= y)));
            assert!(right[..r].iter().all(|&x| left[l..].iter().all(|&y| x < y)));
        }

        check(&[1, 2, 2, 2, 2, 3], &[1, 2, 2, 2, 2, 3]);
        check(&[1, 2, 2, 2, 2, 3], &[]);
        check(&[], &[1, 2, 2, 2, 2, 3]);

        let rng = &mut thread_rng();

        for _ in 0..100 {
            // Small value ranges produce many duplicates, stressing the tie-breaking logic.
            let limit: u32 = rng.gen_range(1..21);
            let left_len: usize = rng.gen_range(0..20);
            let right_len: usize = rng.gen_range(0..20);

            let mut left = rng
                .sample_iter(&Uniform::new(0, limit))
                .take(left_len)
                .collect::<Vec<_>>();
            let mut right = rng
                .sample_iter(&Uniform::new(0, limit))
                .take(right_len)
                .collect::<Vec<_>>();

            left.sort();
            right.sort();
            check(&left, &right);
        }
    }
}