itertools/
duplicates_impl.rs

1use std::hash::Hash;
2
3mod private {
4    use std::collections::HashMap;
5    use std::fmt;
6    use std::hash::Hash;
7
8    #[derive(Clone)]
9    #[must_use = "iterator adaptors are lazy and do nothing unless consumed"]
10    pub struct DuplicatesBy<I: Iterator, Key, F> {
11        pub(crate) iter: I,
12        pub(crate) meta: Meta<Key, F>,
13    }
14
15    impl<I, V, F> fmt::Debug for DuplicatesBy<I, V, F>
16    where
17        I: Iterator + fmt::Debug,
18        V: fmt::Debug + Hash + Eq,
19    {
20        debug_fmt_fields!(DuplicatesBy, iter, meta.used);
21    }
22
23    impl<I: Iterator, Key: Eq + Hash, F> DuplicatesBy<I, Key, F> {
24        pub(crate) fn new(iter: I, key_method: F) -> Self {
25            Self {
26                iter,
27                meta: Meta {
28                    used: HashMap::new(),
29                    pending: 0,
30                    key_method,
31                },
32            }
33        }
34    }
35
36    #[derive(Clone)]
37    pub struct Meta<Key, F> {
38        used: HashMap<Key, bool>,
39        pending: usize,
40        key_method: F,
41    }
42
43    impl<Key, F> Meta<Key, F>
44    where
45        Key: Eq + Hash,
46    {
47        /// Takes an item and returns it back to the caller if it's the second time we see it.
48        /// Otherwise the item is consumed and None is returned
49        #[inline(always)]
50        fn filter<I>(&mut self, item: I) -> Option<I>
51        where
52            F: KeyMethod<Key, I>,
53        {
54            let kv = self.key_method.make(item);
55            match self.used.get_mut(kv.key_ref()) {
56                None => {
57                    self.used.insert(kv.key(), false);
58                    self.pending += 1;
59                    None
60                }
61                Some(true) => None,
62                Some(produced) => {
63                    *produced = true;
64                    self.pending -= 1;
65                    Some(kv.value())
66                }
67            }
68        }
69    }
70
71    impl<I, Key, F> Iterator for DuplicatesBy<I, Key, F>
72    where
73        I: Iterator,
74        Key: Eq + Hash,
75        F: KeyMethod<Key, I::Item>,
76    {
77        type Item = I::Item;
78
79        fn next(&mut self) -> Option<Self::Item> {
80            let Self { iter, meta } = self;
81            iter.find_map(|v| meta.filter(v))
82        }
83
84        #[inline]
85        fn size_hint(&self) -> (usize, Option<usize>) {
86            let (_, hi) = self.iter.size_hint();
87            let hi = hi.map(|hi| {
88                if hi <= self.meta.pending {
89                    // fewer or equally many iter-remaining elements than pending elements
90                    // => at most, each iter-remaining element is matched
91                    hi
92                } else {
93                    // fewer pending elements than iter-remaining elements
94                    // => at most:
95                    //    * each pending element is matched
96                    //    * the other iter-remaining elements come in pairs
97                    self.meta.pending + (hi - self.meta.pending) / 2
98                }
99            });
100            // The lower bound is always 0 since we might only get unique items from now on
101            (0, hi)
102        }
103    }
104
105    impl<I, Key, F> DoubleEndedIterator for DuplicatesBy<I, Key, F>
106    where
107        I: DoubleEndedIterator,
108        Key: Eq + Hash,
109        F: KeyMethod<Key, I::Item>,
110    {
111        fn next_back(&mut self) -> Option<Self::Item> {
112            let Self { iter, meta } = self;
113            iter.rev().find_map(|v| meta.filter(v))
114        }
115    }
116
117    /// A keying method for use with `DuplicatesBy`
118    pub trait KeyMethod<K, V> {
119        type Container: KeyXorValue<K, V>;
120
121        fn make(&mut self, value: V) -> Self::Container;
122    }
123
124    /// Apply the identity function to elements before checking them for equality.
125    #[derive(Debug, Clone)]
126    pub struct ById;
127    impl<V> KeyMethod<V, V> for ById {
128        type Container = JustValue<V>;
129
130        fn make(&mut self, v: V) -> Self::Container {
131            JustValue(v)
132        }
133    }
134
135    /// Apply a user-supplied function to elements before checking them for equality.
136    #[derive(Clone)]
137    pub struct ByFn<F>(pub(crate) F);
138    impl<F> fmt::Debug for ByFn<F> {
139        debug_fmt_fields!(ByFn,);
140    }
141    impl<K, V, F> KeyMethod<K, V> for ByFn<F>
142    where
143        F: FnMut(&V) -> K,
144    {
145        type Container = KeyValue<K, V>;
146
147        fn make(&mut self, v: V) -> Self::Container {
148            KeyValue((self.0)(&v), v)
149        }
150    }
151
152    // Implementors of this trait can hold onto a key and a value but only give access to one of them
153    // at a time. This allows the key and the value to be the same value internally
154    pub trait KeyXorValue<K, V> {
155        fn key_ref(&self) -> &K;
156        fn key(self) -> K;
157        fn value(self) -> V;
158    }
159
160    #[derive(Debug)]
161    pub struct KeyValue<K, V>(K, V);
162    impl<K, V> KeyXorValue<K, V> for KeyValue<K, V> {
163        fn key_ref(&self) -> &K {
164            &self.0
165        }
166        fn key(self) -> K {
167            self.0
168        }
169        fn value(self) -> V {
170            self.1
171        }
172    }
173
174    #[derive(Debug)]
175    pub struct JustValue<V>(V);
176    impl<V> KeyXorValue<V, V> for JustValue<V> {
177        fn key_ref(&self) -> &V {
178            &self.0
179        }
180        fn key(self) -> V {
181            self.0
182        }
183        fn value(self) -> V {
184            self.0
185        }
186    }
187}
188
189/// An iterator adapter to filter for duplicate elements.
190///
191/// See [`.duplicates_by()`](crate::Itertools::duplicates_by) for more information.
192pub type DuplicatesBy<I, V, F> = private::DuplicatesBy<I, V, private::ByFn<F>>;
193
194/// Create a new `DuplicatesBy` iterator.
195pub fn duplicates_by<I, Key, F>(iter: I, f: F) -> DuplicatesBy<I, Key, F>
196where
197    Key: Eq + Hash,
198    F: FnMut(&I::Item) -> Key,
199    I: Iterator,
200{
201    DuplicatesBy::new(iter, private::ByFn(f))
202}
203
204/// An iterator adapter to filter out duplicate elements.
205///
206/// See [`.duplicates()`](crate::Itertools::duplicates) for more information.
207pub type Duplicates<I> = private::DuplicatesBy<I, <I as Iterator>::Item, private::ById>;
208
209/// Create a new `Duplicates` iterator.
210pub fn duplicates<I>(iter: I) -> Duplicates<I>
211where
212    I: Iterator,
213    I::Item: Eq + Hash,
214{
215    Duplicates::new(iter, private::ById)
216}