diffus/
lcs.rs

1use crate::{edit, Diffable, Same};
2
3#[cfg_attr(feature = "serialize-impl", derive(serde::Serialize))]
4#[derive(Debug, PartialEq, Eq)]
5pub enum Edit<T> {
6    Same(T, T),
7    Insert(T),
8    Remove(T),
9}
10
11impl<T> Edit<T> {
12    pub fn is_same(&self) -> bool {
13        if let Edit::Same(_, _) = self {
14            true
15        } else {
16            false
17        }
18    }
19}
20
21fn c_matrix<T: Same, I, J>(
22    x: impl Fn() -> I,
23    y: impl Fn() -> J,
24    x_len: usize,
25    y_len: usize,
26) -> (usize, crate::twodvec::TwoDVec<usize>, usize)
27where
28    I: DoubleEndedIterator<Item = T>,
29    J: DoubleEndedIterator<Item = T>,
30{
31    let mut x_iter = x();
32    let mut y_iter = y();
33    let prefix_eq = x_iter
34        .by_ref()
35        .zip(y_iter.by_ref())
36        .take_while(|(x, y)| x.same(y))
37        .count();
38    // Only check the suffix if we did not consume the entirety of either of the iterators
39    // (If one of them are consumed, we would double count elements)
40    let check_suffix = x_len.min(y_len) != prefix_eq;
41    let suffix_eq = if check_suffix {
42        x_iter
43            .rev()
44            .zip(y_iter.rev())
45            .take_while(|(x, y)| x.same(y))
46            .count()
47    } else {
48        0
49    };
50
51    let width = x_len.saturating_sub(prefix_eq + suffix_eq) + 1;
52    let height = y_len.saturating_sub(prefix_eq + suffix_eq) + 1;
53
54    let mut c = crate::twodvec::TwoDVec::new(0, width, height);
55
56    for (i, x) in x().skip(prefix_eq).take(width - 1).enumerate() {
57        for (j, y) in y().skip(prefix_eq).take(height - 1).enumerate() {
58            c[j + 1][i + 1] = if x.same(&y) {
59                c[j][i] + 1
60            } else {
61                c[j][i + 1].max(c[j + 1][i])
62            };
63        }
64    }
65
66    (prefix_eq, c, suffix_eq)
67}
68
69fn lcs_base<T: Same>(
70    c: crate::twodvec::TwoDVec<usize>,
71    mut x: itertools::PutBack<impl Iterator<Item = T>>,
72    mut y: itertools::PutBack<impl Iterator<Item = T>>,
73) -> impl Iterator<Item = Edit<T>> {
74    let mut i = c.width() - 1;
75    let mut j = c.height() - 1;
76
77    std::iter::from_fn(move || {
78        let current_x = x.next();
79        let current_y = y.next();
80
81        let left = j.checked_sub(1).map(|j_minus| c[j_minus][i]);
82        let above = i.checked_sub(1).map(|i_minus| c[j][i_minus]);
83
84        if current_x.is_some()
85            && current_y.is_some()
86            && current_x
87                .as_ref()
88                .unwrap()
89                .same(current_y.as_ref().unwrap())
90        {
91            i = i - 1;
92            j = j - 1;
93
94            match (current_x, current_y) {
95                (Some(current_x), Some(current_y)) => Some(Edit::Same(current_x, current_y)),
96                _ => unreachable!(),
97            }
98        } else if current_y.is_some() && (current_x.is_none() || left >= above) {
99            current_x.map(|c| x.put_back(c));
100            j = j - 1;
101            current_y.map(|value| Edit::Insert(value))
102        } else if current_x.is_some() && (current_y.is_none() || left < above) {
103            current_y.map(|c| y.put_back(c));
104            i = i - 1;
105            current_x.map(|value| Edit::Remove(value))
106        } else {
107            None
108        }
109    })
110    .collect::<Vec<_>>()
111    .into_iter()
112    .rev()
113}
114
115pub(crate) fn lcs<
116    'a,
117    T: Same,
118    I: DoubleEndedIterator<Item = T>,
119    J: DoubleEndedIterator<Item = T>,
120>(
121    x: impl Fn() -> I,
122    y: impl Fn() -> J,
123    x_len: usize,
124    y_len: usize,
125) -> impl Iterator<Item = Edit<T>> {
126    let (prefix_eq, c, suffix_eq) = c_matrix(&x, &y, x_len, y_len);
127
128    x().zip(y())
129        .take(prefix_eq)
130        .map(|(x, y)| Edit::Same(x, y))
131        .chain(lcs_base(
132            c,
133            itertools::put_back(
134                x().rev()
135                    .skip(suffix_eq)
136                    .take(x_len.saturating_sub(prefix_eq + suffix_eq)),
137            ),
138            itertools::put_back(
139                y().rev()
140                    .skip(suffix_eq)
141                    .take(y_len.saturating_sub(prefix_eq + suffix_eq)),
142            ),
143        ))
144        .chain(
145            x().skip(x_len - suffix_eq)
146                .zip(y().skip(y_len - suffix_eq))
147                .map(|(x, y)| Edit::Same(x, y)),
148        )
149}
150
151// FIXME move out from lcs
152pub(crate) fn lcs_post_change<'a, T: Same + Diffable<'a> + ?Sized + 'a>(
153    result: impl Iterator<Item = Edit<&'a T>>,
154) -> impl Iterator<Item = edit::collection::Edit<'a, T, <T as Diffable<'a>>::Diff>> {
155    result.map(|edit| match edit {
156        Edit::Same(left, right) => match left.diff(right) {
157            edit::Edit::Copy(t) => edit::collection::Edit::Copy(t),
158            edit::Edit::Change(diff) => edit::collection::Edit::Change(diff),
159        },
160        Edit::Insert(value) => edit::collection::Edit::Insert(value),
161        Edit::Remove(value) => edit::collection::Edit::Remove(value),
162    })
163}
164
165#[cfg(test)]
166mod tests {
167    use super::*;
168
169    #[test]
170    fn characters() {
171        let left = "XMJYAUZ";
172        let right = "MZJAWXU";
173
174        let s = lcs(
175            || left.chars(),
176            || right.chars(),
177            left.chars().count(),
178            right.chars().count(),
179        );
180
181        assert_eq!(
182            s.collect::<Vec<_>>(),
183            vec![
184                Edit::Remove('X'),
185                Edit::Same('M', 'M'),
186                Edit::Insert('Z'),
187                Edit::Same('J', 'J'),
188                Edit::Remove('Y'),
189                Edit::Same('A', 'A'),
190                Edit::Insert('W'),
191                Edit::Insert('X'),
192                Edit::Same('U', 'U'),
193                Edit::Remove('Z')
194            ]
195        );
196    }
197
198    #[test]
199    fn words() {
200        let left = "The quick brown fox jumps over the lazy dog";
201        let right = "The quick brown dog leaps over the lazy cat";
202
203        let s = lcs(
204            || left.split_whitespace(),
205            || right.split_whitespace(),
206            left.split_whitespace().count(),
207            right.split_whitespace().count(),
208        );
209
210        assert_eq!(
211            s.collect::<Vec<_>>(),
212            vec![
213                Edit::Same("The", "The"),
214                Edit::Same("quick", "quick"),
215                Edit::Same("brown", "brown"),
216                Edit::Remove("fox"),
217                Edit::Remove("jumps"),
218                Edit::Insert("dog"),
219                Edit::Insert("leaps"),
220                Edit::Same("over", "over"),
221                Edit::Same("the", "the"),
222                Edit::Same("lazy", "lazy"),
223                Edit::Remove("dog"),
224                Edit::Insert("cat")
225            ]
226        );
227    }
228}