determinator/determinator.rs
1// Copyright (c) The cargo-guppy Contributors
2// SPDX-License-Identifier: MIT OR Apache-2.0
3
4use crate::{
5 errors::RulesError,
6 rules::{
7 DeterminatorPostRule, DeterminatorRules, MarkChangedImpl, PathMatch, PathRuleImpl,
8 RulesImpl,
9 },
10};
11use ahash::AHashMap;
12use camino::Utf8Path;
13use globset::Candidate;
14use guppy::{
15 PackageId,
16 graph::{
17 DependencyDirection, PackageGraph, PackageMetadata, PackageSet, Workspace,
18 cargo::{CargoOptions, CargoSet},
19 feature::{FeatureFilter, FeatureSet, StandardFeatures},
20 },
21 platform::PlatformSpec,
22};
23use petgraph::{Directed, graphmap::GraphMap};
24use rayon::prelude::*;
25use std::collections::{HashSet, hash_map::Entry};
26
27/// Determine target dependencies from changed files and packages in a workspace.
28///
29/// For more about how the determinator works, see the [crate-level documentation](crate).
30///
31/// This struct has two lifetime parameters:
32/// * `'g` stands for the lifetime of the new graph. The `DeterminatorSet` will be bound to this
33/// lifetime.
34/// * `'a` is the lifetime of the old graph, Cargo options, and changed paths. The `DeterminatorSet`
35/// will not be bound to this lifetime.
36#[derive(Clone, Debug)]
37pub struct Determinator<'g, 'a> {
38 old: &'a PackageGraph,
39 new: &'g PackageGraph,
40 rules: RulesImpl<'g>,
41 cargo_options: Option<&'a CargoOptions<'a>>,
42 old_features_only: Option<FeatureSet<'a>>,
43 new_features_only: Option<FeatureSet<'g>>,
44 changed_paths: Vec<&'a Utf8Path>,
45}
46
47impl<'g, 'a> Determinator<'g, 'a> {
48 /// Creates a new instance of `Determinator` with old and new package graphs.
49 pub fn new(old: &'a PackageGraph, new: &'g PackageGraph) -> Self {
50 Self {
51 old,
52 new,
53 rules: RulesImpl::new(new, &DeterminatorRules::default())
54 .expect("default rules should parse"),
55 cargo_options: None,
56 old_features_only: None,
57 new_features_only: None,
58 changed_paths: vec![],
59 }
60 }
61
62 /// Adds a list of changed paths. This list is used as a source of information for the
63 /// determinator.
64 ///
65 /// This should consist of paths that are changed since the base revision. Paths on Windows may
66 /// use either `/` or `\\` as separators.
67 ///
68 /// [`Utf8Paths0`](crate::Utf8Paths0) in this crate provides a convenient way to handle
69 /// null-separated paths as produced by source control systems.
70 ///
71 /// # Should you include untracked and ignored files?
72 ///
73 /// For comparisons against the working directory, one may or may not wish to include untracked
74 /// files. A few points to consider:
75 ///
76 /// * If your code uses a library like [`datatest`](https://github.com/commure/datatest), simply
77 /// creating a file in the right place is enough to add a new test. If untracked files are
78 /// not included, the user may have to run `git add` before the determinator picks the change
79 /// up.
80 /// * On the other hand, if a user wishes to include a new test in their PR, they're going to
81 /// have to run `git add` at some point anyway.
82 /// * Some users may have untracked files lying around in their repository for long periods of
83 /// time, and those files may cause false positives.
84 /// * Git makes it surprisingly hard to list out untracked files, requiring `git status
85 /// --porcelain -z` with some additional filtering on top to do so. `git diff` doesn't have
86 /// an option to list untracked files.
87 /// * This is generally moot in CI, since those workflows will likely be comparing against a
88 /// commit.
89 /// * In most cases, ignored files should not be considered by the determinator, since they
90 /// don't affect CI builds.
91 ///
92 /// On balance, only considering tracked files appears to be the right approach for most
93 /// situations.
94 pub fn add_changed_paths(
95 &mut self,
96 paths: impl IntoIterator<Item = &'a (impl AsRef<Utf8Path> + ?Sized + 'a)>,
97 ) -> &mut Self {
98 self.changed_paths
99 .extend(paths.into_iter().map(|path| path.as_ref()));
100 self
101 }
102
103 /// Returns what *would* happen if a given path was added to the changed set.
104 ///
105 /// This does not add any path to the changed set, but indicates what *would* happen if a path
106 /// is added.
107 ///
108 /// This method may be used to ensure that all paths in a repository are covered by at least one
109 /// rule if they don't match a package.
110 ///
111 /// `match_cb` is called for all package IDs that the path matches.
112 pub fn match_path(
113 &self,
114 path: impl AsRef<Utf8Path>,
115 match_cb: impl FnMut(&'g PackageId),
116 ) -> PathMatch {
117 process_path(
118 path.as_ref(),
119 &self.new.workspace(),
120 &self.rules.path_rules,
121 match_cb,
122 )
123 }
124
125 /// Processes and configures determinator rules.
126 ///
127 /// Returns an error if the rules were invalid in some way.
128 pub fn set_rules(&mut self, rules: &DeterminatorRules) -> Result<&mut Self, RulesError> {
129 let rules = RulesImpl::new(self.new, rules)?;
130 self.rules = rules;
131 Ok(self)
132 }
133
134 /// Configures Cargo options.
135 ///
136 /// These options are used to determine if the build for a particular package has changed.
137 ///
138 /// If no options are specified, the default `CargoOptions`, as specified by
139 /// `CargoOptions::new`, are used, with one exception: dev-dependencies are built by default.
140 pub fn set_cargo_options(&mut self, cargo_options: &'a CargoOptions<'a>) -> &mut Self {
141 self.cargo_options = Some(cargo_options);
142 self
143 }
144
145 /// Returns the default Cargo options used by the determinator.
146 ///
147 /// These are the same as the defaults returned by [`CargoOptions::new`](CargoOptions::new),
148 /// except:
149 /// * dev-dependencies are enabled
150 /// * the host and target platforms are set to the build target
151 pub fn default_cargo_options() -> CargoOptions<'static> {
152 let mut options = CargoOptions::new();
153 options
154 .set_include_dev(true)
155 .set_platform(PlatformSpec::build_target().expect("current platform is unknown"));
156 options
157 }
158
159 /// Configures features-only packages that are used in build simulations.
160 ///
161 /// The packages and features will be used for feature unification. This is useful for
162 /// pseudo-packages or workspace-hack packages, including those generated by tools like
163 /// [Hakari](https://docs.rs/hakari).
164 ///
165 /// For more about `features_only`, see the documentation for [`CargoSet::new`](CargoSet::new).
166 ///
167 /// The package names are expected to be present in the new graph, but may not be present in the
168 /// old `PackageGraph`.
169 /// * If a package name isn't in the *new* graph, this method returns an error.
170 /// * If a package name isn't in the *old* graph, it is ignored.
171 pub fn set_features_only<'b>(
172 &mut self,
173 workspace_names: impl IntoIterator<Item = &'b str>,
174 features: StandardFeatures,
175 ) -> Result<&mut Self, guppy::Error> {
176 let old_workspace = self.old.workspace();
177 let mut old_names = vec![];
178 let new_names: Vec<_> = workspace_names
179 .into_iter()
180 .inspect(|&name| {
181 if old_workspace.contains_name(name) {
182 old_names.push(name);
183 }
184 })
185 .collect();
186
187 // Missing package name in new workspace => error.
188 let new_features_only = self
189 .new
190 .resolve_workspace_names(new_names)?
191 .to_feature_set(features);
192 let old_features_only = self
193 .old
194 .resolve_workspace_names(old_names)
195 .expect("old names were checked already")
196 .to_feature_set(features);
197
198 self.new_features_only = Some(new_features_only);
199 self.old_features_only = Some(old_features_only);
200 Ok(self)
201 }
202
203 /// Uses the old and new sets and the list of changed files to compute the list
204 /// of projects that is affected.
205 pub fn compute(&self) -> DeterminatorSet<'g> {
206 let mut build_state = BuildState::new(self);
207
208 // 1-2. Process every changed path.
209 for path in &self.changed_paths {
210 build_state = match build_state.process_path(path) {
211 Some(build_state) => build_state,
212 None => {
213 // The build state was discarded, which means that the entire workspace is
214 // changed and affected.
215 let path_changed_set = self.new.resolve_workspace();
216 let affected_set = path_changed_set.clone();
217 return DeterminatorSet {
218 path_changed_set,
219 // This is an empty set.
220 summary_changed_set: self.new.resolve_none(),
221 affected_set,
222 };
223 }
224 }
225 }
226
227 // 3. Construct the path changed set from the given IDs.
228 let path_changed_set = self
229 .new
230 .resolve_ids(build_state.path_changed_ids.iter().copied())
231 .expect("package IDs are all valid");
232
233 // 4. Use build summaries as another source of changes.
234 build_state.process_build_summaries();
235 let summary_changed_set = self
236 .new
237 .resolve_ids(build_state.summary_changed_ids.iter().copied())
238 .expect("package IDs are all valid");
239
240 // 5. The affected set is the transitive closure of the graph constructed by looking at both
241 // the build cache and Cargo rules.
242 let affected_set = build_state.reverse_index.affected_closure(
243 self.new,
244 &build_state.path_changed_ids,
245 &build_state.summary_changed_ids,
246 );
247
248 DeterminatorSet {
249 path_changed_set,
250 summary_changed_set,
251 affected_set,
252 }
253 }
254}
255
256/// The result of a `Determinator` computation.
257///
258/// The lifetime `'g` is tied to the *new* `PackageGraph` passed to a `Determinator`.
259#[derive(Clone, Debug)]
260pub struct DeterminatorSet<'g> {
261 /// The packages that were affected, directly or indirectly. This set is what most consumers
262 /// care about.
263 ///
264 /// A package is in this set if it was marked changed due to a path or summaries changing, or if
265 /// a simulated Cargo build or package rule indicated that it is affected.
266 pub affected_set: PackageSet<'g>,
267
268 /// The packages that were marked changed because a file changed.
269 ///
270 /// Either a file inside this package changed or a path rule was matched.
271 pub path_changed_set: PackageSet<'g>,
272
273 /// The packages that were marked changed becuase a simulated Cargo build's summary showed
274 /// changes in dependencies.
275 ///
276 /// This does not include packages marked changed through a path. For example, if a path rule
277 /// caused all packages to be marked changed, further steps aren't run and this set is empty.
278 pub summary_changed_set: PackageSet<'g>,
279}
280
281// ---
282// Private structures
283// ---
284
285#[derive(Debug)]
286struct BuildState<'g, 'a, 'b> {
287 determinator: &'b Determinator<'g, 'a>,
288 path_changed_ids: HashSet<&'g PackageId>,
289 summary_changed_ids: HashSet<&'g PackageId>,
290 build_cache: CargoBuildCache<'g>,
291 reverse_index: ReverseIndex<'g>,
292}
293
294impl<'g, 'a, 'b> BuildState<'g, 'a, 'b> {
295 fn new(determinator: &'b Determinator<'g, 'a>) -> Self {
296 let build_cache = CargoBuildCache::new(determinator);
297 let reverse_index = ReverseIndex::new(determinator, &build_cache);
298 Self {
299 determinator,
300 path_changed_ids: HashSet::new(),
301 summary_changed_ids: HashSet::new(),
302 build_cache,
303 reverse_index,
304 }
305 }
306
307 // A return value of None stands for all packages in the workspace changed.
308 fn process_path(mut self, path: &Utf8Path) -> Option<Self> {
309 let status = process_path(
310 path,
311 &self.determinator.new.workspace(),
312 &self.determinator.rules.path_rules,
313 |id| {
314 self.path_changed_ids.insert(id);
315 },
316 );
317 match status {
318 PathMatch::RuleMatchedAll | PathMatch::NoMatches => None,
319 PathMatch::RuleMatched(_) | PathMatch::AncestorMatched => Some(self),
320 }
321 }
322
323 fn process_build_summaries(&mut self) {
324 // For each workspace package, if its build summaries have changed mark it changed.
325 let summary_changed_ids: Vec<_> = self
326 .determinator
327 .new
328 .workspace()
329 .par_iter_by_name()
330 .filter_map(|(name, package)| {
331 // Don't include packages already marked as changed through paths. (This is documented.)
332 if !self.path_changed_ids.contains(package.id())
333 && self.build_summaries_changed(name, package)
334 {
335 Some(package.id())
336 } else {
337 None
338 }
339 })
340 .collect();
341 self.summary_changed_ids.extend(summary_changed_ids);
342 }
343
344 fn build_summaries_changed(&self, name: &str, package: PackageMetadata<'g>) -> bool {
345 // Look up the package in the old metadata by path. (Workspace packages are uniquely
346 // identified by both name and path -- this could be done by name as well).
347 let old_workspace = self.determinator.old.workspace();
348 let old_package = match old_workspace.member_by_name(name) {
349 Ok(package) => package,
350 Err(_) => {
351 // Member not found: this is new or renamed.
352 return true;
353 }
354 };
355
356 let default_options = Determinator::default_cargo_options();
357 let cargo_options = self.determinator.cargo_options.unwrap_or(&default_options);
358
359 let default_features_only = self.determinator.old.feature_graph().resolve_none();
360 let features_only = self
361 .determinator
362 .old_features_only
363 .as_ref()
364 .unwrap_or(&default_features_only);
365
366 let old_result = BuildResult::new(old_package, cargo_options, features_only);
367 let new_result = &self.build_cache.result_cache[package.id()];
368 new_result.is_changed(&old_result, cargo_options)
369 }
370}
371
372fn process_path<'g>(
373 path: &Utf8Path,
374 workspace: &Workspace<'g>,
375 path_rules: &[PathRuleImpl<'g>],
376 mut match_cb: impl FnMut(&'g PackageId),
377) -> PathMatch {
378 let candidate = Candidate::new(path);
379
380 // 1. Apply any rules that match the path.
381 for rule in path_rules {
382 if rule.glob_set.is_match_candidate(&candidate) {
383 // This glob matches this rule, so execute it.
384 match &rule.mark_changed {
385 MarkChangedImpl::Packages(packages) => {
386 for package in packages {
387 match_cb(package.id());
388 }
389 }
390 MarkChangedImpl::All => {
391 // Mark all packages changed.
392 return PathMatch::RuleMatchedAll;
393 }
394 }
395
396 match &rule.post_rule {
397 DeterminatorPostRule::Skip => {
398 // Skip all further processing for this path but continue reading other
399 // paths.
400 return PathMatch::RuleMatched(rule.rule_index);
401 }
402 DeterminatorPostRule::SkipRules => {
403 // Skip further rule processing but continue to step 2 to match to the
404 // nearest package.
405 break;
406 }
407 DeterminatorPostRule::Fallthrough => {
408 // Continue applying rules.
409 continue;
410 }
411 }
412 }
413 }
414
415 // 2. Map the path to its nearest ancestor package.
416 for ancestor in path.ancestors() {
417 if let Ok(package) = workspace.member_by_path(ancestor) {
418 match_cb(package.id());
419 return PathMatch::AncestorMatched;
420 }
421 }
422
423 // 3. If a file didn't match anything so far, rebuild everything.
424 PathMatch::NoMatches
425}
426
427/// Stores a build cache of every package in a workspace.
428#[derive(Debug)]
429struct CargoBuildCache<'g> {
430 result_cache: AHashMap<&'g PackageId, BuildResult<'g>>,
431}
432
433impl<'g> CargoBuildCache<'g> {
434 fn new(determinator: &Determinator<'g, '_>) -> Self {
435 let default_options = Determinator::default_cargo_options();
436 let cargo_options = determinator.cargo_options.unwrap_or(&default_options);
437
438 let workspace = determinator.new.workspace();
439 let default_features_only = determinator.new.feature_graph().resolve_none();
440 let features_only = determinator
441 .new_features_only
442 .as_ref()
443 .unwrap_or(&default_features_only);
444
445 let result_cache: ahash::HashMap<_, _> = workspace
446 .par_iter()
447 .map(|package| {
448 let id = package.id();
449 let build_result = BuildResult::new(package, cargo_options, features_only);
450 (id, build_result)
451 })
452 .collect();
453
454 Self {
455 result_cache: result_cache.into(),
456 }
457 }
458}
459
460#[derive(Debug)]
461struct BuildResult<'g> {
462 none: CargoSet<'g>,
463 default: CargoSet<'g>,
464 all: CargoSet<'g>,
465 // TODO: add support for more feature sets?
466}
467
468impl<'g> BuildResult<'g> {
469 fn new(
470 package: PackageMetadata<'g>,
471 cargo_options: &CargoOptions<'_>,
472 features_only: &FeatureSet<'g>,
473 ) -> Self {
474 let (none, (default, all)) = rayon::join(
475 || {
476 make_cargo_set(
477 &package,
478 StandardFeatures::None,
479 cargo_options,
480 features_only,
481 )
482 },
483 || {
484 rayon::join(
485 || {
486 make_cargo_set(
487 &package,
488 StandardFeatures::Default,
489 cargo_options,
490 features_only,
491 )
492 },
493 || {
494 make_cargo_set(
495 &package,
496 StandardFeatures::All,
497 cargo_options,
498 features_only,
499 )
500 },
501 )
502 },
503 );
504
505 Self { none, default, all }
506 }
507
508 /// Returns the unified set of workspace dependencies.
509 fn unified_workspace_set(&self, workspace_set: &PackageSet<'g>) -> PackageSet<'g> {
510 let target_set = self
511 .all_cargo_sets()
512 .map(|x| x.target_features().to_package_set())
513 .reduce(|a, b| a.union(&b))
514 .expect("at least one set");
515 let host_set = self
516 .all_cargo_sets()
517 .map(|x| x.host_features().to_package_set())
518 .reduce(|a, b| a.union(&b))
519 .expect("at least one set");
520
521 target_set.union(&host_set).intersection(workspace_set)
522 }
523
524 fn is_changed(&self, other: &BuildResult<'_>, cargo_options: &CargoOptions<'_>) -> bool {
525 for (a, b) in self.all_cargo_sets().zip(other.all_cargo_sets()) {
526 let a_summary = a
527 .to_summary(cargo_options)
528 .expect("custom platforms currently unsupported");
529 let b_summary = b
530 .to_summary(cargo_options)
531 .expect("custom platforms currently unsupported");
532 let diff = a_summary.diff(&b_summary);
533 if diff.is_changed() {
534 return true;
535 }
536 }
537 false
538 }
539
540 fn all_cargo_sets<'a>(&'a self) -> impl Iterator<Item = &'a CargoSet<'g>> + 'a {
541 std::iter::once(&self.none)
542 .chain(std::iter::once(&self.default))
543 .chain(std::iter::once(&self.all))
544 }
545}
546
547fn make_cargo_set<'x>(
548 package: &PackageMetadata<'x>,
549 filter: impl FeatureFilter<'x>,
550 cargo_options: &CargoOptions<'_>,
551 features_only: &FeatureSet<'x>,
552) -> CargoSet<'x> {
553 let package_set = package.to_package_set();
554 let initials = package_set.to_feature_set(filter);
555
556 CargoSet::new(initials, features_only.clone(), cargo_options).expect("valid cargo options")
557}
558
559/// A reverse index of if a package is affected -> what else gets marked changed or affected.
560#[derive(Debug)]
561struct ReverseIndex<'g> {
562 // None for the node type represents the "all packages" sentinel value.
563 reverse_index: GraphMap<Option<&'g PackageId>, ReverseIndexEdge, Directed>,
564}
565
566/// Edges in the reverse index graph.
567#[derive(Copy, Clone, Debug, Eq, PartialEq)]
568enum ReverseIndexEdge {
569 /// This edge was added as a package rule. This always takes precedence over `CargoBuild`.
570 PackageRule,
571 /// This edge was added through the Cargo build cache.
572 CargoBuild,
573}
574
575impl<'g> ReverseIndex<'g> {
576 fn new(determinator: &Determinator<'g, '_>, build_cache: &CargoBuildCache<'g>) -> Self {
577 let mut reverse_index = GraphMap::new();
578
579 let workspace_set = determinator.new.resolve_workspace();
580
581 // First, look at the result cache and add edges based on that.
582 for (id, build_result) in &build_cache.result_cache {
583 reverse_index.extend(
584 build_result
585 .unified_workspace_set(&workspace_set)
586 .package_ids(DependencyDirection::Forward)
587 .map(|dep_id| (Some(dep_id), Some(*id), ReverseIndexEdge::CargoBuild)),
588 );
589 }
590
591 // Now, look at all the package rules and add anything in them to the reverse index.
592 // IMPORTANT: This comes later so that PackageRule edges overwrite CargoBuild edges.
593 for package_rule in &determinator.rules.package_rules {
594 for on_affected in package_rule
595 .on_affected
596 .package_ids(DependencyDirection::Forward)
597 {
598 match &package_rule.mark_changed {
599 MarkChangedImpl::Packages(packages) => {
600 // Add edges from on_affected to mark_changed.
601 reverse_index.extend(packages.iter().map(|package| {
602 (
603 Some(on_affected),
604 Some(package.id()),
605 ReverseIndexEdge::PackageRule,
606 )
607 }));
608 }
609 MarkChangedImpl::All => {
610 // Add an edge to the None/"all" sentinel value.
611 reverse_index.add_edge(
612 Some(on_affected),
613 None,
614 ReverseIndexEdge::PackageRule,
615 );
616 }
617 }
618 }
619 }
620
621 Self { reverse_index }
622 }
623
624 fn affected_closure(
625 &self,
626 package_graph: &'g PackageGraph,
627 path_changed: &HashSet<&'g PackageId>,
628 summary_changed: &HashSet<&'g PackageId>,
629 ) -> PackageSet<'g> {
630 // This is a *really* interesting DFS, in that there's one restriction: you can't follow
631 // two CargoBuild edges consecutively. Also, in the initial set, path_changed allows
632 // CargoBuild to be followed once while summary_changed doesn't allow it to be followed.
633
634 #[derive(Copy, Clone, Debug, Eq, PartialEq)]
635 enum FollowCargoBuild {
636 Allowed,
637 NotAllowed,
638 }
639
640 use FollowCargoBuild::*;
641
642 // The order of what goes in the stack doesn't matter for correctness, but putting Allowed
643 // at the end (and therefore popping it first) lowers the chance of an upgrade re-traversal.
644 let mut stack: Vec<_> = summary_changed
645 .iter()
646 .map(|id| (*id, NotAllowed))
647 .chain(path_changed.iter().map(|id| (*id, Allowed)))
648 .collect();
649
650 // Do a DFS with two maps, in case there are cycles (can happen with dev deps).
651 let mut discovered = AHashMap::new();
652 let mut finished = HashSet::new();
653
654 while let Some(&(id, follow)) = stack.last() {
655 let push_neighbors = match discovered.entry(id) {
656 Entry::Vacant(entry) => {
657 // First time visiting this node. Push neighbors, don't pop the stack.
658 entry.insert(follow);
659 true
660 }
661 Entry::Occupied(mut entry) => {
662 // This node was seen before.
663 match (entry.get(), follow) {
664 (NotAllowed, Allowed) => {
665 // Upgrade this node to Allowed and push neighbors.
666 entry.insert(follow);
667 true
668 }
669 _ => {
670 // Already been fully discovered or just NotAllowed -> NotAllowed, no
671 // point revisiting it.
672 false
673 }
674 }
675 }
676 };
677
678 if push_neighbors {
679 for (_, neighbor, &edge) in self.reverse_index.edges(Some(id)) {
680 if edge == ReverseIndexEdge::CargoBuild && follow == NotAllowed {
681 // Can't follow two consecutive CargoBuild edges.
682 continue;
683 }
684 match neighbor {
685 Some(neighbor) => {
686 let neighbor_follow = match edge {
687 ReverseIndexEdge::CargoBuild => NotAllowed,
688 ReverseIndexEdge::PackageRule => Allowed,
689 };
690
691 match (discovered.get(&neighbor), neighbor_follow) {
692 (None, _) => {
693 // Node has not been discovered yet. Add it to the stack to
694 // be visited.
695 stack.push((neighbor, neighbor_follow))
696 }
697 (Some(NotAllowed), Allowed) => {
698 // Node was previously discovered with NotAllowed but is
699 // now discovered with Allowed. This is an upgrade. Add it to
700 // the stack to be visited.
701 stack.push((neighbor, neighbor_follow))
702 }
703 _ => {}
704 }
705 }
706 None => {
707 // Build everything, can just exit here.
708 return package_graph.resolve_workspace();
709 }
710 }
711 }
712 } else {
713 stack.pop();
714 finished.insert(id);
715 }
716 }
717
718 // At the end of this process, finished contains all nodes discovered.
719 package_graph
720 .resolve_ids(finished.iter().copied())
721 .expect("all IDs are valid")
722 }
723}