diff --git a/dev-tools/ls-apis/README.adoc b/dev-tools/ls-apis/README.adoc index ff7c3e6fc2c..228690f6812 100644 --- a/dev-tools/ls-apis/README.adoc +++ b/dev-tools/ls-apis/README.adoc @@ -149,6 +149,62 @@ This tool basically works as follows: . Then, it loads and validates the developer-maintained metadata (`api-manifest.toml`). . Then, it applies whatever filter has been selected and prints out whatever information was asked for. +=== Deployment units + +The upgrade DAG's nodes are _deployment units_: sets of Rust packages that are always shipped and updated together. For example, Sled Agent, Propolis, and the switch-zone components all update as one unit (the host OS image), so they form a single deployment unit. + +Deployment units and their packages are declared in `api-manifest.toml`: + +```toml +[[deployment_units]] +name = "Host OS" +packages = [ + "omicron-sled-agent", + "propolis-server", + # ...switch zone components... +] +``` + +Each entry in `packages` is a _top-level package_: a Rust binary shipped in that deployment unit. ls-apis walks each top-level package's Cargo dependencies to discover the APIs it produces and consumes. + +==== Subcomponents + +Sometimes one binary contains a body of code whose API dependencies are best tracked separately from the rest of it. The motivating case is the Rack Setup Service (RSS): it lives in the `sled-agent-rack-setup` library crate, which is linked into the `omicron-sled-agent` binary, but it runs only once (at rack initialization) and has different semantics from the rest of the Sled Agent. + +A _subcomponent_ is such a library, tracked as an API consumer in its own right: + +```toml +[[deployment_units]] +name = "Host OS" +packages = [ "omicron-sled-agent", ... ] +subcomponents = [ + { name = "sled-agent-rack-setup", inside = "omicron-sled-agent", lifecycle = "rack-init" }, +] +``` + +`inside` names the top-level package (in the same deployment unit) that links the subcomponent. When ls-apis walks that package's dependencies, the subcomponent's own crate is skipped, so any API dependency reachable only through the subcomponent is attributed to the subcomponent instead of to the containing binary. (A dependency the binary also reaches by another path is attributed to both.) The subcomponent is then walked separately, as its own consumer. + +A subcomponent is assumed not to host any Dropshot APIs of its own, so subcomponents are not walked as API producers. + +==== Lifecycle + +A component's `lifecycle` describes when its code runs: + +[cols="1,3",options="header"] +|=== +|Lifecycle +|Meaning + +|`steady-state` +|The component runs during normal operation. Its API dependencies are part of the upgrade DAG. This is the default. + +|`rack-init` +|The component runs only at rack initialization, when the whole rack is at a single consistent version. It therefore cannot experience version skew during an online upgrade, so its API dependencies are excluded from the upgrade DAG and from cycle checks. + +|=== + +`lifecycle` may currently be set only on a subcomponent; every top-level package is treated as `steady-state`. + The filtering is a little complicated but very important! === The purpose of filtering diff --git a/dev-tools/ls-apis/api-manifest.toml b/dev-tools/ls-apis/api-manifest.toml index 7b8522aab74..ad60c615397 100644 --- a/dev-tools/ls-apis/api-manifest.toml +++ b/dev-tools/ls-apis/api-manifest.toml @@ -80,6 +80,21 @@ packages = [ "tfportd", "wicketd", ] +# sled-agent-rack-setup is a library inside the omicron-sled-agent binary. It is +# listed here as a subcomponent so its API dependencies (which only matter +# during RSS) are reported separately from the steady-state dependencies of +# omicron-sled-agent. Setting `inside = "omicron-sled-agent"` means that when +# ls-apis walks omicron-sled-agent's Cargo dependencies, sled-agent-rack-setup +# itself is skipped, so API dependencies reachable only through it are +# attributed to the subcomponent rather than to omicron-sled-agent. +# +# The `rack-init` lifecycle marks this subcomponent as running only during rack +# initialization. Edges from `rack-init` components are excluded from the +# upgrade DAG, so that sled-agent-rack-setup doesn't impose any ordering +# constraints on the upgrade DAG. +subcomponents = [ + { name = "sled-agent-rack-setup", inside = "omicron-sled-agent", lifecycle = "rack-init" }, +] # Installinator gets packaged into its own host OS image. [[deployment_units]] @@ -389,10 +404,12 @@ server_package_name = "nexus-lockstep-api" versioned_how = "client" versioned_how_reason = "Circular dependencies between Nexus and other services" # Exactly these consumers (Rust packages specified in one or more of the -# `packages` arrays in the deployment_units table above) are allowed to use this -# API. ls-apis check will error out if the list of consumers is incorrect. +# `packages` arrays in the deployment_units table above, or via `subcomponents` +# entries in the same table) are allowed to use this API. ls-apis check will +# error out if the list of consumers is incorrect. restricted_to_consumers = [ - { name = "omicron-sled-agent", reason = "Only RSS and sled-agent-sim use this API, neither of which is part of upgrade" }, + { name = "omicron-sled-agent", reason = "Only sled-agent-sim uses this API, which is not part of upgrade" }, + { name = "sled-agent-rack-setup", reason = "Only RSS uses this API, which is not part of upgrade" }, ] [[apis]] @@ -548,24 +565,6 @@ Sled Agent uses a Sled Agent client for the `zone-bundle` binary, which is not deployed. """ -[[dependency_filter_rules]] -ancestor = "sled-agent-rack-setup" -client = "cockroach-admin-client" -evaluation = "not-deployed" -note = """ -The Cockroach Admin client is only used during RSS, which we don't care about -for the purpose of upgrade. -""" - -[[dependency_filter_rules]] -ancestor = "sled-agent-rack-setup" -client = "dns-service-client" -evaluation = "not-deployed" -note = """ -The DNS service client is only used during RSS, which we don't care about for -the purpose of upgrade. -""" - [[dependency_filter_rules]] ancestor = "transient-dns-server" client = "dns-service-client" @@ -799,7 +798,6 @@ note = "Sled Agent uses Client::localhost() to connect to ddm." permalinks = [ "https://github.com/oxidecomputer/omicron/blob/6cef874/sled-agent/src/ddm_reconciler.rs#L56", "https://github.com/oxidecomputer/omicron/blob/6cef874/sled-agent/src/bootstrap/bootstore_setup.rs#L92", - "https://github.com/oxidecomputer/omicron/blob/6cef874/sled-agent/src/rack_setup/service.rs#L1086", "https://github.com/oxidecomputer/omicron/blob/6cef874/sled-agent/src/sled_agent.rs#L1378", ] @@ -807,12 +805,15 @@ permalinks = [ server = "omicron-sled-agent" client = "mg-admin-client" note = """ -Sled Agent only queries the switch zone on the same sled, -which is within the same deployment unit as the global -zone. +Sled Agent invokes EarlyNetworkSetup::init_switch_config +(services.rs:3431) to configure uplinks on the local switch zone. This in turn +constructs an MgdClient pointed at the local switch zone +(early-networking/lib.rs:442), which is within the same deployment unit as the +global zone. """ permalinks = [ - "https://github.com/oxidecomputer/omicron/blob/6cef874/sled-agent/src/bootstrap/early_networking.rs#L483", + "https://github.com/oxidecomputer/omicron/blob/32de20c/sled-agent/src/services.rs#L3431", + "https://github.com/oxidecomputer/omicron/blob/32de20c/sled-agent/early-networking/src/lib.rs#L442", ] [[intra_deployment_unit_only_edges]] @@ -835,39 +836,47 @@ server = "omicron-sled-agent" client = "gateway-client" note = """ BUG: Sled Agent creates two MGS clients. One of them -(early_networking.rs:376) queries the switch zone on -the same sled, which is within the same deployment unit -as the global zone, so this is okay. +(early-networking/lib.rs:331, called from services.rs:3431) queries the +switch zone on the same sled, which is within the same deployment unit as the +global zone, so this is okay. -The other one (early_networking.rs:277) queries both -switch zones, so it crosses deployment units. This needs -to be fixed. +The other one (early-networking/lib.rs:259, called from services.rs:1095) +queries both switch zones, so it crosses deployment units. This needs to be +fixed. Reference: https://github.com/oxidecomputer/omicron/issues/9708 """ permalinks = [ - "https://github.com/oxidecomputer/omicron/blob/6cef874/sled-agent/src/bootstrap/early_networking.rs#L376", - "https://github.com/oxidecomputer/omicron/blob/6cef874/sled-agent/src/bootstrap/early_networking.rs#L277-L280", + "https://github.com/oxidecomputer/omicron/blob/32de20c/sled-agent/src/services.rs#L3431", + "https://github.com/oxidecomputer/omicron/blob/32de20c/sled-agent/early-networking/src/lib.rs#L331", + "https://github.com/oxidecomputer/omicron/blob/32de20c/sled-agent/src/services.rs#L1095-L1102", + "https://github.com/oxidecomputer/omicron/blob/32de20c/sled-agent/early-networking/src/lib.rs#L259-L262", ] [[intra_deployment_unit_only_edges]] server = "omicron-sled-agent" client = "dpd-client" note = """ -BUG: Sled Agent creates two DPD clients. One of them (early_networking.rs:419) -is always to the switch zone on the same sled, which is in the same deployment -unit. +BUG: Sled Agent creates two DPD clients. One of them +(early-networking/lib.rs:378, called from services.rs:3431) is always to the +switch zone on the same sled, which is in the same deployment unit. -The other one (services.rs:1090) queries both switch zones, so it crosses -deployment units. This needs to be fixed. +The other one (services.rs:1104, iterating addresses returned by +EarlyNetworkSetup::lookup_uplinked_switch_zone_underlay_addrs) queries both +switch zones, so it crosses deployment units. This needs to be fixed. Reference: https://github.com/oxidecomputer/omicron/issues/9708 """ permalinks = [ - "https://github.com/oxidecomputer/omicron/blob/6cef874/sled-agent/src/bootstrap/early_networking.rs#L419", - "https://github.com/oxidecomputer/omicron/blob/6cef874/sled-agent/src/services.rs#L1090", + "https://github.com/oxidecomputer/omicron/blob/32de20c/sled-agent/src/services.rs#L3431", + "https://github.com/oxidecomputer/omicron/blob/32de20c/sled-agent/early-networking/src/lib.rs#L378", + "https://github.com/oxidecomputer/omicron/blob/32de20c/sled-agent/src/services.rs#L1104-L1115", ] +# Note: sled-agent-rack-setup (RSS) is marked with `lifecycle = "rack-init"` +# in its subcomponent declaration, so its API consumption is excluded from the +# upgrade DAG entirely. No IDU annotation is needed for edges from RSS. + [[intra_deployment_unit_only_edges]] server = "tfportd" client = "dpd-client" diff --git a/dev-tools/ls-apis/src/api_metadata.rs b/dev-tools/ls-apis/src/api_metadata.rs index d28b8d6f359..b61e6462a31 100644 --- a/dev-tools/ls-apis/src/api_metadata.rs +++ b/dev-tools/ls-apis/src/api_metadata.rs @@ -28,6 +28,12 @@ use std::collections::BTreeSet; pub struct AllApiMetadata { apis: BTreeMap, deployment_units: IdOrdMap, + /// every server component across all deployment units, keyed by name + /// + /// This is the single source of truth for server components. + /// `DeploymentUnitInfo` stores only component *names*; the components + /// themselves (with their lifecycle and kind) live here. + server_components: IdOrdMap, dependency_rules: BTreeMap>, ignored_non_clients: BTreeSet, intra_deployment_unit_only_edges: Vec, @@ -59,11 +65,18 @@ impl AllApiMetadata { self.apis.keys() } - /// Iterate over the package names for all the APIs' servers - pub fn server_components( + /// Iterate over all the server components across all deployment units + pub fn server_components(&self) -> impl Iterator { + self.server_components.iter() + } + + /// Look up a server component by name, or `None` if no component with that + /// name is registered in any deployment unit + pub fn server_component( &self, - ) -> impl Iterator { - self.deployment_units.iter().flat_map(|d| d.packages.iter()) + name: &ServerComponentName, + ) -> Option<&ServerComponent> { + self.server_components.get(name) } /// Look up details about an API based on its client package name @@ -151,12 +164,52 @@ impl AllApiMetadata { #[serde(deny_unknown_fields)] struct RawApiMetadata { apis: Vec, - deployment_units: Vec, + deployment_units: Vec, dependency_filter_rules: Vec, ignored_non_clients: Vec, intra_deployment_unit_only_edges: Vec, } +/// Registers a server component, failing if a component with the same name has +/// already been registered. +/// +/// `server_components` is keyed by component name and spans every deployment +/// unit, so this enforces that each component name appears exactly once across +/// all units' `packages` and `subcomponents`. Without this check, duplicates +/// would surface later as a confusing "in multiple deployment units" error, or +/// as ambiguous component lookups. +fn register_server_component( + server_components: &mut IdOrdMap, + component: ServerComponent, +) -> Result<()> { + if let Err(error) = server_components.insert_unique(component) { + let new = error.new_item(); + // `IdOrdMap` is keyed by component name alone, so the new component + // conflicts with exactly one previously-registered component. + let previous = error.duplicates().first().expect( + "a duplicate-key conflict has exactly one conflicting item", + ); + if previous.deployment_unit == new.deployment_unit { + bail!( + "server component {:?} appears more than once in \ + deployment unit {}; each component must appear exactly \ + once across `packages` and `subcomponents`", + new.name, + new.deployment_unit, + ); + } + bail!( + "server component {:?} appears in multiple deployment unit \ + entries ({} and {}); each component must appear exactly once \ + across `packages` and `subcomponents`", + new.name, + previous.deployment_unit, + new.deployment_unit, + ); + } + Ok(()) +} + impl TryFrom for AllApiMetadata { type Error = anyhow::Error; @@ -175,7 +228,53 @@ impl TryFrom for AllApiMetadata { } let mut deployment_units = IdOrdMap::new(); - for info in raw.deployment_units { + let mut server_components: IdOrdMap = IdOrdMap::new(); + for raw_unit in raw.deployment_units { + // Build this unit's list of component names (packages first, then + // subcomponents) while registering each component in + // `server_components`. + let mut component_names = Vec::new(); + + for pkg in &raw_unit.packages { + component_names.push(pkg.clone()); + register_server_component( + &mut server_components, + ServerComponent { + name: pkg.clone(), + deployment_unit: raw_unit.name.clone(), + lifecycle: Lifecycle::SteadyState, + kind: ServerComponentKind::TopLevel, + }, + )?; + } + + for sub in &raw_unit.subcomponents { + if !raw_unit.packages.contains(&sub.inside) { + bail!( + "subcomponent {:?} has `inside = {:?}`, but no \ + such package exists in deployment unit {:?}'s \ + `packages` list", + sub.name, + sub.inside, + raw_unit.name, + ); + } + component_names.push(sub.name.clone()); + register_server_component( + &mut server_components, + ServerComponent { + name: sub.name.clone(), + deployment_unit: raw_unit.name.clone(), + lifecycle: sub.lifecycle, + kind: ServerComponentKind::Subcomponent { + inside: sub.inside.clone(), + }, + }, + )?; + } + + let info = + DeploymentUnitInfo { name: raw_unit.name, component_names }; if let Err(e) = deployment_units.insert_unique(info) { bail!( "duplicate deployment unit name in API metadata: {}", @@ -211,10 +310,8 @@ impl TryFrom for AllApiMetadata { // Validate that IDU-only edges reference only known server components // and APIs. - let known_components: BTreeSet<_> = - deployment_units.iter().flat_map(|u| u.packages.iter()).collect(); for edge in &raw.intra_deployment_unit_only_edges { - if !known_components.contains(&edge.server) { + if server_components.get(&edge.server).is_none() { bail!( "intra_deployment_unit_only_edges: \ unknown server component {:?}", @@ -234,6 +331,7 @@ impl TryFrom for AllApiMetadata { Ok(AllApiMetadata { apis, deployment_units, + server_components, dependency_rules, ignored_non_clients, intra_deployment_unit_only_edges: raw @@ -414,14 +512,28 @@ pub enum ApiConsumerStatus { /// Describes a unit that combines one or more servers that get deployed /// together -#[derive(Deserialize)] -#[serde(deny_unknown_fields)] +/// +/// This is the validated form of a `[[deployment_units]]` entry; see +/// `RawDeploymentUnitInfo` for the on-disk format. +#[derive(Debug)] pub struct DeploymentUnitInfo { /// human-readable name (e.g. "Nexus", "DNS Server"), also used as primary /// key pub name: DeploymentUnitName, - /// list of Rust packages that are shipped in this unit - pub packages: Vec, + /// names of the server components in this unit (`packages` first, then + /// `subcomponents`) + /// + /// Components can be looked up using [`AllApiMetadata::server_component`]. + component_names: Vec, +} + +impl DeploymentUnitInfo { + /// Iterates over the names of the server components in this unit + pub fn component_names( + &self, + ) -> impl Iterator { + self.component_names.iter() + } } impl IdOrdItem for DeploymentUnitInfo { @@ -432,6 +544,166 @@ impl IdOrdItem for DeploymentUnitInfo { id_upcast!(); } +/// A server component within a deployment unit +/// +/// This is the validated form of a `packages` or `subcomponents` entry. +#[derive(Debug)] +pub struct ServerComponent { + /// the Rust package name for this component, also used as primary key + name: ServerComponentName, + /// the deployment unit that this component is part of + deployment_unit: DeploymentUnitName, + /// when this component's code runs + lifecycle: Lifecycle, + /// whether this is a top-level package or a subcomponent + kind: ServerComponentKind, +} + +impl ServerComponent { + /// Returns the Rust package name for this component + pub fn name(&self) -> &ServerComponentName { + &self.name + } + + /// Returns the deployment unit that this component is part of + pub fn deployment_unit(&self) -> &DeploymentUnitName { + &self.deployment_unit + } + + /// Returns whether this is a top-level package or a subcomponent + pub fn kind(&self) -> &ServerComponentKind { + &self.kind + } + + /// Whether this component's consumed-API edges participate in the upgrade + /// DAG + /// + /// Components whose code doesn't run during steady-state operation (e.g., + /// code that only runs at rack initialization) can't be affected by + /// version skew during an online upgrade, so their API dependencies are + /// excluded from the upgrade DAG and from cycle checks. + pub fn in_upgrade_dag(&self) -> bool { + self.lifecycle.in_upgrade_dag() + } + + /// Returns a short, human-readable note describing how this component + /// differs from an ordinary steady-state, top-level component, or `None` + /// if it is one. + pub fn display_note(&self) -> Option { + let mut notes = Vec::new(); + match &self.kind { + ServerComponentKind::TopLevel => {} + ServerComponentKind::Subcomponent { inside } => { + notes.push(format!("subcomponent of {inside}")); + } + } + match self.lifecycle { + Lifecycle::SteadyState => {} + Lifecycle::RackInit => { + notes.push(String::from("rack-init only")); + } + } + (!notes.is_empty()).then(|| notes.join("; ")) + } +} + +impl IdOrdItem for ServerComponent { + type Key<'a> = &'a ServerComponentName; + fn key(&self) -> Self::Key<'_> { + &self.name + } + id_upcast!(); +} + +/// Distinguishes a deployment unit's top-level packages from its subcomponents +#[derive(Debug)] +pub enum ServerComponentKind { + /// A package shipped directly in the deployment unit + /// + /// The package's Cargo dependencies are walked to discover both the APIs + /// they produce and the APIs they consume. + TopLevel, + /// A library linked inside a `TopLevel` package of the *same* deployment + /// unit, but tracked as a separate consumer of APIs + /// + /// Dependencies reachable only through a subcomponent are attributed to + /// the subcomponent rather than to its parent package. Unlike top-level + /// packages, subcomponents do not themselves host Dropshot APIs, so they + /// are not walked as API producers. + Subcomponent { + /// the package that links this library + /// + /// Guaranteed to name a `TopLevel` component in the same deployment + /// unit. When ls-apis walks `inside`'s Cargo dependencies, the + /// subcomponent's own package is treated as absent, so dependencies + /// reachable *only* through the subcomponent are attributed to it + /// rather than to `inside`. The subcomponent is walked separately as + /// its own consumer; a dependency that `inside` also reaches through + /// another path stays attributed to `inside` as well. + inside: ServerComponentName, + }, +} + +/// Describes when a server component's code runs +#[derive(Deserialize, Debug, Clone, Copy, Default, PartialEq, Eq)] +#[serde(rename_all = "kebab-case")] +pub enum Lifecycle { + /// The component runs during normal operation. Its API dependencies are + /// part of the upgrade DAG. + #[default] + SteadyState, + /// The component runs only at rack initialization. The rack is at a + /// consistent version when this code runs, so its API dependencies are + /// excluded from the upgrade DAG. + RackInit, +} + +impl Lifecycle { + /// Whether components with this lifecycle participate in the upgrade DAG + fn in_upgrade_dag(self) -> bool { + match self { + Lifecycle::SteadyState => true, + Lifecycle::RackInit => false, + } + } +} + +/// Format of a `[[deployment_units]]` entry in the `api-manifest.toml` file +/// +/// This is processed and validated in the transformation to `AllApiMetadata`. +#[derive(Deserialize, Debug)] +#[serde(deny_unknown_fields)] +struct RawDeploymentUnitInfo { + /// human-readable name, also used as primary key + name: DeploymentUnitName, + /// the set of Rust packages that are shipped in this unit + packages: BTreeSet, + /// list of subcomponents: libraries inside one of `packages` that are + /// tracked as separate consumers of APIs + #[serde(default)] + subcomponents: Vec, + // Note: unlike for subcomponents, we do not currently accept `lifecycle` + // for deployment units because all deployment units are treated as being + // steady-state. This is not inherent, though, and we can add a `lifecycle` + // field here if it ever becomes necessary. +} + +/// Format of a `subcomponents` entry in the `api-manifest.toml` file +#[derive(Deserialize, Debug)] +#[serde(deny_unknown_fields)] +struct RawSubcomponentInfo { + /// the Rust package name for this subcomponent + name: ServerComponentName, + /// the deployment-unit package that contains this subcomponent (i.e., the + /// binary that links this library) + /// + /// Must name an entry in the same deployment unit's `packages` list. + inside: ServerComponentName, + /// when this subcomponent's code runs + #[serde(default)] + lifecycle: Lifecycle, +} + #[derive(Deserialize)] #[serde(deny_unknown_fields)] pub struct DependencyFilterRule { diff --git a/dev-tools/ls-apis/src/bin/ls-apis.rs b/dev-tools/ls-apis/src/bin/ls-apis.rs index b7dcd3ada14..6c5882fa5cf 100644 --- a/dev-tools/ls-apis/src/bin/ls-apis.rs +++ b/dev-tools/ls-apis/src/bin/ls-apis.rs @@ -172,13 +172,21 @@ fn run_apis(apis: &SystemApis, args: ShowDepsArgs) -> Result<()> { for c in apis.api_consumers(&api.client_package_name, args.filter)? { let (repo_name, package_path) = apis.package_label(c.server_pkgname)?; + let note = match metadata + .server_component(c.server_pkgname) + .and_then(|component| component.display_note()) + { + Some(note) => format!(" [{note}]"), + None => String::new(), + }; println!( - " consumed by: {} ({}/{}) via {} path{}", + " consumed by: {} ({}/{}) via {} path{}{}", c.server_pkgname, repo_name, package_path, c.dep_paths.len(), if c.dep_paths.len() == 1 { "" } else { "s" }, + note, ); if args.show_deps { for (i, dep_path) in c.dep_paths.iter().enumerate() { @@ -266,7 +274,15 @@ fn print_server_components<'a>( ) -> Result<()> { for s in server_components.into_iter() { let (repo_name, pkg_path) = apis.package_label(s)?; - println!("{}{} ({}/{})", prefix, s, repo_name, pkg_path); + match metadata.server_component(s).and_then(|c| c.display_note()) { + Some(note) => println!( + "{}{} ({}/{}) [{}]", + prefix, s, repo_name, pkg_path, note, + ), + None => { + println!("{}{} ({}/{})", prefix, s, repo_name, pkg_path) + } + } for api in metadata .apis() .filter(|a| apis.is_producer_of(s, &a.client_package_name)) @@ -307,7 +323,7 @@ fn run_servers(apis: &SystemApis, args: DotArgs) -> Result<()> { print_server_components( apis, metadata, - metadata.server_components(), + metadata.server_components().map(|c| c.name()), "", args.show_deps, args.filter, diff --git a/dev-tools/ls-apis/src/cargo.rs b/dev-tools/ls-apis/src/cargo.rs index 7b851fc75cc..6d068b6ecaa 100644 --- a/dev-tools/ls-apis/src/cargo.rs +++ b/dev-tools/ls-apis/src/cargo.rs @@ -237,11 +237,17 @@ impl Workspace { /// Walks the required (normal and build) dependencies of package `root`. /// /// Returns a [`WalkOutcome`] describing every package reachable from - /// `root`, each paired with a dependency path to it. - pub fn walk_required_deps_recursively<'a>( + /// `root`, each paired with a dependency path to it, plus which of the + /// `omitted_nodes` were actually encountered. + /// + /// Packages in `omitted_nodes` are treated as if they didn't exist in the + /// dependency graph: they are not reported in [`WalkOutcome::found`], and + /// the walk does not descend into them. + pub fn walk_required_deps_recursively<'a, 'p>( &'a self, root: &Package, - ) -> Result> { + omitted_nodes: &BTreeSet<&'p PackageId>, + ) -> Result> { struct Remaining<'n> { node: &'n cargo_metadata::Node, path: DepPath, @@ -262,6 +268,7 @@ impl Workspace { }]; let mut seen: BTreeSet = BTreeSet::new(); let mut found: Vec<(&'a Package, DepPath)> = Vec::new(); + let mut omitted_seen: BTreeSet<&'p PackageId> = BTreeSet::new(); while let Some(Remaining { node: next, path }) = remaining.pop() { for d in &next.deps { @@ -275,6 +282,11 @@ impl Workspace { continue; } + if let Some(omitted) = omitted_nodes.get(did) { + omitted_seen.insert(*omitted); + continue; + } + // unwraps: We verified during loading that we have metadata for // all package ids for which we have nodes in the dependency // tree. We also verified during loading that we have nodes in @@ -293,7 +305,7 @@ impl Workspace { } } - Ok(WalkOutcome { found }) + Ok(WalkOutcome { found, omitted_seen }) } /// Return all package ids for the given `pkgname` @@ -338,13 +350,21 @@ fn cargo_toml_parent( } /// The result of [`Workspace::walk_required_deps_recursively`]. -pub struct WalkOutcome<'a> { +pub struct WalkOutcome<'a, 'p> { /// Every package encountered as a required (normal or build) dependency of /// the walk's `root`, each paired with a dependency path from `root` to /// that package. /// /// A package reachable by more than one path appears once per path. pub found: Vec<(&'a Package, DepPath)>, + + /// The subset of the walk's `omitted_nodes` argument that was actually + /// encountered as a required dependency of `root`. + /// + /// Comparing this against `omitted_nodes` lets the caller detect an + /// expected omission that was not ever seen while traversing the dependency + /// graph. + pub omitted_seen: BTreeSet<&'p PackageId>, } /// Describes a "dependency path": a path through the Cargo dependency graph diff --git a/dev-tools/ls-apis/src/lib.rs b/dev-tools/ls-apis/src/lib.rs index c0cf1186f6f..8ecb59307c8 100644 --- a/dev-tools/ls-apis/src/lib.rs +++ b/dev-tools/ls-apis/src/lib.rs @@ -14,6 +14,7 @@ pub use api_metadata::AllApiMetadata; pub use api_metadata::ApiConsumerStatus; pub use api_metadata::ApiExpectedConsumer; pub use api_metadata::ApiMetadata; +pub use api_metadata::ServerComponent; pub use api_metadata::VersionedHow; pub use system_apis::ApiDependencyFilter; pub use system_apis::FailedConsumerCheck; diff --git a/dev-tools/ls-apis/src/system_apis.rs b/dev-tools/ls-apis/src/system_apis.rs index 2680ca34914..b52cc5271e9 100644 --- a/dev-tools/ls-apis/src/system_apis.rs +++ b/dev-tools/ls-apis/src/system_apis.rs @@ -15,6 +15,7 @@ use crate::api_metadata::ApiExpectedConsumer; use crate::api_metadata::ApiExpectedConsumers; use crate::api_metadata::ApiMetadata; use crate::api_metadata::Evaluation; +use crate::api_metadata::ServerComponentKind; use crate::api_metadata::VersionedHow; use crate::cargo::DepPath; use crate::parse_toml_file; @@ -22,6 +23,7 @@ use crate::workspaces::Workspaces; use anyhow::Result; use anyhow::{Context, anyhow, bail}; use camino::Utf8PathBuf; +use cargo_metadata::PackageId; use iddqd::IdOrdItem; use iddqd::IdOrdMap; use iddqd::id_upcast; @@ -154,6 +156,55 @@ impl SystemApis { .push(api); } + // For each component, compute the set of package IDs to omit when + // walking its dependencies. A subcomponent's package is omitted from + // its parent package's walk, so that the subcomponent's dependency + // subgraph is attributed to the subcomponent rather than to its parent. + let mut omitted_nodes: BTreeMap< + &ServerComponentName, + BTreeSet<&PackageId>, + > = BTreeMap::new(); + // Maps each omitted package ID back to the subcomponent that + // contributed it. Used after the producer walk to produce a helpful + // error if a subcomponent's omission turned out to be a no-op. + let mut omitted_pkgid_subcomponent = BTreeMap::new(); + for component in api_metadata.server_components() { + // Ensure every component has an entry, even if it omits nothing. + omitted_nodes.entry(component.name()).or_default(); + + match component.kind() { + ServerComponentKind::TopLevel => {} + ServerComponentKind::Subcomponent { inside } => { + // A subcomponent is a library crate linked into the + // `inside` binary, so it necessarily lives in the same + // workspace as `inside`. Resolve its package within that + // workspace specifically. + let (workspace, _) = + workspaces.find_package_workspace(inside)?; + let sub_pkg = workspace + .find_workspace_package(component.name()) + .ok_or_else(|| { + anyhow!( + "subcomponent {:?} (in deployment unit \ + {:?}) is not a package in workspace {:?} \ + (the workspace of its `inside` package \ + {:?}); check for a typo in `name`, or that \ + the package actually exists", + component.name(), + component.deployment_unit(), + workspace.name(), + inside, + ) + })?; + omitted_pkgid_subcomponent.insert(&sub_pkg.id, component); + omitted_nodes + .entry(inside) + .or_default() + .insert(&sub_pkg.id); + } + } + } + // Walk the deployment units, then walk each one's list of packages, and // then walk all of its dependencies. Along the way, record whenever we // find a package whose name matches a known server package. If we find @@ -164,20 +215,79 @@ impl SystemApis { // component, etc. let mut tracker = ServerComponentsTracker::new(&server_packages); for dunit_info in api_metadata.deployment_units() { - for dunit_pkg in &dunit_info.packages { + for component_name in dunit_info.component_names() { + let component = + api_metadata.server_component(component_name).expect( + "a deployment unit's component names are all \ + registered server components", + ); tracker.found_deployment_unit_package( &dunit_info.name, - dunit_pkg, + component_name, )?; - let (workspace, server_pkg) = - workspaces.find_package_workspace(dunit_pkg)?; - let dep_path = DepPath::for_pkg(server_pkg.id.clone()); - tracker.found_package(dunit_pkg, dunit_pkg, &dep_path); - - let outcome = - workspace.walk_required_deps_recursively(server_pkg)?; - for (dep_pkg, dep_path) in &outcome.found { - tracker.found_package(dunit_pkg, &dep_pkg.name, dep_path); + + match component.kind() { + ServerComponentKind::TopLevel => { + let (workspace, server_pkg) = workspaces + .find_package_workspace(component_name)?; + let dep_path = DepPath::for_pkg(server_pkg.id.clone()); + tracker.found_package( + component_name, + component_name, + &dep_path, + ); + + let omitted = omitted_nodes.get(component_name).expect( + "every server component has an omitted_nodes entry", + ); + let outcome = workspace + .walk_required_deps_recursively( + server_pkg, omitted, + )?; + for (dep_pkg, dep_path) in &outcome.found { + tracker.found_package( + component_name, + &dep_pkg.name, + dep_path, + ); + } + + // Every subcomponent declared `inside` this package + // must actually be one of its required Cargo + // dependencies. Otherwise, omitting it from this walk + // did nothing, and its `subcomponents` entry is stale + // or wrong. + if let Some(&pkgid) = + omitted.difference(&outcome.omitted_seen).next() + { + let sub = omitted_pkgid_subcomponent + .get(pkgid) + .copied() + .expect( + "every omitted package ID was \ + registered with its subcomponent", + ); + bail!( + "subcomponent {:?} (in deployment unit \ + {:?}) is declared with `inside = {:?}`, \ + but {:?} has no normal or build Cargo \ + dependency on it; remove the stale \ + `subcomponents` entry, correct its \ + `inside` field, or restore the dependency \ + if it was removed or made dev-only", + sub.name(), + sub.deployment_unit(), + component_name, + component_name, + ); + } + } + ServerComponentKind::Subcomponent { .. } => { + // Subcomponents host no Dropshot APIs, so they are + // not walked as API producers here. Their + // dependency subgraph is walked separately, as a + // consumer, in the loop below. + } } } } @@ -212,13 +322,17 @@ impl SystemApis { // Now that we've figured out what servers are where, walk dependencies // of each server component and assemble structures to find which APIs - // are produced and consumed by which components. + // are produced and consumed by which components. The same omitted + // nodes used during the producer walk apply here too. let mut deps_tracker = ClientDependenciesTracker::new(&api_metadata); for server_pkgname in server_component_units.keys() { let (workspace, pkg) = workspaces.find_package_workspace(server_pkgname)?; + let omitted = omitted_nodes + .get(server_pkgname) + .expect("every server component has an omitted_nodes entry"); let outcome = workspace - .walk_required_deps_recursively(pkg) + .walk_required_deps_recursively(pkg, omitted) .with_context(|| { format!( "iterating dependencies of workspace {:?} package {:?}", @@ -574,6 +688,31 @@ impl SystemApis { Ok(DagEdgesFile { units_without_server_side_apis, edges }) } + /// Returns whether `component`'s consumed-API edges participate in the + /// upgrade DAG. + /// + /// Components with a non-steady-state lifecycle can't be affected by + /// version skew during an online upgrade, so their API dependencies are + /// excluded from the DAG and from cycle checks. + /// + /// Panics if `component` is not a registered deployment-unit component. + /// Every caller passes a component obtained from `server_component_units` + /// or `apis_consumed`, both of which are built from registered + /// deployment-unit packages, so the lookup always succeeds. + fn component_in_upgrade_dag( + &self, + component: &ServerComponentName, + ) -> bool { + self.api_metadata + .server_component(component) + .expect( + "component came from server_component_units or \ + apis_consumed, both built from registered \ + deployment-unit components", + ) + .in_upgrade_dag() + } + // The complex type below is only used in this one place: the return value // of this internal helper function. A type alias doesn't seem better. #[allow(clippy::type_complexity)] @@ -613,6 +752,10 @@ impl SystemApis { continue; } + if !self.component_in_upgrade_dag(server_pkg) { + continue; + } + // When filtering DAG-only edges, also skip edges that // represent intra-deployment-unit-only communication // (communication within one instance of one deployment @@ -698,6 +841,9 @@ impl SystemApis { if api.versioned_how != VersionedHow::Server { continue; } + if !self.component_in_upgrade_dag(server_component) { + continue; + } } for other_component in self.api_producers(client_pkg) { @@ -722,6 +868,12 @@ impl SystemApis { let mut required = BTreeSet::new(); for (server, server_unit) in &self.server_component_units { + // Components that don't participate in the upgrade DAG don't + // require an IDU annotation for their edges. + if !self.component_in_upgrade_dag(server) { + continue; + } + for (client, _) in self.component_apis_consumed(server, filter)? { // Only consider server-side-versioned APIs. let api = self @@ -845,7 +997,7 @@ impl SystemApis { // of the DAG or not. Why would we ignore edges based on whether // they're already in the DAG or not? // - // Recall that there are two ways that the metadata can specify that a + // Recall that there are three ways that the metadata can specify that a // particular API is "not part of the update DAG" (which is equivalent // to client-side-managed): // @@ -855,6 +1007,9 @@ impl SystemApis { // "nexus-client" are "non-DAG". This means we promise to make the // Nexus internal API client-managed (i.e., not part of the update // DAG). We verify this promise below. + // - a subcomponent can be marked as not being part of the steady-state + // lifecycle (e.g., `lifecycle = "rack-init"`). Edges from such + // components are excluded from the DAG. // - a specific API can be marked as server-managed (meaning it's part // of the update DAG) or not. That's most of what this function deals // with and proposes changes to. @@ -1431,7 +1586,7 @@ impl<'a> ServerComponentsTracker<'a> { } /// Record that the given package is one of the deployment unit's top-level - /// packages (server components) + /// packages or subcomponents (collectively, server components) pub fn found_deployment_unit_package( &mut self, deployment_unit: &DeploymentUnitName, diff --git a/dev-tools/ls-apis/tests/api_dependencies.out b/dev-tools/ls-apis/tests/api_dependencies.out index 4541eb7c8b1..82177974cc4 100644 --- a/dev-tools/ls-apis/tests/api_dependencies.out +++ b/dev-tools/ls-apis/tests/api_dependencies.out @@ -16,6 +16,7 @@ Clickhouse Single-Node Cluster Admin (client: clickhouse-admin-single-client) CockroachDB Cluster Admin (client: cockroach-admin-client) consumed by: omicron-nexus (omicron/nexus) via 3 paths + consumed by: sled-agent-rack-setup (omicron/sled-agent/rack-setup) via 1 path [subcomponent of omicron-sled-agent; rack-init only] Crucible Agent (client: crucible-agent-client) consumed by: omicron-nexus (omicron/nexus) via 1 path @@ -29,10 +30,12 @@ Maghemite DDM Admin (client: ddm-admin-client) consumed by: installinator (omicron/installinator) via 1 path consumed by: mgd (maghemite/mgd) via 1 path consumed by: omicron-sled-agent (omicron/sled-agent) via 1 path + consumed by: sled-agent-rack-setup (omicron/sled-agent/rack-setup) via 1 path [subcomponent of omicron-sled-agent; rack-init only] consumed by: wicketd (omicron/wicketd) via 1 path DNS Server (client: dns-service-client) consumed by: omicron-nexus (omicron/nexus) via 2 paths + consumed by: sled-agent-rack-setup (omicron/sled-agent/rack-setup) via 1 path [subcomponent of omicron-sled-agent; rack-init only] Dendrite DPD (client: dpd-client) consumed by: ddmd (maghemite/ddmd) via 2 paths @@ -72,15 +75,18 @@ Nexus Internal API (client: nexus-client) consumed by: propolis-server (propolis/bin/propolis-server) via 3 paths Nexus Internal Lockstep API (client: nexus-lockstep-client) - consumed by: omicron-sled-agent (omicron/sled-agent) via 2 paths - status: expected, reason: Only RSS and sled-agent-sim use this API, neither of which is part of upgrade + consumed by: omicron-sled-agent (omicron/sled-agent) via 1 path + status: expected, reason: Only sled-agent-sim uses this API, which is not part of upgrade + consumed by: sled-agent-rack-setup (omicron/sled-agent/rack-setup) via 1 path [subcomponent of omicron-sled-agent; rack-init only] + status: expected, reason: Only RSS uses this API, which is not part of upgrade NTP Admin (client: ntp-admin-client) consumed by: omicron-nexus (omicron/nexus) via 2 paths - consumed by: omicron-sled-agent (omicron/sled-agent) via 1 path + consumed by: sled-agent-rack-setup (omicron/sled-agent/rack-setup) via 1 path [subcomponent of omicron-sled-agent; rack-init only] NTP Admin version 1 (client: ntp-admin-v1-client) consumed by: omicron-sled-agent (omicron/sled-agent) via 1 path + consumed by: sled-agent-rack-setup (omicron/sled-agent/rack-setup) via 1 path [subcomponent of omicron-sled-agent; rack-init only] External API (client: oxide-client) @@ -100,6 +106,7 @@ Repo Depot API (client: repo-depot-client) Sled Agent (client: sled-agent-client) consumed by: omicron-nexus (omicron/nexus) via 8 paths + consumed by: sled-agent-rack-setup (omicron/sled-agent/rack-setup) via 1 path [subcomponent of omicron-sled-agent; rack-init only] Wicketd (client: wicketd-client)