diff --git a/Cargo.lock b/Cargo.lock index 3d627e181..c13581cb4 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -6156,6 +6156,7 @@ dependencies = [ "schemars", "serde", "serde_json", + "serde_yaml", "thiserror 2.0.18", "utoipa", "weaver_common", diff --git a/crates/weaver_emit/src/lib.rs b/crates/weaver_emit/src/lib.rs index ee6cd7a03..34241d043 100644 --- a/crates/weaver_emit/src/lib.rs +++ b/crates/weaver_emit/src/lib.rs @@ -599,6 +599,7 @@ mod tests { kind: SpanKindSpec::Internal, name: SpanName { note: "test span".to_owned(), + unknown_fields: Default::default(), }, attributes: vec![SpanAttribute { base: V2Attribute { diff --git a/crates/weaver_forge/src/lib.rs b/crates/weaver_forge/src/lib.rs index c0aae7550..2969f9d71 100644 --- a/crates/weaver_forge/src/lib.rs +++ b/crates/weaver_forge/src/lib.rs @@ -956,6 +956,7 @@ mod tests { kind: SpanKindSpec::Client, name: SpanName { note: "A database client span.".to_owned(), + unknown_fields: Default::default(), }, attributes: vec![], entity_associations: vec![], diff --git a/crates/weaver_forge/src/v2/registry.rs b/crates/weaver_forge/src/v2/registry.rs index bdd643f3b..685b48a93 100644 --- a/crates/weaver_forge/src/v2/registry.rs +++ b/crates/weaver_forge/src/v2/registry.rs @@ -492,6 +492,7 @@ mod tests { source: Some(v2::provenance::DependencyRef(0)), path: "some/path".to_owned(), }, + unknown_fields: Default::default(), }], dependencies: { let mut deps = std::collections::BTreeSet::new(); @@ -505,6 +506,7 @@ mod tests { kind: SpanKindSpec::Internal, name: SpanName { note: "My Span".to_owned(), + unknown_fields: Default::default(), }, attributes: vec![span::SpanAttributeRef { base: attribute::AttributeRef(0), @@ -512,10 +514,12 @@ mod tests { weaver_semconv::attribute::BasicRequirementLevelSpec::Required, ), sampling_relevant: Some(true), + unknown_fields: Default::default(), }], entity_associations: vec![], common: CommonFields::default(), provenance: Default::default(), + unknown_fields: Default::default(), }], metrics: vec![metric::Metric { name: SignalId::from("my-metric".to_owned()), @@ -526,10 +530,12 @@ mod tests { requirement_level: weaver_semconv::attribute::RequirementLevel::Basic( weaver_semconv::attribute::BasicRequirementLevelSpec::Required, ), + unknown_fields: Default::default(), }], entity_associations: vec![], common: CommonFields::default(), provenance: Default::default(), + unknown_fields: Default::default(), }], events: vec![event::Event { name: SignalId::from("my-event".to_owned()), @@ -538,10 +544,12 @@ mod tests { requirement_level: weaver_semconv::attribute::RequirementLevel::Basic( weaver_semconv::attribute::BasicRequirementLevelSpec::Required, ), + unknown_fields: Default::default(), }], entity_associations: vec![], common: CommonFields::default(), provenance: Default::default(), + unknown_fields: Default::default(), }], entities: vec![v2::entity::Entity { r#type: SignalId::from("my-entity".to_owned()), @@ -550,12 +558,15 @@ mod tests { requirement_level: weaver_semconv::attribute::RequirementLevel::Basic( weaver_semconv::attribute::BasicRequirementLevelSpec::Required, ), + unknown_fields: Default::default(), }], description: vec![], common: CommonFields::default(), provenance: Default::default(), + unknown_fields: Default::default(), }], attribute_groups: vec![], + unknown_fields: Default::default(), }, refinements: v2::refinements::Refinements { spans: vec![span::SpanRefinement { @@ -565,6 +576,7 @@ mod tests { kind: SpanKindSpec::Client, name: SpanName { note: "My Refined Span".to_owned(), + unknown_fields: Default::default(), }, attributes: vec![span::SpanAttributeRef { base: attribute::AttributeRef(0), @@ -572,10 +584,12 @@ mod tests { weaver_semconv::attribute::BasicRequirementLevelSpec::Required, ), sampling_relevant: Some(true), + unknown_fields: Default::default(), }], entity_associations: vec![], common: CommonFields::default(), provenance: Default::default(), + unknown_fields: Default::default(), }, }], metrics: vec![metric::MetricRefinement { @@ -589,10 +603,12 @@ mod tests { requirement_level: weaver_semconv::attribute::RequirementLevel::Basic( weaver_semconv::attribute::BasicRequirementLevelSpec::Recommended, ), + unknown_fields: Default::default(), }], entity_associations: vec![], common: CommonFields::default(), provenance: Default::default(), + unknown_fields: Default::default(), }, }], events: vec![event::EventRefinement { @@ -604,13 +620,17 @@ mod tests { requirement_level: weaver_semconv::attribute::RequirementLevel::Basic( weaver_semconv::attribute::BasicRequirementLevelSpec::OptIn, ), + unknown_fields: Default::default(), }], entity_associations: vec![], common: CommonFields::default(), provenance: Default::default(), + unknown_fields: Default::default(), }, }], + unknown_fields: Default::default(), }, + unknown_fields: Default::default(), }; let forge_registry = @@ -677,6 +697,7 @@ mod tests { kind: SpanKindSpec::Internal, name: SpanName { note: "My Span".to_owned(), + unknown_fields: Default::default(), }, attributes: vec![span::SpanAttributeRef { base: attribute::AttributeRef(0), // Refers to bad attribute. @@ -684,21 +705,26 @@ mod tests { weaver_semconv::attribute::BasicRequirementLevelSpec::Required, ), sampling_relevant: Some(true), + unknown_fields: Default::default(), }], entity_associations: vec![], common: CommonFields::default(), provenance: Default::default(), + unknown_fields: Default::default(), }], metrics: vec![], events: vec![], entities: vec![], attribute_groups: vec![], + unknown_fields: Default::default(), }, refinements: v2::refinements::Refinements { spans: vec![], metrics: vec![], events: vec![], + unknown_fields: Default::default(), }, + unknown_fields: Default::default(), }; let result = ForgeResolvedRegistry::try_from(resolved_schema); diff --git a/crates/weaver_live_check/src/live_checker.rs b/crates/weaver_live_check/src/live_checker.rs index f471cea3c..0e0dc558a 100644 --- a/crates/weaver_live_check/src/live_checker.rs +++ b/crates/weaver_live_check/src/live_checker.rs @@ -1034,6 +1034,7 @@ mod tests { kind: SpanKindSpec::Internal, name: SpanName { note: "custom.comprehensive.internal".to_owned(), + unknown_fields: Default::default(), }, attributes: vec![SpanAttribute { base: custom_string_attr.clone(), diff --git a/crates/weaver_mcp/src/service.rs b/crates/weaver_mcp/src/service.rs index 4b042cabf..ea68b08a9 100644 --- a/crates/weaver_mcp/src/service.rs +++ b/crates/weaver_mcp/src/service.rs @@ -588,6 +588,7 @@ mod tests { kind: SpanKindSpec::Client, name: SpanName { note: "HTTP client span".to_owned(), + unknown_fields: Default::default(), }, attributes: vec![], entity_associations: vec![], diff --git a/crates/weaver_resolved_schema/Cargo.toml b/crates/weaver_resolved_schema/Cargo.toml index 0f41335b7..db3dbc9e5 100644 --- a/crates/weaver_resolved_schema/Cargo.toml +++ b/crates/weaver_resolved_schema/Cargo.toml @@ -19,6 +19,7 @@ weaver_semconv = { path = "../weaver_semconv" } thiserror.workspace = true serde.workspace = true schemars.workspace = true +serde_yaml.workspace = true log.workspace = true utoipa = { workspace = true, optional = true } diff --git a/crates/weaver_resolved_schema/src/v2/attribute.rs b/crates/weaver_resolved_schema/src/v2/attribute.rs index 4c4067338..a73680dab 100644 --- a/crates/weaver_resolved_schema/src/v2/attribute.rs +++ b/crates/weaver_resolved_schema/src/v2/attribute.rs @@ -6,6 +6,7 @@ use schemars::JsonSchema; use serde::{Deserialize, Serialize}; use weaver_semconv::{ attribute::{AttributeType, Examples}, + unknown_fields::UnknownFields, v2::CommonFields, }; @@ -14,7 +15,6 @@ use crate::v2::{provenance::Provenance, Signal}; /// The definition of an Attribute. #[derive(Serialize, Deserialize, Debug, Clone, JsonSchema, PartialEq, Hash, Eq)] #[cfg_attr(feature = "openapi", derive(utoipa::ToSchema))] -#[serde(deny_unknown_fields)] #[serde(rename_all = "snake_case")] pub struct Attribute { /// String that uniquely identifies the attribute. @@ -37,6 +37,10 @@ pub struct Attribute { #[serde(default)] #[serde(skip_serializing_if = "Provenance::is_empty")] pub provenance: Provenance, + /// Unknown fields captured for forward-compatibility. + #[serde(flatten, skip_serializing_if = "UnknownFields::is_empty")] + #[schemars(skip)] + pub unknown_fields: UnknownFields, } /// Reference to an attribute in the catalog. diff --git a/crates/weaver_resolved_schema/src/v2/attribute_group.rs b/crates/weaver_resolved_schema/src/v2/attribute_group.rs index c0439a211..0fac1cf9d 100644 --- a/crates/weaver_resolved_schema/src/v2/attribute_group.rs +++ b/crates/weaver_resolved_schema/src/v2/attribute_group.rs @@ -4,6 +4,7 @@ use schemars::JsonSchema; use serde::{Deserialize, Serialize}; +use weaver_semconv::unknown_fields::UnknownFields; use weaver_semconv::v2::{signal_id::SignalId, CommonFields}; use crate::v2::{attribute::AttributeRef, provenance::Provenance, Signal}; @@ -16,7 +17,6 @@ use crate::v2::{attribute::AttributeRef, provenance::Provenance, Signal}; /// the bundle as a group to different signals. #[derive(Serialize, Deserialize, Debug, Clone, JsonSchema, PartialEq)] #[cfg_attr(feature = "openapi", derive(utoipa::ToSchema))] -#[serde(deny_unknown_fields)] #[serde(rename_all = "snake_case")] pub struct AttributeGroup { /// The name of the attribute group, must be unique. @@ -33,6 +33,11 @@ pub struct AttributeGroup { #[serde(default)] #[serde(skip_serializing_if = "Provenance::is_empty")] pub provenance: Provenance, + + /// Unknown fields captured for forward-compatibility. + #[serde(flatten, skip_serializing_if = "UnknownFields::is_empty")] + #[schemars(skip)] + pub unknown_fields: UnknownFields, } impl Signal for AttributeGroup { diff --git a/crates/weaver_resolved_schema/src/v2/catalog.rs b/crates/weaver_resolved_schema/src/v2/catalog.rs index ebe42845d..2585879c9 100644 --- a/crates/weaver_resolved_schema/src/v2/catalog.rs +++ b/crates/weaver_resolved_schema/src/v2/catalog.rs @@ -142,6 +142,7 @@ mod test { annotations: annotations.clone(), }, provenance: Default::default(), + unknown_fields: Default::default(), }]); let result = catalog.convert_ref(&crate::attribute::Attribute { diff --git a/crates/weaver_resolved_schema/src/v2/entity.rs b/crates/weaver_resolved_schema/src/v2/entity.rs index 0ce2cb082..55eed2ccc 100644 --- a/crates/weaver_resolved_schema/src/v2/entity.rs +++ b/crates/weaver_resolved_schema/src/v2/entity.rs @@ -4,6 +4,7 @@ use schemars::JsonSchema; use serde::{Deserialize, Serialize}; use weaver_semconv::{ attribute::RequirementLevel, + unknown_fields::UnknownFields, v2::{signal_id::SignalId, CommonFields}, }; @@ -12,7 +13,6 @@ use crate::v2::{attribute::AttributeRef, provenance::Provenance, Signal}; /// The definition of an Entity signal. #[derive(Serialize, Deserialize, Debug, Clone, PartialEq, JsonSchema)] #[cfg_attr(feature = "openapi", derive(utoipa::ToSchema))] -#[serde(deny_unknown_fields)] pub struct Entity { /// The type of the Entity. pub r#type: SignalId, @@ -32,12 +32,16 @@ pub struct Entity { #[serde(default)] #[serde(skip_serializing_if = "Provenance::is_empty")] pub provenance: Provenance, + + /// Unknown fields captured for forward-compatibility. + #[serde(flatten, skip_serializing_if = "UnknownFields::is_empty")] + #[schemars(skip)] + pub unknown_fields: UnknownFields, } /// A special type of reference to attributes that remembers entity-specicific information. #[derive(Serialize, Deserialize, Debug, Clone, Eq, PartialEq, Hash, JsonSchema)] #[cfg_attr(feature = "openapi", derive(utoipa::ToSchema))] -#[serde(deny_unknown_fields)] pub struct EntityAttributeRef { /// Reference, by index, to the attribute catalog. pub base: AttributeRef, @@ -47,6 +51,10 @@ pub struct EntityAttributeRef { /// "conditionally_required", the string provided as `condition` MUST /// specify the conditions under which the attribute is required. pub requirement_level: RequirementLevel, + /// Unknown fields captured for forward-compatibility. + #[serde(flatten, skip_serializing_if = "UnknownFields::is_empty")] + #[schemars(skip)] + pub unknown_fields: UnknownFields, } impl Signal for Entity { diff --git a/crates/weaver_resolved_schema/src/v2/event.rs b/crates/weaver_resolved_schema/src/v2/event.rs index dc32d906b..3dee0e6da 100644 --- a/crates/weaver_resolved_schema/src/v2/event.rs +++ b/crates/weaver_resolved_schema/src/v2/event.rs @@ -4,6 +4,7 @@ use schemars::JsonSchema; use serde::{Deserialize, Serialize}; use weaver_semconv::{ attribute::RequirementLevel, + unknown_fields::UnknownFields, v2::{signal_id::SignalId, CommonFields}, }; @@ -12,7 +13,6 @@ use crate::v2::{attribute::AttributeRef, provenance::Provenance, Signal}; /// The definition of an Event signal. #[derive(Serialize, Deserialize, Debug, Clone, PartialEq, JsonSchema)] #[cfg_attr(feature = "openapi", derive(utoipa::ToSchema))] -#[serde(deny_unknown_fields)] pub struct Event { /// The name of the event. pub name: SignalId, @@ -39,12 +39,16 @@ pub struct Event { #[serde(default)] #[serde(skip_serializing_if = "Provenance::is_empty")] pub provenance: Provenance, + + /// Unknown fields captured for forward-compatibility. + #[serde(flatten, skip_serializing_if = "UnknownFields::is_empty")] + #[schemars(skip)] + pub unknown_fields: UnknownFields, } /// A special type of reference to attributes that remembers event-specicific information. #[derive(Serialize, Deserialize, Debug, Clone, Eq, PartialEq, Hash, JsonSchema)] #[cfg_attr(feature = "openapi", derive(utoipa::ToSchema))] -#[serde(deny_unknown_fields)] pub struct EventAttributeRef { /// Reference, by index, to the attribute catalog. pub base: AttributeRef, @@ -54,6 +58,10 @@ pub struct EventAttributeRef { /// "conditionally_required", the string provided as `condition` MUST /// specify the conditions under which the attribute is required. pub requirement_level: RequirementLevel, + /// Unknown fields captured for forward-compatibility. + #[serde(flatten, skip_serializing_if = "UnknownFields::is_empty")] + #[schemars(skip)] + pub unknown_fields: UnknownFields, } /// A refinement of an event, for use in code-gen or specific library application. diff --git a/crates/weaver_resolved_schema/src/v2/metric.rs b/crates/weaver_resolved_schema/src/v2/metric.rs index 2c414f28f..424afdee0 100644 --- a/crates/weaver_resolved_schema/src/v2/metric.rs +++ b/crates/weaver_resolved_schema/src/v2/metric.rs @@ -6,13 +6,13 @@ use serde::{Deserialize, Serialize}; use weaver_semconv::{ attribute::RequirementLevel, group::InstrumentSpec, + unknown_fields::UnknownFields, v2::{signal_id::SignalId, CommonFields}, }; /// The definition of a metric signal. #[derive(Serialize, Deserialize, Debug, Clone, PartialEq, JsonSchema)] #[cfg_attr(feature = "openapi", derive(utoipa::ToSchema))] -#[serde(deny_unknown_fields)] pub struct Metric { /// The name of the metric. pub name: SignalId, @@ -46,12 +46,16 @@ pub struct Metric { #[serde(default)] #[serde(skip_serializing_if = "Provenance::is_empty")] pub provenance: Provenance, + + /// Unknown fields captured for forward-compatibility. + #[serde(flatten, skip_serializing_if = "UnknownFields::is_empty")] + #[schemars(skip)] + pub unknown_fields: UnknownFields, } /// A special type of reference to attributes that remembers metric-specicific information. #[derive(Serialize, Deserialize, Debug, Clone, PartialEq, JsonSchema)] #[cfg_attr(feature = "openapi", derive(utoipa::ToSchema))] -#[serde(deny_unknown_fields)] pub struct MetricAttributeRef { /// Reference, by index, to the attribute catalog. pub base: AttributeRef, @@ -65,6 +69,10 @@ pub struct MetricAttributeRef { /// create timeseries with these attributes, but for any given timeseries instance, the attributes that *were* present /// should *remain* present. That is - a metric timeseries cannot drop attributes during its lifetime. pub requirement_level: RequirementLevel, + /// Unknown fields captured for forward-compatibility. + #[serde(flatten, skip_serializing_if = "UnknownFields::is_empty")] + #[schemars(skip)] + pub unknown_fields: UnknownFields, } /// A refinement of a metric signal, for use in code-gen or specific library application. diff --git a/crates/weaver_resolved_schema/src/v2/mod.rs b/crates/weaver_resolved_schema/src/v2/mod.rs index 176ce2dba..25f8ff15c 100644 --- a/crates/weaver_resolved_schema/src/v2/mod.rs +++ b/crates/weaver_resolved_schema/src/v2/mod.rs @@ -8,6 +8,7 @@ use weaver_semconv::{ deprecated::Deprecated, group::GroupType, schema_url::SchemaUrl, + unknown_fields::UnknownFields, v2::{ attribute_group::AttributeGroupVisibilitySpec, signal_id::SignalId, span::SpanName, CommonFields, @@ -41,13 +42,13 @@ pub mod refinements; pub mod registry; pub mod span; pub mod stats; +pub mod version; /// A Resolved Telemetry Schema. /// A Resolved Telemetry Schema is self-contained and doesn't contain any /// external references to other schemas or semantic conventions. #[derive(Serialize, Deserialize, Clone, Debug, JsonSchema)] #[cfg_attr(feature = "openapi", derive(utoipa::ToSchema))] -#[serde(deny_unknown_fields)] pub struct ResolvedTelemetrySchema { /// Version of the file structure. /// Always `"resolved/2.0"` in this version. @@ -64,6 +65,10 @@ pub struct ResolvedTelemetrySchema { /// The list of dependencies of the current instrumentation application or library. #[serde(default, skip_serializing_if = "BTreeSet::is_empty")] pub dependencies: BTreeSet, + /// Unknown fields captured for forward-compatibility. + #[serde(flatten, skip_serializing_if = "UnknownFields::is_empty")] + #[schemars(skip)] + pub unknown_fields: UnknownFields, } impl ResolvedTelemetrySchema { @@ -149,6 +154,7 @@ impl TryFrom for ResolvedTelemetrySchema { registry, refinements, dependencies, + unknown_fields: Default::default(), }) } } @@ -242,6 +248,7 @@ pub fn convert_v1_to_v2( annotations: a.annotations.unwrap_or_default(), }, provenance, + unknown_fields: Default::default(), } }) .collect(); @@ -281,6 +288,7 @@ pub fn convert_v1_to_v2( base: a, requirement_level: attr.requirement_level.clone(), sampling_relevant: attr.sampling_relevant, + unknown_fields: Default::default(), }); } else { // TODO logic error! @@ -297,6 +305,7 @@ pub fn convert_v1_to_v2( // TODO - Pass advanced name controls through V1 groups. name: SpanName { note: g.name.clone().unwrap_or_default(), + unknown_fields: Default::default(), }, entity_associations: g.entity_associations.clone(), common: CommonFields { @@ -311,6 +320,7 @@ pub fn convert_v1_to_v2( }, attributes: span_attributes, provenance: get_provenance(g), + unknown_fields: Default::default(), }; spans.push(span.clone()); span_refinements.push(SpanRefinement { @@ -336,6 +346,7 @@ pub fn convert_v1_to_v2( // TODO - Pass advanced name controls through V1 groups. name: SpanName { note: g.name.clone().unwrap_or_default(), + unknown_fields: Default::default(), }, entity_associations: g.entity_associations.clone(), common: CommonFields { @@ -350,6 +361,7 @@ pub fn convert_v1_to_v2( }, attributes: span_attributes, provenance: get_provenance(g), + unknown_fields: Default::default(), }, }); } @@ -368,6 +380,7 @@ pub fn convert_v1_to_v2( event_attributes.push(event::EventAttributeRef { base: a, requirement_level: attr.requirement_level.clone(), + unknown_fields: Default::default(), }); } else { // TODO logic error! @@ -391,6 +404,7 @@ pub fn convert_v1_to_v2( annotations: g.annotations.clone().unwrap_or_default(), }, provenance: get_provenance(g), + unknown_fields: Default::default(), }; if !is_refinement { events.push(event.clone()); @@ -426,6 +440,7 @@ pub fn convert_v1_to_v2( metric_attributes.push(metric::MetricAttributeRef { base: a, requirement_level: attr.requirement_level.clone(), + unknown_fields: Default::default(), }); } else { // TODO logic error! @@ -460,6 +475,7 @@ pub fn convert_v1_to_v2( annotations: g.annotations.clone().unwrap_or_default(), }, provenance: get_provenance(g), + unknown_fields: Default::default(), }; if is_refinement { metric_refinements.push(metric::MetricRefinement { @@ -484,12 +500,14 @@ pub fn convert_v1_to_v2( id_attrs.push(entity::EntityAttributeRef { base: a, requirement_level: attr.requirement_level.clone(), + unknown_fields: Default::default(), }); } _ => { desc_attrs.push(entity::EntityAttributeRef { base: a, requirement_level: attr.requirement_level.clone(), + unknown_fields: Default::default(), }); } } @@ -512,6 +530,7 @@ pub fn convert_v1_to_v2( annotations: g.annotations.clone().unwrap_or_default(), }, provenance: get_provenance(g), + unknown_fields: Default::default(), }); } GroupType::AttributeGroup => { @@ -543,6 +562,7 @@ pub fn convert_v1_to_v2( annotations: g.annotations.clone().unwrap_or_default(), }, provenance: get_provenance(g), + unknown_fields: Default::default(), }); } } @@ -583,11 +603,13 @@ pub fn convert_v1_to_v2( events, entities, attribute_groups, + unknown_fields: Default::default(), }; let v2_refinements = Refinements { spans: span_refinements, metrics: metric_refinements, events: event_refinements, + unknown_fields: Default::default(), }; Ok((v2_catalog.into(), v2_registry, v2_refinements, dependencies)) } @@ -1146,6 +1168,7 @@ mod tests { annotations: Default::default(), }, provenance: Default::default(), + unknown_fields: Default::default(), }); baseline.registry.attributes.push(AttributeRef(0)); let changes = baseline.diff(&baseline); @@ -1169,6 +1192,7 @@ mod tests { annotations: Default::default(), }, provenance: Default::default(), + unknown_fields: Default::default(), }); baseline.registry.attributes.push(AttributeRef(0)); let mut latest = empty_v2_schema(); @@ -1189,6 +1213,7 @@ mod tests { annotations: Default::default(), }, provenance: Default::default(), + unknown_fields: Default::default(), }); latest.attribute_catalog.push(AttributeV2 { key: "test.key.new".to_owned(), @@ -1204,6 +1229,7 @@ mod tests { annotations: Default::default(), }, provenance: Default::default(), + unknown_fields: Default::default(), }); latest.registry.attributes.push(AttributeRef(0)); latest.registry.attributes.push(AttributeRef(1)); @@ -1240,6 +1266,7 @@ mod tests { entity_associations: vec![], common: CommonFields::default(), provenance: Default::default(), + unknown_fields: Default::default(), }); let mut latest = empty_v2_schema(); latest.registry.metrics.push(Metric { @@ -1250,6 +1277,7 @@ mod tests { entity_associations: vec![], common: CommonFields::default(), provenance: Default::default(), + unknown_fields: Default::default(), }); let diff = latest.diff(&baseline); assert!(!diff.is_empty()); @@ -1276,6 +1304,7 @@ mod tests { identity: vec![], description: vec![], provenance: Default::default(), + unknown_fields: Default::default(), }); let mut latest = empty_v2_schema(); latest.registry.entities.push(Entity { @@ -1289,6 +1318,7 @@ mod tests { identity: vec![], description: vec![], provenance: Default::default(), + unknown_fields: Default::default(), }); let diff = latest.diff(&baseline); assert!(!diff.is_empty()); @@ -1313,6 +1343,7 @@ mod tests { attributes: vec![], entity_associations: vec![], provenance: Default::default(), + unknown_fields: Default::default(), }); let mut latest = empty_v2_schema(); latest.registry.events.push(Event { @@ -1326,6 +1357,7 @@ mod tests { ..Default::default() }, provenance: Default::default(), + unknown_fields: Default::default(), }); let diff = latest.diff(&baseline); assert!(!diff.is_empty()); @@ -1355,13 +1387,16 @@ mod tests { metrics: vec![], events: vec![], entities: vec![], + unknown_fields: Default::default(), }, refinements: Refinements { spans: vec![], metrics: vec![], events: vec![], + unknown_fields: Default::default(), }, dependencies: BTreeSet::new(), + unknown_fields: Default::default(), } } } diff --git a/crates/weaver_resolved_schema/src/v2/refinements.rs b/crates/weaver_resolved_schema/src/v2/refinements.rs index cb677f16f..c1c014bee 100644 --- a/crates/weaver_resolved_schema/src/v2/refinements.rs +++ b/crates/weaver_resolved_schema/src/v2/refinements.rs @@ -5,6 +5,7 @@ use crate::v2::{ }; use schemars::JsonSchema; use serde::{Deserialize, Serialize}; +use weaver_semconv::unknown_fields::UnknownFields; /// Semantic convention refinements. /// @@ -21,7 +22,6 @@ use serde::{Deserialize, Serialize}; /// provide optimised methods for generating telemetry signals. #[derive(Serialize, Deserialize, Debug, Clone, PartialEq, JsonSchema)] #[cfg_attr(feature = "openapi", derive(utoipa::ToSchema))] -#[serde(deny_unknown_fields)] pub struct Refinements { /// A list of span refinements. pub spans: Vec, @@ -31,6 +31,11 @@ pub struct Refinements { /// A list of event refinements. pub events: Vec, + + /// Unknown fields captured for forward-compatibility. + #[serde(flatten, skip_serializing_if = "UnknownFields::is_empty")] + #[schemars(skip)] + pub unknown_fields: UnknownFields, } impl Refinements { diff --git a/crates/weaver_resolved_schema/src/v2/registry.rs b/crates/weaver_resolved_schema/src/v2/registry.rs index 9e8698706..f64a53b7e 100644 --- a/crates/weaver_resolved_schema/src/v2/registry.rs +++ b/crates/weaver_resolved_schema/src/v2/registry.rs @@ -5,6 +5,7 @@ use std::collections::{BTreeMap, HashMap, HashSet}; use schemars::JsonSchema; use serde::{Deserialize, Serialize}; use weaver_semconv::attribute::AttributeType; +use weaver_semconv::unknown_fields::UnknownFields; use crate::v2::{ attribute::AttributeRef, @@ -28,7 +29,6 @@ use crate::v2::{ /// Note: The registry does not include signal refinements. #[derive(Serialize, Deserialize, Debug, Clone, PartialEq, JsonSchema)] #[cfg_attr(feature = "openapi", derive(utoipa::ToSchema))] -#[serde(deny_unknown_fields)] pub struct Registry { /// Catalog of attributes definitions. pub attributes: Vec, @@ -47,6 +47,11 @@ pub struct Registry { /// A list of entity signal definitions. pub entities: Vec, + + /// Unknown fields captured for forward-compatibility. + #[serde(flatten, skip_serializing_if = "UnknownFields::is_empty")] + #[schemars(skip)] + pub unknown_fields: UnknownFields, } impl Registry { @@ -261,6 +266,7 @@ mod test { annotations: BTreeMap::new(), }, provenance: Default::default(), + unknown_fields: Default::default(), }]; let registry = Registry { attribute_groups: vec![], @@ -269,6 +275,7 @@ mod test { kind: SpanKindSpec::Client, name: SpanName { note: "test".to_owned(), + unknown_fields: Default::default(), }, attributes: vec![], entity_associations: vec![], @@ -280,6 +287,7 @@ mod test { annotations: BTreeMap::new(), }, provenance: Default::default(), + unknown_fields: Default::default(), }], metrics: vec![Metric { name: "test.metric".to_owned().into(), @@ -295,6 +303,7 @@ mod test { annotations: BTreeMap::new(), }, provenance: Default::default(), + unknown_fields: Default::default(), }], events: vec![], entities: vec![Entity { @@ -304,6 +313,7 @@ mod test { requirement_level: weaver_semconv::attribute::RequirementLevel::Basic( weaver_semconv::attribute::BasicRequirementLevelSpec::Required, ), + unknown_fields: Default::default(), }], description: vec![], common: CommonFields { @@ -314,8 +324,10 @@ mod test { annotations: BTreeMap::new(), }, provenance: Default::default(), + unknown_fields: Default::default(), }], attributes: vec![AttributeRef(0)], + unknown_fields: Default::default(), }; let stats = registry.stats(&catalog); assert_eq!(stats.attributes.attribute_count, 1); diff --git a/crates/weaver_resolved_schema/src/v2/span.rs b/crates/weaver_resolved_schema/src/v2/span.rs index 19966041b..bb417742f 100644 --- a/crates/weaver_resolved_schema/src/v2/span.rs +++ b/crates/weaver_resolved_schema/src/v2/span.rs @@ -5,6 +5,7 @@ use serde::{Deserialize, Serialize}; use weaver_semconv::{ attribute::RequirementLevel, group::SpanKindSpec, + unknown_fields::UnknownFields, v2::{signal_id::SignalId, span::SpanName, CommonFields}, }; @@ -13,7 +14,6 @@ use crate::v2::{attribute::AttributeRef, provenance::Provenance, Signal}; /// The definition of a Span signal. #[derive(Serialize, Deserialize, Debug, Clone, PartialEq, JsonSchema)] #[cfg_attr(feature = "openapi", derive(utoipa::ToSchema))] -#[serde(deny_unknown_fields)] pub struct Span { /// The type of the Span. This denotes the identity /// of the "shape" of this span, and must be unique. @@ -45,12 +45,16 @@ pub struct Span { #[serde(default)] #[serde(skip_serializing_if = "Provenance::is_empty")] pub provenance: Provenance, + + /// Unknown fields captured for forward-compatibility. + #[serde(flatten, skip_serializing_if = "UnknownFields::is_empty")] + #[schemars(skip)] + pub unknown_fields: UnknownFields, } /// A special type of reference to attributes that remembers span-specicific information. #[derive(Serialize, Deserialize, Debug, Clone, Eq, PartialEq, Hash, JsonSchema)] #[cfg_attr(feature = "openapi", derive(utoipa::ToSchema))] -#[serde(deny_unknown_fields)] pub struct SpanAttributeRef { /// Reference, by index, to the attribute catalog. pub base: AttributeRef, @@ -64,6 +68,10 @@ pub struct SpanAttributeRef { /// and thus should be set at span start. It defaults to false. #[serde(skip_serializing_if = "Option::is_none")] pub sampling_relevant: Option, + /// Unknown fields captured for forward-compatibility. + #[serde(flatten, skip_serializing_if = "UnknownFields::is_empty")] + #[schemars(skip)] + pub unknown_fields: UnknownFields, } /// A refinement of a span, for use in code-gen or specific library application. diff --git a/crates/weaver_resolved_schema/src/v2/version.rs b/crates/weaver_resolved_schema/src/v2/version.rs new file mode 100644 index 000000000..2f6e1f6b2 --- /dev/null +++ b/crates/weaver_resolved_schema/src/v2/version.rs @@ -0,0 +1,345 @@ +// SPDX-License-Identifier: Apache-2.0 + +//! Version parsing and forward-compatibility validation for resolved schemas. + +use crate::v2::ResolvedTelemetrySchema; + +/// Parsed file format version. +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct FileFormatVersion { + /// The prefix (e.g., "resolved" or "manifest"). + pub prefix: String, + /// The major version number. + pub major: u32, + /// The minor version number. + pub minor: u32, +} + +/// Parses a file_format string like "resolved/2.0" into its components. +/// +/// Returns `None` if the string doesn't match the expected pattern. +#[must_use] +pub fn parse_file_format_version(file_format: &str) -> Option { + let (prefix, version) = file_format.split_once('/')?; + let (major_str, minor_str) = version.split_once('.')?; + let major = major_str.parse().ok()?; + let minor = minor_str.parse().ok()?; + Some(FileFormatVersion { + prefix: prefix.to_owned(), + major, + minor, + }) +} + +/// Current resolved schema version constants. +const CURRENT_RESOLVED_PREFIX: &str = "resolved"; +const CURRENT_RESOLVED_MAJOR: u32 = 2; +const CURRENT_RESOLVED_MINOR: u32 = 0; + +/// Errors that can occur during forward-compatibility validation. +#[derive(Debug, thiserror::Error)] +pub enum VersionError { + /// The file_format string could not be parsed. + #[error("Invalid file_format '{file_format}': expected 'prefix/MAJOR.MINOR'")] + InvalidFormat { + /// The raw file_format value. + file_format: String, + }, + + /// The file_format prefix doesn't match the expected value. + #[error("Unexpected file_format prefix '{actual}', expected '{expected}'")] + WrongPrefix { + /// Expected prefix. + expected: String, + /// Actual prefix found. + actual: String, + }, + + /// Major version mismatch — cannot read this schema. + #[error( + "Incompatible major version: file has '{file_format}' but this version of weaver \ + only supports {expected_prefix}/{expected_major}.x" + )] + MajorVersionMismatch { + /// The raw file_format value. + file_format: String, + /// Expected prefix. + expected_prefix: String, + /// Expected major version. + expected_major: u32, + }, + + /// Unknown fields found in a current-or-previous version schema (likely typos). + #[error( + "Unknown fields in '{file_format}' (current version is {current_version}): {fields}. \ + These are likely typos." + )] + UnknownFieldsInCurrentVersion { + /// The raw file_format value. + file_format: String, + /// Current supported version string. + current_version: String, + /// Comma-separated list of unknown field paths. + fields: String, + }, +} + +/// Validates a deserialized resolved schema for forward-compatibility. +/// +/// Rules: +/// 1. Minor version ahead of current: OK, unknown fields are tolerated. +/// 2. Major version mismatch: error. +/// 3. Current or previous version with unknown fields: error (typo detection). +pub fn validate_resolved_schema(schema: &ResolvedTelemetrySchema) -> Result<(), VersionError> { + validate_file_format( + &schema.file_format, + CURRENT_RESOLVED_PREFIX, + CURRENT_RESOLVED_MAJOR, + CURRENT_RESOLVED_MINOR, + || { + // Serialize to Value — UnknownFields emits `__unknown` markers + // that the generic walker can find without hardcoding struct paths. + let value = serde_yaml::to_value(schema) + .expect("ResolvedTelemetrySchema should always be serializable"); + weaver_semconv::unknown_fields::collect_unknown_fields_from_value(&value, "") + }, + ) +} + +/// Generic file_format validation logic shared between resolved schema and manifest. +pub(crate) fn validate_file_format( + file_format: &str, + expected_prefix: &str, + current_major: u32, + current_minor: u32, + collect_fields: impl FnOnce() -> Vec, +) -> Result<(), VersionError> { + let version = + parse_file_format_version(file_format).ok_or_else(|| VersionError::InvalidFormat { + file_format: file_format.to_owned(), + })?; + + if version.prefix != expected_prefix { + return Err(VersionError::WrongPrefix { + expected: expected_prefix.to_owned(), + actual: version.prefix, + }); + } + + if version.major != current_major { + return Err(VersionError::MajorVersionMismatch { + file_format: file_format.to_owned(), + expected_prefix: expected_prefix.to_owned(), + expected_major: current_major, + }); + } + + // Minor version ahead → tolerate unknown fields + if version.minor > current_minor { + return Ok(()); + } + + // Current or previous minor version → check for unknown fields (typo detection) + let unknown = collect_fields(); + if !unknown.is_empty() { + return Err(VersionError::UnknownFieldsInCurrentVersion { + file_format: file_format.to_owned(), + current_version: format!("{expected_prefix}/{current_major}.{current_minor}"), + fields: unknown.join(", "), + }); + } + + Ok(()) +} + +#[cfg(test)] +mod tests { + use super::*; + + /// Minimal valid resolved schema YAML for testing. + fn minimal_resolved_yaml(file_format: &str, extra_top_level: &str) -> String { + format!( + r#" +file_format: "{file_format}" +schema_url: "https://test.example.com/schemas/1.0.0" +attribute_catalog: [] +registry: + attributes: [] + attribute_groups: [] + spans: [] + metrics: [] + events: [] + entities: [] +refinements: + spans: [] + metrics: [] + events: [] +{extra_top_level} +"# + ) + } + + #[test] + fn test_resolved_schema_minor_ahead_with_unknown_fields() { + let yaml = minimal_resolved_yaml("resolved/2.99", "future_field: true"); + let schema: ResolvedTelemetrySchema = + serde_yaml::from_str(&yaml).expect("should deserialize"); + assert!( + validate_resolved_schema(&schema).is_ok(), + "minor-ahead schema with unknown fields should be accepted" + ); + } + + #[test] + fn test_resolved_schema_major_mismatch() { + let yaml = minimal_resolved_yaml("resolved/99.0", ""); + let schema: ResolvedTelemetrySchema = + serde_yaml::from_str(&yaml).expect("should deserialize"); + let err = validate_resolved_schema(&schema).unwrap_err(); + assert!( + matches!(err, VersionError::MajorVersionMismatch { .. }), + "major mismatch should fail: {err}" + ); + } + + #[test] + fn test_resolved_schema_current_version_with_typo() { + let yaml = minimal_resolved_yaml("resolved/2.0", "typo_field: oops"); + let schema: ResolvedTelemetrySchema = + serde_yaml::from_str(&yaml).expect("should deserialize"); + let err = validate_resolved_schema(&schema).unwrap_err(); + match &err { + VersionError::UnknownFieldsInCurrentVersion { fields, .. } => { + assert!( + fields.contains("typo_field"), + "error should name the unknown field: {fields}" + ); + } + _ => panic!("expected UnknownFieldsInCurrentVersion, got: {err}"), + } + } + + #[test] + fn test_resolved_schema_current_version_clean() { + let yaml = minimal_resolved_yaml("resolved/2.0", ""); + let schema: ResolvedTelemetrySchema = + serde_yaml::from_str(&yaml).expect("should deserialize"); + assert!( + validate_resolved_schema(&schema).is_ok(), + "clean current-version schema should pass" + ); + } + + #[test] + fn test_each_resolved_type_tolerates_unknown_field() { + // Span with unknown field + let yaml = r#" +file_format: "resolved/2.99" +schema_url: "https://test.example.com/schemas/1.0.0" +attribute_catalog: [] +registry: + attributes: [] + attribute_groups: [] + spans: + - type: "test.span" + kind: client + name: + note: "test" + future_span_name_field: true + brief: "A test span" + stability: stable + future_span_field: 42 + metrics: + - name: "test.metric" + instrument: counter + unit: "{count}" + brief: "A test metric" + stability: stable + future_metric_field: true + events: + - name: "test.event" + brief: "A test event" + stability: stable + future_event_field: true + entities: + - type: "test.entity" + identity: [] + brief: "A test entity" + stability: stable + future_entity_field: true + future_registry_field: true +refinements: + spans: [] + metrics: [] + events: [] + future_refinements_field: true +future_top_level_field: true +"#; + let schema: ResolvedTelemetrySchema = + serde_yaml::from_str(yaml).expect("should deserialize with future fields"); + assert!( + validate_resolved_schema(&schema).is_ok(), + "minor-ahead schema with future fields on every type should pass" + ); + } + + #[test] + fn test_parse_file_format_version() { + let v = parse_file_format_version("resolved/2.0").unwrap(); + assert_eq!(v.prefix, "resolved"); + assert_eq!(v.major, 2); + assert_eq!(v.minor, 0); + + let v = parse_file_format_version("manifest/2.0").unwrap(); + assert_eq!(v.prefix, "manifest"); + assert_eq!(v.major, 2); + assert_eq!(v.minor, 0); + + let v = parse_file_format_version("resolved/2.99").unwrap(); + assert_eq!(v.minor, 99); + + assert!(parse_file_format_version("garbage").is_none()); + assert!(parse_file_format_version("no_slash").is_none()); + assert!(parse_file_format_version("prefix/notanumber").is_none()); + } + + #[test] + fn test_validate_file_format_current_version_ok() { + let result = validate_file_format("resolved/2.0", "resolved", 2, 0, Vec::new); + assert!(result.is_ok()); + } + + #[test] + fn test_validate_file_format_minor_ahead_ok() { + let result = validate_file_format("resolved/2.99", "resolved", 2, 0, || { + vec!["should_be_ignored".to_owned()] + }); + assert!(result.is_ok()); + } + + #[test] + fn test_validate_file_format_major_mismatch() { + let result = validate_file_format("resolved/99.0", "resolved", 2, 0, Vec::new); + assert!(matches!( + result, + Err(VersionError::MajorVersionMismatch { .. }) + )); + } + + #[test] + fn test_validate_file_format_unknown_fields_in_current() { + let result = validate_file_format("resolved/2.0", "resolved", 2, 0, || { + vec!["typo_field".to_owned()] + }); + assert!(matches!( + result, + Err(VersionError::UnknownFieldsInCurrentVersion { .. }) + )); + } + + #[test] + fn test_validate_file_format_wrong_prefix() { + let result = validate_file_format("manifest/2.0", "resolved", 2, 0, Vec::new); + assert!(matches!(result, Err(VersionError::WrongPrefix { .. }))); + } +} diff --git a/crates/weaver_resolver/src/dependency.rs b/crates/weaver_resolver/src/dependency.rs index a3d3cdda5..fbe41e7c3 100644 --- a/crates/weaver_resolver/src/dependency.rs +++ b/crates/weaver_resolver/src/dependency.rs @@ -863,6 +863,7 @@ mod tests { attributes: vec![], common: Default::default(), provenance: Default::default(), + unknown_fields: Default::default(), }, ], metrics: vec![weaver_resolved_schema::v2::metric::Metric { @@ -873,6 +874,7 @@ mod tests { entity_associations: vec![], common: Default::default(), provenance: Default::default(), + unknown_fields: Default::default(), }], events: vec![weaver_resolved_schema::v2::event::Event { name: "event.b".to_owned().into(), @@ -880,17 +882,20 @@ mod tests { entity_associations: vec![], common: Default::default(), provenance: Default::default(), + unknown_fields: Default::default(), }], spans: vec![weaver_resolved_schema::v2::span::Span { r#type: "span.d".to_owned().into(), kind: weaver_semconv::group::SpanKindSpec::Client, name: weaver_semconv::v2::span::SpanName { note: "test".to_owned(), + unknown_fields: Default::default(), }, attributes: vec![], entity_associations: vec![], common: Default::default(), provenance: Default::default(), + unknown_fields: Default::default(), }], entities: vec![weaver_resolved_schema::v2::entity::Entity { r#type: "entity.c".to_owned().into(), @@ -898,16 +903,20 @@ mod tests { description: vec![], common: Default::default(), provenance: Default::default(), + unknown_fields: Default::default(), }], attributes: vec![], + unknown_fields: Default::default(), }, attribute_catalog: vec![], refinements: weaver_resolved_schema::v2::refinements::Refinements { spans: vec![], metrics: vec![], events: vec![], + unknown_fields: Default::default(), }, dependencies: std::collections::BTreeSet::new(), + unknown_fields: Default::default(), } } diff --git a/crates/weaver_resolver/src/loader.rs b/crates/weaver_resolver/src/loader.rs index e25209a38..b459bc7dc 100644 --- a/crates/weaver_resolver/src/loader.rs +++ b/crates/weaver_resolver/src/loader.rs @@ -13,6 +13,7 @@ use walkdir::DirEntry; use weaver_common::result::WResult; use weaver_resolved_schema::v2::ResolvedTelemetrySchema as V2Schema; use weaver_resolved_schema::ResolvedTelemetrySchema as V1Schema; +#[allow(deprecated)] // We still need to exclude the legacy manifest filename use weaver_semconv::registry_repo::{RegistryRepo, LEGACY_REGISTRY_MANIFEST, REGISTRY_MANIFEST}; use weaver_semconv::{group::ImportsWithProvenance, semconv::SemConvSpecWithProvenance}; @@ -254,8 +255,17 @@ fn load_semconv_repository_recursive( /// Loads a resolved repository. fn load_resolved_repository(path: &VirtualDirectoryPath) -> WResult { // TODO - should we handle V1 and V2? - match from_vdir(path) { - Ok(resolved) => WResult::Ok(LoadedSemconvRegistry::ResolvedV2(resolved)), + match from_vdir::(path) { + Ok(resolved) => { + // Two-pass validation: check version compatibility and detect typos. + if let Err(e) = weaver_resolved_schema::v2::version::validate_resolved_schema(&resolved) + { + return WResult::FatalErr(Error::ConversionError { + message: e.to_string(), + }); + } + WResult::Ok(LoadedSemconvRegistry::ResolvedV2(resolved)) + } Err(err) => WResult::FatalErr(err), } } @@ -289,6 +299,7 @@ fn load_definition_repository( .map(|s| s.starts_with('.')) .unwrap_or(false) } + #[allow(deprecated)] // We still need to exclude the legacy manifest filename fn is_semantic_convention_file(entry: &DirEntry) -> bool { let path = entry.path(); let extension = path.extension().unwrap_or_else(|| std::ffi::OsStr::new("")); diff --git a/crates/weaver_search/src/lib.rs b/crates/weaver_search/src/lib.rs index b95a761dc..37e39533b 100644 --- a/crates/weaver_search/src/lib.rs +++ b/crates/weaver_search/src/lib.rs @@ -676,6 +676,7 @@ mod tests { kind: SpanKindSpec::Client, name: SpanName { note: "HTTP client span".to_owned(), + unknown_fields: Default::default(), }, attributes: vec![], entity_associations: vec![], diff --git a/crates/weaver_semconv/src/group.rs b/crates/weaver_semconv/src/group.rs index e5cdcb29b..2bb000b17 100644 --- a/crates/weaver_semconv/src/group.rs +++ b/crates/weaver_semconv/src/group.rs @@ -149,6 +149,7 @@ pub struct GroupWildcard(#[schemars(with = "String")] pub Glob); impl GroupSpec { /// Validation logic for the group. + #[allow(deprecated)] // We check for Stability::Deprecated to report it as unsupported pub(crate) fn validate(&self, path_or_url: &str) -> WResult<(), Error> { let mut errors = vec![]; diff --git a/crates/weaver_semconv/src/lib.rs b/crates/weaver_semconv/src/lib.rs index 21f4d1a67..b271071e0 100644 --- a/crates/weaver_semconv/src/lib.rs +++ b/crates/weaver_semconv/src/lib.rs @@ -25,6 +25,7 @@ pub mod schema_url; pub mod semconv; pub mod stability; pub mod stats; +pub mod unknown_fields; pub mod v2; /// An error that can occur while loading a semantic convention registry. diff --git a/crates/weaver_semconv/src/manifest.rs b/crates/weaver_semconv/src/manifest.rs index 846a30358..8d13168bf 100644 --- a/crates/weaver_semconv/src/manifest.rs +++ b/crates/weaver_semconv/src/manifest.rs @@ -10,9 +10,11 @@ use std::vec; +#[allow(deprecated)] // We still need to detect the legacy manifest filename use crate::registry_repo::LEGACY_REGISTRY_MANIFEST; use crate::schema_url::SchemaUrl; use crate::stability::Stability; +use crate::unknown_fields::UnknownFields; use crate::Error; use crate::Error::{ DeprecatedSyntaxInRegistryManifest, InvalidRegistryManifest, LegacyRegistryManifest, @@ -160,33 +162,44 @@ struct RawManifestFields { #[serde(default)] stability: Stability, resolved_schema_uri: Option, + /// Unknown fields captured for forward-compatibility validation. + #[serde(flatten)] + unknown_fields: UnknownFields, } impl RawManifestFields { /// Convert to [`RegistryManifest`], reporting errors relative to `path`. fn into_manifest(self, path: &std::path::Path) -> Result { - if self.file_format.as_deref() == Some(PUBLICATION_MANIFEST_FILE_FORMAT) { - let schema_url = self - .schema_url - .ok_or_else(|| Error::InvalidPublicationManifest { - path: path.to_path_buf(), - details: "missing required field 'schema_url'".into(), - })?; - let resolved_schema_uri = - self.resolved_schema_uri - .ok_or_else(|| Error::InvalidPublicationManifest { - path: path.to_path_buf(), - details: "missing required field 'resolved_schema_uri'".into(), - })?; - Ok(RegistryManifest::Publication(PublicationRegistryManifest { - file_format: PUBLICATION_MANIFEST_FILE_FORMAT.to_owned(), - schema_url, - description: self.description, - dependencies: self.dependencies, - stability: self.stability, - resolved_schema_uri, - })) - } else { + if let Some(ref fmt) = self.file_format { + if Self::is_publication_manifest(fmt) { + // Validate version compatibility for publication manifests. + Self::validate_manifest_version(fmt, &self.unknown_fields, path)?; + + let schema_url = + self.schema_url + .ok_or_else(|| Error::InvalidPublicationManifest { + path: path.to_path_buf(), + details: "missing required field 'schema_url'".into(), + })?; + let resolved_schema_uri = + self.resolved_schema_uri + .ok_or_else(|| Error::InvalidPublicationManifest { + path: path.to_path_buf(), + details: "missing required field 'resolved_schema_uri'".into(), + })?; + return Ok(RegistryManifest::Publication(PublicationRegistryManifest { + file_format: fmt.clone(), + schema_url, + description: self.description, + dependencies: self.dependencies, + stability: self.stability, + resolved_schema_uri, + unknown_fields: self.unknown_fields, + })); + } + } + + { let mut warnings = vec![]; if let Some(ref fmt) = self.file_format { return Err(InvalidRegistryManifest { @@ -229,6 +242,87 @@ impl RawManifestFields { })) } } + + /// Check if a file_format string identifies a publication manifest (prefix is "manifest"). + fn is_publication_manifest(file_format: &str) -> bool { + file_format + .split_once('/') + .map(|(prefix, _)| prefix == "manifest") + .unwrap_or(false) + } + + /// Validate the publication manifest version for forward-compatibility. + fn validate_manifest_version( + file_format: &str, + unknown_fields: &UnknownFields, + path: &std::path::Path, + ) -> Result<(), Error> { + // Current manifest version is 2.0 + const CURRENT_MAJOR: u32 = 2; + const CURRENT_MINOR: u32 = 0; + + // Parse version from "manifest/MAJOR.MINOR" + let version_part = + file_format + .split_once('/') + .map(|(_, v)| v) + .ok_or_else(|| InvalidRegistryManifest { + path: path.to_path_buf(), + error: format!( + "Invalid file_format '{file_format}': expected 'manifest/MAJOR.MINOR'" + ), + })?; + + let (major_str, minor_str) = + version_part + .split_once('.') + .ok_or_else(|| InvalidRegistryManifest { + path: path.to_path_buf(), + error: format!( + "Invalid file_format '{file_format}': expected 'manifest/MAJOR.MINOR'" + ), + })?; + + let major: u32 = major_str.parse().map_err(|_| InvalidRegistryManifest { + path: path.to_path_buf(), + error: format!("Invalid major version in file_format '{file_format}'"), + })?; + + let minor: u32 = minor_str.parse().map_err(|_| InvalidRegistryManifest { + path: path.to_path_buf(), + error: format!("Invalid minor version in file_format '{file_format}'"), + })?; + + if major != CURRENT_MAJOR { + return Err(InvalidRegistryManifest { + path: path.to_path_buf(), + error: format!( + "Incompatible major version: file has '{file_format}' but this version \ + of weaver only supports manifest/{CURRENT_MAJOR}.x" + ), + }); + } + + // Minor version ahead → tolerate unknown fields + if minor > CURRENT_MINOR { + return Ok(()); + } + + // Current or previous minor version → check for unknown fields (typo detection) + if !unknown_fields.is_empty() { + let fields: Vec<&str> = unknown_fields.keys().map(|s| s.as_str()).collect(); + return Err(InvalidRegistryManifest { + path: path.to_path_buf(), + error: format!( + "Unknown fields in '{file_format}' (current version is \ + manifest/{CURRENT_MAJOR}.{CURRENT_MINOR}): {}. These are likely typos.", + fields.join(", ") + ), + }); + } + + Ok(()) + } } /// A registry manifest that can be either a definition or a publication manifest. @@ -274,6 +368,7 @@ impl RegistryManifest { let manifest = raw.into_manifest(&manifest_path_buf)?; // Check if this is a legacy manifest file + #[allow(deprecated)] let is_legacy = if let Some(file_name) = manifest_path_buf.file_name() { file_name == LEGACY_REGISTRY_MANIFEST } else { @@ -358,6 +453,11 @@ pub struct PublicationRegistryManifest { /// URI pointing to the resolved telemetry schema included in this package. pub resolved_schema_uri: String, + + /// Unknown fields captured for forward-compatibility. + #[serde(flatten, skip_serializing_if = "UnknownFields::is_empty")] + #[schemars(skip)] + pub unknown_fields: UnknownFields, } impl PublicationRegistryManifest { @@ -375,6 +475,7 @@ impl PublicationRegistryManifest { dependencies: registry_manifest.dependencies.clone(), stability: registry_manifest.stability.clone(), resolved_schema_uri, + unknown_fields: Default::default(), } } } @@ -717,4 +818,57 @@ resolved_schema_uri: "https://example.com/resolved/1.0.0/resolved.yaml" "expected Publication variant, got {manifest:?}" ); } + + #[test] + fn test_publication_manifest_minor_ahead_with_unknown_fields() { + let manifest = manifest_from_yaml( + r#" +file_format: "manifest/2.99" +schema_url: "https://example.com/schemas/1.0.0" +resolved_schema_uri: "https://example.com/resolved/1.0.0/resolved.yaml" +future_field: true +"#, + ) + .expect("minor-ahead manifest with unknown fields should be accepted"); + + assert!( + matches!(manifest, RegistryManifest::Publication(_)), + "expected Publication variant, got {manifest:?}" + ); + } + + #[test] + fn test_publication_manifest_major_mismatch() { + let result = manifest_from_yaml( + r#" +file_format: "manifest/99.0" +schema_url: "https://example.com/schemas/1.0.0" +resolved_schema_uri: "https://example.com/resolved/1.0.0/resolved.yaml" +"#, + ); + assert!(result.is_err(), "major mismatch should fail"); + let err = result.unwrap_err().to_string(); + assert!( + err.contains("Incompatible major version"), + "error should mention major version: {err}" + ); + } + + #[test] + fn test_publication_manifest_current_version_with_typo() { + let result = manifest_from_yaml( + r#" +file_format: "manifest/2.0" +schema_url: "https://example.com/schemas/1.0.0" +resolved_schema_uri: "https://example.com/resolved/1.0.0/resolved.yaml" +typo_field: oops +"#, + ); + assert!(result.is_err(), "typo in current version should fail"); + let err = result.unwrap_err().to_string(); + assert!( + err.contains("typo_field"), + "error should name the unknown field: {err}" + ); + } } diff --git a/crates/weaver_semconv/src/registry_repo.rs b/crates/weaver_semconv/src/registry_repo.rs index f2ffba11d..3bd460c5f 100644 --- a/crates/weaver_semconv/src/registry_repo.rs +++ b/crates/weaver_semconv/src/registry_repo.rs @@ -29,6 +29,7 @@ fn find_manifest_path(registry_path: &Path) -> Option { return Some(registry_path.to_path_buf()); } let manifest_path = registry_path.join(REGISTRY_MANIFEST); + #[allow(deprecated)] let legacy_path = registry_path.join(LEGACY_REGISTRY_MANIFEST); if manifest_path.exists() { log_info(format!( diff --git a/crates/weaver_semconv/src/stability.rs b/crates/weaver_semconv/src/stability.rs index 10c71b41b..b5bc84a6e 100644 --- a/crates/weaver_semconv/src/stability.rs +++ b/crates/weaver_semconv/src/stability.rs @@ -38,6 +38,7 @@ impl Default for Stability { /// Implements a human readable display for the stability. impl Display for Stability { + #[allow(deprecated)] fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { match self { Stability::Stable => write!(f, "stable"), diff --git a/crates/weaver_semconv/src/unknown_fields.rs b/crates/weaver_semconv/src/unknown_fields.rs new file mode 100644 index 000000000..52b9e8b28 --- /dev/null +++ b/crates/weaver_semconv/src/unknown_fields.rs @@ -0,0 +1,120 @@ +// SPDX-License-Identifier: Apache-2.0 + +//! A newtype wrapper around `HashMap` for capturing +//! unknown fields during deserialization. +//! +//! This wrapper trivially implements `Hash`, `Eq`, and `PartialEq` (ignoring content) +//! so it can be used in types that derive those traits. + +use std::collections::HashMap; +use std::hash::{Hash, Hasher}; + +use serde::{Deserialize, Serialize}; + +/// Reserved key used when serializing non-empty unknown fields. +/// Used by [`crate::unknown_fields::collect_unknown_fields_from_value`] to +/// locate unknown fields in a generic `serde_yaml::Value` tree. +pub const UNKNOWN_FIELDS_MARKER: &str = "__unknown"; + +/// Captures unknown fields during deserialization via `#[serde(flatten)]`. +/// +/// On **deserialization**, behaves as a transparent `HashMap` — any unrecognized +/// keys are collected here. +/// +/// On **serialization**, non-empty maps are wrapped under a single +/// [`UNKNOWN_FIELDS_MARKER`] key so a generic value-tree walker can find them +/// without hardcoding every struct path. +/// +/// Implements `Hash`/`Eq`/`PartialEq` trivially (ignoring content) so it can +/// coexist with types that derive those traits. +#[derive(Clone, Debug, Default, Deserialize)] +#[cfg_attr(feature = "openapi", derive(utoipa::ToSchema))] +#[serde(transparent)] +pub struct UnknownFields(pub HashMap); + +impl Serialize for UnknownFields { + fn serialize(&self, serializer: S) -> Result { + use serde::ser::SerializeMap; + if self.0.is_empty() { + let map = serializer.serialize_map(Some(0))?; + map.end() + } else { + let mut map = serializer.serialize_map(Some(1))?; + map.serialize_entry(UNKNOWN_FIELDS_MARKER, &self.0)?; + map.end() + } + } +} + +impl UnknownFields { + /// Returns true if no unknown fields were captured. + #[must_use] + pub fn is_empty(&self) -> bool { + self.0.is_empty() + } + + /// Returns the keys of the unknown fields. + pub fn keys(&self) -> impl Iterator { + self.0.keys() + } +} + +impl PartialEq for UnknownFields { + fn eq(&self, _other: &Self) -> bool { + true + } +} + +impl Eq for UnknownFields {} + +impl Hash for UnknownFields { + fn hash(&self, _state: &mut H) {} +} + +/// Recursively walks a `serde_yaml::Value` tree and collects paths where +/// [`UNKNOWN_FIELDS_MARKER`] keys are found. +/// +/// This is the generic replacement for per-type manual walkers — any struct +/// that serializes `UnknownFields` via `#[serde(flatten)]` will automatically +/// be covered. +#[must_use] +pub fn collect_unknown_fields_from_value(value: &serde_yaml::Value, path: &str) -> Vec { + match value { + serde_yaml::Value::Mapping(map) => { + let mut result = Vec::new(); + for (key, val) in map { + let key_str = key.as_str().unwrap_or("?"); + let child_path = if path.is_empty() { + key_str.to_owned() + } else { + format!("{path}.{key_str}") + }; + if key_str == UNKNOWN_FIELDS_MARKER { + // Marker found — report the keys inside it. + if let serde_yaml::Value::Mapping(unknowns) = val { + for (uk, _) in unknowns { + let ukey = uk.as_str().unwrap_or("?"); + if path.is_empty() { + result.push(ukey.to_owned()); + } else { + result.push(format!("{path}.{ukey}")); + } + } + } + } else { + result.extend(collect_unknown_fields_from_value(val, &child_path)); + } + } + result + } + serde_yaml::Value::Sequence(seq) => { + let mut result = Vec::new(); + for (i, item) in seq.iter().enumerate() { + let child_path = format!("{path}[{i}]"); + result.extend(collect_unknown_fields_from_value(item, &child_path)); + } + result + } + _ => Vec::new(), + } +} diff --git a/crates/weaver_semconv/src/v2/event.rs b/crates/weaver_semconv/src/v2/event.rs index 4cc06269a..375d9af80 100644 --- a/crates/weaver_semconv/src/v2/event.rs +++ b/crates/weaver_semconv/src/v2/event.rs @@ -11,6 +11,7 @@ use crate::{ deprecated::Deprecated, group::{GroupSpec, GroupType}, stability::Stability, + unknown_fields::UnknownFields, v2::{ attribute::{split_attributes_and_groups, AttributeOrGroupRef}, signal_id::SignalId, @@ -40,7 +41,6 @@ pub struct Event { /// A refinement of an existing event. #[derive(Serialize, Deserialize, Debug, Clone, JsonSchema)] -#[serde(deny_unknown_fields)] pub struct EventRefinement { /// The ID of the refinement. pub id: SignalId, @@ -71,6 +71,10 @@ pub struct EventRefinement { #[serde(default)] #[serde(skip_serializing_if = "BTreeMap::is_empty")] pub annotations: BTreeMap, + /// Unknown fields captured for forward-compatibility. + #[serde(flatten, skip_serializing_if = "UnknownFields::is_empty")] + #[schemars(skip)] + pub unknown_fields: UnknownFields, } impl Event { diff --git a/crates/weaver_semconv/src/v2/metric.rs b/crates/weaver_semconv/src/v2/metric.rs index 0cf45c8f7..b5b923627 100644 --- a/crates/weaver_semconv/src/v2/metric.rs +++ b/crates/weaver_semconv/src/v2/metric.rs @@ -11,6 +11,7 @@ use crate::{ deprecated::Deprecated, group::{GroupSpec, InstrumentSpec}, stability::Stability, + unknown_fields::UnknownFields, v2::{ attribute::{split_attributes_and_groups, AttributeOrGroupRef}, signal_id::SignalId, @@ -50,7 +51,6 @@ pub struct Metric { /// A refinement of an existing metric. #[derive(Serialize, Deserialize, Debug, Clone, JsonSchema)] -#[serde(deny_unknown_fields)] pub struct MetricRefinement { /// The ID of the refinement. pub id: SignalId, @@ -81,6 +81,10 @@ pub struct MetricRefinement { #[serde(default)] #[serde(skip_serializing_if = "BTreeMap::is_empty")] pub annotations: BTreeMap, + /// Unknown fields captured for forward-compatibility. + #[serde(flatten, skip_serializing_if = "UnknownFields::is_empty")] + #[schemars(skip)] + pub unknown_fields: UnknownFields, } impl Metric { diff --git a/crates/weaver_semconv/src/v2/span.rs b/crates/weaver_semconv/src/v2/span.rs index 5248ad513..5efe1560b 100644 --- a/crates/weaver_semconv/src/v2/span.rs +++ b/crates/weaver_semconv/src/v2/span.rs @@ -12,6 +12,7 @@ use crate::{ deprecated::Deprecated, group::{GroupSpec, GroupType, SpanKindSpec}, stability::Stability, + unknown_fields::UnknownFields, v2::{attribute::AttributeRef, signal_id::SignalId, CommonFields}, YamlValue, }; @@ -84,7 +85,6 @@ pub struct Span { /// A refinement of an existing span. #[derive(Serialize, Deserialize, Debug, Clone, JsonSchema)] -#[serde(deny_unknown_fields)] pub struct SpanRefinement { /// The ID of the refinement. pub id: SignalId, @@ -121,6 +121,10 @@ pub struct SpanRefinement { #[serde(default)] #[serde(skip_serializing_if = "BTreeMap::is_empty")] pub annotations: BTreeMap, + /// Unknown fields captured for forward-compatibility. + #[serde(flatten, skip_serializing_if = "UnknownFields::is_empty")] + #[schemars(skip)] + pub unknown_fields: UnknownFields, } impl Span { @@ -198,11 +202,15 @@ impl SpanRefinement { /// Specification of the span name. #[derive(Serialize, Deserialize, Debug, Clone, JsonSchema, PartialEq)] #[cfg_attr(feature = "openapi", derive(utoipa::ToSchema))] -#[serde(deny_unknown_fields)] #[serde(rename_all = "snake_case")] pub struct SpanName { /// Required description of how a span name should be created. pub note: String, + /// Unknown fields captured for forward-compatibility. + #[serde(flatten, skip_serializing_if = "UnknownFields::is_empty")] + #[schemars(skip)] + #[cfg_attr(feature = "openapi", schema(ignore))] + pub unknown_fields: UnknownFields, } /// A refinement of an Attribute for a span. diff --git a/crates/weaver_semconv/tests/published_repository/3.0.0/registry_manifest.yaml b/crates/weaver_semconv/tests/published_repository/3.0.0/registry_manifest.yaml index 2d958f56a..7b8659252 100644 --- a/crates/weaver_semconv/tests/published_repository/3.0.0/registry_manifest.yaml +++ b/crates/weaver_semconv/tests/published_repository/3.0.0/registry_manifest.yaml @@ -1,6 +1,5 @@ file_format: manifest/2.0 description: Test repository that has been resolved. schema_url: http://resolved/3.0.0 -repository_url: https://github.com/open-telemetry/weaver.git stability: stable resolved_schema_uri: resolved_schema.yaml diff --git a/crates/weaver_semconv/tests/published_repository/resolved/1.0.0 b/crates/weaver_semconv/tests/published_repository/resolved/1.0.0 index 4a4ab2ef1..c36be32f1 100644 --- a/crates/weaver_semconv/tests/published_repository/resolved/1.0.0 +++ b/crates/weaver_semconv/tests/published_repository/resolved/1.0.0 @@ -1,6 +1,5 @@ file_format: manifest/2.0 description: Test repository that has been resolved. schema_url: http://resolved/1.0.0 -repository_url: https://github.com/open-telemetry/weaver.git stability: stable resolved_schema_uri: resolved_1.0.0.yaml diff --git a/crates/weaver_semconv/tests/published_repository/resolved/2.0.0 b/crates/weaver_semconv/tests/published_repository/resolved/2.0.0 index 7ce4806d3..76809f38f 100644 --- a/crates/weaver_semconv/tests/published_repository/resolved/2.0.0 +++ b/crates/weaver_semconv/tests/published_repository/resolved/2.0.0 @@ -1,6 +1,5 @@ file_format: manifest/2.0 description: Test repository that has been resolved. schema_url: http://resolved/2.0.0 -repository_url: https://github.com/open-telemetry/weaver.git stability: stable resolved_schema_uri: https://github.com/open-telemetry/weaver.git\creates/weaver_semconv/tests/published_respository/resolved/resolved_2.0.0 diff --git a/crates/weaver_semconv_gen/src/v2.rs b/crates/weaver_semconv_gen/src/v2.rs index 8e9930704..42d02f20e 100644 --- a/crates/weaver_semconv_gen/src/v2.rs +++ b/crates/weaver_semconv_gen/src/v2.rs @@ -469,6 +469,7 @@ mod tests { examples: None, common: CommonFields::default(), provenance: Default::default(), + unknown_fields: Default::default(), }], dependencies: std::collections::BTreeSet::new(), registry: Registry { @@ -478,12 +479,14 @@ mod tests { attributes: vec![AttributeRef(0)], common: CommonFields::default(), provenance: Default::default(), + unknown_fields: Default::default(), }], spans: vec![Span { r#type: "trace.test".to_owned().into(), kind: weaver_semconv::group::SpanKindSpec::Client, name: SpanName { note: "note".to_owned(), + unknown_fields: Default::default(), }, attributes: vec![SpanAttributeRef { base: AttributeRef(0), @@ -491,10 +494,12 @@ mod tests { weaver_semconv::attribute::BasicRequirementLevelSpec::Required, ), sampling_relevant: None, + unknown_fields: Default::default(), }], entity_associations: vec![], common: CommonFields::default(), provenance: Default::default(), + unknown_fields: Default::default(), }], metrics: vec![Metric { name: "test.metric".to_owned().into(), @@ -505,10 +510,12 @@ mod tests { requirement_level: weaver_semconv::attribute::RequirementLevel::Basic( weaver_semconv::attribute::BasicRequirementLevelSpec::Required, ), + unknown_fields: Default::default(), }], entity_associations: vec![], common: CommonFields::default(), provenance: Default::default(), + unknown_fields: Default::default(), }], events: vec![Event { name: "test.event".to_owned().into(), @@ -517,10 +524,12 @@ mod tests { requirement_level: weaver_semconv::attribute::RequirementLevel::Basic( weaver_semconv::attribute::BasicRequirementLevelSpec::Required, ), + unknown_fields: Default::default(), }], entity_associations: vec![], common: CommonFields::default(), provenance: Default::default(), + unknown_fields: Default::default(), }], entities: vec![Entity { r#type: "test.entity".to_owned().into(), @@ -529,11 +538,14 @@ mod tests { requirement_level: weaver_semconv::attribute::RequirementLevel::Basic( weaver_semconv::attribute::BasicRequirementLevelSpec::Required, ), + unknown_fields: Default::default(), }], description: vec![], common: CommonFields::default(), provenance: Default::default(), + unknown_fields: Default::default(), }], + unknown_fields: Default::default(), }, refinements: Refinements { spans: vec![SpanRefinement { @@ -543,6 +555,7 @@ mod tests { kind: weaver_semconv::group::SpanKindSpec::Client, name: SpanName { note: "note".to_owned(), + unknown_fields: Default::default(), }, attributes: vec![SpanAttributeRef { base: AttributeRef(0), @@ -550,10 +563,12 @@ mod tests { weaver_semconv::attribute::BasicRequirementLevelSpec::Required, ), sampling_relevant: None, + unknown_fields: Default::default(), }], entity_associations: vec![], common: CommonFields::default(), provenance: Default::default(), + unknown_fields: Default::default(), }, }], metrics: vec![MetricRefinement { @@ -567,10 +582,12 @@ mod tests { requirement_level: weaver_semconv::attribute::RequirementLevel::Basic( weaver_semconv::attribute::BasicRequirementLevelSpec::Required, ), + unknown_fields: Default::default(), }], entity_associations: vec![], common: CommonFields::default(), provenance: Default::default(), + unknown_fields: Default::default(), }, }], events: vec![EventRefinement { @@ -582,13 +599,17 @@ mod tests { requirement_level: weaver_semconv::attribute::RequirementLevel::Basic( weaver_semconv::attribute::BasicRequirementLevelSpec::Required, ), + unknown_fields: Default::default(), }], entity_associations: vec![], common: CommonFields::default(), provenance: Default::default(), + unknown_fields: Default::default(), }, }], + unknown_fields: Default::default(), }, + unknown_fields: Default::default(), } } }