diff --git a/CHANGELOG.md b/CHANGELOG.md index 624a49b978b..f7e688f5d3c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -12,6 +12,7 @@ - Correctly handle minidump objecstore upload failures. ([#6033](https://github.com/getsentry/relay/pull/6033)) - Add `client.address` attribute to known IP fields. ([#6058](https://github.com/getsentry/relay/pull/6058)) - Fix a bug in mobile attribute normalization. ([#6065](https://github.com/getsentry/relay/pull/6065)) +- Stop using cross-org DSC data in v2 span normalization. ([#6073](https://github.com/getsentry/relay/pull/6073)) **Internal**: diff --git a/relay-event-normalization/src/eap/mod.rs b/relay-event-normalization/src/eap/mod.rs index 6aaeed6221d..ccdd0713597 100644 --- a/relay-event-normalization/src/eap/mod.rs +++ b/relay-event-normalization/src/eap/mod.rs @@ -11,6 +11,7 @@ use relay_conventions::attributes::*; use relay_conventions::{AttributeInfo, ReplacementName, WriteBehavior}; use relay_event_schema::protocol::{Attribute, AttributeType, Attributes, BrowserContext, Geo}; use relay_protocol::{Annotated, Error, ErrorKind, Meta, Remark, RemarkType, Value}; +use relay_sampling::DynamicSamplingContext; use relay_spans::derive_op_for_v2_span; use crate::span::TABLE_NAME_REGEX; @@ -19,7 +20,7 @@ use crate::span::tag_extraction::{ domain_from_scrubbed_http, domain_from_server_address, span_op_to_category, sql_action_from_query, sql_tables_from_query, }; -use crate::{ClientHints, EnrichedDsc, FromUserAgentInfo as _, RawUserAgentInfo}; +use crate::{ClientHints, FromUserAgentInfo as _, RawUserAgentInfo}; mod ai; mod mobile; @@ -356,7 +357,7 @@ pub fn normalize_user_geo( pub fn normalize_dsc( attributes: &mut Annotated, is_segment: &Annotated, - dsc: Option, + dsc: Option<&DynamicSamplingContext>, ) { let Some(dsc) = dsc else { return; @@ -368,26 +369,28 @@ pub fn normalize_dsc( if attributes.contains_key(SENTRY__DSC__TRACE_ID) { return; } - attributes.insert(SENTRY__DSC__TRACE_ID, dsc.dsc.trace_id.to_string()); + attributes.insert(SENTRY__DSC__TRACE_ID, dsc.trace_id.to_string()); - if let Some(transaction) = &dsc.dsc.transaction { + if let Some(transaction) = &dsc.transaction { attributes.insert(SENTRY__DSC__TRANSACTION, transaction.clone()); } - attributes.insert(SENTRY__DSC__PROJECT_ID, dsc.sampling_project_id.to_string()); + if let Some(project_id) = &dsc.project_id { + attributes.insert(SENTRY__DSC__PROJECT_ID, project_id.to_string()); + } if is_segment.value().is_some_and(|is_segment| *is_segment) { - attributes.insert(SENTRY__DSC__PUBLIC_KEY, dsc.dsc.public_key.to_string()); - if let Some(release) = &dsc.dsc.release { + attributes.insert(SENTRY__DSC__PUBLIC_KEY, dsc.public_key.to_string()); + if let Some(release) = &dsc.release { attributes.insert(SENTRY__DSC__RELEASE, release.clone()); } - if let Some(environment) = &dsc.dsc.environment { + if let Some(environment) = &dsc.environment { attributes.insert(SENTRY__DSC__ENVIRONMENT, environment.clone()); } - if let Some(sample_rate) = dsc.dsc.sample_rate { + if let Some(sample_rate) = dsc.sample_rate { attributes.insert(SENTRY__DSC__SAMPLE_RATE, sample_rate); } - if let Some(sampled) = dsc.dsc.sampled { + if let Some(sampled) = dsc.sampled { attributes.insert(SENTRY__DSC__SAMPLED, sampled); } } @@ -825,6 +828,7 @@ mod tests { DynamicSamplingContext { trace_id: "67e5504410b1426f9247bb680e5fe0c8".parse().unwrap(), public_key: "12345678901234567890123456789012".parse().unwrap(), + project_id: Some(ProjectId::new(42)), release: None, environment: None, transaction: transaction.map(str::to_owned), @@ -847,15 +851,7 @@ mod tests { fn test_normalize_dsc_child_span_no_transaction() { let mut attributes = Annotated::empty(); let dsc = &mock_dsc(None); - let sampling_project_id = ProjectId::new(42); - normalize_dsc( - &mut attributes, - &Annotated::new(false), - Some(EnrichedDsc { - dsc, - sampling_project_id, - }), - ); + normalize_dsc(&mut attributes, &Annotated::new(false), Some(dsc)); assert_annotated_snapshot!(attributes, @r#" { "sentry.dsc.project_id": { @@ -874,15 +870,7 @@ mod tests { fn test_normalize_dsc_child_span() { let mut attributes = Annotated::empty(); let dsc = &mock_dsc(Some("/some/endpoint")); - let sampling_project_id = ProjectId::new(42); - normalize_dsc( - &mut attributes, - &Annotated::new(false), - Some(EnrichedDsc { - dsc, - sampling_project_id, - }), - ); + normalize_dsc(&mut attributes, &Annotated::new(false), Some(dsc)); assert_annotated_snapshot!(attributes, @r#" { "sentry.dsc.project_id": { @@ -905,15 +893,7 @@ mod tests { fn test_normalize_dsc_segment() { let mut attributes = Annotated::empty(); let dsc = &mock_dsc(Some("/some/endpoint")); - let sampling_project_id = ProjectId::new(42); - normalize_dsc( - &mut attributes, - &Annotated::new(true), - Some(EnrichedDsc { - dsc, - sampling_project_id, - }), - ); + normalize_dsc(&mut attributes, &Annotated::new(true), Some(dsc)); assert_annotated_snapshot!(attributes, @r#" { "sentry.dsc.project_id": { diff --git a/relay-event-normalization/src/event.rs b/relay-event-normalization/src/event.rs index d8a71ec781b..631b7153d9d 100644 --- a/relay-event-normalization/src/event.rs +++ b/relay-event-normalization/src/event.rs @@ -33,6 +33,7 @@ use relay_protocol::{ Annotated, Empty, Error, ErrorKind, FiniteF64, FromValue, Getter, Meta, Object, Remark, RemarkType, TryFromFloatError, Value, }; +use relay_sampling::DynamicSamplingContext; use smallvec::SmallVec; use uuid::Uuid; @@ -41,8 +42,8 @@ use crate::span::ai::enrich_ai_event_data; use crate::span::tag_extraction::{extract_segment_name_from_event, extract_span_tags_from_event}; use crate::utils::{self, MAX_DURATION_MOBILE_MS, get_event_user_tag}; use crate::{ - BorrowedSpanOpDefaults, BreakdownsConfig, CombinedMeasurementsConfig, EnrichedDsc, GeoIpLookup, - MaxChars, ModelMetadata, PerformanceScoreConfig, RawUserAgentInfo, SpanDescriptionRule, + BorrowedSpanOpDefaults, BreakdownsConfig, CombinedMeasurementsConfig, GeoIpLookup, MaxChars, + ModelMetadata, PerformanceScoreConfig, RawUserAgentInfo, SpanDescriptionRule, TransactionNameConfig, breakdowns, event_error, legacy, mechanism, remove_other, schema, span, stacktrace, transactions, trimming, user_agent, }; @@ -182,8 +183,8 @@ pub struct NormalizationConfig<'a> { /// the SDK. pub force_trace_context: bool, - /// Dynamic sampling context and additional attributes used for dsc span normalization. - pub dsc: Option>, + /// Dynamic sampling context used for dsc span normalization. + pub dsc: Option<&'a DynamicSamplingContext>, } impl Default for NormalizationConfig<'_> { diff --git a/relay-event-normalization/src/normalize/mod.rs b/relay-event-normalization/src/normalize/mod.rs index 1855471a32a..04045640980 100644 --- a/relay-event-normalization/src/normalize/mod.rs +++ b/relay-event-normalization/src/normalize/mod.rs @@ -3,11 +3,9 @@ use std::{collections::HashMap, sync::LazyLock}; use regex::Regex; use relay_base_schema::metrics::MetricUnit; -use relay_base_schema::project::ProjectId; use relay_event_schema::protocol::VALID_PLATFORMS; use relay_pattern::Pattern; use relay_protocol::{FiniteF64, RuleCondition}; -use relay_sampling::DynamicSamplingContext; use serde::{Deserialize, Serialize}; pub mod breakdowns; @@ -366,15 +364,6 @@ pub struct ModelMetadataEntry { pub context_size: Option, } -/// [`DynamicSamplingContext`] plus additional attributes used for dsc span normalization. -#[derive(Debug, Clone, Copy)] -pub struct EnrichedDsc<'a> { - /// Dynamic sampling context containing the trace id and root transaction that started the trace. - pub dsc: &'a DynamicSamplingContext, - /// ID of the project where the trace originated. - pub sampling_project_id: ProjectId, -} - #[cfg(test)] mod tests { use chrono::{TimeZone, Utc}; diff --git a/relay-event-normalization/src/normalize/span/mod.rs b/relay-event-normalization/src/normalize/span/mod.rs index 1b2c289ce26..cd514c7f366 100644 --- a/relay-event-normalization/src/normalize/span/mod.rs +++ b/relay-event-normalization/src/normalize/span/mod.rs @@ -3,10 +3,9 @@ use regex::Regex; use relay_event_schema::protocol::{Event, SpanData, TraceContext}; use relay_protocol::Annotated; +use relay_sampling::DynamicSamplingContext; use std::sync::LazyLock; -use crate::EnrichedDsc; - pub mod ai; pub mod country_subregion; pub mod description; @@ -61,7 +60,7 @@ pub fn normalize_app_start_spans(event: &mut Event) { /// /// If `sentry.dsc.trace_id` is already present in a span's `data`, the function does nothing for /// that span. -pub fn normalize_dsc_for_event_spans(event: &mut Event, dsc: Option) { +pub fn normalize_dsc_for_event_spans(event: &mut Event, dsc: Option<&DynamicSamplingContext>) { if let Some(ctx) = event.context_mut::() { normalize_dsc_for_span_data(&mut ctx.data, dsc); } @@ -77,7 +76,10 @@ pub fn normalize_dsc_for_event_spans(event: &mut Event, dsc: Option /// Writes DSC attributes needed for dynamic sampling into `span_data`. /// /// If `sentry.dsc.trace_id` is already present in `span_data`, the function does nothing. -pub fn normalize_dsc_for_span_data(span_data: &mut Annotated, dsc: Option) { +pub fn normalize_dsc_for_span_data( + span_data: &mut Annotated, + dsc: Option<&DynamicSamplingContext>, +) { let Some(dsc) = dsc else { return; }; @@ -86,9 +88,11 @@ pub fn normalize_dsc_for_span_data(span_data: &mut Annotated, dsc: Opt if data.sentry_dsc_trace_id.value().is_some() { return; } - data.sentry_dsc_trace_id = Annotated::new(dsc.dsc.trace_id.to_string()); - data.sentry_dsc_project_id = Annotated::new(dsc.sampling_project_id.to_string()); - if let Some(transaction) = &dsc.dsc.transaction { + data.sentry_dsc_trace_id = Annotated::new(dsc.trace_id.to_string()); + if let Some(project_id) = &dsc.project_id { + data.sentry_dsc_project_id = Annotated::new(project_id.to_string()); + } + if let Some(transaction) = &dsc.transaction { data.sentry_dsc_transaction = Annotated::new(transaction.to_string()); } } diff --git a/relay-sampling/src/dsc.rs b/relay-sampling/src/dsc.rs index 82d20af7c44..2f6b6b290d6 100644 --- a/relay-sampling/src/dsc.rs +++ b/relay-sampling/src/dsc.rs @@ -10,7 +10,7 @@ use std::collections::BTreeMap; use std::fmt; -use relay_base_schema::project::ProjectKey; +use relay_base_schema::project::{ProjectId, ProjectKey}; use relay_event_schema::protocol::TraceId; use relay_protocol::{Getter, Val}; use serde::{Deserialize, Serialize}; @@ -28,6 +28,13 @@ pub struct DynamicSamplingContext { pub trace_id: TraceId, /// The project key. pub public_key: ProjectKey, + /// The sampling project id. + /// + /// Not part of the DSC protocol, but accessed and used together with the rest of the DSC. + /// Placed here for ergonomy and to avoid having to mutate the request context when the sampling + /// org is different from the sending org. + #[serde(skip)] + pub project_id: Option, /// The release. #[serde(default)] pub release: Option, @@ -562,6 +569,7 @@ mod tests { let dsc = DynamicSamplingContext { trace_id: "67e5504410b1426f9247bb680e5fe0c8".parse().unwrap(), public_key: ProjectKey::parse("abd0f232775f45feab79864e580d160b").unwrap(), + project_id: Some(ProjectId::new(42)), release: Some("1.1.1".into()), user: TraceUserContext { user_segment: "user-seg".into(), @@ -600,6 +608,7 @@ mod tests { let dsc = DynamicSamplingContext { trace_id: "67e5504410b1426f9247bb680e5fe0c8".parse().unwrap(), public_key: ProjectKey::parse("abd0f232775f45feab79864e580d160b").unwrap(), + project_id: Some(ProjectId::new(42)), release: None, user: TraceUserContext::default(), environment: None, @@ -619,6 +628,7 @@ mod tests { let dsc = DynamicSamplingContext { trace_id: "67e5504410b1426f9247bb680e5fe0c8".parse().unwrap(), public_key: ProjectKey::parse("abd0f232775f45feab79864e580d160b").unwrap(), + project_id: Some(ProjectId::new(42)), release: None, user: TraceUserContext::default(), environment: None, diff --git a/relay-sampling/src/evaluation.rs b/relay-sampling/src/evaluation.rs index 306a949d5f4..b77fb00631a 100644 --- a/relay-sampling/src/evaluation.rs +++ b/relay-sampling/src/evaluation.rs @@ -258,6 +258,7 @@ impl fmt::Display for MatchedRuleIds { #[cfg(test)] mod tests { use chrono::TimeZone; + use relay_base_schema::project::ProjectId; use relay_protocol::RuleCondition; use similar_asserts::assert_eq; use std::str::FromStr; @@ -299,6 +300,7 @@ mod tests { let mut dsc = DynamicSamplingContext { trace_id: "67e5504410b1426f9247bb680e5fe0c8".parse().unwrap(), public_key: "12345678123456781234567812345678".parse().unwrap(), + project_id: Some(ProjectId::new(42)), release: None, environment: None, transaction: None, diff --git a/relay-server/src/envelope/mod.rs b/relay-server/src/envelope/mod.rs index 7b8ab9fb158..7fd647106d4 100644 --- a/relay-server/src/envelope/mod.rs +++ b/relay-server/src/envelope/mod.rs @@ -189,6 +189,18 @@ impl EnvelopeHeaders { } } + /// Returns a mutable reference to the dynamic sampling context from the headers, if present. + pub fn dsc_mut(&mut self) -> Option<&mut DynamicSamplingContext> { + match &mut self.trace { + None => None, + Some(ErrorBoundary::Err(e)) => { + relay_log::debug!(error = e.as_ref(), "failed to parse sampling context"); + None + } + Some(ErrorBoundary::Ok(t)) => Some(t), + } + } + /// Overrides the dynamic sampling context in envelope headers. pub fn set_dsc(&mut self, dsc: DynamicSamplingContext) { self.trace = Some(ErrorBoundary::Ok(dsc)); @@ -1243,6 +1255,7 @@ mod tests { let dsc = DynamicSamplingContext { trace_id: "67e5504410b1426f9247bb680e5fe0c8".parse().unwrap(), public_key: ProjectKey::parse("abd0f232775f45feab79864e580d160b").unwrap(), + project_id: None, release: Some("1.1.1".to_owned()), user: Default::default(), replay_id: None, diff --git a/relay-server/src/processing/legacy_spans/normalize.rs b/relay-server/src/processing/legacy_spans/normalize.rs index d76d3e59b3c..00aba6c5d4b 100644 --- a/relay-server/src/processing/legacy_spans/normalize.rs +++ b/relay-server/src/processing/legacy_spans/normalize.rs @@ -2,7 +2,6 @@ use crate::services::processor::ProcessingError; use chrono::{DateTime, Utc}; -use relay_event_normalization::EnrichedDsc; use relay_event_normalization::span::{self, ai}; use relay_event_normalization::{ BorrowedSpanOpDefaults, ClientHints, CombinedMeasurementsConfig, FromUserAgentInfo, @@ -15,6 +14,7 @@ use relay_event_schema::processor::{ProcessingState, process_value}; use relay_event_schema::protocol::{BrowserContext, EventId, IpAddr, Span, SpanData}; use relay_metrics::UnixTimestamp; use relay_protocol::{Annotated, Empty, Value}; +use relay_sampling::DynamicSamplingContext; /// Config needed to normalize a standalone span. #[derive(Clone, Debug)] @@ -57,7 +57,7 @@ pub struct NormalizeSpanConfig<'a> { pub geo_lookup: &'a GeoIpLookup, pub span_op_defaults: BorrowedSpanOpDefaults<'a>, /// Dynamic sampling context plus additional attributes used for dsc span normalization. - pub dsc: Option>, + pub dsc: Option<&'a DynamicSamplingContext>, } fn set_segment_attributes(span: &mut Annotated) { diff --git a/relay-server/src/processing/legacy_spans/process.rs b/relay-server/src/processing/legacy_spans/process.rs index 12e6a0596b3..6dfbc0e26dd 100644 --- a/relay-server/src/processing/legacy_spans/process.rs +++ b/relay-server/src/processing/legacy_spans/process.rs @@ -1,6 +1,4 @@ -use relay_event_normalization::{ - CombinedMeasurementsConfig, EnrichedDsc, GeoIpLookup, MeasurementsConfig, -}; +use relay_event_normalization::{CombinedMeasurementsConfig, GeoIpLookup, MeasurementsConfig}; use relay_event_schema::processor::{ProcessingAction, ProcessingState, process_value}; use relay_event_schema::protocol::Span; use relay_metrics::MetricNamespace; @@ -46,18 +44,14 @@ pub fn normalize( ) { let aggregator_config = ctx.config.aggregator_config_for(MetricNamespace::Spans); let model_data = ctx.global_config.ai_model_metadata(); - let sampling_project_id = ctx + let mut dsc = spans.headers.dsc().cloned(); + let project_id = ctx .sampling_project_info - .and_then(|p| p.project_id) - .or(ctx.project_info.project_id); - let dsc = spans.headers.dsc().cloned(); - let dsc = dsc - .as_ref() - .zip(sampling_project_id) - .map(|(dsc, sampling_project_id)| EnrichedDsc { - dsc, - sampling_project_id, - }); + .unwrap_or(ctx.project_info) + .project_id; + if let Some(dsc) = &mut dsc { + dsc.project_id = project_id; + } let norm = normalize::NormalizeSpanConfig { received_at: spans.received_at(), timestamp_range: aggregator_config.timestamp_range(), @@ -79,7 +73,7 @@ pub fn normalize( client_ip: spans.headers.meta().client_addr().map(Into::into), geo_lookup, span_op_defaults: ctx.global_config.span_op_defaults.borrow(), - dsc, + dsc: dsc.as_ref(), }; spans.retain( diff --git a/relay-server/src/processing/spans/dynamic_sampling.rs b/relay-server/src/processing/spans/dynamic_sampling.rs index 488380fa2eb..fae0d34f7ca 100644 --- a/relay-server/src/processing/spans/dynamic_sampling.rs +++ b/relay-server/src/processing/spans/dynamic_sampling.rs @@ -8,16 +8,15 @@ use relay_metrics::{Bucket, BucketMetadata, BucketValue, UnixTimestamp}; use relay_protocol::{FiniteF64, get_value}; use relay_quotas::{DataCategory, Scoping}; use relay_sampling::config::RuleType; +use relay_sampling::dsc::TraceUserContext; use relay_sampling::evaluation::{SamplingDecision, SamplingEvaluator}; use relay_sampling::{DynamicSamplingContext, SamplingConfig}; use crate::envelope::ClientName; -use crate::managed::Managed; +use crate::managed::{Managed, Rejected}; use crate::metrics_extraction::ExtractedMetrics; use crate::processing::Context; -use crate::processing::spans::{ - Error, ExpandedSpan, ExpandedSpans, Indexed, Result, SerializedSpans, -}; +use crate::processing::spans::{Error, ExpandedSpan, ExpandedSpans, Indexed, Result}; use crate::services::outcome::Outcome; use crate::services::projects::project::ProjectInfo; use crate::statsd::RelayCounters; @@ -52,54 +51,62 @@ pub fn validate_configs(ctx: Context<'_>) { } } -/// Validates the presence of a dynamic sampling context when processing Spans. +/// Validates and, if necessary, resynthesizes the DSC for a batch of V2 spans. /// -/// Each envelope received by Relay must contain a valid dynamic sampling context. -/// This is not a technical requirement as a missing dynamic sampling context is treated as having -/// a sample rate of 100%, but to ensure SDKs implement the protocol correctly it is validated. +/// A DSC must be present. This is not a technical requirement as a missing DSC simply leads to a +/// 100 % sample rate. Instead, the intention of the enforcement is to ensure that SDKs implement +/// the protocol correctly. /// /// An exception exists for our OTeL integration, which currently never sets a dynamic sampling /// context. In the future we may want to extract a DSC from the OTeL payload to allow dynamic -/// sampling if the necessary attributes are present. -pub fn validate_dsc_presence(spans: &SerializedSpans) -> Result<()> { - let dsc = spans.headers.dsc(); - - // Envelopes created by Relay may not carry a DSC. This is currently the case for envelopes - // created through the OTeL integration. - // - // This validation is only best effort (not a hard requirement) to get SDKs to implement DSC - // correctly, so it is okay to be a bit more lenient here and trust ourselves. - let client_is_relay = spans.headers.meta().client_name() == ClientName::Relay; - - if !client_is_relay && dsc.is_none() { - return Err(Error::MissingDynamicSamplingContext); - } - - Ok(()) -} - -/// Validates the dynamic sampling context against the parsed spans. +/// sampling if the necessary attributes are present. OTeL spans are recognized by the client name +/// being `Relay`. /// -/// The values of the dynamic sampling context must match the values provided in the spans. -/// Currently this only validates the trace id. -pub fn validate_dsc(spans: &ExpandedSpans) -> Result<()> { - let Some(dsc) = spans.headers.dsc() else { - // It's okay if we don't have a DSC here. We may be in a processing path which has spans - // from mixed traces without a DSC (=100% sample rate). - // For example via the OTeL integration. - return Ok(()); - }; - - for span in &spans.spans { - let span = &span.span; - let trace_id = get_value!(span.trace_id); - - if trace_id != Some(&dsc.trace_id) { - return Err(Error::DynamicSamplingContextMismatch); +/// An unresolved trace root (sampling) project causes the DSC to be resynthesized and the trace attributed to +/// the spans' own project. This happens when the sampling project is disabled or belongs to a +/// different org than the spans. +pub fn validate_and_set_dsc( + spans: &mut Managed, + ctx: &Context<'_>, +) -> Result<(), Rejected> { + let public_key = spans.scoping().project_key; + spans.try_modify(|spans, _| { + let client_is_relay = spans.headers.meta().client_name() == ClientName::Relay; + let dsc = match spans.headers.dsc_mut() { + None if client_is_relay => return Ok(()), + None => return Err(Error::MissingDynamicSamplingContext), + Some(dsc) => dsc, + }; + + for span in &spans.spans { + let span = &span.span; + if get_value!(span.trace_id) != Some(&dsc.trace_id) { + return Err(Error::DynamicSamplingContextMismatch); + } } - } - Ok(()) + match ctx.sampling_project_info { + None => { + *dsc = DynamicSamplingContext { + trace_id: dsc.trace_id, + public_key, + project_id: ctx.project_info.project_id, + release: None, + environment: None, + transaction: None, + sample_rate: dsc.sample_rate, + sampled: None, + user: TraceUserContext::default(), + replay_id: None, + other: Default::default(), + }; + } + Some(sampling_project_info) => { + dsc.project_id = sampling_project_info.project_id; + } + }; + Ok(()) + }) } /// Computes the sampling decision for a batch of spans. diff --git a/relay-server/src/processing/spans/mod.rs b/relay-server/src/processing/spans/mod.rs index c249b16d637..2191e14700e 100644 --- a/relay-server/src/processing/spans/mod.rs +++ b/relay-server/src/processing/spans/mod.rs @@ -183,17 +183,16 @@ impl processing::Processor for SpansProcessor { validate::invalid(&spans).reject(&spans)?; dynamic_sampling::validate_configs(ctx); - dynamic_sampling::validate_dsc_presence(&spans).reject(&spans)?; - let spans = process::expand(spans)?; + let mut spans = process::expand(spans)?; + + dynamic_sampling::validate_and_set_dsc(&mut spans, &ctx)?; let mut spans = match dynamic_sampling::run(spans, ctx) { Ok(spans) => spans, Err(metrics) => return Ok(Output::metrics(metrics)), }; - dynamic_sampling::validate_dsc(&spans).reject(&spans)?; - process::normalize(&mut spans, &self.geo_lookup, ctx); filter::filter(&mut spans, ctx); diff --git a/relay-server/src/processing/spans/process.rs b/relay-server/src/processing/spans/process.rs index 9c2a5f9060b..d4d4f5a8076 100644 --- a/relay-server/src/processing/spans/process.rs +++ b/relay-server/src/processing/spans/process.rs @@ -2,7 +2,7 @@ use std::collections::BTreeMap; use std::time::Duration; use relay_event_normalization::eap::ClientUserAgentInfo; -use relay_event_normalization::{EnrichedDsc, GeoIpLookup, RequiredMode, SchemaProcessor, eap}; +use relay_event_normalization::{GeoIpLookup, RequiredMode, SchemaProcessor, eap}; use relay_event_schema::processor::{ProcessingState, ValueType, process_value}; use relay_event_schema::protocol::{Span, SpanId, SpanV2}; use relay_protocol::Annotated; @@ -207,17 +207,6 @@ fn normalize_span( ); if let Some(span) = span.value_mut() { - let sampling_project_id = ctx - .sampling_project_info - .and_then(|p| p.project_id) - .or(ctx.project_info.project_id); - let dsc = headers - .dsc() - .zip(sampling_project_id) - .map(|(dsc, sampling_project_id)| EnrichedDsc { - dsc, - sampling_project_id, - }); let duration = span_duration(span); let allowed_hosts = ctx.global_config.options.http_span_allowed_hosts.as_slice(); let model_metdata = ctx.global_config.ai_model_metadata(); @@ -243,7 +232,7 @@ fn normalize_span( } eap::normalize_user_agent(&mut span.attributes, client_ua_info); eap::normalize_user_geo(&mut span.attributes, |ip| geo_lookup.lookup(ip)); - eap::normalize_dsc(&mut span.attributes, &span.is_segment, dsc); + eap::normalize_dsc(&mut span.attributes, &span.is_segment, headers.dsc()); if ctx.is_processing() { eap::normalize_ai(&mut span.attributes, duration, model_metdata); } diff --git a/relay-server/src/processing/utils/dsc.rs b/relay-server/src/processing/utils/dsc.rs index 9d3850c4e73..45193ef74cd 100644 --- a/relay-server/src/processing/utils/dsc.rs +++ b/relay-server/src/processing/utils/dsc.rs @@ -1,4 +1,5 @@ -use relay_base_schema::{events::EventType, project::ProjectKey}; +use relay_base_schema::events::EventType; +use relay_base_schema::project::{ProjectId, ProjectKey}; use relay_event_schema::protocol::{Event, TraceContext}; use relay_protocol::Annotated; use relay_sampling::{DynamicSamplingContext, dsc::TraceUserContext}; @@ -27,16 +28,19 @@ use crate::processing::Context; /// /// If there is no transaction event in the envelope, this function will do nothing. /// -/// The function will return the sampling project information of the root project for the event. If -/// no sampling project information is specified, the project information of the event’s project -/// will be returned. +/// The function will use the sampling project information of the trace root project. If +/// the sampling project information is missing - due to the project being disabled or belonging to +/// a separate org - the event’s own project information will be used instead. pub fn validate_and_set_dsc( headers: &mut EnvelopeHeaders, event: &Annotated, ctx: &mut Context, ) { - let original_dsc = headers.dsc(); - if original_dsc.is_some() && ctx.sampling_project_info.is_some() { + let mut original_dsc = headers.dsc_mut(); + if let Some(dsc) = original_dsc.as_mut() + && let Some(sp) = ctx.sampling_project_info + { + dsc.project_id = sp.project_id; return; } @@ -44,12 +48,12 @@ pub fn validate_and_set_dsc( // below already checks for the event type. if let Some(event) = event.value() && let Some(key_config) = ctx.project_info.get_public_key_config() - && let Some(mut dsc) = dsc_from_event(key_config.public_key, event) + && let Some(mut dsc) = + dsc_from_event(key_config.public_key, ctx.project_info.project_id, event) { // All other information in the DSC must be discarded, but the sample rate was // actually applied by the client and is therefore correct. - let original_sample_rate = original_dsc.and_then(|dsc| dsc.sample_rate); - dsc.sample_rate = dsc.sample_rate.or(original_sample_rate); + dsc.sample_rate = original_dsc.as_ref().and_then(|dsc| dsc.sample_rate); headers.set_dsc(dsc); @@ -69,7 +73,11 @@ pub fn validate_and_set_dsc( /// /// Since sampling information is not available in the event payload, the `sample_rate` field /// cannot be set when computing the dynamic sampling context from a transaction event. -pub fn dsc_from_event(public_key: ProjectKey, event: &Event) -> Option { +pub fn dsc_from_event( + public_key: ProjectKey, + project_id: Option, + event: &Event, +) -> Option { if event.ty.value() != Some(&EventType::Transaction) { return None; } @@ -81,6 +89,7 @@ pub fn dsc_from_event(public_key: ProjectKey, event: &Event) -> Option) -> DynamicSamplingConte DynamicSamplingContext { trace_id: "67e5504410b1426f9247bb680e5fe0c8".parse().unwrap(), public_key: "12345678901234567890123456789012".parse().unwrap(), + project_id: Some(ProjectId::new(42)), release: None, environment: None, transaction: None, diff --git a/relay-server/src/utils/dynamic_sampling.rs b/relay-server/src/utils/dynamic_sampling.rs index 334c74a01b0..aaf700c441b 100644 --- a/relay-server/src/utils/dynamic_sampling.rs +++ b/relay-server/src/utils/dynamic_sampling.rs @@ -71,6 +71,7 @@ impl From> for SamplingResult { #[cfg(test)] mod tests { use relay_base_schema::events::EventType; + use relay_base_schema::project::ProjectId; use relay_event_schema::protocol::Event; use relay_event_schema::protocol::{EventId, LenientString}; use relay_protocol::Annotated; @@ -104,6 +105,7 @@ mod tests { DynamicSamplingContext { trace_id: "67e5504410b1426f9247bb680e5fe0c8".parse().unwrap(), public_key: "12345678901234567890123456789012".parse().unwrap(), + project_id: Some(ProjectId::new(42)), release: release.map(|value| value.to_owned()), environment: environment.map(|value| value.to_owned()), transaction: transaction.map(|value| value.to_owned()), diff --git a/tests/integration/test_dynamic_sampling.py b/tests/integration/test_dynamic_sampling.py index de258244835..ab4912e0646 100644 --- a/tests/integration/test_dynamic_sampling.py +++ b/tests/integration/test_dynamic_sampling.py @@ -952,6 +952,9 @@ def get_v2_envelope( "is_segment": True, "name": "root", "status": "ok", + "attributes": { + "sentry.segment.name": {"type": "string", "value": "/segment/"}, + }, }, # Child span. { @@ -963,6 +966,9 @@ def get_v2_envelope( "is_segment": False, "name": "child1", "status": "ok", + "attributes": { + "sentry.segment.name": {"type": "string", "value": "/segment/"}, + }, }, # Child span which already has `sentry.dsc.*` attributes set. { @@ -978,6 +984,7 @@ def get_v2_envelope( "sentry.dsc.trace_id": {"type": "string", "value": trace_id}, "sentry.dsc.transaction": {"type": "string", "value": "/spandata/"}, "sentry.dsc.project_id": {"type": "string", "value": "41"}, + "sentry.segment.name": {"type": "string", "value": "/segment/"}, }, }, ] @@ -1052,7 +1059,7 @@ def test_dsc_normalization( # ---------------------------------------------------------------------- # DSC with tx + different org ("dsc_with_tx", "diff_org", "tx"): ("/event/", project_id, org_id), - ("dsc_with_tx", "diff_org", "v2"): ("/dsc/", project_id, org_id), + ("dsc_with_tx", "diff_org", "v2"): (None, project_id, org_id), # ---------------------------------------------------------------------- # DSC without tx + different org ("dsc_no_tx", "diff_org", "tx"): ("/event/", project_id, org_id),