diff --git a/devdocs/config/version-2.0/.verify/default-config-current.yaml b/devdocs/config/version-2.0/.verify/default-config-current.yaml new file mode 100644 index 0000000000..3d4fda54fa --- /dev/null +++ b/devdocs/config/version-2.0/.verify/default-config-current.yaml @@ -0,0 +1,371 @@ +ebpf: + bpf_debug: false + wakeup_len: 500 + batch_length: 100 + batch_timeout: 1s + track_request_headers: false + http_request_timeout: 0s + context_propagation: disabled + override_bpfloop_enabled: false + traffic_control_backend: auto + disable_black_box_cp: false + high_request_volume: false + heuristic_sql_detect: false + instrument_cuda: auto + protocol_debug_print: false + redis_db_cache: + enabled: false + max_size: 1000 + buffer_sizes: + http: 0 + mysql: 0 + kafka: 0 + postgres: 0 + mysql_prepared_statements_cache_size: 1024 + postgres_prepared_statements_cache_size: 1024 + kafka_topic_uuid_cache_size: 1024 + mongo_requests_cache_size: 1024 + payload_extraction: + http: + graphql: + enabled: false + elasticsearch: + enabled: false + aws: + enabled: false + sqlpp: + enabled: false + endpoint_patterns: + - /query/service + max_transaction_time: 5m0s + dns_request_timeout: 5s + log_enricher: + services: [] + cache_ttl: 30m0s + cache_size: 128 + async_writer_workers: 8 + async_writer_channel_len: 500 + couchbase_db_cache_size: 1024 + bpf_fs_path: /sys/fs/bpf/ +network: + enable: false + source: socket_filter + agent_ip: "" + agent_ip_iface: external + agent_ip_type: any + interfaces: [] + exclude_interfaces: + - lo + protocols: [] + exclude_protocols: [] + cache_max_flows: 5000 + cache_active_timeout: 5s + deduper: first_come + deduper_fc_ttl: 0s + direction: both + sampling: 0 + listen_interfaces: watch + listen_poll_period: 10s + geo_ip: + ipinfo: + path: "" + maxmind: + country_path: "" + asn_path: "" + cache_len: 512 + cache_expiry: 1h0m0s + reverse_dns: + type: none + cache_len: 256 + cache_expiry: 1h0m0s + print_flows: false + cidrs: [] +filter: + application: {} + 
network: {} +attributes: + kubernetes: + enable: autodetect + cluster_name: "" + kubeconfig_path: "" + informers_sync_timeout: 30s + informers_resync_period: 30m0s + drop_external: false + disable_informers: [] + meta_cache_address: "" + meta_restrict_local_node: false + meta_source_labels: + service_name: "" + service_namespace: "" + resource_labels: + service.name: + - app.kubernetes.io/name + service.namespace: + - app.kubernetes.io/part-of + service.version: + - app.kubernetes.io/version + service_name_template: "" + instance_id: + dns: true + override_hostname: "" + select: {} + host_id: + override: "" + extra_group_attributes: {} + rename_unresolved_hosts: unresolved + rename_unresolved_hosts_outgoing: outgoing + rename_unresolved_hosts_incoming: incoming + metric_span_names_limit: 100 +routes: + unmatched: heuristic + patterns: [] + ignored_patterns: [] + ignore_mode: "" + wildcard_char: '*' + max_path_segment_cardinality: 10 +name_resolver: + sources: + - k8s + cache_len: 1024 + cache_expiry: 5m0s +otel_metrics_export: + allow_service_graph_self_references: false + buckets: + duration_histogram: + - 0 + - 0.005 + - 0.01 + - 0.025 + - 0.05 + - 0.075 + - 0.1 + - 0.25 + - 0.5 + - 0.75 + - 1 + - 2.5 + - 5 + - 7.5 + - 10 + request_size_histogram: + - 0 + - 32 + - 64 + - 128 + - 256 + - 512 + - 1024 + - 2048 + - 4096 + - 8192 + response_size_histogram: + - 0 + - 32 + - 64 + - 128 + - 256 + - 512 + - 1024 + - 2048 + - 4096 + - 8192 + endpoint: '***' + extra_span_resource_attributes: [] + features: 0 + histogram_aggregation: explicit_bucket_histogram + insecure_skip_verify: false + instrumentations: + - '*' + interval: 0s + otel_sdk_log_level: "" + protocol: "" + reporters_cache_len: 256 + ttl: 5m0s +otel_traces_export: + backoff_initial_interval: 0s + backoff_max_elapsed_time: 0s + backoff_max_interval: 0s + batch_timeout: 15s + endpoint: '***' + insecure_skip_verify: false + instrumentations: + - http + - grpc + - sql + - redis + - kafka + - mqtt + - mongo + - 
couchbase + max_queue_size: 4096 + otel_sdk_log_level: "" + protocol: "" + reporters_cache_len: 256 + sampler: + name: "" + arg: "" +prometheus_export: + port: 0 + path: /metrics + disable_build_info: false + features: 0 + instrumentations: + - '*' + buckets: + duration_histogram: + - 0 + - 0.005 + - 0.01 + - 0.025 + - 0.05 + - 0.075 + - 0.1 + - 0.25 + - 0.5 + - 0.75 + - 1 + - 2.5 + - 5 + - 7.5 + - 10 + request_size_histogram: + - 0 + - 32 + - 64 + - 128 + - 256 + - 512 + - 1024 + - 2048 + - 4096 + - 8192 + response_size_histogram: + - 0 + - 32 + - 64 + - 128 + - 256 + - 512 + - 1024 + - 2048 + - 4096 + - 8192 + ttl: 5m0s + service_cache_size: 10000 + allow_service_graph_self_references: false + extra_resource_attributes: [] + extra_span_resource_attributes: [] +trace_printer: disabled +executable_path: "" +autotargetexe: "" +open_port: "" +autotargetlanguage: "" +target_pids: "" +service_name: "" +service_namespace: "" +metrics: + features: 8 +discovery: + services: [] + exclude_services: [] + default_exclude_services: + - name: "" + namespace: "" + open_ports: "" + target_pids: [] + exe_path: (?:^|/)(obi$|otelcol[^/]*$) + languages: "" + exe_path_regexp: "" + k8s_pod_labels: {} + k8s_pod_annotations: {} + containers_only: false + exports: null + sampler: null + routes: null + metrics: + features: 0 + - name: "" + namespace: "" + open_ports: "" + target_pids: [] + exe_path: "" + languages: "" + exe_path_regexp: "" + k8s_pod_labels: {} + k8s_pod_annotations: {} + containers_only: false + exports: null + sampler: null + routes: null + metrics: + features: 0 + k8s_namespace: ^kube-system$|^kube-node-lease$|^local-path-storage$|^cert-manager$|^monitoring$|^gke-connect$|^gke-gmp-system$|^gke-managed-cim$|^gke-managed-filestorecsi$|^gke-managed-metrics-server$|^gke-managed-system$|^gke-system$|^gke-managed-volumepopulator$|^gatekeeper-system + instrument: [] + exclude_instrument: [] + default_exclude_instrument: + - name: "" + namespace: "" + open_ports: "" + languages: 
"" + target_pids: [] + exe_path: '{*/obi,obi,*otelcol,*otelcol-contrib,*otelcol-contrib[!/]*}' + k8s_pod_labels: {} + k8s_pod_annotations: {} + containers_only: false + exports: null + sampler: null + routes: null + metrics: + features: 0 + - name: "" + namespace: "" + open_ports: "" + languages: "" + target_pids: [] + exe_path: "" + k8s_pod_labels: {} + k8s_pod_annotations: {} + containers_only: false + exports: null + sampler: null + routes: null + metrics: + features: 0 + k8s_namespace: '{kube-system,kube-node-lease,local-path-storage,cert-manager,monitoring,gke-connect,gke-gmp-system,gke-managed-cim,gke-managed-filestorecsi,gke-managed-metrics-server,gke-managed-system,gke-system,gke-managed-volumepopulator,gatekeeper-system}' + poll_interval: 0s + skip_go_specific_tracers: false + bpf_pid_filter_off: false + exclude_otel_instrumented_services: true + default_otlp_grpc_port: 4317 + min_process_age: 5s + exclude_otel_instrumented_services_span_metrics: false + route_harvester_timeout: 10s + disabled_route_harvesters: [] + route_harvester_advanced: + java_harvest_delay: 1m0s + excluded_linux_system_paths: + - /lib/systemd/ + - /usr/lib/systemd/ + - /usr/libexec/ + - /sbin/ + - /usr/sbin/ +log_level: INFO +shutdown_timeout: 10s +enforce_sys_caps: false +channel_buffer_len: 50 +channel_send_timeout: 1m0s +channel_send_timeout_panic: false +profile_port: 0 +internal_metrics: + prometheus: + path: /internal/metrics + exporter: disabled + bpf_metric_scrape_interval: 15s +log_config: "" +nodejs: + enabled: true +javaagent: + enabled: true + debug: false + debug_instrumentation: false + attach_timeout: 10s diff --git a/devdocs/config/version-2.0/.verify/dump_default_config.go b/devdocs/config/version-2.0/.verify/dump_default_config.go new file mode 100644 index 0000000000..e6a6d68bdd --- /dev/null +++ b/devdocs/config/version-2.0/.verify/dump_default_config.go @@ -0,0 +1,24 @@ +// Copyright The OpenTelemetry Authors +// SPDX-License-Identifier: Apache-2.0 + +package 
main + +import ( + "log" + "os" + + "gopkg.in/yaml.v3" + + "go.opentelemetry.io/obi/pkg/obi" +) + +func main() { /* main encodes obi.DefaultConfig as YAML to standard output; an encode or encoder-close error is logged via log.Fatalf, which terminates the program. */ + encoder := yaml.NewEncoder(os.Stdout) + + if err := encoder.Encode(obi.DefaultConfig); err != nil { + log.Fatalf("Error encoding YAML to stdout: %v", err) + } + if err := encoder.Close(); err != nil { + log.Fatalf("Error closing encoder: %v", err) + } +} diff --git a/devdocs/config/version-2.0/config-v2.md b/devdocs/config/version-2.0/config-v2.md new file mode 100644 index 0000000000..0cb1cf1e37 --- /dev/null +++ b/devdocs/config/version-2.0/config-v2.md @@ -0,0 +1,642 @@ +# OBI Configuration v2.0 Design + +Status: Draft for discussion +Audience: OBI maintainers and contributors +Scope: configuration model, schema, validation, and migration UX + +The current configuration model has evolved organically with a focus on implementation needs and incremental user feedback. +This has led to structural inconsistencies, redundant controls, and a mix of user-facing and internal configuration in the same sections. +To address this, a user-centric redesign of the configuration schema is proposed here, optimizing for common user journeys, clear ownership of concerns, and a clean separation between user-facing configuration and internal implementation details. + +Goals: + +- Define a clear, consistent configuration schema that maps directly to user intent and common use cases. +- Provide an extension to the OpenTelemetry declarative configuration model that configures OBI-specific behavior. +- Guarantee a smooth migration path from the current v1 configuration shape to the new v2 shape, with clear validation and tooling support. +- Ensure the configuration can be used cleanly in both standalone daemon and Collector receiver deployments. + +## Design principles + +To ensure that the redesign is guided by consistent values and priorities, we define the following design principles for the configuration model, schema, validation, and migration UX. 
+ +- **Journey-first, user-mental-model first** + - Configuration should match what users are trying to do, not internal implementation layering. + - Structure should optimize for readability and safe default operation. + +- **One concern, one place** + - Every concern has one canonical home. + - Avoid parallel knobs for the same behavior across sections. + - OBI-specific concerns remain under `extensions.obi`, independent of generic instrumentation sections. + +- **Compatible with OpenTelemetry declarative configuration** + - Top-level OTel is authoritative for pipeline semantics: + - Exporters/processors/samplers belong to top-level declarative OTel configuration sections. + - OBI extension config should not reintroduce a competing pipeline model. + - OBI-specific behavior lives under `extensions.obi`: + - Runtime capture, selection, protocol controls, enrichment, and OBI limits are extension concerns. + - OBI config should stay namespaced and composable. + - Ownership boundary: + - `instrumentation/development` is not merged into OBI-specific controls. + - OBI behavior is configured through `extensions.obi` only. + +- **Deployment-aware structure** + - OBI runs in two modes: standalone daemon and Collector receiver. + - Configuration structure should reflect which parts are valid in each mode. + - The receiver-valid sub-config should be embeddable directly, without requiring users to manually extract a subset. + - Standalone-only concerns (daemon process management, enrichment, log annotation) must not leak into receiver deployments. + +- **Protocol-local ownership over global toggles** + - Protocol behavior should be configured under each protocol section. + - Enablement and filtering should be signal-scoped at the protocol/network ownership point. + +- **Deterministic precedence over hidden heuristics** + - Ordered rules should define precedence explicitly. + - Configuration should avoid ambiguous override behavior. 
+ - Per-workload overrides use an explicit, closed vocabulary rather than generic deep-merge semantics. + +- **Reduce redundancy and surprise** + - Remove redundant gates that can silently disable already-configured behavior. + - Keep naming concise when section context already conveys meaning. + +- **Versioning should be explicit and layered** + - The root declarative document version and OBI extension version are separate concerns. + - Parsing flow should validate declarative shape first, then parse `extensions.obi` by its own version. + +- **Backward compatibility is deliberate, not accidental** + - Detect declarative vs legacy shape deterministically. + - Legacy aliases are compatibility inputs that map into canonical v2 shape. + +- **Proof-backed evolution** + - Structural changes should be backed by explicit mapping, validation, and parity checks. + - There exists a clear migration path to support users in moving from v1 to v2. + +These principles are intentionally user-centered and decision-oriented, prioritizing clear user mental models, safe defaults, and a clean separation of concerns in the configuration schema. + +## User Journeys + +To ground this redesign in user needs, we start with the top user journeys and expectations. + +### Onboard and activate + +1. A user wants to instrument all services running on platform `<X>`. + - Linux hosts (amd64/arm64) + - Kubernetes workloads + - Collector receiver deployments +2. A user wants to get useful default telemetry quickly, without deep OBI knowledge. +3. A user wants to enable network observability in addition to application observability. + +### Target and scope + +1. A user wants to instrument only `<X>` services and exclude everything else. + - process identity (executable path, PID) + - network identity (open ports) + - language identity (programming language) + - Kubernetes/container identity (metadata, labels/annotations, containers-only) +2. 
A user wants to combine multiple target rules to scope instrumentation and control telemetry volume/cost. +3. A user wants to avoid instrumenting services that are already instrumented. +4. A user wants to apply per-service configuration (for example disable traces for one service, or set custom HTTP routes for another). + +### Export and integrate + +1. A user wants to send telemetry to an OTLP backend. +2. A user wants to expose Prometheus metrics when needed. +3. A user wants to leverage Collector processing and exporting pipelines when running OBI as a receiver. + +### Enrich and optimize + +1. A user wants to enable Kubernetes metadata enrichment for all instrumented services. +2. A user wants to enable protocol-specific parsing only for selected sources (for example HTTP payload extraction). +3. A user wants controls to limit cardinality and data growth. + +### Operate in production + +1. A user wants safe production operations with clear logging, profiling, and shutdown controls. +2. A user wants troubleshooting workflows for "no data", partial data, or unexpected cardinality spikes. +3. A user wants clear visibility into effective/resolved configuration before rollout. + +### Validate and migrate + +1. A user wants invalid or conflicting configuration to fail fast with actionable errors. +2. A user wants to migrate from legacy config keys to the new schema with minimal manual edits. +3. A user wants stable configuration patterns across environments with minimal duplication. 
+ +## Target v2.0 Configuration Shape + +- [Full default-values example](./examples/default-configuration.yaml) (all fields mapped from current defaults) +- [JSON Schema](./obi-extension.schema.json) (schema for `extensions.obi`) + +### High-level shape + +At a high level, the target configuration shape is a standard [OpenTelemetry declarative configuration](https://github.com/open-telemetry/opentelemetry-configuration) document with a root `file_format` field and top-level sections for `resource`, `propagator`, `tracer_provider`, and `meter_provider`. +All OBI-specific configuration lives under `extensions.obi`. + +The `extensions.obi` block is divided by deployment scope: + +- `capture`: valid in **all** deployment modes. Contains everything OBI needs to select workloads and capture telemetry. When running OBI as a Collector receiver, this block is embedded directly in the receiver configuration — no manual extraction required. +- `enrich`, `correlation`, `daemon`: **standalone-mode only**. These sections are not valid in Collector receiver deployments. The Collector pipeline handles enrichment (via processors) and process lifecycle (logging, profiling, shutdown) in receiver mode. + +```yaml +file_format: '1.0-rc.1' + +resource: {} +propagator: {} +tracer_provider: {} +meter_provider: {} + +extensions: + obi: + version: "2.0" + + # Receiver-embeddable: valid in all deployment modes. 
+ capture: + policy: + default_action: include + match_order: first_match_wins + rules: [] + instrumentation: + http: + enabled: { traces: true, metrics: true } + filters: { traces: {}, metrics: {} } + grpc: + enabled: { traces: true, metrics: true } + filters: { traces: {}, metrics: {} } + sql: + enabled: { traces: true, metrics: true } + filters: { traces: {}, metrics: {} } + mysql: {} + postgres: {} + redis: + enabled: { traces: true, metrics: true } + filters: { traces: {}, metrics: {} } + kafka: + enabled: { traces: true, metrics: true } + filters: { traces: {}, metrics: {} } + mongo: + enabled: { traces: true, metrics: true } + filters: { traces: {}, metrics: {} } + couchbase: + enabled: { traces: true, metrics: true } + filters: { traces: {}, metrics: {} } + dns: + enabled: { traces: false, metrics: false } + filters: { traces: {}, metrics: {} } + gpu: + enabled: { traces: true, metrics: true } + filters: { traces: {}, metrics: {} } + runtimes: + go: + enabled: true + filter: {} + nodejs: + enabled: true + filter: {} + java: + enabled: true + filter: {} + debug: {} + attach_timeout: 10s + network: + capture: {} + limits: {} + engine: {} + safety: {} + channels: {} + telemetry: {} + + # Standalone-mode only: not valid in Collector receiver deployments. + enrich: + enrichers: + kubernetes: {} + service_name: {} + attributes: {} + + correlation: + log_trace_annotation: + enabled: false + filter: {} + + daemon: + logging: {} + profiling: {} + shutdown: {} + internal_metrics: {} + telemetry: {} +``` + +### `version` property + +The `extensions.obi.version` field defines the version of the OBI extension schema being used. +This allows the parsing and validation logic to apply the correct schema rules and migration logic based on the declared version. + +### `capture` Section + +The `extensions.obi.capture` section is the receiver-embeddable core of the OBI configuration. +It defines what OBI instruments and how it captures telemetry. 
+This is the **only** section valid in Collector receiver deployments. + +#### Why `capture` is a named grouping + +Early design iterations kept all top-level OBI sections flat: `selection`, `instrumentation`, `runtimes`, `network`, `operations`, `enrich`, `correlation`. +The `capture` grouping was introduced for two reasons: + +1. **Receiver embedding**: OBI runs in two deployment modes — standalone daemon and Collector receiver. In receiver mode, OBI is a telemetry source only. Side-effect features (k8s enrichment, log annotation) and process management (logging, profiling, shutdown) are not the receiver's responsibility — the Collector pipeline handles those. Having a single named block (`capture`) that represents exactly what the receiver embeds makes the boundary unambiguous and avoids requiring users or tools to manually enumerate which fields are valid. + +2. **Correctness over documentation**: An alternative was a flat structure with a `deployment: standalone | receiver` flag, where the parser would reject standalone-only fields in receiver mode. This was rejected because it makes the boundary a runtime enforcement concern rather than a structural schema concern. With `capture` as an explicit block, the schema itself communicates the boundary, and a schema-only view of the Collector receiver config is the `capture` block — no validation flags needed. + +`capture` contains: + +- `policy`: global rule evaluation behavior (default action, match order, timing). +- `rules`: ordered workload selection rules (include/exclude by process identity, Kubernetes metadata, etc.). +- `instrumentation`: protocol-specific capture controls (HTTP, gRPC, SQL, Redis, Kafka, MongoDB, Couchbase, DNS, GPU). +- `runtimes`: language runtime injection controls (Go probes, Node.js SIGUSR1, Java agent attachment). +- `network`: network flow capture configuration. +- `limits`: cardinality and memory guardrails. 
+- `engine`: eBPF engine internals (batching, pid filter, BPF filesystem, propagation, traffic backend, transaction limits, debug). +- `safety`: system capability enforcement checks. +- `channels`: internal backpressure controls. +- `telemetry`: reporter cache sizes and metric TTL tuning for OBI capture internals. + +#### Workload selection: `capture.policy` and `capture.rules` + +`capture.policy` defines global rule evaluation behavior, and `capture.rules` is an ordered list of workload inclusion/exclusion rules. +Rules are based on process identity, network identity, language, Kubernetes metadata, and already-instrumented status. +These are the primary user controls for defining which services get instrumented by OBI. + +**Why `policy` and `rules` are direct children of `capture`, not nested under `capture.selection`** + +An earlier draft had a `selection` sub-section under `capture` (i.e., `capture.selection.policy` and `capture.selection.rules`). +The extra nesting was removed for the following reasons: + +- `capture.rules` is the field the vast majority of users write. Any indirection before reaching it is friction on the most common path. +- The `selection` grouping added no semantic clarity — within `capture`, everything is selection-and-capture configuration. The word `selection` was a label for a concept that `capture` already names. +- Removing the indirection saves one nesting level on every rule users write, with no loss of meaning. +- `capture.policy` and `capture.rules` read naturally as "the capture policy" and "the capture rules", reinforcing the parent section's meaning rather than fighting it. + +#### Per-workload refinement: `refine` on include rules + +Include rules may carry an optional `refine` block that overrides global defaults for matched workloads. + +**Why `refine` exists** + +v1 supported per-selection-rule overrides for exports, sampler, routes, and metrics (`ExportModes`, `SamplerConfig`, `Routes`, `SvcMetricsConfig`). 
+The initial v2 design had no equivalent, which would have required users to either apply global settings to all workloads uniformly or replace the whole config per environment. +This was raised as a key gap by reviewers (grcevski, fstab) — a concrete example: globally emit metrics only, but for a specific namespace emit traces as well; or globally use heuristic routes, but for a specific service specify exact path patterns. + +**Why `refine` uses an explicit closed vocabulary, not generic deep-merge** + +The alternative to an explicit vocabulary is a `refine` block that accepts any subset of the global config shape and deep-merges it. +This was rejected because: + +- Deep-merge semantics are ambiguous for arrays (append vs. replace?), maps (key-level merge vs. whole-map replace?), and absent fields (inherit vs. zero?). Each ambiguity needs a specified rule, and each rule is a source of user confusion. +- The actual v1 per-rule overrides were a small, well-defined set. Generalizing to an arbitrary deep-merge would have supported hypothetical cases at the cost of making the common cases harder to reason about. +- An explicit vocabulary makes the schema self-documenting: users see exactly what can be overridden per workload. + +Current overridable fields in `refine`: + +- `exports`: override which signals (`traces`, `metrics`) are emitted for this workload. +- `http.routes`: override HTTP route patterns and fallback policy for this workload. +- `http.filters`: replace HTTP trace/metric filters for this workload. + +New fields can be added to the `refine` vocabulary deliberately as use cases emerge. + +Example use cases: + +```yaml +capture: + rules: + # Disable traces for a low-priority namespace; keep metrics. + - action: include + name: low-priority-ns + match: + kubernetes: + namespace_glob: ["staging-*"] + refine: + exports: + traces: false + metrics: true + + # Custom HTTP routes for a service that uses path parameters. 
+ - action: include + name: orders-service + match: + kubernetes: + namespace_glob: ["orders"] + refine: + http: + routes: + unmatched: wildcard + patterns: + - /orders/{id} + - /orders/{id}/items +``` + +Sampling overrides are **not** part of the `refine` block. +Per-workload sampling is handled via `tracer_provider.sampler` using the `obi_rule_based` custom sampler, which matches on resource attributes. +See the [Sampling model](#sampling-model) section below. + +### Sampling model + +Sampling remains owned by top-level OTel declarative configuration under `tracer_provider.sampler`. +OBI does not define a parallel sampling section under `extensions.obi`, and selection rules do not override sampler behavior. + +**Why sampling is not in `capture.rules[].refine`** + +The `tracer_provider.sampler` is already the standard, extensible place for sampling policy in OTel declarative config. +Adding a parallel `sampler` field inside `capture.rules[].refine` would violate the "compatible with OTel declarative configuration" principle by introducing a competing pipeline model. +Instead, the `obi_rule_based` custom sampler plugin (a planned v2 deliverable) allows workload-matching sampling behavior to be expressed inside `tracer_provider.sampler`, keeping the concern in its canonical location while still meeting the per-workload use case. + +For v2 scope, OBI will provide and ship an OBI sampler plugin implementation in this project, +so users can reference it directly from `tracer_provider.sampler`. + +When workload-specific sampling behavior is needed, users should configure it through the sampler itself: + +- Use built-in OTel samplers when global behavior is sufficient. +- Use the `obi_rule_based` custom sampler plugin when rule/pattern-based workload sampling is required. 
+ +The plugin implementation will include: + +- sampler component implementation in OBI, +- registration/wiring in OBI runtime initialization, +- validation/documentation for supported sampler rule semantics. + +This keeps concerns separated and explicit: + +- `extensions.obi.capture`: workload discovery and capture configuration. +- `tracer_provider.sampler`: trace sampling policy. + +Example (global built-in sampler): + +```yaml +tracer_provider: + sampler: + parent_based: + root: + trace_id_ratio_based: + ratio: 0.10 +``` + +Example (custom sampler plugin with workload-matching semantics): + +```yaml +tracer_provider: + sampler: + obi_rule_based: + fallback: + always_on: {} + rules: + - match: + attributes: + service.namespace: + - low-priority + sample: + trace_id_ratio_based: + ratio: 0.01 + - match: + attributes: + service.name: + - checkout + sample: + always_on: {} +``` + +### `capture.instrumentation` Section + +The `capture.instrumentation` section defines protocol-specific instrumentation controls, including enablement and filtering for traces and metrics. + +All protocols (HTTP, gRPC, SQL, Redis, Kafka, MongoDB, Couchbase, DNS, GPU) have a consistent base structure for defining whether traces and metrics are enabled and what filters apply to each signal. +Each protocol can also have its own specific configuration subsections. +For example, SQL has `mysql` and `postgres` for driver-specific controls, HTTP has `routes.discovery` for route harvesting controls, etc. + +### `capture.runtimes` Section + +The `capture.runtimes` section defines how language-specific runtime instrumentation injection mechanisms are controlled. +These include Go probes, Node.js SIGUSR1 signal injection, and Java agent attachment. + +Unlike protocol instrumentation, runtimes are not about capturing specific telemetry signals — they are about *how* to instrument a service once it's selected. 
+Each runtime has a simple structure: `enabled` (boolean) controls whether to attempt injection, and `filter` provides optional per-runtime refinement for which selected services receive the injection. +Java also includes additional runtime-specific configuration such as debug controls and attachment timeout. + +### `capture.network` Section + +The `capture.network` section defines how network observability is configured, including endpoint identity, selection criteria, flow lifecycle controls, interface discovery behavior, enrichment options, and diagnostics. +This section is the primary user control for defining how OBI captures and processes network telemetry. + +### `capture.engine` Section + +The `capture.engine` section controls eBPF engine internals: event batching, PID-based filtering, BPF filesystem path, context propagation mode, traffic control backend, transaction duration limits, and debug toggles. + +**Why `engine`, not `capture.capture`** + +Earlier drafts named this sub-section `capture` (i.e., `operations.capture`), which would have produced the awkward path `capture.capture.*` after the restructure. +It was renamed `engine` to accurately describe what it contains (eBPF engine internals) while remaining deployment-neutral — advanced users who tune these settings already know they are configuring BPF behavior. +The alternative `ebpf` was considered but rejected as more implementation-specific than `engine`. + +### `enrich` Section + +The `extensions.obi.enrich` section defines enrichment behavior for telemetry, including Kubernetes metadata, service naming policy, and general attribute enrichment rules. +This section is **standalone-mode only**. + +#### Why `enrich` is standalone-only + +In Collector receiver deployments, OBI is a telemetry source. 
Enrichment is the Collector's responsibility: + +- The [`k8sattributesprocessor`](https://github.com/open-telemetry/opentelemetry-collector-contrib/tree/main/processor/k8sattributesprocessor) covers Kubernetes pod/namespace/deployment metadata and service name derivation following OTel semantic conventions. +- Running OBI's built-in k8s enricher alongside `k8sattributesprocessor` in the same pipeline results in duplicate Kubernetes API queries and potentially conflicting attribute values. +- Attribute enrichment and service naming rules in `enrich` are conceptually a post-capture processing step, which belongs in the Collector pipeline in receiver mode. + +This was raised directly by reviewers (dmitryax) who noted the overlap with existing Collector processors. + +In standalone mode, `enrich` remains essential — there is no Collector pipeline to delegate enrichment to. + +For Kubernetes environments using OBI as a receiver, use `k8sattributesprocessor` and set `enrich.enrichers.kubernetes.mode: disabled` if the `enrich` section is present (or omit `enrich` entirely): + +```yaml +extensions: + obi: + enrich: + enrichers: + kubernetes: + mode: disabled # use k8sattributesprocessor in the Collector pipeline instead +``` + +The `mode` field supports: `autodetect` (default — enable if k8s environment is detected), `enabled`, and `disabled`. + +### `correlation` Section + +The `extensions.obi.correlation` section defines trace-context correlation features that propagate OBI-generated trace context into external streams. +Unlike telemetry instrumentation (protocol signals), correlation features operate *after* traces are captured to enrich related observability data. + +For example, `log_trace_annotation` allows trace context to be injected into application logs from selected services, linking logs to traces through context correlation. + +This section is **standalone-mode only**. 
+ +#### Why `correlation` is standalone-only, and the future of log trace annotation + +`log_trace_annotation` is a side-effectful operation — it writes back to log streams, which is not a telemetry-source concern. +When running as a Collector receiver, these side effects are not appropriate for a receiver component. +Log trace annotation as a standalone Collector component (e.g., a processor or connector) is planned as a separate deliverable, separate from the OBI receiver configuration. + +### `daemon` Section + +The `extensions.obi.daemon` section defines OBI daemon process controls. +This section is **standalone-mode only** — in Collector receiver deployments, the Collector manages all of these concerns. + +**Why `daemon`, not `operations`** + +The previous design had a flat `operations` section containing a mix of capture-valid fields (batching, BPF filesystem, limits) and daemon-only fields (logging, profiling, shutdown, internal metrics). +The restructure into `capture` and `daemon` emerged from analyzing which fields are valid in receiver mode: + +- Fields that govern eBPF capture behavior are valid in all modes → moved into `capture.*` +- Fields that govern the OBI process itself are not valid in receiver mode → grouped in `daemon` + +The name `daemon` was chosen over `process` (too generic), `agent` (overloaded in OTel), `operations` (too broad after the split), and `self` (too terse for a configuration section name). +`daemon` is honest and unambiguous: it configures the OBI daemon process. + +`daemon` contains: + +- `logging`: OBI process log level, format, and debug trace output mode. +- `profiling`: optional pprof endpoint for the OBI process. +- `shutdown`: graceful shutdown timeout. +- `internal_metrics`: OBI daemon's own metrics export (Prometheus or OTLP). +- `telemetry.metrics.prometheus`: Prometheus-exporter-specific metric shaping for OBI standalone output. 
+ +### Compatibility and mapping from v1 + +v2 is a structural redesign of v1, with deterministic compatibility mapping. +Use the table below to find any v1 field and its v2 canonical location. + +Important mapping notes: + +- OTel pipeline structure ownership moved to top-level declarative sections: + - `otel_metrics_export` pipeline structure and transport settings → `meter_provider.*` + - `prometheus_export.port` → `meter_provider.*` + - `otel_traces_export` pipeline structure and transport/sampler settings → `tracer_provider.*` +- The old flat `operations` section is split by deployment scope: + - Capture-valid fields move into `extensions.obi.capture.*` (valid in all deployment modes). + - Daemon-only fields move into `extensions.obi.daemon.*` (standalone mode only). +- Some mappings are non-1:1: + - `filter.application` fans out to `capture.instrumentation.<protocol>.filters.{traces,metrics}`. + - `filter.network` fans out to `capture.network.capture.filters.{traces,metrics}`. + - `metrics.features` maps to `capture.instrumentation.<protocol>.enabled.metrics` + `capture.network.capture.enabled`. + - `discovery.skip_go_specific_tracers` maps to `capture.runtimes.go.enabled` with inverted semantics. 
+ +| v1 field | v2 canonical location | Notes | +|---|---|---| +| `attributes.kubernetes.informers_sync_timeout` | `extensions.obi.enrich.enrichers.kubernetes.informers.initial_sync_timeout` | Move | +| `attributes.kubernetes.informers_resync_period` | `extensions.obi.enrich.enrichers.kubernetes.informers.resync_period` | Move | +| `attributes.metric_span_names_limit` | `extensions.obi.capture.limits.metric_span_names` | Move + rename | +| `attributes.rename_unresolved_hosts` | `extensions.obi.enrich.service_name.unresolved_hosts.names.default` | Move | +| `channel_buffer_len` | `extensions.obi.capture.channels.buffer_len` | Move | +| `channel_send_timeout` | `extensions.obi.capture.channels.send_timeout` | Move | +| `channel_send_timeout_panic` | `extensions.obi.capture.channels.panic_on_send_timeout` | Move + rename | +| `discovery.bpf_pid_filter_off` | `extensions.obi.capture.engine.pid_filter.disabled` | Move + rename | +| `discovery.default_otlp_grpc_port` | `extensions.obi.capture.rules[].match.process.exports_otlp.port` | Move + reshape | +| `discovery.disabled_route_harvesters` | `extensions.obi.capture.instrumentation.http.routes.discovery.disabled_languages` | Move + rename | +| `discovery.exclude_otel_instrumented_services` | `extensions.obi.capture.rules[].match.process.exports_otlp` (exclude rule) | Move + reshape | +| `discovery.excluded_linux_system_paths` | `extensions.obi.capture.rules[].match.process.exe_path_glob` (exclude rule) | Move + reshape | +| `discovery.min_process_age` | `extensions.obi.capture.policy.min_process_age` | Move | +| `discovery.route_harvester_advanced.java_harvest_delay` | `extensions.obi.capture.instrumentation.http.routes.discovery.java.delay` | Move + rename | +| `discovery.route_harvester_timeout` | `extensions.obi.capture.instrumentation.http.routes.discovery.timeout` | Move + rename | +| `discovery.skip_go_specific_tracers` | `extensions.obi.capture.runtimes.go.enabled` | Inverted boolean mapping | +| 
`ebpf.batch_length` | `extensions.obi.capture.engine.batching.batch_length` | Move | +| `ebpf.batch_timeout` | `extensions.obi.capture.engine.batching.batch_timeout` | Move | +| `ebpf.bpf_fs_path` | `extensions.obi.capture.engine.bpf_filesystem.path` | Move + rename | +| `ebpf.buffer_sizes.http` | `extensions.obi.capture.instrumentation.http.buffer_size` | Move | +| `ebpf.buffer_sizes.kafka` | `extensions.obi.capture.instrumentation.kafka.buffer_size` | Move | +| `ebpf.buffer_sizes.mysql` | `extensions.obi.capture.instrumentation.sql.mysql.buffer_size` | Move | +| `ebpf.buffer_sizes.postgres` | `extensions.obi.capture.instrumentation.sql.postgres.buffer_size` | Move | +| `ebpf.dns_request_timeout` | `extensions.obi.capture.instrumentation.dns.request_timeout` | Move | +| `ebpf.heuristic_sql_detect` | `extensions.obi.capture.instrumentation.sql.heuristic_detect` | Move + rename | +| `ebpf.kafka_topic_uuid_cache_size` | `extensions.obi.capture.instrumentation.kafka.topic_uuid_cache_size` | Move | +| `ebpf.log_enricher.cache_size` | `extensions.obi.correlation.log_trace_annotation.cache.size` | Move + rename | +| `ebpf.log_enricher.cache_ttl` | `extensions.obi.correlation.log_trace_annotation.cache.ttl` | Move + rename | +| `ebpf.log_enricher.async_writer_workers` | `extensions.obi.correlation.log_trace_annotation.async_writer.workers` | Move + rename | +| `ebpf.log_enricher.async_writer_channel_len` | `extensions.obi.correlation.log_trace_annotation.async_writer.channel_len` | Move + rename | +| `ebpf.max_transaction_time` | `extensions.obi.capture.engine.transactions.max_duration` | Move + rename | +| `ebpf.mysql_prepared_statements_cache_size` | `extensions.obi.capture.instrumentation.sql.mysql.prepared_statements_cache_size` | Move | +| `ebpf.payload_extraction.http.graphql.enabled` | `extensions.obi.capture.instrumentation.http.payload_extraction.graphql.enabled` | Move | +| `ebpf.payload_extraction.http.sqlpp.enabled` | 
`extensions.obi.capture.instrumentation.http.payload_extraction.sqlpp.enabled` | Move | +| `ebpf.postgres_prepared_statements_cache_size` | `extensions.obi.capture.instrumentation.sql.postgres.prepared_statements_cache_size` | Move | +| `ebpf.redis_db_cache.enabled` | `extensions.obi.capture.instrumentation.redis.db_cache.enabled` | Move | +| `ebpf.traffic_control_backend` | `extensions.obi.capture.engine.traffic.control_backend` | Move + rename | +| `ebpf.wakeup_len` | `extensions.obi.capture.engine.batching.wakeup_len` | Move | +| `enforce_sys_caps` | `extensions.obi.capture.safety.enforce_system_capabilities` | Move + rename | +| `filter.application` | `extensions.obi.capture.instrumentation.<protocol>.filters.{traces,metrics}` | Fan-out to all protocols/signals | +| `filter.network` | `extensions.obi.capture.network.capture.filters.{traces,metrics}` | Fan-out to both signals | +| `internal_metrics.bpf_metric_scrape_interval` | `extensions.obi.daemon.internal_metrics.bpf.scrape_interval` | Move + rename | +| `internal_metrics.exporter` | `extensions.obi.daemon.internal_metrics.exporter` | Move | +| `internal_metrics.prometheus.path` | `extensions.obi.daemon.internal_metrics.prometheus.path` | Move | +| `javaagent.attach_timeout` | `extensions.obi.capture.runtimes.java.attach_timeout` | Move | +| `javaagent.debug` | `extensions.obi.capture.runtimes.java.debug.enabled` | Move + rename | +| `javaagent.debug_instrumentation` | `extensions.obi.capture.runtimes.java.debug.bytecode_instrumentation` | Move + rename | +| `javaagent.enabled` | `extensions.obi.capture.runtimes.java.enabled` | Simplified to boolean | +| `log_config` | `extensions.obi.daemon.logging.format` | Move + rename | +| `log_level` | `extensions.obi.daemon.logging.level` | Move | +| `metrics.features` | `extensions.obi.capture.instrumentation.<protocol>.enabled.metrics` + `extensions.obi.capture.network.capture.enabled` | Split mapping | +| `name_resolver.cache_expiry` | `extensions.obi.enrich.service_name.cache.ttl` | 
Move + rename | +| `name_resolver.cache_len` | `extensions.obi.enrich.service_name.cache.size` | Move + rename | +| `network.agent_ip` | `extensions.obi.capture.network.capture.endpoint_identity.agent_ip` | Move | +| `network.agent_ip_iface` | `extensions.obi.capture.network.capture.endpoint_identity.agent_ip_interface` | Move + rename | +| `network.agent_ip_type` | `extensions.obi.capture.network.capture.endpoint_identity.agent_ip_family` | Move + rename | +| `network.cache_active_timeout` | `extensions.obi.capture.network.capture.flow_lifecycle.active_timeout` | Move + rename | +| `network.cache_max_flows` | `extensions.obi.capture.network.capture.flow_lifecycle.max_tracked_flows` | Move + rename | +| `network.deduper` | `extensions.obi.capture.network.capture.flow_lifecycle.deduplication.strategy` | Move + rename | +| `network.deduper_fc_ttl` | `extensions.obi.capture.network.capture.flow_lifecycle.deduplication.first_come_ttl` | Move + rename | +| `network.direction` | `extensions.obi.capture.network.capture.selection.direction` | Move | +| `network.enable` | `extensions.obi.capture.network.capture.enabled` | Move + rename | +| `network.geo_ip.cache_expiry` | `extensions.obi.capture.network.capture.enrichment.geo_ip.cache.ttl` | Move + rename | +| `network.listen_interfaces` | `extensions.obi.capture.network.capture.interface_discovery.mode` | Move + reshape | +| `network.listen_poll_period` | `extensions.obi.capture.network.capture.interface_discovery.poll_interval` | Move + rename | +| `network.print_flows` | `extensions.obi.capture.network.capture.diagnostics.print_flows` | Move | +| `network.reverse_dns.cache_expiry` | `extensions.obi.capture.network.capture.enrichment.reverse_dns.cache.ttl` | Move + rename | +| `network.sampling` | `extensions.obi.capture.network.capture.flow_lifecycle.sampling` | Move | +| `network.source` | `extensions.obi.capture.network.capture.source` | Move | +| `nodejs.enabled` | `extensions.obi.capture.runtimes.nodejs.enabled` | 
Simplified to boolean | +| `otel_metrics_export.histogram_aggregation` | `meter_provider.readers[0].periodic.exporter.otlp_grpc.default_histogram_aggregation` | OTel ownership move + declarative reader/exporter shape | +| `otel_metrics_export.reporters_cache_len` | `extensions.obi.capture.telemetry.metrics.reporters_cache_len` | Move to capture telemetry tuning | +| `otel_metrics_export.ttl` | `extensions.obi.capture.telemetry.metrics.ttl` | Move to capture telemetry tuning | +| `otel_metrics_export.extra_span_resource_attributes` | `extensions.obi.daemon.telemetry.metrics.prometheus.extra_span_resource_attributes` | Move to daemon telemetry tuning | +| `otel_traces_export.batch_timeout` | `tracer_provider.processors[0].batch.schedule_delay` | OTel ownership move + rename + duration(ms) representation | +| `otel_traces_export.max_queue_size` | `tracer_provider.processors[0].batch.max_queue_size` | OTel ownership move + declarative processor list shape | +| `otel_traces_export.reporters_cache_len` | `extensions.obi.capture.telemetry.traces.reporters_cache_len` | Move to capture telemetry tuning | +| `otel_traces_export.sampler.arg` | `tracer_provider.sampler` | OTel ownership move. Map to built-in sampler arguments when possible; per-workload semantics require the `obi_rule_based` sampler plugin. | +| `otel_traces_export.sampler.name` | `tracer_provider.sampler` | OTel ownership move. Map to built-in sampler names when possible; per-workload semantics require the `obi_rule_based` sampler plugin. 
| +| `profile_port` | `extensions.obi.daemon.profiling.port` | Move | +| `prometheus_export.allow_service_graph_self_references` | `extensions.obi.daemon.telemetry.metrics.prometheus.allow_service_graph_self_references` | Move to daemon telemetry tuning | +| `prometheus_export.extra_resource_attributes` | `extensions.obi.daemon.telemetry.metrics.prometheus.extra_resource_attributes` | Move to daemon telemetry tuning | +| `prometheus_export.extra_span_resource_attributes` | `extensions.obi.daemon.telemetry.metrics.prometheus.extra_span_resource_attributes` | Move to daemon telemetry tuning | +| `prometheus_export.port` | `meter_provider.readers[1].pull.exporter.prometheus/development.port` | OTel ownership move + declarative reader/exporter shape | +| `prometheus_export.path` | _No canonical OTel core path in current declarative schema_ | Distribution-specific/unsupported in current target shape | +| `prometheus_export.service_cache_size` | `extensions.obi.daemon.telemetry.metrics.prometheus.span_metrics_service_cache_size` | Move to daemon telemetry tuning + rename | +| `routes.max_path_segment_cardinality` | `extensions.obi.capture.instrumentation.http.routes.max_path_segment_cardinality` | Move | +| `routes.unmatched` | `extensions.obi.capture.instrumentation.http.routes.unmatched` | Move | +| `routes.wildcard_char` | `extensions.obi.capture.instrumentation.http.routes.wildcard_char` | Move | +| `shutdown_timeout` | `extensions.obi.daemon.shutdown.timeout` | Move | +| `trace_printer` | `extensions.obi.daemon.logging.debug_trace_output` | Move + rename | + +## Related docs + +- Migration, validation, and tooling plan: [migration.md](migration.md) +- OBI extension schema: [obi-extension.schema.json](obi-extension.schema.json) +- Default configuration example: [examples/default-configuration.yaml](examples/default-configuration.yaml) + +## Appendix: upstream alignment status (2026-02-24) + +The OTel declarative schema does not currently define `extensions` as a 
first-class root node, +but the root schema allows additional properties and does not explicitly exclude it. + +After review and discussion in upstream issues: + +- [Placement discussion](https://github.com/open-telemetry/opentelemetry-configuration/issues/335) +- [OBI comment with context](https://github.com/open-telemetry/opentelemetry-configuration/issues/335#issuecomment-3954773010) +- [Ownership/overlap follow-up](https://github.com/open-telemetry/opentelemetry-configuration/issues/545) + +Decision for OBI v2: + +- Keep `extensions.obi` as the canonical OBI-owned configuration namespace. +- Keep top-level declarative OTel sections authoritative for pipeline semantics. +- Do not treat `instrumentation/development` as an OBI configuration source. + +This is an intentional middle-ground while upstream schema guidance evolves. +OBI will support `extensions.obi` with its own parser and validation rules until a better +standardized schema location is available. diff --git a/devdocs/config/version-2.0/examples/default-configuration.yaml b/devdocs/config/version-2.0/examples/default-configuration.yaml new file mode 100644 index 0000000000..af56e6a0ae --- /dev/null +++ b/devdocs/config/version-2.0/examples/default-configuration.yaml @@ -0,0 +1,613 @@ +# Authored v2 configuration example (journey-first, one-concern-one-place). +# Values are mapped from current defaults in pkg/obi/DefaultConfig. + +# OTel declarative document version. +file_format: '1.0-rc.1' + +# Standard OTel resource attributes (service.name, service.namespace, etc.). +resource: {} +# Global propagation format configuration (tracecontext, baggage, b3, etc.). +propagator: {} + +# Trace pipeline definition (OTel-native). OBI-specific tracing controls live under extensions.obi.*. +tracer_provider: + # Sampling policy is configured in top-level OTel tracer_provider.sampler. + # OBI selection decides which workloads are instrumented; it does not override sampler behavior. 
+ # The custom `obi_rule_based` sampler shown below is intended to be implemented + # and shipped by OBI as part of the v2 work. + # + # Example (global built-in sampler): + # sampler: + # parent_based: + # root: + # trace_id_ratio_based: + # ratio: 0.10 + # + # Example (custom sampler plugin with workload rules): + # sampler: + # obi_rule_based: + # fallback: + # always_on: {} + # rules: + # - match: + # attributes: + # service.namespace: + # - low-priority + # sample: + # trace_id_ratio_based: + # ratio: 0.01 + # - match: + # attributes: + # service.name: + # - checkout + # sample: + # always_on: {} + # Trace processors applied before export. + processors: + - batch: + # Max in-memory queue before flush. + max_queue_size: 4096 + # Delay interval between batch exports in milliseconds. + schedule_delay: 15000 + # Trace exporter. + exporter: + otlp_grpc: + endpoint: "" + tls: + insecure: false + +# Metrics pipeline definition (OTel-native). OBI-specific metric behavior lives under extensions.obi.*. +meter_provider: + # Metric readers/exporters. + readers: + - periodic: + # Metrics export interval in milliseconds. + interval: 60000 + exporter: + otlp_grpc: + endpoint: "" + default_histogram_aggregation: explicit_bucket_histogram + tls: + insecure: false + - pull: + exporter: + prometheus/development: + port: 0 + +extensions: + obi: + # OBI extension schema version (parsed after root declarative parse succeeds). + version: "2.0" + + # capture defines what OBI instruments and how it captures telemetry. + # This is the only block valid in Collector receiver deployments. + # When running OBI as a Collector receiver, embed this block directly + # in the receiver configuration. + capture: + # policy controls global rule evaluation behavior. + policy: + # Include by default unless matched by an exclude rule. + default_action: include + # First matching rule decides outcome. 
+ match_order: first_match_wins + # Process re-scan interval (0s uses implementation default/event-driven behavior). + poll_interval: 0s + # Ignore very new processes until they are old enough to evaluate. + min_process_age: 5s + # rules is an ordered list of workload inclusion/exclusion rules. + rules: + - action: exclude + name: exclude-obi-and-collectors + description: Exclude OBI and collector binaries to avoid self-instrumentation and collector recursion. + match: + process: + exe_path_glob: + - "*/obi" + - "obi" + - "*otelcol" + - "*otelcol-contrib" + - "*otelcol-contrib[!/]*" + - action: exclude + name: exclude-system-namespaces + description: Exclude common platform/system Kubernetes namespaces from instrumentation by default. + match: + kubernetes: + namespace_glob: + - kube-system + - kube-node-lease + - local-path-storage + - cert-manager + - monitoring + - gke-connect + - gke-gmp-system + - gke-managed-cim + - gke-managed-filestorecsi + - gke-managed-metrics-server + - gke-managed-system + - gke-system + - gke-managed-volumepopulator + - gatekeeper-system + - action: exclude + name: exclude-otlp-exporters + description: Exclude services that already export OTLP to prevent duplicate telemetry pipelines. + match: + process: + exports_otlp: + port: 4317 + protocol: protobuf + - action: exclude + name: exclude-linux-system-paths + description: Exclude Linux system/service executable paths that are not typical application workloads. + match: + process: + exe_path_glob: + - /lib/systemd/* + - /usr/lib/systemd/* + - /usr/libexec/* + - /sbin/* + - /usr/sbin/* + + # Example (commented): per-workload refinement on include rules. + # The `refine` block overrides global defaults for workloads matched by this rule. + # Use this to disable signals, add per-service HTTP routes, or tighten filters + # for a specific workload without duplicating the whole config. 
+ # + # - action: include + # name: shopping-cart + # description: Include shopping cart services with trace-only export and custom HTTP routes. + # match: + # kubernetes: + # namespace_glob: + # - shopping-* + # refine: + # # Disable metrics for this workload; emit traces only. + # exports: + # traces: true + # metrics: false + # http: + # # Per-workload HTTP route patterns (override global patterns for this service). + # routes: + # unmatched: wildcard + # patterns: + # - /cart/{id} + # - /cart/{id}/items + # # Per-workload HTTP filters (replace global filters for this service). + # filters: + # traces: + # url.path: + # not_match: "/health*" + # metrics: {} + + instrumentation: + # HTTP protocol instrumentation and HTTP-specific enrichment/parsing. + http: + enabled: + traces: true + metrics: true + # Filter behavior: allow records that match all configured criteria; drop non-matching records. + # Use `match` to keep matching values, and `not_match` to keep values that do not match. + # Example (commented): keep prod namespaces and drop health endpoints. + # filters: + # traces: + # k8s.namespace.name: + # match: "prod-*" + # url.path: + # not_match: "/health*" + # metrics: + # k8s.namespace.name: + # match: "prod-*" + filters: + traces: {} + metrics: {} + # Capture configured incoming/outgoing request headers when enabled. + track_request_headers: false + # HTTP request timeout guard for capture/processing. + request_timeout: 0s + # Internal parser/capture buffer size (0 uses default behavior). + buffer_size: 0 + # HTTP route normalization and low-cardinality route naming controls. + routes: + # Fallback policy when no explicit route match is found. + unmatched: heuristic + # Optional user-supplied route patterns. + patterns: [] + # Optional route patterns to ignore. + ignored_patterns: [] + # Ignore applies to traces, metrics, or all. + ignore_mode: all + # Route wildcard replacement character. 
+ wildcard_char: "*" + # Caps per-segment route cardinality in heuristic mode. + max_path_segment_cardinality: 10 + # Route template discovery from runtime/language frameworks. + discovery: + # Global timeout for route template discovery runs. + timeout: 10s + # Languages to skip for route template discovery. + disabled_languages: [] + java: + # Delay before Java route template discovery to allow runtime readiness. + delay: 1m0s + # HTTP payload-level extraction features. + payload_extraction: + graphql: + enabled: false + elasticsearch: + enabled: false + aws: + enabled: false + sqlpp: + enabled: false + endpoint_patterns: + - /query/service + # gRPC protocol instrumentation toggle. + grpc: + enabled: + traces: true + metrics: true + filters: + traces: {} + metrics: {} + # SQL protocol instrumentation and driver-specific tuning. + sql: + enabled: + traces: true + metrics: true + filters: + traces: {} + metrics: {} + heuristic_detect: false + mysql: + buffer_size: 0 + prepared_statements_cache_size: 1024 + postgres: + buffer_size: 0 + prepared_statements_cache_size: 1024 + # Redis instrumentation and DB cache behavior. + redis: + enabled: + traces: true + metrics: true + filters: + traces: {} + metrics: {} + db_cache: + enabled: false + max_size: 1000 + # Kafka instrumentation tuning. + kafka: + enabled: + traces: true + metrics: true + filters: + traces: {} + metrics: {} + buffer_size: 0 + topic_uuid_cache_size: 1024 + # Mongo instrumentation tuning. + mongo: + enabled: + traces: true + metrics: true + filters: + traces: {} + metrics: {} + requests_cache_size: 1024 + # Couchbase instrumentation tuning. + couchbase: + enabled: + traces: true + metrics: true + filters: + traces: {} + metrics: {} + db_cache_size: 1024 + # DNS event instrumentation. + dns: + enabled: + traces: false + metrics: false + filters: + traces: {} + metrics: {} + request_timeout: 5s + # GPU instrumentation mode. 
+ gpu: + enabled: + traces: true + metrics: true + filters: + traces: {} + metrics: {} + enabled_mode: auto + + # runtimes controls injection of language-specific runtime instrumentation mechanisms. + # These are independent from protocol instrumentation (HTTP, gRPC, etc.) and are used + # to inject Go probes, Node.js SIGUSR1 signal handlers, or Java agents into workloads + # that match the selection rules. The `enabled` flag controls whether to attempt + # injection; `filter` refines which selected services receive the injection. + runtimes: + # Go runtime instrumentation allows Go packages to be instrumented with probes. + # When enabled, Go services can use Go-specific probes. When disabled, they fall back + # to generic network-only instrumentation. + go: + enabled: true + # Optional filter to refine which selected services get Go instrumentation. + # By default (empty filter), all selected Go services receive instrumentation. + filter: {} + # Node.js runtime instrumentation allows Node.js services to be instrumented via + # SIGUSR1 signal injection. The enabled flag controls whether to attempt injection. + nodejs: + enabled: true + # Optional filter to refine which selected services get Node.js instrumentation. + # By default (empty filter), all selected Node.js services receive instrumentation. + filter: {} + # Java runtime instrumentation allows Java services to be instrumented via agent + # attachment. The enabled flag controls whether to attempt agent injection. + java: + enabled: true + # Optional filter to refine which selected services get Java instrumentation. + # By default (empty filter), all selected Java services receive instrumentation. + filter: {} + # Java-specific debug controls. + debug: + enabled: false + bytecode_instrumentation: false + # Timeout for Java agent attachment. 
+ attach_timeout: 10s + + # network controls how network flow telemetry is captured and interpreted + # for in-scope workloads (interfaces/protocols/cidrs, dedupe, enrichment, diagnostics). + network: + capture: + # Master switch for network flow observability. + enabled: false + # Capture backend: socket_filter (default) or tc. + source: socket_filter + # Identity of this OBI agent in flow metadata. + endpoint_identity: + agent_ip: "" + agent_ip_interface: external + agent_ip_family: any + # Scope/filtering for captured network traffic. + selection: + interfaces: + include: [] + exclude: + - lo + protocols: + include: [] + exclude: [] + # Optional CIDR definitions for src/dst network grouping attributes. + cidrs: [] + direction: both + # Network-level filtering for flow capture/output by signal. + # Filter behavior: allow records that match all configured criteria; drop non-matching records. + # Example (commented): keep pod traffic and drop kube-system. + # filters: + # metrics: + # dst.k8s.namespace.name: + # match: "*" + # src.k8s.namespace.name: + # not_match: "kube-system" + # traces: {} + filters: + traces: {} + metrics: {} + # Flow cache and reduction behavior. + flow_lifecycle: + max_tracked_flows: 5000 + active_timeout: 5s + deduplication: + # Dedup strategy for multi-interface duplicate flows. + strategy: first_come + first_come_ttl: 0s + # Packet sampling rate (0 means disabled/default behavior). + sampling: 0 + # How interface changes are detected. + interface_discovery: + # watch (event-based) or poll. + mode: watch + poll_interval: 10s + # Optional network-level data enrichment. + enrichment: + geo_ip: + ipinfo: + path: "" + maxmind: + country_path: "" + asn_path: "" + cache: + size: 512 + ttl: 1h0m0s + reverse_dns: + mode: none + cache: + size: 256 + ttl: 1h0m0s + # Debug/diagnostics output controls. + diagnostics: + print_flows: false + + # limits constrains cardinality and memory growth during capture. 
+ limits: + network_packets: 5000 + metric_span_names: 100 + + # engine controls eBPF engine internals. + engine: + debug: + bpf: false + protocol_print: false + pid_filter: + disabled: false + batching: + wakeup_len: 500 + batch_length: 100 + batch_timeout: 1s + propagation: + context_propagation: disabled + override_bpfloop_enabled: false + disable_black_box_cp: false + traffic: + control_backend: auto + high_request_volume: false + transactions: + max_duration: 5m0s + bpf_filesystem: + path: /sys/fs/bpf/ + + # safety enforces host/runtime capability checks. + safety: + enforce_system_capabilities: false + + # channels controls internal backpressure behavior. + channels: + buffer_len: 50 + send_timeout: 1m0s + panic_on_send_timeout: false + + # telemetry tunes reporter caches and metric TTLs for OBI capture internals. + telemetry: + traces: + reporters_cache_len: 256 + metrics: + ttl: 5m0s + reporters_cache_len: 256 + + # enrich defines metadata enrichment behavior for telemetry. + # Standalone-mode only: not valid in Collector receiver deployments. + # In receiver mode, use k8sattributesprocessor and similar Collector processors instead. + enrich: + # Runtime configuration for metadata enrichers. + enrichers: + kubernetes: + # autodetect: enable if a Kubernetes environment is detected (default). + # enabled: always enable. + # disabled: disable. Recommended when running OBI as a Collector receiver + # and using k8sattributesprocessor to avoid duplicate k8s API queries. + mode: autodetect + cluster_name: "" + auth: + kubeconfig_path: "" + # Informer cache/sync controls. + informers: + initial_sync_timeout: 30s + resync_period: 30m0s + disabled: [] + drop_external: false + # Optional remote/local metadata cache controls. + metadata_cache: + address: "" + restrict_local_node: false + source_labels: + service_name: "" + service_namespace: "" + # DNS enricher runtime configuration. 
+ dns: + enabled: true + # Service identity enrichment policy from configured enrichers. + service_name: + # Shared cache for peer/host service-name lookups. + cache: + size: 1024 + ttl: 5m0s + # Rule-based service identity mapping. + # Rule order is the precedence model: earlier rules win when they set the same target field. + rules: + - id: k8s-default + from: kubernetes + description: Default Kubernetes label mapping into canonical service identity. + map: + service.name: + - app.kubernetes.io/name + service.namespace: + - app.kubernetes.io/part-of + service.version: + - app.kubernetes.io/version + # Fallback names when peer/host resolution fails. + unresolved_hosts: + names: + default: unresolved + outgoing: outgoing + incoming: incoming + # Attribute enrichment/selection controls. + attributes: + # Rule order is the precedence model: earlier rules win when they set the same target attribute. + rules: + - id: k8s-default-attributes + from: kubernetes + description: Default Kubernetes-derived attribute enrichment and selection. + add: + # Explicit attribute mapping policy for Kubernetes metadata. + map: + k8s.namespace.name: + - kubernetes.namespace + k8s.pod.name: + - kubernetes.pod.name + k8s.deployment.name: + - kubernetes.workload.deployment + k8s.node.name: + - kubernetes.node.name + - id: dns-default-attributes + from: dns + description: Default DNS-based host and instance identity enrichment. + add: + # Explicit attribute mapping policy for DNS metadata. + map: + host.name: + - dns.host_name + - dns.ptr_name + service.instance.id: + - dns.host_name + server.address: + - dns.resolved_ip + + # correlation propagates OBI trace context into external streams. + # Standalone-mode only: not valid in Collector receiver deployments. + # Log trace annotation as a Collector component is planned as a separate deliverable. + correlation: + log_trace_annotation: + enabled: false + # Optional per-correlator refinement filter. 
+ # By default (empty filter), applies to all selected services + # that have log streams where trace context can be injected. + filter: {} + cache: + ttl: 30m0s + size: 128 + async_writer: + workers: 8 + channel_len: 500 + + # daemon controls the OBI process itself: logging, profiling, shutdown, and internal metrics. + # Standalone-mode only: not valid in Collector receiver deployments. + # In receiver mode the Collector manages these concerns. + daemon: + # Logging behavior and startup config dump. + logging: + level: INFO + format: "" + debug_trace_output: disabled + # Optional profiling endpoint. + profiling: + port: 0 + # Graceful shutdown controls. + shutdown: + timeout: 10s + # OBI daemon's own metrics export configuration. + internal_metrics: + exporter: disabled + prometheus: + port: 0 + path: /internal/metrics + bpf: + scrape_interval: 15s + # Prometheus-exporter-specific metric shaping for OBI standalone output. + telemetry: + metrics: + prometheus: + allow_service_graph_self_references: false + span_metrics_service_cache_size: 10000 + extra_resource_attributes: [] + extra_span_resource_attributes: [] diff --git a/devdocs/config/version-2.0/migration.md b/devdocs/config/version-2.0/migration.md new file mode 100644 index 0000000000..e33956a778 --- /dev/null +++ b/devdocs/config/version-2.0/migration.md @@ -0,0 +1,173 @@ +# OBI Configuration Migration Plan + +Status: Draft for discussion +Audience: OBI maintainers and contributors +Scope: migration behavior, validation policy, rollout strategy, and tooling expectations + +This document defines how the project and users will migrate configuration from the v1 to v2 model safely and predictably. + +Goals: + +- Deterministic parsing and validation for v2 inputs. +- Consistent behavior across standalone host and collector receiver host. +- Actionable diagnostics for operators before rollout. + +## v2 Configuration Parsing + +A new configuration package will be added. 
Its purpose will be to provide: + +- Parsing functionality for the `extensions.obi` portion of the `v2` configuration +- Exported types representing the OBI configuration + +Using this new package, both the OBI command and the collector receiver will parse user-provided configuration. +It will be up to these callers to determine: + +- how to fall back to v1 support when the parser informs them that the input format is v1 +- how to set up the SDK, which is outside the scope of the v2 configuration package + +### Integration with `otelconf` + +It is assumed that users who need the SDK will use the `go.opentelemetry.io/contrib/otelconf` package to parse top-level objects of the declarative config accordingly. +SDK object construction is outside the v2 configuration package scope and configuration for that portion of the configuration will be ignored. +The OBI v2 configuration package only parses and validates `extensions.obi`. +It does not merge or translate `instrumentation/development` into OBI-owned settings. + +### Deployment-mode validation + +`extensions.obi` has a two-tier structure: + +- `capture`: receiver-embeddable — valid in **all** deployment modes. +- `enrich`, `correlation`, `daemon`: **standalone-mode only** — not valid in Collector receiver deployments. + +When parsing configuration for a Collector receiver context, the v2 parser will reject any configuration that includes standalone-only sections (`enrich`, `correlation`, `daemon`) and surface a structured error with remediation guidance. +This validation is structural: the parser does not rely on a `deployment:` flag in the config — the section presence itself is the indicator. + +When parsing for a standalone context, all sections are valid. +The presence of `enrich`, `correlation`, and `daemon` is not required — these sections have defaults when omitted. 
+ +### Backward compatibility behavior + +Based on the structure of the configuration, the version of that configuration can be determined from two fields: + +- Root `version` identifies the OTel declarative document contract. +- `extensions.obi.version` identifies the OBI extension contract. + +From this, the v2 configuration package will behave as follows: + +- The v2 parser only accepts supported v2 configuration contracts. +- If config is not v2 (including detectable v1 shape), return a structured version error with actionable guidance. +- Caller decides fallback behavior (for example, route to legacy v1 parsing/setup path). +- The v2 parser does not perform legacy setup or implicit v1→v2 translation. +- If both `extensions.obi` and `instrumentation/development` are present, OBI behavior is sourced from `extensions.obi` only. + +Going forward, the configuration package may need to add support for future versions (e.g., v3). +It will be structured in a way to seamlessly support these new configuration files. + +### Why these responsibilities belong to the caller + +The v2 configuration package is deliberately a parsing and validation layer — not a setup or migration layer. +This separation was a conscious design choice: + +- Version detection and fallback routing are host-specific concerns. The standalone `obi` command and the Collector receiver have different error surfaces, logging facilities, and user communication channels. Centralizing routing logic in the package would force one error-handling strategy onto both hosts. +- A parser that silently attempts v1→v2 translation would hide version mismatches from operators. Explicit versioning with a structured error gives the caller — and ultimately the operator — full visibility into what version was provided and what was expected. +- Keeping the package scope narrow (parse + validate `extensions.obi`) makes it testable in isolation, without requiring a full OBI host context. 
+ +## Migration CLI + +The `obi` command needs to have a configuration migration tool added to it. +It needs to support semantics like the following. + +```shell +obi config migrate --from v1 --to v2 +``` + +- Read v1 or mixed legacy input. +- Produce canonical v2 output. +- Emit a mapping report (moved, renamed, split/fan-out, inverted semantics). +- Emit warnings for deprecated aliases. +- Fail only when the rewrite is non-deterministic. + +### What non-deterministic means + +Most v1→v2 mappings are 1:1 moves and renames (see the [v1→v2 mapping table](./config-v2.md#compatibility-and-mapping-from-v1)). +A small set of mappings are structurally non-trivial: + +- **Fan-out**: `filter.application` fans out to per-protocol `capture.instrumentation.<protocol>.filters.{traces,metrics}`. The migration tool applies the v1 value as the default for all protocols and emits a mapping report explaining the fan-out. +- **Shape change**: `discovery.excluded_linux_system_paths` and `discovery.exclude_otel_instrumented_services` are rewritten into structured rule entries under `capture.rules`. The migration tool generates these entries and flags them for operator review. +- **Inverted boolean**: `discovery.skip_go_specific_tracers: true` maps to `capture.runtimes.go.enabled: false`. The migration tool applies the inversion and emits a note. +- **Sampler**: `otel_traces_export.sampler.name` and `.arg` migrate to `tracer_provider.sampler`. Simple cases (e.g., `always_on`, `trace_id_ratio_based`) map directly to built-in OTel declarative sampler types. Custom or workload-specific sampler configs may require operator intervention and use of the `obi_rule_based` sampler plugin. + +Only mappings that cannot be resolved without operator input cause migration to fail with a non-deterministic error. + +## Validation CLI + +The `obi` command needs to have a configuration validation tool added to it. +It needs to support semantics like the following. 
+ +```shell +obi config validate ./path/to/config +``` + +- Read v1 or later configuration as input via an argument +- Parse and validate the configuration +- Emit warnings for invalid configuration detected +- Emit warnings for deprecated configuration versions +- In receiver context (detected via flag or auto-detection), reject standalone-only sections (`enrich`, `correlation`, `daemon`) with an explicit error identifying the section and remediation steps + +## Rollout strategy + +### Phase 0 — Build contract and tooling + +- Finalize v2 configuration artifacts: schema, example, migration doc, parity check. +- Implement the new `extensions.obi` v2 configuration package (parse + validate `capture`, `enrich`, `correlation`, `daemon`). +- Implement and ship the `obi_rule_based` sampler plugin (referenced via `tracer_provider.sampler`) with documented rule semantics. + - This is required before v2 GA: per-workload sampling is a first-class use case and must be addressable without requiring workarounds. + - Integrate sampler plugin registration/wiring in OBI startup paths (standalone and receiver embedding paths where applicable). +- Implement migration CLI. +- Implement validation CLI. + +### Phase 1 — Freeze and identify + +- Freeze v1 key surface except critical fixes. +- Lock version-detection and compatibility behavior. +- Communicate v1 freeze to users and direct them to migration tooling. + +### Phase 2 — Dual-read period + +- Attempt v2 parser first; on explicit not-v2 result, invoke legacy parser path. +- Both parsers active simultaneously; no user-visible behavior change for v1 configs. +- Use this phase to gather feedback on v2 ergonomics and migration tooling. + +### Phase 3 — v2-first default + +- Default docs/examples/CI to v2. +- Deprecate the v1 configuration. Warn users in logs and validation output, and tell them how to migrate with tooling. +- v1 parsing remains available but is no longer the recommended path. 
+ +### Phase 4 — v1 retirement + +- Remove v1 parsing. Error on v1 input, and tell users how to migrate with tooling. + +### Why this phased approach + +The dual-read period (Phase 2) is the key risk mitigation: + +- Users on v1 configs continue working without changes during v2 stabilization. +- The version-detection boundary is exercised in production before v1 parsing is removed. +- Feedback on v2 ergonomics and migration tooling can be incorporated before the v2-first default. + +A hard cutover (skip Phase 2) was considered and rejected because it places migration burden on operators with no fallback path if the v2 parser has edge cases. The phased approach lets operators validate at their own pace before the v1 path is gone. + +## Operator-facing quality bar + +Before rollout, migration UX should ensure: + +- Every failure has clear remediation text. +- Every warning identifies exact source key and target key. +- Resolved/effective config is inspectable. +- Same input produces same output across environments. +- Receiver-context validation clearly identifies which standalone-only sections are invalid and why. + +## Open decisions + +- Timeline for final v1 removal after v2 GA. 
diff --git a/devdocs/config/version-2.0/obi-extension.schema.json b/devdocs/config/version-2.0/obi-extension.schema.json new file mode 100644 index 0000000000..513952496b --- /dev/null +++ b/devdocs/config/version-2.0/obi-extension.schema.json @@ -0,0 +1,1528 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "https://opentelemetry.io/obi/schemas/obi-extension.schema.json", + "title": "OBI Extension Configuration", + "description": "Schema for the `extensions.obi` section in OTel declarative config files.", + "type": "object", + "additionalProperties": false, + "properties": { + "version": { + "type": "string", + "description": "OBI extension schema version.\nProperty is required and must be `2.0`.\n", + "const": "2.0" + }, + "capture": { + "$ref": "#/$defs/Capture", + "description": "Configure what OBI captures and how.\nThis block is valid in all deployment modes (standalone daemon and Collector receiver).\nWhen running OBI as a Collector receiver, this is the sub-config that is embedded directly.\nIf omitted, defaults are used.\n" + }, + "enrich": { + "$ref": "#/$defs/Enrich", + "description": "Configure metadata enrichment behavior.\nStandalone-mode only: not valid in Collector receiver deployments.\nIf omitted, enrichment defaults are used.\n" + }, + "correlation": { + "$ref": "#/$defs/Correlation", + "description": "Configure trace-context correlation features for external streams.\nStandalone-mode only: not valid in Collector receiver deployments.\nIf omitted, correlation defaults are used.\n" + }, + "daemon": { + "$ref": "#/$defs/Daemon", + "description": "Configure OBI daemon process behavior (logging, profiling, shutdown, internal metrics).\nStandalone-mode only: not valid in Collector receiver deployments.\nIf omitted, daemon defaults are used.\n" + } + }, + "$defs": { + "SignalEnabled": { + "type": "object", + "description": "Enable/disable each signal explicitly for a protocol scope.", + "additionalProperties": false, + 
"properties": { + "traces": { + "type": "boolean", + "default": true, + "description": "Enable trace signal generation for this scope.\nIf omitted, true is used.\n" + }, + "metrics": { + "type": "boolean", + "default": true, + "description": "Enable metric signal generation for this scope.\nIf omitted, true is used.\n" + } + } + }, + "MatchDefinition": { + "type": "object", + "description": "Match expression using either an include (`match`) or exclude (`not_match`) wildcard pattern.", + "additionalProperties": false, + "properties": { + "match": { + "type": "string", + "description": "Include wildcard pattern. Use `*` for zero or more characters and `?` for a single character.\nCannot be set together with `.not_match`.\n" + }, + "not_match": { + "type": "string", + "description": "Exclude wildcard pattern. Use `*` for zero or more characters and `?` for a single character.\nCannot be set together with `.match`.\n" + } + }, + "oneOf": [ + { + "required": [ + "match" + ], + "not": { + "required": [ + "not_match" + ] + } + }, + { + "required": [ + "not_match" + ], + "not": { + "required": [ + "match" + ] + } + } + ] + }, + "FilterMap": { + "type": "object", + "description": "Map of filter keys to match definitions.", + "additionalProperties": { + "$ref": "#/$defs/MatchDefinition" + } + }, + "SignalFilters": { + "type": "object", + "description": "Signal-specific filters for traces and metrics.", + "additionalProperties": false, + "properties": { + "traces": { + "$ref": "#/$defs/FilterMap", + "description": "Filters applied to traces in this scope.\nIf omitted, no trace filtering is applied.\n" + }, + "metrics": { + "$ref": "#/$defs/FilterMap", + "description": "Filters applied to metrics in this scope.\nIf omitted, no metric filtering is applied.\n" + } + } + }, + "SelectionExportsOTLP": { + "type": "object", + "description": "Match predicate for detecting workloads that already export OTLP.", + "additionalProperties": false, + "properties": { + "port": { + "type": 
"integer", + "minimum": 1, + "maximum": 65535, + "description": "OTLP receiver port expected on the target workload.\nValue must be between 1 and 65535.\n" + }, + "protocol": { + "type": "string", + "description": "OTLP transport/protocol identifier expected on the target workload (for example `protobuf`).\n" + } + }, + "required": [ + "port", + "protocol" + ] + }, + "SelectionProcessMatch": { + "type": "object", + "description": "Process-level selection predicates.", + "additionalProperties": true, + "properties": { + "exe_path_glob": { + "type": "array", + "minItems": 1, + "items": { + "type": "string" + }, + "description": "Glob patterns matched against process executable paths.\n" + }, + "exports_otlp": { + "$ref": "#/$defs/SelectionExportsOTLP", + "description": "Predicate matching workloads that expose OTLP exports.\n" + } + } + }, + "SelectionKubernetesMatch": { + "type": "object", + "description": "Kubernetes-level selection predicates.", + "additionalProperties": true, + "properties": { + "namespace_glob": { + "type": "array", + "minItems": 1, + "items": { + "type": "string" + }, + "description": "Glob patterns matched against Kubernetes namespace names.\n" + } + } + }, + "SelectionRuleMatch": { + "type": "object", + "description": "Rule match clause with light validation for known predicates while remaining extensible.", + "minProperties": 1, + "additionalProperties": true, + "properties": { + "process": { + "$ref": "#/$defs/SelectionProcessMatch", + "description": "Process identity and runtime predicate matches.\n" + }, + "kubernetes": { + "$ref": "#/$defs/SelectionKubernetesMatch", + "description": "Kubernetes metadata predicate matches.\n" + } + } + }, + "SelectionRuleRefine": { + "type": "object", + "description": "Optional workload-scoped configuration overrides applied on top of global defaults when an include rule matches.\nOnly explicitly listed fields are overridable.\nOnly meaningful on include rules; ignored on exclude rules.\n", + 
"additionalProperties": false, + "properties": { + "exports": { + "type": "object", + "description": "Override which signals are emitted for matched workloads.\nIf omitted, global instrumentation enablement applies.\n", + "additionalProperties": false, + "properties": { + "traces": { + "type": "boolean", + "description": "Enable or disable trace emission for this workload.\nIf omitted, the global protocol trace enablement applies.\n" + }, + "metrics": { + "type": "boolean", + "description": "Enable or disable metric emission for this workload.\nIf omitted, the global protocol metric enablement applies.\n" + } + } + }, + "http": { + "type": "object", + "description": "HTTP-specific per-workload overrides.\nFields here take precedence over the global `instrumentation.http` values for matched workloads.\n", + "additionalProperties": false, + "properties": { + "routes": { + "type": "object", + "description": "Per-workload HTTP route override.\nIf omitted, global route configuration applies.\n", + "additionalProperties": false, + "properties": { + "unmatched": { + "type": "string", + "enum": [ + "heuristic", + "wildcard", + "random_hash", + "disabled" + ], + "description": "Fallback policy when no explicit route pattern matches.\nIf omitted, global `instrumentation.http.routes.unmatched` applies.\n" + }, + "patterns": { + "type": "array", + "items": { + "type": "string" + }, + "description": "Explicit route patterns for this workload.\nThese are merged with (and take precedence over) global patterns.\nIf omitted, global patterns apply.\n" + }, + "ignored_patterns": { + "type": "array", + "items": { + "type": "string" + }, + "description": "Route patterns to ignore for this workload.\nIf omitted, global ignored patterns apply.\n" + } + } + }, + "filters": { + "$ref": "#/$defs/SignalFilters", + "description": "Per-workload HTTP trace and metric filters.\nThese replace (not merge with) the global HTTP filters for matched workloads.\nIf omitted, global filters apply.\n" + } 
+ } + } + } + }, + "SelectionPolicy": { + "type": "object", + "description": "Global rule evaluation behavior.", + "additionalProperties": false, + "properties": { + "default_action": { + "type": "string", + "enum": [ + "include", + "exclude" + ], + "default": "include", + "description": "Default action when no rule matches.\nValues include: `include`, `exclude`.\nIf omitted, `include` is used.\n" + }, + "match_order": { + "type": "string", + "enum": [ + "first_match_wins", + "last_match_wins" + ], + "default": "first_match_wins", + "description": "Rule evaluation order semantics.\nValues include: `first_match_wins`, `last_match_wins`.\nIf omitted, `first_match_wins` is used.\n" + }, + "poll_interval": { + "type": "string", + "default": "5s", + "description": "Polling interval used to refresh selection inputs.\nIf omitted, `5s` is used.\n" + }, + "min_process_age": { + "type": "string", + "default": "5s", + "description": "Minimum process age required before a process becomes eligible for selection.\nIf omitted, `5s` is used.\n" + } + } + }, + "SelectionRules": { + "type": "array", + "description": "Ordered selection rules.\nIf omitted, the default rule set is used.\n", + "minItems": 1, + "default": [ + { + "action": "exclude", + "name": "exclude-obi-and-collectors", + "description": "Exclude OBI and collector binaries to avoid self-instrumentation and collector recursion.", + "match": { + "process": { + "exe_path_glob": [ + "*/obi", + "obi", + "*otelcol", + "*otelcol-contrib", + "*otelcol-contrib[!/]*" + ] + } + } + }, + { + "action": "exclude", + "name": "exclude-system-namespaces", + "description": "Exclude common platform/system Kubernetes namespaces from instrumentation by default.", + "match": { + "kubernetes": { + "namespace_glob": [ + "kube-system", + "kube-node-lease", + "local-path-storage", + "cert-manager", + "monitoring", + "gke-connect", + "gke-gmp-system", + "gke-managed-cim", + "gke-managed-filestorecsi", + "gke-managed-metrics-server", + 
"gke-managed-system", + "gke-system", + "gke-managed-volumepopulator", + "gatekeeper-system" + ] + } + } + }, + { + "action": "exclude", + "name": "exclude-otlp-exporters", + "description": "Exclude services that already export OTLP to prevent duplicate telemetry pipelines.", + "match": { + "process": { + "exports_otlp": { + "port": 4317, + "protocol": "protobuf" + } + } + } + }, + { + "action": "exclude", + "name": "exclude-linux-system-paths", + "description": "Exclude Linux system/service executable paths that are not typical application workloads.", + "match": { + "process": { + "exe_path_glob": [ + "/lib/systemd/*", + "/usr/lib/systemd/*", + "/usr/libexec/*", + "/sbin/*", + "/usr/sbin/*" + ] + } + } + } + ], + "items": { + "type": "object", + "additionalProperties": true, + "properties": { + "action": { + "type": "string", + "enum": [ + "include", + "exclude" + ], + "description": "Rule action when `.match` evaluates true.\nValues include: `include`, `exclude`.\nProperty is required and must be non-null.\n" + }, + "name": { + "type": "string", + "description": "Human-readable rule name.\nIf omitted, implementation may derive a generated identifier.\n" + }, + "description": { + "type": "string", + "description": "Human-readable rule intent.\nIf omitted, no description is recorded.\n" + }, + "match": { + "$ref": "#/$defs/SelectionRuleMatch", + "description": "Rule match clause. Known predicates are lightly validated while extension predicates remain allowed.\nProperty is required and must be non-null.\n" + }, + "refine": { + "$ref": "#/$defs/SelectionRuleRefine", + "description": "Optional workload-scoped configuration overrides for matched workloads.\nOnly meaningful on include rules; ignored on exclude rules.\nIf omitted, global defaults apply without modification.\n" + } + }, + "required": [ + "action", + "match" + ] + } + }, + "Capture": { + "type": "object", + "description": "What OBI captures and how.\nValid in all deployment modes. 
When running as a Collector receiver, this block is embedded directly in the receiver config.", + "additionalProperties": false, + "properties": { + "policy": { + "$ref": "#/$defs/SelectionPolicy", + "description": "Global rule evaluation behavior.\nIf omitted, defaults are used.\n" + }, + "rules": { + "$ref": "#/$defs/SelectionRules", + "description": "Ordered workload selection rules.\nIf omitted, the default rule set is used.\n" + }, + "instrumentation": { + "$ref": "#/$defs/Instrumentation", + "description": "Protocol instrumentation controls.\nIf omitted, per-protocol defaults are used.\n" + }, + "runtimes": { + "$ref": "#/$defs/Runtimes", + "description": "Runtime injection mechanisms (Go, Node.js, Java).\nIf omitted, runtime injection defaults are used.\n" + }, + "network": { + "$ref": "#/$defs/Network", + "description": "Network capture controls.\nIf omitted, network capture defaults are used.\n" + }, + "limits": { + "type": "object", + "description": "Numeric limits to constrain memory and cardinality growth.\nIf omitted, defaults are used.\n", + "additionalProperties": false, + "properties": { + "network_packets": { + "type": "integer", + "minimum": 1, + "default": 5000, + "description": "Maximum number of network packets to retain in internal processing windows.\nValue must be positive.\nIf omitted, `5000` is used.\n" + }, + "metric_span_names": { + "type": "integer", + "minimum": 0, + "default": 100, + "description": "Maximum tracked span-name cardinality for derived metrics.\nValue must be non-negative.\nA value of `0` disables span-name limiting.\nIf omitted, `100` is used.\n" + } + } + }, + "engine": { + "type": "object", + "description": "eBPF engine internals: batching, pid filtering, BPF filesystem, context propagation, traffic backend, transaction limits, and debug toggles.", + "additionalProperties": false, + "properties": { + "debug": { + "type": "object", + "additionalProperties": false, + "properties": { + "bpf": { + "type": "boolean", + 
"default": false, + "description": "Enable eBPF debug logging.\nIf omitted, false is used.\n" + }, + "protocol_print": { + "type": "boolean", + "default": false, + "description": "Enable protocol debug printing.\nIf omitted, false is used.\n" + } + } + }, + "pid_filter": { + "type": "object", + "additionalProperties": false, + "properties": { + "disabled": { + "type": "boolean", + "default": false, + "description": "Disable PID-based filtering in discovery/capture flows.\nIf omitted, false is used.\n" + } + } + }, + "batching": { + "type": "object", + "additionalProperties": false, + "properties": { + "wakeup_len": { + "type": "integer", + "minimum": 0, + "default": 500, + "description": "Ringbuffer wakeup threshold for capture batching.\nIf omitted, `500` is used.\n" + }, + "batch_length": { + "type": "integer", + "minimum": 1, + "default": 100, + "description": "Maximum number of captured events grouped per batch.\nIf omitted, `100` is used.\n" + }, + "batch_timeout": { + "type": "string", + "default": "1s", + "description": "Maximum time to wait before flushing a partial batch.\nIf omitted, `1s` is used.\n" + } + } + }, + "propagation": { + "type": "object", + "additionalProperties": false, + "properties": { + "context_propagation": { + "type": "string", + "enum": [ + "disabled", + "headers", + "tcp", + "ip", + "all" + ], + "default": "disabled", + "description": "Context propagation mode.\nValues include: `disabled`, `headers`, `tcp`, `ip`, `all`.\nIf omitted, `disabled` is used.\n" + }, + "override_bpfloop_enabled": { + "type": "boolean", + "default": false, + "description": "Override kernel bpf_loop capability checks.\nIf omitted, false is used.\n" + }, + "disable_black_box_cp": { + "type": "boolean", + "default": false, + "description": "Disable black-box context propagation.\nIf omitted, false is used.\n" + } + } + }, + "traffic": { + "type": "object", + "additionalProperties": false, + "properties": { + "control_backend": { + "type": "string", + "enum": [ 
+ "auto", + "tc", + "tcx" + ], + "default": "auto", + "description": "Traffic control backend selection.\nValues include: `auto`, `tc`, `tcx`.\nIf omitted, `auto` is used.\n" + }, + "high_request_volume": { + "type": "boolean", + "default": false, + "description": "Optimize capture behavior for high request volume workloads.\nIf omitted, false is used.\n" + } + } + }, + "transactions": { + "type": "object", + "additionalProperties": false, + "properties": { + "max_duration": { + "type": "string", + "default": "5m0s", + "description": "Maximum duration allowed for correlating request/response transactions.\nIf omitted, `5m0s` is used.\n" + } + } + }, + "bpf_filesystem": { + "type": "object", + "additionalProperties": false, + "properties": { + "path": { + "type": "string", + "default": "/sys/fs/bpf/", + "description": "Filesystem path used for pinned BPF objects/maps.\nIf omitted, `/sys/fs/bpf/` is used.\n" + } + } + } + } + }, + "safety": { + "type": "object", + "additionalProperties": false, + "properties": { + "enforce_system_capabilities": { + "type": "boolean", + "default": false, + "description": "Enforce required system capabilities on startup.\nIf omitted, false is used.\n" + } + } + }, + "channels": { + "type": "object", + "description": "Internal channel backpressure controls.", + "additionalProperties": false, + "properties": { + "buffer_len": { + "type": "integer", + "minimum": 0, + "default": 50, + "description": "Runtime channel buffer length.\nIf omitted, `50` is used.\n" + }, + "send_timeout": { + "type": "string", + "default": "1m0s", + "description": "Runtime channel send timeout.\nIf omitted, `1m0s` is used.\n" + }, + "panic_on_send_timeout": { + "type": "boolean", + "default": false, + "description": "Panic when a runtime channel send timeout occurs.\nIf omitted, false is used.\n" + } + } + }, + "telemetry": { + "type": "object", + "description": "Reporter cache and metric TTL tuning for OBI capture internals.", + "additionalProperties": false, + 
"properties": { + "traces": { + "type": "object", + "additionalProperties": false, + "properties": { + "reporters_cache_len": { + "type": "integer", + "minimum": 0, + "default": 256, + "description": "Trace reporter cache size used by OBI internals.\nIf omitted, `256` is used.\n" + } + } + }, + "metrics": { + "type": "object", + "additionalProperties": false, + "properties": { + "ttl": { + "type": "string", + "default": "5m0s", + "description": "Metric time-to-live for stale metric series generated by OBI internals.\nIf omitted, `5m0s` is used.\n" + }, + "reporters_cache_len": { + "type": "integer", + "minimum": 0, + "default": 256, + "description": "Metric reporter cache size used by OBI internals.\nIf omitted, `256` is used.\n" + } + } + } + } + } + } + }, + "Daemon": { + "type": "object", + "description": "OBI daemon process controls. Standalone-mode only: not valid in Collector receiver deployments.", + "additionalProperties": false, + "properties": { + "logging": { + "type": "object", + "description": "Logging behavior and debug-output controls.", + "additionalProperties": false, + "properties": { + "level": { + "type": "string", + "enum": [ + "DEBUG", + "INFO", + "WARN", + "ERROR" + ], + "default": "INFO", + "description": "OBI log level.\nValues include: `DEBUG`, `INFO`, `WARN`, `ERROR`.\nIf omitted, `INFO` is used.\n" + }, + "format": { + "type": "string", + "enum": [ + "", + "yaml", + "json" + ], + "default": "", + "description": "Startup configuration log format. 
Empty string disables startup config logging.\nValues include: `yaml`, `json`.\nIf omitted, empty string is used.\n" + }, + "debug_trace_output": { + "type": "string", + "enum": [ + "disabled", + "counter", + "text", + "json", + "json_indent" + ], + "default": "disabled", + "description": "Debug trace output mode.\nValues include: `disabled`, `counter`, `text`, `json`, `json_indent`.\nIf omitted, `disabled` is used.\n" + } + } + }, + "profiling": { + "type": "object", + "additionalProperties": false, + "properties": { + "port": { + "type": "integer", + "minimum": 0, + "default": 0, + "description": "Profiling server port. Set `0` to disable.\nIf omitted, `0` is used.\n" + } + } + }, + "shutdown": { + "type": "object", + "additionalProperties": false, + "properties": { + "timeout": { + "type": "string", + "default": "10s", + "description": "Graceful shutdown timeout.\nIf omitted, `10s` is used.\n" + } + } + }, + "internal_metrics": { + "type": "object", + "description": "OBI daemon's own metrics exporter configuration.", + "additionalProperties": false, + "properties": { + "exporter": { + "type": "string", + "enum": [ + "disabled", + "prometheus", + "otel" + ], + "default": "disabled", + "description": "Internal metrics exporter mode.\nValues include: `disabled`, `prometheus`, `otel`.\nIf omitted, `disabled` is used.\n" + }, + "prometheus": { + "type": "object", + "additionalProperties": false, + "properties": { + "port": { + "type": "integer", + "minimum": 0, + "default": 0, + "description": "Internal Prometheus exporter port. 
Set `0` to disable listener.\nIf omitted, `0` is used.\n" + }, + "path": { + "type": "string", + "default": "/internal/metrics", + "description": "Internal Prometheus metrics path.\nIf omitted, `/internal/metrics` is used.\n" + } + } + }, + "bpf": { + "type": "object", + "additionalProperties": false, + "properties": { + "scrape_interval": { + "type": "string", + "default": "15s", + "description": "Scrape interval for BPF internal metrics collection.\nIf omitted, `15s` is used.\n" + } + } + } + } + }, + "telemetry": { + "type": "object", + "description": "Prometheus-exporter-specific metric shaping for OBI standalone output.", + "additionalProperties": false, + "properties": { + "metrics": { + "type": "object", + "additionalProperties": false, + "properties": { + "prometheus": { + "type": "object", + "description": "Prometheus-exporter-specific metric tuning.", + "additionalProperties": false, + "properties": { + "allow_service_graph_self_references": { + "type": "boolean", + "default": false, + "description": "Allow service graph self-references in OBI-generated Prometheus metrics.\nIf omitted, false is used.\n" + }, + "span_metrics_service_cache_size": { + "type": "integer", + "minimum": 0, + "default": 10000, + "description": "Service cache size for span-metrics aggregation in OBI-generated Prometheus metrics.\nIf omitted, `10000` is used.\n" + }, + "extra_resource_attributes": { + "type": "array", + "items": { + "type": "string" + }, + "default": [], + "description": "Additional resource attributes to include in OBI-generated Prometheus metrics.\nIf omitted, no extra attributes are added.\n" + }, + "extra_span_resource_attributes": { + "type": "array", + "items": { + "type": "string" + }, + "default": [], + "description": "Additional span resource attributes to include in OBI-generated Prometheus span metrics.\nIf omitted, no extra attributes are added.\n" + } + } + } + } + } + } + } + } + }, + "InstrumentationProtocol": { + "type": "object", + "description": 
"Shared envelope for a protocol/language instrumentation section.", + "additionalProperties": false, + "properties": { + "enabled": { + "$ref": "#/$defs/SignalEnabled", + "description": "Enable controls for traces and metrics.\nIf omitted, signal defaults are used.\n" + }, + "filters": { + "$ref": "#/$defs/SignalFilters", + "description": "Signal-specific filters used by this section.\nIf omitted, no filtering is applied.\n" + } + } + }, + "HttpInstrumentation": { + "type": "object", + "description": "HTTP instrumentation controls.", + "additionalProperties": false, + "properties": { + "enabled": { + "$ref": "#/$defs/SignalEnabled", + "description": "Enable HTTP traces/metrics.\nIf omitted, signal defaults are used.\n" + }, + "filters": { + "$ref": "#/$defs/SignalFilters", + "description": "Signal-specific HTTP filters.\nIf omitted, no filtering is applied.\n" + }, + "track_request_headers": { + "type": "boolean", + "default": false, + "description": "Capture configured inbound/outbound HTTP headers.\nIf omitted, false is used.\n" + }, + "request_timeout": { + "type": "string", + "default": "0s", + "description": "HTTP request timeout guard for capture/processing.\nIf omitted, `0s` is used.\n" + }, + "buffer_size": { + "type": "integer", + "minimum": 0, + "default": 0, + "description": "HTTP parser/capture buffer size.\nIf omitted, `0` is used.\n" + }, + "routes": { + "type": "object", + "description": "HTTP route normalization and discovery controls.", + "additionalProperties": false, + "properties": { + "unmatched": { + "type": "string", + "default": "heuristic", + "description": "Fallback route policy when no explicit pattern matches.\nIf omitted, `heuristic` is used.\n" + }, + "patterns": { + "type": "array", + "items": { + "type": "string" + }, + "default": [], + "description": "User-supplied route patterns.\nIf omitted, no explicit patterns are used.\n" + }, + "ignored_patterns": { + "type": "array", + "items": { + "type": "string" + }, + "default": [], + 
"description": "Route patterns to ignore.\nIf omitted, no patterns are ignored.\n" + }, + "ignore_mode": { + "type": "string", + "default": "all", + "description": "Signal mode for ignored patterns.\nIf omitted, `all` is used.\n" + }, + "wildcard_char": { + "type": "string", + "default": "*", + "description": "Wildcard replacement character for route templates.\nIf omitted, `*` is used.\n" + }, + "max_path_segment_cardinality": { + "type": "integer", + "minimum": 0, + "default": 10, + "description": "Maximum per-segment path cardinality in heuristic mode.\nIf omitted, `10` is used.\n" + }, + "discovery": { + "type": "object", + "description": "HTTP route template discovery controls.", + "additionalProperties": false, + "properties": { + "timeout": { + "type": "string", + "default": "10s", + "description": "Global timeout for route discovery runs.\nIf omitted, `10s` is used.\n" + }, + "disabled_languages": { + "type": "array", + "items": { + "type": "string" + }, + "default": [], + "description": "Languages excluded from route discovery.\nIf omitted, no languages are excluded.\n" + }, + "java": { + "type": "object", + "additionalProperties": false, + "properties": { + "delay": { + "type": "string", + "default": "1m0s", + "description": "Delay before Java route discovery starts.\nIf omitted, `1m0s` is used.\n" + } + } + } + } + } + } + }, + "payload_extraction": { + "type": "object", + "description": "HTTP payload extraction feature toggles.", + "additionalProperties": false, + "properties": { + "graphql": { + "type": "object", + "additionalProperties": false, + "properties": { + "enabled": { + "type": "boolean", + "default": false, + "description": "Enable GraphQL payload extraction.\nIf omitted, false is used.\n" + } + } + }, + "elasticsearch": { + "type": "object", + "additionalProperties": false, + "properties": { + "enabled": { + "type": "boolean", + "default": false, + "description": "Enable Elasticsearch payload extraction.\nIf omitted, false is used.\n" + } + 
} + }, + "aws": { + "type": "object", + "additionalProperties": false, + "properties": { + "enabled": { + "type": "boolean", + "default": false, + "description": "Enable AWS payload extraction.\nIf omitted, false is used.\n" + } + } + }, + "sqlpp": { + "type": "object", + "additionalProperties": false, + "properties": { + "enabled": { + "type": "boolean", + "default": false, + "description": "Enable SQL++ payload extraction.\nIf omitted, false is used.\n" + }, + "endpoint_patterns": { + "type": "array", + "items": { + "type": "string" + }, + "default": [ + "/query/service" + ], + "description": "Endpoint patterns eligible for SQL++ payload extraction.\nIf omitted, `/query/service` is used.\n" + } + } + } + } + } + } + }, + "TraceAnnotation": { + "type": "object", + "description": "Trace-context annotation controls for process logs.", + "additionalProperties": false, + "properties": { + "enabled": { + "type": "boolean", + "default": false, + "description": "Enable trace-context annotation on log streams for selected workloads.\nIf omitted, false is used.\n" + }, + "filter": { + "$ref": "#/$defs/FilterMap", + "description": "Attribute-based refinement filter for log trace annotation.\nApplied after selection admits a workload.\nIf omitted, no additional filtering is applied.\n" + }, + "cache": { + "type": "object", + "additionalProperties": false, + "properties": { + "ttl": { + "type": "string", + "default": "30m0s", + "description": "Trace annotation cache TTL.\nIf omitted, `30m0s` is used.\n" + }, + "size": { + "type": "integer", + "minimum": 0, + "default": 128, + "description": "Trace annotation cache size.\nIf omitted, `128` is used.\n" + } + } + }, + "async_writer": { + "type": "object", + "additionalProperties": false, + "properties": { + "workers": { + "type": "integer", + "minimum": 0, + "default": 8, + "description": "Trace annotation async writer worker count.\nIf omitted, `8` is used.\n" + }, + "channel_len": { + "type": "integer", + "minimum": 0, + 
"default": 500, + "description": "Trace annotation async writer channel length.\nIf omitted, `500` is used.\n" + } + } + } + } + }, + "Correlation": { + "type": "object", + "description": "Trace-context correlation feature controls for external streams.", + "additionalProperties": false, + "properties": { + "log_trace_annotation": { + "$ref": "#/$defs/TraceAnnotation", + "description": "Trace-context annotation controls for process logs.\nIf omitted, defaults are used.\n" + } + }, + "default": { + "log_trace_annotation": { + "enabled": false, + "filter": {}, + "cache": { + "ttl": "30m0s", + "size": 128 + }, + "async_writer": { + "workers": 8, + "channel_len": 500 + } + } + } + }, + "SqlInstrumentation": { + "type": "object", + "description": "SQL instrumentation controls.", + "additionalProperties": false, + "properties": { + "enabled": { + "$ref": "#/$defs/SignalEnabled", + "description": "Enable SQL traces/metrics.\nIf omitted, signal defaults are used.\n" + }, + "filters": { + "$ref": "#/$defs/SignalFilters", + "description": "Signal-specific SQL filters.\nIf omitted, no filtering is applied.\n" + }, + "heuristic_detect": { + "type": "boolean", + "default": false, + "description": "Enable heuristic SQL protocol detection.\nIf omitted, false is used.\n" + }, + "mysql": { + "type": "object", + "additionalProperties": false, + "properties": { + "buffer_size": { + "type": "integer", + "minimum": 0, + "default": 0, + "description": "MySQL parser buffer size.\nIf omitted, `0` is used.\n" + }, + "prepared_statements_cache_size": { + "type": "integer", + "minimum": 0, + "default": 1024, + "description": "MySQL prepared statements cache size.\nIf omitted, `1024` is used.\n" + } + } + }, + "postgres": { + "type": "object", + "additionalProperties": false, + "properties": { + "buffer_size": { + "type": "integer", + "minimum": 0, + "default": 0, + "description": "PostgreSQL parser buffer size.\nIf omitted, `0` is used.\n" + }, + "prepared_statements_cache_size": { + "type": 
"integer", + "minimum": 0, + "default": 1024, + "description": "PostgreSQL prepared statements cache size.\nIf omitted, `1024` is used.\n" + } + } + } + } + }, + "RedisInstrumentation": { + "type": "object", + "description": "Redis instrumentation controls.", + "additionalProperties": false, + "properties": { + "enabled": { + "$ref": "#/$defs/SignalEnabled", + "description": "Enable Redis traces/metrics.\nIf omitted, signal defaults are used.\n" + }, + "filters": { + "$ref": "#/$defs/SignalFilters", + "description": "Signal-specific Redis filters.\nIf omitted, no filtering is applied.\n" + }, + "db_cache": { + "type": "object", + "additionalProperties": false, + "properties": { + "enabled": { + "type": "boolean", + "default": false, + "description": "Enable Redis DB cache.\nIf omitted, false is used.\n" + }, + "max_size": { + "type": "integer", + "minimum": 0, + "default": 1000, + "description": "Redis DB cache max size.\nIf omitted, `1000` is used.\n" + } + } + } + } + }, + "KafkaInstrumentation": { + "type": "object", + "description": "Kafka instrumentation controls.", + "additionalProperties": false, + "properties": { + "enabled": { + "$ref": "#/$defs/SignalEnabled", + "description": "Enable Kafka traces/metrics.\nIf omitted, signal defaults are used.\n" + }, + "filters": { + "$ref": "#/$defs/SignalFilters", + "description": "Signal-specific Kafka filters.\nIf omitted, no filtering is applied.\n" + }, + "buffer_size": { + "type": "integer", + "minimum": 0, + "default": 0, + "description": "Kafka parser buffer size.\nIf omitted, `0` is used.\n" + }, + "topic_uuid_cache_size": { + "type": "integer", + "minimum": 0, + "default": 1024, + "description": "Kafka topic UUID cache size.\nIf omitted, `1024` is used.\n" + } + } + }, + "MongoInstrumentation": { + "type": "object", + "description": "MongoDB instrumentation controls.", + "additionalProperties": false, + "properties": { + "enabled": { + "$ref": "#/$defs/SignalEnabled", + "description": "Enable MongoDB 
traces/metrics.\nIf omitted, signal defaults are used.\n" + }, + "filters": { + "$ref": "#/$defs/SignalFilters", + "description": "Signal-specific MongoDB filters.\nIf omitted, no filtering is applied.\n" + }, + "requests_cache_size": { + "type": "integer", + "minimum": 0, + "default": 1024, + "description": "Mongo request cache size.\nIf omitted, `1024` is used.\n" + } + } + }, + "CouchbaseInstrumentation": { + "type": "object", + "description": "Couchbase instrumentation controls.", + "additionalProperties": false, + "properties": { + "enabled": { + "$ref": "#/$defs/SignalEnabled", + "description": "Enable Couchbase traces/metrics.\nIf omitted, signal defaults are used.\n" + }, + "filters": { + "$ref": "#/$defs/SignalFilters", + "description": "Signal-specific Couchbase filters.\nIf omitted, no filtering is applied.\n" + }, + "db_cache_size": { + "type": "integer", + "minimum": 0, + "default": 1024, + "description": "Couchbase DB cache size.\nIf omitted, `1024` is used.\n" + } + } + }, + "DnsInstrumentation": { + "type": "object", + "description": "DNS instrumentation controls.", + "additionalProperties": false, + "properties": { + "enabled": { + "$ref": "#/$defs/SignalEnabled", + "description": "Enable DNS traces/metrics.\nIf omitted, DNS defaults are used.\n", + "default": { + "traces": false, + "metrics": false + } + }, + "filters": { + "$ref": "#/$defs/SignalFilters", + "description": "Signal-specific DNS filters.\nIf omitted, no filtering is applied.\n" + }, + "request_timeout": { + "type": "string", + "default": "5s", + "description": "DNS request timeout.\nIf omitted, `5s` is used.\n" + } + } + }, + "GpuInstrumentation": { + "type": "object", + "description": "GPU instrumentation controls.", + "additionalProperties": false, + "properties": { + "enabled": { + "$ref": "#/$defs/SignalEnabled", + "description": "Enable GPU traces/metrics.\nIf omitted, signal defaults are used.\n" + }, + "filters": { + "$ref": "#/$defs/SignalFilters", + "description": 
"Signal-specific GPU filters.\nIf omitted, no filtering is applied.\n" + }, + "enabled_mode": { + "type": "string", + "default": "auto", + "description": "GPU instrumentation mode.\nIf omitted, `auto` is used.\n" + } + } + }, + "RuntimeInjection": { + "type": "object", + "description": "Runtime injection mechanism configuration. Controls whether a runtime injection (Go probes, Node.js SIGUSR1, Java agent) is enabled for workloads selected via the global selection rules.", + "additionalProperties": false, + "properties": { + "enabled": { + "type": "boolean", + "default": true, + "description": "Enable this runtime injection mechanism.\nIf omitted, true is used.\n" + }, + "filter": { + "$ref": "#/$defs/FilterMap", + "description": "Optional per-injector refinement filter.\nIf omitted, injection applies to all selected services.\n" + } + } + }, + "JavaRuntime": { + "type": "object", + "description": "Java runtime injection controls.", + "additionalProperties": false, + "properties": { + "enabled": { + "type": "boolean", + "default": true, + "description": "Enable Java agent injection.\nIf omitted, true is used.\n" + }, + "filter": { + "$ref": "#/$defs/FilterMap", + "description": "Optional per-injector refinement filter.\nIf omitted, injection applies to all selected services.\n" + }, + "debug": { + "type": "object", + "additionalProperties": false, + "properties": { + "enabled": { + "type": "boolean", + "default": false, + "description": "Enable Java agent debug mode.\nIf omitted, false is used.\n" + }, + "bytecode_instrumentation": { + "type": "boolean", + "default": false, + "description": "Enable Java bytecode instrumentation debug logs.\nIf omitted, false is used.\n" + } + } + }, + "attach_timeout": { + "type": "string", + "default": "10s", + "description": "Java attach timeout.\nIf omitted, `10s` is used.\n" + } + } + }, + "Runtimes": { + "type": "object", + "description": "Runtime injection mechanism configurations. 
These control how language-specific runtime instrumentation is injected into workloads selected via the global selection rules.", + "additionalProperties": false, + "properties": { + "go": { + "$ref": "#/$defs/RuntimeInjection", + "description": "Go runtime instrumentation controls.\nIf omitted, Go injection defaults are used.\n", + "default": { + "enabled": true, + "filter": {} + } + }, + "nodejs": { + "$ref": "#/$defs/RuntimeInjection", + "description": "Node.js runtime instrumentation controls.\nIf omitted, Node.js injection defaults are used.\n", + "default": { + "enabled": true, + "filter": {} + } + }, + "java": { + "$ref": "#/$defs/JavaRuntime", + "description": "Java runtime instrumentation controls.\nIf omitted, Java injection defaults are used.\n", + "default": { + "enabled": true, + "filter": {} + } + } + } + }, + "Instrumentation": { + "type": "object", + "description": "Protocol and language instrumentation configuration.", + "additionalProperties": false, + "properties": { + "http": { + "$ref": "#/$defs/HttpInstrumentation", + "description": "HTTP instrumentation controls.\nIf omitted, HTTP defaults are used.\n" + }, + "grpc": { + "$ref": "#/$defs/InstrumentationProtocol", + "description": "gRPC instrumentation controls.\nIf omitted, gRPC defaults are used.\n", + "default": { + "enabled": { + "traces": true, + "metrics": true + }, + "filters": {} + } + }, + "sql": { + "$ref": "#/$defs/SqlInstrumentation", + "description": "SQL instrumentation controls.\nIf omitted, SQL defaults are used.\n" + }, + "redis": { + "$ref": "#/$defs/RedisInstrumentation", + "description": "Redis instrumentation controls.\nIf omitted, Redis defaults are used.\n" + }, + "kafka": { + "$ref": "#/$defs/KafkaInstrumentation", + "description": "Kafka instrumentation controls.\nIf omitted, Kafka defaults are used.\n" + }, + "mongo": { + "$ref": "#/$defs/MongoInstrumentation", + "description": "MongoDB instrumentation controls.\nIf omitted, MongoDB defaults are used.\n" + }, + 
"couchbase": { + "$ref": "#/$defs/CouchbaseInstrumentation", + "description": "Couchbase instrumentation controls.\nIf omitted, Couchbase defaults are used.\n" + }, + "dns": { + "$ref": "#/$defs/DnsInstrumentation", + "description": "DNS instrumentation controls.\nIf omitted, DNS defaults are used.\n" + }, + "gpu": { + "$ref": "#/$defs/GpuInstrumentation", + "description": "GPU event instrumentation controls.\nIf omitted, GPU defaults are used.\n" + } + }, + "default": { + "http": { + "enabled": { + "traces": true, + "metrics": true + }, + "filters": {} + }, + "grpc": { + "enabled": { + "traces": true, + "metrics": true + }, + "filters": {} + }, + "sql": { + "enabled": { + "traces": true, + "metrics": true + }, + "filters": {} + }, + "redis": { + "enabled": { + "traces": true, + "metrics": true + }, + "filters": {} + }, + "kafka": { + "enabled": { + "traces": true, + "metrics": true + }, + "filters": {} + }, + "mongo": { + "enabled": { + "traces": true, + "metrics": true + }, + "filters": {} + }, + "couchbase": { + "enabled": { + "traces": true, + "metrics": true + }, + "filters": {} + }, + "dns": { + "enabled": { + "traces": false, + "metrics": false + }, + "filters": {} + }, + "gpu": { + "enabled": { + "traces": true, + "metrics": true + }, + "filters": {} + } + } + }, + "Network": { + "type": "object", + "description": "Network capture configuration.", + "additionalProperties": false, + "properties": { + "capture": { + "type": "object", + "description": "Packet capture controls and network-sourced filtering.", + "additionalProperties": true, + "properties": { + "enabled": { + "type": "boolean", + "default": false, + "description": "Enable network capture.\nIf omitted, false is used.\n" + }, + "source": { + "type": "string", + "enum": [ + "socket_filter", + "tc" + ], + "default": "socket_filter", + "description": "Network capture source implementation.\nValues include: `socket_filter`, `tc`.\nIf omitted, `socket_filter` is used.\n" + }, + "filters": { + "$ref": 
"#/$defs/SignalFilters", + "description": "Signal-specific filters applied to network capture output.\nIf omitted, no filtering is applied.\n" + } + } + } + } + }, + "Enrich": { + "type": "object", + "description": "Enrichment configuration. Additional implementation-specific enrichment settings are allowed.", + "additionalProperties": true, + "properties": { + "enrichers": { + "type": "object", + "description": "Enable/disable named enrichers and set enricher options.\nIf omitted, implementation defaults are used.\n", + "additionalProperties": true, + "properties": { + "kubernetes": { + "type": "object", + "description": "Kubernetes metadata enricher configuration.\nWhen running OBI as a standalone daemon this enricher provides k8s pod/namespace/deployment metadata.\nWhen running OBI as a Collector receiver, consider setting `mode: disabled` and using the\n`k8sattributesprocessor` instead to avoid duplicate k8s API queries.\nIf omitted, implementation defaults are used.\n", + "additionalProperties": true, + "properties": { + "mode": { + "type": "string", + "enum": [ + "autodetect", + "enabled", + "disabled" + ], + "default": "autodetect", + "description": "Kubernetes enricher activation mode.\n`autodetect`: enable if a Kubernetes environment is detected (default).\n`enabled`: always enable.\n`disabled`: disable. 
Recommended when running as a Collector receiver and using `k8sattributesprocessor`.\nIf omitted, `autodetect` is used.\n" + } + } + } + } + }, + "service_name": { + "type": "object", + "description": "Service name derivation controls.\nIf omitted, implementation defaults are used.\n" + }, + "attributes": { + "type": "object", + "description": "Attribute enrichment rules and defaults.\nIf omitted, implementation defaults are used.\n" + } + } + } + }, + "required": [ + "version" + ] +} diff --git a/devdocs/config/version-2.0/validate_example.py b/devdocs/config/version-2.0/validate_example.py new file mode 100644 index 0000000000..62a8a75113 --- /dev/null +++ b/devdocs/config/version-2.0/validate_example.py @@ -0,0 +1,148 @@ +#!/usr/bin/env python3 +# Copyright The OpenTelemetry Authors +# SPDX-License-Identifier: Apache-2.0 + +from __future__ import annotations + +import argparse +import json +import sys +import urllib.request +from pathlib import Path + +import yaml +from jsonschema import Draft202012Validator + + +DEFAULT_OTEL_SCHEMA_URL = ( + "https://raw.githubusercontent.com/open-telemetry/opentelemetry-configuration/" + "49c531f78f86b85e220ec23c5be1a925254f0f9d/opentelemetry_configuration.json" +) + + +def parse_args() -> argparse.Namespace: + here = Path(__file__).resolve().parent + parser = argparse.ArgumentParser( + description="Validate an OBI v2 extension example against the local JSON schema." 
def get_subtree(data: object, dot_path: str) -> object:
    """Return the value found by walking ``dot_path`` through nested dicts.

    Args:
        data: Parsed YAML document (normally a dict).
        dot_path: Dot-separated key path such as ``extensions.obi``. Empty
            segments are ignored, so an empty path returns ``data`` itself.

    Returns:
        The object stored at the requested path.

    Raises:
        KeyError: If a segment is missing or the value reached so far is not
            a dict.
    """
    current = data
    for key in [segment for segment in dot_path.split(".") if segment]:
        if not isinstance(current, dict) or key not in current:
            raise KeyError(f"Missing path segment: {key}")
        current = current[key]
    return current


def print_errors(errors: list, max_errors: int) -> None:
    """Print validation errors, one per line, followed by an omission summary.

    At least one error is always shown, even when ``max_errors`` is zero or
    negative. The "omitted" count is derived from the number of errors that
    were actually printed, so it stays accurate in that case too.

    Args:
        errors: Objects exposing ``path`` (iterable of keys/indices) and
            ``message`` attributes.
        max_errors: Requested upper bound on the number of errors to print.
    """
    shown = max(max_errors, 1)
    for err in errors[:shown]:
        # An empty path means the error applies to the validated root itself;
        # label it explicitly instead of printing an empty location.
        path = ".".join(str(x) for x in err.path) or "<root>"
        print(f"- {path}: {err.message}")
    omitted = len(errors) - shown
    if omitted > 0:
        print(f"... {omitted} more error(s) omitted")


def main() -> int:
    """Validate the example config and return a process exit code.

    Returns:
        0 when every enabled validation passes, 1 when validation errors were
        found, and 2 when an input (schema, config, subtree, remote OTel
        schema) could not be loaded.
    """
    args = parse_args()

    try:
        schema = json.loads(args.schema.read_text(encoding="utf-8"))
    except Exception as exc:
        print(f"Failed to read schema file {args.schema}: {exc}", file=sys.stderr)
        return 2

    try:
        document = yaml.safe_load(args.config.read_text(encoding="utf-8"))
    except Exception as exc:
        print(f"Failed to read config file {args.config}: {exc}", file=sys.stderr)
        return 2

    try:
        instance = get_subtree(document, args.subtree)
    except KeyError as exc:
        print(f"Invalid subtree path '{args.subtree}': {exc}", file=sys.stderr)
        return 2

    # Step 1: validate only the selected subtree against the local schema.
    validator = Draft202012Validator(schema)
    errors = sorted(validator.iter_errors(instance), key=lambda err: list(err.path))

    if not errors:
        print(f"OBI VALID: {args.config} -> {args.subtree} conforms to {args.schema}")
    else:
        print(
            f"OBI INVALID: {args.config} -> {args.subtree} has {len(errors)} validation error(s)"
        )
        print_errors(errors, args.max_errors)

    # Step 2 (optional): validate the whole document against the OTel schema.
    otel_errors = []
    if args.skip_otel:
        print("OTEL SKIPPED: full-document OTel validation disabled by --skip-otel")
    else:
        try:
            with urllib.request.urlopen(args.otel_schema_url, timeout=30) as response:
                otel_schema = json.load(response)
        except Exception as exc:
            print(
                f"Failed to load OTel schema from {args.otel_schema_url}: {exc}",
                file=sys.stderr,
            )
            return 2

        otel_validator = Draft202012Validator(otel_schema)
        otel_errors = sorted(
            otel_validator.iter_errors(document), key=lambda err: list(err.path)
        )

        if not otel_errors:
            print(
                f"OTEL VALID: {args.config} conforms to OTel schema from {args.otel_schema_url}"
            )
        else:
            print(
                f"OTEL INVALID: {args.config} has {len(otel_errors)} validation error(s)"
            )
            print_errors(otel_errors, args.max_errors)

    return 0 if not errors and (args.skip_otel or not otel_errors) else 1


if __name__ == "__main__":
    raise SystemExit(main())
// asMap returns v as a map[string]any, or nil when v is nil or holds any
// other type.
func asMap(v any) map[string]any {
	m, _ := v.(map[string]any)
	return m
}

// get walks root along path and returns the value found there.
//
// Map levels are indexed by key; list levels are indexed by the decimal
// position given in the path segment. As a convenience, a leading "obi"
// segment that is not a top-level key is resolved under "extensions.obi".
// The boolean result is false when any segment cannot be resolved.
func get(root map[string]any, path ...string) (any, bool) {
	cur := any(root)
	for i, p := range path {
		if arr, ok := cur.([]any); ok {
			idx, err := strconv.Atoi(p)
			if err != nil || idx < 0 || idx >= len(arr) {
				return nil, false
			}
			cur = arr[idx]
			continue
		}

		m := asMap(cur)
		if m == nil {
			return nil, false
		}
		next, ok := m[p]
		if !ok && i == 0 && p == "obi" {
			// Reading from a nil map is safe, so a missing or non-map
			// "extensions" entry simply yields ok == false.
			next, ok = asMap(m["extensions"])["obi"]
		}
		if !ok {
			return nil, false
		}
		cur = next
	}
	return cur, true
}

// mustEq checks that the value at curPath in cur and the value at exPath in
// ex have the same default string formatting.
func mustEq(cur map[string]any, ex map[string]any, curPath []string, exPath []string) error {
	cv, ok := get(cur, curPath...)
	if !ok {
		return fmt.Errorf("missing current key %v", curPath)
	}
	ev, ok := get(ex, exPath...)
	if !ok {
		return fmt.Errorf("missing example key %v", exPath)
	}

	if fmt.Sprintf("%v", cv) != fmt.Sprintf("%v", ev) {
		return fmt.Errorf("mismatch current %v=%v example %v=%v", curPath, cv, exPath, ev)
	}
	return nil
}

// mustEqDurationToMilliseconds checks that the Go duration string at curPath
// in cur equals the millisecond count at exPath in ex.
//
// The example value may be an int, int64, integral float64, or a base-10
// integer string. Non-integral floats are rejected instead of being silently
// truncated.
func mustEqDurationToMilliseconds(cur map[string]any, ex map[string]any, curPath []string, exPath []string) error {
	cv, ok := get(cur, curPath...)
	if !ok {
		return fmt.Errorf("missing current key %v", curPath)
	}
	ev, ok := get(ex, exPath...)
	if !ok {
		return fmt.Errorf("missing example key %v", exPath)
	}

	curDuration, err := time.ParseDuration(fmt.Sprintf("%v", cv))
	if err != nil {
		return fmt.Errorf("invalid current duration %v=%v: %w", curPath, cv, err)
	}

	var exMillis int64
	switch value := ev.(type) {
	case int:
		exMillis = int64(value)
	case int64:
		exMillis = value
	case float64:
		exMillis = int64(value)
		if float64(exMillis) != value {
			return fmt.Errorf("invalid example milliseconds %v=%v", exPath, ev)
		}
	case string:
		parsed, parseErr := strconv.ParseInt(value, 10, 64)
		if parseErr != nil {
			return fmt.Errorf("invalid example milliseconds %v=%v", exPath, ev)
		}
		exMillis = parsed
	default:
		return fmt.Errorf("unsupported example milliseconds type for %v=%v", exPath, ev)
	}

	if curDuration.Milliseconds() != exMillis {
		return fmt.Errorf("mismatch current %v=%vms example %v=%v", curPath, curDuration.Milliseconds(), exPath, exMillis)
	}

	return nil
}

// toStringSlice formats every element of a []any with %v. It returns nil
// when v is not a []any.
func toStringSlice(v any) []string {
	items, ok := v.([]any)
	if !ok {
		return nil
	}
	out := make([]string, 0, len(items))
	for _, item := range items {
		out = append(out, fmt.Sprintf("%v", item))
	}
	return out
}

// excludedProcessMatchers returns the match.process section of every entry in
// the example's obi.capture.rules whose action is "exclude". Rules without a
// usable match.process map are skipped.
func excludedProcessMatchers(ex map[string]any) ([]map[string]any, error) {
	rulesValue, ok := get(ex, "obi", "capture", "rules")
	if !ok {
		return nil, errors.New("missing example key [obi capture rules]")
	}
	rules, ok := rulesValue.([]any)
	if !ok {
		return nil, errors.New("example obi.capture.rules is not a list")
	}

	var processes []map[string]any
	for _, ruleAny := range rules {
		rule := asMap(ruleAny)
		if rule == nil || fmt.Sprintf("%v", rule["action"]) != "exclude" {
			continue
		}
		process := asMap(asMap(rule["match"])["process"])
		if process == nil {
			continue
		}
		processes = append(processes, process)
	}
	return processes, nil
}

// mustMapExcludedSystemPaths checks that every path listed under
// discovery.excluded_linux_system_paths in cur has a matching "<path>/*"
// exe_path_glob in one of the example's exclude rules.
func mustMapExcludedSystemPaths(cur map[string]any, ex map[string]any) error {
	currentPathsValue, ok := get(cur, "discovery", "excluded_linux_system_paths")
	if !ok {
		return errors.New("missing current key [discovery excluded_linux_system_paths]")
	}
	currentPaths := toStringSlice(currentPathsValue)
	if len(currentPaths) == 0 {
		return errors.New("current discovery.excluded_linux_system_paths is empty or not a list")
	}

	processes, err := excludedProcessMatchers(ex)
	if err != nil {
		return err
	}

	foundGlobs := map[string]bool{}
	for _, process := range processes {
		for _, g := range toStringSlice(process["exe_path_glob"]) {
			foundGlobs[g] = true
		}
	}

	for _, p := range currentPaths {
		expectedGlob := strings.TrimSuffix(p, "/") + "/*"
		if !foundGlobs[expectedGlob] {
			return fmt.Errorf("missing scope rule glob for excluded system path: expected %s", expectedGlob)
		}
	}

	return nil
}

// mustMapAlreadyInstrumentedExclusion checks that the example contains an
// exclude rule with process.exports_otlp exactly when
// discovery.exclude_otel_instrumented_services is true in cur, and that the
// rule's port equals discovery.default_otlp_grpc_port and its protocol is set.
func mustMapAlreadyInstrumentedExclusion(cur map[string]any, ex map[string]any) error {
	currentValue, ok := get(cur, "discovery", "exclude_otel_instrumented_services")
	if !ok {
		return errors.New("missing current key [discovery exclude_otel_instrumented_services]")
	}
	wantExclude := fmt.Sprintf("%v", currentValue) == "true"

	defaultPortValue, ok := get(cur, "discovery", "default_otlp_grpc_port")
	if !ok {
		return errors.New("missing current key [discovery default_otlp_grpc_port]")
	}
	wantPort := fmt.Sprintf("%v", defaultPortValue)

	processes, err := excludedProcessMatchers(ex)
	if err != nil {
		return err
	}

	found := false
	for _, process := range processes {
		exportsOTLP, ok := process["exports_otlp"].(map[string]any)
		if !ok {
			continue
		}
		if fmt.Sprintf("%v", exportsOTLP["port"]) != wantPort {
			return fmt.Errorf("mismatch discovery.default_otlp_grpc_port=%s vs process.exports_otlp.port=%v", wantPort, exportsOTLP["port"])
		}
		// A missing or null value formats as "<nil>" with %v, so presence
		// has to be checked explicitly rather than by comparing to "".
		protocol, ok := exportsOTLP["protocol"]
		if !ok || protocol == nil || fmt.Sprintf("%v", protocol) == "" {
			return errors.New("missing process.exports_otlp.protocol in already-instrumented exclusion rule")
		}
		found = true
		break
	}

	if wantExclude && !found {
		return errors.New("missing selection rule for already-instrumented exclusion")
	}
	if !wantExclude && found {
		return errors.New("unexpected already-instrumented exclusion rule while source default is false")
	}

	return nil
}

// mustMapGoSpecificTracers checks that obi.capture.runtimes.go.enabled in the
// example is the negation of discovery.skip_go_specific_tracers in cur.
func mustMapGoSpecificTracers(cur map[string]any, ex map[string]any) error {
	currentValue, ok := get(cur, "discovery", "skip_go_specific_tracers")
	if !ok {
		return errors.New("missing current key [discovery skip_go_specific_tracers]")
	}
	currentSkip := fmt.Sprintf("%v", currentValue) == "true"

	goEnabled, ok := get(ex, "obi", "capture", "runtimes", "go", "enabled")
	if !ok {
		return errors.New("missing example key [obi capture runtimes go enabled]")
	}
	enableGo := fmt.Sprintf("%v", goEnabled) == "true"
	if enableGo == currentSkip {
		return fmt.Errorf("mismatch discovery.skip_go_specific_tracers=%v vs obi.capture.runtimes.go.enabled=%v", currentSkip, enableGo)
	}

	return nil
}

// mustMapApplicationFiltersPerInstrumentation checks that filter.application
// in cur formats identically to the traces and metrics filters of every
// instrumentation protocol in the example.
func mustMapApplicationFiltersPerInstrumentation(cur map[string]any, ex map[string]any) error {
	currentValue, ok := get(cur, "filter", "application")
	if !ok {
		return errors.New("missing current key [filter application]")
	}
	want := fmt.Sprintf("%v", currentValue)

	protocols := []string{"http", "grpc", "sql", "redis", "kafka", "mongo", "couchbase", "dns", "gpu"}
	signals := []string{"traces", "metrics"}

	for _, protocol := range protocols {
		for _, signal := range signals {
			exampleValue, ok := get(ex, "obi", "capture", "instrumentation", protocol, "filters", signal)
			if !ok {
				return fmt.Errorf("missing example key [obi capture instrumentation %s filters %s]", protocol, signal)
			}
			if fmt.Sprintf("%v", exampleValue) != want {
				return fmt.Errorf("filter.application mismatch for protocol %s signal %s", protocol, signal)
			}
		}
	}

	return nil
}

// mustMapNetworkFiltersPerSignal checks that filter.network in cur formats
// identically to the traces and metrics filters of the example's network
// capture section.
func mustMapNetworkFiltersPerSignal(cur map[string]any, ex map[string]any) error {
	currentValue, ok := get(cur, "filter", "network")
	if !ok {
		return errors.New("missing current key [filter network]")
	}
	want := fmt.Sprintf("%v", currentValue)

	for _, signal := range []string{"traces", "metrics"} {
		exampleValue, ok := get(ex, "obi", "capture", "network", "capture", "filters", signal)
		if !ok {
			return fmt.Errorf("missing example key [obi capture network capture filters %s]", signal)
		}
		if fmt.Sprintf("%v", exampleValue) != want {
			return fmt.Errorf("filter.network mismatch for signal %s", signal)
		}
	}

	return nil
}
"http", "graphql", "enabled"}, []string{"obi", "capture", "instrumentation", "http", "payload_extraction", "graphql", "enabled"}}, + {[]string{"ebpf", "payload_extraction", "http", "sqlpp", "enabled"}, []string{"obi", "capture", "instrumentation", "http", "payload_extraction", "sqlpp", "enabled"}}, + {[]string{"ebpf", "log_enricher", "cache_ttl"}, []string{"obi", "correlation", "log_trace_annotation", "cache", "ttl"}}, + {[]string{"ebpf", "log_enricher", "cache_size"}, []string{"obi", "correlation", "log_trace_annotation", "cache", "size"}}, + {[]string{"ebpf", "log_enricher", "async_writer_workers"}, []string{"obi", "correlation", "log_trace_annotation", "async_writer", "workers"}}, + {[]string{"ebpf", "log_enricher", "async_writer_channel_len"}, []string{"obi", "correlation", "log_trace_annotation", "async_writer", "channel_len"}}, + {[]string{"ebpf", "buffer_sizes", "http"}, []string{"obi", "capture", "instrumentation", "http", "buffer_size"}}, + {[]string{"ebpf", "heuristic_sql_detect"}, []string{"obi", "capture", "instrumentation", "sql", "heuristic_detect"}}, + {[]string{"ebpf", "buffer_sizes", "mysql"}, []string{"obi", "capture", "instrumentation", "sql", "mysql", "buffer_size"}}, + {[]string{"ebpf", "mysql_prepared_statements_cache_size"}, []string{"obi", "capture", "instrumentation", "sql", "mysql", "prepared_statements_cache_size"}}, + {[]string{"ebpf", "buffer_sizes", "postgres"}, []string{"obi", "capture", "instrumentation", "sql", "postgres", "buffer_size"}}, + {[]string{"ebpf", "postgres_prepared_statements_cache_size"}, []string{"obi", "capture", "instrumentation", "sql", "postgres", "prepared_statements_cache_size"}}, + {[]string{"ebpf", "redis_db_cache", "enabled"}, []string{"obi", "capture", "instrumentation", "redis", "db_cache", "enabled"}}, + {[]string{"ebpf", "buffer_sizes", "kafka"}, []string{"obi", "capture", "instrumentation", "kafka", "buffer_size"}}, + + {[]string{"network", "enable"}, []string{"obi", "capture", "network", "capture", 
"enabled"}}, + {[]string{"network", "source"}, []string{"obi", "capture", "network", "capture", "source"}}, + {[]string{"network", "agent_ip"}, []string{"obi", "capture", "network", "capture", "endpoint_identity", "agent_ip"}}, + {[]string{"network", "agent_ip_iface"}, []string{"obi", "capture", "network", "capture", "endpoint_identity", "agent_ip_interface"}}, + {[]string{"network", "agent_ip_type"}, []string{"obi", "capture", "network", "capture", "endpoint_identity", "agent_ip_family"}}, + {[]string{"network", "cache_max_flows"}, []string{"obi", "capture", "network", "capture", "flow_lifecycle", "max_tracked_flows"}}, + {[]string{"network", "cache_active_timeout"}, []string{"obi", "capture", "network", "capture", "flow_lifecycle", "active_timeout"}}, + {[]string{"network", "deduper"}, []string{"obi", "capture", "network", "capture", "flow_lifecycle", "deduplication", "strategy"}}, + {[]string{"network", "deduper_fc_ttl"}, []string{"obi", "capture", "network", "capture", "flow_lifecycle", "deduplication", "first_come_ttl"}}, + {[]string{"network", "sampling"}, []string{"obi", "capture", "network", "capture", "flow_lifecycle", "sampling"}}, + {[]string{"network", "direction"}, []string{"obi", "capture", "network", "capture", "selection", "direction"}}, + {[]string{"network", "listen_interfaces"}, []string{"obi", "capture", "network", "capture", "interface_discovery", "mode"}}, + {[]string{"network", "listen_poll_period"}, []string{"obi", "capture", "network", "capture", "interface_discovery", "poll_interval"}}, + {[]string{"network", "geo_ip", "cache_expiry"}, []string{"obi", "capture", "network", "capture", "enrichment", "geo_ip", "cache", "ttl"}}, + {[]string{"network", "reverse_dns", "cache_expiry"}, []string{"obi", "capture", "network", "capture", "enrichment", "reverse_dns", "cache", "ttl"}}, + {[]string{"network", "print_flows"}, []string{"obi", "capture", "network", "capture", "diagnostics", "print_flows"}}, + {[]string{"discovery", "min_process_age"}, 
[]string{"obi", "capture", "policy", "min_process_age"}}, + {[]string{"discovery", "route_harvester_timeout"}, []string{"obi", "capture", "instrumentation", "http", "routes", "discovery", "timeout"}}, + {[]string{"discovery", "disabled_route_harvesters"}, []string{"obi", "capture", "instrumentation", "http", "routes", "discovery", "disabled_languages"}}, + {[]string{"discovery", "route_harvester_advanced", "java_harvest_delay"}, []string{"obi", "capture", "instrumentation", "http", "routes", "discovery", "java", "delay"}}, + + {[]string{"name_resolver", "cache_len"}, []string{"obi", "enrich", "service_name", "cache", "size"}}, + {[]string{"name_resolver", "cache_expiry"}, []string{"obi", "enrich", "service_name", "cache", "ttl"}}, + + {[]string{"attributes", "metric_span_names_limit"}, []string{"obi", "capture", "limits", "metric_span_names"}}, + {[]string{"attributes", "rename_unresolved_hosts"}, []string{"obi", "enrich", "service_name", "unresolved_hosts", "names", "default"}}, + {[]string{"attributes", "kubernetes", "informers_sync_timeout"}, []string{"obi", "enrich", "enrichers", "kubernetes", "informers", "initial_sync_timeout"}}, + {[]string{"attributes", "kubernetes", "informers_resync_period"}, []string{"obi", "enrich", "enrichers", "kubernetes", "informers", "resync_period"}}, + + {[]string{"routes", "unmatched"}, []string{"obi", "capture", "instrumentation", "http", "routes", "unmatched"}}, + {[]string{"routes", "wildcard_char"}, []string{"obi", "capture", "instrumentation", "http", "routes", "wildcard_char"}}, + {[]string{"routes", "max_path_segment_cardinality"}, []string{"obi", "capture", "instrumentation", "http", "routes", "max_path_segment_cardinality"}}, + + {[]string{"otel_metrics_export", "histogram_aggregation"}, []string{"meter_provider", "readers", "0", "periodic", "exporter", "otlp_grpc", "default_histogram_aggregation"}}, + {[]string{"otel_metrics_export", "reporters_cache_len"}, []string{"obi", "capture", "telemetry", "metrics", 
"reporters_cache_len"}}, + {[]string{"otel_metrics_export", "ttl"}, []string{"obi", "capture", "telemetry", "metrics", "ttl"}}, + {[]string{"otel_metrics_export", "extra_span_resource_attributes"}, []string{"obi", "daemon", "telemetry", "metrics", "prometheus", "extra_span_resource_attributes"}}, + + {[]string{"otel_traces_export", "max_queue_size"}, []string{"tracer_provider", "processors", "0", "batch", "max_queue_size"}}, + {[]string{"otel_traces_export", "reporters_cache_len"}, []string{"obi", "capture", "telemetry", "traces", "reporters_cache_len"}}, + + {[]string{"prometheus_export", "port"}, []string{"meter_provider", "readers", "1", "pull", "exporter", "prometheus/development", "port"}}, + {[]string{"prometheus_export", "service_cache_size"}, []string{"obi", "daemon", "telemetry", "metrics", "prometheus", "span_metrics_service_cache_size"}}, + {[]string{"prometheus_export", "allow_service_graph_self_references"}, []string{"obi", "daemon", "telemetry", "metrics", "prometheus", "allow_service_graph_self_references"}}, + {[]string{"prometheus_export", "extra_resource_attributes"}, []string{"obi", "daemon", "telemetry", "metrics", "prometheus", "extra_resource_attributes"}}, + {[]string{"prometheus_export", "extra_span_resource_attributes"}, []string{"obi", "daemon", "telemetry", "metrics", "prometheus", "extra_span_resource_attributes"}}, + + {[]string{"log_config"}, []string{"obi", "daemon", "logging", "format"}}, + {[]string{"log_level"}, []string{"obi", "daemon", "logging", "level"}}, + {[]string{"trace_printer"}, []string{"obi", "daemon", "logging", "debug_trace_output"}}, + {[]string{"shutdown_timeout"}, []string{"obi", "daemon", "shutdown", "timeout"}}, + {[]string{"profile_port"}, []string{"obi", "daemon", "profiling", "port"}}, + {[]string{"enforce_sys_caps"}, []string{"obi", "capture", "safety", "enforce_system_capabilities"}}, + {[]string{"channel_buffer_len"}, []string{"obi", "capture", "channels", "buffer_len"}}, + 
{[]string{"channel_send_timeout"}, []string{"obi", "capture", "channels", "send_timeout"}}, + {[]string{"channel_send_timeout_panic"}, []string{"obi", "capture", "channels", "panic_on_send_timeout"}}, + {[]string{"internal_metrics", "exporter"}, []string{"obi", "daemon", "internal_metrics", "exporter"}}, + {[]string{"internal_metrics", "prometheus", "path"}, []string{"obi", "daemon", "internal_metrics", "prometheus", "path"}}, + {[]string{"internal_metrics", "bpf_metric_scrape_interval"}, []string{"obi", "daemon", "internal_metrics", "bpf", "scrape_interval"}}, + + {[]string{"nodejs", "enabled"}, []string{"obi", "capture", "runtimes", "nodejs", "enabled"}}, + {[]string{"javaagent", "enabled"}, []string{"obi", "capture", "runtimes", "java", "enabled"}}, + {[]string{"javaagent", "debug"}, []string{"obi", "capture", "runtimes", "java", "debug", "enabled"}}, + {[]string{"javaagent", "debug_instrumentation"}, []string{"obi", "capture", "runtimes", "java", "debug", "bytecode_instrumentation"}}, + {[]string{"javaagent", "attach_timeout"}, []string{"obi", "capture", "runtimes", "java", "attach_timeout"}}, + } + + failures := 0 + for _, c := range checks { + if err := mustEq(cur, ex, c.cur, c.ex); err != nil { + fmt.Println("FAIL:", err) + failures++ + } + } + + if err := mustEqDurationToMilliseconds( + cur, + ex, + []string{"otel_traces_export", "batch_timeout"}, + []string{"tracer_provider", "processors", "0", "batch", "schedule_delay"}, + ); err != nil { + fmt.Println("FAIL:", err) + failures++ + } + + if failures > 0 { + fmt.Printf("verification failed: %d mismatches\n", failures) + os.Exit(1) + } + + if err := mustMapExcludedSystemPaths(cur, ex); err != nil { + fmt.Println("FAIL:", err) + fmt.Printf("verification failed: %d mismatches\n", failures+1) + os.Exit(1) + } + + if err := mustMapAlreadyInstrumentedExclusion(cur, ex); err != nil { + fmt.Println("FAIL:", err) + fmt.Printf("verification failed: %d mismatches\n", failures+1) + os.Exit(1) + } + + if err := 
mustMapGoSpecificTracers(cur, ex); err != nil { + fmt.Println("FAIL:", err) + fmt.Printf("verification failed: %d mismatches\n", failures+1) + os.Exit(1) + } + + if err := mustMapApplicationFiltersPerInstrumentation(cur, ex); err != nil { + fmt.Println("FAIL:", err) + fmt.Printf("verification failed: %d mismatches\n", failures+1) + os.Exit(1) + } + + if err := mustMapNetworkFiltersPerSignal(cur, ex); err != nil { + fmt.Println("FAIL:", err) + fmt.Printf("verification failed: %d mismatches\n", failures+1) + os.Exit(1) + } + + fmt.Printf("feature parity verification passed: %d mapped default checks\n", len(checks)+6) +}