diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS index 3f25d41e80..5661850231 100644 --- a/.github/CODEOWNERS +++ b/.github/CODEOWNERS @@ -48,6 +48,7 @@ libdd-http-client @DataDog/apm-common-components-core libdd-library-config*/ @DataDog/apm-sdk-capabilities-rust libdd-log*/ @DataDog/apm-common-components-core libdd-otel-thread-ctx/ @DataDog/apm-common-components-core +libdd-otel-thread-ctx-ffi/ @DataDog/apm-common-components-core libdd-profiling*/ @DataDog/libdatadog-profiling libdd-shared-runtime*/ @DataDog/apm-common-components-core libdd-telemetry*/ @DataDog/apm-common-components-core diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 3e2d3405a8..b3b021aeeb 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -34,6 +34,16 @@ jobs: large-packages: true docker-images: false swap-storage: true + # Building (and testing) libdd-otel-thread-ctx-ffi requires a custom + # linker step, which requires lld (v19+) to be available. + - name: Install LLD + if: runner.os == 'Linux' + shell: bash + run: | + set -euxo pipefail + sudo apt-get install -y lld-20 + sudo ln -s /usr/bin/ld.lld-20 /usr/bin/ld.lld + sudo chmod 777 /usr/bin/ld.lld - name: Checkout sources uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # 4.2.2 with: diff --git a/Cargo.lock b/Cargo.lock index 32454cdc83..f32ba61c15 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3146,6 +3146,15 @@ dependencies = [ "cc", ] +[[package]] +name = "libdd-otel-thread-ctx-ffi" +version = "1.0.0" +dependencies = [ + "build_common", + "libdd-common-ffi", + "libdd-otel-thread-ctx", +] + [[package]] name = "libdd-profiling" version = "1.0.0" diff --git a/Cargo.toml b/Cargo.toml index 1d75806752..05f02b3c53 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -16,6 +16,7 @@ members = [ "datadog-live-debugger", "datadog-live-debugger-ffi", "libdd-otel-thread-ctx", + "libdd-otel-thread-ctx-ffi", "libdd-profiling", "libdd-profiling-ffi", "libdd-profiling-protobuf", diff --git 
a/libdd-otel-thread-ctx-ffi/Cargo.toml b/libdd-otel-thread-ctx-ffi/Cargo.toml new file mode 100644 index 0000000000..8001933cd6 --- /dev/null +++ b/libdd-otel-thread-ctx-ffi/Cargo.toml @@ -0,0 +1,26 @@ +# Copyright 2026-Present Datadog, Inc. https://www.datadoghq.com/ +# SPDX-License-Identifier: Apache-2.0 + +[package] +name = "libdd-otel-thread-ctx-ffi" +version = "1.0.0" +description = "FFI bindings for the OTel thread-level context publisher" +edition.workspace = true +rust-version.workspace = true +license.workspace = true +publish = false + +[lib] +crate-type = ["staticlib", "cdylib", "lib"] +bench = false + +[dependencies] +libdd-common-ffi = { path = "../libdd-common-ffi", default-features = false } +libdd-otel-thread-ctx = { path = "../libdd-otel-thread-ctx" } + +[features] +default = ["cbindgen"] +cbindgen = ["build_common/cbindgen", "libdd-common-ffi/cbindgen"] + +[build-dependencies] +build_common = { path = "../build-common" } diff --git a/libdd-otel-thread-ctx-ffi/build.rs b/libdd-otel-thread-ctx-ffi/build.rs new file mode 100644 index 0000000000..41ad86983a --- /dev/null +++ b/libdd-otel-thread-ctx-ffi/build.rs @@ -0,0 +1,29 @@ +// Copyright 2026-Present Datadog, Inc. https://www.datadoghq.com/ +// SPDX-License-Identifier: Apache-2.0 +extern crate build_common; + +use build_common::generate_and_configure_header; +use std::env; + +fn main() { + generate_and_configure_header("otel-thread-ctx.h"); + let target_os = env::var("CARGO_CFG_TARGET_OS").unwrap(); + + // Export the TLSDESC thread-local variable to the dynamic symbol table so + // external readers (e.g. the eBPF profiler) can locate it. Rust's cdylib + // linker applies a version script with `local: *` that hides all symbols + // not explicitly whitelisted, and also causes lld to relax the TLSDESC + // access to local-exec (LE), eliminating the dynsym entry entirely. 
+ // Passing our own version script with an explicit `global:` entry for the + // symbol beats the `local: *` wildcard and prevents that relaxation. + // + // Merging multiple version scripts is not supported by GNU ld, so we also + // force lld explicitly. + if target_os == "linux" { + let manifest_dir = env::var("CARGO_MANIFEST_DIR").unwrap(); + println!("cargo:rustc-cdylib-link-arg=-fuse-ld=lld"); + println!( + "cargo:rustc-cdylib-link-arg=-Wl,--version-script={manifest_dir}/tls-dynamic-list.txt" + ); + } +} diff --git a/libdd-otel-thread-ctx-ffi/cbindgen.toml b/libdd-otel-thread-ctx-ffi/cbindgen.toml new file mode 100644 index 0000000000..d8d02a5896 --- /dev/null +++ b/libdd-otel-thread-ctx-ffi/cbindgen.toml @@ -0,0 +1,35 @@ +# Copyright 2026-Present Datadog, Inc. https://www.datadoghq.com/ +# SPDX-License-Identifier: Apache-2.0 + +language = "C" +cpp_compat = true +tab_width = 2 +header = """// Copyright 2026-Present Datadog, Inc. https://www.datadoghq.com/ +// SPDX-License-Identifier: Apache-2.0 +""" +include_guard = "DDOG_OTEL_THREAD_CTX_H" +style = "both" +pragma_once = true +no_includes = true +sys_includes = ["stdbool.h", "stddef.h", "stdint.h"] + +[parse] +parse_deps = true +include = ["libdd-common-ffi", "libdd-otel-thread-ctx"] + +[export] +prefix = "ddog_" +renaming_overrides_prefixing = true + +[export.rename] +# AtomicU8 doesn't have a proper mapping, and is a Rust implementation detail. +# We map it to plain uint8_t in the C header, since it has the same +# representation. +"AtomicU8" = "uint8_t" + +[export.mangle] +rename_types = "PascalCase" + +[enum] +prefix_with_name = true +rename_variants = "ScreamingSnakeCase" diff --git a/libdd-otel-thread-ctx-ffi/src/lib.rs b/libdd-otel-thread-ctx-ffi/src/lib.rs new file mode 100644 index 0000000000..52f12ff6bd --- /dev/null +++ b/libdd-otel-thread-ctx-ffi/src/lib.rs @@ -0,0 +1,120 @@ +// Copyright 2026-Present Datadog, Inc. https://www.datadoghq.com/ +// SPDX-License-Identifier: Apache-2.0 + +//! 
FFI bindings for the OTel thread-level context publisher. +//! +//! All symbols are only available on Linux, since the TLSDESC TLS mechanism +//! required by the spec is Linux-specific. + +#[cfg(target_os = "linux")] +pub use linux::*; + +#[cfg(target_os = "linux")] +mod linux { + use libdd_otel_thread_ctx::linux::{ThreadContext, ThreadContextRecord}; + + /// Maximum size in bytes of the `attrs_data` field of [`ddog_ThreadContextRecord`]. + // This is ugly, but I couldn't get cbindgen to generate the corresponding #define in any other + // way. It doesn't like re-exports (pub use), and doing something like `pub const + // MAX_ATTRS_DATA_SIZE = _MAX` (where `_MAX` has been imported properly), it generates something + // dumb such as `#define ddog_MAX_ATTRS_DATA_SIZE = _MAX` instead of propagating the actual + // value. + // This solution is at least marginally better than prepending a hardcoded define manually in + // build.rs, as it will at least keep the value in sync. + pub const MAX_ATTRS_DATA_SIZE: usize = 612; + const _: () = assert!( + MAX_ATTRS_DATA_SIZE == libdd_otel_thread_ctx::linux::MAX_ATTRS_DATA_SIZE, + "MAX_ATTRS_DATA_SIZE out of sync with libdd-otel-thread-ctx" + ); + use std::ptr::NonNull; + + /// Allocate and initialise a new thread context. + /// + /// Returns a non-null owned handle that must eventually be released with + /// `ddog_otel_thread_ctx_free`. + #[no_mangle] + pub extern "C" fn ddog_otel_thread_ctx_new( + trace_id: &[u8; 16], + span_id: &[u8; 8], + local_root_span_id: &[u8; 8], + ) -> NonNull { + ThreadContext::new(*trace_id, *span_id, *local_root_span_id, &[]).into_ptr() + } + + /// Free an owned thread context. + /// + /// # Safety + /// + /// `ctx` must be a valid non-null pointer obtained from `ddog_otel_thread_ctx_new` or + /// `ddog_otel_thread_ctx_detach`, and must not be used after this call. In particular, `ctx` + /// must not be currently attached to a thread. 
+ #[no_mangle] + pub unsafe extern "C" fn ddog_otel_thread_ctx_free(ctx: *mut ThreadContextRecord) { + if let Some(ctx) = NonNull::new(ctx) { + let _ = ThreadContext::from_ptr(ctx); + } + } + + /// Attach `ctx` to the current thread. Returns the previously attached context if any, or null + /// otherwise. + /// + /// # Safety + /// + /// `ctx` must be a valid non-null pointer obtained from this API. Ownership of `ctx` is + /// transferred to the TLS slot: the caller must not drop `ctx` while it is still actively + /// attached. + /// + /// ## In-place update + /// + /// The preferred method to update the thread context in place is [ddog_otel_thread_ctx_update]. + /// + /// If calling into native code is too costly, it is possible to update an attached context + /// directly in-memory without going through libdatadog (contexts are guaranteed to have a + /// stable address through their lifetime). **HOWEVER, IF DOING SO, PLEASE BE VERY CAUTIOUS OF + /// THE FOLLOWING POINTS**: + /// + /// 1. The update process requires a [seqlock](https://en.wikipedia.org/wiki/Seqlock)-like + /// pattern: [ThreadContextRecord::valid] must be first set to `0` before the update and set + /// to `1` again at the end. Additionally, depending on your language's memory model, you + /// might need specific synchronization primitives (compiler fences, atomics, etc.), since + /// the context can be read by an asynchronous signal handler at any point in time. See the + /// [Otel thread context + /// specification](https://github.com/open-telemetry/opentelemetry-specification/pull/4947) + /// for more details. + /// 2. Only update the context from the thread it's attached to. Contexts are designed to be + /// attached, written to and read from on the same thread (whether from signal code or + /// program code). Thus, they are NOT thread-safe. 
Given the current specification, I don't + /// think it's possible to safely update an attached context from a different thread, since + /// the signal handler doesn't assume the context can be written to concurrently from another + /// thread. + #[no_mangle] + pub unsafe extern "C" fn ddog_otel_thread_ctx_attach( + ctx: *mut ThreadContextRecord, + ) -> Option> { + ThreadContext::from_ptr(NonNull::new(ctx)?) + .attach() + .map(ThreadContext::into_ptr) + } + + /// Remove the currently attached context from the TLS slot. + /// + /// Returns the detached context (caller now owns it and must release it with + /// `ddog_otel_thread_ctx_free`), or null if the slot was empty. + #[no_mangle] + pub extern "C" fn ddog_otel_thread_ctx_detach() -> Option> { + ThreadContext::detach().map(ThreadContext::into_ptr) + } + + /// Update the currently attached context in-place. + /// + /// If no context is currently attached, one is created and attached, equivalent to calling + /// `ddog_otel_thread_ctx_new` followed by `ddog_otel_thread_ctx_attach`. + #[no_mangle] + pub extern "C" fn ddog_otel_thread_ctx_update( + trace_id: &[u8; 16], + span_id: &[u8; 8], + local_root_span_id: &[u8; 8], + ) { + ThreadContext::update(*trace_id, *span_id, *local_root_span_id, &[]); + } +} diff --git a/libdd-otel-thread-ctx-ffi/tests/elf_properties.rs b/libdd-otel-thread-ctx-ffi/tests/elf_properties.rs new file mode 100644 index 0000000000..a429a29a00 --- /dev/null +++ b/libdd-otel-thread-ctx-ffi/tests/elf_properties.rs @@ -0,0 +1,84 @@ +// Copyright 2026-Present Datadog, Inc. https://www.datadoghq.com/ +// SPDX-License-Identifier: Apache-2.0 + +//! Verify ELF properties of the built cdylib on Linux. +//! +//! These tests check that: +//! - `otel_thread_ctx_v1` is exported in the dynamic symbol table as a TLS GLOBAL symbol. +//! - `otel_thread_ctx_v1` is accessed via TLSDESC relocations (R_X86_64_TLSDESC or +//! R_AARCH64_TLSDESC), as required by the OTel thread-level context sharing spec. +//! +//! 
The cdylib path is derived at runtime from the test executable location. +//! Both the test binary and the cdylib live in `target/<[triple/]profile>/deps/`. + +#![cfg(target_os = "linux")] + +use std::path::PathBuf; +use std::process::Command; + +const SYMBOL: &str = "otel_thread_ctx_v1"; + +fn cdylib_path() -> PathBuf { + // test binary: target/<[triple/]profile>/deps/ + // cdylib: target/<[triple/]profile>/deps/liblibdd_otel_thread_ctx_ffi.so + let exe = std::env::current_exe().expect("failed to read current executable path"); + exe.parent() + .expect("unexpected test executable path structure") + .join("liblibdd_otel_thread_ctx_ffi.so") +} + +fn check_cdylib_readable(path: &PathBuf) { + assert!( + std::fs::File::open(path).is_ok(), + "cdylib at {} could not be opened for reading", + path.display() + ); +} + +fn readelf(args: &[&str], path: &PathBuf) -> String { + let out = Command::new("readelf") + .args(args) + .arg(path) + .output() + .expect("failed to run readelf. Is binutils installed?"); + String::from_utf8_lossy(&out.stdout).into_owned() +} + +#[test] +#[cfg_attr(miri, ignore)] +fn otel_thread_ctx_v1_in_dynsym() { + let path = cdylib_path(); + check_cdylib_readable(&path); + let output = readelf(&["-W", "--dyn-syms"], &path); + let line = output + .lines() + .find(|l| l.contains(SYMBOL)) + .unwrap_or_else(|| panic!("'{SYMBOL}' not found in dynsym of {}", path.display())); + assert!( + line.contains("TLS") && line.contains("GLOBAL"), + "'{SYMBOL}' is in dynsym but not as TLS GLOBAL — got:\n {line}" + ); +} + +#[test] +#[cfg_attr(miri, ignore)] +fn otel_thread_ctx_v1_tlsdesc_reloc() { + let path = cdylib_path(); + check_cdylib_readable(&path); + let output = readelf(&["-W", "--relocs"], &path); + let found = output.lines().any(|l| { + l.contains(SYMBOL) && (l.contains("R_X86_64_TLSDESC") || l.contains("R_AARCH64_TLSDESC")) + }); + assert!( + found, + "No TLSDESC relocation found for '{SYMBOL}' in {}\n\ + All relocations mentioning the symbol:\n{}", + 
path.display(), + output + .lines() + .filter(|l| l.contains(SYMBOL)) + .map(|l| format!(" {l}")) + .collect::>() + .join("\n") + ); +} diff --git a/libdd-otel-thread-ctx-ffi/tls-dynamic-list.txt b/libdd-otel-thread-ctx-ffi/tls-dynamic-list.txt new file mode 100644 index 0000000000..254be05ddd --- /dev/null +++ b/libdd-otel-thread-ctx-ffi/tls-dynamic-list.txt @@ -0,0 +1,3 @@ +{ + global: otel_thread_ctx_v1; +}; diff --git a/libdd-otel-thread-ctx/src/lib.rs b/libdd-otel-thread-ctx/src/lib.rs index 373934b147..e22ac395c8 100644 --- a/libdd-otel-thread-ctx/src/lib.rs +++ b/libdd-otel-thread-ctx/src/lib.rs @@ -140,7 +140,7 @@ pub mod linux { // `valid`. We just use a const assertion in `new()` to avoid surprises and make sure this // struct has the right total size. #[repr(C)] - struct ThreadContextRecord { + pub struct ThreadContextRecord { /// Trace identifier; all-zeroes means "no trace". trace_id: [u8; 16], /// Span identifier. @@ -302,26 +302,26 @@ pub mod linux { )) } - /// Turn this thread context into a raw pointer to the underlying [ThreadContextRecord]. - /// The pointer must be reconstructed through [`Self::from_raw`] in order to be properly + /// Turn this thread context into a pointer to the underlying [ThreadContextRecord]. + /// The pointer must be reconstructed through [`Self::from_ptr`] in order to be properly /// dropped, or the record will leak. - fn into_raw(self) -> *mut ThreadContextRecord { + pub fn into_ptr(self) -> NonNull { let mdrop = mem::ManuallyDrop::new(self); - mdrop.0.as_ptr() + mdrop.0 } - /// Reconstruct a [ThreadContextRecord] from a raw pointer that is either `null` or comes - /// from [`Self::into_raw`]. Return `None` if `ptr` is null. + /// Reconstruct a [ThreadContextRecord] from a pointer that comes + /// from [`Self::into_ptr`]. /// /// # Safety /// - /// - `ptr` must be `null` or come from a prior call to [`Self::into_raw`]. + /// - `ptr` must come from a prior call to [`Self::into_ptr`]. 
/// - if `ptr` is aliased, accesses through aliases must not be interleaved with method /// calls on the returned [ThreadContextRecord]. More precisely, mutable references might /// be reconstructed during those calls, so any constraint from either Stacked Borrows, /// Tree Borrows or whatever is the current aliasing model implemented in Miri applies. - unsafe fn from_raw(ptr: *mut ThreadContextRecord) -> Option { - NonNull::new(ptr).map(Self) + pub unsafe fn from_ptr(ptr: NonNull) -> Self { + Self(ptr) } } @@ -345,8 +345,9 @@ pub mod linux { slot: &AtomicPtr, tgt: *mut ThreadContextRecord, ) -> Option { - // Safety: a non-null value in the slot came from a prior `into_raw` call. - unsafe { ThreadContext::from_raw(slot.swap(tgt, Ordering::Relaxed)) } + // Safety: a non-null value in the slot came from a prior `into_ptr` call. + NonNull::new(slot.swap(tgt, Ordering::Relaxed)) + .map(|ptr| unsafe { ThreadContext::from_ptr(ptr) }) } /// Publish a new (or previously detached) thread context record by writing its pointer @@ -365,7 +366,7 @@ pub mod linux { // // We still need a release fence to avoid exposing uninitialized memory to the handler. compiler_fence(Ordering::Release); - Self::swap(get_tls_slot(), self.into_raw()) + Self::swap(get_tls_slot(), self.into_ptr().as_ptr()) } /// Update the currently attached record in-place. Sets `valid = 0` before the update and @@ -399,7 +400,9 @@ pub mod linux { // `ThreadContext::new` already initialises `valid = 1`. 
let _ = Self::swap( slot, - ThreadContext::new(trace_id, span_id, local_root_span_id, attrs).into_raw(), + ThreadContext::new(trace_id, span_id, local_root_span_id, attrs) + .into_ptr() + .as_ptr(), ); } } diff --git a/tools/docker/Dockerfile.build b/tools/docker/Dockerfile.build index b07e7ca6d6..07591333d3 100644 --- a/tools/docker/Dockerfile.build +++ b/tools/docker/Dockerfile.build @@ -125,6 +125,7 @@ COPY "builder/Cargo.toml" "builder/" COPY "datadog-ffe/Cargo.toml" "datadog-ffe/" COPY "datadog-ffe-ffi/Cargo.toml" "datadog-ffe-ffi/" COPY "libdd-otel-thread-ctx/Cargo.toml" "libdd-otel-thread-ctx/" +COPY "libdd-otel-thread-ctx-ffi/Cargo.toml" "libdd-otel-thread-ctx-ffi/" RUN find -name "Cargo.toml" | sed -e s#Cargo.toml#src/lib.rs#g | xargs -n 1 sh -c 'mkdir -p $(dirname $1); touch $1; echo $1' create_stubs RUN echo \ bin_tests/src/bin/crashtracker_bin_test.rs \