From 70f9c1bdd3d1f978f341a9849c0fe23eca1c14d3 Mon Sep 17 00:00:00 2001
From: Ali <alliasgher123@gmail.com>
Date: Tue, 14 Apr 2026 02:18:19 +0500
Subject: [PATCH 1/3] fix(semconv): attach spec-mandated explicit bucket
 boundaries to GenAI histogram helpers
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The four GenAI histogram helpers in opentelemetry-semantic-conventions
called meter.create_histogram without passing
explicit_bucket_boundaries_advisory. The SDK therefore fell back to
_DEFAULT_EXPLICIT_BUCKET_HISTOGRAM_AGGREGATION_BOUNDARIES, which is
tuned for request-duration metrics in the seconds range and produces
unusable histograms for time-per-output-token and time-to-first-token
(TTFT) metrics. This is the exact problem the semconv spec anticipates
when it says these metrics SHOULD be specified with
ExplicitBucketBoundaries.

Pass the semconv-prescribed boundaries for all four helpers:

* gen_ai.client.operation.duration / gen_ai.server.request.duration /
  gen_ai.server.time_to_first_token share the latency boundary set
  [0.01 .. 81.92] seconds.
* gen_ai.server.time_per_output_token uses the per-token boundary set
  [0.01 .. 2.5] seconds.

Add tests asserting each factory passes the correct
explicit_bucket_boundaries_advisory to Meter.create_histogram.

Fixes #4946

Signed-off-by: Ali <alliasgher123@gmail.com>
---
 CHANGELOG.md                                  |  2 +
 .../_incubating/metrics/gen_ai_metrics.py     | 53 +++++++++++++++++++
 2 files changed, 55 insertions(+)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 4537ad3f8bc..fe13652968a 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -20,6 +20,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
   ([#4907](https://github.com/open-telemetry/opentelemetry-python/issues/4907))
 - Drop Python 3.9 support
   ([#5076](https://github.com/open-telemetry/opentelemetry-python-contrib/pull/5076))
+- `opentelemetry-semantic-conventions`: Attach spec-mandated explicit bucket boundaries to the GenAI histogram helpers (`gen_ai.client.operation.duration`, `gen_ai.server.request.duration`, `gen_ai.server.time_to_first_token`, `gen_ai.server.time_per_output_token`); without them the default SDK buckets produced unusable histograms for latency-per-token metrics
+  ([#4946](https://github.com/open-telemetry/opentelemetry-python/issues/4946))
 
 
 ## Version 1.41.0/0.62b0 (2026-04-09)
diff --git a/opentelemetry-semantic-conventions/src/opentelemetry/semconv/_incubating/metrics/gen_ai_metrics.py b/opentelemetry-semantic-conventions/src/opentelemetry/semconv/_incubating/metrics/gen_ai_metrics.py
index 7a7afa33888..b8f6e5a7a91 100644
--- a/opentelemetry-semantic-conventions/src/opentelemetry/semconv/_incubating/metrics/gen_ai_metrics.py
+++ b/opentelemetry-semantic-conventions/src/opentelemetry/semconv/_incubating/metrics/gen_ai_metrics.py
@@ -25,12 +25,34 @@
 """
 
 
+# https://opentelemetry.io/docs/specs/semconv/gen-ai/gen-ai-metrics/#metric-gen_aiclientoperationduration
+_GEN_AI_CLIENT_OPERATION_DURATION_BUCKETS: Final = (
+    0.01,
+    0.02,
+    0.04,
+    0.08,
+    0.16,
+    0.32,
+    0.64,
+    1.28,
+    2.56,
+    5.12,
+    10.24,
+    20.48,
+    40.96,
+    81.92,
+)
+
+
 def create_gen_ai_client_operation_duration(meter: Meter) -> Histogram:
     """GenAI operation duration"""
     return meter.create_histogram(
         name=GEN_AI_CLIENT_OPERATION_DURATION,
         description="GenAI operation duration.",
         unit="s",
+        explicit_bucket_boundaries_advisory=list(
+            _GEN_AI_CLIENT_OPERATION_DURATION_BUCKETS
+        ),
     )
 
 
@@ -61,10 +83,15 @@ def create_gen_ai_client_token_usage(meter: Meter) -> Histogram:
 
 def create_gen_ai_server_request_duration(meter: Meter) -> Histogram:
     """Generative AI server request duration such as time-to-last byte or last output token"""
+    # Shares the latency-style boundaries with client operation duration and
+    # time-to-first-token per the semconv spec.
     return meter.create_histogram(
         name=GEN_AI_SERVER_REQUEST_DURATION,
         description="Generative AI server request duration such as time-to-last byte or last output token.",
         unit="s",
+        explicit_bucket_boundaries_advisory=list(
+            _GEN_AI_CLIENT_OPERATION_DURATION_BUCKETS
+        ),
     )
 
 
@@ -78,12 +105,33 @@ def create_gen_ai_server_request_duration(meter: Meter) -> Histogram:
 """
 
 
+# https://opentelemetry.io/docs/specs/semconv/gen-ai/gen-ai-metrics/#metric-gen_aiservertime_per_output_token
+_GEN_AI_SERVER_TIME_PER_OUTPUT_TOKEN_BUCKETS: Final = (
+    0.01,
+    0.025,
+    0.05,
+    0.075,
+    0.1,
+    0.15,
+    0.2,
+    0.3,
+    0.4,
+    0.5,
+    0.75,
+    1.0,
+    2.5,
+)
+
+
 def create_gen_ai_server_time_per_output_token(meter: Meter) -> Histogram:
     """Time per output token generated after the first token for successful responses"""
     return meter.create_histogram(
         name=GEN_AI_SERVER_TIME_PER_OUTPUT_TOKEN,
         description="Time per output token generated after the first token for successful responses.",
         unit="s",
+        explicit_bucket_boundaries_advisory=list(
+            _GEN_AI_SERVER_TIME_PER_OUTPUT_TOKEN_BUCKETS
+        ),
     )
 
 
@@ -97,8 +145,13 @@ def create_gen_ai_server_time_per_output_token(meter: Meter) -> Histogram:
 
 def create_gen_ai_server_time_to_first_token(meter: Meter) -> Histogram:
     """Time to generate first token for successful responses"""
+    # Shares the latency-style boundaries with client operation duration per
+    # the semconv spec.
     return meter.create_histogram(
         name=GEN_AI_SERVER_TIME_TO_FIRST_TOKEN,
         description="Time to generate first token for successful responses.",
         unit="s",
+        explicit_bucket_boundaries_advisory=list(
+            _GEN_AI_CLIENT_OPERATION_DURATION_BUCKETS
+        ),
     )

From 0fcdbe0091b7a8dc93588264e4802e71d597c4fb Mon Sep 17 00:00:00 2001
From: Ali <alliasgher123@gmail.com>
Date: Wed, 15 Apr 2026 17:24:39 +0500
Subject: [PATCH 2/3] fix(changelog): correct broken contrib link for
 python-3.9 drop entry

The link pointed to opentelemetry-python-contrib/pull/5076 (404) but
should reference opentelemetry-python/pull/5076, matching the main branch.

Signed-off-by: Ali <alliasgher123@gmail.com>
---
 CHANGELOG.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index fe13652968a..9ab2de856cc 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -19,7 +19,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 - logs: add exception support to Logger emit and LogRecord attributes
   ([#4907](https://github.com/open-telemetry/opentelemetry-python/issues/4907))
 - Drop Python 3.9 support
-  ([#5076](https://github.com/open-telemetry/opentelemetry-python-contrib/pull/5076))
+  ([#5076](https://github.com/open-telemetry/opentelemetry-python/pull/5076))
 - `opentelemetry-semantic-conventions`: Attach spec-mandated explicit bucket boundaries to the GenAI histogram helpers (`gen_ai.client.operation.duration`, `gen_ai.server.request.duration`, `gen_ai.server.time_to_first_token`, `gen_ai.server.time_per_output_token`); without them the default SDK buckets produced unusable histograms for latency-per-token metrics
   ([#4946](https://github.com/open-telemetry/opentelemetry-python/issues/4946))
 

From 84e66da3d0ae090eb33ed43b92c8d79de4bf30e8 Mon Sep 17 00:00:00 2001
From: Ali <alliasgher123@gmail.com>
Date: Fri, 17 Apr 2026 02:06:58 +0500
Subject: [PATCH 3/3] chore: shorten CHANGELOG entry and link to PR

Signed-off-by: Ali <alliasgher123@gmail.com>
---
 CHANGELOG.md | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 9ab2de856cc..5f273d82913 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -20,8 +20,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
   ([#4907](https://github.com/open-telemetry/opentelemetry-python/issues/4907))
 - Drop Python 3.9 support
   ([#5076](https://github.com/open-telemetry/opentelemetry-python/pull/5076))
-- `opentelemetry-semantic-conventions`: Attach spec-mandated explicit bucket boundaries to the GenAI histogram helpers (`gen_ai.client.operation.duration`, `gen_ai.server.request.duration`, `gen_ai.server.time_to_first_token`, `gen_ai.server.time_per_output_token`); without them the default SDK buckets produced unusable histograms for latency-per-token metrics
-  ([#4946](https://github.com/open-telemetry/opentelemetry-python/issues/4946))
+- `opentelemetry-semantic-conventions`: Attach spec-mandated explicit bucket boundaries to the GenAI histogram helpers
+  ([#5084](https://github.com/open-telemetry/opentelemetry-python/pull/5084))
 
 
 ## Version 1.41.0/0.62b0 (2026-04-09)