From 2f2395a2bce8c58aa9ac89c43929767f00204b12 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20J=C3=A4ckle?= Date: Tue, 26 May 2026 21:22:26 +0200 Subject: [PATCH] defer CBOR/string pre-encoding in SoftReferencedFieldMap Every ImmutableJsonObject construction eagerly serialised its field map to CBOR (or to a JSON string when CBOR is unavailable) so the SoftReference could be safely cleared. JFR on prod things-service (3.9.1) showed this accounting for ~6-10% of CPU on the build path. Defer encoding to the first call that actually needs it (toString, writeValue, upperBoundForStringSize) and only soften the field-map reference once a recoverable representation exists. The fieldsRef and representation fields are volatile so concurrent readers cannot observe a SoftReference before its backing representation, and the equals fallback compares both the field set and the rendered JSON string form so that JsonValues which encode identically but are class-incompatible (e.g. NullAttributes vs JsonValue.nullLiteral()) continue to compare equal as they did when CBOR encoding was eager. Co-Authored-By: Claude Opus 4.7 (1M context) --- .../eclipse/ditto/json/cbor/CborJsonTest.java | 23 ++- json/pom.xml | 24 +++ .../ditto/json/ImmutableJsonObject.java | 106 ++++++++---- .../json/ImmutableJsonObjectBenchmark.java | 155 ++++++++++++++++++ .../ditto/json/ImmutableJsonObjectTest.java | 84 ++++++---- 5 files changed, 326 insertions(+), 66 deletions(-) create mode 100644 json/src/test/java/org/eclipse/ditto/json/ImmutableJsonObjectBenchmark.java diff --git a/json-cbor/src/test/java/org/eclipse/ditto/json/cbor/CborJsonTest.java b/json-cbor/src/test/java/org/eclipse/ditto/json/cbor/CborJsonTest.java index c1063addf57..ed9b30e1592 100644 --- a/json-cbor/src/test/java/org/eclipse/ditto/json/cbor/CborJsonTest.java +++ b/json-cbor/src/test/java/org/eclipse/ditto/json/cbor/CborJsonTest.java @@ -202,15 +202,24 @@ public void writeImmutableJsonObjectWritesExpectedSimple() throws IOException { @Test public void validateImmutableJsonObjectInternalCachingBehaviour() throws IOException { - final JsonObject objectWithSelfGeneratedCache = JsonFactory.newObjectBuilder(KNOWN_FIELDS.values()).build(); - assertInternalCachesAreAsExpected(objectWithSelfGeneratedCache, true, false); - - final ByteBuffer byteBuffer = cborFactory.toByteBuffer(objectWithSelfGeneratedCache); + // After the lazy-encoding refactor, a JsonObject produced by the builder holds neither + // representation until the first toString() / writeValue() / size-validating call. + final JsonObject objectFromBuilder = JsonFactory.newObjectBuilder(KNOWN_FIELDS.values()).build(); + assertInternalCachesAreAsExpected(objectFromBuilder, false, false); + + // toByteBuffer triggers two materialisations on the same object: + // determineSize -> getUpperBoundForStringSize -> asJsonObjectString (string rep) + // writeToOutputStream -> writeValue -> createCborRepresentation (cbor rep) + // Both are cached on the source object as a side effect of serialising it. + final ByteBuffer byteBuffer = cborFactory.toByteBuffer(objectFromBuilder); final JsonObject objectWithCborCache = cborFactory.readFrom(byteBuffer).asObject(); - assertInternalCachesAreAsExpected(objectWithSelfGeneratedCache, true, false); - final JsonObject objectWithJsonCache = JsonFactory.newObject(objectWithSelfGeneratedCache.toString()); - assertInternalCachesAreAsExpected(objectWithSelfGeneratedCache, true, true); + assertInternalCachesAreAsExpected(objectFromBuilder, true, true); + // toString() now hits the already-cached string rep, no additional materialisation. + final JsonObject objectWithJsonCache = JsonFactory.newObject(objectFromBuilder.toString()); + assertInternalCachesAreAsExpected(objectFromBuilder, true, true); + // Constructing from CBOR / JSON inputs continues to pass that representation through + // to the field map unchanged (no lazy materialisation needed). assertInternalCachesAreAsExpected(objectWithCborCache, true, false); assertInternalCachesAreAsExpected(objectWithJsonCache, false, true); } diff --git a/json/pom.xml b/json/pom.xml index cac1c54d4bd..7061abeb4c4 100755 --- a/json/pom.xml +++ b/json/pom.xml @@ -76,10 +76,34 @@ jsonassert test + + org.openjdk.jmh + jmh-core + test + + + org.openjdk.jmh + jmh-generator-annprocess + test + + + org.apache.maven.plugins + maven-compiler-plugin + + + + org.openjdk.jmh + jmh-generator-annprocess + ${jmh.version} + + + + + org.codehaus.mojo flatten-maven-plugin diff --git a/json/src/main/java/org/eclipse/ditto/json/ImmutableJsonObject.java b/json/src/main/java/org/eclipse/ditto/json/ImmutableJsonObject.java index a745047c1e5..94a39d8fc9f 100644 --- a/json/src/main/java/org/eclipse/ditto/json/ImmutableJsonObject.java +++ b/json/src/main/java/org/eclipse/ditto/json/ImmutableJsonObject.java @@ -608,30 +608,38 @@ static final class SoftReferencedFieldMap { } - private String jsonObjectStringRepresentation; - private byte[] cborObjectRepresentation; + // The three mutable fields below participate in the lazy-encoding invariant: + // whenever fieldsRef holds a SoftReference, at least one representation must be + // non-null so recoverFields() can rebuild the map after a soft-clear. Without + // memory barriers, a thread that observes the SoftReference may not yet see the + // representation write that preceded it, which could trip the IllegalStateException + // in recoverFields. volatile gives us the cross-thread happens-before we need + // without taking a lock. + private volatile String jsonObjectStringRepresentation; + private volatile byte[] cborObjectRepresentation; private int hashCode; - private SoftReference> fieldsReference; + // Either a Map held strongly (when no serialised representation + // exists yet and we would have no way to recover the fields if the reference were + // cleared) or a SoftReference> once a representation is + // available. {@link #fields()} unwraps both cases. + private volatile Object fieldsRef; private SoftReferencedFieldMap(final Map jsonFieldMap, @Nullable final String stringRepresentation, @Nullable final byte[] cborObjectRepresentation) { requireNonNull(jsonFieldMap, "The fields of JSON object must not be null!"); - fieldsReference = new SoftReference<>(Collections.unmodifiableMap(new LinkedHashMap<>(jsonFieldMap))); - jsonObjectStringRepresentation = stringRepresentation; + final Map immutable = + Collections.unmodifiableMap(new LinkedHashMap<>(jsonFieldMap)); + this.jsonObjectStringRepresentation = stringRepresentation; this.cborObjectRepresentation = cborObjectRepresentation; - if (jsonObjectStringRepresentation == null && cborObjectRepresentation == null) { - if (CBOR_FACTORY.isCborAvailable()) { - try { - this.cborObjectRepresentation = CBOR_FACTORY.createCborRepresentation(jsonFieldMap, - guessSerializedSize()); - } catch (final IOException e) { - assert false; // this should not happen, so assertions will throw during testing - jsonObjectStringRepresentation = createStringRepresentation(jsonFieldMap); - } - } else { - jsonObjectStringRepresentation = createStringRepresentation(jsonFieldMap); - } + // Soft-reference the field map only when a serialised representation already + // exists; the representation is what {@link #recoverFields()} parses back when + // the soft reference is cleared under GC pressure. Without one, we must hold + // the map strongly to avoid data loss. + if (stringRepresentation != null || cborObjectRepresentation != null) { + this.fieldsRef = new SoftReference<>(immutable); + } else { + this.fieldsRef = immutable; } hashCode = 0; } @@ -722,22 +730,41 @@ Iterator getIterator() { } private Map fields() { - Map result = fieldsReference.get(); - if (null == result) { - result = recoverFields(); - fieldsReference = new SoftReference<>(result); + final Object ref = fieldsRef; + if (ref instanceof SoftReference) { + @SuppressWarnings("unchecked") + final SoftReference> softRef = + (SoftReference>) ref; + Map result = softRef.get(); + if (null == result) { + result = recoverFields(); + fieldsRef = new SoftReference<>(result); + } + return result; + } + @SuppressWarnings("unchecked") + final Map strong = (Map) ref; + return strong; + } + + private void softenFieldsRef(final Map fields) { + if (!(fieldsRef instanceof SoftReference)) { + fieldsRef = new SoftReference<>(fields); } - return result; } private Map recoverFields() { + final Map recovered; if (CBOR_FACTORY.isCborAvailable() && cborObjectRepresentation != null) { - return parseToMap(cborObjectRepresentation); - } - if (jsonObjectStringRepresentation != null) { - return parseToMap(jsonObjectStringRepresentation); + recovered = parseToMap(cborObjectRepresentation); + } else if (jsonObjectStringRepresentation != null) { + recovered = parseToMap(jsonObjectStringRepresentation); + } else { + throw new IllegalStateException("Fatal cache miss on JsonObject"); } - throw new IllegalStateException("Fatal cache miss on JsonObject"); + // Wrap so callers using getIterator() / values().iterator().remove() cannot + // mutate the recovered field map and break this object's immutability. + return Collections.unmodifiableMap(recovered); } private static Map parseToMap(final String jsonObjectString) { @@ -777,7 +804,16 @@ public boolean equals(final Object o) { Arrays.equals(cborObjectRepresentation, that.cborObjectRepresentation)) { return true; } - return Objects.equals(fields(), that.fields()); + // Two JsonObjects with the same field set are equal regardless of insertion + // order (Map.equals is set-based). When that returns false, fall back to + // comparing the rendered JSON string form: this catches cases where the field + // maps hold semantically-equivalent but class-incompatible JsonValue instances + // (e.g. NullAttributes vs JsonValue.nullLiteral(), both rendering as "null"). + // Pre-refactor, the eager CBOR encoding masked this asymmetry because both + // null kinds encode to the same byte; the lazy refactor preserves master's + // observable equality contract by combining both checks here. + return Objects.equals(fields(), that.fields()) + || asJsonObjectString().equals(that.asJsonObjectString()); } @Override @@ -792,14 +828,18 @@ public int hashCode() { String asJsonObjectString() { if (jsonObjectStringRepresentation == null) { - jsonObjectStringRepresentation = createStringRepresentation(this.fields()); + final Map currentFields = fields(); + jsonObjectStringRepresentation = createStringRepresentation(currentFields); + softenFieldsRef(currentFields); } return jsonObjectStringRepresentation; } void writeValue(final SerializationContext serializationContext) throws IOException { if (CBOR_FACTORY.isCborAvailable() && cborObjectRepresentation == null) { - cborObjectRepresentation = CBOR_FACTORY.createCborRepresentation(this.fields(), guessSerializedSize()); + final Map currentFields = fields(); + cborObjectRepresentation = CBOR_FACTORY.createCborRepresentation(currentFields, guessSerializedSize()); + softenFieldsRef(currentFields); } serializationContext.writeCachedElement(cborObjectRepresentation); } @@ -816,6 +856,12 @@ private int guessSerializedSize() { } public long upperBoundForStringSize() { + // Callers (size-validation code paths in commands and CBOR sizing) need a real + // bound. Materialise the string representation lazily here when neither form + // exists; subsequent calls and the eventual serialization will reuse the cache. + if (jsonObjectStringRepresentation == null && cborObjectRepresentation == null) { + asJsonObjectString(); + } long max = 0L; if (jsonObjectStringRepresentation != null) { max = jsonObjectStringRepresentation.length(); diff --git a/json/src/test/java/org/eclipse/ditto/json/ImmutableJsonObjectBenchmark.java b/json/src/test/java/org/eclipse/ditto/json/ImmutableJsonObjectBenchmark.java new file mode 100644 index 00000000000..ef888a419c7 --- /dev/null +++ b/json/src/test/java/org/eclipse/ditto/json/ImmutableJsonObjectBenchmark.java @@ -0,0 +1,155 @@ +/* + * Copyright (c) 2026 Contributors to the Eclipse Foundation + * + * See the NOTICE file(s) distributed with this work for additional + * information regarding copyright ownership. + * + * This program and the accompanying materials are made available under the + * terms of the Eclipse Public License 2.0 which is available at + * http://www.eclipse.org/legal/epl-2.0 + * + * SPDX-License-Identifier: EPL-2.0 + */ +package org.eclipse.ditto.json; + +import java.util.concurrent.TimeUnit; + +import org.openjdk.jmh.annotations.Benchmark; +import org.openjdk.jmh.annotations.BenchmarkMode; +import org.openjdk.jmh.annotations.Fork; +import org.openjdk.jmh.annotations.Measurement; +import org.openjdk.jmh.annotations.Mode; +import org.openjdk.jmh.annotations.OutputTimeUnit; +import org.openjdk.jmh.annotations.Scope; +import org.openjdk.jmh.annotations.Setup; +import org.openjdk.jmh.annotations.State; +import org.openjdk.jmh.annotations.Threads; +import org.openjdk.jmh.annotations.Warmup; +import org.openjdk.jmh.infra.Blackhole; +import org.openjdk.jmh.runner.Runner; +import org.openjdk.jmh.runner.RunnerException; +import org.openjdk.jmh.runner.options.Options; +import org.openjdk.jmh.runner.options.OptionsBuilder; + +/** + * JMH micro-benchmark for {@link ImmutableJsonObject} construction, used to validate the + * lazy-encoding refactor in {@code SoftReferencedFieldMap}. + * + *

Scenarios

+ *

Each {@code @Benchmark} exercises the programmatic-build path + * ({@link JsonObjectBuilder}) that is the hot site in production for + * {@code Thing.toJson}, {@code Feature.toJson}, {@code AbstractCommandResponse.toJson} + * and similar emit-side flows.

+ * + *
    + *
  • buildSmallNoSerialize — build a 5-field object, discard. Measures + * pure construction cost; this is the maximum-saving case once eager pre-encoding is + * deferred (short-lived JsonObjects that never get serialised).
  • + *
  • buildLargeNoSerialize — same, but 50 fields. Larger field map = more + * eager work being eliminated.
  • + *
  • buildSmallThenToString — build then call {@code toString()}. After + * the refactor this should match the baseline (string is materialised lazily on + * first call) — serves as a no-regression check for the case where the cache + * is consumed.
  • + *
  • buildLargeThenToString — same, 50 fields.
  • + *
  • buildSmallAccessFields — build then call {@code getField} 5 times. + * Should be substantially faster after the refactor since no encoding happens.
  • + *
+ * + *

How to run

+ *
+ * mvn test-compile -pl json -am -Djapicmp.skip=true
+ * java -cp "$(mvn -pl json dependency:build-classpath -Dmdep.outputFile=/dev/stdout -q):json/target/classes:json/target/test-classes" \
+ *      org.eclipse.ditto.json.ImmutableJsonObjectBenchmark
+ * 
+ */ +@State(Scope.Benchmark) +@BenchmarkMode(Mode.AverageTime) +@OutputTimeUnit(TimeUnit.NANOSECONDS) +@Warmup(iterations = 3, time = 1) +@Measurement(iterations = 5, time = 2) +@Fork(1) +@Threads(1) +public class ImmutableJsonObjectBenchmark { + + private static final int LARGE_FIELD_COUNT = 50; + + private String[] smallKeys; + private JsonValue[] smallValues; + private String[] largeKeys; + private JsonValue[] largeValues; + + @Setup + public void setup() { + smallKeys = new String[]{"thingId", "policyId", "_revision", "definition", "_modified"}; + smallValues = new JsonValue[]{ + JsonValue.of("ns:thing-1"), + JsonValue.of("ns:policy-1"), + JsonValue.of(42L), + JsonValue.of("ditto:robot:1.0.0"), + JsonValue.of("2026-05-26T07:11:46Z"), + }; + + largeKeys = new String[LARGE_FIELD_COUNT]; + largeValues = new JsonValue[LARGE_FIELD_COUNT]; + for (int i = 0; i < LARGE_FIELD_COUNT; i++) { + largeKeys[i] = "feature_" + i; + largeValues[i] = JsonValue.of("value_" + i); + } + } + + @Benchmark + public void buildSmallNoSerialize(final Blackhole bh) { + final JsonObjectBuilder builder = JsonFactory.newObjectBuilder(); + for (int i = 0; i < smallKeys.length; i++) { + builder.set(smallKeys[i], smallValues[i]); + } + bh.consume(builder.build()); + } + + @Benchmark + public void buildLargeNoSerialize(final Blackhole bh) { + final JsonObjectBuilder builder = JsonFactory.newObjectBuilder(); + for (int i = 0; i < largeKeys.length; i++) { + builder.set(largeKeys[i], largeValues[i]); + } + bh.consume(builder.build()); + } + + @Benchmark + public void buildSmallThenToString(final Blackhole bh) { + final JsonObjectBuilder builder = JsonFactory.newObjectBuilder(); + for (int i = 0; i < smallKeys.length; i++) { + builder.set(smallKeys[i], smallValues[i]); + } + bh.consume(builder.build().toString()); + } + + @Benchmark + public void buildLargeThenToString(final Blackhole bh) { + final JsonObjectBuilder builder = JsonFactory.newObjectBuilder(); + for (int i = 0; i < largeKeys.length; i++) { + builder.set(largeKeys[i], largeValues[i]); + } + bh.consume(builder.build().toString()); + } + + @Benchmark + public void buildSmallAccessFields(final Blackhole bh) { + final JsonObjectBuilder builder = JsonFactory.newObjectBuilder(); + for (int i = 0; i < smallKeys.length; i++) { + builder.set(smallKeys[i], smallValues[i]); + } + final JsonObject obj = builder.build(); + for (int i = 0; i < smallKeys.length; i++) { + bh.consume(obj.getValue(smallKeys[i])); + } + } + + public static void main(final String[] args) throws RunnerException { + final Options opt = new OptionsBuilder() + .include(ImmutableJsonObjectBenchmark.class.getSimpleName()) + .build(); + new Runner(opt).run(); + } +} diff --git a/json/src/test/java/org/eclipse/ditto/json/ImmutableJsonObjectTest.java b/json/src/test/java/org/eclipse/ditto/json/ImmutableJsonObjectTest.java index 4c54b3b1ec7..194acc6f6c0 100644 --- a/json/src/test/java/org/eclipse/ditto/json/ImmutableJsonObjectTest.java +++ b/json/src/test/java/org/eclipse/ditto/json/ImmutableJsonObjectTest.java @@ -1516,43 +1516,69 @@ private static JsonFieldSelector selector(final String s) { } @Test - public void validateSoftReferenceStrategy() throws IllegalAccessException, NoSuchFieldException { + public void buildingDoesNotEagerlyEncodeAnyRepresentation() throws Exception { final ImmutableJsonObject jsonObject = ImmutableJsonObject.of(KNOWN_FIELDS); - assertInternalCachesAreAsExpected(jsonObject, true); - final Field valueListField = jsonObject.getClass().getDeclaredField("fieldMap"); - valueListField.setAccessible(true); - final ImmutableJsonObject.SoftReferencedFieldMap - valueList = (ImmutableJsonObject.SoftReferencedFieldMap) valueListField.get(jsonObject); + assertThat(getInternalField(jsonObject, "jsonObjectStringRepresentation")) + .as("string representation must not be eagerly created on construction") + .isNull(); + assertThat(getInternalField(jsonObject, "cborObjectRepresentation")) + .as("CBOR representation must not be eagerly created on construction") + .isNull(); + assertThat(getInternalField(jsonObject, "fieldsRef")) + .as("field map must be held strongly while no representation has been materialised") + .isInstanceOf(Map.class); + } + + @Test + public void toStringMaterialisesStringRepAndSoftensFieldMap() throws Exception { + final ImmutableJsonObject jsonObject = ImmutableJsonObject.of(KNOWN_FIELDS); + + // Trigger lazy materialisation. + jsonObject.toString(); + + assertThat(getInternalField(jsonObject, "jsonObjectStringRepresentation")) + .as("string representation is cached after first toString()") + .isNotNull(); + assertThat(getInternalField(jsonObject, "fieldsRef")) + .as("field map is softened once a recoverable representation is cached") + .isInstanceOf(SoftReference.class); + } - final Field softReferenceField = valueList.getClass().getDeclaredField("fieldsReference"); - softReferenceField.setAccessible(true); - SoftReference softReference = (SoftReference) softReferenceField.get(valueList); + @Test + public void upperBoundForStringSizeMaterialisesStringRepLazily() throws Exception { + final ImmutableJsonObject jsonObject = ImmutableJsonObject.of(KNOWN_FIELDS); + + // Size-validation callers (Modify commands, CBOR sizing) need a real bound. + assertThat(jsonObject.getUpperBoundForStringSize()).isPositive(); + assertThat(getInternalField(jsonObject, "jsonObjectStringRepresentation")) + .as("upperBoundForStringSize triggers lazy materialisation of the string representation") + .isNotNull(); + } - softReference.clear(); + @Test + public void softReferenceClearedAfterSerialisationRecoversFields() throws Exception { + final ImmutableJsonObject jsonObject = ImmutableJsonObject.of(KNOWN_FIELDS); + // Materialise so the field map is softened and we have a representation to recover from. + jsonObject.toString(); + @SuppressWarnings("rawtypes") + final SoftReference softRef = (SoftReference) getInternalField(jsonObject, "fieldsRef"); + softRef.clear(); + + // recoverFields() must parse the cached string representation back into a usable map. assertThat(jsonObject.getValue(KNOWN_KEY_FOO)).isPresent(); } - private void assertInternalCachesAreAsExpected(final JsonObject jsonObject, final boolean jsonExpected) { - try { - final Field valueListField = jsonObject.getClass().getDeclaredField("fieldMap"); - valueListField.setAccessible(true); - final ImmutableJsonObject.SoftReferencedFieldMap - fieldMap = (ImmutableJsonObject.SoftReferencedFieldMap) valueListField.get(jsonObject); - - final Field jsonStringField = fieldMap.getClass().getDeclaredField("jsonObjectStringRepresentation"); - jsonStringField.setAccessible(true); - String jsonString = (String) jsonStringField.get(fieldMap); - - assertThat(jsonString != null).isEqualTo(jsonExpected); - } catch (IllegalAccessException | NoSuchFieldException e) { - System.err.println( - "Failed to access internal caching fields in JsonObject using reflection. " + - "This might just be a bug in the test." - ); - e.printStackTrace(); - } + private static Object getInternalField(final JsonObject jsonObject, final String fieldName) + throws Exception { + final Field fieldMapField = jsonObject.getClass().getDeclaredField("fieldMap"); + fieldMapField.setAccessible(true); + final ImmutableJsonObject.SoftReferencedFieldMap softFieldMap = + (ImmutableJsonObject.SoftReferencedFieldMap) fieldMapField.get(jsonObject); + final Field target = softFieldMap.getClass().getDeclaredField(fieldName); + target.setAccessible(true); + return target.get(softFieldMap); } }