Skip to content
Open
5 changes: 5 additions & 0 deletions index.js
Original file line number Diff line number Diff line change
Expand Up @@ -7,4 +7,9 @@ require('werelogs').stderrUtils.catchAndTimestampStderr(
require('cluster').isPrimary ? 1 : null,
);

// Start tracing before requiring anything that hooks into HTTP, MongoDB,
// or ioredis — instrumentation patches modules on require, so anything
// loaded earlier than init() would run unpatched.
require('./lib/tracing').init();

require('./lib/server.js')();
15 changes: 15 additions & 0 deletions lib/api/api.js
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,7 @@ const parseCopySource = require('./apiUtils/object/parseCopySource');
const { tagConditionKeyAuth } = require('./apiUtils/authorization/tagConditionKeys');
const { isRequesterASessionUser } = require('./apiUtils/authorization/permissionChecks');
const checkHttpHeadersSize = require('./apiUtils/object/checkHttpHeadersSize');
const { instrumentApiMethod } = require('../instrumentation/simple');
const constants = require('../../constants');
const { config } = require('../Config.js');
const metadata = require('../metadata/wrapper');
Expand Down Expand Up @@ -586,4 +587,18 @@ const api = {
handleAuthorizationResults,
};

// Denylist (not allowlist) so newly-added S3 handlers are auto-traced
// without a separate registration step. The three skipped keys are
// internal helpers, not S3 operations.
const NON_INSTRUMENTED_KEYS = new Set([
'callApiMethod',
'checkAuthResults',
'handleAuthorizationResults',
]);
for (const [name, handler] of Object.entries(api)) {
if (typeof handler === 'function' && !NON_INSTRUMENTED_KEYS.has(name)) {
api[name] = instrumentApiMethod(handler, name);
}
}

module.exports = api;
85 changes: 85 additions & 0 deletions lib/instrumentation/simple.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
'use strict';

const tracing = require('../tracing');

let tracer = null;
function getTracer() {
if (tracer) {
return tracer;
}
const { trace } = require('@opentelemetry/api');
const { version } = require('../../package.json');
tracer = trace.getTracer('cloudserver-api', version);
return tracer;
}

function instrumentApiMethod(apiMethod, methodName) {
if (!tracing.isEnabled()) {
return apiMethod;
}

const api = require('@opentelemetry/api');
const spanName = `api.${methodName}`;

return function instrumented(...args) {
const callbackIndex = args.findLastIndex(a => typeof a === 'function');
const span = getTracer().startSpan(spanName, { kind: api.SpanKind.INTERNAL });

// End-once guard. Multiple termination paths can race: the
// wrapped callback may fire and then the handler may also throw
// synchronously, or a callback-and-Promise hybrid handler may
// resolve after firing the callback.
let spanEnded = false;
const endSpan = err => {
if (spanEnded) {
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

But then we will not have the following trace if we have a double-ending ? Why did you introduce what ? Did you encounter that ? If we remove that, what is the impact ?

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes — encountered during initial unit-test development. The case: a callback-style handler fires the callback (which ends the span via the wrapped-callback closure) and then throws synchronously afterwards (e.g. the callback body itself threw). The outer try/catch then calls endSpan(err) and we get two .end() calls. OTEL warns on double-end and the second status overwrites the first, so the trace would record ERROR when the operation actually succeeded. The guard makes the first ending win. The ends span exactly once when callback fires then handler throws test in instrumentationSimple.spec.js exercises exactly this path.

return;
}
spanEnded = true;
if (err) {
span.recordException(err);
span.setStatus({ code: api.SpanStatusCode.ERROR });
if (err.code) {
span.setAttribute('cloudserver.error_code', err.code);
}
} else {
span.setStatus({ code: api.SpanStatusCode.OK });
}
span.end();
};

const wrappedArgs = [...args];
if (callbackIndex !== -1) {
const originalCallback = args[callbackIndex];
wrappedArgs[callbackIndex] = function wrappedCallback(err, ...results) {
endSpan(err);
return originalCallback.call(this, err, ...results);
};
}

const ctx = api.trace.setSpan(api.context.active(), span);
try {
const result = api.context.with(ctx, () =>
apiMethod.apply(this, wrappedArgs));
if (callbackIndex === -1) {
if (result && typeof result.then === 'function') {
return result.then(
value => { endSpan(); return value; },
err => { endSpan(err); throw err; },
);

Check notice

Code scanning / CodeQL

Promise .then() usage (async migration) Note

This call uses .then(). Refactor to async/await.
Comment on lines +65 to +68
}
endSpan();
}
// Callback-style handler: the wrapped callback drives the
// span lifecycle. If the handler also returns a thenable
// (hybrid migration shape), pass it through untouched —
// attaching a second .then() chain would surface as an
// unhandled rejection in callback-only callers.
return result;
} catch (error) {
Comment thread
delthas marked this conversation as resolved.
endSpan(error);
throw error;
}
};
}

module.exports = { instrumentApiMethod };
Comment thread
delthas marked this conversation as resolved.
Comment thread
delthas marked this conversation as resolved.
20 changes: 11 additions & 9 deletions lib/server.js
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ const arsenal = require('arsenal');
const { setServerHeader } = arsenal.s3routes.routesUtils;
const { RedisClient, StatsClient } = arsenal.metrics;
const monitoringClient = require('./utilities/monitoringHandler');
const tracing = require('./tracing');

const logger = require('./utilities/logger');
const { internalHandlers } = require('./utilities/internalHandlers');
Expand Down Expand Up @@ -206,6 +207,7 @@ class S3Server {
vault,
},
};

arsenal.s3routes.routes(req, res, params, logger, this.config);
}

Expand Down Expand Up @@ -323,32 +325,32 @@ class S3Server {
this.servers.push(server);
}

/*
* This exits the running process properly.
*/
cleanUp() {
logger.info('server shutting down');
// Stop token refill job if running
if (this.config.rateLimiting?.enabled) {
stopRefillJob(logger);
}
Promise.all(this.servers.map(server =>
new Promise(resolve => server.close(resolve))
)).then(() => process.exit(0));
))
.then(() => tracing.close())
.finally(() => process.exit(0));
}

caughtExceptionShutdown() {
if (!this.cluster) {
process.exit(1);
tracing.close().finally(() => process.exit(1));
return;
}
logger.error('shutdown of worker due to exception', {
workerId: this.worker ? this.worker.id : undefined,
workerPid: this.worker ? this.worker.process.pid : undefined,
});
// Will close all servers, cause disconnect event on primary and kill
// worker process with 'SIGTERM'.
// worker.kill() is graceful (closes servers, disconnects IPC) but
// does not fire our SIGTERM handler, so the BatchSpanProcessor
// would lose buffered spans without an explicit flush here.
if (this.worker) {
this.worker.kill();
tracing.close().finally(() => this.worker.kill());
}
}

Expand Down
24 changes: 24 additions & 0 deletions lib/tracing/healthPaths.js
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Are we use about that ? We usually trace them as they can have an impact (as you said high-frequency). The filter is not done here but in the tool used to explore trace ?

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Both options exist; we deliberately filter at ingest. Three reasons:

  1. Volume: k8s liveness/readiness probes hit at 1–10 Hz per pod, plus Prometheus scrape on /metrics at 15s. With parentBasedSampler honoring upstream decisions (which we want, to follow NGINX/Beyla traces correctly), an upstream traceparent with sampled=01 would force every probe to be sampled. That's bytes on the wire to the collector, OTLP exporter pressure, and storage cost paid for traces nobody queries.
  2. Signal/noise in the trace UI: even with backend filters, autocomplete and "recent operations" lists fill up with probe entries.
  3. Convention: most production OTEL deployment guides drop probes at the SDK.

The filter is isHealthPath in lib/tracing/healthPaths.js — five explicit paths. Easy to remove or invert later if the cost/benefit changes.

Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
'use strict';

// Probe + scrape paths that should never produce a span. Filtered at
// ingest (not at the trace backend) because probe rate × pod count ×
// always-on sampling overwhelms the exporter and storage with traffic
// nobody queries.
const HEALTH_PATHS = new Set([
'/live',
'/ready',
'/_/healthcheck',
'/_/healthcheck/deep',
'/metrics',
]);

function isHealthPath(url) {
if (typeof url !== 'string' || url.length === 0) {
return false;
}
const qIdx = url.indexOf('?');
const path = qIdx === -1 ? url : url.slice(0, qIdx);
return HEALTH_PATHS.has(path);
}

module.exports = { isHealthPath };
114 changes: 114 additions & 0 deletions lib/tracing/index.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,114 @@
'use strict';

const { buildTrustedHosts, makeRequestHook } = require('./trustedHosts');
const { isHealthPath } = require('./healthPaths');

let sdk = null;

function isEnabled() {
return process.env.ENABLE_OTEL === 'true';
}

function init() {
if (!isEnabled() || sdk) {
return;
}

const endpoint = process.env.OTEL_EXPORTER_OTLP_TRACES_ENDPOINT;
if (!endpoint) {
throw new Error(
'ENABLE_OTEL=true but OTEL_EXPORTER_OTLP_TRACES_ENDPOINT is unset',
);
}

const { diag, DiagConsoleLogger, DiagLogLevel } = require('@opentelemetry/api');
const { NodeSDK } = require('@opentelemetry/sdk-node');
const { resourceFromAttributes } = require('@opentelemetry/resources');
const { OTLPTraceExporter } = require('@opentelemetry/exporter-trace-otlp-http');
const { HttpInstrumentation } = require('@opentelemetry/instrumentation-http');
const { IORedisInstrumentation } = require('@opentelemetry/instrumentation-ioredis');
const { MongoDBInstrumentation } = require('@opentelemetry/instrumentation-mongodb');
const {
ParentBasedSampler,
TraceIdRatioBasedSampler,
} = require('@opentelemetry/sdk-trace-base');
const { version } = require('../../package.json');
const { config } = require('../Config');

diag.setLogger(new DiagConsoleLogger(), DiagLogLevel.WARN);

const parsedRatio = parseFloat(process.env.OTEL_SAMPLING_RATIO);
const samplingRatio = Number.isFinite(parsedRatio) ? parsedRatio : 0.01;

const trustedHosts = buildTrustedHosts(config);

const ignoreIncomingRequestHook = req =>
req.method === 'OPTIONS' || isHealthPath(req.url);

sdk = new NodeSDK({
resource: resourceFromAttributes({
'service.name': process.env.OTEL_SERVICE_NAME || 'cloudserver',
'service.version': process.env.OTEL_SERVICE_VERSION || version,
'service.namespace': process.env.OTEL_SERVICE_NAMESPACE || 'scality',
}),
traceExporter: new OTLPTraceExporter({ url: endpoint }),
logRecordProcessors: [],
metricReaders: [],
spanLimits: {
attributeValueLengthLimit: 4096,
attributeCountLimit: 128,
eventCountLimit: 128,
linkCountLimit: 128,
},
sampler: new ParentBasedSampler({
root: new TraceIdRatioBasedSampler(samplingRatio),
}),
instrumentations: [
new HttpInstrumentation({
ignoreIncomingRequestHook,
requestHook: makeRequestHook(trustedHosts),
}),
new IORedisInstrumentation({ requireParentSpan: true }),
// Mask leaf values in db.statement so query shape is captured
// without user data (object keys, filter values) flowing to
// the trace backend.
new MongoDBInstrumentation({ enhancedDatabaseReporting: false }),
],
});

sdk.start();
}

// Cap the flush window. The BatchSpanProcessor's default 30s export
// timeout would otherwise block process.exit, and Kubernetes' default
// terminationGracePeriodSeconds is also 30s — we'd get SIGKILL'd
// before the flush ever completed.
const SHUTDOWN_DEADLINE_MS = 5000;

async function close() {
// Capture + clear before awaiting so concurrent callers (SIGTERM
// during an uncaught-exception flow, for example) don't both call
// sdk.shutdown() — the SDK doesn't guarantee idempotent shutdown.
const local = sdk;
if (!local) {
return;
}
sdk = null;
try {
await Promise.race([
local.shutdown(),
// .unref() so the timer doesn't pin the event loop open
// when sdk.shutdown() resolves first.
new Promise(resolve => {
setTimeout(resolve, SHUTDOWN_DEADLINE_MS).unref();
}),
]);
} catch (err) {
// Loggers may already be torn down at this point in shutdown;
// log to stderr directly.
// eslint-disable-next-line no-console
console.error('tracing close failed', err);
}
}

module.exports = { init, close, isEnabled };
Loading
Loading