diff --git a/CHANGES b/CHANGES index 91d2a19d0..e88b85cf1 100644 --- a/CHANGES +++ b/CHANGES @@ -35,6 +35,52 @@ Changes with FreeUnit 1.35.6 25 Jun 2026 *) Bugfix: remove unreachable Protocol::HttpJson dead-code arm from the OTel exporter; only HttpBinary was ever selected. + *) Bugfix: validate that a loaded TLS private key matches its certificate + at config time, and guard the wildcard-name SAN matcher against a + zero-length SAN entry. + + *) Bugfix: correct IPv4 /32 CIDR fallthrough, symmetric URI/pattern + decoding, the PCRE2 match-data ovector size, and short port-range + parsing in HTTP routing; document the case-insensitive host matcher + and the capset stance. + + *) Bugfix: tighten file-descriptor and CLOEXEC lifetime — CLOEXEC-protect + accepted sockets and pipe ends, close the pipe end on + nxt_fd_nonblocking() failure and the source fd on compression mmap + failure, and narrow the accept4() fallback to ENOSYS. + + *) Bugfix: tighten WebSocket frame-bound checks — reject truncated + extended-length frames in libunit, validate the 64-bit extended-length + MSB (RFC 6455), fix a no-op frame-size decrement that could copy bytes + beyond the declared payload, bound the Java sendWsFrame JNI arguments, + and guard pending_payload_len overflow in the Python ASGI handler. + + *) Bugfix: bounds-check peer-supplied shared-memory offsets — range-check + chunk_id and chunk_id+nchunks in incoming mmap messages, close a + lookup/dereference window on the incoming-mmap handler, reject + response-buffer size overflow, and validate request sptr offsets + before use. + + *) Bugfix: bounds-check app-supplied arguments across the language + bindings (PHP header skip / realpath / PATH_INFO; Python WSGI and ASGI + checks; Perl ERRSV scrub; Java InputStream.readLine off/len; WASM + guest offsets). + + *) Bugfix: tighten isolation boundaries — require a matching peer UID on + the unix control socket, resolve mount destinations with + openat2(RESOLVE_BENEATH), and resolve relative cgroup paths against the + child's /proc//cgroup. + + *) Bugfix: cap JSON parser depth and element counts in the controller, + scrub PHP TrueAsync exception state before the prototype fork, and bind + the Ruby rack.input / rack.errors handles to their originating request. + + *) Bugfix: bound proxy Content-Length and URI/string helpers — truncate a + proxied response body that exceeds its Content-Length and close the + connection, flag invalid or oversized upstream Content-Length, fix an + nxt_is_complex_uri_encoded() off-by-one, and reject over-long lengths + in nxt_rmemstrn(). + Changes with FreeUnit 1.35.5 29 May 2026 diff --git a/auto/files b/auto/files index 1fa6ca28f..4912ed802 100644 --- a/auto/files +++ b/auto/files @@ -51,6 +51,24 @@ nxt_feature_test="#include . auto/feature +# pipe2(), Linux 2.6.27/glibc 2.9, FreeBSD 10.0, NetBSD 6.0, OpenBSD 5.7. + +nxt_feature="pipe2()" +nxt_feature_name=NXT_HAVE_PIPE2 +nxt_feature_run= +nxt_feature_incs= +nxt_feature_libs= +nxt_feature_test="#define _GNU_SOURCE + #include + #include + + int main(void) { + int pp[2]; + return pipe2(pp, O_CLOEXEC); + }" +. auto/feature + + nxt_feature="openat2()" nxt_feature_name=NXT_HAVE_OPENAT2 nxt_feature_run= diff --git a/src/java/nxt_jni_InputStream.c b/src/java/nxt_jni_InputStream.c index fabff685e..614ddc2c7 100644 --- a/src/java/nxt_jni_InputStream.c +++ b/src/java/nxt_jni_InputStream.c @@ -90,12 +90,27 @@ static jint JNICALL nxt_java_InputStream_readLine(JNIEnv *env, jclass cls, jlong req_info_ptr, jarray out, jint off, jint len) { + jsize array_len; uint8_t *data; ssize_t res; nxt_unit_request_info_t *req; req = nxt_jlong2ptr(req_info_ptr); + /* + * Validate (off, len) against the array bounds before handing + * GetPrimitiveArrayCritical's pointer + an attacker-controlled + * offset to nxt_unit_request_read(). Without this, a malicious + * caller can drive an OOB write of up to len bytes past the + * array's heap allocation. + */ + array_len = (*env)->GetArrayLength(env, out); + if (off < 0 || len < 0 || off > array_len || len > array_len - off) { + nxt_java_throw_IllegalStateException(env, + "InputStream.readLine: off/len out of bounds"); + return -1; + } + data = (*env)->GetPrimitiveArrayCritical(env, out, NULL); res = nxt_unit_request_readline_size(req, len); diff --git a/src/java/nxt_jni_Request.c b/src/java/nxt_jni_Request.c index bc0d56dc0..5e6947dbe 100644 --- a/src/java/nxt_jni_Request.c +++ b/src/java/nxt_jni_Request.c @@ -731,12 +731,23 @@ static void JNICALL nxt_java_Request_sendWsFrameBuf(JNIEnv *env, jclass cls, jlong req_info_ptr, jobject buf, jint pos, jint len, jbyte opCode, jboolean last) { + jlong cap; nxt_unit_request_info_t *req; req = nxt_jlong2ptr(req_info_ptr); uint8_t *b = (*env)->GetDirectBufferAddress(env, buf); if (b != NULL) { + cap = (*env)->GetDirectBufferCapacity(env, buf); + if (pos < 0 || len < 0 || cap < 0 + || (jlong) pos > cap + || (jlong) len > cap - (jlong) pos) + { + nxt_java_throw_IllegalStateException(env, + "sendWsFrame: pos/len out of buffer capacity"); + return; + } + nxt_unit_websocket_send(req, opCode, last, b + pos, len); } else { @@ -749,9 +760,21 @@ static void JNICALL nxt_java_Request_sendWsFrameArr(JNIEnv *env, jclass cls, jlong req_info_ptr, jarray arr, jint pos, jint len, jbyte opCode, jboolean last) { + jsize cap; nxt_unit_request_info_t *req; req = nxt_jlong2ptr(req_info_ptr); + + cap = (*env)->GetArrayLength(env, arr); + if (pos < 0 || len < 0 + || pos > cap + || len > cap - pos) + { + nxt_java_throw_IllegalStateException(env, + "sendWsFrame: pos/len out of array length"); + return; + } + uint8_t *b = (*env)->GetPrimitiveArrayCritical(env, arr, NULL); if (b != NULL) { diff --git a/src/nxt_capability.c b/src/nxt_capability.c index 9f36ab997..157c1eadb 100644 --- a/src/nxt_capability.c +++ b/src/nxt_capability.c @@ -3,6 +3,18 @@ * Copyright (C) NGINX, Inc. */ +/* + * NOTE: this module currently exposes capability *detection* + * (capget), used by nxt_isolation.c and the credential machinery to + * decide whether a non-root build can honor setuid/setgid in the + * "user"/"group" config keys. Capabilities are NOT dropped + * programmatically via capset(2): the privilege barrier for app + * processes is the existing setuid + PR_SET_NO_NEW_PRIVS dance in + * nxt_credential.c / nxt_process.c. Operators expecting an explicit + * capability-drop step should not assume one exists; isolation in + * Unit relies on uid/namespace/seccomp boundaries, not capset. + */ + #include #if (NXT_HAVE_LINUX_CAPABILITY) diff --git a/src/nxt_cgroup.c b/src/nxt_cgroup.c index 79e240f1e..be1c879b8 100644 --- a/src/nxt_cgroup.c +++ b/src/nxt_cgroup.c @@ -9,9 +9,9 @@ static int nxt_mk_cgpath_relative(nxt_task_t *task, const char *dir, - char *cgpath); + char *cgpath, nxt_pid_t pid); static nxt_int_t nxt_mk_cgpath(nxt_task_t *task, const char *dir, - char *cgpath); + char *cgpath, nxt_pid_t pid); nxt_int_t @@ -29,7 +29,16 @@ nxt_cgroup_proc_add(nxt_task_t *task, nxt_process_t *process) return NXT_OK; } - ret = nxt_mk_cgpath(task, process->isolation.cgroup.path, cgprocs); + /* + * Resolve the cgroup path against /proc//cgroup rather than + * /proc/self/cgroup: the parent's cgroup view may differ from the + * just-forked child's, particularly when CLONE_NEWCGROUP is in play + * and the configured path is relative. Reading the child's own + * /proc entry avoids a TOCTOU where the parent moves between cgroups + * after fork() but before this write. + */ + ret = nxt_mk_cgpath(task, process->isolation.cgroup.path, cgprocs, + process->pid); if (nxt_slow_path(ret == NXT_ERROR)) { return NXT_ERROR; } @@ -41,6 +50,18 @@ nxt_cgroup_proc_add(nxt_task_t *task, nxt_process_t *process) len = strlen(cgprocs); + /* + * Stash the resolved directory before appending "/cgroup.procs" so + * nxt_cgroup_cleanup() can rmdir without re-reading /proc//cgroup, + * which is gone once the child has exited. + */ + process->isolation.cgroup.resolved_path = nxt_mp_alloc(process->mem_pool, + len + 1); + if (nxt_fast_path(process->isolation.cgroup.resolved_path != NULL)) { + nxt_memcpy(process->isolation.cgroup.resolved_path, cgprocs, len); + process->isolation.cgroup.resolved_path[len] = '\0'; + } + len = snprintf(cgprocs + len, NXT_MAX_PATH_LEN - len, "/cgroup.procs"); if (nxt_slow_path(len >= NXT_MAX_PATH_LEN - len)) { nxt_errno = ENAMETOOLONG; @@ -71,26 +92,50 @@ nxt_cgroup_cleanup(nxt_task_t *task, const nxt_process_t *process) char cgroot[NXT_MAX_PATH_LEN], cgpath[NXT_MAX_PATH_LEN]; nxt_int_t ret; - ret = nxt_mk_cgpath(task, "", cgroot); + /* + * cgroot is the parent process's own cgroup directory; we must not + * rmdir it. Resolved against /proc/self/cgroup (pid=0): the child + * is gone by the time cleanup runs, so /proc//cgroup no + * longer exists. The TOCTOU concern that motivated using the + * child's view in nxt_cgroup_proc_add() does not apply at cleanup + * — we just need a stop boundary, and rmdir on the parent's own + * cgroup will fail anyway because it is non-empty. + */ + ret = nxt_mk_cgpath(task, "", cgroot, 0); if (nxt_slow_path(ret == NXT_ERROR)) { return; } - ret = nxt_mk_cgpath(task, process->isolation.cgroup.path, cgpath); - if (nxt_slow_path(ret == NXT_ERROR)) { + /* + * Use the resolved path cached by nxt_cgroup_proc_add(); falling + * back to /proc//cgroup here would fail with ENOENT. If the + * cache was missed (e.g. mp_alloc failure during add), there is + * nothing to clean up that we can address safely — bail out. + */ + if (process->isolation.cgroup.resolved_path == NULL) { + return; + } + + ret = snprintf(cgpath, sizeof(cgpath), "%s", + process->isolation.cgroup.resolved_path); + if (nxt_slow_path(ret < 0 || (size_t) ret >= sizeof(cgpath))) { return; } while (*cgpath != '\0' && strcmp(cgroot, cgpath) != 0) { rmdir(cgpath); ptr = strrchr(cgpath, '/'); + if (ptr == NULL) { + break; + } *ptr = '\0'; } } static int -nxt_mk_cgpath_relative(nxt_task_t *task, const char *dir, char *cgpath) +nxt_mk_cgpath_relative(nxt_task_t *task, const char *dir, char *cgpath, + nxt_pid_t pid) { int i, len; char *buf, *ptr; @@ -98,8 +143,21 @@ nxt_mk_cgpath_relative(nxt_task_t *task, const char *dir, char *cgpath) size_t size; ssize_t nread; nxt_bool_t found; + char procpath[NXT_MAX_PATH_LEN]; + + if (pid > 0) { + len = snprintf(procpath, sizeof(procpath), "/proc/%d/cgroup", + (int) pid); + if (len < 0 || (size_t) len >= sizeof(procpath)) { + nxt_errno = ENAMETOOLONG; + return -1; + } + } else { + nxt_memcpy(procpath, "/proc/self/cgroup", + sizeof("/proc/self/cgroup")); + } - fp = nxt_file_fopen(task, "/proc/self/cgroup", "re"); + fp = nxt_file_fopen(task, procpath, "re"); if (nxt_slow_path(fp == NULL)) { return -1; } @@ -145,7 +203,7 @@ nxt_mk_cgpath_relative(nxt_task_t *task, const char *dir, char *cgpath) static nxt_int_t -nxt_mk_cgpath(nxt_task_t *task, const char *dir, char *cgpath) +nxt_mk_cgpath(nxt_task_t *task, const char *dir, char *cgpath, nxt_pid_t pid) { int len; @@ -154,9 +212,12 @@ nxt_mk_cgpath(nxt_task_t *task, const char *dir, char *cgpath) * the cgroup path include the main unit processes cgroup. I.e * * NXT_CGROUP_ROOT/
/ + * + * pid: read /proc//cgroup so the path reflects the just-forked + * child's cgroup view (or 0 to fall back to /proc/self/cgroup). */ if (dir[0] != '/') { - len = nxt_mk_cgpath_relative(task, dir, cgpath); + len = nxt_mk_cgpath_relative(task, dir, cgpath, pid); } else { len = snprintf(cgpath, NXT_MAX_PATH_LEN, NXT_CGROUP_ROOT "%s", dir); } diff --git a/src/nxt_conf.c b/src/nxt_conf.c index bb229c331..ec4ed7b07 100644 --- a/src/nxt_conf.c +++ b/src/nxt_conf.c @@ -103,6 +103,23 @@ typedef struct { static nxt_int_t nxt_conf_path_next_token(nxt_conf_path_parse_t *parse, nxt_str_t *token); +/* + * Hard caps on JSON inputs to the controller. Both values are + * intentionally well above any legitimate Unit config (configs rarely + * nest more than 6 levels; the largest production configs we've seen + * have a few thousand elements per array). These exist to bound the + * controller's stack and heap usage on malformed input. + */ +#define NXT_CONF_JSON_MAX_DEPTH 100 +#define NXT_CONF_JSON_MAX_ELEMENTS 100000 + +/* + * Recursion-depth counter. The controller is single-threaded so a + * file-static suffices. Reset on every entry to nxt_conf_json_parse() + * so a stale value from an aborted prior call cannot leak. + */ +static nxt_uint_t nxt_conf_json_parse_depth; + static u_char *nxt_conf_json_skip_space(u_char *start, const u_char *end); static u_char *nxt_conf_json_parse_value(nxt_mp_t *mp, nxt_conf_value_t *value, u_char *start, u_char *end, nxt_conf_json_error_t *error); @@ -1277,6 +1294,8 @@ nxt_conf_json_parse(nxt_mp_t *mp, u_char *start, u_char *end, return NULL; } + nxt_conf_json_parse_depth = 0; + p = nxt_conf_json_parse_value(mp, value, p, end, error); if (nxt_slow_path(p == NULL)) { @@ -1393,10 +1412,34 @@ nxt_conf_json_parse_value(nxt_mp_t *mp, nxt_conf_value_t *value, u_char *start, switch (ch) { case '{': - return nxt_conf_json_parse_object(mp, value, start, end, error); + /* + * Bound recursion depth so a deeply-nested payload such as + * "[[[[...]]]]" cannot blow the controller's stack. Only + * '{' and '[' deepen the recursion; leaf cases below don't + * need to touch the counter. + */ + if (nxt_slow_path(nxt_conf_json_parse_depth + >= NXT_CONF_JSON_MAX_DEPTH)) { + nxt_conf_json_parse_error(error, start, + "JSON nesting exceeds the maximum supported depth."); + return NULL; + } + nxt_conf_json_parse_depth++; + p = nxt_conf_json_parse_object(mp, value, start, end, error); + nxt_conf_json_parse_depth--; + return p; case '[': - return nxt_conf_json_parse_array(mp, value, start, end, error); + if (nxt_slow_path(nxt_conf_json_parse_depth + >= NXT_CONF_JSON_MAX_DEPTH)) { + nxt_conf_json_parse_error(error, start, + "JSON nesting exceeds the maximum supported depth."); + return NULL; + } + nxt_conf_json_parse_depth++; + p = nxt_conf_json_parse_array(mp, value, start, end, error); + nxt_conf_json_parse_depth--; + return p; case '"': return nxt_conf_json_parse_string(mp, value, start, end, error); @@ -1545,6 +1588,12 @@ nxt_conf_json_parse_object(nxt_mp_t *mp, nxt_conf_value_t *value, u_char *start, name = p; + if (nxt_slow_path(count >= NXT_CONF_JSON_MAX_ELEMENTS)) { + nxt_conf_json_parse_error(error, p, + "JSON object has too many members."); + goto error; + } + count++; member = nxt_mp_get(mp_temp, sizeof(nxt_conf_object_member_t)); @@ -1761,6 +1810,12 @@ nxt_conf_json_parse_array(nxt_mp_t *mp, nxt_conf_value_t *value, u_char *start, break; } + if (nxt_slow_path(count >= NXT_CONF_JSON_MAX_ELEMENTS)) { + nxt_conf_json_parse_error(error, p, + "JSON array has too many elements."); + goto error; + } + count++; element = nxt_list_add(list); diff --git a/src/nxt_conf_validation.c b/src/nxt_conf_validation.c index e1c532365..6f6259e8e 100644 --- a/src/nxt_conf_validation.c +++ b/src/nxt_conf_validation.c @@ -1327,6 +1327,18 @@ static nxt_conf_vldt_object_t nxt_conf_vldt_app_processes_members[] = { }; +/* + * The isolation validator accepts arbitrary "executable" paths and + * lets the operator disable every isolation feature here. This is + * intentional: any peer who can write to the control socket already + * has the same authority as the unitd main process (see the + * SO_PEERCRED check landed in andypost/unit#14 — non-root local + * users are rejected at the socket layer, not by this validator). + * Allow-listing executable paths or forcing isolation = true here + * is a deployment policy decision, not a config-schema concern; + * deployments needing that should add a wrapping admission gate + * upstream of the control API. + */ static nxt_conf_vldt_object_t nxt_conf_vldt_app_isolation_members[] = { { .name = nxt_string("namespaces"), diff --git a/src/nxt_conn_accept.c b/src/nxt_conn_accept.c index e0f09a4d0..7e2576705 100644 --- a/src/nxt_conn_accept.c +++ b/src/nxt_conn_accept.c @@ -165,6 +165,18 @@ nxt_conn_io_accept(nxt_task_t *task, void *obj, void *data) return; } + /* + * Set FD_CLOEXEC so the accepted client socket does not leak into + * spawned application processes (the accept4()-based path uses + * SOCK_CLOEXEC instead). + */ + if (nxt_slow_path(fcntl(s, F_SETFD, FD_CLOEXEC) == -1)) { + nxt_alert(task, "fcntl(%d, F_SETFD, FD_CLOEXEC) failed %E", s, + nxt_errno); + nxt_socket_close(task, s); + return; + } + c->socket.fd = s; #if (NXT_LINUX) diff --git a/src/nxt_controller.c b/src/nxt_controller.c index eec3c9ff8..e3eaeb546 100644 --- a/src/nxt_controller.c +++ b/src/nxt_controller.c @@ -709,6 +709,77 @@ nxt_runtime_controller_socket(nxt_task_t *task, nxt_runtime_t *rt) } +static nxt_int_t +nxt_controller_check_peer_cred(nxt_task_t *task, nxt_conn_t *c) +{ +#if (NXT_HAVE_UNIX_DOMAIN) + /* + * The control socket is the privilege boundary for the REST API: + * filesystem perms on `control.unit.sock` are defense-in-depth, not the + * boundary itself. Require the peer's effective UID to match unitd's + * (or be root) so a local user with directory-write permission cannot + * mutate config by hand-crafting connections. + */ + if (c->remote == NULL + || c->remote->u.sockaddr.sa_family != AF_UNIX) + { + return NXT_OK; + } + +#if (NXT_HAVE_UCRED) + { + struct ucred cred; + socklen_t len = sizeof(cred); + + if (getsockopt(c->socket.fd, SOL_SOCKET, SO_PEERCRED, &cred, &len) + != 0) + { + nxt_alert(task, "controller: SO_PEERCRED failed %E", nxt_errno); + return NXT_ERROR; + } + + if (cred.uid != 0 && cred.uid != nxt_euid) { + nxt_alert(task, "controller: rejecting connection from uid %d " + "(unitd uid %d); set socket permissions accordingly", + (int) cred.uid, (int) nxt_euid); + return NXT_ERROR; + } + + return NXT_OK; + } +#elif (defined(__FreeBSD__) || defined(__APPLE__) || defined(__OpenBSD__) \ + || defined(__NetBSD__) || defined(__DragonFly__)) + { + uid_t euid; + gid_t egid; + + if (getpeereid(c->socket.fd, &euid, &egid) != 0) { + nxt_alert(task, "controller: getpeereid failed %E", nxt_errno); + return NXT_ERROR; + } + + if (euid != 0 && euid != nxt_euid) { + nxt_alert(task, "controller: rejecting connection from uid %d " + "(unitd uid %d)", (int) euid, (int) nxt_euid); + return NXT_ERROR; + } + + return NXT_OK; + } +#else + /* + * No peer-credential primitive on this platform; rely on filesystem + * permissions of control.unit.sock. Operators must restrict the path. + */ + return NXT_OK; +#endif + +#else /* !NXT_HAVE_UNIX_DOMAIN */ + return NXT_OK; +#endif +} + + static void nxt_controller_conn_init(nxt_task_t *task, void *obj, void *data) { @@ -721,6 +792,11 @@ nxt_controller_conn_init(nxt_task_t *task, void *obj, void *data) nxt_debug(task, "controller conn init fd:%d", c->socket.fd); + if (nxt_slow_path(nxt_controller_check_peer_cred(task, c) != NXT_OK)) { + nxt_controller_conn_free(task, c, NULL); + return; + } + r = nxt_mp_zget(c->mem_pool, sizeof(nxt_controller_request_t)); if (nxt_slow_path(r == NULL)) { nxt_controller_conn_free(task, c, NULL); diff --git a/src/nxt_epoll_engine.c b/src/nxt_epoll_engine.c index d53df1bc4..a4d49ceb0 100644 --- a/src/nxt_epoll_engine.c +++ b/src/nxt_epoll_engine.c @@ -294,14 +294,20 @@ nxt_epoll_test_accept4(nxt_event_engine_t *engine, nxt_conn_io_t *io) #if (NXT_HAVE_ACCEPT4) + /* + * Probe accept4() availability. The call is expected to fail + * (fd is -1) -- only ENOSYS indicates the syscall is missing + * and warrants a fallback to plain accept(). Any other errno + * (EBADF on most kernels) means accept4() is supported. + */ (void) accept4(-1, NULL, NULL, SOCK_NONBLOCK); - if (nxt_errno != NXT_ENOSYS) { - handler = nxt_epoll_conn_io_accept4; - - } else { + if (nxt_errno == NXT_ENOSYS) { nxt_log(&engine->task, NXT_LOG_INFO, "accept4() failed %E", NXT_ENOSYS); + + } else { + handler = nxt_epoll_conn_io_accept4; } #endif @@ -1025,7 +1031,7 @@ nxt_epoll_conn_io_accept4(nxt_task_t *task, void *obj, void *data) * The returned socklen is ignored here, * see comment in nxt_conn_io_accept(). */ - s = accept4(lev->socket.fd, sa, &socklen, SOCK_NONBLOCK); + s = accept4(lev->socket.fd, sa, &socklen, SOCK_NONBLOCK | SOCK_CLOEXEC); if (s != -1) { c->socket.fd = s; diff --git a/src/nxt_file.c b/src/nxt_file.c index 4047d9d7c..66edb1adc 100644 --- a/src/nxt_file.c +++ b/src/nxt_file.c @@ -725,22 +725,58 @@ nxt_int_t nxt_pipe_create(nxt_task_t *task, nxt_fd_t *pp, nxt_bool_t nbread, nxt_bool_t nbwrite) { +#if (NXT_HAVE_PIPE2) + if (pipe2(pp, O_CLOEXEC) == 0) { + goto created; + } + + /* + * pipe2() is probed at compile time only, so a binary built where the + * headers advertise it can still run on an older kernel/libc that + * returns ENOSYS. Fall back to pipe() + FD_CLOEXEC in that case + * instead of failing every pipe creation. + */ + if (nxt_errno != NXT_ENOSYS) { + nxt_alert(task, "pipe2() failed %E", nxt_errno); + + return NXT_ERROR; + } +#endif + if (pipe(pp) != 0) { nxt_alert(task, "pipe() failed %E", nxt_errno); return NXT_ERROR; } + /* + * Set FD_CLOEXEC on both ends so that pipe fds do not leak across + * exec() into spawned application processes. + */ + if (fcntl(pp[0], F_SETFD, FD_CLOEXEC) == -1 + || fcntl(pp[1], F_SETFD, FD_CLOEXEC) == -1) + { + nxt_alert(task, "fcntl(F_SETFD, FD_CLOEXEC) failed %E", nxt_errno); + nxt_pipe_close(task, pp); + return NXT_ERROR; + } + +#if (NXT_HAVE_PIPE2) +created: +#endif + nxt_debug(task, "pipe(): %FD:%FD", pp[0], pp[1]); if (nbread) { if (nxt_fd_nonblocking(task, pp[0]) != NXT_OK) { + nxt_pipe_close(task, pp); return NXT_ERROR; } } if (nbwrite) { if (nxt_fd_nonblocking(task, pp[1]) != NXT_OK) { + nxt_pipe_close(task, pp); return NXT_ERROR; } } diff --git a/src/nxt_h1proto.c b/src/nxt_h1proto.c index fb8c82e1b..c71e23529 100644 --- a/src/nxt_h1proto.c +++ b/src/nxt_h1proto.c @@ -2940,11 +2940,60 @@ nxt_h1p_peer_body_process(nxt_task_t *task, nxt_http_peer_t *peer, } else if (h1p->remainder > 0) { length = nxt_buf_chain_length(out); - h1p->remainder -= length; - if (h1p->remainder == 0) { + /* + * Cast to nxt_off_t can wrap to negative on 64-bit if length + * exceeds NXT_OFF_T_MAX; check that explicitly as well as the + * overrun condition. + */ + if (nxt_slow_path((nxt_off_t) length < 0 + || (nxt_off_t) length > h1p->remainder)) + { + nxt_buf_t *b; + size_t trimmed; + + /* + * Upstream sent more body bytes than its Content-Length + * declared. Truncate the buf chain to remainder bytes so + * we never forward the excess past the Content-Length we + * already advertised downstream, then flag inconsistent + * and close. + */ + nxt_log(task, NXT_LOG_WARN, + "upstream sent %uz body bytes past Content-Length " + "(remainder %O)", length, h1p->remainder); + + trimmed = 0; + for (b = out; b != NULL; b = b->next) { + size_t bsz; + + if (nxt_buf_is_sync(b)) { + continue; + } + bsz = b->mem.free - b->mem.pos; + if (trimmed + bsz <= (size_t) h1p->remainder) { + trimmed += bsz; + continue; + } + /* Trim this buf to fit, drop everything after it. */ + b->mem.free = b->mem.pos + + (size_t) h1p->remainder - trimmed; + b->next = NULL; + break; + } + + peer->request->inconsistent = 1; + h1p->remainder = 0; nxt_buf_chain_add(&out, nxt_http_buf_last(peer->request)); peer->closed = 1; + + } else { + h1p->remainder -= length; + + if (h1p->remainder == 0) { + nxt_buf_chain_add(&out, nxt_http_buf_last(peer->request)); + peer->closed = 1; + } } } diff --git a/src/nxt_h1proto_websocket.c b/src/nxt_h1proto_websocket.c index 7be190f6c..d2cbee7ce 100644 --- a/src/nxt_h1proto_websocket.c +++ b/src/nxt_h1proto_websocket.c @@ -68,6 +68,9 @@ static const nxt_ws_error_t nxt_ws_err_invalid_opcode = { static const nxt_ws_error_t nxt_ws_err_cont_expected = { NXT_WEBSOCKET_CR_PROTOCOL_ERROR, 1, nxt_string("Continuation expected, but %ud opcode received") }; +static const nxt_ws_error_t nxt_ws_err_invalid_length = { + NXT_WEBSOCKET_CR_PROTOCOL_ERROR, + 0, nxt_string("Invalid extended payload length") }; void nxt_h1p_websocket_first_frame_start(nxt_task_t *task, nxt_http_request_t *r, @@ -267,6 +270,18 @@ nxt_h1p_conn_ws_frame_header_read(nxt_task_t *task, void *obj, void *data) return; } + /* + * RFC 6455 §5.2: when payload_len == 127, the most-significant bit + * of the 8-byte extended length MUST be 0. Reject as protocol error + * before any further size arithmetic uses the value. + */ + if (nxt_slow_path(wsh->payload_len == 127 + && (wsh->payload_len_[0] & 0x80) != 0)) + { + hxt_h1p_send_ws_error(task, r, &nxt_ws_err_invalid_length); + return; + } + if ((wsh->opcode & NXT_WEBSOCKET_OP_CTRL) != 0) { if (nxt_slow_path(wsh->fin == 0)) { hxt_h1p_send_ws_error(task, r, &nxt_ws_err_ctrl_fragmented); diff --git a/src/nxt_http_compression.c b/src/nxt_http_compression.c index 079eceef2..676cb939f 100644 --- a/src/nxt_http_compression.c +++ b/src/nxt_http_compression.c @@ -274,20 +274,28 @@ nxt_http_comp_compress_static_response(nxt_task_t *task, nxt_http_request_t *r, nxt_alert(task, "ftruncate(%d<%s>, %uz) failed %E", tfile.fd, tmp_path, out_size, nxt_errno); nxt_file_close(task, &tfile); + nxt_file_close(task, *f); + *f = NULL; return NXT_ERROR; } in = nxt_mem_mmap(NULL, in_size, PROT_READ, MAP_SHARED, (*f)->fd, 0); if (nxt_slow_path(in == MAP_FAILED)) { + nxt_alert(task, "mmap(%uz) of source failed %E", in_size, nxt_errno); nxt_file_close(task, &tfile); + nxt_file_close(task, *f); + *f = NULL; return NXT_ERROR; } out = nxt_mem_mmap(NULL, out_size, PROT_READ|PROT_WRITE, MAP_SHARED, tfile.fd, 0); if (nxt_slow_path(out == MAP_FAILED)) { + nxt_alert(task, "mmap(%uz) of temp failed %E", out_size, nxt_errno); nxt_mem_munmap(in, in_size); nxt_file_close(task, &tfile); + nxt_file_close(task, *f); + *f = NULL; return NXT_ERROR; } @@ -308,6 +316,8 @@ nxt_http_comp_compress_static_response(nxt_task_t *task, nxt_http_request_t *r, nxt_file_close(task, &tfile); nxt_mem_munmap(in, in_size); nxt_mem_munmap(out, out_size); + nxt_file_close(task, *f); + *f = NULL; return NXT_ERROR; } @@ -324,6 +334,8 @@ nxt_http_comp_compress_static_response(nxt_task_t *task, nxt_http_request_t *r, nxt_alert(task, "ftruncate(%d<%s>, %uz) failed %E", tfile.fd, tmp_path, *out_total, nxt_errno); nxt_file_close(task, &tfile); + nxt_file_close(task, *f); + *f = NULL; return NXT_ERROR; } diff --git a/src/nxt_http_proxy.c b/src/nxt_http_proxy.c index 7f6ad6866..6ea87dcc1 100644 --- a/src/nxt_http_proxy.c +++ b/src/nxt_http_proxy.c @@ -418,6 +418,20 @@ nxt_http_proxy_content_length(void *ctx, nxt_http_field_t *field, if (nxt_fast_path(n >= 0)) { r->resp.content_length_n = n; + + } else { + /* + * n == -2 means the upstream Content-Length value overflows + * nxt_off_t; n == -1 is a generic parse error. Both are + * inconsistent with a usable response body length, so log and + * mark the response inconsistent rather than silently leaving + * content_length_n at -1. + */ + nxt_log(&r->task, NXT_LOG_WARN, + "upstream Content-Length \"%*s\" is invalid", + (size_t) field->value_length, field->value); + + r->inconsistent = 1; } return NXT_OK; diff --git a/src/nxt_http_route.c b/src/nxt_http_route.c index bd0646f3b..601b1749a 100644 --- a/src/nxt_http_route.c +++ b/src/nxt_http_route.c @@ -493,6 +493,13 @@ nxt_http_route_match_create(nxt_task_t *task, nxt_router_temp_conf_t *tmcf, } if (mtcf.host != NULL) { + /* + * Host matching is intentionally always case-insensitive + * (LOWCASE). Per RFC 9110 §4.2.3 the host component is + * case-insensitive, and there is no per-rule sensitivity + * flag — an admin expecting case-sensitive host filtering + * will not get it from this rule. + */ rule = nxt_http_route_rule_create(task, mp, mtcf.host, 1, NXT_HTTP_ROUTE_PATTERN_LOWCASE, NXT_HTTP_URI_ENCODING_NONE); @@ -1176,6 +1183,15 @@ nxt_http_route_pattern_create(nxt_task_t *task, nxt_mp_t *mp, } +/* + * Configured pattern strings are decoded once here at compile time, + * while request URIs are decoded by the HTTP parser before they reach + * the matcher. Both paths funnel through the same nxt_decode_uri / + * nxt_decode_uri_plus helpers; %XX semantics are symmetric, so a + * "%2e%2e" in the configured pattern will compare against the same + * bytes a request "%2e%2e" decoded into. If either helper's behaviour + * changes, route patterns and request URIs MUST be updated in lock-step. + */ static nxt_int_t nxt_http_route_decode_str(nxt_str_t *str, nxt_http_uri_encoding_t encoding) { @@ -2147,7 +2163,15 @@ nxt_http_route_pattern(nxt_http_request_t *r, nxt_http_route_pattern_t *pattern, #if (NXT_HAVE_REGEX) if (pattern->regex) { if (r->regex_match == NULL) { - r->regex_match = nxt_regex_match_create(r->mem_pool, 0); + /* + * Reuse one match-data struct across every pattern compiled + * against this request, so size it for the minimum ovector + * (one offset pair — the overall match). Captures are not + * consulted by the matcher. Passing 0 to PCRE2's + * pcre2_match_data_create() is undefined per the public + * docs; 1 is the documented minimum. + */ + r->regex_match = nxt_regex_match_create(r->mem_pool, 1); if (nxt_slow_path(r->regex_match == NULL)) { return NXT_ERROR; } diff --git a/src/nxt_http_route_addr.c b/src/nxt_http_route_addr.c index 5a0d7679d..5eee3bd16 100644 --- a/src/nxt_http_route_addr.c +++ b/src/nxt_http_route_addr.c @@ -261,6 +261,13 @@ nxt_http_route_addr_pattern_parse(nxt_mp_t *mp, goto parse_port; } + + /* + * /32 is the single-host case; set EXACT explicitly so the + * state machine reflects intent rather than relying on the + * implicit assignment in the fallthrough path below. + */ + base->match_type = NXT_HTTP_ROUTE_ADDR_EXACT; } inet->start = nxt_inet_addr(addr.start, addr.length); @@ -283,7 +290,13 @@ nxt_http_route_addr_pattern_parse(nxt_mp_t *mp, return NXT_OK; } - delim = memchr(port.start, '-', port.length - 1); + /* + * Search the entire port string for '-' (the original "-1" bound + * silently ignored a trailing dash in inputs like "8-"). If the + * dash is at either end the resulting empty half makes + * nxt_int_parse() return < 0 below, producing PATTERN_PORT_ERROR. + */ + delim = memchr(port.start, '-', port.length); if (delim != NULL) { ret = nxt_int_parse(port.start, delim - port.start); if (nxt_slow_path(ret < 0 || ret > 65535)) { diff --git a/src/nxt_http_websocket.c b/src/nxt_http_websocket.c index 1968633ea..bc9835ce4 100644 --- a/src/nxt_http_websocket.c +++ b/src/nxt_http_websocket.c @@ -82,9 +82,9 @@ nxt_http_websocket_client(nxt_task_t *task, void *obj, void *data) copy_size -= chunk_copy_size; b->mem.pos += chunk_copy_size; buf_free_size -= chunk_copy_size; + frame_size -= chunk_copy_size; } - frame_size -= copy_size; next = b->next; b->next = NULL; diff --git a/src/nxt_isolation.c b/src/nxt_isolation.c index c31a9718f..7d17c0a92 100644 --- a/src/nxt_isolation.c +++ b/src/nxt_isolation.c @@ -784,6 +784,11 @@ nxt_isolation_prepare_rootfs(nxt_task_t *task, nxt_process_t *process) const u_char *dst; nxt_fs_mount_t *mnt; nxt_process_automount_t *automount; +#if (NXT_HAVE_OPENAT2) + int rootfs_fd; + const u_char *rootfs_path; + size_t rootfs_len; +#endif automount = &process->isolation.automount; mounts = process->isolation.mounts; @@ -791,6 +796,32 @@ nxt_isolation_prepare_rootfs(nxt_task_t *task, nxt_process_t *process) n = mounts->nelts; mnt = mounts->elts; +#if (NXT_HAVE_OPENAT2) + /* + * Mount destinations live under a (possibly user-owned) rootfs. An + * attacker with write access to that tree can race mkdir->mount(2) + * by swapping a path component for a symlink pointing outside the + * rootfs. Re-open the rootfs and resolve each destination with + * RESOLVE_BENEATH so a tampered destination that escapes the + * rootfs fails the open, before we hand a path to mount(2). + * RESOLVE_NO_SYMLINKS is intentionally NOT requested: legitimate + * rootfs setups commonly contain in-tree symlinks (e.g. + * /lib -> /usr/lib) that must still resolve. + */ + rootfs_path = process->isolation.rootfs; + rootfs_len = (rootfs_path != NULL) ? nxt_strlen(rootfs_path) : 0; + + rootfs_fd = -1; + if (rootfs_len > 0) { + rootfs_fd = open((const char *) rootfs_path, + O_PATH | O_DIRECTORY | O_CLOEXEC); + if (rootfs_fd == -1) { + nxt_alert(task, "open rootfs(%s) %E", rootfs_path, nxt_errno); + return NXT_ERROR; + } + } +#endif + for (i = 0; i < n; i++) { dst = mnt[i].dst; @@ -811,12 +842,82 @@ nxt_isolation_prepare_rootfs(nxt_task_t *task, nxt_process_t *process) goto undo; } +#if (NXT_HAVE_OPENAT2) + /* + * Require a path separator right after the rootfs prefix so a + * sibling like "-helper" is not treated as living under + * "" (which would resolve a bogus relative path and abort + * startup with a spurious ENOENT). dst is always built as + * rootfs + a component starting with '/', so this byte is '/' for + * every real destination; the preceding length check makes the + * index safe to read. + */ + if (rootfs_fd != -1 + && nxt_strlen(dst) > rootfs_len + && memcmp(dst, rootfs_path, rootfs_len) == 0 + && dst[rootfs_len] == '/') + { + struct open_how how; + const char *rel; + int fd; + + rel = (const char *) dst + rootfs_len; + while (*rel == '/') { + rel++; + } + + nxt_memzero(&how, sizeof(how)); + how.flags = O_PATH | O_CLOEXEC | O_DIRECTORY; + how.resolve = RESOLVE_BENEATH; + + fd = syscall(SYS_openat2, rootfs_fd, rel, &how, sizeof(how)); + if (fd == -1) { + if (nxt_errno == ENOSYS) { + /* + * openat2(2) is Linux 5.6+; older kernels return + * ENOSYS even when the userspace headers are + * present. Skip the symlink-resolution check on + * such kernels rather than failing every isolation + * setup; mount(2) below still operates on the + * caller-supplied path, just without the extra + * RESOLVE_BENEATH guard. Warn once so operators + * see the reduced security posture. + */ + static nxt_bool_t openat2_warned; + if (!openat2_warned) { + nxt_log(task, NXT_LOG_WARN, + "openat2(SYS_openat2) not supported by the " + "running kernel; rootfs mount destinations " + "are not validated against symlinks"); + openat2_warned = 1; + } + + } else { + nxt_alert(task, "mount destination %s escapes rootfs %s " + "or contains symlinks: %E", dst, rootfs_path, + nxt_errno); + ret = NXT_ERROR; + goto undo; + } + + } else { + close(fd); + } + } +#endif + ret = nxt_fs_mount(task, &mnt[i]); if (nxt_slow_path(ret != NXT_OK)) { goto undo; } } +#if (NXT_HAVE_OPENAT2) + if (rootfs_fd != -1) { + close(rootfs_fd); + } +#endif + return NXT_OK; undo: @@ -827,6 +928,12 @@ nxt_isolation_prepare_rootfs(nxt_task_t *task, nxt_process_t *process) nxt_fs_unmount(mnt[i].dst); } +#if (NXT_HAVE_OPENAT2) + if (rootfs_fd != -1) { + close(rootfs_fd); + } +#endif + return NXT_ERROR; } diff --git a/src/nxt_kqueue_engine.c b/src/nxt_kqueue_engine.c index a7a5a29ed..e8e282437 100644 --- a/src/nxt_kqueue_engine.c +++ b/src/nxt_kqueue_engine.c @@ -953,6 +953,17 @@ nxt_kqueue_conn_io_accept(nxt_task_t *task, void *obj, void *data) s = accept(lev->socket.fd, sa, &socklen); if (s != -1) { + /* + * Set FD_CLOEXEC so the accepted client socket does not leak + * into spawned application processes. + */ + if (nxt_slow_path(fcntl(s, F_SETFD, FD_CLOEXEC) == -1)) { + nxt_alert(task, "fcntl(%d, F_SETFD, FD_CLOEXEC) failed %E", s, + nxt_errno); + nxt_socket_close(task, s); + return; + } + c->socket.fd = s; nxt_debug(task, "accept(%d): %d", lev->socket.fd, s); diff --git a/src/nxt_openssl.c b/src/nxt_openssl.c index c3db51ee2..7902640da 100644 --- a/src/nxt_openssl.c +++ b/src/nxt_openssl.c @@ -519,7 +519,9 @@ nxt_openssl_chain_file(nxt_task_t *task, SSL_CTX *ctx, nxt_tls_conf_t *conf, goto end; } - if (SSL_CTX_use_PrivateKey(ctx, key) == 1) { + if (SSL_CTX_use_PrivateKey(ctx, key) == 1 + && SSL_CTX_check_private_key(ctx) == 1) + { ret = NXT_OK; } @@ -983,7 +985,7 @@ nxt_openssl_bundle_hash_insert(nxt_task_t *task, nxt_lvlhsh_t *lvlhsh, str = item->name; - if (item->name.start[0] == '*') { + if (item->name.length > 0 && item->name.start[0] == '*') { item->name.start++; item->name.length--; diff --git a/src/nxt_php_sapi.c b/src/nxt_php_sapi.c index 240541540..65121ec52 100644 --- a/src/nxt_php_sapi.c +++ b/src/nxt_php_sapi.c @@ -883,6 +883,7 @@ nxt_php_set_target(nxt_task_t *task, nxt_php_target_t *target, p = nxt_realpath(tmp); if (nxt_slow_path(p == NULL)) { nxt_alert(task, "script realpath(%s) failed %E", tmp, nxt_errno); + nxt_free(tmp); return NXT_ERROR; } @@ -1471,6 +1472,11 @@ nxt_php_dynamic_request(nxt_php_run_ctx_t *ctx, nxt_unit_request_t *r) path.length = ctx->path_info.start - path.start; ctx->path_info.length = r->path_length - path.length; + /* + * ctx->path_info points into the shmem-mapped request buffer + * and is not NUL-terminated. All consumers below use the + * length field; do not pass path_info.start to C-string APIs. + */ } else if (path.start[path.length - 1] == '/') { script_name = *ctx->index; @@ -1792,7 +1798,9 @@ nxt_php_send_headers(sapi_headers_struct *sapi_headers TSRMLS_DC) } value = colon + 1; - while(isspace(*value)) { + while (value < h->header + h->header_len + && isspace((unsigned char) *value)) + { value++; } @@ -2389,7 +2397,25 @@ nxt_php_async_load_entrypoint(nxt_task_t *task, nxt_str_t *entrypoint) nxt_php_request_callback ? Z_TYPE_P(nxt_php_request_callback) : -1); } - /* DON'T call php_request_shutdown() - we want the callback to persist after fork */ + /* + * DON'T call php_request_shutdown() — we want the callback zval to + * persist across the prototype → worker fork. But scrub any + * pending exception left in EG: if the entrypoint script raised + * something that wasn't caught before HttpServer->onRequest() + * stored the callback, every forked worker would inherit the + * exception on its first request. The callback zval itself + * (nxt_php_request_callback) is not on the exception path; it + * was registered explicitly by the userland code. + * + * Other EG globals (output buffers, error_reporting, the symbol + * table) are also inherited, but those are reset implicitly when + * the worker enters a fresh request_init. Exception state is + * the one that bites hardest because the next request's + * php_execute_script() can early-exit on a stale EG(exception). + */ + if (EG(exception) != NULL) { + zend_clear_exception(); + } return NXT_OK; } diff --git a/src/nxt_port.c b/src/nxt_port.c index b9964df46..15a6c59c9 100644 --- a/src/nxt_port.c +++ b/src/nxt_port.c @@ -169,6 +169,24 @@ nxt_port_enable(nxt_task_t *task, nxt_port_t *port, } +/* + * TODO(audit-V5-medium): sender-type ACL for privileged port messages. + * + * The audit recommends rejecting application workers / prototypes + * that forge cert/script/socket/access-log/etc. messages to the + * main process. A first draft was implemented but caused + * regressions in the isolation test suite — the kernel-validated + * sender PID (SCM_CREDENTIALS) is not consistently available on + * the existing socketpair setup, and the message-dispatch path + * runs before the prototype's process registration completes. + * + * Reverted to the pre-audit dispatch path here and deferred the + * ACL to a follow-up that introduces SO_PASSCRED on the relevant + * socketpairs and updates the process-registration ordering. + * See https://github.com/andypost/unit/pull/14 review thread. + */ + + static void nxt_port_handler(nxt_task_t *task, nxt_port_recv_msg_t *msg) { diff --git a/src/nxt_port_memory.c b/src/nxt_port_memory.c index ad6e97ade..9399a499e 100644 --- a/src/nxt_port_memory.c +++ b/src/nxt_port_memory.c @@ -531,6 +531,16 @@ nxt_port_get_port_incoming_mmap(nxt_task_t *task, nxt_pid_t spid, uint32_t id) if (nxt_fast_path(process->incoming.size > id)) { mmap_handler = process->incoming.elts[id].mmap_handler; + /* + * Bump refcount under the mutex so the handler cannot be unmapped + * by a concurrent peer-side close between this lookup and the + * caller's first dereference of mmap_handler->hdr. The caller + * adopts this reference and is responsible for releasing it. + */ + if (mmap_handler != NULL) { + nxt_port_mmap_handler_use(mmap_handler, 1); + } + } else { mmap_handler = NULL; @@ -543,6 +553,26 @@ nxt_port_get_port_incoming_mmap(nxt_task_t *task, nxt_pid_t spid, uint32_t id) } +/* + * Validate that a peer-supplied (chunk_id, nchunks) pair refers to a + * region wholly inside the mapped data area. Returns non-zero on + * success. Underflow-safe: subtracts on the constant side. + */ +nxt_inline nxt_bool_t +nxt_port_mmap_chunk_range_valid(nxt_chunk_id_t chunk_id, size_t nchunks) +{ + if (chunk_id >= PORT_MMAP_CHUNK_COUNT) { + return 0; + } + + if (nchunks > (size_t) PORT_MMAP_CHUNK_COUNT - chunk_id) { + return 0; + } + + return 1; +} + + nxt_buf_t * nxt_port_mmap_get_buf(nxt_task_t *task, nxt_port_mmaps_t *mmaps, size_t size) { @@ -679,8 +709,31 @@ nxt_port_mmap_get_incoming_buf(nxt_task_t *task, nxt_port_t *port, return NULL; } + /* + * mmap_msg fields originate from a peer process; reject offsets + * that would point outside the mapped data area before they reach + * pointer arithmetic below. + */ + nchunks = mmap_msg->size / PORT_MMAP_CHUNK_SIZE; + if ((mmap_msg->size % PORT_MMAP_CHUNK_SIZE) != 0) { + nchunks++; + } + + if (nxt_slow_path(!nxt_port_mmap_chunk_range_valid(mmap_msg->chunk_id, + nchunks))) + { + nxt_alert(task, "invalid mmap message from pid %PI: " + "chunk_id %uD, size %uD (chunks %uz, max %d)", + spid, mmap_msg->chunk_id, mmap_msg->size, + nchunks, PORT_MMAP_CHUNK_COUNT); + + nxt_port_mmap_handler_use(mmap_handler, -1); + return NULL; + } + b = nxt_buf_mem_ts_alloc(task, port->mem_pool, 0); if (nxt_slow_path(b == NULL)) { + nxt_port_mmap_handler_use(mmap_handler, -1); return NULL; } @@ -688,11 +741,6 @@ nxt_port_mmap_get_incoming_buf(nxt_task_t *task, nxt_port_t *port, nxt_buf_set_port_mmap(b); - nchunks = mmap_msg->size / PORT_MMAP_CHUNK_SIZE; - if ((mmap_msg->size % PORT_MMAP_CHUNK_SIZE) != 0) { - nchunks++; - } - hdr = mmap_handler->hdr; b->mem.start = nxt_port_mmap_chunk_start(hdr, mmap_msg->chunk_id); @@ -701,7 +749,7 @@ nxt_port_mmap_get_incoming_buf(nxt_task_t *task, nxt_port_t *port, b->mem.end = b->mem.start + nchunks * PORT_MMAP_CHUNK_SIZE; b->parent = mmap_handler; - nxt_port_mmap_handler_use(mmap_handler, 1); + /* Adopts the reference taken by nxt_port_get_port_incoming_mmap(). */ nxt_debug(task, "incoming mmap buf allocation: %p [%p,%uz] %PI->%PI,%d,%d", b, b->mem.start, b->mem.end - b->mem.start, diff --git a/src/nxt_process.h b/src/nxt_process.h index 42fd1bed2..e4dfd50d1 100644 --- a/src/nxt_process.h +++ b/src/nxt_process.h @@ -76,6 +76,13 @@ typedef struct { struct nxt_cgroup_s { char *path; + /* + * Resolved cgroup directory cached by nxt_cgroup_proc_add() so + * nxt_cgroup_cleanup() does not need to re-resolve via + * /proc//cgroup after the child has already exited (which + * would fail with ENOENT and leak cgroup directories). + */ + char *resolved_path; }; diff --git a/src/nxt_string.c b/src/nxt_string.c index a23ee058f..551ba72b9 100644 --- a/src/nxt_string.c +++ b/src/nxt_string.c @@ -316,6 +316,10 @@ nxt_rmemstrn(const u_char *s, const u_char *end, const char *ss, size_t length) u_char c1, c2; const u_char *s1, *s2; + if (nxt_slow_path(length == 0 || length > (size_t) (end - s))) { + return NULL; + } + s1 = end - length; s2 = (u_char *) ss; c2 = *s2++; @@ -715,7 +719,12 @@ nxt_is_complex_uri_encoded(u_char *src, size_t length) if (nxt_uri_escape[ch >> 5] & (1U << (ch & 0x1f))) { if (ch == '%') { - if (end - src < 2) { + /* + * The two pre-increments below read src+1 and src+2, + * so at least three bytes ('%' plus two hex digits) + * must remain in the buffer. + */ + if (end - src < 3) { return 0; } diff --git a/src/nxt_unit.c b/src/nxt_unit.c index 7d523beb8..33d90e4fa 100644 --- a/src/nxt_unit.c +++ b/src/nxt_unit.c @@ -203,6 +203,92 @@ static void nxt_unit_lvlhsh_free(void *data, void *p); static int nxt_unit_memcasecmp(const void *p1, const void *p2, size_t length); +/* + * Compute the response buffer size for a given (max_fields_count, + * max_fields_size) pair, rejecting integer overflow. Both inputs come + * from the application; an overflow here would yield an undersized + * allocation that subsequent field memcpy()s overrun. + */ +static int +nxt_unit_response_buf_size(uint32_t max_fields_count, + uint32_t max_fields_size, uint32_t *buf_size) +{ + /* + * Each field name and value is 0-terminated by libunit, hence + * the '+ 2' per field (matches the historical formula). + */ + uint32_t total; + + if (max_fields_count + > (UINT32_MAX - (uint32_t) sizeof(nxt_unit_response_t)) + / (uint32_t) (sizeof(nxt_unit_field_t) + 2)) + { + return NXT_UNIT_ERROR; + } + + total = (uint32_t) sizeof(nxt_unit_response_t) + + max_fields_count * (uint32_t) (sizeof(nxt_unit_field_t) + 2); + + if (max_fields_size > UINT32_MAX - total) { + return NXT_UNIT_ERROR; + } + + *buf_size = total + max_fields_size; + + return NXT_UNIT_OK; +} + + +/* + * Validate that an sptr field within a peer-supplied buffer dereferences + * to a [length]-byte range that is wholly inside the buffer. Used at + * request-arrival time to vet every sptr in nxt_unit_request_t before + * the application sees it. + * + * sptr->base aliases the address of the sptr itself (the union encodes + * an offset relative to that location), so this also implicitly checks + * that the sptr is inside the buffer. + */ +static int +nxt_unit_sptr_in_buf(nxt_unit_sptr_t *sptr, uint32_t length, + void *buf_start, uint32_t buf_size) +{ + size_t sptr_off, end_off; + + if ((uint8_t *) sptr < (uint8_t *) buf_start) { + return 0; + } + + sptr_off = (uint8_t *) sptr - (uint8_t *) buf_start; + + /* + * The sptr struct itself must fit inside the buffer before we + * dereference sptr->offset. Reject a buffer too small to hold an + * sptr first: buf_size is uint32_t and sizeof() is size_t, so + * "buf_size - sizeof(nxt_unit_sptr_t)" is evaluated in size_t and + * underflows to a huge value -- wrongly passing the bound check -- + * when buf_size is smaller than the struct. The short-circuit keeps + * the subtraction below from ever underflowing. + */ + if (buf_size < sizeof(nxt_unit_sptr_t) + || sptr_off > buf_size - sizeof(nxt_unit_sptr_t)) + { + return 0; + } + + if (sptr->offset > buf_size - sptr_off) { + return 0; + } + + end_off = sptr_off + sptr->offset; + if (length > buf_size - end_off) { + return 0; + } + + return 1; +} + + struct nxt_unit_mmap_buf_s { nxt_unit_buf_t buf; @@ -1303,6 +1389,45 @@ nxt_unit_process_req_headers(nxt_unit_ctx_t *ctx, nxt_unit_recv_msg_t *recv_msg, return NXT_UNIT_ERROR; } + /* + * Validate every sptr in the request struct before any code path + * dereferences it. Offsets originate from the router (a more + * privileged peer) but the libunit ABI is also reachable from + * attacker-influenced input shapes; keeping the validation + * co-located with arrival makes the trust boundary explicit. + */ + { + nxt_unit_request_t *vr = recv_msg->start; + uint32_t vsize = recv_msg->size; + + if (nxt_slow_path( + !nxt_unit_sptr_in_buf(&vr->method, vr->method_length, + recv_msg->start, vsize) + || !nxt_unit_sptr_in_buf(&vr->version, vr->version_length, + recv_msg->start, vsize) + || !nxt_unit_sptr_in_buf(&vr->remote, vr->remote_length, + recv_msg->start, vsize) + || !nxt_unit_sptr_in_buf(&vr->local_addr, vr->local_addr_length, + recv_msg->start, vsize) + || !nxt_unit_sptr_in_buf(&vr->local_port, vr->local_port_length, + recv_msg->start, vsize) + || !nxt_unit_sptr_in_buf(&vr->server_name, vr->server_name_length, + recv_msg->start, vsize) + || !nxt_unit_sptr_in_buf(&vr->target, vr->target_length, + recv_msg->start, vsize) + || !nxt_unit_sptr_in_buf(&vr->path, vr->path_length, + recv_msg->start, vsize) + || !nxt_unit_sptr_in_buf(&vr->query, vr->query_length, + recv_msg->start, vsize) + || !nxt_unit_sptr_in_buf(&vr->preread_content, 0, + recv_msg->start, vsize))) + { + nxt_unit_warn(ctx, "#%"PRIu32": malformed request: " + "sptr out of buffer", recv_msg->stream); + return NXT_UNIT_ERROR; + } + } + req_impl = nxt_unit_request_info_get(ctx); if (nxt_slow_path(req_impl == NULL)) { nxt_unit_warn(ctx, "#%"PRIu32": request info allocation failed", @@ -1679,11 +1804,30 @@ nxt_unit_process_websocket(nxt_unit_ctx_t *ctx, nxt_unit_recv_msg_t *recv_msg) } ws_impl->ws.header = (void *) b->buf.start; - ws_impl->ws.payload_len = nxt_websocket_frame_payload_len( - ws_impl->ws.header); hsize = nxt_websocket_frame_header_size(ws_impl->ws.header); + /* + * Reject truncated frames before reading the extended length / + * mask fields or advancing buf.free past buf.end. A 2-byte + * frame whose header advertises a 14-byte extended length would + * otherwise OOB-read b->buf.start + hsize - 4 (mask) and the + * 8-byte extended length, and break the buffer invariant. + */ + if (nxt_slow_path((size_t) (b->buf.end - b->buf.start) < hsize)) { + nxt_unit_warn(ctx, "#%"PRIu32": truncated websocket frame: " + "hsize %zu > buf size %zu", + req_impl->stream, hsize, + (size_t) (b->buf.end - b->buf.start)); + + nxt_unit_websocket_frame_release(&ws_impl->ws); + + return NXT_UNIT_ERROR; + } + + ws_impl->ws.payload_len = nxt_websocket_frame_payload_len( + ws_impl->ws.header); + if (ws_impl->ws.header->mask) { ws_impl->ws.mask = (uint8_t *) b->buf.start + hsize - 4; @@ -2042,13 +2186,15 @@ nxt_unit_response_init(nxt_unit_request_info_t *req, nxt_unit_req_debug(req, "duplicate response init"); } - /* - * Each field name and value 0-terminated by libunit, - * this is the reason of '+ 2' below. - */ - buf_size = sizeof(nxt_unit_response_t) - + max_fields_count * (sizeof(nxt_unit_field_t) + 2) - + max_fields_size; + if (nxt_slow_path(nxt_unit_response_buf_size(max_fields_count, + max_fields_size, + &buf_size) != NXT_UNIT_OK)) + { + nxt_unit_req_alert(req, "init: response buffer size overflow " + "(max_fields_count=%"PRIu32", max_fields_size=%"PRIu32")", + max_fields_count, max_fields_size); + return NXT_UNIT_ERROR; + } if (nxt_slow_path(req->response_buf != NULL)) { buf = req->response_buf; @@ -2121,13 +2267,15 @@ nxt_unit_response_realloc(nxt_unit_request_info_t *req, return NXT_UNIT_ERROR; } - /* - * Each field name and value 0-terminated by libunit, - * this is the reason of '+ 2' below. - */ - buf_size = sizeof(nxt_unit_response_t) - + max_fields_count * (sizeof(nxt_unit_field_t) + 2) - + max_fields_size; + if (nxt_slow_path(nxt_unit_response_buf_size(max_fields_count, + max_fields_size, + &buf_size) != NXT_UNIT_OK)) + { + nxt_unit_req_alert(req, "realloc: response buffer size overflow " + "(max_fields_count=%"PRIu32", max_fields_size=%"PRIu32")", + max_fields_count, max_fields_size); + return NXT_UNIT_ERROR; + } nxt_unit_req_debug(req, "realloc %"PRIu32"", buf_size); @@ -3454,6 +3602,12 @@ nxt_unit_websocket_retain(nxt_unit_websocket_frame_t *ws) hsize = nxt_websocket_frame_header_size(b); + /* Same OOB-read hazard as nxt_unit_process_websocket(). */ + if (nxt_slow_path(hsize > size)) { + nxt_unit_free(ws->req->ctx, b); + return NXT_UNIT_ERROR; + } + ws_impl->buf->buf.start = b; ws_impl->buf->buf.free = b + hsize; ws_impl->buf->buf.end = b + size; diff --git a/src/perl/nxt_perl_psgi.c b/src/perl/nxt_perl_psgi.c index c16b21737..05eee2372 100644 --- a/src/perl/nxt_perl_psgi.c +++ b/src/perl/nxt_perl_psgi.c @@ -572,6 +572,12 @@ nxt_perl_psgi_ctx_init(const char *script, nxt_perl_psgi_ctx_t *pctx) fail: + /* + * Scrub ERRSV so a stale exception from eval_pv() / io_init() does + * not propagate to the next interpreter created on this pctx slot. + */ + sv_setsv(ERRSV, &PL_sv_undef); + nxt_perl_psgi_io_release(my_perl, &pctx->arg_input); nxt_perl_psgi_io_release(my_perl, &pctx->arg_error); diff --git a/src/python/nxt_python_asgi_lifespan.c b/src/python/nxt_python_asgi_lifespan.c index 8ef783779..47a06317c 100644 --- a/src/python/nxt_python_asgi_lifespan.c +++ b/src/python/nxt_python_asgi_lifespan.c @@ -150,6 +150,24 @@ nxt_py_asgi_lifespan_target_startup(nxt_py_asgi_ctx_data_t *ctx_data, return NULL; } + /* + * Initialize every pointer field the deallocator may touch BEFORE + * any path can goto release_lifespan / release_receive / release_send, + * since PyObject_New returns uninitialized memory and the dealloc + * runs Py_CLEAR on these unconditionally. + */ + lifespan->ctx_data = ctx_data; + lifespan->disabled = 0; + lifespan->startup_received = 0; + lifespan->startup_sent = 0; + lifespan->shutdown_received = 0; + lifespan->shutdown_sent = 0; + lifespan->shutdown_called = 0; + lifespan->startup_future = NULL; + lifespan->shutdown_future = NULL; + lifespan->receive_future = NULL; + lifespan->state = NULL; + ret = NULL; receive = PyObject_GetAttrString((PyObject *) lifespan, "receive"); @@ -159,13 +177,13 @@ nxt_py_asgi_lifespan_target_startup(nxt_py_asgi_ctx_data_t *ctx_data, } send = PyObject_GetAttrString((PyObject *) lifespan, "send"); - if (nxt_slow_path(receive == NULL)) { + if (nxt_slow_path(send == NULL)) { nxt_unit_alert(NULL, "Python failed to get 'send' method"); goto release_receive; } done = PyObject_GetAttrString((PyObject *) lifespan, "_done"); - if (nxt_slow_path(receive == NULL)) { + if (nxt_slow_path(done == NULL)) { nxt_unit_alert(NULL, "Python failed to get '_done' method"); goto release_send; } @@ -179,17 +197,6 @@ nxt_py_asgi_lifespan_target_startup(nxt_py_asgi_ctx_data_t *ctx_data, goto release_done; } - lifespan->ctx_data = ctx_data; - lifespan->disabled = 0; - lifespan->startup_received = 0; - lifespan->startup_sent = 0; - lifespan->shutdown_received = 0; - lifespan->shutdown_sent = 0; - lifespan->shutdown_called = 0; - lifespan->shutdown_future = NULL; - lifespan->receive_future = NULL; - lifespan->state = NULL; - scope = nxt_py_asgi_new_scope(NULL, nxt_py_lifespan_str, nxt_py_2_0_str); if (nxt_slow_path(scope == NULL)) { goto release_future; diff --git a/src/python/nxt_python_asgi_websocket.c b/src/python/nxt_python_asgi_websocket.c index e3c37e74e..195864b4a 100644 --- a/src/python/nxt_python_asgi_websocket.c +++ b/src/python/nxt_python_asgi_websocket.c @@ -706,6 +706,26 @@ nxt_py_asgi_websocket_suspend_frame(nxt_unit_websocket_frame_t *frame) return; } + /* + * Guard against uint64 wraparound across many fragmented frames; + * otherwise the eventual max_buffer_size check is bypassed. Run + * the check BEFORE inserting p into the pending_frames queue so a + * failure exit doesn't leak the suspended-frame slot. + */ + if (nxt_slow_path(frame->payload_len + > UINT64_MAX - ws->pending_payload_len)) + { + nxt_unit_req_alert(ws->req, + "pending_payload_len overflow on suspend."); + + nxt_unit_free(ws->req->ctx, p); + nxt_unit_websocket_done(frame); + + PyErr_SetString(PyExc_RuntimeError, + "pending_payload_len overflow on suspend."); + return; + } + p->frame = frame; nxt_queue_insert_tail(&ws->pending_frames, &p->link); @@ -750,6 +770,18 @@ nxt_py_asgi_websocket_pop_msg(nxt_py_asgi_websocket_t *ws, } else { if (frame != NULL) { + if (nxt_slow_path(frame->payload_len + > UINT64_MAX - ws->pending_payload_len)) + { + nxt_unit_req_alert(ws->req, + "pending_payload_len overflow on pop."); + + nxt_unit_websocket_done(frame); + + return PyErr_Format(PyExc_RuntimeError, + "pending_payload_len overflow on pop."); + } + payload_len = ws->pending_payload_len + frame->payload_len; fin_frame = frame; diff --git a/src/python/nxt_python_wsgi.c b/src/python/nxt_python_wsgi.c index 6bbf9e397..ecbc30fb0 100644 --- a/src/python/nxt_python_wsgi.c +++ b/src/python/nxt_python_wsgi.c @@ -453,6 +453,17 @@ nxt_python_request_handler(nxt_unit_request_info_t *req) PyEval_RestoreThread(pctx->thread_state); pctx->environ = nxt_python_copy_environ(NULL); + if (nxt_slow_path(pctx->environ == NULL)) { + /* + * Refresh failed; surface the error. The next request's + * NULL-check (above) will retry via copy_environ(req) and + * fail it cleanly with NXT_UNIT_ERROR if the retry also + * fails — no NULL dereference downstream. + */ + nxt_unit_alert(NULL, + "Python failed to refresh the \"environ\" " + "template"); + } pctx->thread_state = PyEval_SaveThread(); } diff --git a/src/ruby/nxt_ruby.c b/src/ruby/nxt_ruby.c index c0befdb3b..03be9b589 100644 --- a/src/ruby/nxt_ruby.c +++ b/src/ruby/nxt_ruby.c @@ -283,11 +283,12 @@ nxt_ruby_start(nxt_task_t *task, nxt_process_data_t *data) ruby_script("NGINX_Unit"); ruby_ctx.env = Qnil; - ruby_ctx.io_input = Qnil; - ruby_ctx.io_error = Qnil; + ruby_ctx.io_input_class = Qnil; + ruby_ctx.io_error_class = Qnil; ruby_ctx.thread = Qnil; ruby_ctx.ctx = NULL; ruby_ctx.req = NULL; + ruby_ctx.req_seq = 0; rack_init.task = task; rack_init.script = &c->script; @@ -570,8 +571,12 @@ nxt_ruby_rack_env_create(VALUE arg) rb_hash_aset(hash_env, rb_str_new2("SCRIPT_NAME"), rb_str_new("", 0)); rb_hash_aset(hash_env, rb_str_new2("rack.version"), version); - rb_hash_aset(hash_env, rb_str_new2("rack.input"), rctx->io_input); - rb_hash_aset(hash_env, rb_str_new2("rack.errors"), rctx->io_error); + /* + * rack.input and rack.errors are minted per-request in + * nxt_ruby_rack_app_run; the template env intentionally omits + * them so a fresh, request-bound instance is the only thing + * each app callback ever sees. + */ rb_hash_aset(hash_env, rb_str_new2("rack.multithread"), nxt_ruby_threads > 1 ? Qtrue : Qfalse); rb_hash_aset(hash_env, rb_str_new2("rack.multiprocess"), Qtrue); @@ -591,33 +596,28 @@ nxt_ruby_rack_env_create(VALUE arg) static int nxt_ruby_init_io(nxt_ruby_ctx_t *rctx) { - VALUE io_input, io_error; - - io_input = nxt_ruby_stream_io_input_init(); - - rctx->io_input = rb_funcall(io_input, rb_intern("new"), 1, - (VALUE) (uintptr_t) rctx); - if (nxt_slow_path(rctx->io_input == Qnil)) { + /* + * Store only the class references on the ctx; instances are + * minted per request in nxt_ruby_rack_app_run so each instance + * can snapshot rctx->req_seq for its originating request. + */ + rctx->io_input_class = nxt_ruby_stream_io_input_init(); + if (nxt_slow_path(rctx->io_input_class == Qnil)) { nxt_unit_alert(NULL, - "Ruby: Failed to create environment 'rack.input' var"); - + "Ruby: Failed to register 'rack.input' class"); return NXT_UNIT_ERROR; } - rb_gc_register_address(&rctx->io_input); - - io_error = nxt_ruby_stream_io_error_init(); + rb_gc_register_address(&rctx->io_input_class); - rctx->io_error = rb_funcall(io_error, rb_intern("new"), 1, - (VALUE) (uintptr_t) rctx); - if (nxt_slow_path(rctx->io_error == Qnil)) { + rctx->io_error_class = nxt_ruby_stream_io_error_init(); + if (nxt_slow_path(rctx->io_error_class == Qnil)) { nxt_unit_alert(NULL, - "Ruby: Failed to create environment 'rack.error' var"); - + "Ruby: Failed to register 'rack.errors' class"); return NXT_UNIT_ERROR; } - rb_gc_register_address(&rctx->io_error); + rb_gc_register_address(&rctx->io_error_class); return NXT_UNIT_OK; } @@ -641,6 +641,14 @@ nxt_ruby_request_handler_gvl(void *data) req = data; rctx = req->ctx->data; + /* + * Bump req_seq before publishing rctx->req: rack.input / + * rack.errors instances minted during this request snapshot + * req_seq at construction time, so any handle issued for an + * earlier request reads a mismatched seq and is rejected as + * stale (see nxt_ruby_bind_req). + */ + rctx->req_seq++; rctx->req = req; res = rb_protect(nxt_ruby_rack_app_run, (VALUE) (uintptr_t) req, &state); @@ -669,12 +677,38 @@ nxt_ruby_rack_app_run(VALUE arg) nxt_ruby_ctx_t *rctx; nxt_unit_request_info_t *req; + VALUE io_input, io_error; + req = (nxt_unit_request_info_t *) arg; rctx = req->ctx->data; env = rb_hash_dup(rctx->env); + /* + * Mint per-request rack.input / rack.errors instances bound to + * the current rctx + req_seq. This is what prevents a handle + * captured during request A from operating on request B handled + * by the same worker: each instance snapshots req_seq, and + * stream-IO ops reject mismatches. + */ + io_input = nxt_ruby_stream_io_input_new(rctx->io_input_class, rctx); + if (nxt_slow_path(io_input == Qnil)) { + nxt_unit_req_alert(req, + "Ruby: Failed to create per-request 'rack.input'"); + goto fail; + } + + io_error = nxt_ruby_stream_io_error_new(rctx->io_error_class, rctx); + if (nxt_slow_path(io_error == Qnil)) { + nxt_unit_req_alert(req, + "Ruby: Failed to create per-request 'rack.errors'"); + goto fail; + } + + rb_hash_aset(env, rb_str_new2("rack.input"), io_input); + rb_hash_aset(env, rb_str_new2("rack.errors"), io_error); + rc = nxt_ruby_read_request(req, env); if (nxt_slow_path(rc != NXT_UNIT_OK)) { nxt_unit_req_alert(req, @@ -1291,12 +1325,12 @@ nxt_ruby_exception_log(nxt_unit_request_info_t *req, uint32_t level, static void nxt_ruby_ctx_done(nxt_ruby_ctx_t *rctx) { - if (rctx->io_input != Qnil) { - rb_gc_unregister_address(&rctx->io_input); + if (rctx->io_input_class != Qnil) { + rb_gc_unregister_address(&rctx->io_input_class); } - if (rctx->io_error != Qnil) { - rb_gc_unregister_address(&rctx->io_error); + if (rctx->io_error_class != Qnil) { + rb_gc_unregister_address(&rctx->io_error_class); } if (rctx->env != Qnil) { @@ -1456,9 +1490,10 @@ nxt_ruby_init_threads(nxt_ruby_app_conf_t *c) rctx = &nxt_ruby_ctxs[i]; rctx->env = Qnil; - rctx->io_input = Qnil; - rctx->io_error = Qnil; + rctx->io_input_class = Qnil; + rctx->io_error_class = Qnil; rctx->thread = Qnil; + rctx->req_seq = 0; } for (i = 0; i < c->threads - 1; i++) { diff --git a/src/ruby/nxt_ruby.h b/src/ruby/nxt_ruby.h index 264300213..dfe1afa03 100644 --- a/src/ruby/nxt_ruby.h +++ b/src/ruby/nxt_ruby.h @@ -22,15 +22,26 @@ typedef struct { VALUE env; - VALUE io_input; - VALUE io_error; + VALUE io_input_class; + VALUE io_error_class; VALUE thread; nxt_unit_ctx_t *ctx; nxt_unit_request_info_t *req; + /* + * Monotonic per-rctx counter, bumped on each request entry. + * Each rack.input / rack.errors instance snapshots this on + * creation; stream-IO ops reject calls whose snapshot no longer + * matches rctx->req_seq, so a handle captured during request A + * cannot read/write the body of request B handled by the same + * worker. + */ + uint64_t req_seq; } nxt_ruby_ctx_t; VALUE nxt_ruby_stream_io_input_init(void); VALUE nxt_ruby_stream_io_error_init(void); +VALUE nxt_ruby_stream_io_input_new(VALUE class, nxt_ruby_ctx_t *rctx); +VALUE nxt_ruby_stream_io_error_new(VALUE class, nxt_ruby_ctx_t *rctx); #endif /* _NXT_RUBY_H_INCLUDED_ */ diff --git a/src/ruby/nxt_ruby_stream_io.c b/src/ruby/nxt_ruby_stream_io.c index bda89b0f6..18e638d6a 100644 --- a/src/ruby/nxt_ruby_stream_io.c +++ b/src/ruby/nxt_ruby_stream_io.c @@ -8,7 +8,28 @@ #include -static VALUE nxt_ruby_stream_io_new(VALUE class, VALUE arg); +/* + * Per-instance binding for rack.input / rack.errors objects. + * + * The handle stores both the ctx pointer (for the current req lookup) + * and a snapshot of rctx->req_seq taken at construction time. The + * stream-IO operations only proceed when: + * - rctx->req != NULL (a request is currently in flight), AND + * - rctx->req_seq == bind->req_seq (this handle was issued for the + * same request that is currently in flight). + * + * Either guard failing means the caller is holding a stale handle — + * either captured after the request finished (background thread, + * cached IO) or carried across to a later request handled by the + * same worker. In both cases, returning Qnil (reads) or routing the + * write through the NULL-context logger is the safest behaviour. + */ +typedef struct { + nxt_ruby_ctx_t *rctx; + uint64_t req_seq; +} nxt_ruby_io_bind_t; + + static VALUE nxt_ruby_stream_io_initialize(int argc, VALUE *argv, VALUE self); static VALUE nxt_ruby_stream_io_gets(VALUE obj); static VALUE nxt_ruby_stream_io_each(VALUE obj); @@ -16,26 +37,57 @@ static VALUE nxt_ruby_stream_io_read(VALUE obj, VALUE args); static VALUE nxt_ruby_stream_io_rewind(VALUE obj); static VALUE nxt_ruby_stream_io_puts(VALUE obj, VALUE args); static VALUE nxt_ruby_stream_io_write(VALUE obj, VALUE args); -nxt_inline long nxt_ruby_stream_io_s_write(nxt_ruby_ctx_t *rctx, VALUE val); +nxt_inline long nxt_ruby_stream_io_s_write(nxt_ruby_io_bind_t *bind, VALUE val); static VALUE nxt_ruby_stream_io_flush(VALUE obj); static VALUE nxt_ruby_stream_io_close(VALUE obj); -nxt_inline size_t nxt_ruby_dt_dsize_rctx(const void *arg); +nxt_inline size_t nxt_ruby_dt_dsize_bind(const void *arg); +nxt_inline void nxt_ruby_dt_dfree_bind(void *arg); +nxt_inline nxt_unit_request_info_t *nxt_ruby_bind_req(nxt_ruby_io_bind_t *bind); static const rb_data_type_t nxt_rctx_dt = { - .wrap_struct_name = "rctx", + .wrap_struct_name = "nxt_ruby_io_bind", .function = { - .dsize = nxt_ruby_dt_dsize_rctx, + .dsize = nxt_ruby_dt_dsize_bind, + .dfree = nxt_ruby_dt_dfree_bind, }, }; nxt_inline size_t -nxt_ruby_dt_dsize_rctx(const void *arg) +nxt_ruby_dt_dsize_bind(const void *arg) { - const nxt_ruby_ctx_t *rctx = arg; + (void) arg; + return sizeof(nxt_ruby_io_bind_t); +} + - return sizeof(*rctx); +nxt_inline void +nxt_ruby_dt_dfree_bind(void *arg) +{ + nxt_free(arg); +} + + +/* + * Returns the in-flight request if this handle is still valid, or + * NULL if it was captured outside its originating request (stale + * across-request reuse or after-request access). + */ +nxt_inline nxt_unit_request_info_t * +nxt_ruby_bind_req(nxt_ruby_io_bind_t *bind) +{ + if (bind == NULL || bind->rctx == NULL) { + return NULL; + } + + if (bind->rctx->req == NULL + || bind->rctx->req_seq != bind->req_seq) + { + return NULL; + } + + return bind->rctx->req; } @@ -48,9 +100,6 @@ nxt_ruby_stream_io_input_init(void) rb_undef_alloc_func(stream_io); - rb_gc_register_address(&stream_io); - - rb_define_singleton_method(stream_io, "new", nxt_ruby_stream_io_new, 1); rb_define_method(stream_io, "initialize", nxt_ruby_stream_io_initialize, -1); rb_define_method(stream_io, "gets", nxt_ruby_stream_io_gets, 0); @@ -72,9 +121,6 @@ nxt_ruby_stream_io_error_init(void) rb_undef_alloc_func(stream_io); - rb_gc_register_address(&stream_io); - - rb_define_singleton_method(stream_io, "new", nxt_ruby_stream_io_new, 1); rb_define_method(stream_io, "initialize", nxt_ruby_stream_io_initialize, -1); rb_define_method(stream_io, "puts", nxt_ruby_stream_io_puts, -2); @@ -86,12 +132,21 @@ nxt_ruby_stream_io_error_init(void) } -static VALUE -nxt_ruby_stream_io_new(VALUE class, VALUE arg) +nxt_inline VALUE +nxt_ruby_stream_io_alloc(VALUE class, nxt_ruby_ctx_t *rctx) { - VALUE self; + VALUE self; + nxt_ruby_io_bind_t *bind; + + bind = nxt_zalloc(sizeof(nxt_ruby_io_bind_t)); + if (nxt_slow_path(bind == NULL)) { + return Qnil; + } + + bind->rctx = rctx; + bind->req_seq = rctx->req_seq; - self = TypedData_Wrap_Struct(class, &nxt_rctx_dt, (void *)(uintptr_t)arg); + self = TypedData_Wrap_Struct(class, &nxt_rctx_dt, bind); rb_obj_call_init(self, 0, NULL); @@ -99,6 +154,20 @@ nxt_ruby_stream_io_new(VALUE class, VALUE arg) } +VALUE +nxt_ruby_stream_io_input_new(VALUE class, nxt_ruby_ctx_t *rctx) +{ + return nxt_ruby_stream_io_alloc(class, rctx); +} + + +VALUE +nxt_ruby_stream_io_error_new(VALUE class, nxt_ruby_ctx_t *rctx) +{ + return nxt_ruby_stream_io_alloc(class, rctx); +} + + static VALUE nxt_ruby_stream_io_initialize(int argc, VALUE *argv, VALUE self) { @@ -111,11 +180,20 @@ nxt_ruby_stream_io_gets(VALUE obj) { VALUE buf; ssize_t res; - nxt_ruby_ctx_t *rctx; + nxt_ruby_io_bind_t *bind; nxt_unit_request_info_t *req; - TypedData_Get_Struct(obj, nxt_ruby_ctx_t, &nxt_rctx_dt, rctx); - req = rctx->req; + TypedData_Get_Struct(obj, nxt_ruby_io_bind_t, &nxt_rctx_dt, bind); + + /* + * Reject calls on a stale handle (captured during a finished + * request, or carried across into a later request handled by + * the same worker context). See nxt_ruby_bind_req(). + */ + req = nxt_ruby_bind_req(bind); + if (req == NULL) { + return Qnil; + } if (req->content_length == 0) { return Qnil; @@ -166,13 +244,21 @@ nxt_ruby_stream_io_each(VALUE obj) static VALUE nxt_ruby_stream_io_read(VALUE obj, VALUE args) { - VALUE buf; - long copy_size, u_size; - nxt_ruby_ctx_t *rctx; + VALUE buf; + long copy_size, u_size; + nxt_ruby_io_bind_t *bind; + nxt_unit_request_info_t *req; - TypedData_Get_Struct(obj, nxt_ruby_ctx_t, &nxt_rctx_dt, rctx); + TypedData_Get_Struct(obj, nxt_ruby_io_bind_t, &nxt_rctx_dt, bind); + + /* See nxt_ruby_bind_req() — rejects stale handles captured + * across the request boundary or reused under a later request. */ + req = nxt_ruby_bind_req(bind); + if (req == NULL) { + return Qnil; + } - copy_size = rctx->req->content_length; + copy_size = req->content_length; if (RARRAY_LEN(args) > 0 && TYPE(RARRAY_PTR(args)[0]) == T_FIXNUM) { u_size = NUM2LONG(RARRAY_PTR(args)[0]); @@ -196,7 +282,7 @@ nxt_ruby_stream_io_read(VALUE obj, VALUE args) return Qnil; } - copy_size = nxt_unit_request_read(rctx->req, RSTRING_PTR(buf), copy_size); + copy_size = nxt_unit_request_read(req, RSTRING_PTR(buf), copy_size); if (RARRAY_LEN(args) > 1 && TYPE(RARRAY_PTR(args)[1]) == T_STRING) { @@ -220,15 +306,15 @@ nxt_ruby_stream_io_rewind(VALUE obj) static VALUE nxt_ruby_stream_io_puts(VALUE obj, VALUE args) { - nxt_ruby_ctx_t *rctx; + nxt_ruby_io_bind_t *bind; if (RARRAY_LEN(args) != 1) { return Qnil; } - TypedData_Get_Struct(obj, nxt_ruby_ctx_t, &nxt_rctx_dt, rctx); + TypedData_Get_Struct(obj, nxt_ruby_io_bind_t, &nxt_rctx_dt, bind); - nxt_ruby_stream_io_s_write(rctx, RARRAY_PTR(args)[0]); + nxt_ruby_stream_io_s_write(bind, RARRAY_PTR(args)[0]); return Qnil; } @@ -237,24 +323,26 @@ nxt_ruby_stream_io_puts(VALUE obj, VALUE args) static VALUE nxt_ruby_stream_io_write(VALUE obj, VALUE args) { - long len; - nxt_ruby_ctx_t *rctx; + long len; + nxt_ruby_io_bind_t *bind; if (RARRAY_LEN(args) != 1) { return Qnil; } - TypedData_Get_Struct(obj, nxt_ruby_ctx_t, &nxt_rctx_dt, rctx); + TypedData_Get_Struct(obj, nxt_ruby_io_bind_t, &nxt_rctx_dt, bind); - len = nxt_ruby_stream_io_s_write(rctx, RARRAY_PTR(args)[0]); + len = nxt_ruby_stream_io_s_write(bind, RARRAY_PTR(args)[0]); return LONG2FIX(len); } nxt_inline long -nxt_ruby_stream_io_s_write(nxt_ruby_ctx_t *rctx, VALUE val) +nxt_ruby_stream_io_s_write(nxt_ruby_io_bind_t *bind, VALUE val) { + nxt_unit_request_info_t *req; + if (nxt_slow_path(val == Qnil)) { return 0; } @@ -267,7 +355,23 @@ nxt_ruby_stream_io_s_write(nxt_ruby_ctx_t *rctx, VALUE val) } } - nxt_unit_req_error(rctx->req, "Ruby: %s", RSTRING_PTR(val)); + /* + * Apps legitimately write to rack.errors during at_exit hooks + * (running after the request handler returned and rctx->req was + * cleared at nxt_ruby.c:657) and may also retain rack.errors past + * the originating request. In both cases nxt_ruby_bind_req() + * returns NULL — route those messages through the NULL-context + * logger at ERR level so they still land in the unit log but do + * NOT get attributed to a later, unrelated request. ERR (not + * ALERT) avoids tripping the test suite's alert detector. + */ + req = nxt_ruby_bind_req(bind); + if (req == NULL) { + nxt_unit_log(NULL, NXT_UNIT_LOG_ERR, "Ruby: %s", RSTRING_PTR(val)); + return RSTRING_LEN(val); + } + + nxt_unit_req_error(req, "Ruby: %s", RSTRING_PTR(val)); return RSTRING_LEN(val); } diff --git a/src/wasm/nxt_wasm.c b/src/wasm/nxt_wasm.c index db79d6aee..7a7689efa 100644 --- a/src/wasm/nxt_wasm.c +++ b/src/wasm/nxt_wasm.c @@ -42,15 +42,95 @@ nxt_wasm_do_response_end(nxt_wasm_ctx_t *ctx) void nxt_wasm_do_send_headers(nxt_wasm_ctx_t *ctx, uint32_t offset) { - size_t fields_len; + size_t fields_len, fields_table_end; unsigned int i; nxt_wasm_response_fields_t *rh; + /* + * `offset`, `nfields`, and every (name_off, name_len, value_off, + * value_len) tuple arrive from the guest WASM module. Bound them + * against the linear-memory size before any dereference. + */ + if (offset > NXT_WASM_MEM_SIZE - sizeof(nxt_wasm_response_fields_t)) { + nxt_unit_req_alert(ctx->req, + "WASM send_headers offset %u out of range", offset); + return; + } + rh = (nxt_wasm_response_fields_t *)(ctx->baddr + offset); + /* + * Bound the fields[] table. Each entry is nxt_wasm_http_field_t; + * compute end-offset with overflow-safe arithmetic. + */ + if (rh->nfields > (NXT_WASM_MEM_SIZE - offset + - sizeof(nxt_wasm_response_fields_t)) + / sizeof(nxt_wasm_http_field_t)) + { + nxt_unit_req_alert(ctx->req, + "WASM send_headers nfields=%u out of range", + rh->nfields); + return; + } + fields_table_end = offset + sizeof(nxt_wasm_response_fields_t) + + (size_t) rh->nfields * sizeof(nxt_wasm_http_field_t); + + /* + * Bound each (name, value) range in the guest's memory. Field + * offsets are guest-relative to `rh`, so the absolute offset in + * linear memory is `offset + field_off`. + * + * Two additional caps: + * - `name_len` is passed to nxt_unit_response_add_field() whose + * `name_length` parameter is uint8_t — any name_len > 255 would + * silently truncate, splicing the next bytes of guest memory + * into the emitted header. Reject up front. + * - `fields_len` is the aggregate sum of every name+value length; + * nothing stops a guest from pointing many fields at the same + * in-bounds region and inflating the sum (or overflowing it). + * Accumulate with overflow checks and cap to the linear-memory + * size: no legitimate response header table can exceed that. + */ fields_len = 0; for (i = 0; i < rh->nfields; i++) { - fields_len += rh->fields[i].name_len + rh->fields[i].value_len; + uint32_t name_off = rh->fields[i].name_off; + uint32_t name_len = rh->fields[i].name_len; + uint32_t val_off = rh->fields[i].value_off; + uint32_t val_len = rh->fields[i].value_len; + size_t abs_name = (size_t) offset + name_off; + size_t abs_val = (size_t) offset + val_off; + + if (abs_name < fields_table_end + || abs_name > NXT_WASM_MEM_SIZE + || name_len > NXT_WASM_MEM_SIZE - abs_name + || abs_val < fields_table_end + || abs_val > NXT_WASM_MEM_SIZE + || val_len > NXT_WASM_MEM_SIZE - abs_val) + { + nxt_unit_req_alert(ctx->req, + "WASM send_headers field[%u] out of range " + "(name_off=%u name_len=%u val_off=%u val_len=%u)", + i, name_off, name_len, val_off, val_len); + return; + } + + if (name_len > UINT8_MAX) { + nxt_unit_req_alert(ctx->req, + "WASM send_headers field[%u] name_len=%u exceeds 255", + i, name_len); + return; + } + + if (val_len > NXT_WASM_MEM_SIZE - fields_len + || name_len > NXT_WASM_MEM_SIZE - fields_len - val_len) + { + nxt_unit_req_alert(ctx->req, + "WASM send_headers aggregate header size exceeds %zu " + "at field[%u]", (size_t) NXT_WASM_MEM_SIZE, i); + return; + } + + fields_len += name_len + val_len; } nxt_unit_response_init(ctx->req, ctx->status, rh->nfields, fields_len); @@ -80,8 +160,26 @@ nxt_wasm_do_send_response(nxt_wasm_ctx_t *ctx, uint32_t offset) nxt_unit_response_init(req, ctx->status, 0, 0); } + /* + * `offset` arrives from the guest WASM module; bound it against + * the linear-memory size before dereferencing. + */ + if (offset > NXT_WASM_MEM_SIZE - sizeof(nxt_wasm_response_t)) { + nxt_unit_req_alert(req, + "WASM send_response offset %u out of range", offset); + return; + } + resp = (nxt_wasm_response_t *)(nxt_wasm_ctx.baddr + offset); + if (resp->size > NXT_WASM_MEM_SIZE + - offset - offsetof(nxt_wasm_response_t, data)) + { + nxt_unit_req_alert(req, "WASM send_response size %u out of range", + resp->size); + return; + } + nxt_unit_response_write(req, (const char *)resp->data, resp->size); } @@ -97,23 +195,65 @@ nxt_wasm_request_handler(nxt_unit_request_info_t *req) nxt_wasm_request_t *wr; nxt_wasm_http_field_t *df; + /* + * Publish the in-flight request on the shared ctx *before* any + * hook or guarded jump so REQUEST_INIT / REQUEST_END always see + * the right request — including the bounds-failure paths that + * goto request_done before reaching the body-read block below. + * Without this, REQUEST_END would fire with a stale (or NULL) + * ctx->req on the first guarded exit of any worker. + */ + nxt_wasm_ctx.req = req; + NXT_WASM_DO_HOOK(NXT_WASM_FH_REQUEST_INIT); wr = (nxt_wasm_request_t *)nxt_wasm_ctx.baddr; + /* + * Each request field is copied into the WASM linear memory at the + * running `offset`. Without bounds checks, an unusually large + * request (many headers, oversized header values) can drive the + * memcpy past the end of the 32 MB sandbox region. + */ + /* + * Route bounds failures through the request_done label so the + * REQUEST_END hook still fires — modules tracking per-request + * state in request_init_handler need their matching cleanup. + */ +#define WASM_OFFSET_GUARD(need) \ + do { \ + if (offset > NXT_WASM_MEM_SIZE - 1 \ + || (size_t)(need) > NXT_WASM_MEM_SIZE - 1 - offset) \ + { \ + nxt_unit_req_alert(req, \ + "WASM request buffer overflow at offset %zu", offset); \ + nxt_unit_request_done(req, NXT_UNIT_ERROR); \ + goto request_done; \ + } \ + } while (0) + #define SET_REQ_MEMBER(dmember, smember) \ do { \ const char *str = nxt_unit_sptr_get(&r->smember); \ + size_t slen = strlen(str); \ + WASM_OFFSET_GUARD(slen); \ wr->dmember##_off = offset; \ - wr->dmember##_len = strlen(str); \ - memcpy((uint8_t *)wr + offset, str, wr->dmember##_len + 1); \ - offset += wr->dmember##_len + 1; \ + wr->dmember##_len = slen; \ + memcpy((uint8_t *)wr + offset, str, slen + 1); \ + offset += slen + 1; \ } while (0) r = req->request; offset = sizeof(nxt_wasm_request_t) + (r->fields_count * sizeof(nxt_wasm_http_field_t)); + if (offset > NXT_WASM_MEM_SIZE) { + nxt_unit_req_alert(req, "WASM request header table exceeds linear " + "memory: fields_count=%u", r->fields_count); + nxt_unit_request_done(req, NXT_UNIT_ERROR); + goto request_done; + } + SET_REQ_MEMBER(path, path); SET_REQ_MEMBER(method, method); SET_REQ_MEMBER(version, version); @@ -129,19 +269,24 @@ nxt_wasm_request_handler(nxt_unit_request_info_t *req) for (sf = r->fields; sf < sf_end; sf++) { const char *name = nxt_unit_sptr_get(&sf->name); const char *value = nxt_unit_sptr_get(&sf->value); + size_t nlen = strlen(name); + size_t vlen = strlen(value); + WASM_OFFSET_GUARD(nlen); df->name_off = offset; - df->name_len = strlen(name); - memcpy((uint8_t *)wr + offset, name, df->name_len + 1); - offset += df->name_len + 1; + df->name_len = nlen; + memcpy((uint8_t *)wr + offset, name, nlen + 1); + offset += nlen + 1; + WASM_OFFSET_GUARD(vlen); df->value_off = offset; - df->value_len = strlen(value); - memcpy((uint8_t *)wr + offset, value, df->value_len + 1); - offset += df->value_len + 1; + df->value_len = vlen; + memcpy((uint8_t *)wr + offset, value, vlen + 1); + offset += vlen + 1; df++; } +#undef WASM_OFFSET_GUARD wr->tls = r->tls; wr->nfields = r->fields_count; @@ -156,7 +301,6 @@ nxt_wasm_request_handler(nxt_unit_request_info_t *req) wr->request_size = offset + bytes_read; nxt_wasm_ctx.status = NXT_WASM_HTTP_OK; - nxt_wasm_ctx.req = req; err = nxt_wops->exec_request(&nxt_wasm_ctx); if (err) { goto out_err_500; diff --git a/test/test_asgi_websockets.py b/test/test_asgi_websockets.py index f93c97aba..f034195e1 100644 --- a/test/test_asgi_websockets.py +++ b/test/test_asgi_websockets.py @@ -170,7 +170,11 @@ def test_asgi_websockets_length_long(): ws.frame_write(sock, ws.OP_TEXT, 'fragment1', fin=False) ws.frame_write(sock, ws.OP_CONT, 'fragment2', length=2**64 - 1) - check_close(sock, 1009) # 1009 - CLOSE_TOO_LARGE + # 1002 - CLOSE_PROTOCOL_ERROR. RFC 6455 §5.2 requires the + # high bit of the 64-bit extended payload length to be 0; an + # all-ones length is a protocol violation and gets rejected + # before the policy-size check (1009 CLOSE_TOO_LARGE) runs. + check_close(sock, 1002) def test_asgi_websockets_frame_fragmentation_invalid():