Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 16 additions & 0 deletions src/inc/quic_platform_posix.h
Original file line number Diff line number Diff line change
Expand Up @@ -1600,6 +1600,22 @@ CxPlatCqeGetSqe(

#endif

//
// WCP compatibility layer for non-Windows platforms.
// On POSIX, WCP (Wait Completion Packet) doesn't exist, so we map
// WCP-specific types and functions to their standard equivalents.
//
// On this platform a WCP SQE is simply an alias for the standard SQE type.
typedef CXPLAT_SQE CXPLAT_SQE_WCP;

// Forwards to CxPlatEventQEnqueue. The macro expands to that call expression,
// so it yields whatever CxPlatEventQEnqueue yields; the Windows counterpart is
// declared void, so portable callers should not consume the result.
#define CxPlatEventQEnqueueWcp(queue, sqe) \
CxPlatEventQEnqueue(queue, (CXPLAT_SQE*)(sqe))

// Forwards to CxPlatSqeInitialize with the SQE cast to the base type.
#define CxPlatSqeInitializeWcp(queue, completion, sqe) \
CxPlatSqeInitialize(queue, completion, (CXPLAT_SQE*)(sqe))

// Forwards to CxPlatSqeCleanup with the SQE cast to the base type.
#define CxPlatSqeCleanupWcp(queue, sqe) \
CxPlatSqeCleanup(queue, (CXPLAT_SQE*)(sqe))

//
// Thread Interfaces.
//
Expand Down
182 changes: 181 additions & 1 deletion src/inc/quic_platform_winuser.h
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,10 @@ extern "C" {

#define INIT_NO_SAL(X) // No-op since Windows supports SAL

#ifndef STATUS_CANCELLED
#define STATUS_CANCELLED ((NTSTATUS)0xC0000120L)
Comment thread
Santhosha-bk marked this conversation as resolved.
#endif

#ifdef QUIC_RESTRICTED_BUILD
#ifndef NT_SUCCESS
#define NT_SUCCESS(Status) (((NTSTATUS)(Status)) >= 0)
Expand Down Expand Up @@ -779,6 +783,55 @@ typedef struct CXPLAT_SQE {
#endif
} CXPLAT_SQE;

//
// Extended SQE that pairs a base CXPLAT_SQE with the two handles used to
// deliver it through a Wait Completion Packet (WCP). CxPlatSqeInitializeWcp
// zeroes the whole structure, so both handles are NULL when WCP is not in use.
//
typedef struct CXPLAT_SQE_WCP {
CXPLAT_SQE BaseSqe; // Must stay the first member; enforced by the static assert below.
HANDLE WcpEvent; // Manual-reset event signaled to enqueue the SQE. NULL selects the PQCS fallback.
HANDLE WaitCompletionPacket; // Wait completion packet associated with WcpEvent and the event queue.
} CXPLAT_SQE_WCP;
// Compile-time guarantee that a CXPLAT_SQE_WCP pointer and a pointer to its BaseSqe are interchangeable.
CXPLAT_STATIC_ASSERT(offsetof(CXPLAT_SQE_WCP, BaseSqe) == 0, "BaseSqe must be at offset 0 in CXPLAT_SQE_WCP for correct pointer casting");

//
// Wait Completion Packet functions from ntdll.dll.
//

typedef NTSTATUS (NTAPI *FuncNtCreateWaitCompletionPacket)(
Comment thread
Santhosha-bk marked this conversation as resolved.
_Out_ PHANDLE WaitCompletionPacketHandle,
_In_ ACCESS_MASK DesiredAccess,
_In_opt_ PVOID ObjectAttributes
);

//
// Function-pointer type for the NtAssociateWaitCompletionPacket export that is
// resolved from ntdll.dll at runtime. CxPlatSqeInitializeWcp calls it with the
// event queue as IoCompletionHandle, the SQE's event as TargetObjectHandle and
// the SQE's OVERLAPPED as ApcContext; KeyContext and TargetApcInvoked are NULL
// and IoStatus / IoStatusInformation are 0.
//
typedef NTSTATUS (NTAPI *FuncNtAssociateWaitCompletionPacket)(
_In_ HANDLE WaitCompletionPacketHandle,
_In_ HANDLE IoCompletionHandle,
_In_ HANDLE TargetObjectHandle,
_In_opt_ PVOID KeyContext,
_In_opt_ PVOID ApcContext,
_In_ NTSTATUS IoStatus,
_In_ ULONG_PTR IoStatusInformation,
_Out_opt_ PBOOLEAN TargetApcInvoked
);

//
// Function-pointer type for the NtCancelWaitCompletionPacket export that is
// resolved from ntdll.dll at runtime. Callers in this header always pass
// RemoveSignaledPacket = TRUE and close the packet handle only when the result
// is a success status or STATUS_CANCELLED.
//
typedef NTSTATUS (NTAPI *FuncNtCancelWaitCompletionPacket)(
_In_ HANDLE WaitCompletionPacketHandle,
_In_ BOOLEAN RemoveSignaledPacket
);

//
// Global function pointers for the WCP entry points. They are defined in
// platform_winuser.c, start out NULL and are filled in with GetProcAddress on
// ntdll during CxPlatInitialize. Any of them can remain NULL when the export
// is missing, so check CxPlatWcpAvailable() before calling through them.
//
extern FuncNtCreateWaitCompletionPacket NtCreateWaitCompletionPacket;
extern FuncNtAssociateWaitCompletionPacket NtAssociateWaitCompletionPacket;
extern FuncNtCancelWaitCompletionPacket NtCancelWaitCompletionPacket;

//
// Reports whether the Wait Completion Packet (WCP) APIs can be used.
// Returns TRUE only when all three ntdll entry points (create, associate and
// cancel) have been resolved to non-NULL function pointers; otherwise FALSE.
//
QUIC_INLINE
BOOLEAN
CxPlatWcpAvailable(void)
{
    if (NtCreateWaitCompletionPacket == NULL) {
        return FALSE;
    }
    if (NtAssociateWaitCompletionPacket == NULL) {
        return FALSE;
    }
    if (NtCancelWaitCompletionPacket == NULL) {
        return FALSE;
    }
    return TRUE;
}

QUIC_INLINE
BOOLEAN
CxPlatEventQInitialize(
Expand Down Expand Up @@ -838,6 +891,27 @@ CxPlatEventQEnqueueEx( // Windows specific extension
return PostQueuedCompletionStatus(*queue, num_bytes, 0, &sqe->Overlapped) != 0;
}

//
// Enqueues a WCP-capable SQE onto the event queue.
//
// queue - Event queue used by the PQCS fallback path.
// sqe   - SQE previously set up by CxPlatSqeInitializeWcp.
//
// When the SQE owns a WCP event, signaling that event is what triggers the
// completion. Otherwise the SQE is posted with the standard
// CxPlatEventQEnqueue path.
//
QUIC_INLINE
void
CxPlatEventQEnqueueWcp(
    _In_ CXPLAT_EVENTQ* queue,
    _In_ CXPLAT_SQE_WCP* sqe
    )
{
    if (sqe->WcpEvent == NULL) {
        //
        // No WCP event on this SQE: fall back to PQCS. A failed enqueue
        // (extremely rare OOM in kernel) would leave the worker thread
        // waiting for an event that never arrives, so the failure is
        // asserted on to make the problem diagnosable.
        //
        CXPLAT_FRE_ASSERT(CxPlatEventQEnqueue(queue, &sqe->BaseSqe));
        return;
    }

    CxPlatEventSet(sqe->WcpEvent);
}

QUIC_INLINE
uint32_t
CxPlatEventQDequeue(
Expand All @@ -848,7 +922,9 @@ CxPlatEventQDequeue(
)
{
ULONG out_count = 0;
if (!GetQueuedCompletionStatusEx(*queue, events, count, &out_count, wait_time, FALSE)) return 0;
if (!GetQueuedCompletionStatusEx(*queue, events, count, &out_count, wait_time, FALSE)) {
return 0;
}
CXPLAT_DBG_ASSERT(out_count != 0);
CXPLAT_DBG_ASSERT(events[0].lpOverlapped != NULL || out_count == 1);
#if DEBUG
Expand All @@ -872,6 +948,11 @@ CxPlatEventQReturn(
UNREFERENCED_PARAMETER(count);
}

//
// Initialize a standard SQE for event queue operations.
// Uses PQCS (PostQueuedCompletionStatus) for enqueuing.
// Use this for non-critical operations where enqueue failure is tolerable.
//
QUIC_INLINE
BOOLEAN
CxPlatSqeInitialize(
Expand All @@ -886,6 +967,73 @@ CxPlatSqeInitialize(
return TRUE;
}

//
// Initialize a WCP-enabled SQE. WCP pre-allocates the completion packet at init time, avoiding
// silent PQCS failures under severe memory pressure that could deadlock the worker thread.
// Falls back to standard PQCS on older Windows. Use for critical operations (e.g., shutdown).
//
// queue      - Event queue the wait completion packet is associated with.
// completion - Completion callback stored in the base SQE.
// sqe        - SQE to initialize; it is zeroed first.
//
// Returns TRUE on success, including the fallback case where the WCP APIs are
// unavailable and both handles stay NULL. Returns FALSE if the event or the
// packet could not be set up. On every FALSE return after the event was
// created, the event is closed and both handle fields are reset to NULL, so a
// later CxPlatEventQEnqueueWcp or CxPlatSqeCleanupWcp on the same SQE never
// touches a stale handle.
//
// NOTE(review): the packet is associated exactly once here; confirm that
// callers never need to enqueue the same SQE more than once.
//
QUIC_INLINE
BOOLEAN
CxPlatSqeInitializeWcp(
    _In_ CXPLAT_EVENTQ* queue,
    _In_ CXPLAT_EVENT_COMPLETION completion,
    _Out_ CXPLAT_SQE_WCP* sqe
    )
{
    CxPlatZeroMemory(sqe, sizeof(*sqe));
    sqe->BaseSqe.Completion = completion;

    if (!CxPlatWcpAvailable()) {
        //
        // WCP entry points were not resolved; WcpEvent stays NULL, which
        // selects the PQCS path at enqueue time.
        //
        return TRUE;
    }

    CxPlatEventInitialize(&sqe->WcpEvent, TRUE, FALSE);
    if (sqe->WcpEvent == NULL) {
        return FALSE;
    }

    NTSTATUS status = NtCreateWaitCompletionPacket(
        &sqe->WaitCompletionPacket,
        GENERIC_ALL,
        NULL);
    if (!NT_SUCCESS(status)) {
        //
        // Reset the fields after closing so the SQE does not keep a dangling
        // event handle (matches the associate-failure path below).
        //
        CloseHandle(sqe->WcpEvent);
        sqe->WcpEvent = NULL;
        sqe->WaitCompletionPacket = NULL;
        return FALSE;
    }

    status = NtAssociateWaitCompletionPacket(
        sqe->WaitCompletionPacket,
        *queue,
        sqe->WcpEvent,
        NULL,
        &sqe->BaseSqe.Overlapped,
        0, // IoStatus STATUS_SUCCESS
        0,
        NULL);

    if (!NT_SUCCESS(status)) {
        NTSTATUS CancelStatus = NtCancelWaitCompletionPacket(
            sqe->WaitCompletionPacket, // WaitCompletionPacketHandle
            TRUE); // RemoveSignaledPacket

        // Close WCP handle on success or STATUS_CANCELLED (expected states where handle is valid).
        // On other errors, leak to avoid potential corruption.
        if (NT_SUCCESS(CancelStatus) || CancelStatus == STATUS_CANCELLED) {
            CloseHandle(sqe->WaitCompletionPacket);
        }

        // Event handle is always safe to close
        CloseHandle(sqe->WcpEvent);
        sqe->WcpEvent = NULL;
        sqe->WaitCompletionPacket = NULL;

        return FALSE;
    }

    return TRUE;
}

// Most of the SQEs that are created through CxPlatStartDatapathIo will be using this.
QUIC_INLINE
void
CxPlatSqeInitializeEx(
Expand All @@ -909,6 +1057,38 @@ CxPlatSqeCleanup(
{
UNREFERENCED_PARAMETER(queue);
UNREFERENCED_PARAMETER(sqe);
// No-op for base SQE
}

//
// Releases the WCP resources owned by an SQE.
//
// queue - Unused.
// sqe   - SQE whose event and wait completion packet handles are released.
//
// Does nothing when the SQE has no WCP event (PQCS fallback). Both handle
// fields are reset to NULL once the event has been closed.
//
QUIC_INLINE
void
CxPlatSqeCleanupWcp(
    _In_ CXPLAT_EVENTQ* queue,
    _In_ CXPLAT_SQE_WCP* sqe
    )
{
    UNREFERENCED_PARAMETER(queue);

    if (sqe->WcpEvent == NULL) {
        return;
    }

    if (sqe->WaitCompletionPacket != NULL && CxPlatWcpAvailable()) {
        NTSTATUS Status =
            NtCancelWaitCompletionPacket(
                sqe->WaitCompletionPacket, // WaitCompletionPacketHandle
                TRUE);                     // RemoveSignaledPacket

        //
        // After a WCP fires, the association is destroyed but the handle
        // remains valid, so it still has to be closed. The handle is closed
        // on success or STATUS_CANCELLED (expected states where it is valid);
        // on any other status it is deliberately leaked to avoid potential
        // corruption.
        //
        if (NT_SUCCESS(Status) || Status == STATUS_CANCELLED) {
            CloseHandle(sqe->WaitCompletionPacket);
        }
    }

    // The event handle is always safe to close.
    CloseHandle(sqe->WcpEvent);

    sqe->WcpEvent = NULL;
    sqe->WaitCompletionPacket = NULL;
}

QUIC_INLINE
Expand Down
35 changes: 21 additions & 14 deletions src/platform/datapath_winuser.c
Original file line number Diff line number Diff line change
Expand Up @@ -3805,24 +3805,31 @@ CxPlatSendDataComplete(
{
CXPLAT_SOCKET_PROC* SocketProc = SendData->SocketProc;

if (IoResult != QUIC_STATUS_SUCCESS) {
QuicTraceEvent(
DatapathErrorStatus,
"[data][%p] ERROR, %u, %s.",
SocketProc->Parent,
IoResult,
"WSASendMsg completion");
}
//
// Acquire rundown before accessing any fields of SocketProc->Parent
// to prevent use-after-free during socket shutdown.
//
if (CxPlatRundownAcquire(&SocketProc->RundownRef)) {
CXPLAT_SOCKET* Socket = SocketProc->Parent;

if (SocketProc->Parent->Type != CXPLAT_SOCKET_UDP) {
if (CxPlatRundownAcquire(&SocketProc->RundownRef)) {
SocketProc->Parent->Datapath->TcpHandlers.SendComplete(
SocketProc->Parent,
SocketProc->Parent->ClientContext,
if (IoResult != QUIC_STATUS_SUCCESS) {
QuicTraceEvent(
DatapathErrorStatus,
"[data][%p] ERROR, %u, %s.",
Socket,
IoResult,
"WSASendMsg completion");
}

if (Socket->Type != CXPLAT_SOCKET_UDP) {
Socket->Datapath->TcpHandlers.SendComplete(
Socket,
Socket->ClientContext,
IoResult,
SendData->TotalSize);
CxPlatRundownRelease(&SocketProc->RundownRef);
}

CxPlatRundownRelease(&SocketProc->RundownRef);
}

SendDataFree(SendData);
Expand Down
14 changes: 14 additions & 0 deletions src/platform/platform_winuser.c
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,11 @@ QUIC_TRACE_RUNDOWN_CALLBACK* QuicTraceRundownCallback;
//
typedef LONG (WINAPI *FuncRtlGetVersion)(RTL_OSVERSIONINFOW *);

//
// Definitions of the global WCP function pointers. Each starts out NULL and is
// assigned from GetProcAddress on ntdll in CxPlatInitialize; a pointer stays
// NULL when the corresponding export is not found.
//
FuncNtCreateWaitCompletionPacket NtCreateWaitCompletionPacket = NULL;
FuncNtAssociateWaitCompletionPacket NtAssociateWaitCompletionPacket = NULL;
FuncNtCancelWaitCompletionPacket NtCancelWaitCompletionPacket = NULL;

_IRQL_requires_max_(PASSIVE_LEVEL)
void
CxPlatSystemLoad(
Expand Down Expand Up @@ -266,6 +271,15 @@ CxPlatInitialize(
SuccessfullySetVersion = TRUE;
}
}

// Load Wait Completion Packet functions (available on Windows 11+)
NtCreateWaitCompletionPacket =
(FuncNtCreateWaitCompletionPacket)GetProcAddress(NtDllHandle, "NtCreateWaitCompletionPacket");
NtAssociateWaitCompletionPacket =
(FuncNtAssociateWaitCompletionPacket)GetProcAddress(NtDllHandle, "NtAssociateWaitCompletionPacket");
NtCancelWaitCompletionPacket =
(FuncNtCancelWaitCompletionPacket)GetProcAddress(NtDllHandle, "NtCancelWaitCompletionPacket");

FreeLibrary(NtDllHandle);
}
CXPLAT_DBG_ASSERT(SuccessfullySetVersion); // TODO: Is the assert here enough or is there an appropriate QUIC_STATUS we return?
Expand Down
Loading
Loading