diff --git a/cranelift/codegen/src/isa/x64/inst.isle b/cranelift/codegen/src/isa/x64/inst.isle index ef8954dda898..60d782107ee4 100644 --- a/cranelift/codegen/src/isa/x64/inst.isle +++ b/cranelift/codegen/src/isa/x64/inst.isle @@ -3826,6 +3826,27 @@ (rule 2 (is_nonzero (band _ a @ (value_type (ty_int (fits_in_64 ty))) b)) (is_nonzero_band ty a b)) +;; Specialize `is_nonzero` for `ctz(X)` / `clz(X)` used directly as a condition +;; (`brif`, `select`, `trap`, ...). The egraph rules in `opts/icmp.isle` only +;; rewrite the `icmp`-mediated form, but the wasm front-end emits +;; `brif (ireduce.i32 (ctz.i64 X))` for `if (ctz X)`-style tests without ever +;; materialising an `icmp`, so those rules never fire; handle that shape here. +;; +;; `ctz(X) != 0` iff LSB(X) == 0 iff `(X & 1) == 0` — emit `test X, 1; CC.Z`. +;; `clz(X) != 0` iff MSB(X) == 0 iff X >=signed 0 — emit `test X, X; CC.NS`. +;; X == 0 needs no special case: both counts are then the bit width. +;; +;; The `ireduce`-wrapped form (wasm `i32.wrap_i64` over a 64-bit count) is +;; matched too: a count in [0, bitwidth] is unchanged by the truncation. +(rule 3 (is_nonzero (ctz (ty_32_or_64 ty) val)) + (CondResult.CC (x64_test ty val (RegMemImm.Imm 1)) (CC.Z))) +(rule 3 (is_nonzero (ireduce _ (ctz (ty_32_or_64 ty) val))) + (CondResult.CC (x64_test ty val (RegMemImm.Imm 1)) (CC.Z))) +(rule 3 (is_nonzero (clz (ty_32_or_64 ty) val)) + (let ((gpr Gpr val)) (CondResult.CC (x64_test ty gpr gpr) (CC.NS)))) +(rule 3 (is_nonzero (ireduce _ (clz (ty_32_or_64 ty) val))) + (let ((gpr Gpr val)) (CondResult.CC (x64_test ty gpr gpr) (CC.NS)))) + ;; Like `is_nonzero` but with additional specializations for compare ;; operators. 
We break this out from `is_nonzero` because we want to diff --git a/tests/disas/ctz-clz-bool-condition.wat b/tests/disas/ctz-clz-bool-condition.wat index 9e06d200f77f..4a8a1178f78e 100644 --- a/tests/disas/ctz-clz-bool-condition.wat +++ b/tests/disas/ctz-clz-bool-condition.wat @@ -108,14 +108,11 @@ ;; wasm[0]::function[2]::if_ctz_bare_i32: ;; pushq %rbp ;; movq %rsp, %rbp -;; movl $0x20, %esi -;; bsfl %edx, %r9d -;; cmovel %esi, %r9d -;; testl %r9d, %r9d -;; jne 0xa4 -;; 9a: movl $0xc8, %eax -;; jmp 0xa9 -;; a4: movl $0x64, %eax +;; testl $1, %edx +;; je 0x9a +;; 90: movl $0xc8, %eax +;; jmp 0x9f +;; 9a: movl $0x64, %eax ;; movq %rbp, %rsp ;; popq %rbp ;; retq @@ -167,14 +164,11 @@ ;; wasm[0]::function[7]::if_ctz_bare_i64: ;; pushq %rbp ;; movq %rsp, %rbp -;; movl $0x40, %esi -;; bsfq %rdx, %r9 -;; cmoveq %rsi, %r9 -;; testl %r9d, %r9d -;; jne 0x1a4 -;; 19a: movl $0xc8, %eax -;; jmp 0x1a9 -;; 1a4: movl $0x64, %eax +;; testq $1, %rdx +;; je 0x19b +;; 191: movl $0xc8, %eax +;; jmp 0x1a0 +;; 19b: movl $0x64, %eax ;; movq %rbp, %rsp ;; popq %rbp ;; retq @@ -216,16 +210,11 @@ ;; wasm[0]::function[11]::if_clz_bare_i32: ;; pushq %rbp ;; movq %rsp, %rbp -;; movq $18446744073709551615, %rsi -;; bsrl %edx, %r9d -;; cmovel %esi, %r9d -;; movl $0x1f, %eax -;; subl %r9d, %eax -;; testl %eax, %eax -;; jne 0x24d -;; 243: movl $0xc8, %eax -;; jmp 0x252 -;; 24d: movl $0x64, %eax +;; testl %edx, %edx +;; jns 0x236 +;; 22c: movl $0xc8, %eax +;; jmp 0x23b +;; 236: movl $0x64, %eax ;; movq %rbp, %rsp ;; popq %rbp ;; retq @@ -244,10 +233,10 @@ ;; pushq %rbp ;; movq %rsp, %rbp ;; testq %rdx, %rdx -;; jl 0x297 -;; 28d: movl $0xc8, %eax -;; jmp 0x29c -;; 297: movl $0x64, %eax +;; jl 0x277 +;; 26d: movl $0xc8, %eax +;; jmp 0x27c +;; 277: movl $0x64, %eax ;; movq %rbp, %rsp ;; popq %rbp ;; retq @@ -256,10 +245,10 @@ ;; pushq %rbp ;; movq %rsp, %rbp ;; testq %rdx, %rdx -;; jge 0x2d7 -;; 2cd: movl $0xc8, %eax -;; jmp 0x2dc -;; 2d7: movl $0x64, %eax +;; jge 0x2b7 +;; 2ad: movl $0xc8, %eax +;; 
jmp 0x2bc +;; 2b7: movl $0x64, %eax ;; movq %rbp, %rsp ;; popq %rbp ;; retq @@ -271,10 +260,10 @@ ;; bsfl %edx, %r9d ;; cmovel %esi, %r9d ;; cmpl $4, %r9d -;; je 0x325 -;; 31b: movl $0xc8, %eax -;; jmp 0x32a -;; 325: movl $0x64, %eax +;; je 0x305 +;; 2fb: movl $0xc8, %eax +;; jmp 0x30a +;; 305: movl $0x64, %eax ;; movq %rbp, %rsp ;; popq %rbp ;; retq