Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 21 additions & 0 deletions cranelift/codegen/src/isa/x64/inst.isle
Original file line number Diff line number Diff line change
Expand Up @@ -3826,6 +3826,27 @@
;; A `band` whose first operand is an integer that fits in 64 bits is handed
;; to `is_nonzero_band` together with that operand's type.
(rule 2 (is_nonzero
          (band _
                lhs @ (value_type (ty_int (fits_in_64 ty)))
                rhs))
      (is_nonzero_band ty lhs rhs))

;; `ctz(X)` and `clz(X)` can feed a boolean context (`brif`, `select`, `trap`,
;; ...) directly, without the intervening `icmp` that the egraph rules in
;; `opts/icmp.isle` rewrite. The wasm front-end emits
;; `brif (ireduce.i32 (ctz.i64 X))` for `if (ctz X)`-style tests without ever
;; materialising an `icmp`, so those egraph rules don't fire on this shape;
;; specializing here closes the gap.
;;
;; `ctz(X) != 0` iff LSB(X) == 0 iff `(X & 1) == 0` — emit `test X, 1; CC.Z`.
;; `clz(X) != 0` iff MSB(X) == 0 iff X >=signed 0 — emit `test X, X; CC.NS`.
;;
;; Both the bare and the `ireduce`-wrapped forms are recognised: the latter is
;; the wasm front-end's `i32.wrap_i64` over a 64-bit `ctz`/`clz`, which leaves
;; the count unchanged because it always lies in [0, bitwidth].
;; ctz, bare: the count is nonzero exactly when bit 0 of `x` is clear, so test
;; `x` against the immediate 1 and take the `Z` condition.
(rule 3 (is_nonzero (ctz (ty_32_or_64 ty) x))
      (CondResult.CC
        (x64_test ty x (RegMemImm.Imm 1))
        (CC.Z)))

;; ctz, seen through an `ireduce`: same test on the un-narrowed operand.
(rule 3 (is_nonzero (ireduce _ (ctz (ty_32_or_64 ty) x)))
      (CondResult.CC
        (x64_test ty x (RegMemImm.Imm 1))
        (CC.Z)))

;; clz, bare: the count is nonzero exactly when the top bit of `x` is clear,
;; so test `x` against itself and take the `NS` condition. The operand is
;; bound to a `Gpr` once and used for both sides of the test.
(rule 3 (is_nonzero (clz (ty_32_or_64 ty) x))
      (let ((src Gpr x))
        (CondResult.CC
          (x64_test ty src src)
          (CC.NS))))

;; clz, seen through an `ireduce`: same test on the un-narrowed operand.
(rule 3 (is_nonzero (ireduce _ (clz (ty_32_or_64 ty) x)))
      (let ((src Gpr x))
        (CondResult.CC
          (x64_test ty src src)
          (CC.NS))))


;; Like `is_nonzero` but with additional specializations for compare
;; operators. We break this out from `is_nonzero` because we want to
Expand Down
65 changes: 27 additions & 38 deletions tests/disas/ctz-clz-bool-condition.wat
Original file line number Diff line number Diff line change
Expand Up @@ -108,14 +108,11 @@
;; wasm[0]::function[2]::if_ctz_bare_i32:
;; pushq %rbp
;; movq %rsp, %rbp
;; movl $0x20, %esi
;; bsfl %edx, %r9d
;; cmovel %esi, %r9d
;; testl %r9d, %r9d
;; jne 0xa4
;; 9a: movl $0xc8, %eax
;; jmp 0xa9
;; a4: movl $0x64, %eax
;; testl $1, %edx
;; je 0x9a
;; 90: movl $0xc8, %eax
;; jmp 0x9f
;; 9a: movl $0x64, %eax
;; movq %rbp, %rsp
;; popq %rbp
;; retq
Expand Down Expand Up @@ -167,14 +164,11 @@
;; wasm[0]::function[7]::if_ctz_bare_i64:
;; pushq %rbp
;; movq %rsp, %rbp
;; movl $0x40, %esi
;; bsfq %rdx, %r9
;; cmoveq %rsi, %r9
;; testl %r9d, %r9d
;; jne 0x1a4
;; 19a: movl $0xc8, %eax
;; jmp 0x1a9
;; 1a4: movl $0x64, %eax
;; testq $1, %rdx
;; je 0x19b
;; 191: movl $0xc8, %eax
;; jmp 0x1a0
;; 19b: movl $0x64, %eax
;; movq %rbp, %rsp
;; popq %rbp
;; retq
Expand Down Expand Up @@ -216,16 +210,11 @@
;; wasm[0]::function[11]::if_clz_bare_i32:
;; pushq %rbp
;; movq %rsp, %rbp
;; movq $18446744073709551615, %rsi
;; bsrl %edx, %r9d
;; cmovel %esi, %r9d
;; movl $0x1f, %eax
;; subl %r9d, %eax
;; testl %eax, %eax
;; jne 0x24d
;; 243: movl $0xc8, %eax
;; jmp 0x252
;; 24d: movl $0x64, %eax
;; testl %edx, %edx
;; jns 0x236
;; 22c: movl $0xc8, %eax
;; jmp 0x23b
;; 236: movl $0x64, %eax
;; movq %rbp, %rsp
;; popq %rbp
;; retq
Expand All @@ -244,10 +233,10 @@
;; pushq %rbp
;; movq %rsp, %rbp
;; testq %rdx, %rdx
;; jl 0x297
;; 28d: movl $0xc8, %eax
;; jmp 0x29c
;; 297: movl $0x64, %eax
;; jl 0x277
;; 26d: movl $0xc8, %eax
;; jmp 0x27c
;; 277: movl $0x64, %eax
;; movq %rbp, %rsp
;; popq %rbp
;; retq
Expand All @@ -256,10 +245,10 @@
;; pushq %rbp
;; movq %rsp, %rbp
;; testq %rdx, %rdx
;; jge 0x2d7
;; 2cd: movl $0xc8, %eax
;; jmp 0x2dc
;; 2d7: movl $0x64, %eax
;; jge 0x2b7
;; 2ad: movl $0xc8, %eax
;; jmp 0x2bc
;; 2b7: movl $0x64, %eax
;; movq %rbp, %rsp
;; popq %rbp
;; retq
Expand All @@ -271,10 +260,10 @@
;; bsfl %edx, %r9d
;; cmovel %esi, %r9d
;; cmpl $4, %r9d
;; je 0x325
;; 31b: movl $0xc8, %eax
;; jmp 0x32a
;; 325: movl $0x64, %eax
;; je 0x305
;; 2fb: movl $0xc8, %eax
;; jmp 0x30a
;; 305: movl $0x64, %eax
;; movq %rbp, %rsp
;; popq %rbp
;; retq
Loading