Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 18 additions & 0 deletions cranelift/codegen/src/isa/aarch64/inst.isle
Original file line number Diff line number Diff line change
Expand Up @@ -4669,6 +4669,24 @@
(rule 3 (is_nonzero val @ (value_type $I8))
(CondResult.Cond (tst_imm $I32 val (u64_into_imm_logic $I32 255)) (Cond.Ne)))

;; `ctz(X)` and `clz(X)` flow into a boolean context (`brif`, `select`, ...)
;; without going through `icmp`, so the egraph rules in `opts/icmp.isle`
;; don't reach this shape. Specialize here:
;;
;; `ctz(X) != 0` iff LSB(X) == 0 iff `(X & 1) == 0` — emit `tst X, #1; Cond.Eq`.
;; `clz(X) != 0` iff MSB(X) == 0 iff X >=signed 0 — emit `cmp X, #0; Cond.Pl`.
;;
;; The `ireduce` variant catches the wasm front-end's `i32.wrap_i64` over a
;; 64-bit `ctz`/`clz` — a no-op on values in [0, bitwidth].
(rule 4 (is_nonzero (ctz (ty_32_or_64 ty) val))
(CondResult.Cond (tst_imm ty val (u64_into_imm_logic ty 1)) (Cond.Eq)))
(rule 4 (is_nonzero (ireduce _ (ctz (ty_32_or_64 ty) val)))
(CondResult.Cond (tst_imm ty val (u64_into_imm_logic ty 1)) (Cond.Eq)))
(rule 4 (is_nonzero (clz (ty_32_or_64 ty) val))
(CondResult.Cond (cmp_imm (operand_size ty) val (u8_into_imm12 0)) (Cond.Pl)))
(rule 4 (is_nonzero (ireduce _ (clz (ty_32_or_64 ty) val)))
(CondResult.Cond (cmp_imm (operand_size ty) val (u8_into_imm12 0)) (Cond.Pl)))

(decl emit_icmp (IntCC Value Value) CondResult)

;; 8/16-bit base signed/unsigned cases
Expand Down
21 changes: 21 additions & 0 deletions cranelift/codegen/src/isa/x64/inst.isle
Original file line number Diff line number Diff line change
Expand Up @@ -3826,6 +3826,27 @@
(rule 2 (is_nonzero (band _ a @ (value_type (ty_int (fits_in_64 ty))) b))
(is_nonzero_band ty a b))

;; `ctz(X)` and `clz(X)` flow into a boolean context (`brif`, `select`, `trap`,
;; ...) more directly than the icmp-mediated form rewritten in the egraph at
;; `opts/icmp.isle`. The wasm front-end emits `brif (ireduce.i32 (ctz.i64 X))`
;; for `if (ctz X)`-style tests without ever materialising an `icmp`, so those
;; egraph rules don't fire on this shape; specializing here closes the gap.
;;
;; `ctz(X) != 0` iff LSB(X) == 0 iff `(X & 1) == 0` — emit `test X, 1; CC.Z`.
;; `clz(X) != 0` iff MSB(X) == 0 iff X >=signed 0 — emit `test X, X; CC.NS`.
;;
;; Both the bare and the `ireduce`-wrapped form are recognised: the latter is
;; the wasm-frontend's `i32.wrap_i64` over a 64-bit `ctz`/`clz`, which is a
;; no-op on values in [0, bitwidth].
(rule 3 (is_nonzero (ctz (ty_32_or_64 ty) val))
(CondResult.CC (x64_test ty val (RegMemImm.Imm 1)) (CC.Z)))
(rule 3 (is_nonzero (ireduce _ (ctz (ty_32_or_64 ty) val)))
(CondResult.CC (x64_test ty val (RegMemImm.Imm 1)) (CC.Z)))
(rule 3 (is_nonzero (clz (ty_32_or_64 ty) val))
(let ((gpr Gpr val)) (CondResult.CC (x64_test ty gpr gpr) (CC.NS))))
(rule 3 (is_nonzero (ireduce _ (clz (ty_32_or_64 ty) val)))
(let ((gpr Gpr val)) (CondResult.CC (x64_test ty gpr gpr) (CC.NS))))


;; Like `is_nonzero` but with additional specializations for compare
;; operators. We break this out from `is_nonzero` because we want to
Expand Down
149 changes: 149 additions & 0 deletions tests/disas/aarch64-ctz-clz-bool-condition.wat
Original file line number Diff line number Diff line change
@@ -0,0 +1,149 @@
;;! target = "aarch64"
;;! test = "compile"

;; aarch64 analogue of tests/disas/ctz-clz-bool-condition.wat. Verifies that
;; the bare `if (ctz X)` / `if (clz X)` lowerings collapse to a single
;; `tst`/`cmp` + condition, mirroring the x64 lowering rules.

(module
;; ----- ctz, i32 -------------------------------------------------------

(func $if_ctz_eq0_i32 (param i32) (result i32)
(i32.eq (i32.ctz (local.get 0)) (i32.const 0))
if (result i32) i32.const 100 else i32.const 200 end)
(func $if_ctz_ne0_i32 (param i32) (result i32)
(i32.ne (i32.ctz (local.get 0)) (i32.const 0))
if (result i32) i32.const 100 else i32.const 200 end)
(func $if_ctz_bare_i32 (param i32) (result i32)
(i32.ctz (local.get 0))
if (result i32) i32.const 100 else i32.const 200 end)

;; ----- ctz, i64 -------------------------------------------------------

(func $if_ctz_eq0_i64 (param i64) (result i32)
(i64.eq (i64.ctz (local.get 0)) (i64.const 0))
if (result i32) i32.const 100 else i32.const 200 end)
(func $if_ctz_bare_i64 (param i64) (result i32)
(i64.ctz (local.get 0)) i32.wrap_i64
if (result i32) i32.const 100 else i32.const 200 end)

;; ----- clz, i32 (sign-bit tests) --------------------------------------

(func $if_clz_eq0_i32 (param i32) (result i32)
(i32.eq (i32.clz (local.get 0)) (i32.const 0))
if (result i32) i32.const 100 else i32.const 200 end)
(func $if_clz_bare_i32 (param i32) (result i32)
(i32.clz (local.get 0))
if (result i32) i32.const 100 else i32.const 200 end)

;; ----- clz, i64 -------------------------------------------------------

(func $if_clz_eq0_i64 (param i64) (result i32)
(i64.eq (i64.clz (local.get 0)) (i64.const 0))
if (result i32) i32.const 100 else i32.const 200 end)

;; ----- negative test: numeric comparison must NOT collapse ------------
(func $if_ctz_eq4_i32 (param i32) (result i32)
(i32.eq (i32.ctz (local.get 0)) (i32.const 4))
if (result i32) i32.const 100 else i32.const 200 end)
)
;; wasm[0]::function[0]::if_ctz_eq0_i32:
;; stp x29, x30, [sp, #-0x10]!
;; mov x29, sp
;; and w6, w4, #1
;; cbnz w6, #0x18
;; 10: mov w2, #0xc8
;; b #0x1c
;; 18: mov w2, #0x64
;; ldp x29, x30, [sp], #0x10
;; ret
;;
;; wasm[0]::function[1]::if_ctz_ne0_i32:
;; stp x29, x30, [sp, #-0x10]!
;; mov x29, sp
;; and w6, w4, #1
;; cbz w6, #0x58
;; 50: mov w2, #0xc8
;; b #0x5c
;; 58: mov w2, #0x64
;; ldp x29, x30, [sp], #0x10
;; ret
;;
;; wasm[0]::function[2]::if_ctz_bare_i32:
;; stp x29, x30, [sp, #-0x10]!
;; mov x29, sp
;; tst w4, #1
;; b.eq #0x98
;; 90: mov w2, #0xc8
;; b #0x9c
;; 98: mov w2, #0x64
;; ldp x29, x30, [sp], #0x10
;; ret
;;
;; wasm[0]::function[3]::if_ctz_eq0_i64:
;; stp x29, x30, [sp, #-0x10]!
;; mov x29, sp
;; and x6, x4, #1
;; cbnz x6, #0xd8
;; d0: mov w2, #0xc8
;; b #0xdc
;; d8: mov w2, #0x64
;; ldp x29, x30, [sp], #0x10
;; ret
;;
;; wasm[0]::function[4]::if_ctz_bare_i64:
;; stp x29, x30, [sp, #-0x10]!
;; mov x29, sp
;; tst x4, #1
;; b.eq #0x118
;; 110: mov w2, #0xc8
;; b #0x11c
;; 118: mov w2, #0x64
;; ldp x29, x30, [sp], #0x10
;; ret
;;
;; wasm[0]::function[5]::if_clz_eq0_i32:
;; stp x29, x30, [sp, #-0x10]!
;; mov x29, sp
;; cmp w4, #0
;; b.lt #0x158
;; 150: mov w2, #0xc8
;; b #0x15c
;; 158: mov w2, #0x64
;; ldp x29, x30, [sp], #0x10
;; ret
;;
;; wasm[0]::function[6]::if_clz_bare_i32:
;; stp x29, x30, [sp, #-0x10]!
;; mov x29, sp
;; cmp w4, #0
;; b.pl #0x198
;; 190: mov w2, #0xc8
;; b #0x19c
;; 198: mov w2, #0x64
;; ldp x29, x30, [sp], #0x10
;; ret
;;
;; wasm[0]::function[7]::if_clz_eq0_i64:
;; stp x29, x30, [sp, #-0x10]!
;; mov x29, sp
;; cmp x4, #0
;; b.lt #0x1d8
;; 1d0: mov w2, #0xc8
;; b #0x1dc
;; 1d8: mov w2, #0x64
;; ldp x29, x30, [sp], #0x10
;; ret
;;
;; wasm[0]::function[8]::if_ctz_eq4_i32:
;; stp x29, x30, [sp, #-0x10]!
;; mov x29, sp
;; rbit w6, w4
;; clz w8, w6
;; cmp w8, #4
;; b.eq #0x220
;; 218: mov w2, #0xc8
;; b #0x224
;; 220: mov w2, #0x64
;; ldp x29, x30, [sp], #0x10
;; ret
65 changes: 27 additions & 38 deletions tests/disas/ctz-clz-bool-condition.wat
Original file line number Diff line number Diff line change
Expand Up @@ -108,14 +108,11 @@
;; wasm[0]::function[2]::if_ctz_bare_i32:
;; pushq %rbp
;; movq %rsp, %rbp
;; movl $0x20, %esi
;; bsfl %edx, %r9d
;; cmovel %esi, %r9d
;; testl %r9d, %r9d
;; jne 0xa4
;; 9a: movl $0xc8, %eax
;; jmp 0xa9
;; a4: movl $0x64, %eax
;; testl $1, %edx
;; je 0x9a
;; 90: movl $0xc8, %eax
;; jmp 0x9f
;; 9a: movl $0x64, %eax
;; movq %rbp, %rsp
;; popq %rbp
;; retq
Expand Down Expand Up @@ -167,14 +164,11 @@
;; wasm[0]::function[7]::if_ctz_bare_i64:
;; pushq %rbp
;; movq %rsp, %rbp
;; movl $0x40, %esi
;; bsfq %rdx, %r9
;; cmoveq %rsi, %r9
;; testl %r9d, %r9d
;; jne 0x1a4
;; 19a: movl $0xc8, %eax
;; jmp 0x1a9
;; 1a4: movl $0x64, %eax
;; testq $1, %rdx
;; je 0x19b
;; 191: movl $0xc8, %eax
;; jmp 0x1a0
;; 19b: movl $0x64, %eax
;; movq %rbp, %rsp
;; popq %rbp
;; retq
Expand Down Expand Up @@ -216,16 +210,11 @@
;; wasm[0]::function[11]::if_clz_bare_i32:
;; pushq %rbp
;; movq %rsp, %rbp
;; movq $18446744073709551615, %rsi
;; bsrl %edx, %r9d
;; cmovel %esi, %r9d
;; movl $0x1f, %eax
;; subl %r9d, %eax
;; testl %eax, %eax
;; jne 0x24d
;; 243: movl $0xc8, %eax
;; jmp 0x252
;; 24d: movl $0x64, %eax
;; testl %edx, %edx
;; jns 0x236
;; 22c: movl $0xc8, %eax
;; jmp 0x23b
;; 236: movl $0x64, %eax
;; movq %rbp, %rsp
;; popq %rbp
;; retq
Expand All @@ -244,10 +233,10 @@
;; pushq %rbp
;; movq %rsp, %rbp
;; testq %rdx, %rdx
;; jl 0x297
;; 28d: movl $0xc8, %eax
;; jmp 0x29c
;; 297: movl $0x64, %eax
;; jl 0x277
;; 26d: movl $0xc8, %eax
;; jmp 0x27c
;; 277: movl $0x64, %eax
;; movq %rbp, %rsp
;; popq %rbp
;; retq
Expand All @@ -256,10 +245,10 @@
;; pushq %rbp
;; movq %rsp, %rbp
;; testq %rdx, %rdx
;; jge 0x2d7
;; 2cd: movl $0xc8, %eax
;; jmp 0x2dc
;; 2d7: movl $0x64, %eax
;; jge 0x2b7
;; 2ad: movl $0xc8, %eax
;; jmp 0x2bc
;; 2b7: movl $0x64, %eax
;; movq %rbp, %rsp
;; popq %rbp
;; retq
Expand All @@ -271,10 +260,10 @@
;; bsfl %edx, %r9d
;; cmovel %esi, %r9d
;; cmpl $4, %r9d
;; je 0x325
;; 31b: movl $0xc8, %eax
;; jmp 0x32a
;; 325: movl $0x64, %eax
;; je 0x305
;; 2fb: movl $0xc8, %eax
;; jmp 0x30a
;; 305: movl $0x64, %eax
;; movq %rbp, %rsp
;; popq %rbp
;; retq
Loading