Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
50 changes: 50 additions & 0 deletions src/passes/OptimizeInstructions.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1193,6 +1193,35 @@ struct OptimizeInstructions
BranchHints::flip(curr, getFunction());
}
}
// (i32.and X 1) as if-else condition => (i32.ctz X) with swapped arms,
// since ctz(X) == 0 iff LSB(X) == 1 (saves one instruction).
//
// Gated on shrinkLevel >= 1 (i.e. -Os or -Oz) only: TZCNT can cost
// 1-2 cycles more than AND on common JIT VMs (per Agner Fog's
// tables), and JIT-less interpreters (wasm3, smart-contract
// runtimes) lack a fast path for ctz at all. The byte-saving is the
// win we want under shrink modes; under speed modes the AND form
// stays. See WebAssembly/binaryen#8562.
if (auto* binary = curr->condition->dynCast<Binary>()) {
if (binary->op == AndInt32 && getPassOptions().shrinkLevel >= 1) {
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

How about making this >= 2, i.e., only in -Oz? -Os is meant to be a good balance between size and speed, and without more data I'm not sure how balanced this is. -Oz is "size at all costs".

Copy link
Copy Markdown
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't think so: (i32.and X (i32.const 1)) often feeds conditionals, and the proposed transform would unlock ripple effects. This saves not only space but also time.

Expression* other = nullptr;
if (auto* c = binary->right->dynCast<Const>()) {
if (c->value.geti32() == 1) {
other = binary->left;
}
} else if (auto* c = binary->left->dynCast<Const>()) {
if (c->value.geti32() == 1) {
other = binary->right;
}
}
if (other) {
Builder builder(*getModule());
curr->condition = builder.makeUnary(CtzInt32, other);
std::swap(curr->ifTrue, curr->ifFalse);
BranchHints::flip(curr, getFunction());
}
}
}
// Note that we do not consider metadata here. Like LLVM, we ignore
// metadata when trying to fold code together, preferring certain
// optimization over possible benefits of profiling data.
Expand Down Expand Up @@ -3114,6 +3143,27 @@ struct OptimizeInstructions
binary->op = op;
return binary;
}
// eqz(and X 1) ==> ctz X in boolean context:
// both are truthy iff LSB(X) == 0, saving one instruction.
// Gated on shrinkLevel >= 1 (-Os, -Oz) — see the matching
// comment in visitIf and WebAssembly/binaryen#8562.
if (binary->op == AndInt32 &&
getPassOptions().shrinkLevel >= 1) {
Expression* other = nullptr;
if (auto* c = binary->right->dynCast<Const>()) {
if (c->value.geti32() == 1) {
other = binary->left;
}
} else if (auto* c = binary->left->dynCast<Const>()) {
if (c->value.geti32() == 1) {
other = binary->right;
}
}
if (other) {
Builder builder(*getModule());
return builder.makeUnary(CtzInt32, other);
}
}
}
}
if (unary->op == EqZInt32 || unary->op == EqZInt64) {
Expand Down
186 changes: 186 additions & 0 deletions test/lit/passes/optimize-instructions-lsb-if.wast
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Let's add tests for select as well.

Original file line number Diff line number Diff line change
@@ -0,0 +1,186 @@
;; NOTE: Assertions have been generated by update_lit_checks.py and should not be edited.
;; RUN: wasm-opt %s --optimize-instructions -S -o - | filecheck %s --check-prefix=DEFAULT
;; RUN: wasm-opt %s --shrink-level=1 --optimize-instructions -S -o - | filecheck %s --check-prefix=SHRINK

;; Test the LSB→ctz fold: under shrink modes (-Os, -Oz, equivalent to
;; --shrink-level >= 1) `(if (i32.and X 1) T E)` becomes
;; `(if (i32.ctz X) E T)`, and `(br_if N V (i32.eqz (i32.and X 1)))`
;; becomes `(br_if N V (i32.ctz X))`. A `select` whose condition is
;; `(i32.eqz (i32.and X 1))` likewise ends up with `(i32.ctz X)` as its
;; condition. Each rewrite is at least one instruction shorter, but
;; potentially 1-2 cycles slower on JIT VMs and unconditionally slower
;; on JIT-less interpreters. The fold is therefore suppressed under
;; default and speed-optimised modes, and only fires when the user has
;; opted into shrinking. See WebAssembly/binaryen#8562.

(module
;; DEFAULT: (func $lsb-if (param $x i32) (result i32)
;; DEFAULT-NEXT: (if (result i32)
;; DEFAULT-NEXT: (i32.and
;; DEFAULT-NEXT: (local.get $x)
;; DEFAULT-NEXT: (i32.const 1)
;; DEFAULT-NEXT: )
;; DEFAULT-NEXT: (then
;; DEFAULT-NEXT: (i32.const 1)
;; DEFAULT-NEXT: )
;; DEFAULT-NEXT: (else
;; DEFAULT-NEXT: (i32.const 0)
;; DEFAULT-NEXT: )
;; DEFAULT-NEXT: )
;; DEFAULT-NEXT: )
;; SHRINK: (func $lsb-if (param $x i32) (result i32)
;; SHRINK-NEXT: (if (result i32)
;; SHRINK-NEXT: (i32.ctz
;; SHRINK-NEXT: (local.get $x)
;; SHRINK-NEXT: )
;; SHRINK-NEXT: (then
;; SHRINK-NEXT: (i32.const 0)
;; SHRINK-NEXT: )
;; SHRINK-NEXT: (else
;; SHRINK-NEXT: (i32.const 1)
;; SHRINK-NEXT: )
;; SHRINK-NEXT: )
;; SHRINK-NEXT: )
(func $lsb-if (param $x i32) (result i32)
;; Baseline case: an if/else whose condition is (i32.and X 1), with the
;; constant on the right-hand side.
;; if LSB is set, return 1; else return 0
;; Expected per the generated checks above: the default run leaves the
;; function as written, while the --shrink-level=1 run tests (i32.ctz X)
;; instead and swaps the then/else arms.
(if (result i32)
(i32.and (local.get $x) (i32.const 1))
(then (i32.const 1))
(else (i32.const 0))
)
)

;; DEFAULT: (func $lsb-if-const-left (param $x i32) (result i32)
;; DEFAULT-NEXT: (if (result i32)
;; DEFAULT-NEXT: (i32.and
;; DEFAULT-NEXT: (local.get $x)
;; DEFAULT-NEXT: (i32.const 1)
;; DEFAULT-NEXT: )
;; DEFAULT-NEXT: (then
;; DEFAULT-NEXT: (i32.const 1)
;; DEFAULT-NEXT: )
;; DEFAULT-NEXT: (else
;; DEFAULT-NEXT: (i32.const 0)
;; DEFAULT-NEXT: )
;; DEFAULT-NEXT: )
;; DEFAULT-NEXT: )
;; SHRINK: (func $lsb-if-const-left (param $x i32) (result i32)
;; SHRINK-NEXT: (if (result i32)
;; SHRINK-NEXT: (i32.ctz
;; SHRINK-NEXT: (local.get $x)
;; SHRINK-NEXT: )
;; SHRINK-NEXT: (then
;; SHRINK-NEXT: (i32.const 0)
;; SHRINK-NEXT: )
;; SHRINK-NEXT: (else
;; SHRINK-NEXT: (i32.const 1)
;; SHRINK-NEXT: )
;; SHRINK-NEXT: )
;; SHRINK-NEXT: )
(func $lsb-if-const-left (param $x i32) (result i32)
;; same but constant on the left
;; Expected per the generated checks above: the default run prints the AND
;; with the constant moved to the right-hand side, and the
;; --shrink-level=1 run produces the same ctz form (arms swapped) as
;; $lsb-if.
(if (result i32)
(i32.and (i32.const 1) (local.get $x))
(then (i32.const 1))
(else (i32.const 0))
)
)

;; DEFAULT: (func $lsb-brif (param $x i32) (result i32)
;; DEFAULT-NEXT: (block $done (result i32)
;; DEFAULT-NEXT: (drop
;; DEFAULT-NEXT: (br_if $done
;; DEFAULT-NEXT: (i32.const 99)
;; DEFAULT-NEXT: (i32.eqz
;; DEFAULT-NEXT: (i32.and
;; DEFAULT-NEXT: (local.get $x)
;; DEFAULT-NEXT: (i32.const 1)
;; DEFAULT-NEXT: )
;; DEFAULT-NEXT: )
;; DEFAULT-NEXT: )
;; DEFAULT-NEXT: )
;; DEFAULT-NEXT: (i32.const 42)
;; DEFAULT-NEXT: )
;; DEFAULT-NEXT: )
;; SHRINK: (func $lsb-brif (param $x i32) (result i32)
;; SHRINK-NEXT: (block $done (result i32)
;; SHRINK-NEXT: (drop
;; SHRINK-NEXT: (br_if $done
;; SHRINK-NEXT: (i32.const 99)
;; SHRINK-NEXT: (i32.ctz
;; SHRINK-NEXT: (local.get $x)
;; SHRINK-NEXT: )
;; SHRINK-NEXT: )
;; SHRINK-NEXT: )
;; SHRINK-NEXT: (i32.const 42)
;; SHRINK-NEXT: )
;; SHRINK-NEXT: )
(func $lsb-brif (param $x i32) (result i32)
;; br_if (eqz (and X 1)) — the typical is_skewed/is_scalar pattern
;; The branch carries the value 99 out of $done when the low bit of X is
;; clear; otherwise the block falls through to 42.
;; Expected per the generated checks above: the default run keeps the
;; eqz/and condition, while the --shrink-level=1 run replaces the whole
;; condition with (i32.ctz X) and leaves the rest of the br_if untouched.
(block $done (result i32)
(drop
(br_if $done
(i32.const 99)
(i32.eqz (i32.and (local.get $x) (i32.const 1)))
)
)
(i32.const 42)
)
)

;; DEFAULT: (func $lsb-select (param $x i32) (param $a i32) (param $b i32) (result i32)
;; DEFAULT-NEXT: (select
;; DEFAULT-NEXT: (local.get $b)
;; DEFAULT-NEXT: (local.get $a)
;; DEFAULT-NEXT: (i32.and
;; DEFAULT-NEXT: (local.get $x)
;; DEFAULT-NEXT: (i32.const 1)
;; DEFAULT-NEXT: )
;; DEFAULT-NEXT: )
;; DEFAULT-NEXT: )
;; SHRINK: (func $lsb-select (param $x i32) (param $a i32) (param $b i32) (result i32)
;; SHRINK-NEXT: (select
;; SHRINK-NEXT: (local.get $a)
;; SHRINK-NEXT: (local.get $b)
;; SHRINK-NEXT: (i32.ctz
;; SHRINK-NEXT: (local.get $x)
;; SHRINK-NEXT: )
;; SHRINK-NEXT: )
;; SHRINK-NEXT: )
(func $lsb-select (param $x i32) (param $a i32) (param $b i32) (result i32)
;; select with the eqz-and-1 boolean condition.
;; Non-constant arms keep the select itself in the IR — otherwise
;; an unrelated `select c1 c0 P` simplification would eat it.
;; Expected per the generated checks above: the default run drops the eqz
;; by swapping the arms ($b, $a) and keeping (i32.and X 1) as the
;; condition; the --shrink-level=1 run keeps the arms in their original
;; order ($a, $b) and uses (i32.ctz X) as the condition.
(select
(local.get $a)
(local.get $b)
(i32.eqz (i32.and (local.get $x) (i32.const 1)))
)
)

;; DEFAULT: (func $lsb-select-const-left (param $x i32) (param $a i32) (param $b i32) (result i32)
;; DEFAULT-NEXT: (select
;; DEFAULT-NEXT: (local.get $b)
;; DEFAULT-NEXT: (local.get $a)
;; DEFAULT-NEXT: (i32.and
;; DEFAULT-NEXT: (local.get $x)
;; DEFAULT-NEXT: (i32.const 1)
;; DEFAULT-NEXT: )
;; DEFAULT-NEXT: )
;; DEFAULT-NEXT: )
;; SHRINK: (func $lsb-select-const-left (param $x i32) (param $a i32) (param $b i32) (result i32)
;; SHRINK-NEXT: (select
;; SHRINK-NEXT: (local.get $a)
;; SHRINK-NEXT: (local.get $b)
;; SHRINK-NEXT: (i32.ctz
;; SHRINK-NEXT: (local.get $x)
;; SHRINK-NEXT: )
;; SHRINK-NEXT: )
;; SHRINK-NEXT: )
(func $lsb-select-const-left (param $x i32) (param $a i32) (param $b i32) (result i32)
;; same but with the constant on the left of the AND.
;; Expected per the generated checks above: both runs produce the same
;; output shapes as $lsb-select (the default run prints the AND with the
;; constant on the right-hand side and the arms swapped; the
;; --shrink-level=1 run uses (i32.ctz X) with the arms in original order).
(select
(local.get $a)
(local.get $b)
(i32.eqz (i32.and (i32.const 1) (local.get $x)))
)
)
)