diff --git a/src/passes/OptimizeInstructions.cpp b/src/passes/OptimizeInstructions.cpp
index 9ea8a7aa982..ac1552ee0c7 100644
--- a/src/passes/OptimizeInstructions.cpp
+++ b/src/passes/OptimizeInstructions.cpp
@@ -1193,6 +1193,35 @@ struct OptimizeInstructions
           BranchHints::flip(curr, getFunction());
         }
       }
+      // (i32.and X 1) as if-else condition => (i32.ctz X) with swapped arms,
+      // since ctz(X) == 0 iff LSB(X) == 1 (saves one instruction).
+      //
+      // Gated on shrinkLevel >= 1 (i.e. -Os or -Oz) only: TZCNT can cost
+      // 1-2 cycles more than AND on common JIT VMs (per Agner Fog's
+      // tables), and JIT-less interpreters (wasm3, smart-contract
+      // runtimes) lack a fast path for ctz at all. The byte-saving is the
+      // win we want under shrink modes; under speed modes the AND form
+      // stays. See WebAssembly/binaryen#8562.
+      if (auto* binary = curr->condition->dynCast<Binary>()) {
+        if (binary->op == AndInt32 && getPassOptions().shrinkLevel >= 1) {
+          Expression* other = nullptr;
+          if (auto* c = binary->right->dynCast<Const>()) {
+            if (c->value.geti32() == 1) {
+              other = binary->left;
+            }
+          } else if (auto* c = binary->left->dynCast<Const>()) {
+            if (c->value.geti32() == 1) {
+              other = binary->right;
+            }
+          }
+          if (other) {
+            Builder builder(*getModule());
+            curr->condition = builder.makeUnary(CtzInt32, other);
+            std::swap(curr->ifTrue, curr->ifFalse);
+            BranchHints::flip(curr, getFunction());
+          }
+        }
+      }
       // Note that we do not consider metadata here. Like LLVM, we ignore
       // metadata when trying to fold code together, preferring certain
       // optimization over possible benefits of profiling data.
@@ -3114,6 +3143,27 @@ struct OptimizeInstructions
               binary->op = op;
               return binary;
             }
+            // eqz(and X 1) ==> ctz X in boolean context:
+            // both are truthy iff LSB(X) == 0, saving two instructions.
+            // Gated on shrinkLevel >= 1 (-Os, -Oz) — see the matching
+            // comment in visitIf and WebAssembly/binaryen#8562.
+            if (binary->op == AndInt32 &&
+                getPassOptions().shrinkLevel >= 1) {
+              Expression* other = nullptr;
+              if (auto* c = binary->right->dynCast<Const>()) {
+                if (c->value.geti32() == 1) {
+                  other = binary->left;
+                }
+              } else if (auto* c = binary->left->dynCast<Const>()) {
+                if (c->value.geti32() == 1) {
+                  other = binary->right;
+                }
+              }
+              if (other) {
+                Builder builder(*getModule());
+                return builder.makeUnary(CtzInt32, other);
+              }
+            }
           }
         }
         if (unary->op == EqZInt32 || unary->op == EqZInt64) {
diff --git a/test/lit/passes/optimize-instructions-lsb-if.wast b/test/lit/passes/optimize-instructions-lsb-if.wast
new file mode 100644
index 00000000000..89c24a7b270
--- /dev/null
+++ b/test/lit/passes/optimize-instructions-lsb-if.wast
@@ -0,0 +1,186 @@
+;; NOTE: Assertions have been generated by update_lit_checks.py and should not be edited.
+;; RUN: wasm-opt %s --optimize-instructions -S -o - | filecheck %s --check-prefix=DEFAULT
+;; RUN: wasm-opt %s --shrink-level=1 --optimize-instructions -S -o - | filecheck %s --check-prefix=SHRINK
+
+;; Test the LSB→ctz fold: under shrink modes (-Os, -Oz, equivalent to
+;; --shrink-level >= 1) `(if (i32.and X 1) T E)` becomes
+;; `(if (i32.ctz X) E T)`, and `(br_if N V (i32.eqz (i32.and X 1)))`
+;; becomes `(br_if N V (i32.ctz X))` — fewer instructions, but
+;; potentially 1-2 cycles slower on JIT VMs and unconditionally slower
+;; on JIT-less interpreters. The fold is therefore suppressed under
+;; default and speed-optimised modes, and only fires when the user has
+;; opted into shrinking. See WebAssembly/binaryen#8562.
+
+(module
+  ;; DEFAULT:      (func $lsb-if (param $x i32) (result i32)
+  ;; DEFAULT-NEXT:  (if (result i32)
+  ;; DEFAULT-NEXT:   (i32.and
+  ;; DEFAULT-NEXT:    (local.get $x)
+  ;; DEFAULT-NEXT:    (i32.const 1)
+  ;; DEFAULT-NEXT:   )
+  ;; DEFAULT-NEXT:   (then
+  ;; DEFAULT-NEXT:    (i32.const 1)
+  ;; DEFAULT-NEXT:   )
+  ;; DEFAULT-NEXT:   (else
+  ;; DEFAULT-NEXT:    (i32.const 0)
+  ;; DEFAULT-NEXT:   )
+  ;; DEFAULT-NEXT:  )
+  ;; DEFAULT-NEXT: )
+  ;; SHRINK:      (func $lsb-if (param $x i32) (result i32)
+  ;; SHRINK-NEXT:  (if (result i32)
+  ;; SHRINK-NEXT:   (i32.ctz
+  ;; SHRINK-NEXT:    (local.get $x)
+  ;; SHRINK-NEXT:   )
+  ;; SHRINK-NEXT:   (then
+  ;; SHRINK-NEXT:    (i32.const 0)
+  ;; SHRINK-NEXT:   )
+  ;; SHRINK-NEXT:   (else
+  ;; SHRINK-NEXT:    (i32.const 1)
+  ;; SHRINK-NEXT:   )
+  ;; SHRINK-NEXT:  )
+  ;; SHRINK-NEXT: )
+  (func $lsb-if (param $x i32) (result i32)
+    ;; Baseline if-else: returns 1 when the LSB of $x is set, else 0. SHRINK tests (i32.ctz $x) with the arms swapped.
+    (if (result i32)
+      (i32.and (local.get $x) (i32.const 1))
+      (then (i32.const 1))
+      (else (i32.const 0))
+    )
+  )
+
+  ;; DEFAULT:      (func $lsb-if-const-left (param $x i32) (result i32)
+  ;; DEFAULT-NEXT:  (if (result i32)
+  ;; DEFAULT-NEXT:   (i32.and
+  ;; DEFAULT-NEXT:    (local.get $x)
+  ;; DEFAULT-NEXT:    (i32.const 1)
+  ;; DEFAULT-NEXT:   )
+  ;; DEFAULT-NEXT:   (then
+  ;; DEFAULT-NEXT:    (i32.const 1)
+  ;; DEFAULT-NEXT:   )
+  ;; DEFAULT-NEXT:   (else
+  ;; DEFAULT-NEXT:    (i32.const 0)
+  ;; DEFAULT-NEXT:   )
+  ;; DEFAULT-NEXT:  )
+  ;; DEFAULT-NEXT: )
+  ;; SHRINK:      (func $lsb-if-const-left (param $x i32) (result i32)
+  ;; SHRINK-NEXT:  (if (result i32)
+  ;; SHRINK-NEXT:   (i32.ctz
+  ;; SHRINK-NEXT:    (local.get $x)
+  ;; SHRINK-NEXT:   )
+  ;; SHRINK-NEXT:   (then
+  ;; SHRINK-NEXT:    (i32.const 0)
+  ;; SHRINK-NEXT:   )
+  ;; SHRINK-NEXT:   (else
+  ;; SHRINK-NEXT:    (i32.const 1)
+  ;; SHRINK-NEXT:   )
+  ;; SHRINK-NEXT:  )
+  ;; SHRINK-NEXT: )
+  (func $lsb-if-const-left (param $x i32) (result i32)
+    ;; Same as $lsb-if, but with the constant on the left of the i32.and (DEFAULT output shows it moved to the right).
+    (if (result i32)
+      (i32.and (i32.const 1) (local.get $x))
+      (then (i32.const 1))
+      (else (i32.const 0))
+    )
+  )
+
+  ;; DEFAULT:      (func $lsb-brif (param $x i32) (result i32)
+  ;; DEFAULT-NEXT:  (block $done (result i32)
+  ;; DEFAULT-NEXT:   (drop
+  ;; DEFAULT-NEXT:    (br_if $done
+  ;; DEFAULT-NEXT:     (i32.const 99)
+  ;; DEFAULT-NEXT:     (i32.eqz
+  ;; DEFAULT-NEXT:      (i32.and
+  ;; DEFAULT-NEXT:       (local.get $x)
+  ;; DEFAULT-NEXT:       (i32.const 1)
+  ;; DEFAULT-NEXT:      )
+  ;; DEFAULT-NEXT:     )
+  ;; DEFAULT-NEXT:    )
+  ;; DEFAULT-NEXT:   )
+  ;; DEFAULT-NEXT:   (i32.const 42)
+  ;; DEFAULT-NEXT:  )
+  ;; DEFAULT-NEXT: )
+  ;; SHRINK:      (func $lsb-brif (param $x i32) (result i32)
+  ;; SHRINK-NEXT:  (block $done (result i32)
+  ;; SHRINK-NEXT:   (drop
+  ;; SHRINK-NEXT:    (br_if $done
+  ;; SHRINK-NEXT:     (i32.const 99)
+  ;; SHRINK-NEXT:     (i32.ctz
+  ;; SHRINK-NEXT:      (local.get $x)
+  ;; SHRINK-NEXT:     )
+  ;; SHRINK-NEXT:    )
+  ;; SHRINK-NEXT:   )
+  ;; SHRINK-NEXT:   (i32.const 42)
+  ;; SHRINK-NEXT:  )
+  ;; SHRINK-NEXT: )
+  (func $lsb-brif (param $x i32) (result i32)
+    ;; br_if on (eqz (and X 1)) — the typical is_skewed/is_scalar pattern; SHRINK folds the whole condition to (i32.ctz X).
+    (block $done (result i32)
+      (drop
+        (br_if $done
+          (i32.const 99)
+          (i32.eqz (i32.and (local.get $x) (i32.const 1)))
+        )
+      )
+      (i32.const 42)
+    )
+  )
+
+  ;; DEFAULT:      (func $lsb-select (param $x i32) (param $a i32) (param $b i32) (result i32)
+  ;; DEFAULT-NEXT:  (select
+  ;; DEFAULT-NEXT:   (local.get $b)
+  ;; DEFAULT-NEXT:   (local.get $a)
+  ;; DEFAULT-NEXT:   (i32.and
+  ;; DEFAULT-NEXT:    (local.get $x)
+  ;; DEFAULT-NEXT:    (i32.const 1)
+  ;; DEFAULT-NEXT:   )
+  ;; DEFAULT-NEXT:  )
+  ;; DEFAULT-NEXT: )
+  ;; SHRINK:      (func $lsb-select (param $x i32) (param $a i32) (param $b i32) (result i32)
+  ;; SHRINK-NEXT:  (select
+  ;; SHRINK-NEXT:   (local.get $a)
+  ;; SHRINK-NEXT:   (local.get $b)
+  ;; SHRINK-NEXT:   (i32.ctz
+  ;; SHRINK-NEXT:    (local.get $x)
+  ;; SHRINK-NEXT:   )
+  ;; SHRINK-NEXT:  )
+  ;; SHRINK-NEXT: )
+  (func $lsb-select (param $x i32) (param $a i32) (param $b i32) (result i32)
+    ;; select on the eqz-and-1 condition: DEFAULT drops the eqz by swapping the arms; SHRINK keeps the arms and emits i32.ctz.
+    ;; Non-constant arms keep the select itself in the IR — otherwise
+    ;; an unrelated `select c1 c0 P` simplification would eat it.
+    (select
+      (local.get $a)
+      (local.get $b)
+      (i32.eqz (i32.and (local.get $x) (i32.const 1)))
+    )
+  )
+
+  ;; DEFAULT:      (func $lsb-select-const-left (param $x i32) (param $a i32) (param $b i32) (result i32)
+  ;; DEFAULT-NEXT:  (select
+  ;; DEFAULT-NEXT:   (local.get $b)
+  ;; DEFAULT-NEXT:   (local.get $a)
+  ;; DEFAULT-NEXT:   (i32.and
+  ;; DEFAULT-NEXT:    (local.get $x)
+  ;; DEFAULT-NEXT:    (i32.const 1)
+  ;; DEFAULT-NEXT:   )
+  ;; DEFAULT-NEXT:  )
+  ;; DEFAULT-NEXT: )
+  ;; SHRINK:      (func $lsb-select-const-left (param $x i32) (param $a i32) (param $b i32) (result i32)
+  ;; SHRINK-NEXT:  (select
+  ;; SHRINK-NEXT:   (local.get $a)
+  ;; SHRINK-NEXT:   (local.get $b)
+  ;; SHRINK-NEXT:   (i32.ctz
+  ;; SHRINK-NEXT:    (local.get $x)
+  ;; SHRINK-NEXT:   )
+  ;; SHRINK-NEXT:  )
+  ;; SHRINK-NEXT: )
+  (func $lsb-select-const-left (param $x i32) (param $a i32) (param $b i32) (result i32)
+    ;; Same as $lsb-select, but with the constant on the left of the AND.
+    (select
+      (local.get $a)
+      (local.get $b)
+      (i32.eqz (i32.and (i32.const 1) (local.get $x)))
+    )
+  )
+)