-
Notifications
You must be signed in to change notification settings - Fork 857
[NFC] Refactor delta debugging to use coroutines #8657
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from 1 commit
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,130 @@ | ||
| /* | ||
| * Copyright 2026 WebAssembly Community Group participants | ||
| * | ||
| * Licensed under the Apache License, Version 2.0 (the "License"); | ||
| * you may not use this file except in compliance with the License. | ||
| * You may obtain a copy of the License at | ||
| * | ||
| * http://www.apache.org/licenses/LICENSE-2.0 | ||
| * | ||
| * Unless required by applicable law or agreed to in writing, software | ||
| * distributed under the License is distributed on an "AS IS" BASIS, | ||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
| * See the License for the specific language governing permissions and | ||
| * limitations under the License. | ||
| */ | ||
|
|
||
| #ifndef wasm_support_coroutine_h | ||
| #define wasm_support_coroutine_h | ||
|
|
||
| #include <coroutine> | ||
| #include <exception> | ||
|
|
||
| namespace wasm { | ||
|
|
||
| template<typename PromiseType> struct GetPromise { | ||
| PromiseType* promise = nullptr; | ||
| bool await_ready() const noexcept { return false; } | ||
| bool await_suspend(std::coroutine_handle<PromiseType> h) noexcept { | ||
| promise = &h.promise(); | ||
| return false; | ||
| } | ||
| PromiseType* await_resume() const noexcept { return promise; } | ||
| }; | ||
|
|
||
| template<typename T, typename U = void> struct Generator; | ||
|
|
||
| // One-way generator | ||
| template<typename T> struct Generator<T, void> { | ||
| struct promise_type { | ||
| T current_value; | ||
|
|
||
| Generator get_return_object() { | ||
| return {std::coroutine_handle<promise_type>::from_promise(*this)}; | ||
| } | ||
| std::suspend_always initial_suspend() { return {}; } | ||
| std::suspend_always final_suspend() noexcept { return {}; } | ||
| void unhandled_exception() { std::terminate(); } | ||
| void return_void() {} | ||
|
|
||
| std::suspend_always yield_value(T value) { | ||
| current_value = std::move(value); | ||
| return {}; | ||
| } | ||
| }; | ||
|
|
||
| std::coroutine_handle<promise_type> handle; | ||
|
|
||
| Generator(std::coroutine_handle<promise_type> h) : handle(h) {} | ||
| Generator(const Generator&) = delete; | ||
| Generator(Generator&& other) noexcept : handle(other.handle) { | ||
| other.handle = nullptr; | ||
| } | ||
| ~Generator() { | ||
| if (handle) { | ||
| handle.destroy(); | ||
| } | ||
| } | ||
|
|
||
| bool next() { | ||
| handle.resume(); | ||
| return !handle.done(); | ||
| } | ||
|
|
||
| T& get() { return handle.promise().current_value; } | ||
| const T& get() const { return handle.promise().current_value; } | ||
| }; | ||
|
|
||
| // Two-way generator | ||
| template<typename T, typename U> struct Generator { | ||
| struct promise_type { | ||
| T current_value; | ||
| U received_value; | ||
|
|
||
| Generator get_return_object() { | ||
| return {std::coroutine_handle<promise_type>::from_promise(*this)}; | ||
| } | ||
| std::suspend_always initial_suspend() { return {}; } | ||
| std::suspend_always final_suspend() noexcept { return {}; } | ||
| void unhandled_exception() { std::terminate(); } | ||
| void return_void() {} | ||
|
|
||
| auto yield_value(T value) { | ||
| current_value = std::move(value); | ||
| return YieldAwaiter{this}; | ||
| } | ||
|
|
||
| struct YieldAwaiter { | ||
| promise_type* p; | ||
| bool await_ready() const noexcept { return false; } | ||
| void await_suspend(std::coroutine_handle<promise_type>) noexcept {} | ||
| U await_resume() const noexcept { return p->received_value; } | ||
| }; | ||
| }; | ||
|
|
||
| std::coroutine_handle<promise_type> handle; | ||
|
|
||
| Generator(std::coroutine_handle<promise_type> h) : handle(h) {} | ||
| Generator(const Generator&) = delete; | ||
| Generator(Generator&& other) noexcept : handle(other.handle) { | ||
| other.handle = nullptr; | ||
| } | ||
| ~Generator() { | ||
| if (handle) { | ||
| handle.destroy(); | ||
| } | ||
| } | ||
|
|
||
| bool resume(U value) { | ||
| handle.promise().received_value = std::move(value); | ||
| handle.resume(); | ||
| return !handle.done(); | ||
| } | ||
|
|
||
| T& get() { return handle.promise().current_value; } | ||
| const T& get() const { return handle.promise().current_value; } | ||
| }; | ||
|
|
||
| } // namespace wasm | ||
|
|
||
| #endif // wasm_support_coroutine_h | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -21,100 +21,152 @@ | |
| #include <cassert> | ||
| #include <vector> | ||
|
|
||
| #include "support/coroutine.h" | ||
|
|
||
| namespace wasm { | ||
|
|
||
| // Use the delta debugging algorithm (Zeller 1999, | ||
| // https://dl.acm.org/doi/10.1109/32.988498) to find the minimal set of | ||
| // items necessary to preserve some property. Returns that minimal set of | ||
| // items, preserving their input order. `tryPartition` should have this | ||
| // signature: | ||
| // | ||
| // bool tryPartition(size_t partitionIndex, | ||
| // size_t numPartitions, | ||
| // const std::vector<T>& partition) | ||
| // | ||
| // It should return true iff the property is preserved while keeping only | ||
| // `partition` items. | ||
| template<typename T, typename F> | ||
| std::vector<T> deltaDebugging(std::vector<T> items, const F& tryPartition) { | ||
| if (items.empty()) { | ||
| return items; | ||
| } | ||
| // First try removing everything. | ||
| if (tryPartition(0, 1, {})) { | ||
| return {}; | ||
| // Use the delta debugging algorithm (Zeller 2002, | ||
| // https://dl.acm.org/doi/10.1109/32.988498) to find the minimal set of items | ||
| // necessary to preserve some property. `working` is the minimal set of items | ||
| // found so far and `test` is the smaller set of items that should be tested | ||
| // next. After testing, call `accept()`, `reject()`, or `resolve(bool accepted)` | ||
| // to update the working and test sets appropriately. | ||
| template<typename T> struct DeltaDebugger { | ||
| DeltaDebugger(std::vector<T> items) : task(run(std::move(items))) { | ||
| task.handle.resume(); | ||
| } | ||
| size_t numPartitions = 2; | ||
| while (numPartitions <= items.size()) { | ||
| // Partition the items. | ||
| std::vector<std::vector<T>> partitions; | ||
| size_t size = items.size(); | ||
| size_t basePartitionSize = size / numPartitions; | ||
| size_t rem = size % numPartitions; | ||
| size_t idx = 0; | ||
| for (size_t i = 0; i < numPartitions; ++i) { | ||
| size_t partitionSize = basePartitionSize + (i < rem ? 1 : 0); | ||
| if (partitionSize > 0) { | ||
| std::vector<T> partition; | ||
| partition.reserve(partitionSize); | ||
| for (size_t j = 0; j < partitionSize; ++j) { | ||
| partition.push_back(items[idx++]); | ||
| } | ||
| partitions.emplace_back(std::move(partition)); | ||
| } | ||
|
|
||
| bool finished() const { return task.get()->finished; } | ||
|
|
||
| const std::vector<T>& working() const { return task.get()->working; } | ||
| std::vector<T>& test() { return task.get()->test; } | ||
|
|
||
| size_t partitionCount() const { return task.get()->numPartitions; } | ||
| size_t partitionIndex() const { return task.get()->currPartition; } | ||
|
|
||
| void resolve(bool success) { | ||
| if (finished()) { | ||
| return; | ||
| } | ||
| assert(numPartitions == partitions.size()); | ||
| task.resume(success); | ||
| } | ||
|
|
||
| bool reduced = false; | ||
| void accept() { resolve(true); } | ||
| void reject() { resolve(false); } | ||
|
|
||
| // Try keeping only one partition. Try each partition in turn. | ||
| for (size_t i = 0; i < numPartitions; ++i) { | ||
| if (tryPartition(i, numPartitions, partitions[i])) { | ||
| items = std::move(partitions[i]); | ||
| numPartitions = 2; | ||
| reduced = true; | ||
| break; | ||
| } | ||
| private: | ||
| struct State { | ||
| std::vector<T> working; | ||
| std::vector<T> test; | ||
| size_t numPartitions = 1; | ||
| size_t currPartition = 0; | ||
| bool finished = false; | ||
| }; | ||
|
|
||
| Generator<State*, bool> task; | ||
|
|
||
| static Generator<State*, bool> run(std::vector<T> items) { | ||
| State state; | ||
| auto& [working, test, numPartitions, currPartition, finished] = state; | ||
|
|
||
| working = std::move(items); | ||
|
|
||
| if (working.empty()) { | ||
| finished = true; | ||
| co_yield &state; | ||
| co_return; | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Why does this need to yield before returning? Isn't the output in the right place already?
Member
Author
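There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. A tricky thing here is that we need to prevent the coroutine from ever returning, because we depend on its local `state` object staying alive: the `State*` it yields points at that local, which is destroyed as soon as the coroutine body returns.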
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I see, thanks, that's what I was missing. Please document that; it is indeed tricky...
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Or, could we
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Unfortunately, there is not a great way to do that. This is by far the simplest approach I tried. Will add comments. |
||
| } | ||
| if (reduced) { | ||
| continue; | ||
|
|
||
| // First try removing everything. | ||
| if (co_yield &state) { | ||
| working = {}; | ||
| finished = true; | ||
| co_yield &state; | ||
| co_return; | ||
| } | ||
|
|
||
| // Otherwise, try keeping the complement of a partition. Do not do this with | ||
| // only two partitions because that would be no different from what we | ||
| // already tried. | ||
| if (numPartitions > 2) { | ||
| numPartitions = 2; | ||
| while (numPartitions <= working.size()) { | ||
| // Partition the items. | ||
| std::vector<std::vector<T>> partitions; | ||
| size_t size = working.size(); | ||
| size_t basePartitionSize = size / numPartitions; | ||
| size_t rem = size % numPartitions; | ||
| size_t idx = 0; | ||
| for (size_t i = 0; i < numPartitions; ++i) { | ||
| std::vector<T> complement; | ||
| complement.reserve(items.size() - partitions[i].size()); | ||
| for (size_t j = 0; j < numPartitions; ++j) { | ||
| if (j != i) { | ||
| complement.insert( | ||
| complement.end(), partitions[j].begin(), partitions[j].end()); | ||
| size_t partitionSize = basePartitionSize + (i < rem ? 1 : 0); | ||
| if (partitionSize > 0) { | ||
| std::vector<T> partition; | ||
| partition.reserve(partitionSize); | ||
| for (size_t j = 0; j < partitionSize; ++j) { | ||
| partition.push_back(working[idx++]); | ||
| } | ||
| partitions.emplace_back(std::move(partition)); | ||
| } | ||
| if (tryPartition(i, numPartitions, complement)) { | ||
| items = std::move(complement); | ||
| numPartitions = std::max(numPartitions - 1, size_t(2)); | ||
| } | ||
| assert(numPartitions == partitions.size()); | ||
|
|
||
| bool reduced = false; | ||
|
|
||
| // Try keeping only one partition. Try each partition in turn. | ||
| for (currPartition = 0; currPartition < numPartitions; ++currPartition) { | ||
| test = std::move(partitions[currPartition]); | ||
| if (co_yield &state) { | ||
| working = std::move(test); | ||
| numPartitions = 2; | ||
| reduced = true; | ||
| break; | ||
| } else { | ||
| // Restore the partition since we failed and might need it for | ||
| // complement testing. | ||
| partitions[currPartition] = std::move(test); | ||
| } | ||
| } | ||
| if (reduced) { | ||
| continue; | ||
| } | ||
| } | ||
|
|
||
| if (numPartitions == items.size()) { | ||
| // Cannot further refine the partitions. We're done. | ||
| break; | ||
| // Otherwise, try keeping the complement of a partition. Do not do this | ||
| // with only two partitions because that would be no different from what | ||
| // we already tried. | ||
| if (numPartitions > 2) { | ||
| for (currPartition = 0; currPartition < numPartitions; | ||
| ++currPartition) { | ||
| test.clear(); | ||
| test.reserve(working.size() - partitions[currPartition].size()); | ||
| for (size_t i = 0; i < numPartitions; ++i) { | ||
| if (i != currPartition) { | ||
| test.insert( | ||
| test.end(), partitions[i].begin(), partitions[i].end()); | ||
| } | ||
| } | ||
| if (co_yield &state) { | ||
| working = std::move(test); | ||
| numPartitions = std::max(numPartitions - 1, size_t(2)); | ||
| reduced = true; | ||
| break; | ||
| } | ||
| } | ||
| if (reduced) { | ||
| continue; | ||
| } | ||
| } | ||
|
|
||
| if (numPartitions == working.size()) { | ||
| // Cannot further refine the partitions. We're done. | ||
| break; | ||
| } | ||
|
|
||
| // Otherwise, make the partitions finer grained. | ||
| numPartitions = std::min(working.size(), 2 * numPartitions); | ||
| } | ||
|
|
||
| // Otherwise, make the partitions finer grained. | ||
| numPartitions = std::min(items.size(), 2 * numPartitions); | ||
| // Yield final state | ||
| test = {}; | ||
| finished = true; | ||
| co_yield &state; | ||
| } | ||
| return items; | ||
| } | ||
| }; | ||
|
|
||
| } // namespace wasm | ||
|
|
||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Maybe add some docs for these classes? I'm not really sure what "GetPromise" means or does just from this code (which seems so generic as to do almost nothing but store a "promise"..?)
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
All these methods are well-known to the compiler and configure the suspending and resuming behavior of our Generator utility. Unfortunately, this is just a bunch of unavoidable boilerplate that doesn't do anything interesting (or comprehensible to non-experts). I'll document the interesting user-exposed methods, but for most of this there's nothing more to say than:
// Unavoidable boilerplate.