-
Notifications
You must be signed in to change notification settings - Fork 536
Update aiebu submodule and print opcode information in context health report on ERT_CMD_STATE_TIMEOUT (for AIE4 and AIE2PS) #9876
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
Changes from 4 commits
89f2992
bdeab25
550614d
09a2e73
b7b5f9e
01e8536
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -53,6 +53,8 @@ | |
| #include "core/common/runner/capture.h" | ||
| #include "core/common/xdp/profile.h" | ||
|
|
||
| #include "core/common/aiebu/src/cpp/include/aiebu/aiebu_debug.h" | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Isn't "aiebu/aiebu_debug.h" enough?
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
We could add |
||
|
|
||
| #include <boost/format.hpp> | ||
|
|
||
| #include <algorithm> | ||
|
|
@@ -2725,6 +2727,19 @@ class run_impl : public std::enable_shared_from_this<run_impl> | |
| return m_module; | ||
| } | ||
|
|
||
| std::shared_ptr<xrt::elf_impl> | ||
|
sayyanna marked this conversation as resolved.
Outdated
|
||
| get_elf_handle() const | ||
| { | ||
| if (!m_module) | ||
| return {}; | ||
| try { | ||
| return xrt_core::module_int::get_elf_handle(m_module); | ||
| } | ||
| catch (const std::exception&) { | ||
| return {}; | ||
| } | ||
|
sayyanna marked this conversation as resolved.
Outdated
|
||
| } | ||
|
|
||
| kernel_command* | ||
| get_cmd() const | ||
| { | ||
|
|
@@ -5047,7 +5062,7 @@ what() const noexcept | |
| static std::string | ||
| aie_error_message_v1(const ert_packet* epkt, const std::string& msg, | ||
| const std::string& elf_filename, const std::string& kernel_instance, | ||
| const std::string& elf_uuid) | ||
| const std::string& elf_uuid, const xrt::elf_impl* elf_impl_ptr = nullptr) | ||
| { | ||
| constexpr auto indent8 = 8; | ||
| auto ctx_health = get_ert_ctx_health_data_v1(epkt); | ||
|
|
@@ -5118,6 +5133,29 @@ aie_error_message_v1(const ert_packet* epkt, const std::string& msg, | |
| << "\nuc_esr=0x" << std::setw(indent8) | ||
| << ctx_health->aie4.uc_info[i].uc_esr | ||
| << "\n"; | ||
|
|
||
| // Decode the opcode at (uc_idx, page_idx, offset) directly from the ELF binary | ||
| if (elf_impl_ptr) { | ||
| aiebu::AIEDebug dbg(elf_impl_ptr->get_elfio()); | ||
| auto info = dbg.get_opcode_information( | ||
| kernel_instance, | ||
| ctx_health->aie4.uc_info[i].uc_idx, | ||
| ctx_health->aie4.uc_info[i].page_idx, | ||
| ctx_health->aie4.uc_info[i].offset); | ||
| if (info.found) { | ||
| oss << "\nOpcode: " << info.opcode_name; | ||
| if (!info.args_str.empty()) | ||
| oss << " " << info.args_str; | ||
| oss << "\nOpcode Size: 0x" << std::hex << info.opcode_size; | ||
| if (info.line > 0) | ||
| oss << "\nLine: " << std::dec << info.line; | ||
| if (!info.source_file.empty()) | ||
| oss << "\nFile: " << info.source_file; | ||
| oss << "\n"; | ||
| } | ||
| if (!info.diag_info.empty()) | ||
| oss << "Opcode diag_info: " << info.diag_info << "\n"; | ||
| } | ||
| } | ||
| } | ||
| return oss.str(); | ||
|
|
@@ -5138,12 +5176,11 @@ get_elf_identity_from_run(const xrt::run& run) | |
| if (!impl) | ||
| return {}; | ||
|
|
||
| const auto& mod = impl->get_module(); | ||
| if (!mod) | ||
| auto elf_handle = impl->get_elf_handle(); | ||
| if (!elf_handle) | ||
| return {}; | ||
|
|
||
| try { | ||
| auto elf_handle = xrt_core::module_int::get_elf_handle(mod); | ||
| return {xrt_core::elf_int::get_filename(elf_handle.get()), | ||
| impl->get_kernel()->get_full_name(), | ||
| elf_handle->get_cfg_uuid().to_string()}; | ||
|
|
@@ -5163,7 +5200,12 @@ amend_aie_error_message(const xrt::run& run, const std::string& msg) | |
|
|
||
| if (epkt->data[0] == ERT_CTX_HEALTH_DATA_V1) { | ||
| auto [elf_filename, kernel_instance, elf_uuid] = get_elf_identity_from_run(run); | ||
| return aie_error_message_v1(epkt, msg, elf_filename, kernel_instance, elf_uuid); | ||
| // Retrieve parsed ELF for efficient binary decode (no re-parsing overhead) | ||
| std::shared_ptr<xrt::elf_impl> elf_handle; | ||
| if (auto impl = run.get_handle()) | ||
| elf_handle = impl->get_elf_handle(); | ||
| return aie_error_message_v1(epkt, msg, elf_filename, kernel_instance, elf_uuid, | ||
| elf_handle.get()); | ||
|
sayyanna marked this conversation as resolved.
Outdated
|
||
| } | ||
| else if (epkt->data[0] != ERT_CTX_HEALTH_DATA_V0) | ||
| return msg; | ||
|
|
||
Uh oh!
There was an error while loading. Please reload this page.