From d749a0d779e3d5480e0564317c22b3611949831c Mon Sep 17 00:00:00 2001 From: Irish Dev <95187413+sad-c0der@users.noreply.github.com> Date: Wed, 21 Aug 2024 19:12:27 +0100 Subject: [PATCH 1/2] Update nyxstone.cpp Added for loop for readability, changed a couple of things and confirmed that it builds successfully and works as intended, minor update --- src/nyxstone.cpp | 38 ++++++++++++++------------------------ 1 file changed, 14 insertions(+), 24 deletions(-) diff --git a/src/nyxstone.cpp b/src/nyxstone.cpp index 37d87ef..442d01c 100644 --- a/src/nyxstone.cpp +++ b/src/nyxstone.cpp @@ -418,10 +418,12 @@ tl::expected Nyxstone::disassemble_impl(const std::vector data(bytes.data(), bytes.size()); uint64_t pos = 0; uint64_t insn_count = 0; - while (true) { + uint64_t insn_size = 0; + std::string insn_str; + + for (; pos < data.size() && (count == 0 || insn_count < count); pos += insn_size) { // Decompose one instruction llvm::MCInst insn; - uint64_t insn_size = 0; auto res = disassembler->getInstruction(insn, insn_size, data.slice(pos), address + pos, llvm::nulls()); if (res == llvm::MCDisassembler::Fail || res == llvm::MCDisassembler::SoftFail || !error_msg.empty()) { std::stringstream error_stream; @@ -431,43 +433,31 @@ tl::expected Nyxstone::disassemble_impl(const std::vectorprintInst(&insn, - /* Address */ address + pos, - /* Annot */ "", *subtarget_info, str_stream); - - // left trim + instruction_printer->printInst(&insn, address + pos, "", *subtarget_info, str_stream); + + // Left trim insn_str.erase(0, insn_str.find_first_not_of(" \t\n\r")); - // convert tabulators to spaces + // Convert tabulators to spaces std::replace(insn_str.begin(), insn_str.end(), '\t', ' '); - + // Add instruction to results if (disassembly != nullptr) { *disassembly += insn_str + "\n"; } + if (instructions != nullptr) { Nyxstone::Instruction new_insn; new_insn.address = address + pos; new_insn.assembly = insn_str; new_insn.bytes.reserve(insn_size); std::copy(data.begin() + pos, data.begin() + pos + insn_size, std::back_inserter(new_insn.bytes)); - instructions->push_back(new_insn); - } - - // Abort after n instructions if requested - insn_count += 1; - if (count != 0 && insn_count >= count) { - break; - } - - // Prepare next iteration - pos += insn_size; - if (pos >= data.size()) { - break; + instructions->emplace_back(std::move(new_insn)); } + + insn_count++; } return {}; From 98e7544bc75c444ab12cc52d044797007592d377 Mon Sep 17 00:00:00 2001 From: Irish Dev <95187413+sad-c0der@users.noreply.github.com> Date: Sat, 16 Nov 2024 00:58:51 +0000 Subject: [PATCH 2/2] Refactor LLVM disassembly function for efficiency and readability I've added the loop you suggested and refactored some of the code for efficiency and readability. --- src/nyxstone.cpp | 70 +++++++++++++++++++++++++++--------------------- 1 file changed, 40 insertions(+), 30 deletions(-) diff --git a/src/nyxstone.cpp b/src/nyxstone.cpp index 442d01c..09897d9 100644 --- a/src/nyxstone.cpp +++ b/src/nyxstone.cpp @@ -3,6 +3,8 @@ #include "ELFStreamerWrapper.h" #include "ObjectWriterWrapper.h" +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wunused-parameter" #include #include #include @@ -14,6 +16,7 @@ #include #include #include +#pragma GCC diagnostic pop #include #include @@ -378,21 +381,18 @@ tl::expected Nyxstone::assemble_impl(const std::string& assem tl::expected Nyxstone::disassemble_impl(const std::vector& bytes, uint64_t address, size_t count, std::string* disassembly, std::vector* instructions) const { - if (disassembly == nullptr && instructions == nullptr) { + if ((disassembly == nullptr && instructions == nullptr) || bytes.empty()) { return {}; } if (disassembly != nullptr) { disassembly->clear(); } + if (instructions != nullptr) { instructions->clear(); } - if (bytes.empty()) { - return {}; - } - // Equip context with info objects and custom error handling llvm::SmallString<128> error_msg; llvm::MCContext context( @@ -414,50 +414,60 @@ tl::expected Nyxstone::disassemble_impl(const std::vector data(bytes.data(), bytes.size()); - uint64_t pos = 0; - uint64_t insn_count = 0; - uint64_t insn_size = 0; - std::string insn_str; - - for (; pos < data.size() && (count == 0 || insn_count < count); pos += insn_size) { + // Disassemble instructions + llvm::ArrayRef data(bytes); + uint64_t pos = 0, insn_count = 0; + const bool disassemble_all = (count == 0); + + // We exit either if we reached the end of the provided bytes, or if we have disassembled as many instructions + // as the user has requested + while (pos < data.size() && (disassemble_all || insn_count < count)) { // Decompose one instruction llvm::MCInst insn; - auto res = disassembler->getInstruction(insn, insn_size, data.slice(pos), address + pos, llvm::nulls()); - if (res == llvm::MCDisassembler::Fail || res == llvm::MCDisassembler::SoftFail || !error_msg.empty()) { + uint64_t insn_size = 0; + + if (disassembler->getInstruction(insn, insn_size, data.slice(pos), address + pos, llvm::nulls()) != llvm::MCDisassembler::Success + || !error_msg.empty()) { std::stringstream error_stream; - error_stream << "Could not disassemble at position " << pos << " / address " << std::hex << address + pos; + error_stream << "Could not disassemble at position " << pos + << " / address " << std::hex << address + pos; if (!error_msg.empty()) { - error_stream << "(= " << error_msg.c_str() << " )"; + error_stream << " (= " << error_msg.c_str() << ")"; } return tl::unexpected(error_stream.str()); } - + // Generate instruction disassembly text + std::string insn_str; llvm::raw_string_ostream str_stream(insn_str); - instruction_printer->printInst(&insn, address + pos, "", *subtarget_info, str_stream); - + instruction_printer->printInst(&insn, /* Address */ address + pos, /* Annot */ "", *subtarget_info, str_stream); // Left trim insn_str.erase(0, insn_str.find_first_not_of(" \t\n\r")); // Convert tabulators to spaces std::replace(insn_str.begin(), insn_str.end(), '\t', ' '); - + // Add instruction to results if (disassembly != nullptr) { *disassembly += insn_str + "\n"; } - + if (instructions != nullptr) { - Nyxstone::Instruction new_insn; - new_insn.address = address + pos; - new_insn.assembly = insn_str; - new_insn.bytes.reserve(insn_size); - std::copy(data.begin() + pos, data.begin() + pos + insn_size, std::back_inserter(new_insn.bytes)); - instructions->emplace_back(std::move(new_insn)); + Instruction new_insn{address + pos, insn_str, {}}; + new_insn.bytes.assign(data.begin() + pos, data.begin() + pos + insn_size); + instructions->push_back(std::move(new_insn)); + } + + // Abort after n instructions if requested + insn_count += 1; + if (count != 0 && insn_count >= count) { + break; + } + + // Prepare next iteration + pos += insn_size; + if (pos >= data.size()) { + break; } - - insn_count++; } return {};