mirror of
https://github.com/llvm/llvm-project.git
synced 2025-04-19 06:46:43 +00:00
[llvm-mc] Add --hex to disassemble hex bytes
`--disassemble`/`--cdis` parses input bytes as decimal, binary (`0b` prefix), octal (`0o` prefix), or hexadecimal (`0x` prefix). While the hexadecimal form is the most commonly used, requiring a `0x` prefix for each byte (`0x48 0x29 0xc3`) is cumbersome. Tools like `xxd -p` and `rz-asm` use a plain hex dump form without the `0x` prefix or space separator. This patch adds `--hex` to disassemble such hex bytes with optional whitespace.

```
% rz-asm -a x86 -b 64 -d 4829c34829c4
sub rbx, rax
sub rsp, rax
% llvm-mc -triple=x86_64 --cdis --hex --output-asm-variant=1 <<< 4829c34829c4
        .text
        sub rbx, rax
        sub rsp, rax
```

Pull Request: https://github.com/llvm/llvm-project/pull/119992
This commit is contained in:
parent
e2a94a97bd
commit
c6ff809ae9
@ -92,6 +92,10 @@ End-user Options
|
||||
|
||||
Generate DWARF debugging info for assembly source files.
|
||||
|
||||
.. option:: --hex
|
||||
|
||||
Take raw hexadecimal bytes as input for disassembly. Whitespace is ignored.
|
||||
|
||||
.. option:: --large-code-model
|
||||
|
||||
Create CFI directives that assume the code might be more than 2 GB.
|
||||
|
62
llvm/test/MC/Disassembler/X86/hex-bytes.txt
Normal file
62
llvm/test/MC/Disassembler/X86/hex-bytes.txt
Normal file
@ -0,0 +1,62 @@
|
||||
# RUN: rm -rf %t && split-file %s %t && cd %t
|
||||
# RUN: llvm-mc -triple=x86_64 --disassemble --hex a.s | FileCheck %s
|
||||
# RUN: llvm-mc -triple=x86_64 --disassemble --hex decode1.s 2>&1 | FileCheck %s --check-prefix=DECODE1 --implicit-check-not=warning:
|
||||
# RUN: not llvm-mc -triple=x86_64 --disassemble --hex decode2.s 2>&1 | FileCheck %s --check-prefix=DECODE2 --implicit-check-not=warning:
|
||||
# RUN: not llvm-mc -triple=x86_64 --disassemble --hex err1.s 2>&1 | FileCheck %s --check-prefix=ERR1 --implicit-check-not=error:
|
||||
# RUN: not llvm-mc -triple=x86_64 --disassemble --hex err2.s 2>&1 | FileCheck %s --check-prefix=ERR2 --implicit-check-not=error:
|
||||
|
||||
#--- a.s
|
||||
4883ec08 31 # comment
|
||||
# comment
|
||||
ed4829 c390
|
||||
[c3c3][4829c3]
|
||||
[90]
|
||||
|
||||
# CHECK: subq $8, %rsp
|
||||
# CHECK-NEXT: xorl %ebp, %ebp
|
||||
# CHECK-NEXT: subq %rax, %rbx
|
||||
# CHECK-NEXT: nop
|
||||
# CHECK-NEXT: retq
|
||||
# CHECK-NEXT: retq
|
||||
# CHECK-NEXT: subq %rax, %rbx
|
||||
# CHECK-NEXT: nop
|
||||
# CHECK-EMPTY:
|
||||
|
||||
#--- decode1.s
|
||||
4889
|
||||
|
||||
# DECODE1: 1:1: warning: invalid instruction encoding
|
||||
|
||||
#--- decode2.s
|
||||
[4889][4889] [4889]4889c3
|
||||
[4889]
|
||||
|
||||
# DECODE2: 1:2: warning: invalid instruction encoding
|
||||
# DECODE2: 1:8: warning: invalid instruction encoding
|
||||
# DECODE2: 1:15: warning: invalid instruction encoding
|
||||
# DECODE2: 2:3: warning: invalid instruction encoding
|
||||
|
||||
#--- err1.s
|
||||
0x31ed
|
||||
0xcc
|
||||
g0
|
||||
|
||||
# ERR1: 1:1: error: invalid input token
|
||||
# ERR1: 2:1: error: invalid input token
|
||||
# ERR1: 3:1: error: invalid input token
|
||||
# ERR1: xorl %ebp, %ebp
|
||||
# ERR1-NEXT: int3
|
||||
# ERR1-EMPTY:
|
||||
|
||||
#--- err2.s
|
||||
g
|
||||
90c
|
||||
cc
|
||||
c
|
||||
|
||||
# ERR2: 1:1: error: expected two hex digits
|
||||
# ERR2: 2:3: error: expected two hex digits
|
||||
# ERR2: 4:1: error: expected two hex digits
|
||||
# ERR2: nop
|
||||
# ERR2-NEXT: int3
|
||||
# ERR2-EMPTY:
|
@ -12,6 +12,7 @@
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "Disassembler.h"
|
||||
#include "llvm/ADT/StringExtras.h"
|
||||
#include "llvm/MC/MCAsmInfo.h"
|
||||
#include "llvm/MC/MCContext.h"
|
||||
#include "llvm/MC/MCDisassembler/MCDisassembler.h"
|
||||
@ -94,10 +95,8 @@ static bool SkipToToken(StringRef &Str) {
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
static bool ByteArrayFromString(ByteArrayTy &ByteArray,
|
||||
StringRef &Str,
|
||||
SourceMgr &SM) {
|
||||
static bool byteArrayFromString(ByteArrayTy &ByteArray, StringRef &Str,
|
||||
SourceMgr &SM, bool HexBytes) {
|
||||
while (SkipToToken(Str)) {
|
||||
// Handled by higher level
|
||||
if (Str[0] == '[' || Str[0] == ']')
|
||||
@ -109,7 +108,24 @@ static bool ByteArrayFromString(ByteArrayTy &ByteArray,
|
||||
|
||||
// Convert to a byte and add to the byte vector.
|
||||
unsigned ByteVal;
|
||||
if (Value.getAsInteger(0, ByteVal) || ByteVal > 255) {
|
||||
if (HexBytes) {
|
||||
if (Next < 2) {
|
||||
SM.PrintMessage(SMLoc::getFromPointer(Value.data()),
|
||||
SourceMgr::DK_Error, "expected two hex digits");
|
||||
Str = Str.substr(Next);
|
||||
return true;
|
||||
}
|
||||
Next = 2;
|
||||
unsigned C0 = hexDigitValue(Value[0]);
|
||||
unsigned C1 = hexDigitValue(Value[1]);
|
||||
if (C0 == -1u || C1 == -1u) {
|
||||
SM.PrintMessage(SMLoc::getFromPointer(Value.data()),
|
||||
SourceMgr::DK_Error, "invalid input token");
|
||||
Str = Str.substr(Next);
|
||||
return true;
|
||||
}
|
||||
ByteVal = C0 * 16 + C1;
|
||||
} else if (Value.getAsInteger(0, ByteVal) || ByteVal > 255) {
|
||||
// If we have an error, print it and skip to the end of line.
|
||||
SM.PrintMessage(SMLoc::getFromPointer(Value.data()), SourceMgr::DK_Error,
|
||||
"invalid input token");
|
||||
@ -130,9 +146,8 @@ static bool ByteArrayFromString(ByteArrayTy &ByteArray,
|
||||
int Disassembler::disassemble(const Target &T, const std::string &Triple,
|
||||
MCSubtargetInfo &STI, MCStreamer &Streamer,
|
||||
MemoryBuffer &Buffer, SourceMgr &SM,
|
||||
MCContext &Ctx,
|
||||
const MCTargetOptions &MCOptions) {
|
||||
|
||||
MCContext &Ctx, const MCTargetOptions &MCOptions,
|
||||
bool HexBytes) {
|
||||
std::unique_ptr<const MCRegisterInfo> MRI(T.createMCRegInfo(Triple));
|
||||
if (!MRI) {
|
||||
errs() << "error: no register info for target " << Triple << "\n";
|
||||
@ -188,7 +203,7 @@ int Disassembler::disassemble(const Target &T, const std::string &Triple,
|
||||
}
|
||||
|
||||
// It's a real token, get the bytes and emit them
|
||||
ErrorOccurred |= ByteArrayFromString(ByteArray, Str, SM);
|
||||
ErrorOccurred |= byteArrayFromString(ByteArray, Str, SM, HexBytes);
|
||||
|
||||
if (!ByteArray.first.empty())
|
||||
ErrorOccurred |=
|
||||
|
@ -32,7 +32,7 @@ public:
|
||||
static int disassemble(const Target &T, const std::string &Triple,
|
||||
MCSubtargetInfo &STI, MCStreamer &Streamer,
|
||||
MemoryBuffer &Buffer, SourceMgr &SM, MCContext &Ctx,
|
||||
const MCTargetOptions &MCOptions);
|
||||
const MCTargetOptions &MCOptions, bool HexBytes);
|
||||
};
|
||||
|
||||
} // namespace llvm
|
||||
|
@ -94,6 +94,12 @@ static cl::opt<bool>
|
||||
cl::desc("Prefer hex format for immediate values"),
|
||||
cl::cat(MCCategory));
|
||||
|
||||
static cl::opt<bool>
|
||||
HexBytes("hex",
|
||||
cl::desc("Take raw hexadecimal bytes as input for disassembly. "
|
||||
"Whitespace is ignored"),
|
||||
cl::cat(MCCategory));
|
||||
|
||||
static cl::list<std::string>
|
||||
DefineSymbol("defsym",
|
||||
cl::desc("Defines a symbol to be an integer constant"),
|
||||
@ -592,7 +598,7 @@ int main(int argc, char **argv) {
|
||||
}
|
||||
if (disassemble)
|
||||
Res = Disassembler::disassemble(*TheTarget, TripleName, *STI, *Str, *Buffer,
|
||||
SrcMgr, Ctx, MCOptions);
|
||||
SrcMgr, Ctx, MCOptions, HexBytes);
|
||||
|
||||
// Keep output if no errors.
|
||||
if (Res == 0) {
|
||||
|
Loading…
x
Reference in New Issue
Block a user