mirror of
https://github.com/llvm/llvm-project.git
synced 2025-04-26 17:56:05 +00:00

Fixed the DisassemblerLLVMC disassembler to parse more efficiently instead of parsing opcodes over and over. The InstructionLLVMC class now only reads the opcode in the InstructionLLVMC::Decode function. This can be done very efficiently for ARM and architectures that have fixed opcode sizes. For x64 it still calls the disassembler to get the byte size. Moved the lldb_private::Instruction::Dump(...) function up into the lldb_private::Instruction class and it now uses the function that gets the mnemonic, operands and comments so that all disassembly is using the same code. Added StreamString::FillLastLineToColumn() to allow filling a line up to a column with a character (which is used by the lldb_private::Instruction::Dump(...) function). Modified the Opcode::GetData() function to "do the right thing" for Thumb instructions. llvm-svn: 156532
670 lines
22 KiB
C++
670 lines
22 KiB
C++
//===-- DisassemblerLLVMC.cpp -----------------------------------*- C++ -*-===//
|
|
//
|
|
// The LLVM Compiler Infrastructure
|
|
//
|
|
// This file is distributed under the University of Illinois Open Source
|
|
// License. See LICENSE.TXT for details.
|
|
//
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
#include "DisassemblerLLVMC.h"

#include "llvm-c/Disassembler.h"
#include "llvm/Support/TargetSelect.h"

#include "lldb/Core/Address.h"
#include "lldb/Core/DataExtractor.h"
#include "lldb/Core/Stream.h"
#include "lldb/Symbol/SymbolContext.h"
#include "lldb/Target/ExecutionContext.h"
#include "lldb/Target/Process.h"
#include "lldb/Target/RegisterContext.h"
#include "lldb/Target/Target.h"
#include "lldb/Target/StackFrame.h"

#include <regex.h>
#include <string.h>
|
|
|
|
using namespace lldb;
|
|
using namespace lldb_private;
|
|
|
|
class InstructionLLVMC : public lldb_private::Instruction
|
|
{
|
|
public:
|
|
// Builds an instruction bound to the disassembler that owns it.
//
// The instruction starts out not valid, and whether it branches is left
// to be computed later (eLazyBoolCalculate).
InstructionLLVMC (DisassemblerLLVMC &disasm,
                  const lldb_private::Address &address,
                  AddressClass addr_class) :
    Instruction (address, addr_class),
    m_is_valid (false),
    m_disasm (disasm),
    m_does_branch (eLazyBoolCalculate)
{
}
|
|
|
|
virtual
|
|
~InstructionLLVMC ()
|
|
{
|
|
}
|
|
|
|
static void
|
|
PadToWidth (lldb_private::StreamString &ss,
|
|
int new_width)
|
|
{
|
|
int old_width = ss.GetSize();
|
|
|
|
if (old_width < new_width)
|
|
{
|
|
ss.Printf("%*s", new_width - old_width, "");
|
|
}
|
|
}
|
|
|
|
virtual bool
|
|
DoesBranch () const
|
|
{
|
|
return m_does_branch == eLazyBoolYes;
|
|
}
|
|
|
|
virtual size_t
|
|
Decode (const lldb_private::Disassembler &disassembler,
|
|
const lldb_private::DataExtractor &data,
|
|
uint32_t data_offset)
|
|
{
|
|
// All we have to do is read the opcode which can be easy for some
|
|
// architetures
|
|
bool got_op = false;
|
|
const ArchSpec &arch = m_disasm.GetArchitecture();
|
|
|
|
const uint32_t min_op_byte_size = arch.GetMinimumOpcodeByteSize();
|
|
const uint32_t max_op_byte_size = arch.GetMaximumOpcodeByteSize();
|
|
if (min_op_byte_size == max_op_byte_size)
|
|
{
|
|
// Fixed size instructions, just read that amount of data.
|
|
if (!data.ValidOffsetForDataOfSize(data_offset, min_op_byte_size))
|
|
return false;
|
|
|
|
switch (min_op_byte_size)
|
|
{
|
|
case 1:
|
|
m_opcode.SetOpcode8 (data.GetU8 (&data_offset));
|
|
got_op = true;
|
|
break;
|
|
|
|
case 2:
|
|
m_opcode.SetOpcode16 (data.GetU16 (&data_offset));
|
|
got_op = true;
|
|
break;
|
|
|
|
case 4:
|
|
m_opcode.SetOpcode32 (data.GetU32 (&data_offset));
|
|
got_op = true;
|
|
break;
|
|
|
|
case 8:
|
|
m_opcode.SetOpcode64 (data.GetU64 (&data_offset));
|
|
got_op = true;
|
|
break;
|
|
|
|
default:
|
|
m_opcode.SetOpcodeBytes(data.PeekData(data_offset, min_op_byte_size), min_op_byte_size);
|
|
got_op = true;
|
|
break;
|
|
}
|
|
}
|
|
if (!got_op)
|
|
{
|
|
::LLVMDisasmContextRef disasm_context = m_disasm.m_disasm_context;
|
|
|
|
bool is_altnernate_isa = false;
|
|
if (m_disasm.m_alternate_disasm_context)
|
|
{
|
|
const AddressClass address_class = GetAddressClass ();
|
|
|
|
if (address_class == eAddressClassCodeAlternateISA)
|
|
{
|
|
disasm_context = m_disasm.m_alternate_disasm_context;
|
|
is_altnernate_isa = true;
|
|
}
|
|
}
|
|
const llvm::Triple::ArchType machine = arch.GetMachine();
|
|
if (machine == llvm::Triple::arm || machine == llvm::Triple::thumb)
|
|
{
|
|
if (machine == llvm::Triple::thumb || is_altnernate_isa)
|
|
{
|
|
uint32_t thumb_opcode = data.GetU16(&data_offset);
|
|
if ((thumb_opcode & 0xe000) != 0xe000 || ((thumb_opcode & 0x1800u) == 0))
|
|
{
|
|
m_opcode.SetOpcode16 (thumb_opcode);
|
|
}
|
|
else
|
|
{
|
|
thumb_opcode <<= 16;
|
|
thumb_opcode |= data.GetU16(&data_offset);
|
|
m_opcode.SetOpcode32 (thumb_opcode);
|
|
m_is_valid = true;
|
|
}
|
|
}
|
|
else
|
|
{
|
|
m_opcode.SetOpcode32 (data.GetU32(&data_offset));
|
|
}
|
|
}
|
|
else
|
|
{
|
|
// The opcode isn't evenly sized, so we need to actually use the llvm
|
|
// disassembler to parse it and get the size.
|
|
char out_string[512];
|
|
m_disasm.Lock(this, NULL);
|
|
uint8_t *opcode_data = const_cast<uint8_t *>(data.PeekData (data_offset, 1));
|
|
const size_t opcode_data_len = data.GetByteSize() - data_offset;
|
|
const addr_t pc = m_address.GetFileAddress();
|
|
const size_t inst_size = ::LLVMDisasmInstruction (disasm_context,
|
|
opcode_data,
|
|
opcode_data_len,
|
|
pc, // PC value
|
|
out_string,
|
|
sizeof(out_string));
|
|
// The address lookup function could have caused us to fill in our comment
|
|
m_comment.clear();
|
|
m_disasm.Unlock();
|
|
if (inst_size == 0)
|
|
m_opcode.Clear();
|
|
else
|
|
{
|
|
m_opcode.SetOpcodeBytes(opcode_data, inst_size);
|
|
m_is_valid = true;
|
|
}
|
|
}
|
|
}
|
|
return m_opcode.GetByteSize();
|
|
}
|
|
|
|
void
|
|
AppendComment (std::string &description)
|
|
{
|
|
if (m_comment.empty())
|
|
m_comment.swap (description);
|
|
else
|
|
{
|
|
m_comment.append(", ");
|
|
m_comment.append(description);
|
|
}
|
|
}
|
|
|
|
virtual void
|
|
CalculateMnemonicOperandsAndComment (const lldb_private::ExecutionContext *exe_ctx)
|
|
{
|
|
DataExtractor data;
|
|
const AddressClass address_class = GetAddressClass ();
|
|
|
|
if (m_opcode.GetData(data, address_class))
|
|
{
|
|
char out_string[512];
|
|
|
|
::LLVMDisasmContextRef disasm_context;
|
|
|
|
if (address_class == eAddressClassCodeAlternateISA)
|
|
disasm_context = m_disasm.m_alternate_disasm_context;
|
|
else
|
|
disasm_context = m_disasm.m_disasm_context;
|
|
|
|
lldb::addr_t pc = LLDB_INVALID_ADDRESS;
|
|
|
|
if (exe_ctx)
|
|
{
|
|
Target *target = exe_ctx->GetTargetPtr();
|
|
if (target)
|
|
pc = m_address.GetLoadAddress(target);
|
|
}
|
|
|
|
if (pc == LLDB_INVALID_ADDRESS)
|
|
pc = m_address.GetFileAddress();
|
|
|
|
m_disasm.Lock(this, exe_ctx);
|
|
uint8_t *opcode_data = const_cast<uint8_t *>(data.PeekData (0, 1));
|
|
const size_t opcode_data_len = data.GetByteSize();
|
|
size_t inst_size = ::LLVMDisasmInstruction (disasm_context,
|
|
opcode_data,
|
|
opcode_data_len,
|
|
pc,
|
|
out_string,
|
|
sizeof(out_string));
|
|
|
|
m_disasm.Unlock();
|
|
|
|
if (inst_size == 0)
|
|
{
|
|
m_comment.assign ("unknown opcode");
|
|
inst_size = m_opcode.GetByteSize();
|
|
StreamString mnemonic_strm;
|
|
uint32_t offset = 0;
|
|
switch (inst_size)
|
|
{
|
|
case 1:
|
|
{
|
|
const uint8_t uval8 = data.GetU8 (&offset);
|
|
m_opcode.SetOpcode8 (uval8);
|
|
m_opcode_name.assign (".byte");
|
|
mnemonic_strm.Printf("0x%2.2x", uval8);
|
|
}
|
|
break;
|
|
case 2:
|
|
{
|
|
const uint16_t uval16 = data.GetU16(&offset);
|
|
m_opcode.SetOpcode16(uval16);
|
|
m_opcode_name.assign (".short");
|
|
mnemonic_strm.Printf("0x%4.4x", uval16);
|
|
}
|
|
break;
|
|
case 4:
|
|
{
|
|
const uint32_t uval32 = data.GetU32(&offset);
|
|
m_opcode.SetOpcode32(uval32);
|
|
m_opcode_name.assign (".long");
|
|
mnemonic_strm.Printf("0x%8.8x", uval32);
|
|
}
|
|
break;
|
|
case 8:
|
|
{
|
|
const uint64_t uval64 = data.GetU64(&offset);
|
|
m_opcode.SetOpcode64(uval64);
|
|
m_opcode_name.assign (".quad");
|
|
mnemonic_strm.Printf("0x%16.16llx", uval64);
|
|
}
|
|
break;
|
|
default:
|
|
if (inst_size == 0)
|
|
return;
|
|
else
|
|
{
|
|
const uint8_t *bytes = data.PeekData(offset, inst_size);
|
|
if (bytes == NULL)
|
|
return;
|
|
m_opcode_name.assign (".byte");
|
|
m_opcode.SetOpcodeBytes(bytes, inst_size);
|
|
mnemonic_strm.Printf("0x%2.2x", bytes[0]);
|
|
for (uint32_t i=1; i<inst_size; ++i)
|
|
mnemonic_strm.Printf(" 0x%2.2x", bytes[i]);
|
|
}
|
|
break;
|
|
}
|
|
m_mnemocics.swap(mnemonic_strm.GetString());
|
|
return;
|
|
}
|
|
else
|
|
{
|
|
if (m_does_branch == eLazyBoolCalculate)
|
|
{
|
|
if (StringRepresentsBranch (out_string, strlen(out_string)))
|
|
m_does_branch = eLazyBoolYes;
|
|
else
|
|
m_does_branch = eLazyBoolNo;
|
|
}
|
|
}
|
|
|
|
if (!s_regex_compiled)
|
|
{
|
|
::regcomp(&s_regex, "[ \t]*([^ ^\t]+)[ \t]*([^ ^\t].*)?", REG_EXTENDED);
|
|
s_regex_compiled = true;
|
|
}
|
|
|
|
::regmatch_t matches[3];
|
|
|
|
if (!::regexec(&s_regex, out_string, sizeof(matches) / sizeof(::regmatch_t), matches, 0))
|
|
{
|
|
if (matches[1].rm_so != -1)
|
|
m_opcode_name.assign(out_string + matches[1].rm_so, matches[1].rm_eo - matches[1].rm_so);
|
|
if (matches[2].rm_so != -1)
|
|
m_mnemocics.assign(out_string + matches[2].rm_so, matches[2].rm_eo - matches[2].rm_so);
|
|
}
|
|
}
|
|
}
|
|
|
|
bool
|
|
IsValid ()
|
|
{
|
|
return m_is_valid;
|
|
}
|
|
|
|
size_t
|
|
GetByteSize ()
|
|
{
|
|
return m_opcode.GetByteSize();
|
|
}
|
|
protected:
|
|
|
|
bool StringRepresentsBranch (const char *data, size_t size)
|
|
{
|
|
const char *cursor = data;
|
|
|
|
bool inWhitespace = true;
|
|
|
|
while (inWhitespace && cursor < data + size)
|
|
{
|
|
switch (*cursor)
|
|
{
|
|
default:
|
|
inWhitespace = false;
|
|
break;
|
|
case ' ':
|
|
break;
|
|
case '\t':
|
|
break;
|
|
}
|
|
|
|
if (inWhitespace)
|
|
++cursor;
|
|
}
|
|
|
|
if (cursor >= data + size)
|
|
return false;
|
|
|
|
llvm::Triple::ArchType arch = m_disasm.GetArchitecture().GetMachine();
|
|
|
|
switch (arch)
|
|
{
|
|
default:
|
|
return false;
|
|
case llvm::Triple::x86:
|
|
case llvm::Triple::x86_64:
|
|
switch (cursor[0])
|
|
{
|
|
default:
|
|
return false;
|
|
case 'j':
|
|
return true;
|
|
case 'c':
|
|
if (cursor[1] == 'a' &&
|
|
cursor[2] == 'l' &&
|
|
cursor[3] == 'l')
|
|
return true;
|
|
else
|
|
return false;
|
|
}
|
|
case llvm::Triple::arm:
|
|
case llvm::Triple::thumb:
|
|
switch (cursor[0])
|
|
{
|
|
default:
|
|
return false;
|
|
case 'b':
|
|
{
|
|
switch (cursor[1])
|
|
{
|
|
default:
|
|
return true;
|
|
case 'f':
|
|
case 'i':
|
|
case 'k':
|
|
return false;
|
|
}
|
|
}
|
|
case 'c':
|
|
{
|
|
switch (cursor[1])
|
|
{
|
|
default:
|
|
return false;
|
|
case 'b':
|
|
return true;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
return false;
|
|
}
|
|
|
|
bool m_is_valid;
|
|
DisassemblerLLVMC &m_disasm;
|
|
LazyBool m_does_branch;
|
|
|
|
static bool s_regex_compiled;
|
|
static ::regex_t s_regex;
|
|
};
|
|
|
|
// Set to true by CalculateMnemonicOperandsAndComment() once it has called
// regcomp() on s_regex, so the pattern is only compiled on first use.
bool InstructionLLVMC::s_regex_compiled = false;
|
|
// Regular expression shared by all instructions; it is used to split
// disassembly text into an opcode name and the remaining operand text.
::regex_t InstructionLLVMC::s_regex;
|
|
|
|
// Plugin factory: builds a disassembler for "arch" and hands ownership to
// the caller, or returns NULL when the new object reports itself invalid
// (in which case it is destroyed here).
Disassembler *
DisassemblerLLVMC::CreateInstance (const ArchSpec &arch)
{
    std::auto_ptr<DisassemblerLLVMC> candidate_ap (new DisassemblerLLVMC(arch));

    const bool usable = candidate_ap.get() && candidate_ap->IsValid();
    if (!usable)
        return NULL;

    return candidate_ap.release();
}
|
|
|
|
// Creates the LLVM disassembler context for "arch". For ARM a second,
// alternate context is also created using a "thumbv7" arch name. Both
// contexts route symbol lookups back to this object.
DisassemblerLLVMC::DisassemblerLLVMC (const ArchSpec &arch) :
    Disassembler (arch),
    m_exe_ctx (NULL),
    m_inst (NULL),
    m_disasm_context (NULL),
    m_alternate_disasm_context (NULL)
{
    const llvm::Triple &triple = arch.GetTriple();

    m_disasm_context = ::LLVMCreateDisasm(triple.getTriple().c_str(),
                                          (void*)this,
                                          /*TagType=*/1,
                                          NULL,
                                          DisassemblerLLVMC::SymbolLookupCallback);

    // Only ARM gets an alternate context.
    if (triple.getArch() != llvm::Triple::arm)
        return;

    ArchSpec thumb_arch(arch);
    thumb_arch.GetTriple().setArchName(llvm::StringRef("thumbv7"));
    const std::string thumb_triple(thumb_arch.GetTriple().getTriple());

    m_alternate_disasm_context = ::LLVMCreateDisasm(thumb_triple.c_str(),
                                                    (void*)this,
                                                    /*TagType=*/1,
                                                    NULL,
                                                    DisassemblerLLVMC::SymbolLookupCallback);
}
|
|
|
|
// Disposes of whichever LLVM disassembler contexts were created and
// resets the handles.
DisassemblerLLVMC::~DisassemblerLLVMC()
{
    if (m_disasm_context != NULL)
        ::LLVMDisasmDispose(m_disasm_context);
    m_disasm_context = NULL;

    if (m_alternate_disasm_context != NULL)
        ::LLVMDisasmDispose(m_alternate_disasm_context);
    m_alternate_disasm_context = NULL;
}
|
|
|
|
size_t
|
|
DisassemblerLLVMC::DecodeInstructions (const Address &base_addr,
|
|
const DataExtractor& data,
|
|
uint32_t data_offset,
|
|
uint32_t num_instructions,
|
|
bool append)
|
|
{
|
|
if (!append)
|
|
m_instruction_list.Clear();
|
|
|
|
if (!IsValid())
|
|
return 0;
|
|
|
|
uint32_t data_cursor = data_offset;
|
|
const size_t data_byte_size = data.GetByteSize();
|
|
uint32_t instructions_parsed = 0;
|
|
Address inst_addr(base_addr);
|
|
|
|
while (data_cursor < data_byte_size && instructions_parsed < num_instructions)
|
|
{
|
|
|
|
AddressClass address_class = eAddressClassCode;
|
|
|
|
if (m_alternate_disasm_context)
|
|
address_class = inst_addr.GetAddressClass ();
|
|
|
|
InstructionSP inst_sp(new InstructionLLVMC(*this,
|
|
inst_addr,
|
|
address_class));
|
|
|
|
if (!inst_sp)
|
|
break;
|
|
|
|
uint32_t inst_size = inst_sp->Decode(*this, data, data_cursor);
|
|
|
|
if (inst_size == 0)
|
|
break;
|
|
|
|
m_instruction_list.Append(inst_sp);
|
|
data_cursor += inst_size;
|
|
inst_addr.Slide(inst_size);
|
|
instructions_parsed++;
|
|
}
|
|
|
|
return data_cursor - data_offset;
|
|
}
|
|
|
|
void
|
|
DisassemblerLLVMC::Initialize()
|
|
{
|
|
PluginManager::RegisterPlugin (GetPluginNameStatic(),
|
|
GetPluginDescriptionStatic(),
|
|
CreateInstance);
|
|
|
|
llvm::InitializeAllTargetInfos();
|
|
llvm::InitializeAllTargetMCs();
|
|
llvm::InitializeAllAsmParsers();
|
|
llvm::InitializeAllDisassemblers();
|
|
}
|
|
|
|
void
|
|
DisassemblerLLVMC::Terminate()
|
|
{
|
|
PluginManager::UnregisterPlugin (CreateInstance);
|
|
}
|
|
|
|
|
|
const char *
|
|
DisassemblerLLVMC::GetPluginNameStatic()
|
|
{
|
|
return "llvm-mc";
|
|
}
|
|
|
|
const char *
|
|
DisassemblerLLVMC::GetPluginDescriptionStatic()
|
|
{
|
|
return "Disassembler that uses LLVM MC to disassemble i386, x86_64 and ARM.";
|
|
}
|
|
|
|
int DisassemblerLLVMC::OpInfoCallback (void *disassembler,
|
|
uint64_t pc,
|
|
uint64_t offset,
|
|
uint64_t size,
|
|
int tag_type,
|
|
void *tag_bug)
|
|
{
|
|
return static_cast<DisassemblerLLVMC*>(disassembler)->OpInfo (pc,
|
|
offset,
|
|
size,
|
|
tag_type,
|
|
tag_bug);
|
|
}
|
|
|
|
const char *DisassemblerLLVMC::SymbolLookupCallback (void *disassembler,
|
|
uint64_t value,
|
|
uint64_t *type,
|
|
uint64_t pc,
|
|
const char **name)
|
|
{
|
|
return static_cast<DisassemblerLLVMC*>(disassembler)->SymbolLookup(value,
|
|
type,
|
|
pc,
|
|
name);
|
|
}
|
|
|
|
int DisassemblerLLVMC::OpInfo (uint64_t PC,
|
|
uint64_t Offset,
|
|
uint64_t Size,
|
|
int tag_type,
|
|
void *tag_bug)
|
|
{
|
|
switch (tag_type)
|
|
{
|
|
default:
|
|
break;
|
|
case 1:
|
|
bzero (tag_bug, sizeof(::LLVMOpInfo1));
|
|
break;
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
const char *DisassemblerLLVMC::SymbolLookup (uint64_t value,
|
|
uint64_t *type_ptr,
|
|
uint64_t pc,
|
|
const char **name)
|
|
{
|
|
if (*type_ptr)
|
|
{
|
|
if (m_exe_ctx && m_inst)
|
|
{
|
|
//std::string remove_this_prior_to_checkin;
|
|
Address reference_address;
|
|
|
|
Target *target = m_exe_ctx ? m_exe_ctx->GetTargetPtr() : NULL;
|
|
|
|
if (target && !target->GetSectionLoadList().IsEmpty())
|
|
target->GetSectionLoadList().ResolveLoadAddress(value, reference_address);
|
|
else
|
|
{
|
|
ModuleSP module_sp(m_inst->GetAddress().GetModule());
|
|
if (module_sp)
|
|
module_sp->ResolveFileAddress(value, reference_address);
|
|
}
|
|
|
|
if (reference_address.IsValid() && reference_address.GetSection())
|
|
{
|
|
StreamString ss;
|
|
|
|
reference_address.Dump (&ss,
|
|
target,
|
|
Address::DumpStyleResolvedDescriptionNoModule,
|
|
Address::DumpStyleSectionNameOffset);
|
|
|
|
if (!ss.GetString().empty())
|
|
{
|
|
//remove_this_prior_to_checkin = ss.GetString();
|
|
//if (*type_ptr)
|
|
m_inst->AppendComment(ss.GetString());
|
|
}
|
|
}
|
|
//printf ("DisassemblerLLVMC::SymbolLookup (value=0x%16.16llx, type=%llu, pc=0x%16.16llx, name=\"%s\") m_exe_ctx=%p, m_inst=%p\n", value, *type_ptr, pc, remove_this_prior_to_checkin.c_str(), m_exe_ctx, m_inst);
|
|
}
|
|
}
|
|
|
|
*type_ptr = LLVMDisassembler_ReferenceType_InOut_None;
|
|
*name = NULL;
|
|
return NULL;
|
|
}
|
|
|
|
//------------------------------------------------------------------
|
|
// PluginInterface protocol
|
|
//------------------------------------------------------------------
|
|
const char *
|
|
DisassemblerLLVMC::GetPluginName()
|
|
{
|
|
return "DisassemblerLLVMC";
|
|
}
|
|
|
|
const char *
|
|
DisassemblerLLVMC::GetShortPluginName()
|
|
{
|
|
return GetPluginNameStatic();
|
|
}
|
|
|
|
uint32_t
|
|
DisassemblerLLVMC::GetPluginVersion()
|
|
{
|
|
return 1;
|
|
}
|
|
|