mirror of
https://github.com/JakeHillion/drgn.git
synced 2024-12-23 09:43:06 +00:00
9fda010789
Currently, reference objects and buffer value objects have a byte order. However, this doesn't always make sense for a couple of reasons: - Byte order is only meaningful for scalars. What does it mean for a struct to be big endian? A struct doesn't have a most or least significant byte; its scalar members do. - The DWARF specification allows either types or variables to have a byte order (DW_AT_endianity). The only producer I could find that uses this is GCC for the scalar_storage_order type attribute, and it only uses it for base types, not variables. GDB only seems to check it for base types, as well. So, remove the byte order from objects, and move it to integer, boolean, floating-point, and pointer types. This model makes more sense, and it means that we can get the binary representation of any object now. The only downside is that we can no longer support a bit offset for non-scalars, but as far as I can tell, nothing needs that. Signed-off-by: Omar Sandoval <osandov@osandov.com>
606 lines
15 KiB
C
606 lines
15 KiB
C
%{
|
|
// Copyright (c) Facebook, Inc. and its affiliates.
|
|
// SPDX-License-Identifier: GPL-3.0+
|
|
|
|
#include <byteswap.h>
|
|
#include <elfutils/libdw.h>
|
|
#include <elfutils/libdwfl.h>
|
|
#include <string.h>
|
|
|
|
#include "drgn.h"
|
|
#include "error.h"
|
|
#include "linux_kernel.h"
|
|
#include "platform.h"
|
|
#include "program.h"
|
|
#include "util.h"
|
|
%}
|
|
|
|
x86-64
|
|
%%
|
|
rax
|
|
rdx
|
|
rcx
|
|
rbx
|
|
rsi
|
|
rdi
|
|
rbp
|
|
rsp
|
|
r8
|
|
r9
|
|
r10
|
|
r11
|
|
r12
|
|
r13
|
|
r14
|
|
r15
|
|
# The System V ABI calls this the return address (RA) register, but it's
|
|
# effectively the instruction pointer.
|
|
rip
|
|
xmm0
|
|
xmm1
|
|
xmm2
|
|
xmm3
|
|
xmm4
|
|
xmm5
|
|
xmm6
|
|
xmm7
|
|
xmm8
|
|
xmm9
|
|
xmm10
|
|
xmm11
|
|
xmm12
|
|
xmm13
|
|
xmm14
|
|
xmm15
|
|
st0
|
|
st1
|
|
st2
|
|
st3
|
|
st4
|
|
st5
|
|
st6
|
|
st7
|
|
mm0
|
|
mm1
|
|
mm2
|
|
mm3
|
|
mm4
|
|
mm5
|
|
mm6
|
|
mm7
|
|
rFLAGS
|
|
es
|
|
cs
|
|
ss
|
|
ds
|
|
fs
|
|
gs
|
|
fs.base, 58
|
|
gs.base
|
|
tr, 62
|
|
ldtr
|
|
mxcsr
|
|
fcw
|
|
fsw
|
|
xmm16
|
|
xmm17
|
|
xmm18
|
|
xmm19
|
|
xmm20
|
|
xmm21
|
|
xmm22
|
|
xmm23
|
|
xmm24
|
|
xmm25
|
|
xmm26
|
|
xmm27
|
|
xmm28
|
|
xmm29
|
|
xmm30
|
|
xmm31
|
|
k0, 118
|
|
k1
|
|
k2
|
|
k3
|
|
k4
|
|
k5
|
|
k6
|
|
k7
|
|
bnd0
|
|
bnd1
|
|
bnd2
|
|
bnd3
|
|
%%
|
|
|
|
/*
|
|
* The in-kernel struct pt_regs, UAPI struct pt_regs, elf_gregset_t, and struct
|
|
* user_regs_struct all have the same layout.
|
|
*/
|
|
static struct drgn_error *
|
|
set_initial_registers_from_struct_x86_64(struct drgn_program *prog,
|
|
Dwfl_Thread *thread, const void *regs,
|
|
size_t size)
|
|
{
|
|
if (size < 160) {
|
|
return drgn_error_create(DRGN_ERROR_INVALID_ARGUMENT,
|
|
"registers are truncated");
|
|
}
|
|
|
|
bool bswap;
|
|
struct drgn_error *err = drgn_program_bswap(prog, &bswap);
|
|
if (err)
|
|
return err;
|
|
|
|
Dwarf_Word dwarf_regs[17];
|
|
#define READ_REGISTER(n) ({ \
|
|
uint64_t reg; \
|
|
memcpy(®, (uint64_t *)regs + n, sizeof(reg)); \
|
|
bswap ? bswap_64(reg) : reg; \
|
|
})
|
|
dwarf_regs[0] = READ_REGISTER(10); /* rax */
|
|
dwarf_regs[1] = READ_REGISTER(12); /* rdx */
|
|
dwarf_regs[2] = READ_REGISTER(11); /* rcx */
|
|
dwarf_regs[3] = READ_REGISTER(5); /* rbx */
|
|
dwarf_regs[4] = READ_REGISTER(13); /* rsi */
|
|
dwarf_regs[5] = READ_REGISTER(14); /* rdi */
|
|
dwarf_regs[6] = READ_REGISTER(4); /* rbp */
|
|
dwarf_regs[7] = READ_REGISTER(19); /* rsp */
|
|
dwarf_regs[8] = READ_REGISTER(9); /* r8 */
|
|
dwarf_regs[9] = READ_REGISTER(8); /* r9 */
|
|
dwarf_regs[10] = READ_REGISTER(7); /* r10 */
|
|
dwarf_regs[11] = READ_REGISTER(6); /* r11 */
|
|
dwarf_regs[12] = READ_REGISTER(3); /* r12 */
|
|
dwarf_regs[13] = READ_REGISTER(2); /* r13 */
|
|
dwarf_regs[14] = READ_REGISTER(1); /* r14 */
|
|
dwarf_regs[15] = READ_REGISTER(0); /* r15 */
|
|
dwarf_regs[16] = READ_REGISTER(16); /* rip */
|
|
#undef READ_REGISTER
|
|
|
|
if (!dwfl_thread_state_registers(thread, 0, 17, dwarf_regs))
|
|
return drgn_error_libdwfl();
|
|
return NULL;
|
|
}
|
|
|
|
static struct drgn_error *
|
|
pt_regs_set_initial_registers_x86_64(Dwfl_Thread *thread,
|
|
const struct drgn_object *obj)
|
|
{
|
|
return set_initial_registers_from_struct_x86_64(drgn_object_program(obj),
|
|
thread,
|
|
drgn_object_buffer(obj),
|
|
drgn_object_size(obj));
|
|
}
|
|
|
|
static struct drgn_error *
|
|
prstatus_set_initial_registers_x86_64(struct drgn_program *prog,
|
|
Dwfl_Thread *thread, const void *prstatus,
|
|
size_t size)
|
|
{
|
|
if (size < 112) {
|
|
return drgn_error_create(DRGN_ERROR_INVALID_ARGUMENT,
|
|
"NT_PRSTATUS is truncated");
|
|
}
|
|
return set_initial_registers_from_struct_x86_64(prog, thread,
|
|
(char *)prstatus + 112,
|
|
size - 112);
|
|
}
|
|
|
|
static inline struct drgn_error *
|
|
read_register(struct drgn_object *reg_obj, const struct drgn_object *frame_obj,
|
|
const char *name, Dwarf_Addr *ret)
|
|
{
|
|
struct drgn_error *err;
|
|
uint64_t reg;
|
|
|
|
err = drgn_object_member(reg_obj, frame_obj, name);
|
|
if (err)
|
|
return err;
|
|
err = drgn_object_read_unsigned(reg_obj, ®);
|
|
if (err)
|
|
return err;
|
|
*ret = reg;
|
|
return NULL;
|
|
}
|
|
|
|
static struct drgn_error *
|
|
set_initial_registers_inactive_task_frame(Dwfl_Thread *thread,
|
|
const struct drgn_object *frame_obj)
|
|
{
|
|
struct drgn_error *err;
|
|
struct drgn_object reg_obj;
|
|
Dwarf_Word dwarf_regs[5];
|
|
|
|
drgn_object_init(®_obj, drgn_object_program(frame_obj));
|
|
|
|
err = read_register(®_obj, frame_obj, "bx", &dwarf_regs[0]);
|
|
if (err)
|
|
goto out;
|
|
/* rbx is register 3. */
|
|
if (!dwfl_thread_state_registers(thread, 3, 1, dwarf_regs)) {
|
|
err = drgn_error_libdwfl();
|
|
goto out;
|
|
}
|
|
|
|
err = read_register(®_obj, frame_obj, "bp", &dwarf_regs[0]);
|
|
if (err)
|
|
goto out;
|
|
dwarf_regs[1] = frame_obj->address + drgn_object_size(frame_obj);
|
|
/* rbp is register 6, rsp is register 7. */
|
|
if (!dwfl_thread_state_registers(thread, 6, 2, dwarf_regs)) {
|
|
err = drgn_error_libdwfl();
|
|
goto out;
|
|
}
|
|
|
|
err = read_register(®_obj, frame_obj, "r12", &dwarf_regs[0]);
|
|
if (err)
|
|
goto out;
|
|
err = read_register(®_obj, frame_obj, "r13", &dwarf_regs[1]);
|
|
if (err)
|
|
goto out;
|
|
err = read_register(®_obj, frame_obj, "r14", &dwarf_regs[2]);
|
|
if (err)
|
|
goto out;
|
|
err = read_register(®_obj, frame_obj, "r15", &dwarf_regs[3]);
|
|
if (err)
|
|
goto out;
|
|
err = read_register(®_obj, frame_obj, "ret_addr", &dwarf_regs[4]);
|
|
if (err)
|
|
goto out;
|
|
/* r12-r15 are registers 12-15; register 16 is the return address. */
|
|
if (!dwfl_thread_state_registers(thread, 12, 5, dwarf_regs)) {
|
|
err = drgn_error_libdwfl();
|
|
goto out;
|
|
}
|
|
|
|
err = NULL;
|
|
out:
|
|
drgn_object_deinit(®_obj);
|
|
return err;
|
|
}
|
|
|
|
static struct drgn_error *
|
|
set_initial_registers_frame_pointer(Dwfl_Thread *thread,
|
|
const struct drgn_object *bp_obj)
|
|
{
|
|
struct drgn_error *err;
|
|
|
|
struct drgn_object reg_obj;
|
|
drgn_object_init(®_obj, drgn_object_program(bp_obj));
|
|
|
|
Dwarf_Word dwarf_regs[2];
|
|
|
|
err = drgn_object_subscript(®_obj, bp_obj, 0);
|
|
if (err)
|
|
goto out;
|
|
uint64_t reg;
|
|
err = drgn_object_read_unsigned(®_obj, ®);
|
|
if (err)
|
|
goto out;
|
|
dwarf_regs[0] = reg;
|
|
|
|
err = drgn_object_read_unsigned(bp_obj, ®);
|
|
if (err)
|
|
goto out;
|
|
dwarf_regs[1] = reg + 16;
|
|
|
|
/* rbp is register 6, rsp is register 7. */
|
|
if (!dwfl_thread_state_registers(thread, 6, 2, dwarf_regs)) {
|
|
err = drgn_error_libdwfl();
|
|
goto out;
|
|
}
|
|
|
|
err = drgn_object_subscript(®_obj, bp_obj, 1);
|
|
if (err)
|
|
goto out;
|
|
err = drgn_object_read_unsigned(®_obj, ®);
|
|
if (err)
|
|
goto out;
|
|
dwarf_regs[0] = reg;
|
|
/* The return address is register 16. */
|
|
if (!dwfl_thread_state_registers(thread, 16, 1, dwarf_regs)) {
|
|
err = drgn_error_libdwfl();
|
|
goto out;
|
|
}
|
|
|
|
err = NULL;
|
|
out:
|
|
drgn_object_deinit(®_obj);
|
|
return err;
|
|
}
|
|
|
|
static struct drgn_error *
|
|
linux_kernel_set_initial_registers_x86_64(Dwfl_Thread *thread,
|
|
const struct drgn_object *task_obj)
|
|
{
|
|
struct drgn_error *err;
|
|
struct drgn_program *prog = drgn_object_program(task_obj);
|
|
struct drgn_object sp_obj;
|
|
|
|
drgn_object_init(&sp_obj, prog);
|
|
|
|
err = drgn_object_member_dereference(&sp_obj, task_obj, "thread");
|
|
if (err)
|
|
goto out;
|
|
err = drgn_object_member(&sp_obj, &sp_obj, "sp");
|
|
if (err)
|
|
goto out;
|
|
|
|
/*
|
|
* Since Linux kernel commit 0100301bfdf5 ("sched/x86: Rewrite the
|
|
* switch_to() code") (in v4.9), sp points to a struct
|
|
* inactive_task_frame, which we can use to get the callee-saved
|
|
* registers. Before that, sp points to bp. As long as frame pointers
|
|
* are enabled, this in turn points to the previous bp and the return
|
|
* address.
|
|
*/
|
|
struct drgn_qualified_type frame_type;
|
|
err = drgn_program_find_type(prog, "struct inactive_task_frame *", NULL,
|
|
&frame_type);
|
|
if (!err) {
|
|
err = drgn_object_cast(&sp_obj, frame_type, &sp_obj);
|
|
if (err)
|
|
goto out;
|
|
err = drgn_object_dereference(&sp_obj, &sp_obj);
|
|
if (err)
|
|
goto out;
|
|
err = set_initial_registers_inactive_task_frame(thread,
|
|
&sp_obj);
|
|
} else if (err->code == DRGN_ERROR_LOOKUP) {
|
|
drgn_error_destroy(err);
|
|
err = drgn_program_find_type(prog, "unsigned long **", NULL,
|
|
&frame_type);
|
|
if (err)
|
|
goto out;
|
|
err = drgn_object_cast(&sp_obj, frame_type, &sp_obj);
|
|
if (err)
|
|
goto out;
|
|
err = drgn_object_dereference(&sp_obj, &sp_obj);
|
|
if (err)
|
|
goto out;
|
|
err = set_initial_registers_frame_pointer(thread, &sp_obj);
|
|
}
|
|
out:
|
|
drgn_object_deinit(&sp_obj);
|
|
return err;
|
|
}
|
|
|
|
static struct drgn_error *
|
|
linux_kernel_get_page_offset_x86_64(struct drgn_object *ret)
|
|
{
|
|
struct drgn_error *err;
|
|
struct drgn_program *prog = drgn_object_program(ret);
|
|
|
|
struct drgn_qualified_type qualified_type;
|
|
err = drgn_program_find_primitive_type(prog, DRGN_C_TYPE_UNSIGNED_LONG,
|
|
&qualified_type.type);
|
|
if (err)
|
|
return err;
|
|
qualified_type.qualifiers = 0;
|
|
|
|
/*
|
|
* If KASLR is enabled, PAGE_OFFSET is easily available via
|
|
* page_offset_base.
|
|
*/
|
|
struct drgn_object tmp;
|
|
drgn_object_init(&tmp, prog);
|
|
err = drgn_program_find_object(prog, "page_offset_base", NULL,
|
|
DRGN_FIND_OBJECT_VARIABLE, &tmp);
|
|
if (!err) {
|
|
err = drgn_object_cast(ret, qualified_type, &tmp);
|
|
goto out;
|
|
}
|
|
if (err->code == DRGN_ERROR_LOOKUP)
|
|
drgn_error_destroy(err);
|
|
else
|
|
goto out;
|
|
|
|
/*
|
|
* If not, we determine it based on the kernel page table. Before Linux
|
|
* kernel commit d52888aa2753 ("x86/mm: Move LDT remap out of KASLR
|
|
* region on 5-level paging") (in v4.20), PAGE_OFFSET was pgd slot 272.
|
|
* After that, it is pgd slot 273, and slot 272 is empty (reserved for
|
|
* Local Descriptor Table mappings for userspace tasks).
|
|
*/
|
|
uint64_t pgd;
|
|
err = drgn_program_read_u64(prog,
|
|
prog->vmcoreinfo.swapper_pg_dir + 272 * 8,
|
|
false, &pgd);
|
|
if (err)
|
|
goto out;
|
|
uint64_t value;
|
|
if (pgd) {
|
|
if (prog->vmcoreinfo.pgtable_l5_enabled)
|
|
value = UINT64_C(0xff10000000000000);
|
|
else
|
|
value = UINT64_C(0xffff880000000000);
|
|
} else {
|
|
if (prog->vmcoreinfo.pgtable_l5_enabled)
|
|
value = UINT64_C(0xff11000000000000);
|
|
else
|
|
value = UINT64_C(0xffff888000000000);
|
|
}
|
|
err = drgn_object_set_unsigned(ret, qualified_type, value, 0);
|
|
|
|
out:
|
|
drgn_object_deinit(&tmp);
|
|
return err;
|
|
}
|
|
|
|
static struct drgn_error *
|
|
linux_kernel_get_vmemmap_x86_64(struct drgn_object *ret)
|
|
{
|
|
|
|
struct drgn_error *err;
|
|
struct drgn_program *prog = drgn_object_program(ret);
|
|
|
|
struct drgn_qualified_type qualified_type;
|
|
err = drgn_program_find_type(prog, "struct page *", NULL,
|
|
&qualified_type);
|
|
if (err)
|
|
return err;
|
|
|
|
/* If KASLR is enabled, vmemmap is vmemmap_base. */
|
|
struct drgn_object tmp;
|
|
drgn_object_init(&tmp, prog);
|
|
err = drgn_program_find_object(prog, "vmemmap_base", NULL,
|
|
DRGN_FIND_OBJECT_VARIABLE, &tmp);
|
|
if (!err) {
|
|
err = drgn_object_cast(ret, qualified_type, &tmp);
|
|
goto out;
|
|
}
|
|
if (err->code == DRGN_ERROR_LOOKUP)
|
|
drgn_error_destroy(err);
|
|
else
|
|
goto out;
|
|
|
|
/* Otherwise, it depends on whether we have 5-level page tables. */
|
|
uint64_t value;
|
|
if (prog->vmcoreinfo.pgtable_l5_enabled)
|
|
value = UINT64_C(0xffd4000000000000);
|
|
else
|
|
value = UINT64_C(0xffffea0000000000);
|
|
err = drgn_object_set_unsigned(ret, qualified_type, value, 0);
|
|
|
|
out:
|
|
drgn_object_deinit(&tmp);
|
|
return err;
|
|
}
|
|
|
|
static struct drgn_error *
|
|
linux_kernel_live_direct_mapping_fallback_x86_64(struct drgn_program *prog,
|
|
uint64_t *address_ret,
|
|
uint64_t *size_ret)
|
|
{
|
|
struct drgn_error *err;
|
|
unsigned long page_offset_base_address;
|
|
|
|
*size_ret = UINT64_C(1) << 46;
|
|
err = proc_kallsyms_symbol_addr("page_offset_base",
|
|
&page_offset_base_address);
|
|
if (!err) {
|
|
return drgn_program_read_word(prog, page_offset_base_address,
|
|
false, address_ret);
|
|
} else if (err == &drgn_not_found) {
|
|
/*
|
|
* This is only called for pre-4.11 kernels, so we can assume
|
|
* the old location.
|
|
*/
|
|
*address_ret = UINT64_C(0xffff880000000000);
|
|
return NULL;
|
|
} else {
|
|
return err;
|
|
}
|
|
}
|
|
|
|
/*
 * Architecture-specific page table iterator state: one cached table per
 * page table level, for up to 5 levels.
 */
struct pgtable_iterator_x86_64 {
	/*
	 * Per level, the index of the next entry to consume from table[level].
	 * A value of 512 or more means that level has no cached entries.
	 */
	uint16_t index[5];
	/* Per level, a cache of up to 512 8-byte entries (one 4096-byte table). */
	uint64_t table[5][512];
};
|
|
|
|
static void pgtable_iterator_arch_init_x86_64(void *buf)
|
|
{
|
|
struct pgtable_iterator_x86_64 *arch = buf;
|
|
memset(arch->index, 0xff, sizeof(arch->index));
|
|
memset(arch->table, 0, sizeof(arch->table));
|
|
}
|
|
|
|
static struct drgn_error *
|
|
linux_kernel_pgtable_iterator_next_x86_64(struct pgtable_iterator *it,
|
|
uint64_t *virt_addr_ret,
|
|
uint64_t *phys_addr_ret)
|
|
{
|
|
static const int PAGE_SHIFT = 12;
|
|
static const int PGTABLE_SHIFT = 9;
|
|
static const int PGTABLE_MASK = (1 << PGTABLE_SHIFT) - 1;
|
|
static const uint64_t PRESENT = 0x1;
|
|
static const uint64_t PSE = 0x80; /* a.k.a. huge page */
|
|
static const uint64_t ADDRESS_MASK = UINT64_C(0xffffffffff000);
|
|
struct drgn_program *prog = it->prog;
|
|
struct pgtable_iterator_x86_64 *arch = (void *)it->arch;
|
|
int levels = prog->vmcoreinfo.pgtable_l5_enabled ? 5 : 4, level;
|
|
bool bswap;
|
|
struct drgn_error *err = drgn_program_bswap(prog, &bswap);
|
|
if (err)
|
|
return err;
|
|
|
|
/* Find the lowest level with cached entries. */
|
|
for (level = 0; level < levels; level++) {
|
|
if (arch->index[level] < ARRAY_SIZE(arch->table[level]))
|
|
break;
|
|
}
|
|
/* For every level below that, refill the cache/return pages. */
|
|
for (;; level--) {
|
|
uint64_t table;
|
|
bool table_physical;
|
|
uint16_t index;
|
|
if (level == levels) {
|
|
uint64_t start_non_canonical, end_non_canonical;
|
|
start_non_canonical = (UINT64_C(1) <<
|
|
(PAGE_SHIFT +
|
|
PGTABLE_SHIFT * levels - 1));
|
|
end_non_canonical = (UINT64_MAX <<
|
|
(PAGE_SHIFT +
|
|
PGTABLE_SHIFT * levels - 1));
|
|
if (it->virt_addr >= start_non_canonical &&
|
|
it->virt_addr < end_non_canonical) {
|
|
*virt_addr_ret = start_non_canonical;
|
|
*phys_addr_ret = UINT64_MAX;
|
|
it->virt_addr = end_non_canonical;
|
|
return NULL;
|
|
}
|
|
table = it->pgtable;
|
|
table_physical = false;
|
|
} else {
|
|
uint64_t entry = arch->table[level][arch->index[level]++];
|
|
if (bswap)
|
|
entry = bswap_64(entry);
|
|
table = entry & ADDRESS_MASK;
|
|
if (!(entry & PRESENT) || (entry & PSE) || level == 0) {
|
|
uint64_t mask = (UINT64_C(1) <<
|
|
(PAGE_SHIFT +
|
|
PGTABLE_SHIFT * level)) - 1;
|
|
*virt_addr_ret = it->virt_addr & ~mask;
|
|
if (entry & PRESENT)
|
|
*phys_addr_ret = table & ~mask;
|
|
else
|
|
*phys_addr_ret = UINT64_MAX;
|
|
it->virt_addr = (it->virt_addr | mask) + 1;
|
|
return NULL;
|
|
}
|
|
table_physical = true;
|
|
}
|
|
index = (it->virt_addr >>
|
|
(PAGE_SHIFT + PGTABLE_SHIFT * (level - 1))) & PGTABLE_MASK;
|
|
/*
|
|
* It's only marginally more expensive to read 4096 bytes than 8
|
|
* bytes, so we always read to the end of the table.
|
|
*/
|
|
err = drgn_program_read_memory(prog,
|
|
&arch->table[level - 1][index],
|
|
table + 8 * index,
|
|
sizeof(arch->table[0]) - 8 * index,
|
|
table_physical);
|
|
if (err)
|
|
return err;
|
|
arch->index[level - 1] = index;
|
|
}
|
|
}
|
|
|
|
const struct drgn_architecture_info arch_info_x86_64 = {
|
|
ARCHITECTURE_INFO,
|
|
.default_flags = (DRGN_PLATFORM_IS_64_BIT |
|
|
DRGN_PLATFORM_IS_LITTLE_ENDIAN),
|
|
.pt_regs_set_initial_registers = pt_regs_set_initial_registers_x86_64,
|
|
.prstatus_set_initial_registers = prstatus_set_initial_registers_x86_64,
|
|
.linux_kernel_set_initial_registers =
|
|
linux_kernel_set_initial_registers_x86_64,
|
|
.linux_kernel_get_page_offset = linux_kernel_get_page_offset_x86_64,
|
|
.linux_kernel_get_vmemmap = linux_kernel_get_vmemmap_x86_64,
|
|
.linux_kernel_live_direct_mapping_fallback =
|
|
linux_kernel_live_direct_mapping_fallback_x86_64,
|
|
.pgtable_iterator_arch_size = sizeof(struct pgtable_iterator_x86_64),
|
|
.pgtable_iterator_arch_init = pgtable_iterator_arch_init_x86_64,
|
|
.linux_kernel_pgtable_iterator_next =
|
|
linux_kernel_pgtable_iterator_next_x86_64,
|
|
};
|