I recently hit a couple of CI failures caused by relying on transitive includes that weren't always present. include-what-you-use is a Clang-based tool that helps with this. It's a bit finicky and noisy, so this adds scripts/iwyu.py to make running it more convenient (but not reliable enough to automate it in Travis). This cleans up all reasonable include-what-you-use warnings and reorganizes a few header files. Signed-off-by: Omar Sandoval <osandov@osandov.com>
%{
// Copyright (c) Facebook, Inc. and its affiliates.
// SPDX-License-Identifier: GPL-3.0+

#include <byteswap.h>
#include <elfutils/libdw.h>
#include <elfutils/libdwfl.h>
#include <string.h>

#include "drgn.h"
#include "error.h"
#include "linux_kernel.h"
#include "platform.h"
#include "program.h"
#include "util.h"
%}

x86-64
%%
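# The registers below are listed in System V x86-64 psABI DWARF register
# number order; an entry with an explicit number (e.g. "fs.base, 58") is,
# presumably, where the psABI numbering skips ahead past unused numbers.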
rax
rdx
rcx
rbx
rsi
rdi
rbp
rsp
r8
r9
r10
r11
r12
r13
r14
r15
# The System V ABI calls this the return address (RA) register, but it's
# effectively the instruction pointer.
rip
xmm0
xmm1
xmm2
xmm3
xmm4
xmm5
xmm6
xmm7
xmm8
xmm9
xmm10
xmm11
xmm12
xmm13
xmm14
xmm15
st0
st1
st2
st3
st4
st5
st6
st7
mm0
mm1
mm2
mm3
mm4
mm5
mm6
mm7
rFLAGS
es
cs
ss
ds
fs
gs
fs.base, 58
gs.base
tr, 62
ldtr
mxcsr
fcw
fsw
xmm16
xmm17
xmm18
xmm19
xmm20
xmm21
xmm22
xmm23
xmm24
xmm25
xmm26
xmm27
xmm28
xmm29
xmm30
xmm31
k0, 118
k1
k2
k3
k4
k5
k6
k7
bnd0
bnd1
bnd2
bnd3
%%

/*
 * The in-kernel struct pt_regs, UAPI struct pt_regs, elf_gregset_t, and struct
 * user_regs_struct all have the same layout.
 */
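/*
 * (For reference, that common layout is, in 8-byte slots: r15, r14, r13, r12,
 * bp, bx, r11, r10, r9, r8, ax, cx, dx, si, di, orig_ax, ip, cs, flags, sp,
 * ss. The READ_REGISTER() indices below refer to these slots; sp, slot 19, is
 * the last one we read, hence the 160-byte minimum size.)
 */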
static struct drgn_error *
set_initial_registers_from_struct_x86_64(Dwfl_Thread *thread, const void *regs,
                                         size_t size, bool bswap)
{
        Dwarf_Word dwarf_regs[17];

        if (size < 160) {
                return drgn_error_create(DRGN_ERROR_INVALID_ARGUMENT,
                                         "registers are truncated");
        }

#define READ_REGISTER(n) ({ \
        uint64_t reg; \
        memcpy(&reg, (uint64_t *)regs + n, sizeof(reg)); \
        bswap ? bswap_64(reg) : reg; \
})
        dwarf_regs[0] = READ_REGISTER(10); /* rax */
        dwarf_regs[1] = READ_REGISTER(12); /* rdx */
        dwarf_regs[2] = READ_REGISTER(11); /* rcx */
        dwarf_regs[3] = READ_REGISTER(5); /* rbx */
        dwarf_regs[4] = READ_REGISTER(13); /* rsi */
        dwarf_regs[5] = READ_REGISTER(14); /* rdi */
        dwarf_regs[6] = READ_REGISTER(4); /* rbp */
        dwarf_regs[7] = READ_REGISTER(19); /* rsp */
        dwarf_regs[8] = READ_REGISTER(9); /* r8 */
        dwarf_regs[9] = READ_REGISTER(8); /* r9 */
        dwarf_regs[10] = READ_REGISTER(7); /* r10 */
        dwarf_regs[11] = READ_REGISTER(6); /* r11 */
        dwarf_regs[12] = READ_REGISTER(3); /* r12 */
        dwarf_regs[13] = READ_REGISTER(2); /* r13 */
        dwarf_regs[14] = READ_REGISTER(1); /* r14 */
        dwarf_regs[15] = READ_REGISTER(0); /* r15 */
        dwarf_regs[16] = READ_REGISTER(16); /* rip */
#undef READ_REGISTER

        if (!dwfl_thread_state_registers(thread, 0, 17, dwarf_regs))
                return drgn_error_libdwfl();
        return NULL;
}

static struct drgn_error *
pt_regs_set_initial_registers_x86_64(Dwfl_Thread *thread,
                                     const struct drgn_object *obj)
{
        bool bswap = (obj->value.little_endian !=
                      (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__));
        return set_initial_registers_from_struct_x86_64(thread,
                                                        drgn_object_buffer(obj),
                                                        drgn_buffer_object_size(obj),
                                                        bswap);
}

static struct drgn_error *
prstatus_set_initial_registers_x86_64(struct drgn_program *prog,
                                      Dwfl_Thread *thread, const void *prstatus,
                                      size_t size)
{
        /* pr_reg starts at offset 112 in struct elf_prstatus on x86-64. */
        if (size < 112) {
                return drgn_error_create(DRGN_ERROR_INVALID_ARGUMENT,
                                         "NT_PRSTATUS is truncated");
        }
        bool bswap;
        struct drgn_error *err = drgn_program_bswap(prog, &bswap);
        if (err)
                return err;
        return set_initial_registers_from_struct_x86_64(thread,
                                                        (char *)prstatus + 112,
                                                        size - 112, bswap);
}

static inline struct drgn_error *read_register(struct drgn_object *reg_obj,
                                               struct drgn_object *frame_obj,
                                               const char *name,
                                               Dwarf_Addr *ret)
{
        struct drgn_error *err;
        uint64_t reg;

        err = drgn_object_member_dereference(reg_obj, frame_obj, name);
        if (err)
                return err;
        err = drgn_object_read_unsigned(reg_obj, &reg);
        if (err)
                return err;
        *ret = reg;
        return NULL;
}

static struct drgn_error *
set_initial_registers_inactive_task_frame(Dwfl_Thread *thread,
                                          struct drgn_object *frame_obj)
{
        struct drgn_error *err;
        struct drgn_object reg_obj;
        Dwarf_Word dwarf_regs[5];
        uint64_t sp;

        drgn_object_init(&reg_obj, drgn_object_program(frame_obj));

        err = read_register(&reg_obj, frame_obj, "bx", &dwarf_regs[0]);
        if (err)
                goto out;
        /* rbx is register 3. */
        if (!dwfl_thread_state_registers(thread, 3, 1, dwarf_regs)) {
                err = drgn_error_libdwfl();
                goto out;
        }

        err = read_register(&reg_obj, frame_obj, "bp", &dwarf_regs[0]);
        if (err)
                goto out;
        /* The address of the frame itself is the saved stack pointer. */
        err = drgn_object_read_unsigned(frame_obj, &sp);
        if (err)
                goto out;
        dwarf_regs[1] = sp;
        /* rbp and rsp are registers 6 and 7, respectively. */
        if (!dwfl_thread_state_registers(thread, 6, 2, dwarf_regs)) {
                err = drgn_error_libdwfl();
                goto out;
        }

        err = read_register(&reg_obj, frame_obj, "r12", &dwarf_regs[0]);
        if (err)
                goto out;
        err = read_register(&reg_obj, frame_obj, "r13", &dwarf_regs[1]);
        if (err)
                goto out;
        err = read_register(&reg_obj, frame_obj, "r14", &dwarf_regs[2]);
        if (err)
                goto out;
        err = read_register(&reg_obj, frame_obj, "r15", &dwarf_regs[3]);
        if (err)
                goto out;
        err = read_register(&reg_obj, frame_obj, "ret_addr", &dwarf_regs[4]);
        if (err)
                goto out;
        /* r12-r15 are registers 12-15; register 16 is the return address. */
        if (!dwfl_thread_state_registers(thread, 12, 5, dwarf_regs)) {
                err = drgn_error_libdwfl();
                goto out;
        }

        err = NULL;
out:
        drgn_object_deinit(&reg_obj);
        return err;
}

static struct drgn_error *
linux_kernel_set_initial_registers_x86_64(Dwfl_Thread *thread,
                                          const struct drgn_object *task_obj)
{
        struct drgn_error *err;
        struct drgn_program *prog = drgn_object_program(task_obj);
        struct drgn_object sp_obj;

        drgn_object_init(&sp_obj, prog);

        err = drgn_object_member_dereference(&sp_obj, task_obj, "thread");
        if (err)
                goto out;
        err = drgn_object_member(&sp_obj, &sp_obj, "sp");
        if (err)
                goto out;
        uint64_t sp;
        err = drgn_object_read_unsigned(&sp_obj, &sp);
        if (err)
                goto out;
        Dwarf_Word dwarf_reg = sp;
        /* rsp is register 7. */
        if (!dwfl_thread_state_registers(thread, 7, 1, &dwarf_reg)) {
                err = drgn_error_libdwfl();
                goto out;
        }

        /*
         * Since Linux kernel commit 0100301bfdf5 ("sched/x86: Rewrite the
         * switch_to() code") (in v4.9), sp points to a struct
         * inactive_task_frame, which we can use to get most registers. Before
         * that, it points to bp.
         */
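        /*
         * (struct inactive_task_frame, from arch/x86/include/asm/switch_to.h,
         * holds the callee-saved registers spilled at the context switch:
         * r15, r14, r13, r12, bx, bp, and the return address, ret_addr. Those
         * are the members read by name in
         * set_initial_registers_inactive_task_frame() above.)
         */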
        struct drgn_qualified_type frame_type;
        err = drgn_program_find_type(prog, "struct inactive_task_frame *", NULL,
                                     &frame_type);
        if (!err) {
                err = drgn_object_cast(&sp_obj, frame_type, &sp_obj);
                if (err)
                        goto out;
                err = set_initial_registers_inactive_task_frame(thread,
                                                                &sp_obj);
        } else if (err->code == DRGN_ERROR_LOOKUP) {
                uint64_t bp;

                drgn_error_destroy(err);
                err = drgn_program_find_type(prog, "unsigned long", NULL,
                                             &frame_type);
                if (err)
                        goto out;
                err = drgn_object_set_reference(&sp_obj, frame_type, sp, 0, 0,
                                                DRGN_PROGRAM_ENDIAN);
                if (err)
                        goto out;
                err = drgn_object_read_unsigned(&sp_obj, &bp);
                if (err)
                        goto out;
                dwarf_reg = bp;
                /* rbp is register 6. */
                if (!dwfl_thread_state_registers(thread, 6, 1, &dwarf_reg)) {
                        err = drgn_error_libdwfl();
                        goto out;
                }
                err = NULL;
        }
out:
        drgn_object_deinit(&sp_obj);
        return err;
}

static struct drgn_error *
linux_kernel_get_page_offset_x86_64(struct drgn_program *prog, uint64_t *ret)
{
        struct drgn_error *err;
        struct drgn_object obj;
        uint64_t pgd;

        /*
         * If KASLR is enabled, PAGE_OFFSET is easily available via
         * page_offset_base.
         */
        drgn_object_init(&obj, prog);
        err = drgn_program_find_object(prog, "page_offset_base", NULL,
                                       DRGN_FIND_OBJECT_VARIABLE, &obj);
        if (!err) {
                err = drgn_object_read_unsigned(&obj, ret);
                goto out;
        }
        if (err->code == DRGN_ERROR_LOOKUP)
                drgn_error_destroy(err);
        else
                goto out;

        /*
         * If not, we determine it based on the kernel page table. Before Linux
         * kernel commit d52888aa2753 ("x86/mm: Move LDT remap out of KASLR
         * region on 5-level paging") (in v4.20), PAGE_OFFSET was pgd slot 272.
         * After that, it is pgd slot 273, and slot 272 is empty (reserved for
         * Local Descriptor Table mappings for userspace tasks).
         */
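        /*
         * (Each 4-level pgd slot covers 1 << 39 bytes and the kernel half
         * starts at slot 256 == 0xffff800000000000, so slot 272 is
         * 0xffff880000000000 and slot 273 is 0xffff888000000000. With 5-level
         * paging a slot covers 1 << 48 bytes starting at 0xff00000000000000,
         * giving 0xff10000000000000 and 0xff11000000000000, respectively.)
         */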
        err = drgn_program_read_u64(prog,
                                    prog->vmcoreinfo.swapper_pg_dir + 272 * 8,
                                    false, &pgd);
        if (err)
                goto out;
        if (pgd) {
                if (prog->vmcoreinfo.pgtable_l5_enabled)
                        *ret = UINT64_C(0xff10000000000000);
                else
                        *ret = UINT64_C(0xffff880000000000);
        } else {
                if (prog->vmcoreinfo.pgtable_l5_enabled)
                        *ret = UINT64_C(0xff11000000000000);
                else
                        *ret = UINT64_C(0xffff888000000000);
        }

out:
        drgn_object_deinit(&obj);
        return err;
}

static struct drgn_error *
linux_kernel_get_vmemmap_x86_64(struct drgn_program *prog, uint64_t *ret)
{
        struct drgn_error *err;
        struct drgn_object obj;

        /* If KASLR is enabled, vmemmap is vmemmap_base. */
        drgn_object_init(&obj, prog);
        err = drgn_program_find_object(prog, "vmemmap_base", NULL,
                                       DRGN_FIND_OBJECT_VARIABLE, &obj);
        if (!err) {
                err = drgn_object_read_unsigned(&obj, ret);
                goto out;
        }
        if (err->code == DRGN_ERROR_LOOKUP) {
                drgn_error_destroy(err);
                err = NULL;
        } else {
                goto out;
        }

        /* Otherwise, it depends on whether we have 5-level page tables. */
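        /*
         * (These are the default, non-KASLR vmemmap base addresses from the
         * kernel's x86-64 virtual memory map: 0xffffea0000000000 with 4-level
         * paging and 0xffd4000000000000 with 5-level paging.)
         */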
        if (prog->vmcoreinfo.pgtable_l5_enabled)
                *ret = UINT64_C(0xffd4000000000000);
        else
                *ret = UINT64_C(0xffffea0000000000);

out:
        drgn_object_deinit(&obj);
        return err;
}

static struct drgn_error *
linux_kernel_live_direct_mapping_fallback_x86_64(struct drgn_program *prog,
                                                 uint64_t *address_ret,
                                                 uint64_t *size_ret)
{
        struct drgn_error *err;
        unsigned long page_offset_base_address;

        /* The direct mapping is 64 TiB with 4-level page tables. */
        *size_ret = UINT64_C(1) << 46;
        err = proc_kallsyms_symbol_addr("page_offset_base",
                                        &page_offset_base_address);
        if (!err) {
                return drgn_program_read_word(prog, page_offset_base_address,
                                              false, address_ret);
        } else if (err == &drgn_not_found) {
                /*
                 * This is only called for pre-4.11 kernels, so we can assume
                 * the old location.
                 */
                *address_ret = UINT64_C(0xffff880000000000);
                return NULL;
        } else {
                return err;
        }
}

struct pgtable_iterator_x86_64 {
        uint16_t index[5];
        uint64_t table[5][512];
};

static void pgtable_iterator_arch_init_x86_64(void *buf)
{
        struct pgtable_iterator_x86_64 *arch = buf;
        /* An out-of-range index means the table at that level isn't cached. */
        memset(arch->index, 0xff, sizeof(arch->index));
        memset(arch->table, 0, sizeof(arch->table));
}

static struct drgn_error *
linux_kernel_pgtable_iterator_next_x86_64(struct pgtable_iterator *it,
                                          uint64_t *virt_addr_ret,
                                          uint64_t *phys_addr_ret)
{
        static const int PAGE_SHIFT = 12;
        static const int PGTABLE_SHIFT = 9;
        static const int PGTABLE_MASK = (1 << PGTABLE_SHIFT) - 1;
        static const uint64_t PRESENT = 0x1;
        static const uint64_t PSE = 0x80; /* a.k.a. huge page */
        /* Physical address bits 12-51 of a page table entry. */
        static const uint64_t ADDRESS_MASK = UINT64_C(0xffffffffff000);
        struct drgn_program *prog = it->prog;
        struct pgtable_iterator_x86_64 *arch = (void *)it->arch;
        int levels = prog->vmcoreinfo.pgtable_l5_enabled ? 5 : 4, level;
        bool bswap;
        struct drgn_error *err = drgn_program_bswap(prog, &bswap);
        if (err)
                return err;

        /* Find the lowest level with cached entries. */
        for (level = 0; level < levels; level++) {
                if (arch->index[level] < ARRAY_SIZE(arch->table[level]))
                        break;
        }
        /* For every level below that, refill the cache/return pages. */
        for (;; level--) {
                uint64_t table;
                bool table_physical;
                uint16_t index;
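                /*
                 * (At the top level there is no table entry to consume;
                 * instead, addresses in the non-canonical hole, e.g.
                 * [0x0000800000000000, 0xffff800000000000) with 4-level
                 * paging, are reported as one large unmapped range.)
                 */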
                if (level == levels) {
                        uint64_t start_non_canonical, end_non_canonical;
                        start_non_canonical = (UINT64_C(1) <<
                                               (PAGE_SHIFT +
                                                PGTABLE_SHIFT * levels - 1));
                        end_non_canonical = (UINT64_MAX <<
                                             (PAGE_SHIFT +
                                              PGTABLE_SHIFT * levels - 1));
                        if (it->virt_addr >= start_non_canonical &&
                            it->virt_addr < end_non_canonical) {
                                *virt_addr_ret = start_non_canonical;
                                *phys_addr_ret = UINT64_MAX;
                                it->virt_addr = end_non_canonical;
                                return NULL;
                        }
                        table = it->pgtable;
                        table_physical = false;
                } else {
                        uint64_t entry = arch->table[level][arch->index[level]++];
                        if (bswap)
                                entry = bswap_64(entry);
                        table = entry & ADDRESS_MASK;
                        if (!(entry & PRESENT) || (entry & PSE) || level == 0) {
                                uint64_t mask = (UINT64_C(1) <<
                                                 (PAGE_SHIFT +
                                                  PGTABLE_SHIFT * level)) - 1;
                                *virt_addr_ret = it->virt_addr & ~mask;
                                if (entry & PRESENT)
                                        *phys_addr_ret = table & ~mask;
                                else
                                        *phys_addr_ret = UINT64_MAX;
                                it->virt_addr = (it->virt_addr | mask) + 1;
                                return NULL;
                        }
                        table_physical = true;
                }
                index = (it->virt_addr >>
                         (PAGE_SHIFT + PGTABLE_SHIFT * (level - 1))) & PGTABLE_MASK;
                /*
                 * It's only marginally more expensive to read 4096 bytes than 8
                 * bytes, so we always read to the end of the table.
                 */
                err = drgn_program_read_memory(prog,
                                               &arch->table[level - 1][index],
                                               table + 8 * index,
                                               sizeof(arch->table[0]) - 8 * index,
                                               table_physical);
                if (err)
                        return err;
                arch->index[level - 1] = index;
        }
}

const struct drgn_architecture_info arch_info_x86_64 = {
        ARCHITECTURE_INFO,
        .default_flags = (DRGN_PLATFORM_IS_64_BIT |
                          DRGN_PLATFORM_IS_LITTLE_ENDIAN),
        .pt_regs_set_initial_registers = pt_regs_set_initial_registers_x86_64,
        .prstatus_set_initial_registers = prstatus_set_initial_registers_x86_64,
        .linux_kernel_set_initial_registers =
                linux_kernel_set_initial_registers_x86_64,
        .linux_kernel_get_page_offset = linux_kernel_get_page_offset_x86_64,
        .linux_kernel_get_vmemmap = linux_kernel_get_vmemmap_x86_64,
        .linux_kernel_live_direct_mapping_fallback =
                linux_kernel_live_direct_mapping_fallback_x86_64,
        .pgtable_iterator_arch_size = sizeof(struct pgtable_iterator_x86_64),
        .pgtable_iterator_arch_init = pgtable_iterator_arch_init_x86_64,
        .linux_kernel_pgtable_iterator_next =
                linux_kernel_pgtable_iterator_next_x86_64,
};