mirror of
https://github.com/JakeHillion/drgn.git
synced 2024-12-22 17:23:06 +00:00
e9d16732d6
We're currently checking whether the iterator has entered the non-canonical range when fetching the last level of the page table, but the cutover actually happens while we're in the last level. Fix it by doing the check unconditionally. Signed-off-by: Omar Sandoval <osandov@osandov.com>
667 lines
19 KiB
C
667 lines
19 KiB
C
// Copyright (c) Meta Platforms, Inc. and affiliates.
|
|
// SPDX-License-Identifier: GPL-3.0-or-later
|
|
|
|
#include <byteswap.h>
|
|
#include <elf.h>
|
|
#include <string.h>
|
|
|
|
#include "array.h"
|
|
#include "drgn.h"
|
|
#include "error.h"
|
|
#include "linux_kernel.h"
|
|
#include "orc.h"
|
|
#include "platform.h" // IWYU pragma: associated
|
|
#include "program.h"
|
|
#include "register_state.h"
|
|
#include "serialize.h"
|
|
#include "type.h"
|
|
#include "util.h"
|
|
|
|
#include "arch_x86_64_defs.inc"
|
|
|
|
static const struct drgn_cfi_row default_dwarf_cfi_row_x86_64 = DRGN_CFI_ROW(
|
|
/*
|
|
* The System V psABI defines the CFA as the value of rsp in the calling
|
|
* frame.
|
|
*/
|
|
[DRGN_REGISTER_NUMBER(rsp)] = { DRGN_CFI_RULE_CFA_PLUS_OFFSET },
|
|
/*
|
|
* Other callee-saved registers default to DW_CFA_same_value. This isn't
|
|
* explicitly documented in the psABI, but it seems to be the consensus.
|
|
*/
|
|
DRGN_CFI_SAME_VALUE_INIT(DRGN_REGISTER_NUMBER(rbx)),
|
|
DRGN_CFI_SAME_VALUE_INIT(DRGN_REGISTER_NUMBER(rbp)),
|
|
DRGN_CFI_SAME_VALUE_INIT(DRGN_REGISTER_NUMBER(r12)),
|
|
DRGN_CFI_SAME_VALUE_INIT(DRGN_REGISTER_NUMBER(r13)),
|
|
DRGN_CFI_SAME_VALUE_INIT(DRGN_REGISTER_NUMBER(r14)),
|
|
DRGN_CFI_SAME_VALUE_INIT(DRGN_REGISTER_NUMBER(r15)),
|
|
);
|
|
|
|
static struct drgn_error *
|
|
orc_to_cfi_x86_64(const struct drgn_orc_entry *orc,
|
|
struct drgn_cfi_row **row_ret, bool *interrupted_ret,
|
|
drgn_register_number *ret_addr_regno_ret)
|
|
{
|
|
enum {
|
|
ORC_REG_UNDEFINED = 0,
|
|
ORC_REG_PREV_SP = 1,
|
|
ORC_REG_DX = 2,
|
|
ORC_REG_DI = 3,
|
|
ORC_REG_BP = 4,
|
|
ORC_REG_SP = 5,
|
|
ORC_REG_R10 = 6,
|
|
ORC_REG_R13 = 7,
|
|
ORC_REG_BP_INDIRECT = 8,
|
|
ORC_REG_SP_INDIRECT = 9,
|
|
};
|
|
|
|
if (!drgn_cfi_row_copy(row_ret, drgn_empty_cfi_row))
|
|
return &drgn_enomem;
|
|
|
|
struct drgn_cfi_rule rule;
|
|
switch (drgn_orc_sp_reg(orc)) {
|
|
case ORC_REG_UNDEFINED:
|
|
if (drgn_orc_is_end(orc))
|
|
return NULL;
|
|
else
|
|
return &drgn_not_found;
|
|
case ORC_REG_SP:
|
|
rule.kind = DRGN_CFI_RULE_REGISTER_PLUS_OFFSET;
|
|
rule.regno = DRGN_REGISTER_NUMBER(rsp);
|
|
rule.offset = orc->sp_offset;
|
|
break;
|
|
case ORC_REG_BP:
|
|
rule.kind = DRGN_CFI_RULE_REGISTER_PLUS_OFFSET;
|
|
rule.regno = DRGN_REGISTER_NUMBER(rbp);
|
|
rule.offset = orc->sp_offset;
|
|
break;
|
|
case ORC_REG_SP_INDIRECT:
|
|
rule.kind = DRGN_CFI_RULE_AT_REGISTER_ADD_OFFSET;
|
|
rule.regno = DRGN_REGISTER_NUMBER(rbp);
|
|
rule.offset = orc->sp_offset;
|
|
break;
|
|
case ORC_REG_BP_INDIRECT:
|
|
rule.kind = DRGN_CFI_RULE_AT_REGISTER_PLUS_OFFSET;
|
|
rule.regno = DRGN_REGISTER_NUMBER(rbp);
|
|
rule.offset = orc->sp_offset;
|
|
break;
|
|
case ORC_REG_R10:
|
|
rule.kind = DRGN_CFI_RULE_REGISTER_PLUS_OFFSET;
|
|
rule.regno = DRGN_REGISTER_NUMBER(r10);
|
|
rule.offset = 0;
|
|
break;
|
|
case ORC_REG_R13:
|
|
rule.kind = DRGN_CFI_RULE_REGISTER_PLUS_OFFSET;
|
|
rule.regno = DRGN_REGISTER_NUMBER(r13);
|
|
rule.offset = 0;
|
|
break;
|
|
case ORC_REG_DI:
|
|
rule.kind = DRGN_CFI_RULE_REGISTER_PLUS_OFFSET;
|
|
rule.regno = DRGN_REGISTER_NUMBER(rdi);
|
|
rule.offset = 0;
|
|
break;
|
|
case ORC_REG_DX:
|
|
rule.kind = DRGN_CFI_RULE_REGISTER_PLUS_OFFSET;
|
|
rule.regno = DRGN_REGISTER_NUMBER(rdx);
|
|
rule.offset = 0;
|
|
break;
|
|
default:
|
|
return drgn_error_format(DRGN_ERROR_OTHER,
|
|
"unknown ORC SP base register %d",
|
|
drgn_orc_sp_reg(orc));
|
|
}
|
|
if (!drgn_cfi_row_set_cfa(row_ret, &rule))
|
|
return &drgn_enomem;
|
|
|
|
switch (drgn_orc_type(orc)) {
|
|
case DRGN_ORC_TYPE_CALL:
|
|
rule.kind = DRGN_CFI_RULE_AT_CFA_PLUS_OFFSET;
|
|
rule.offset = -8;
|
|
if (!drgn_cfi_row_set_register(row_ret,
|
|
DRGN_REGISTER_NUMBER(rip),
|
|
&rule))
|
|
return &drgn_enomem;
|
|
rule.kind = DRGN_CFI_RULE_CFA_PLUS_OFFSET;
|
|
rule.offset = 0;
|
|
if (!drgn_cfi_row_set_register(row_ret,
|
|
DRGN_REGISTER_NUMBER(rsp),
|
|
&rule))
|
|
return &drgn_enomem;
|
|
*interrupted_ret = false;
|
|
break;
|
|
#define SET_AT_CFA_RULE(reg, cfa_offset) do { \
|
|
rule.kind = DRGN_CFI_RULE_AT_CFA_PLUS_OFFSET; \
|
|
rule.offset = cfa_offset; \
|
|
if (!drgn_cfi_row_set_register(row_ret, DRGN_REGISTER_NUMBER(reg), \
|
|
&rule)) \
|
|
return &drgn_enomem; \
|
|
} while (0)
|
|
case DRGN_ORC_TYPE_REGS:
|
|
SET_AT_CFA_RULE(rip, 128);
|
|
SET_AT_CFA_RULE(rsp, 152);
|
|
SET_AT_CFA_RULE(r15, 0);
|
|
SET_AT_CFA_RULE(r14, 8);
|
|
SET_AT_CFA_RULE(r13, 16);
|
|
SET_AT_CFA_RULE(r12, 24);
|
|
SET_AT_CFA_RULE(rbp, 32);
|
|
SET_AT_CFA_RULE(rbx, 40);
|
|
SET_AT_CFA_RULE(r11, 48);
|
|
SET_AT_CFA_RULE(r10, 56);
|
|
SET_AT_CFA_RULE(r9, 64);
|
|
SET_AT_CFA_RULE(r8, 72);
|
|
SET_AT_CFA_RULE(rax, 80);
|
|
SET_AT_CFA_RULE(rcx, 88);
|
|
SET_AT_CFA_RULE(rdx, 96);
|
|
SET_AT_CFA_RULE(rsi, 104);
|
|
SET_AT_CFA_RULE(rdi, 112);
|
|
SET_AT_CFA_RULE(cs, 136);
|
|
SET_AT_CFA_RULE(rflags, 144);
|
|
SET_AT_CFA_RULE(ss, 160);
|
|
*interrupted_ret = true;
|
|
break;
|
|
case DRGN_ORC_TYPE_REGS_PARTIAL:
|
|
SET_AT_CFA_RULE(rip, 0);
|
|
SET_AT_CFA_RULE(rsp, 24);
|
|
#undef SET_AT_CFA_RULE
|
|
#define SET_SAME_VALUE_RULE(reg) do { \
|
|
rule.kind = DRGN_CFI_RULE_REGISTER_PLUS_OFFSET; \
|
|
rule.regno = DRGN_REGISTER_NUMBER(reg); \
|
|
rule.offset = 0; \
|
|
if (!drgn_cfi_row_set_register(row_ret, DRGN_REGISTER_NUMBER(reg), \
|
|
&rule)) \
|
|
return &drgn_enomem; \
|
|
} while (0)
|
|
/*
|
|
* This ORC entry is for an interrupt handler before it saves
|
|
* the whole pt_regs. These registers are not clobbered before
|
|
* they are saved, so they should have the same value. See Linux
|
|
* kernel commit 81b67439d147 ("x86/unwind/orc: Fix premature
|
|
* unwind stoppage due to IRET frames").
|
|
*
|
|
* This probably also applies to other registers, but to stay on
|
|
* the safe side we only handle registers used by ORC.
|
|
*/
|
|
SET_SAME_VALUE_RULE(r10);
|
|
SET_SAME_VALUE_RULE(r13);
|
|
SET_SAME_VALUE_RULE(rdi);
|
|
SET_SAME_VALUE_RULE(rdx);
|
|
#undef SET_SAME_VALUE_RULE
|
|
*interrupted_ret = true;
|
|
break;
|
|
default:
|
|
return drgn_error_format(DRGN_ERROR_OTHER,
|
|
"unknown ORC entry type %d",
|
|
drgn_orc_type(orc));
|
|
}
|
|
|
|
switch (drgn_orc_bp_reg(orc)) {
|
|
case ORC_REG_UNDEFINED:
|
|
rule.kind = DRGN_CFI_RULE_REGISTER_PLUS_OFFSET;
|
|
rule.regno = DRGN_REGISTER_NUMBER(rbp);
|
|
rule.offset = 0;
|
|
break;
|
|
case ORC_REG_PREV_SP:
|
|
rule.kind = DRGN_CFI_RULE_AT_CFA_PLUS_OFFSET;
|
|
rule.offset = orc->bp_offset;
|
|
break;
|
|
case ORC_REG_BP:
|
|
rule.kind = DRGN_CFI_RULE_AT_REGISTER_PLUS_OFFSET;
|
|
rule.regno = DRGN_REGISTER_NUMBER(rbp);
|
|
rule.offset = orc->bp_offset;
|
|
break;
|
|
default:
|
|
return drgn_error_format(DRGN_ERROR_OTHER,
|
|
"unknown ORC BP base register %d",
|
|
drgn_orc_bp_reg(orc));
|
|
}
|
|
if (!drgn_cfi_row_set_register(row_ret, DRGN_REGISTER_NUMBER(rbp),
|
|
&rule))
|
|
return &drgn_enomem;
|
|
*ret_addr_regno_ret = DRGN_REGISTER_NUMBER(rip);
|
|
return NULL;
|
|
}
|
|
|
|
static struct drgn_error *
|
|
get_registers_from_frame_pointer(struct drgn_program *prog,
|
|
uint64_t frame_pointer,
|
|
struct drgn_register_state **ret)
|
|
{
|
|
struct drgn_error *err;
|
|
uint64_t frame[2];
|
|
err = drgn_program_read_memory(prog, frame, frame_pointer,
|
|
sizeof(frame), false);
|
|
if (err)
|
|
return err;
|
|
|
|
uint64_t unwound_frame_pointer =
|
|
drgn_platform_bswap(&prog->platform) ? bswap_64(frame[0]) : frame[0];
|
|
if (unwound_frame_pointer <= frame_pointer) {
|
|
/*
|
|
* The next frame pointer isn't valid. Maybe frame pointers are
|
|
* not enabled or we're in the middle of a prologue or epilogue.
|
|
*/
|
|
return &drgn_stop;
|
|
}
|
|
|
|
struct drgn_register_state *regs =
|
|
drgn_register_state_create(rbp, false);
|
|
if (!regs)
|
|
return &drgn_enomem;
|
|
drgn_register_state_set_from_buffer(regs, rip, &frame[1]);
|
|
drgn_register_state_set_from_u64(prog, regs, rsp,
|
|
frame_pointer + sizeof(frame));
|
|
drgn_register_state_set_from_buffer(regs, rbp, &frame[0]);
|
|
drgn_register_state_set_pc_from_register(prog, regs, rip);
|
|
*ret = regs;
|
|
return NULL;
|
|
}
|
|
|
|
static struct drgn_error *
|
|
fallback_unwind_x86_64(struct drgn_program *prog,
|
|
struct drgn_register_state *regs,
|
|
struct drgn_register_state **ret)
|
|
{
|
|
struct drgn_error *err;
|
|
|
|
struct optional_uint64 rbp =
|
|
drgn_register_state_get_u64(prog, regs, rbp);
|
|
if (!rbp.has_value)
|
|
return &drgn_stop;
|
|
|
|
err = get_registers_from_frame_pointer(prog, rbp.value, ret);
|
|
if (err) {
|
|
if (err->code == DRGN_ERROR_FAULT) {
|
|
drgn_error_destroy(err);
|
|
err = &drgn_stop;
|
|
}
|
|
return err;
|
|
}
|
|
drgn_register_state_set_cfa(prog, regs, rbp.value + 16);
|
|
return NULL;
|
|
}
|
|
|
|
// elf_gregset_t (in NT_PRSTATUS) and struct user_regs_struct have the same
|
|
// layout. struct pt_regs is a prefix of that layout which elides several
|
|
// segment registers. full_regset tells us which one we were given; true is
|
|
// elf_gregset_t, false is struct pt_regs.
|
|
static struct drgn_error *
|
|
get_initial_registers_from_struct_x86_64(struct drgn_program *prog,
|
|
const void *buf, size_t size,
|
|
bool full_regset,
|
|
struct drgn_register_state **ret)
|
|
{
|
|
if (size < (full_regset ? 216 : 168)) {
|
|
return drgn_error_create(DRGN_ERROR_INVALID_ARGUMENT,
|
|
"registers are truncated");
|
|
}
|
|
|
|
struct drgn_register_state *regs;
|
|
if (full_regset)
|
|
regs = drgn_register_state_create(gs, true);
|
|
else
|
|
regs = drgn_register_state_create(ss, true);
|
|
if (!regs)
|
|
return &drgn_enomem;
|
|
|
|
drgn_register_state_set_from_buffer(regs, rip, (uint64_t *)buf + 16);
|
|
drgn_register_state_set_from_buffer(regs, rsp, (uint64_t *)buf + 19);
|
|
drgn_register_state_set_range_from_buffer(regs, r15, rdi, buf);
|
|
drgn_register_state_set_range_from_buffer(regs, cs, rflags,
|
|
(uint64_t *)buf + 17);
|
|
if (full_regset) {
|
|
drgn_register_state_set_range_from_buffer(regs, ss, gs,
|
|
(uint64_t *)buf + 20);
|
|
} else {
|
|
drgn_register_state_set_from_buffer(regs, ss,
|
|
(uint64_t *)buf + 20);
|
|
}
|
|
drgn_register_state_set_pc_from_register(prog, regs, rip);
|
|
|
|
*ret = regs;
|
|
return NULL;
|
|
}
|
|
|
|
/*
 * Get the initial registers from an object's buffer, which is interpreted
 * with the struct pt_regs layout (i.e., not the full register set).
 */
static struct drgn_error *
pt_regs_get_initial_registers_x86_64(const struct drgn_object *obj,
				     struct drgn_register_state **ret)
{
	struct drgn_program *prog = drgn_object_program(obj);
	const void *buf = drgn_object_buffer(obj);
	size_t size = drgn_object_size(obj);

	return get_initial_registers_from_struct_x86_64(prog, buf, size, false,
							ret);
}
|
|
|
|
static struct drgn_error *
|
|
prstatus_get_initial_registers_x86_64(struct drgn_program *prog,
|
|
const void *prstatus, size_t size,
|
|
struct drgn_register_state **ret)
|
|
{
|
|
// offsetof(struct elf_prstatus, pr_reg)
|
|
static const size_t pr_reg_offset = 112;
|
|
if (size < pr_reg_offset) {
|
|
return drgn_error_create(DRGN_ERROR_INVALID_ARGUMENT,
|
|
"NT_PRSTATUS is truncated");
|
|
}
|
|
return get_initial_registers_from_struct_x86_64(prog,
|
|
(char *)prstatus + pr_reg_offset,
|
|
size - pr_reg_offset,
|
|
true, ret);
|
|
}
|
|
|
|
static struct drgn_error *
|
|
get_initial_registers_inactive_task_frame(struct drgn_object *frame_obj,
|
|
struct drgn_register_state **ret)
|
|
{
|
|
struct drgn_error *err;
|
|
struct drgn_program *prog = drgn_object_program(frame_obj);
|
|
|
|
uint64_t address = frame_obj->address;
|
|
err = drgn_object_read(frame_obj, frame_obj);
|
|
if (err)
|
|
return err;
|
|
const char *frame_buf = drgn_object_buffer(frame_obj);
|
|
size_t frame_size = drgn_object_size(frame_obj);
|
|
|
|
struct drgn_register_state *regs =
|
|
drgn_register_state_create(rbx, false);
|
|
if (!regs)
|
|
return &drgn_enomem;
|
|
|
|
#define COPY_REGISTER(id, member_name) do { \
|
|
struct drgn_type_member *member; \
|
|
uint64_t bit_offset; \
|
|
err = drgn_type_find_member(frame_obj->type, member_name, &member, \
|
|
&bit_offset); \
|
|
if (err) \
|
|
goto err; \
|
|
if (bit_offset / 8 + DRGN_REGISTER_SIZE(id) > frame_size) { \
|
|
err = drgn_error_create(DRGN_ERROR_OUT_OF_BOUNDS, \
|
|
"out of bounds of value"); \
|
|
goto err; \
|
|
} \
|
|
drgn_register_state_set_from_buffer(regs, id, \
|
|
frame_buf + bit_offset / 8); \
|
|
} while (0)
|
|
|
|
COPY_REGISTER(rip, "ret_addr");
|
|
COPY_REGISTER(r15, "r15");
|
|
COPY_REGISTER(r14, "r14");
|
|
COPY_REGISTER(r13, "r13");
|
|
COPY_REGISTER(r12, "r12");
|
|
COPY_REGISTER(rbp, "bp");
|
|
COPY_REGISTER(rbx, "bx");
|
|
|
|
#undef COPY_REGISTER
|
|
|
|
drgn_register_state_set_from_u64(prog, regs, rsp, address + frame_size);
|
|
drgn_register_state_set_pc_from_register(prog, regs, rip);
|
|
|
|
*ret = regs;
|
|
return NULL;
|
|
|
|
err:
|
|
drgn_register_state_destroy(regs);
|
|
return err;
|
|
}
|
|
|
|
static struct drgn_error *
|
|
linux_kernel_get_initial_registers_x86_64(const struct drgn_object *task_obj,
|
|
struct drgn_register_state **ret)
|
|
{
|
|
struct drgn_error *err;
|
|
struct drgn_program *prog = drgn_object_program(task_obj);
|
|
|
|
struct drgn_object sp_obj;
|
|
drgn_object_init(&sp_obj, prog);
|
|
|
|
err = drgn_object_member_dereference(&sp_obj, task_obj, "thread");
|
|
if (err)
|
|
goto out;
|
|
err = drgn_object_member(&sp_obj, &sp_obj, "sp");
|
|
if (err)
|
|
goto out;
|
|
|
|
/*
|
|
* Since Linux kernel commit 0100301bfdf5 ("sched/x86: Rewrite the
|
|
* switch_to() code") (in v4.9), sp points to a struct
|
|
* inactive_task_frame, which we can use to get the callee-saved
|
|
* registers. Before that, sp points to bp. As long as frame pointers
|
|
* are enabled, this in turn points to the previous bp and the return
|
|
* address.
|
|
*/
|
|
struct drgn_qualified_type frame_type;
|
|
err = drgn_program_find_type(prog, "struct inactive_task_frame *", NULL,
|
|
&frame_type);
|
|
if (!err) {
|
|
err = drgn_object_cast(&sp_obj, frame_type, &sp_obj);
|
|
if (err)
|
|
goto out;
|
|
err = drgn_object_dereference(&sp_obj, &sp_obj);
|
|
if (err)
|
|
goto out;
|
|
err = get_initial_registers_inactive_task_frame(&sp_obj, ret);
|
|
} else if (err->code == DRGN_ERROR_LOOKUP) {
|
|
drgn_error_destroy(err);
|
|
err = drgn_program_find_type(prog, "void **", NULL,
|
|
&frame_type);
|
|
if (err)
|
|
goto out;
|
|
err = drgn_object_cast(&sp_obj, frame_type, &sp_obj);
|
|
if (err)
|
|
goto out;
|
|
err = drgn_object_dereference(&sp_obj, &sp_obj);
|
|
if (err)
|
|
goto out;
|
|
uint64_t frame_pointer;
|
|
err = drgn_object_read_unsigned(&sp_obj, &frame_pointer);
|
|
if (err)
|
|
return err;
|
|
err = get_registers_from_frame_pointer(prog, frame_pointer,
|
|
ret);
|
|
if (err == &drgn_stop) {
|
|
err = drgn_error_create(DRGN_ERROR_OTHER,
|
|
"invalid frame pointer");
|
|
}
|
|
}
|
|
out:
|
|
drgn_object_deinit(&sp_obj);
|
|
return err;
|
|
}
|
|
|
|
static struct drgn_error *
|
|
apply_elf_reloc_x86_64(const struct drgn_relocating_section *relocating,
|
|
uint64_t r_offset, uint32_t r_type,
|
|
const int64_t *r_addend, uint64_t sym_value)
|
|
{
|
|
switch (r_type) {
|
|
case R_X86_64_NONE:
|
|
return NULL;
|
|
case R_X86_64_64:
|
|
return drgn_reloc_add64(relocating, r_offset, r_addend,
|
|
sym_value);
|
|
case R_X86_64_PC32:
|
|
return drgn_reloc_add32(relocating, r_offset, r_addend,
|
|
sym_value
|
|
- (relocating->addr + r_offset));
|
|
/*
|
|
* The only difference between 32 and 32S is how overflow is checked,
|
|
* which we don't do.
|
|
*/
|
|
case R_X86_64_32:
|
|
case R_X86_64_32S:
|
|
return drgn_reloc_add32(relocating, r_offset, r_addend,
|
|
sym_value);
|
|
case R_X86_64_PC64:
|
|
return drgn_reloc_add64(relocating, r_offset, r_addend,
|
|
sym_value
|
|
- (relocating->addr + r_offset));
|
|
default:
|
|
return DRGN_UNKNOWN_RELOCATION_TYPE(r_type);
|
|
}
|
|
}
|
|
|
|
static struct drgn_error *
|
|
linux_kernel_live_direct_mapping_fallback_x86_64(struct drgn_program *prog,
|
|
uint64_t *address_ret,
|
|
uint64_t *size_ret)
|
|
{
|
|
struct drgn_error *err;
|
|
unsigned long page_offset_base_address;
|
|
|
|
*size_ret = UINT64_C(1) << 46;
|
|
err = proc_kallsyms_symbol_addr("page_offset_base",
|
|
&page_offset_base_address);
|
|
if (!err) {
|
|
return drgn_program_read_word(prog, page_offset_base_address,
|
|
false, address_ret);
|
|
} else if (err == &drgn_not_found) {
|
|
/*
|
|
* This is only called for pre-4.11 kernels, so we can assume
|
|
* the old location.
|
|
*/
|
|
*address_ret = UINT64_C(0xffff880000000000);
|
|
return NULL;
|
|
} else {
|
|
return err;
|
|
}
|
|
}
|
|
|
|
struct pgtable_iterator_x86_64 {
|
|
struct pgtable_iterator it;
|
|
uint16_t index[5];
|
|
uint64_t table[5][512];
|
|
};
|
|
|
|
static struct drgn_error *
|
|
linux_kernel_pgtable_iterator_create_x86_64(struct drgn_program *prog,
|
|
struct pgtable_iterator **ret)
|
|
{
|
|
struct pgtable_iterator_x86_64 *it = malloc(sizeof(*it));
|
|
if (!it)
|
|
return &drgn_enomem;
|
|
*ret = &it->it;
|
|
return NULL;
|
|
}
|
|
|
|
|
|
static void linux_kernel_pgtable_iterator_destroy_x86_64(struct pgtable_iterator *_it)
|
|
{
|
|
free(container_of(_it, struct pgtable_iterator_x86_64, it));
|
|
}
|
|
|
|
static void
|
|
linux_kernel_pgtable_iterator_init_x86_64(struct drgn_program *prog,
|
|
struct pgtable_iterator *_it)
|
|
{
|
|
struct pgtable_iterator_x86_64 *it =
|
|
container_of(_it, struct pgtable_iterator_x86_64, it);
|
|
memset(it->index, 0xff, sizeof(it->index));
|
|
}
|
|
|
|
static struct drgn_error *
|
|
linux_kernel_pgtable_iterator_next_x86_64(struct drgn_program *prog,
|
|
struct pgtable_iterator *_it,
|
|
uint64_t *virt_addr_ret,
|
|
uint64_t *phys_addr_ret)
|
|
{
|
|
static const int PAGE_SHIFT = 12;
|
|
static const int PGTABLE_SHIFT = 9;
|
|
static const int PGTABLE_MASK = (1 << PGTABLE_SHIFT) - 1;
|
|
static const uint64_t PRESENT = 0x1;
|
|
static const uint64_t PSE = 0x80; /* a.k.a. huge page */
|
|
static const uint64_t ADDRESS_MASK = UINT64_C(0xffffffffff000);
|
|
struct drgn_error *err;
|
|
bool bswap = drgn_platform_bswap(&prog->platform);
|
|
struct pgtable_iterator_x86_64 *it =
|
|
container_of(_it, struct pgtable_iterator_x86_64, it);
|
|
uint64_t virt_addr = it->it.virt_addr;
|
|
int levels = prog->vmcoreinfo.pgtable_l5_enabled ? 5 : 4, level;
|
|
|
|
uint64_t start_non_canonical =
|
|
(UINT64_C(1) <<
|
|
(PAGE_SHIFT + PGTABLE_SHIFT * levels - 1));
|
|
uint64_t end_non_canonical =
|
|
(UINT64_MAX <<
|
|
(PAGE_SHIFT + PGTABLE_SHIFT * levels - 1));
|
|
if (virt_addr >= start_non_canonical && virt_addr < end_non_canonical) {
|
|
*virt_addr_ret = start_non_canonical;
|
|
*phys_addr_ret = UINT64_MAX;
|
|
it->it.virt_addr = end_non_canonical;
|
|
return NULL;
|
|
}
|
|
|
|
/* Find the lowest level with cached entries. */
|
|
for (level = 0; level < levels; level++) {
|
|
if (it->index[level] < array_size(it->table[level]))
|
|
break;
|
|
}
|
|
/* For every level below that, refill the cache/return pages. */
|
|
for (;; level--) {
|
|
uint64_t table;
|
|
bool table_physical;
|
|
uint16_t index;
|
|
if (level == levels) {
|
|
table = it->it.pgtable;
|
|
table_physical = false;
|
|
} else {
|
|
uint64_t entry = it->table[level][it->index[level]++];
|
|
if (bswap)
|
|
entry = bswap_64(entry);
|
|
table = entry & ADDRESS_MASK;
|
|
if (!(entry & PRESENT) || (entry & PSE) || level == 0) {
|
|
uint64_t mask = (UINT64_C(1) <<
|
|
(PAGE_SHIFT +
|
|
PGTABLE_SHIFT * level)) - 1;
|
|
*virt_addr_ret = virt_addr & ~mask;
|
|
if (entry & PRESENT)
|
|
*phys_addr_ret = table & ~mask;
|
|
else
|
|
*phys_addr_ret = UINT64_MAX;
|
|
it->it.virt_addr = (virt_addr | mask) + 1;
|
|
return NULL;
|
|
}
|
|
table_physical = true;
|
|
}
|
|
index = (virt_addr >>
|
|
(PAGE_SHIFT + PGTABLE_SHIFT * (level - 1))) & PGTABLE_MASK;
|
|
/*
|
|
* It's only marginally more expensive to read 4096 bytes than 8
|
|
* bytes, so we always read to the end of the table.
|
|
*/
|
|
err = drgn_program_read_memory(prog,
|
|
&it->table[level - 1][index],
|
|
table + 8 * index,
|
|
sizeof(it->table[0]) - 8 * index,
|
|
table_physical);
|
|
if (err)
|
|
return err;
|
|
it->index[level - 1] = index;
|
|
}
|
|
}
|
|
|
|
const struct drgn_architecture_info arch_info_x86_64 = {
|
|
.name = "x86-64",
|
|
.arch = DRGN_ARCH_X86_64,
|
|
.default_flags = (DRGN_PLATFORM_IS_64_BIT |
|
|
DRGN_PLATFORM_IS_LITTLE_ENDIAN),
|
|
DRGN_ARCHITECTURE_REGISTERS,
|
|
.default_dwarf_cfi_row = &default_dwarf_cfi_row_x86_64,
|
|
.orc_to_cfi = orc_to_cfi_x86_64,
|
|
.fallback_unwind = fallback_unwind_x86_64,
|
|
.pt_regs_get_initial_registers = pt_regs_get_initial_registers_x86_64,
|
|
.prstatus_get_initial_registers = prstatus_get_initial_registers_x86_64,
|
|
.linux_kernel_get_initial_registers =
|
|
linux_kernel_get_initial_registers_x86_64,
|
|
.apply_elf_reloc = apply_elf_reloc_x86_64,
|
|
.linux_kernel_live_direct_mapping_fallback =
|
|
linux_kernel_live_direct_mapping_fallback_x86_64,
|
|
.linux_kernel_pgtable_iterator_create =
|
|
linux_kernel_pgtable_iterator_create_x86_64,
|
|
.linux_kernel_pgtable_iterator_destroy =
|
|
linux_kernel_pgtable_iterator_destroy_x86_64,
|
|
.linux_kernel_pgtable_iterator_init =
|
|
linux_kernel_pgtable_iterator_init_x86_64,
|
|
.linux_kernel_pgtable_iterator_next =
|
|
linux_kernel_pgtable_iterator_next_x86_64,
|
|
};
|