libdrgn: add ORC unwinder

The Linux kernel has its own stack unwinding format for x86-64 called
ORC: https://www.kernel.org/doc/html/latest/x86/orc-unwinder.html. It is
essentially a simplified, less complete version of DWARF CFI. ORC is
generated by analyzing machine code, so it is present for all but a few
ignored functions. In contrast, DWARF CFI is generated by the compiler
and is therefore missing for functions written in assembly and inline
assembly (which is widespread in the kernel).

This implements an ORC stack unwinder: it applies ELF relocations to the
ORC sections, adds a new DRGN_CFI_RULE_AT_REGISTER_ADD_OFFSET CFI rule
kind, parses and efficiently stores ORC data, and translates ORC to drgn
CFI rules. This will allow us to stack trace through assembly code,
interrupts, and system calls.

Signed-off-by: Omar Sandoval <osandov@osandov.com>
This commit is contained in:
Omar Sandoval 2021-03-16 15:39:37 -07:00
parent 090064f20d
commit 630d39e345
11 changed files with 745 additions and 72 deletions

View File

@ -92,6 +92,14 @@ Some of drgn's behavior can be modified through environment variables:
:exc:`drgn.MissingDebugInfoError`. Any additional errors are truncated. The
default is 5; -1 is unlimited.
``DRGN_PREFER_ORC_UNWINDER``
Whether to prefer using `ORC
<https://www.kernel.org/doc/html/latest/x86/orc-unwinder.html>`_ over DWARF
for stack unwinding (0 or 1). The default is 0. Note that drgn will always
fall back to ORC for functions lacking DWARF call frame information and
vice versa. This environment variable is mainly intended for testing and
may be ignored in the future.
``DRGN_USE_LIBKDUMPFILE_FOR_ELF``
Whether drgn should use libkdumpfile for ELF vmcores (0 or 1). The default
is 0. This functionality will be removed in the future.

View File

@ -9,6 +9,7 @@
#include "drgn.h"
#include "error.h"
#include "linux_kernel.h"
#include "orc.h"
#include "platform.h" // IWYU pragma: associated
#include "program.h"
#include "register_state.h"
@ -39,6 +40,187 @@
#include "arch_x86_64.inc"
/*
 * Translate one ORC unwinder entry into drgn CFI rules for x86-64.
 *
 * On success, *row_ret holds the rules for the CFA, rip, rsp and rbp (plus any
 * registers recoverable from a saved register frame), *interrupted_ret says
 * whether the frame was interrupted (i.e., the entry describes saved
 * registers rather than a call), and *ret_addr_regno_ret is always rip.
 *
 * Returns NULL on success, &drgn_enomem if a rule could not be stored,
 * &drgn_not_found if the entry carries no unwind information (undefined SP
 * base that is not marked as the end of the stack), or a DRGN_ERROR_OTHER
 * error for an unrecognized register or entry type.
 */
static struct drgn_error *
orc_to_cfi_x86_64(const struct drgn_orc_entry *orc,
		  struct drgn_cfi_row **row_ret, bool *interrupted_ret,
		  drgn_register_number *ret_addr_regno_ret)
{
	/* ORC base register encodings stored in the entry's flags. */
	enum {
		ORC_REG_UNDEFINED = 0,
		ORC_REG_PREV_SP = 1,
		ORC_REG_DX = 2,
		ORC_REG_DI = 3,
		ORC_REG_BP = 4,
		ORC_REG_SP = 5,
		ORC_REG_R10 = 6,
		ORC_REG_R13 = 7,
		ORC_REG_BP_INDIRECT = 8,
		ORC_REG_SP_INDIRECT = 9,
	};

	/* Start from the empty row and fill in what this entry describes. */
	if (!drgn_cfi_row_copy(row_ret, drgn_empty_cfi_row))
		return &drgn_enomem;

	/* First, the rule for computing the CFA (the previous SP). */
	struct drgn_cfi_rule rule;
	switch (drgn_orc_sp_reg(orc)) {
	case ORC_REG_UNDEFINED:
		if (drgn_orc_is_end(orc)) {
			/*
			 * End of the stack: return the empty row. The caller
			 * passes uninitialized locals for the outputs, so give
			 * them well-defined values instead of leaving them
			 * indeterminate.
			 */
			*interrupted_ret = false;
			*ret_addr_regno_ret = DRGN_REGISTER_NUMBER(rip);
			return NULL;
		}
		return &drgn_not_found;
	case ORC_REG_SP:
		/* CFA = rsp + sp_offset */
		rule.kind = DRGN_CFI_RULE_REGISTER_PLUS_OFFSET;
		rule.regno = DRGN_REGISTER_NUMBER(rsp);
		rule.offset = orc->sp_offset;
		break;
	case ORC_REG_BP:
		/* CFA = rbp + sp_offset */
		rule.kind = DRGN_CFI_RULE_REGISTER_PLUS_OFFSET;
		rule.regno = DRGN_REGISTER_NUMBER(rbp);
		rule.offset = orc->sp_offset;
		break;
	case ORC_REG_SP_INDIRECT:
		/*
		 * CFA = *rsp + sp_offset. The base register is the stack
		 * pointer, not the frame pointer: the Linux ORC unwinder
		 * dereferences the current SP and then adds the offset.
		 */
		rule.kind = DRGN_CFI_RULE_AT_REGISTER_ADD_OFFSET;
		rule.regno = DRGN_REGISTER_NUMBER(rsp);
		rule.offset = orc->sp_offset;
		break;
	case ORC_REG_BP_INDIRECT:
		/* CFA = *(rbp + sp_offset) */
		rule.kind = DRGN_CFI_RULE_AT_REGISTER_PLUS_OFFSET;
		rule.regno = DRGN_REGISTER_NUMBER(rbp);
		rule.offset = orc->sp_offset;
		break;
	case ORC_REG_R10:
		/* CFA = r10 */
		rule.kind = DRGN_CFI_RULE_REGISTER_PLUS_OFFSET;
		rule.regno = DRGN_REGISTER_NUMBER(r10);
		rule.offset = 0;
		break;
	case ORC_REG_R13:
		/* CFA = r13 */
		rule.kind = DRGN_CFI_RULE_REGISTER_PLUS_OFFSET;
		rule.regno = DRGN_REGISTER_NUMBER(r13);
		rule.offset = 0;
		break;
	case ORC_REG_DI:
		/* CFA = rdi */
		rule.kind = DRGN_CFI_RULE_REGISTER_PLUS_OFFSET;
		rule.regno = DRGN_REGISTER_NUMBER(rdi);
		rule.offset = 0;
		break;
	case ORC_REG_DX:
		/* CFA = rdx */
		rule.kind = DRGN_CFI_RULE_REGISTER_PLUS_OFFSET;
		rule.regno = DRGN_REGISTER_NUMBER(rdx);
		rule.offset = 0;
		break;
	default:
		return drgn_error_format(DRGN_ERROR_OTHER,
					 "unknown ORC SP base register %d",
					 drgn_orc_sp_reg(orc));
	}
	if (!drgn_cfi_row_set_cfa(row_ret, &rule))
		return &drgn_enomem;

	/* Next, rip/rsp (and saved registers) depending on the entry type. */
	switch (drgn_orc_type(orc)) {
	case DRGN_ORC_TYPE_CALL:
		/* The return address was pushed just below the CFA. */
		rule.kind = DRGN_CFI_RULE_AT_CFA_PLUS_OFFSET;
		rule.offset = -8;
		if (!drgn_cfi_row_set_register(row_ret,
					       DRGN_REGISTER_NUMBER(rip),
					       &rule))
			return &drgn_enomem;
		/* The caller's rsp is the CFA itself. */
		rule.kind = DRGN_CFI_RULE_CFA_PLUS_OFFSET;
		rule.offset = 0;
		if (!drgn_cfi_row_set_register(row_ret,
					       DRGN_REGISTER_NUMBER(rsp),
					       &rule))
			return &drgn_enomem;
		*interrupted_ret = false;
		break;
#define SET_AT_CFA_RULE(reg, cfa_offset) do {					\
	rule.kind = DRGN_CFI_RULE_AT_CFA_PLUS_OFFSET;				\
	rule.offset = (cfa_offset);						\
	if (!drgn_cfi_row_set_register(row_ret, DRGN_REGISTER_NUMBER(reg),	\
				       &rule))					\
		return &drgn_enomem;						\
} while (0)
	case DRGN_ORC_TYPE_REGS:
		/*
		 * The CFA points at a full saved register frame; each register
		 * is read from its fixed byte offset within that frame.
		 */
		SET_AT_CFA_RULE(rip, 128);
		SET_AT_CFA_RULE(rsp, 152);
		SET_AT_CFA_RULE(r15, 0);
		SET_AT_CFA_RULE(r14, 8);
		SET_AT_CFA_RULE(r13, 16);
		SET_AT_CFA_RULE(r12, 24);
		SET_AT_CFA_RULE(rbp, 32);
		SET_AT_CFA_RULE(rbx, 40);
		SET_AT_CFA_RULE(r11, 48);
		SET_AT_CFA_RULE(r10, 56);
		SET_AT_CFA_RULE(r9, 64);
		SET_AT_CFA_RULE(r8, 72);
		SET_AT_CFA_RULE(rax, 80);
		SET_AT_CFA_RULE(rcx, 88);
		SET_AT_CFA_RULE(rdx, 96);
		SET_AT_CFA_RULE(rsi, 104);
		SET_AT_CFA_RULE(rdi, 112);
		*interrupted_ret = true;
		break;
	case DRGN_ORC_TYPE_REGS_PARTIAL:
		/* Only the trailing part of the frame (rip, ..., rsp) exists. */
		SET_AT_CFA_RULE(rip, 0);
		SET_AT_CFA_RULE(rsp, 24);
#undef SET_AT_CFA_RULE
#define SET_SAME_VALUE_RULE(reg) do {						\
	rule.kind = DRGN_CFI_RULE_REGISTER_PLUS_OFFSET;				\
	rule.regno = DRGN_REGISTER_NUMBER(reg);					\
	rule.offset = 0;							\
	if (!drgn_cfi_row_set_register(row_ret, DRGN_REGISTER_NUMBER(reg),	\
				       &rule))					\
		return &drgn_enomem;						\
} while (0)
		/*
		 * This ORC entry is for an interrupt handler before it saves
		 * the whole pt_regs. These registers are not clobbered before
		 * they are saved, so they should have the same value. See Linux
		 * kernel commit 81b67439d147 ("x86/unwind/orc: Fix premature
		 * unwind stoppage due to IRET frames").
		 *
		 * This probably also applies to other registers, but to stay on
		 * the safe side we only handle registers used by ORC.
		 */
		SET_SAME_VALUE_RULE(r10);
		SET_SAME_VALUE_RULE(r13);
		SET_SAME_VALUE_RULE(rdi);
		SET_SAME_VALUE_RULE(rdx);
#undef SET_SAME_VALUE_RULE
		*interrupted_ret = true;
		break;
	default:
		return drgn_error_format(DRGN_ERROR_OTHER,
					 "unknown ORC entry type %d",
					 drgn_orc_type(orc));
	}

	/*
	 * Finally, the frame pointer. This deliberately overrides any rbp rule
	 * set above (e.g., by DRGN_ORC_TYPE_REGS).
	 */
	switch (drgn_orc_bp_reg(orc)) {
	case ORC_REG_UNDEFINED:
		/* rbp was not saved: it keeps its current value. */
		rule.kind = DRGN_CFI_RULE_REGISTER_PLUS_OFFSET;
		rule.regno = DRGN_REGISTER_NUMBER(rbp);
		rule.offset = 0;
		break;
	case ORC_REG_PREV_SP:
		/* rbp = *(CFA + bp_offset) */
		rule.kind = DRGN_CFI_RULE_AT_CFA_PLUS_OFFSET;
		rule.offset = orc->bp_offset;
		break;
	case ORC_REG_BP:
		/* rbp = *(rbp + bp_offset) */
		rule.kind = DRGN_CFI_RULE_AT_REGISTER_PLUS_OFFSET;
		rule.regno = DRGN_REGISTER_NUMBER(rbp);
		rule.offset = orc->bp_offset;
		break;
	default:
		return drgn_error_format(DRGN_ERROR_OTHER,
					 "unknown ORC BP base register %d",
					 drgn_orc_bp_reg(orc));
	}
	if (!drgn_cfi_row_set_register(row_ret, DRGN_REGISTER_NUMBER(rbp),
				       &rule))
		return &drgn_enomem;

	*ret_addr_regno_ret = DRGN_REGISTER_NUMBER(rip);
	return NULL;
}
static struct drgn_error *
get_registers_from_frame_pointer(struct drgn_program *prog,
uint64_t frame_pointer,
@ -568,6 +750,7 @@ const struct drgn_architecture_info arch_info_x86_64 = {
DRGN_CFI_SAME_VALUE_INIT(DRGN_REGISTER_NUMBER(r14)),
DRGN_CFI_SAME_VALUE_INIT(DRGN_REGISTER_NUMBER(r15)),
),
.orc_to_cfi = orc_to_cfi_x86_64,
.fallback_unwind = fallback_unwind_x86_64,
.pt_regs_get_initial_registers = pt_regs_get_initial_registers_x86_64,
.prstatus_get_initial_registers = prstatus_get_initial_registers_x86_64,

View File

@ -49,17 +49,27 @@ enum drgn_cfi_rule_kind {
DRGN_CFI_RULE_UNDEFINED,
/**
* Register value in the caller is stored at the CFA in the current
* frame plus an offset.
* frame plus an offset: `*(cfa + offset)`.
*/
DRGN_CFI_RULE_AT_CFA_PLUS_OFFSET,
/**
* Register value in the caller is the CFA in the current frame plus an
* offset.
* offset: `cfa + offset`.
*/
DRGN_CFI_RULE_CFA_PLUS_OFFSET,
/**
* Register value in the caller is stored at the value of a register in
* the current frame plus an offset: `*(reg + offset)`.
*/
DRGN_CFI_RULE_AT_REGISTER_PLUS_OFFSET,
/**
* Register value in the caller is an offset plus the value stored at
* the value of a register in the current frame: `(*reg) + offset`.
*/
DRGN_CFI_RULE_AT_REGISTER_ADD_OFFSET,
/**
* Register value in the caller is the value of a register in the
* current frame plus an offset.
* current frame plus an offset: `reg + offset`.
*
* Note that this can also be used to represent DWARF's "same value"
* rule by using the same register with an offset of 0.
@ -89,8 +99,10 @@ struct drgn_cfi_rule {
union {
/**
* Offset for @ref DRGN_CFI_RULE_AT_CFA_PLUS_OFFSET, @ref
* DRGN_CFI_RULE_CFA_PLUS_OFFSET, and @ref
* DRGN_CFI_RULE_REGISTER_PLUS_OFFSET.
* DRGN_CFI_RULE_CFA_PLUS_OFFSET, @ref
* DRGN_CFI_RULE_AT_REGISTER_PLUS_OFFSET, @ref
* DRGN_CFI_RULE_AT_REGISTER_ADD_OFFSET, and @ref
* DRGN_CFI_RULE_REGISTER_PLUS_OFFSET.
*/
int64_t offset;
/**

View File

@ -26,6 +26,7 @@
#include "lazy_object.h"
#include "linux_kernel.h"
#include "object.h"
#include "orc.h"
#include "path.h"
#include "program.h"
#include "register_state.h"
@ -101,6 +102,8 @@ static const char * const drgn_debug_scn_names[] = {
[DRGN_SCN_DEBUG_LINE] = ".debug_line",
[DRGN_SCN_DEBUG_FRAME] = ".debug_frame",
[DRGN_SCN_EH_FRAME] = ".eh_frame",
[DRGN_SCN_ORC_UNWIND_IP] = ".orc_unwind_ip",
[DRGN_SCN_ORC_UNWIND] = ".orc_unwind",
[DRGN_SCN_TEXT] = ".text",
[DRGN_SCN_GOT] = ".got",
};
@ -265,6 +268,8 @@ drgn_debug_info_module_destroy(struct drgn_debug_info_module *module)
{
if (module) {
drgn_error_destroy(module->err);
free(module->orc_entries);
free(module->orc_pc_offsets);
free(module->fdes);
free(module->cies);
elf_end(module->elf);
@ -480,7 +485,7 @@ drgn_debug_info_report_module(struct drgn_debug_info_load_state *load,
if (new_ret)
*new_ret = true;
struct drgn_debug_info_module *module = malloc(sizeof(*module));
struct drgn_debug_info_module *module = calloc(1, sizeof(*module));
if (!module) {
err = &drgn_enomem;
goto free;
@ -497,24 +502,11 @@ drgn_debug_info_report_module(struct drgn_debug_info_load_state *load,
free(module);
goto free;
}
} else {
module->name = NULL;
}
module->dwfl_module = dwfl_module;
memset(module->scns, 0, sizeof(module->scns));
memset(module->scn_data, 0, sizeof(module->scn_data));
module->pcrel_base = 0;
module->textrel_base = 0;
module->datarel_base = 0;
module->cies = NULL;
module->fdes = NULL;
module->num_fdes = 0;
module->parsed_frames = false;
module->path = path_key;
module->fd = fd;
module->elf = elf;
module->err = NULL;
module->next = NULL;
/* path_key, fd and elf are owned by the module now. */
@ -856,7 +848,8 @@ static struct drgn_error *relocate_elf_file(Elf *elf)
goto out;
}
if (strstartswith(scnname, ".rela.debug_")) {
if (strstartswith(scnname, ".rela.debug_") ||
strstartswith(scnname, ".rela.orc_")) {
Elf_Scn *scn = elf_getscn(elf, shdr->sh_info);
if (!scn) {
err = drgn_error_libelf();
@ -4265,8 +4258,329 @@ out:
return err;
}
/*
 * Look up the CFI row for unbiased_pc using the module's DWARF FDEs.
 *
 * Returns &drgn_not_found if no FDE was found for the address. On success,
 * the interrupted flag and return address register come from the CIE that the
 * matching FDE refers to.
 */
static struct drgn_error *
drgn_debug_info_find_dwarf_cfi(struct drgn_debug_info_module *module,
			       uint64_t unbiased_pc,
			       struct drgn_cfi_row **row_ret,
			       bool *interrupted_ret,
			       drgn_register_number *ret_addr_regno_ret)
{
	struct drgn_dwarf_fde *match;
	struct drgn_error *err = drgn_debug_info_find_fde(module, unbiased_pc,
							  &match);
	if (err)
		return err;
	if (match == NULL)
		return &drgn_not_found;

	err = drgn_debug_info_find_cfi_in_fde(module, match, unbiased_pc,
					      row_ret);
	if (!err) {
		/* Only publish the CIE properties once the row is valid. */
		*interrupted_ret = module->cies[match->cie].signal_frame;
		*ret_addr_regno_ret =
			module->cies[match->cie].return_address_register;
	}
	return err;
}
/*
 * Compute the program counter of raw ORC entry i straight from the
 * .orc_unwind_ip section data (i indexes the section as stored in the file,
 * before any sorting or filtering).
 *
 * Each 32-bit slot holds a signed offset relative to the slot's own address,
 * which is orc_pc_base + 4 * i.
 */
static inline uint64_t drgn_raw_orc_pc(struct drgn_debug_info_module *module,
				       size_t i)
{
	const int32_t *slots = module->scn_data[DRGN_SCN_ORC_UNWIND_IP]->d_buf;
	int32_t rel;
	/* memcpy() rather than a direct load in case the buffer is unaligned. */
	memcpy(&rel, &slots[i], sizeof(rel));
	if (drgn_platform_bswap(&module->platform))
		rel = bswap_32(rel);
	return module->orc_pc_base + UINT64_C(4) * i + rel;
}
/*
 * qsort_r() comparator for an array of raw ORC entry indices; arg is the
 * owning module. Orders by ascending PC and, for equal PCs, places
 * terminator entries before non-terminator entries.
 */
static int compare_orc_entries(const void *a, const void *b, void *arg)
{
	struct drgn_debug_info_module *module = arg;
	const size_t lhs = *(const size_t *)a;
	const size_t rhs = *(const size_t *)b;

	const uint64_t lhs_pc = drgn_raw_orc_pc(module, lhs);
	const uint64_t rhs_pc = drgn_raw_orc_pc(module, rhs);
	if (lhs_pc != rhs_pc)
		return lhs_pc < rhs_pc ? -1 : 1;

	/*
	 * If two entries have the same PC, then one is probably a "terminator"
	 * at the end of a compilation unit. Prefer the real entry.
	 */
	const struct drgn_orc_entry *raw =
		module->scn_data[DRGN_SCN_ORC_UNWIND]->d_buf;
	uint16_t lhs_flags, rhs_flags;
	memcpy(&lhs_flags, &raw[lhs].flags, sizeof(lhs_flags));
	memcpy(&rhs_flags, &raw[rhs].flags, sizeof(rhs_flags));
	if (drgn_platform_bswap(&module->platform)) {
		lhs_flags = bswap_16(lhs_flags);
		rhs_flags = bswap_16(rhs_flags);
	}
	const int lhs_term = drgn_orc_flags_is_terminator(lhs_flags);
	const int rhs_term = drgn_orc_flags_is_terminator(rhs_flags);
	return rhs_term - lhs_term;
}
/*
 * Append indices[i] to the compacted prefix indices[0..num_entries) unless
 * the raw entry it refers to is byte-for-byte identical to the last kept one.
 * Returns the new length of the prefix.
 */
static size_t keep_orc_entry(struct drgn_debug_info_module *module,
			     size_t *indices, size_t num_entries, size_t i)
{
	const struct drgn_orc_entry *raw =
		module->scn_data[DRGN_SCN_ORC_UNWIND]->d_buf;

	if (num_entries > 0) {
		const struct drgn_orc_entry *last_kept =
			&raw[indices[num_entries - 1]];
		const struct drgn_orc_entry *candidate = &raw[indices[i]];
		/*
		 * The previous entry is identical to this one, so we can skip
		 * this entry (which effectively merges it into the previous
		 * one). This usually happens for "terminator" entries.
		 */
		if (memcmp(last_kept, candidate, sizeof(raw[0])) == 0)
			return num_entries;
	}

	indices[num_entries++] = indices[i];
	return num_entries;
}
/*
 * The vast majority of ORC entries are redundant with DWARF CFI, and it's a
 * waste to store and binary search those entries. This removes ORC entries that
 * are entirely shadowed by DWARF FDEs.
 *
 * indices[0..num_entries) contains raw ORC entry indices (positions in the
 * .orc_unwind_ip/.orc_unwind sections) ordered by ascending PC. The surviving
 * indices are compacted to the front of the array and their count is
 * returned. Because the array may have been reordered by sorting, every PC
 * lookup must go through indices[] rather than using the array position as
 * the raw index.
 *
 * NOTE(review): this walks module->fdes assuming it is ordered by
 * initial_location -- confirm against the FDE parser.
 */
static size_t remove_fdes_from_orc(struct drgn_debug_info_module *module,
				   size_t *indices, size_t num_entries)
{
	/* Nothing can be shadowed without FDEs; nothing to do without entries. */
	if (module->num_fdes == 0 || num_entries == 0)
		return num_entries;

	struct drgn_dwarf_fde *fde = module->fdes;
	struct drgn_dwarf_fde *last_fde = &module->fdes[module->num_fdes - 1];

	size_t new_num_entries = 0;

	/* Keep any entries that start before the first DWARF FDE. */
	uint64_t start_pc;
	for (;;) {
		start_pc = drgn_raw_orc_pc(module, indices[new_num_entries]);
		if (fde->initial_location <= start_pc)
			break;
		new_num_entries++;
		if (new_num_entries == num_entries)
			return num_entries;
	}

	for (size_t i = new_num_entries; i < num_entries - 1; i++) {
		/* An ORC entry extends up to the start of the next one. */
		uint64_t end_pc = drgn_raw_orc_pc(module, indices[i + 1]);

		/*
		 * Find the last FDE that starts at or before the current ORC
		 * entry.
		 */
		while (fde != last_fde && fde[1].initial_location <= start_pc)
			fde++;

		/*
		 * Check whether the current ORC entry is completely covered by
		 * one or more FDEs.
		 */
		while (end_pc - fde->initial_location > fde->address_range) {
			/*
			 * The current FDE doesn't cover the current ORC entry.
			 */
			if (fde == last_fde) {
				/*
				 * There are no more FDEs. Keep the remaining
				 * ORC entries.
				 */
				if (i != new_num_entries) {
					memmove(&indices[new_num_entries],
						&indices[i],
						(num_entries - i) *
						sizeof(indices[0]));
				}
				return new_num_entries + (num_entries - i);
			}
			if (fde[1].initial_location - fde->initial_location
			    > fde->address_range) {
				/*
				 * There is a gap between the current FDE and
				 * the next FDE that exposes the current ORC
				 * entry. Keep it.
				 */
				new_num_entries = keep_orc_entry(module,
								 indices,
								 new_num_entries,
								 i);
				break;
			}
			fde++;
		}

		start_pc = end_pc;
	}
	/* We don't know where the last ORC entry ends, so always keep it. */
	return keep_orc_entry(module, indices, new_num_entries,
			      num_entries - 1);
}
static struct drgn_error *
drgn_debug_info_parse_orc(struct drgn_debug_info_module *module)
{
struct drgn_error *err;
if (!module->platform.arch->orc_to_cfi ||
!module->scns[DRGN_SCN_ORC_UNWIND_IP] ||
!module->scns[DRGN_SCN_ORC_UNWIND])
return NULL;
GElf_Shdr shdr_mem, *shdr;
shdr = gelf_getshdr(module->scns[DRGN_SCN_ORC_UNWIND_IP], &shdr_mem);
if (!shdr)
return drgn_error_libelf();
module->orc_pc_base = shdr->sh_addr;
if (!module->scn_data[DRGN_SCN_ORC_UNWIND_IP]) {
err = read_elf_section(module->scns[DRGN_SCN_ORC_UNWIND_IP],
&module->scn_data[DRGN_SCN_ORC_UNWIND_IP]);
if (err)
return err;
}
Elf_Data *orc_unwind_ip = module->scn_data[DRGN_SCN_ORC_UNWIND_IP];
if (!module->scn_data[DRGN_SCN_ORC_UNWIND]) {
err = read_elf_section(module->scns[DRGN_SCN_ORC_UNWIND],
&module->scn_data[DRGN_SCN_ORC_UNWIND]);
if (err)
return err;
}
Elf_Data *orc_unwind = module->scn_data[DRGN_SCN_ORC_UNWIND];
size_t num_entries = orc_unwind_ip->d_size / sizeof(int32_t);
if (orc_unwind_ip->d_size % sizeof(int32_t) != 0 ||
orc_unwind->d_size % sizeof(struct drgn_orc_entry) != 0 ||
orc_unwind->d_size / sizeof(struct drgn_orc_entry) != num_entries) {
return drgn_error_create(DRGN_ERROR_OTHER,
".orc_unwind_ip and/or .orc_unwind has invalid size");
}
if (!num_entries)
return NULL;
size_t *indices = malloc_array(num_entries, sizeof(indices[0]));
if (!indices)
return &drgn_enomem;
for (size_t i = 0; i < num_entries; i++)
indices[i] = i;
/*
* Sort the ORC entries for binary search. Since Linux kernel commit
* f14bf6a350df ("x86/unwind/orc: Remove boot-time ORC unwind tables
* sorting") (in v5.6), this is already sorted for vmlinux, so only sort
* it if necessary.
*/
for (size_t i = 1; i < num_entries; i++) {
if (compare_orc_entries(&indices[i - 1], &indices[i],
module) > 0) {
qsort_r(indices, num_entries, sizeof(indices[0]),
compare_orc_entries, module);
break;
}
}
num_entries = remove_fdes_from_orc(module, indices, num_entries);
int32_t *pc_offsets = malloc_array(num_entries, sizeof(pc_offsets[0]));
if (!pc_offsets) {
err = &drgn_enomem;
goto out;
}
struct drgn_orc_entry *entries = malloc_array(num_entries,
sizeof(entries[0]));
if (!entries) {
free(pc_offsets);
err = &drgn_enomem;
goto out;
}
const int32_t *orig_offsets = orc_unwind_ip->d_buf;
const struct drgn_orc_entry *orig_entries = orc_unwind->d_buf;
bool bswap = drgn_platform_bswap(&module->platform);
for (size_t i = 0; i < num_entries; i++) {
size_t index = indices[i];
int32_t offset;
memcpy(&offset, &orig_offsets[index], sizeof(offset));
struct drgn_orc_entry entry;
memcpy(&entry, &orig_entries[index], sizeof(entry));
if (bswap) {
offset = bswap_32(offset);
entry.sp_offset = bswap_16(entry.sp_offset);
entry.bp_offset = bswap_16(entry.bp_offset);
entry.flags = bswap_16(entry.flags);
}
pc_offsets[i] = UINT64_C(4) * index + offset - UINT64_C(4) * i;
entries[i] = entry;
}
module->orc_pc_offsets = pc_offsets;
module->orc_entries = entries;
module->num_orc_entries = num_entries;
err = NULL;
out:
free(indices);
return err;
}
/*
 * Program counter at which parsed ORC entry i starts:
 * orc_pc_base + 4 * i + orc_pc_offsets[i].
 */
static inline uint64_t drgn_orc_pc(struct drgn_debug_info_module *module,
				   size_t i)
{
	uint64_t pc = module->orc_pc_base;
	pc += UINT64_C(4) * i;
	pc += module->orc_pc_offsets[i];
	return pc;
}
/*
 * Look up the CFI row for unbiased_pc using the module's ORC data, parsing
 * that data on first use.
 *
 * Returns &drgn_not_found if the module has no ORC entries or the address
 * lies before the first one; otherwise returns whatever the architecture's
 * orc_to_cfi callback returns for the covering entry.
 */
static struct drgn_error *
drgn_debug_info_find_orc_cfi(struct drgn_debug_info_module *module,
			     uint64_t unbiased_pc,
			     struct drgn_cfi_row **row_ret,
			     bool *interrupted_ret,
			     drgn_register_number *ret_addr_regno_ret)
{
	if (!module->parsed_orc) {
		struct drgn_error *err = drgn_debug_info_parse_orc(module);
		if (err)
			return err;
		module->parsed_orc = true;
	}

	/*
	 * We don't know the maximum program counter covered by the ORC data,
	 * but the last entry seems to always be a terminator, so it doesn't
	 * matter. All addresses beyond the max will fall into the last entry.
	 */
	const size_t count = module->num_orc_entries;
	if (count == 0 || unbiased_pc < drgn_orc_pc(module, 0))
		return &drgn_not_found;

	/*
	 * Binary search for the first entry starting after unbiased_pc; the
	 * entry just before it is the one covering the address. Entry 0 starts
	 * at or before unbiased_pc (checked above), so `first_after` ends up
	 * >= 1.
	 */
	size_t first_after = 0, limit = count;
	while (first_after < limit) {
		const size_t mid = first_after + (limit - first_after) / 2;
		if (unbiased_pc < drgn_orc_pc(module, mid))
			limit = mid;
		else
			first_after = mid + 1;
	}
	const struct drgn_orc_entry *entry =
		&module->orc_entries[first_after - 1];
	return module->platform.arch->orc_to_cfi(entry, row_ret,
						 interrupted_ret,
						 ret_addr_regno_ret);
}
struct drgn_error *
drgn_debug_info_module_find_cfi(struct drgn_debug_info_module *module,
drgn_debug_info_module_find_cfi(struct drgn_program *prog,
struct drgn_debug_info_module *module,
uint64_t pc, struct drgn_cfi_row **row_ret,
bool *interrupted_ret,
drgn_register_number *ret_addr_regno_ret)
@ -4276,19 +4590,27 @@ drgn_debug_info_module_find_cfi(struct drgn_debug_info_module *module,
Dwarf_Addr bias;
dwfl_module_info(module->dwfl_module, NULL, NULL, NULL, &bias, NULL,
NULL, NULL);
uint64_t unbiased_pc = pc - bias;
struct drgn_dwarf_fde *fde;
err = drgn_debug_info_find_fde(module, pc - bias, &fde);
if (err)
return err;
if (!fde)
return &drgn_not_found;
err = drgn_debug_info_find_cfi_in_fde(module, fde, pc - bias, row_ret);
if (err)
return err;
*interrupted_ret = module->cies[fde->cie].signal_frame;
*ret_addr_regno_ret = module->cies[fde->cie].return_address_register;
return NULL;
if (prog->prefer_orc_unwinder) {
err = drgn_debug_info_find_orc_cfi(module, unbiased_pc, row_ret,
interrupted_ret,
ret_addr_regno_ret);
if (err != &drgn_not_found)
return err;
return drgn_debug_info_find_dwarf_cfi(module, unbiased_pc,
row_ret, interrupted_ret,
ret_addr_regno_ret);
} else {
err = drgn_debug_info_find_dwarf_cfi(module, unbiased_pc,
row_ret, interrupted_ret,
ret_addr_regno_ret);
if (err != &drgn_not_found)
return err;
return drgn_debug_info_find_orc_cfi(module, unbiased_pc,
row_ret, interrupted_ret,
ret_addr_regno_ret);
}
}
struct drgn_error *

View File

@ -61,6 +61,8 @@ enum drgn_debug_info_scn {
/* Sections whose data we should cache when it is first used. */
DRGN_SCN_DEBUG_FRAME = DRGN_NUM_DEBUG_SCN_DATA_PRECACHE,
DRGN_SCN_EH_FRAME,
DRGN_SCN_ORC_UNWIND_IP,
DRGN_SCN_ORC_UNWIND,
DRGN_NUM_DEBUG_SCN_DATA,
@ -109,8 +111,44 @@ struct drgn_debug_info_module {
struct drgn_dwarf_fde *fdes;
/** Number of elements in @ref drgn_debug_info_module::fdes. */
size_t num_fdes;
/**
* Base for calculating program counter corresponding to an ORC unwinder
* entry.
*
* This is the address of the `.orc_unwind_ip` ELF section.
*
* @sa drgn_debug_info_module::orc_entries
*/
uint64_t orc_pc_base;
/**
* Offsets for calculating program counter corresponding to an ORC
* unwinder entry.
*
* This is the contents of the `.orc_unwind_ip` ELF section, byte
* swapped to the host's byte order if necessary.
*
* @sa drgn_debug_info_module::orc_entries
*/
int32_t *orc_pc_offsets;
/**
* ORC unwinder entries.
*
* This is the contents of the `.orc_unwind` ELF section, byte swapped
* to the host's byte order if necessary.
*
* Entry `i` specifies how to unwind the stack if
* `orc_pc(i) <= PC < orc_pc(i + 1)`, where
* `orc_pc(i) = orc_pc_base + 4 * i + orc_pc_offsets[i]`.
*/
struct drgn_orc_entry *orc_entries;
/** Number of ORC unwinder entries. */
size_t num_orc_entries;
/** Whether .debug_frame and .eh_frame have been parsed. */
bool parsed_frames;
/** Whether ORC unwinder data has been parsed. */
bool parsed_orc;
/*
* path, elf, and fd are used when an ELF file was reported with
@ -353,7 +391,8 @@ drgn_debug_info_find_object(const char *name, size_t name_len,
* drgn_not_found if CFI wasn't found.
*/
struct drgn_error *
drgn_debug_info_module_find_cfi(struct drgn_debug_info_module *module,
drgn_debug_info_module_find_cfi(struct drgn_program *prog,
struct drgn_debug_info_module *module,
uint64_t pc, struct drgn_cfi_row **row_ret,
bool *interrupted_ret,
drgn_register_number *ret_addr_regno_ret);

63
libdrgn/orc.h Normal file
View File

@ -0,0 +1,63 @@
// Copyright (c) Facebook, Inc. and its affiliates.
// SPDX-License-Identifier: GPL-3.0+
/**
* @file
*
* ORC unwinder definitions.
*
* As of Linux v5.12, ORC is only defined for x86-64. This file assumes that the
* overall format would be the same for other architectures other than
* architecture-specific register numbers, but this may require reorganization
* if that isn't the case.
*/
#ifndef DRGN_ORC_H
#define DRGN_ORC_H
#include <stdbool.h>
#include <stdint.h>
/*
 * One ORC unwinder entry, mirroring the record layout of the `.orc_unwind`
 * section.
 */
struct drgn_orc_entry {
/* Signed offset applied when computing the CFA from the SP base register. */
int16_t sp_offset;
/* Signed offset used to locate the saved frame pointer. */
int16_t bp_offset;
/*
* This is represented by 4 bit fields in the Linux kernel, but this is
* easier to deal with.
*
* Bits 0-3 are the SP base register, bits 4-7 are the BP base register,
* bits 8-9 are the entry type, and bit 10 is the end-of-stack marker; use
* the drgn_orc_*() accessors rather than decoding this directly.
*/
uint16_t flags;
};
/*
 * These correspond to UNWIND_HINT_* in the Linux kernel.
 *
 * They are the possible values returned by drgn_orc_type().
 */
enum {
/* Ordinary frame created by a call instruction. */
DRGN_ORC_TYPE_CALL = 0,
/* Frame with a full set of saved registers (presumably struct pt_regs). */
DRGN_ORC_TYPE_REGS = 1,
/* Frame with only part of the registers saved so far. */
DRGN_ORC_TYPE_REGS_PARTIAL = 2,
};
/* Extract the SP base register number (bits 0-3 of the flags). */
static inline int drgn_orc_sp_reg(const struct drgn_orc_entry *orc)
{
	enum { SP_REG_MASK = 0xf };
	return orc->flags & SP_REG_MASK;
}
/* Extract the BP base register number (bits 4-7 of the flags). */
static inline int drgn_orc_bp_reg(const struct drgn_orc_entry *orc)
{
	enum { BP_REG_SHIFT = 4, BP_REG_MASK = 0xf };
	return (orc->flags >> BP_REG_SHIFT) & BP_REG_MASK;
}
/* Extract the entry type, one of DRGN_ORC_TYPE_* (bits 8-9 of the flags). */
static inline int drgn_orc_type(const struct drgn_orc_entry *orc)
{
	enum { TYPE_SHIFT = 8, TYPE_MASK = 0x3 };
	return (orc->flags >> TYPE_SHIFT) & TYPE_MASK;
}
/* Whether the entry has the end-of-stack bit (bit 10 of the flags) set. */
static inline bool drgn_orc_is_end(const struct drgn_orc_entry *orc)
{
	enum { END_BIT = 0x400 };
	return (orc->flags & END_BIT) != 0;
}
/*
 * Whether host-order flags describe a "terminator" entry: the SP base
 * register is undefined (bits 0-3 are zero) and the end-of-stack bit (bit 10)
 * is clear.
 */
static inline bool drgn_orc_flags_is_terminator(uint16_t flags)
{
	enum { SP_REG_MASK = 0xf, END_BIT = 0x400 };
	return !(flags & (SP_REG_MASK | END_BIT));
}
#endif /* DRGN_ORC_H */

View File

@ -10,6 +10,7 @@
#include "drgn.h"
#include "util.h"
struct drgn_orc_entry;
struct drgn_register_state;
struct drgn_register {
@ -86,6 +87,9 @@ struct drgn_architecture_info {
drgn_register_number (*dwarf_regno_to_internal)(uint64_t);
/* CFI row containing default rules for DWARF CFI. */
struct drgn_cfi_row *default_dwarf_cfi_row;
struct drgn_error *(*orc_to_cfi)(const struct drgn_orc_entry *,
struct drgn_cfi_row **, bool *,
drgn_register_number *);
/*
* Try to unwind a stack frame if CFI wasn't found. Returns &drgn_stop
* if we couldn't.

View File

@ -78,6 +78,8 @@ void drgn_program_init(struct drgn_program *prog,
prog->core_fd = -1;
if (platform)
drgn_program_set_platform(prog, platform);
char *env = getenv("DRGN_PREFER_ORC_UNWINDER");
prog->prefer_orc_unwinder = env && atoi(env);
drgn_object_init(&prog->page_offset, prog);
drgn_object_init(&prog->vmemmap, prog);
}

View File

@ -147,6 +147,7 @@ struct drgn_program {
struct drgn_prstatus_map prstatus_map;
};
bool prstatus_cached;
bool prefer_orc_unwinder;
/*
* Linux kernel-specific.

View File

@ -409,6 +409,37 @@ out:
return err;
}
/*
 * Store src + addend into dst, where src and dst are integers of src_size and
 * dst_size bytes in the byte order selected by little_endian. dst may be the
 * same buffer as src.
 *
 * The addition is carried out 64 bits at a time starting from the least
 * significant bytes, propagating the carry and the sign extension of addend
 * into the next chunk. Once nothing is left to add, the rest of src is copied
 * through unchanged.
 */
static void drgn_add_to_register(void *dst, size_t dst_size, const void *src,
				 size_t src_size, int64_t addend,
				 bool little_endian)
{
	while (addend != 0 && dst_size != 0 && src_size != 0) {
		/* Fetch the next (up to) 8 least significant source bytes. */
		uint64_t chunk;
		copy_lsbytes(&chunk, sizeof(chunk), HOST_LITTLE_ENDIAN, src,
			     src_size, little_endian);
		const size_t src_step = min(sizeof(chunk), src_size);
		/*
		 * In little-endian order the consumed bytes sit at the front,
		 * so step past them; in big-endian order they sit at the back,
		 * so shrinking the size is enough.
		 */
		if (little_endian)
			src = (char *)src + src_step;
		src_size -= src_step;

		/*
		 * What carries into the next chunk: the sign extension of the
		 * addend (-1 or 0) plus the carry out of this chunk.
		 */
		const int64_t sign_extension = addend < 0 ? -1 : 0;
		const bool carry = __builtin_add_overflow(chunk,
							  (uint64_t)addend,
							  &chunk);
		addend = sign_extension + carry;

		copy_lsbytes(dst, dst_size, little_endian, &chunk,
			     sizeof(chunk), HOST_LITTLE_ENDIAN);
		const size_t dst_step = min(sizeof(chunk), dst_size);
		if (little_endian)
			dst = (char *)dst + dst_step;
		dst_size -= dst_step;
	}

	/* Skip the tail copy when it would copy a buffer onto itself. */
	if (dst == src)
		return;
	copy_lsbytes(dst, dst_size, little_endian, src, src_size,
		     little_endian);
}
static struct drgn_error *
drgn_unwind_one_register(struct drgn_program *prog,
const struct drgn_cfi_rule *rule,
@ -438,40 +469,33 @@ drgn_unwind_one_register(struct drgn_program *prog,
sizeof(cfa.value), HOST_LITTLE_ENDIAN);
return NULL;
}
case DRGN_CFI_RULE_AT_REGISTER_PLUS_OFFSET:
case DRGN_CFI_RULE_AT_REGISTER_ADD_OFFSET: {
if (!drgn_register_state_has_register(regs, rule->regno))
return &drgn_not_found;
const struct drgn_register_layout *layout =
&prog->platform.arch->register_layout[rule->regno];
uint64_t address;
copy_lsbytes(&address, sizeof(address), HOST_LITTLE_ENDIAN,
&regs->buf[layout->offset], layout->size,
little_endian);
if (rule->kind == DRGN_CFI_RULE_AT_REGISTER_PLUS_OFFSET)
address += rule->offset;
address &= drgn_platform_address_mask(&prog->platform);
err = drgn_program_read_memory(prog, buf, address, size, false);
if (!err && rule->kind == DRGN_CFI_RULE_AT_REGISTER_ADD_OFFSET) {
drgn_add_to_register(buf, size, buf, size, rule->offset,
little_endian);
}
break;
}
case DRGN_CFI_RULE_REGISTER_PLUS_OFFSET: {
if (!drgn_register_state_has_register(regs, rule->regno))
return &drgn_not_found;
const struct drgn_register_layout *layout =
&prog->platform.arch->register_layout[rule->regno];
unsigned char *dst = buf;
size_t dst_size = size;
const unsigned char *src = &regs->buf[layout->offset];
size_t src_size = layout->size;
int64_t addend = rule->offset;
while (addend && dst_size && src_size) {
uint64_t uvalue;
copy_lsbytes(&uvalue, sizeof(uvalue),
HOST_LITTLE_ENDIAN, src, src_size,
little_endian);
size_t n = min(sizeof(uvalue), src_size);
if (little_endian)
src += n;
src_size -= n;
bool carry = __builtin_add_overflow(uvalue,
(uint64_t)addend,
&uvalue);
addend = (addend < 0 ? -1 : 0) + carry;
copy_lsbytes(dst, dst_size, little_endian, &uvalue,
sizeof(uvalue), HOST_LITTLE_ENDIAN);
n = min(sizeof(uvalue), dst_size);
if (little_endian)
dst += n;
dst_size -= n;
}
copy_lsbytes(dst, dst_size, little_endian, src, src_size,
little_endian);
drgn_add_to_register(buf, size, &regs->buf[layout->offset],
layout->size, rule->offset, little_endian);
return NULL;
}
case DRGN_CFI_RULE_AT_DWARF_EXPRESSION:
@ -526,7 +550,7 @@ drgn_unwind_with_cfi(struct drgn_program *prog, struct drgn_cfi_row **row,
bool interrupted;
drgn_register_number ret_addr_regno;
/* If we found the module, then we must have the PC. */
err = drgn_debug_info_module_find_cfi(regs->module,
err = drgn_debug_info_module_find_cfi(prog, regs->module,
regs->_pc - !regs->interrupted,
row, &interrupted,
&ret_addr_regno);

View File

@ -4,12 +4,13 @@
import os
import signal
from drgn import Object, cast
from drgn import Object, Program, cast
from drgn.helpers.linux.pid import find_task
from tests.helpers.linux import (
LinuxHelperTestCase,
fork_and_pause,
proc_state,
setenv,
wait_until,
)
@ -18,16 +19,30 @@ class TestStackTrace(LinuxHelperTestCase):
def test_by_task_struct(self):
pid = fork_and_pause()
wait_until(lambda: proc_state(pid) == "S")
self.assertIn("schedule", str(self.prog.stack_trace(find_task(self.prog, pid))))
self.assertIn("pause", str(self.prog.stack_trace(find_task(self.prog, pid))))
os.kill(pid, signal.SIGKILL)
os.waitpid(pid, 0)
def test_by_pid(self):
pid = fork_and_pause()
wait_until(lambda: proc_state(pid) == "S")
self.assertIn("schedule", str(self.prog.stack_trace(pid)))
os.kill(pid, signal.SIGKILL)
os.waitpid(pid, 0)
# Shared body for the by-PID stack trace tests. `orc` selects whether the
# program used for the trace should prefer the ORC unwinder (True) or DWARF
# (False).
def _test_by_pid(self, orc):
# Preference in effect in the current environment (unset counts as 0).
old_orc = int(os.environ.get("DRGN_PREFER_ORC_UNWINDER", "0")) != 0
with setenv("DRGN_PREFER_ORC_UNWINDER", "1" if orc else "0"):
# Reuse self.prog when the current environment already has the requested
# preference; otherwise create a fresh Program while the variable is
# overridden.
if orc == old_orc:
prog = self.prog
else:
prog = Program()
prog.set_kernel()
prog.load_default_debug_info()
pid = fork_and_pause()
# Wait for the child to reach state "S" before tracing it.
wait_until(lambda: proc_state(pid) == "S")
self.assertIn("pause", str(prog.stack_trace(pid)))
# Clean up the child process.
os.kill(pid, signal.SIGKILL)
os.waitpid(pid, 0)
# Stack trace by PID with DRGN_PREFER_ORC_UNWINDER set to "0".
def test_by_pid_dwarf(self):
self._test_by_pid(False)
# Stack trace by PID with DRGN_PREFER_ORC_UNWINDER set to "1".
def test_by_pid_orc(self):
self._test_by_pid(True)
def test_pt_regs(self):
# This won't unwind anything useful, but at least make sure it accepts