diff --git a/libdrgn/arch_x86_64.c.in b/libdrgn/arch_x86_64.c.in
index a64a5d15..c6e4399c 100644
--- a/libdrgn/arch_x86_64.c.in
+++ b/libdrgn/arch_x86_64.c.in
@@ -257,67 +257,25 @@ out:
static struct drgn_error *
linux_kernel_set_initial_registers_x86_64(Dwfl_Thread *thread,
- const struct drgn_object *task_obj,
- const void *prstatus,
- size_t prstatus_size)
+ const struct drgn_object *task_obj)
{
struct drgn_error *err;
struct drgn_program *prog = task_obj->prog;
struct drgn_object sp_obj;
- struct drgn_qualified_type frame_type;
- uint64_t sp;
- Dwarf_Word dwarf_reg;
drgn_object_init(&sp_obj, prog);
- if (prstatus) {
- /*
- * If the stack pointer in PRSTATUS is within this task's stack,
- * then we can use it. Otherwise, the task either wasn't running
- * or was in the middle of context switching. Either way, we
- * should use the saved registers instead.
- */
- uint64_t thread_size;
- uint64_t stack;
-
- err = linux_kernel_get_thread_size(prog, &thread_size);
- if (err)
- goto out;
- err = drgn_object_member_dereference(&sp_obj, task_obj,
- "stack");
- if (err)
- goto out;
- err = drgn_object_read_unsigned(&sp_obj, &stack);
- if (err)
- goto out;
-
- if (prstatus_size < 272) {
- err = drgn_error_create(DRGN_ERROR_INVALID_ARGUMENT,
- "registers are truncated");
- goto out;
- }
- memcpy(&sp, (char *)prstatus + 264, sizeof(sp));
- if (drgn_program_bswap(prog))
- sp = bswap_64(sp);
- if (sp > stack && sp <= stack + thread_size) {
- err = prstatus_set_initial_registers_x86_64(prog,
- thread,
- prstatus,
- prstatus_size);
- goto out;
- }
- }
-
err = drgn_object_member_dereference(&sp_obj, task_obj, "thread");
if (err)
goto out;
err = drgn_object_member(&sp_obj, &sp_obj, "sp");
if (err)
goto out;
+ uint64_t sp;
err = drgn_object_read_unsigned(&sp_obj, &sp);
if (err)
goto out;
- dwarf_reg = sp;
+ Dwarf_Word dwarf_reg = sp;
/* rsp is register 7. */
if (!dwfl_thread_state_registers(thread, 7, 1, &dwarf_reg)) {
err = drgn_error_libdwfl();
@@ -330,6 +288,7 @@ linux_kernel_set_initial_registers_x86_64(Dwfl_Thread *thread,
* inactive_task_frame, which we can use to get most registers. Before
* that, it points to bp.
*/
+ struct drgn_qualified_type frame_type;
err = drgn_program_find_type(prog, "struct inactive_task_frame *", NULL,
&frame_type);
if (!err) {
diff --git a/libdrgn/platform.h b/libdrgn/platform.h
index c97608ec..8510cc45 100644
--- a/libdrgn/platform.h
+++ b/libdrgn/platform.h
@@ -65,35 +65,8 @@ struct drgn_architecture_info {
Dwfl_Thread *,
const void *,
size_t);
- /*
- * Get a task's registers from the task_struct or PRSTATUS note as
- * appropriate.
- *
- * The given PRSTATUS note is for the CPU that the task is assigned to,
- * which may or may not be for the given task. This callback must
- * determine that (typically by checking whether the stack pointer in
- * PRSTATUS lies within the task's stack).
- *
- * We find the PRSTATUS note by CPU rather than by PID for two reasons:
- *
- * 1. The PID is populated by the kernel from "current" (the current
- * task) via a non-maskable interrupt (NMI). During a context switch,
- * the stack pointer and current are not updated atomically, so if
- * the NMI arrives in the middle of a context switch, the stack
- * pointer may not actually be that of current. Therefore, the stack
- * pointer in PRSTATUS may not actually be for the PID in PRSTATUS.
- *
- * We go through all of this trouble because blindly trusting the PID
- * could result in a stack trace for the wrong task, which we want to
- * avoid at all costs.
- *
- * 2. There is an idle task with PID 0 for each CPU, so for an idle task
- * we have no choice but to find the note by CPU.
- */
struct drgn_error *(*linux_kernel_set_initial_registers)(Dwfl_Thread *,
- const struct drgn_object *,
- const void *prstatus,
- size_t prstatus_size);
+ const struct drgn_object *);
struct drgn_error *(*linux_kernel_get_page_offset)(struct drgn_program *,
uint64_t *);
struct drgn_error *(*linux_kernel_get_vmemmap)(struct drgn_program *,
diff --git a/libdrgn/program.c b/libdrgn/program.c
index 3038bb1b..bbd45f36 100644
--- a/libdrgn/program.c
+++ b/libdrgn/program.c
@@ -738,33 +738,37 @@ drgn_program_load_debug_info(struct drgn_program *prog, const char **paths,
return err;
}
-struct drgn_error *drgn_program_cache_prstatus_entry(struct drgn_program *prog,
- char *data, size_t size)
+static uint32_t get_prstatus_pid(struct drgn_program *prog, const char *data,
+ size_t size)
{
- if (prog->flags & DRGN_PROGRAM_IS_LINUX_KERNEL) {
- struct string *entry;
+ uint32_t pr_pid;
+ memcpy(&pr_pid, data + (drgn_program_is_64_bit(prog) ? 32 : 24),
+ sizeof(pr_pid));
+ if (drgn_program_bswap(prog))
+ pr_pid = bswap_32(pr_pid);
+ return pr_pid;
+}
- entry = drgn_prstatus_vector_append_entry(&prog->prstatus_vector);
+struct drgn_error *drgn_program_cache_prstatus_entry(struct drgn_program *prog,
+ const char *data,
+ size_t size)
+{
+ if (size < (drgn_program_is_64_bit(prog) ? 36 : 28)) {
+ return drgn_error_create(DRGN_ERROR_OTHER,
+ "NT_PRSTATUS is truncated");
+ }
+ if (prog->flags & DRGN_PROGRAM_IS_LINUX_KERNEL) {
+ struct string *entry =
+ drgn_prstatus_vector_append_entry(&prog->prstatus_vector);
if (!entry)
return &drgn_enomem;
entry->str = data;
entry->len = size;
} else {
- struct drgn_prstatus_map_entry entry;
- size_t pr_pid_offset;
- uint32_t pr_pid;
-
- pr_pid_offset = drgn_program_is_64_bit(prog) ? 32 : 24;
- if (size < pr_pid_offset + sizeof(pr_pid))
- return NULL;
-
- memcpy(&pr_pid, data + pr_pid_offset, sizeof(pr_pid));
- if (drgn_program_bswap(prog))
- pr_pid = bswap_32(pr_pid);
-
- entry.key = pr_pid;
- entry.value.str = data;
- entry.value.len = size;
+ struct drgn_prstatus_map_entry entry = {
+ .key = get_prstatus_pid(prog, data, size),
+ .value = { data, size },
+ };
if (drgn_prstatus_map_insert(&prog->prstatus_map, &entry,
NULL) == -1)
return &drgn_enomem;
@@ -856,7 +860,8 @@ out:
struct drgn_error *drgn_program_find_prstatus_by_cpu(struct drgn_program *prog,
uint32_t cpu,
- struct string *ret)
+ struct string *ret,
+ uint32_t *tid_ret)
{
struct drgn_error *err;
@@ -867,6 +872,7 @@ struct drgn_error *drgn_program_find_prstatus_by_cpu(struct drgn_program *prog,
if (cpu < prog->prstatus_vector.size) {
*ret = prog->prstatus_vector.data[cpu];
+ *tid_ret = get_prstatus_pid(prog, ret->str, ret->len);
} else {
ret->str = NULL;
ret->len = 0;
diff --git a/libdrgn/program.h b/libdrgn/program.h
index f4db36c6..7efbdcf2 100644
--- a/libdrgn/program.h
+++ b/libdrgn/program.h
@@ -190,10 +190,12 @@ struct drgn_error *drgn_program_get_dwfl(struct drgn_program *prog, Dwfl **ret);
*
* @param[out] ret Returned note data. If not found, ret->str is set to
* @c NULL and ret->len is set to zero.
+ * @param[out] tid_ret Returned thread ID of note.
*/
struct drgn_error *drgn_program_find_prstatus_by_cpu(struct drgn_program *prog,
uint32_t cpu,
- struct string *ret);
+ struct string *ret,
+ uint32_t *tid_ret);
/**
* Find the @c NT_PRSTATUS note for the given thread ID.
@@ -214,7 +216,8 @@ struct drgn_error *drgn_program_find_prstatus_by_tid(struct drgn_program *prog,
* @param[in] size Size of data in note.
*/
struct drgn_error *drgn_program_cache_prstatus_entry(struct drgn_program *prog,
- char *data, size_t size);
+ const char *data,
+ size_t size);
/*
* Like @ref drgn_program_find_symbol_by_address(), but @p ret is already
diff --git a/libdrgn/stack_trace.c b/libdrgn/stack_trace.c
index d3c344df..4224e9a0 100644
--- a/libdrgn/stack_trace.c
+++ b/libdrgn/stack_trace.c
@@ -256,7 +256,6 @@ static bool drgn_thread_set_initial_registers(Dwfl_Thread *thread,
/* First, try pt_regs. */
if (prog->stack_trace_obj) {
bool is_pt_regs;
-
err = drgn_get_stack_trace_obj(&obj, prog, &is_pt_regs);
if (err)
goto out;
@@ -275,8 +274,6 @@ static bool drgn_thread_set_initial_registers(Dwfl_Thread *thread,
goto out;
}
} else if (prog->flags & DRGN_PROGRAM_IS_LINUX_KERNEL) {
- bool found;
-
err = drgn_program_find_object(prog, "init_pid_ns", NULL,
DRGN_FIND_OBJECT_ANY, &tmp);
if (err)
@@ -287,6 +284,7 @@ static bool drgn_thread_set_initial_registers(Dwfl_Thread *thread,
err = linux_helper_find_task(&obj, &tmp, prog->stack_trace_tid);
if (err)
goto out;
+ bool found;
err = drgn_object_bool(&obj, &found);
if (err)
goto out;
@@ -319,29 +317,62 @@ static bool drgn_thread_set_initial_registers(Dwfl_Thread *thread,
} else {
goto out;
}
- prstatus.str = NULL;
- prstatus.len = 0;
} else {
+ /*
+ * For kernel core dumps, we look up the PRSTATUS note
+ * by CPU rather than by PID. This is because there is
+ * an idle task with PID 0 for each CPU, so we must find
+ * the idle task by CPU. Rather than making PID 0 a
+ * special case, we handle all tasks this way.
+ */
union drgn_value value;
- uint32_t cpu;
-
err = drgn_object_member_dereference(&tmp, &obj, "cpu");
if (!err) {
err = drgn_object_read_integer(&tmp, &value);
if (err)
goto out;
- cpu = value.uvalue;
} else if (err->code == DRGN_ERROR_LOOKUP) {
/* !SMP. Must be CPU 0. */
drgn_error_destroy(err);
- cpu = 0;
+ value.uvalue = 0;
} else {
goto out;
}
- err = drgn_program_find_prstatus_by_cpu(prog, cpu,
- &prstatus);
+ uint32_t prstatus_tid;
+ err = drgn_program_find_prstatus_by_cpu(prog,
+ value.uvalue,
+ &prstatus,
+ &prstatus_tid);
if (err)
goto out;
+ if (prstatus.str) {
+ /*
+ * The PRSTATUS note is for the CPU that the
+ * task is assigned to, but it is not
+ * necessarily for this task. Only use it if the
+ * PID matches.
+ *
+ * Note that this isn't perfect: the PID is
+ * populated by the kernel from "current" (the
+ * current task) via a non-maskable interrupt
+ * (NMI). During a context switch, the stack
+ * pointer and current are not updated
+ * atomically, so if the NMI arrives in the
+ * middle of a context switch, the stack pointer
+ * may not actually be that of current.
+ * Therefore, the stack pointer in PRSTATUS may
+ * not actually be for the PID in PRSTATUS.
+ * Unfortunately, we can't easily fix this.
+ */
+ err = drgn_object_member_dereference(&tmp, &obj, "pid");
+ if (err)
+ goto out;
+ err = drgn_object_read_integer(&tmp, &value);
+ if (err)
+ goto out;
+ if (prstatus_tid == value.uvalue)
+ goto prstatus;
+ }
}
if (!prog->platform.arch->linux_kernel_set_initial_registers) {
err = drgn_error_format(DRGN_ERROR_INVALID_ARGUMENT,
@@ -350,9 +381,7 @@ static bool drgn_thread_set_initial_registers(Dwfl_Thread *thread,
goto out;
}
err = prog->platform.arch->linux_kernel_set_initial_registers(thread,
- &obj,
- prstatus.str,
- prstatus.len);
+ &obj);
} else {
err = drgn_program_find_prstatus_by_tid(prog,
prog->stack_trace_tid,
@@ -363,6 +392,7 @@ static bool drgn_thread_set_initial_registers(Dwfl_Thread *thread,
err = drgn_error_create(DRGN_ERROR_LOOKUP, "thread not found");
goto out;
}
+prstatus:
if (!prog->platform.arch->prstatus_set_initial_registers) {
err = drgn_error_format(DRGN_ERROR_INVALID_ARGUMENT,
"core dump stack unwinding is not supported for %s architecture",