libdrgn: refactor page table iterators

AArch64 will need different sizes of page table iterators depending on
the page size and virtual address size. Rather than using the static
pgtable_iterator_arch_size, allow architectures to define callbacks for
allocating and freeing a page table iterator. Also remove the generic
page table iterator wrapper and instead pass the program directly to the
iterator function.

Signed-off-by: Omar Sandoval <osandov@osandov.com>
Omar Sandoval 2022-07-08 15:34:29 -07:00
parent 95053639d4
commit 36fecd1ded
5 changed files with 137 additions and 86 deletions
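In short, the per-architecture hooks in struct drgn_architecture_info change roughly as shown below. This is only a condensed sketch of the members this commit touches, pulled from the header diff further down, not a full listing of the struct:

    /* Before: a fixed size and an init hook for pgtable_iterator::arch. */
    size_t pgtable_iterator_arch_size;
    void (*pgtable_iterator_arch_init)(void *buf);

    /* After: each architecture allocates, (re)initializes, and frees its own
     * iterator; linux_kernel_pgtable_iterator_next (which already existed)
     * now also takes the struct drgn_program * explicitly. */
    struct drgn_error *(*linux_kernel_pgtable_iterator_create)(struct drgn_program *,
                                                               struct pgtable_iterator **);
    void (*linux_kernel_pgtable_iterator_destroy)(struct pgtable_iterator *);
    void (*linux_kernel_pgtable_iterator_init)(struct drgn_program *,
                                               struct pgtable_iterator *);

This lets an architecture size the iterator at runtime (which the upcoming AArch64 support needs) instead of reserving a fixed pgtable_iterator_arch_size buffer, at the cost of a malloc()/free() pair behind the new create/destroy callbacks.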


@@ -629,19 +629,41 @@ linux_kernel_live_direct_mapping_fallback_x86_64(struct drgn_program *prog,
 }
 
 struct pgtable_iterator_x86_64 {
+	struct pgtable_iterator it;
 	uint16_t index[5];
 	uint64_t table[5][512];
 };
 
-static void pgtable_iterator_arch_init_x86_64(void *buf)
+static struct drgn_error *
+linux_kernel_pgtable_iterator_create_x86_64(struct drgn_program *prog,
+					    struct pgtable_iterator **ret)
 {
-	struct pgtable_iterator_x86_64 *arch = buf;
-	memset(arch->index, 0xff, sizeof(arch->index));
-	memset(arch->table, 0, sizeof(arch->table));
+	struct pgtable_iterator_x86_64 *it = malloc(sizeof(*it));
+	if (!it)
+		return &drgn_enomem;
+	*ret = &it->it;
+	return NULL;
+}
+
+static void linux_kernel_pgtable_iterator_destroy_x86_64(struct pgtable_iterator *_it)
+{
+	free(container_of(_it, struct pgtable_iterator_x86_64, it));
+}
+
+static void
+linux_kernel_pgtable_iterator_init_x86_64(struct drgn_program *prog,
+					  struct pgtable_iterator *_it)
+{
+	struct pgtable_iterator_x86_64 *it =
+		container_of(_it, struct pgtable_iterator_x86_64, it);
+	memset(it->index, 0xff, sizeof(it->index));
+	memset(it->table, 0, sizeof(it->table));
 }
 
 static struct drgn_error *
-linux_kernel_pgtable_iterator_next_x86_64(struct pgtable_iterator *it,
+linux_kernel_pgtable_iterator_next_x86_64(struct drgn_program *prog,
+					  struct pgtable_iterator *_it,
 					  uint64_t *virt_addr_ret,
 					  uint64_t *phys_addr_ret)
 {
@@ -652,14 +674,15 @@ linux_kernel_pgtable_iterator_next_x86_64(struct pgtable_iterator *it,
 	static const uint64_t PSE = 0x80; /* a.k.a. huge page */
 	static const uint64_t ADDRESS_MASK = UINT64_C(0xffffffffff000);
 	struct drgn_error *err;
-	struct drgn_program *prog = it->prog;
-	struct pgtable_iterator_x86_64 *arch = (void *)it->arch;
-	int levels = prog->vmcoreinfo.pgtable_l5_enabled ? 5 : 4, level;
 	bool bswap = drgn_platform_bswap(&prog->platform);
+	struct pgtable_iterator_x86_64 *it =
+		container_of(_it, struct pgtable_iterator_x86_64, it);
+	uint64_t virt_addr = it->it.virt_addr;
+	int levels = prog->vmcoreinfo.pgtable_l5_enabled ? 5 : 4, level;
 
 	/* Find the lowest level with cached entries. */
 	for (level = 0; level < levels; level++) {
-		if (arch->index[level] < array_size(arch->table[level]))
+		if (it->index[level] < array_size(it->table[level]))
 			break;
 	}
 	/* For every level below that, refill the cache/return pages. */
@@ -675,17 +698,17 @@ linux_kernel_pgtable_iterator_next_x86_64(struct pgtable_iterator *it,
 			end_non_canonical = (UINT64_MAX <<
 					     (PAGE_SHIFT +
 					      PGTABLE_SHIFT * levels - 1));
-			if (it->virt_addr >= start_non_canonical &&
-			    it->virt_addr < end_non_canonical) {
+			if (virt_addr >= start_non_canonical &&
+			    virt_addr < end_non_canonical) {
 				*virt_addr_ret = start_non_canonical;
 				*phys_addr_ret = UINT64_MAX;
-				it->virt_addr = end_non_canonical;
+				it->it.virt_addr = end_non_canonical;
 				return NULL;
 			}
-			table = it->pgtable;
+			table = it->it.pgtable;
 			table_physical = false;
 		} else {
-			uint64_t entry = arch->table[level][arch->index[level]++];
+			uint64_t entry = it->table[level][it->index[level]++];
 			if (bswap)
 				entry = bswap_64(entry);
 			table = entry & ADDRESS_MASK;
@@ -693,30 +716,30 @@ linux_kernel_pgtable_iterator_next_x86_64(struct pgtable_iterator *it,
 				uint64_t mask = (UINT64_C(1) <<
 						 (PAGE_SHIFT +
 						  PGTABLE_SHIFT * level)) - 1;
-				*virt_addr_ret = it->virt_addr & ~mask;
+				*virt_addr_ret = virt_addr & ~mask;
 				if (entry & PRESENT)
 					*phys_addr_ret = table & ~mask;
 				else
 					*phys_addr_ret = UINT64_MAX;
-				it->virt_addr = (it->virt_addr | mask) + 1;
+				it->it.virt_addr = (virt_addr | mask) + 1;
 				return NULL;
 			}
 			table_physical = true;
 		}
 
-		index = (it->virt_addr >>
+		index = (virt_addr >>
 			 (PAGE_SHIFT + PGTABLE_SHIFT * (level - 1))) & PGTABLE_MASK;
 		/*
 		 * It's only marginally more expensive to read 4096 bytes than 8
 		 * bytes, so we always read to the end of the table.
 		 */
 		err = drgn_program_read_memory(prog,
-					       &arch->table[level - 1][index],
+					       &it->table[level - 1][index],
 					       table + 8 * index,
-					       sizeof(arch->table[0]) - 8 * index,
+					       sizeof(it->table[0]) - 8 * index,
 					       table_physical);
 		if (err)
 			return err;
-		arch->index[level - 1] = index;
+		it->index[level - 1] = index;
 	}
 }
@@ -738,8 +761,12 @@ const struct drgn_architecture_info arch_info_x86_64 = {
 	.linux_kernel_get_vmemmap = linux_kernel_get_vmemmap_x86_64,
 	.linux_kernel_live_direct_mapping_fallback =
 		linux_kernel_live_direct_mapping_fallback_x86_64,
-	.pgtable_iterator_arch_size = sizeof(struct pgtable_iterator_x86_64),
-	.pgtable_iterator_arch_init = pgtable_iterator_arch_init_x86_64,
+	.linux_kernel_pgtable_iterator_create =
+		linux_kernel_pgtable_iterator_create_x86_64,
+	.linux_kernel_pgtable_iterator_destroy =
+		linux_kernel_pgtable_iterator_destroy_x86_64,
+	.linux_kernel_pgtable_iterator_init =
+		linux_kernel_pgtable_iterator_init_x86_64,
 	.linux_kernel_pgtable_iterator_next =
 		linux_kernel_pgtable_iterator_next_x86_64,
 };


@@ -12,61 +12,79 @@
 #include "program.h"
 #include "util.h"
 
+static void end_virtual_address_translation(struct drgn_program *prog)
+{
+	prog->in_address_translation = false;
+}
+
+static struct drgn_error *
+begin_virtual_address_translation(struct drgn_program *prog, uint64_t pgtable,
+				  uint64_t virt_addr)
+{
+	struct drgn_error *err;
+
+	if (prog->in_address_translation) {
+		return drgn_error_create_fault("recursive address translation; "
+					       "page table may be missing from core dump",
+					       virt_addr);
+	}
+	prog->in_address_translation = true;
+
+	if (!prog->pgtable_it) {
+		if (!(prog->flags & DRGN_PROGRAM_IS_LINUX_KERNEL)) {
+			err = drgn_error_create(DRGN_ERROR_INVALID_ARGUMENT,
+						"virtual address translation is only available for the Linux kernel");
+			goto err;
+		}
+		if (!prog->has_platform) {
+			err = drgn_error_create(DRGN_ERROR_INVALID_ARGUMENT,
+						"cannot do virtual address translation without platform");
+			goto err;
+		}
+		if (!prog->platform.arch->linux_kernel_pgtable_iterator_next) {
+			err = drgn_error_format(DRGN_ERROR_INVALID_ARGUMENT,
+						"virtual address translation is not implemented for %s architecture",
+						prog->platform.arch->name);
+			goto err;
+		}
+		err = prog->platform.arch->linux_kernel_pgtable_iterator_create(prog,
+										&prog->pgtable_it);
+		if (err) {
+			prog->pgtable_it = NULL;
+			goto err;
+		}
+	}
+	prog->pgtable_it->pgtable = pgtable;
+	prog->pgtable_it->virt_addr = virt_addr;
+	prog->platform.arch->linux_kernel_pgtable_iterator_init(prog, prog->pgtable_it);
+	return NULL;
+
+err:
+	end_virtual_address_translation(prog);
+	return err;
+}
+
 struct drgn_error *linux_helper_read_vm(struct drgn_program *prog,
 					uint64_t pgtable, uint64_t virt_addr,
 					void *buf, size_t count)
 {
 	struct drgn_error *err;
-	struct pgtable_iterator *it;
-	pgtable_iterator_next_fn *next;
+
+	err = begin_virtual_address_translation(prog, pgtable, virt_addr);
+	if (err)
+		return err;
+	if (!count) {
+		err = NULL;
+		goto out;
+	}
+
+	struct pgtable_iterator *it = prog->pgtable_it;
+	pgtable_iterator_next_fn *next =
+		prog->platform.arch->linux_kernel_pgtable_iterator_next;
 	uint64_t read_addr = 0;
 	size_t read_size = 0;
-
-	if (!(prog->flags & DRGN_PROGRAM_IS_LINUX_KERNEL)) {
-		return drgn_error_create(DRGN_ERROR_INVALID_ARGUMENT,
-					 "virtual address translation is only available for the Linux kernel");
-	}
-	if (!prog->has_platform) {
-		return drgn_error_create(DRGN_ERROR_INVALID_ARGUMENT,
-					 "cannot do virtual address translation without platform");
-	}
-	if (!prog->platform.arch->linux_kernel_pgtable_iterator_next) {
-		return drgn_error_format(DRGN_ERROR_INVALID_ARGUMENT,
-					 "virtual address translation is not implemented for %s architecture",
-					 prog->platform.arch->name);
-	}
-	if (!count)
-		return NULL;
-	if (prog->pgtable_it_in_use) {
-		return drgn_error_create_fault("recursive address translation; "
-					       "page table may be missing from core dump",
-					       virt_addr);
-	}
-	if (prog->pgtable_it) {
-		it = prog->pgtable_it;
-	} else {
-		it = malloc(sizeof(*it) +
-			    prog->platform.arch->pgtable_iterator_arch_size);
-		if (!it)
-			return &drgn_enomem;
-		prog->pgtable_it = it;
-		it->prog = prog;
-	}
-	it->pgtable = pgtable;
-	it->virt_addr = virt_addr;
-	prog->pgtable_it_in_use = true;
-	prog->platform.arch->pgtable_iterator_arch_init(it->arch);
-	next = prog->platform.arch->linux_kernel_pgtable_iterator_next;
 	do {
-		uint64_t virt_addr, start_virt_addr, end_virt_addr;
-		uint64_t start_phys_addr, end_phys_addr;
-		size_t n;
-
-		virt_addr = it->virt_addr;
-		err = next(it, &start_virt_addr, &start_phys_addr);
+		uint64_t start_virt_addr, start_phys_addr;
+		err = next(prog, it, &start_virt_addr, &start_phys_addr);
 		if (err)
 			break;
 		if (start_phys_addr == UINT64_MAX) {
@@ -74,9 +92,10 @@ struct drgn_error *linux_helper_read_vm(struct drgn_program *prog,
 						      virt_addr);
 			break;
 		}
-		end_virt_addr = it->virt_addr;
-		end_phys_addr = start_phys_addr + (end_virt_addr - start_virt_addr);
-		n = min(end_virt_addr - virt_addr, (uint64_t)count);
+
+		uint64_t end_phys_addr =
+			start_phys_addr + (it->virt_addr - start_virt_addr);
+		size_t n = min(it->virt_addr - virt_addr, (uint64_t)count);
 		if (read_size && end_phys_addr == read_addr + read_size) {
 			read_size += n;
 		} else {
@@ -91,13 +110,15 @@ struct drgn_error *linux_helper_read_vm(struct drgn_program *prog,
 			read_addr = start_phys_addr + (virt_addr - start_virt_addr);
 			read_size = n;
 		}
+		virt_addr = it->virt_addr;
 		count -= n;
 	} while (count);
 
 	if (!err) {
 		err = drgn_program_read_memory(prog, buf, read_addr, read_size,
 					       true);
 	}
-	prog->pgtable_it_in_use = false;
+
+out:
+	end_virtual_address_translation(prog);
 	return err;
 }


@@ -80,13 +80,10 @@ apply_elf_reloc_fn(const struct drgn_relocating_section *relocating,
 
 /* Page table iterator. */
 struct pgtable_iterator {
-	struct drgn_program *prog;
 	/* Address of the top-level page table to iterate. */
 	uint64_t pgtable;
 	/* Current virtual address to translate. */
 	uint64_t virt_addr;
-	/* Architecture-specific data. */
-	char arch[];
 };
 
 /*
@@ -113,7 +110,8 @@ struct pgtable_iterator {
  * maps to, or @c UINT64_MAX if it is not mapped.
  */
 typedef struct drgn_error *
-(pgtable_iterator_next_fn)(struct pgtable_iterator *it, uint64_t *virt_addr_ret,
+(pgtable_iterator_next_fn)(struct drgn_program *prog,
+			   struct pgtable_iterator *it, uint64_t *virt_addr_ret,
 			   uint64_t *phys_addr_ret);
 
 struct drgn_architecture_info {
@@ -166,10 +164,14 @@ struct drgn_architecture_info {
 	struct drgn_error *(*linux_kernel_live_direct_mapping_fallback)(struct drgn_program *,
 									uint64_t *,
 									uint64_t *);
-	/* Size to allocate for pgtable_iterator::arch. */
-	size_t pgtable_iterator_arch_size;
-	/* Initialize pgtable_iterator::arch. */
-	void (*pgtable_iterator_arch_init)(void *buf);
+	/* Allocate a Linux kernel page table iterator. */
+	struct drgn_error *(*linux_kernel_pgtable_iterator_create)(struct drgn_program *,
+								    struct pgtable_iterator **);
+	/* Destroy a Linux kernel page table iterator. */
+	void (*linux_kernel_pgtable_iterator_destroy)(struct pgtable_iterator *);
+	/* (Re)initialize a Linux kernel page table iterator. */
+	void (*linux_kernel_pgtable_iterator_init)(struct drgn_program *,
+						   struct pgtable_iterator *);
 	/* Iterate a (user or kernel) page table in the Linux kernel. */
 	pgtable_iterator_next_fn *linux_kernel_pgtable_iterator_next;
 };


@@ -124,7 +124,8 @@ void drgn_program_deinit(struct drgn_program *prog)
 		drgn_thread_destroy(prog->crashed_thread);
 	else if (prog->flags & DRGN_PROGRAM_IS_LIVE)
 		drgn_thread_destroy(prog->main_thread);
-	free(prog->pgtable_it);
+	if (prog->pgtable_it)
+		prog->platform.arch->linux_kernel_pgtable_iterator_destroy(prog->pgtable_it);
 
 	drgn_object_deinit(&prog->vmemmap);
 	drgn_object_deinit(&prog->page_offset);


@@ -170,13 +170,13 @@ struct drgn_program {
 	struct drgn_object page_offset;
 	/* Cached vmemmap. */
 	struct drgn_object vmemmap;
-	/* Page table iterator for linux_helper_read_vm(). */
+	/* Page table iterator. */
 	struct pgtable_iterator *pgtable_it;
 	/*
-	 * Whether @ref drgn_program::pgtable_it is currently being used. Used
-	 * to prevent address translation from recursing.
+	 * Whether we are currently in address translation. Used to prevent
+	 * address translation from recursing.
 	 */
-	bool pgtable_it_in_use;
+	bool in_address_translation;
 };
 
 /** Initialize a @ref drgn_program. */