Rewrite drgn core in C
The current mixed Python/C implementation works well, but it has a
couple of important limitations:
- It's too slow for some common use cases, like iterating over large
data structures.
- It can't be reused in utilities written in other languages.
This replaces the internals with a new library written in C, libdrgn. It
includes Python bindings with mostly the same public interface as
before, with some important improvements:
- Types are now represented by a single Type class rather than the messy
polymorphism in the Python implementation.
- Qualifiers are a bitmask instead of a set of strings.
- Bit fields are not considered a separate type.
- The lvalue/rvalue terminology is replaced with reference/value.
- Structure, union, and array values are better supported.
- Function objects are supported.
- Program distinguishes between lookups of variables, constants, and
functions.
The C rewrite is about 6x as fast as the original Python when using the
Python bindings, and about 8x when using the C API directly.
Currently, the exposed API in C is fairly conservative. In the future,
the memory reader, type index, and object index APIs will probably be
exposed for more flexibility.
2019-03-22 23:27:46 +00:00
|
|
|
// Copyright 2018-2019 - Omar Sandoval
|
|
|
|
// SPDX-License-Identifier: GPL-3.0+
|
|
|
|
|
2019-09-25 01:13:53 +01:00
|
|
|
#include <elfutils/libdwelf.h>
|
|
|
|
#include <errno.h>
|
|
|
|
#include <fcntl.h>
|
|
|
|
#include <stdarg.h>
|
|
|
|
#include <string.h>
|
|
|
|
#include <unistd.h>
|
|
|
|
#include <sys/stat.h>
|
|
|
|
#include <sys/types.h>
|
Rewrite drgn core in C
The current mixed Python/C implementation works well, but it has a
couple of important limitations:
- It's too slow for some common use cases, like iterating over large
data structures.
- It can't be reused in utilities written in other languages.
This replaces the internals with a new library written in C, libdrgn. It
includes Python bindings with mostly the same public interface as
before, with some important improvements:
- Types are now represented by a single Type class rather than the messy
polymorphism in the Python implementation.
- Qualifiers are a bitmask instead of a set of strings.
- Bit fields are not considered a separate type.
- The lvalue/rvalue terminology is replaced with reference/value.
- Structure, union, and array values are better supported.
- Function objects are supported.
- Program distinguishes between lookups of variables, constants, and
functions.
The C rewrite is about 6x as fast as the original Python when using the
Python bindings, and about 8x when using the C API directly.
Currently, the exposed API in C is fairly conservative. In the future,
the memory reader, type index, and object index APIs will probably be
exposed for more flexibility.
2019-03-22 23:27:46 +00:00
|
|
|
#include "internal.h"
|
|
|
|
|
|
|
|
/*
 * glibc added reallocarray() in 2.26, but since it's so trivial, it's easier to
 * duplicate it here than it is to do feature detection.
 *
 * Resize the allocation at ptr so that it can hold nmemb elements of size
 * bytes each. If nmemb * size overflows size_t, errno is set to ENOMEM and
 * NULL is returned without touching ptr. Otherwise, the result of realloc()
 * is returned.
 *
 * A request for zero bytes is rounded up to one byte. realloc(ptr, 0) is
 * implementation-defined (glibc frees ptr and returns NULL), which a caller
 * cannot tell apart from an allocation failure; rounding up means NULL is
 * only ever returned on failure, in which case ptr is still valid.
 */
void *realloc_array(void *ptr, size_t nmemb, size_t size)
{
	size_t bytes;

	if (__builtin_mul_overflow(nmemb, size, &bytes)) {
		errno = ENOMEM;
		return NULL;
	}
	if (bytes == 0)
		bytes = 1;
	return realloc(ptr, bytes);
}
|
|
|
|
|
|
|
|
/*
 * Allocate an uninitialized array of nmemb elements of size bytes each.
 *
 * If nmemb * size overflows size_t, errno is set to ENOMEM and NULL is
 * returned. Otherwise, the result of malloc() is returned.
 *
 * A request for zero bytes is rounded up to one byte: malloc(0) is allowed to
 * return NULL, which a caller cannot tell apart from an allocation failure.
 * With the rounding, NULL is only ever returned on failure.
 */
void *malloc_array(size_t nmemb, size_t size)
{
	size_t bytes;

	if (__builtin_mul_overflow(nmemb, size, &bytes)) {
		errno = ENOMEM;
		return NULL;
	}
	if (bytes == 0)
		bytes = 1;
	return malloc(bytes);
}
|
|
|
|
|
2019-09-25 01:13:53 +01:00
|
|
|
/*
 * Open the file at path read-only and start an ELF session on it.
 *
 * On success, NULL is returned, *fd_ret is the open file descriptor, and
 * *elf_ret is the handle returned by dwelf_elf_begin().
 *
 * On failure, an error is returned: the open() failure is reported through
 * drgn_error_create_os(), a dwelf_elf_begin() failure through
 * drgn_error_libelf(), and a file whose elf_kind() is not ELF_K_ELF as a
 * DRGN_ERROR_OTHER error. Anything that was opened is released again, and the
 * out-parameters that were written are reset to -1 / NULL so that the caller
 * is never left holding a closed descriptor or an ended Elf handle.
 */
struct drgn_error *open_elf_file(const char *path, int *fd_ret, Elf **elf_ret)
{
	struct drgn_error *err;

	*fd_ret = open(path, O_RDONLY);
	if (*fd_ret == -1)
		return drgn_error_create_os("open", errno, path);
	*elf_ret = dwelf_elf_begin(*fd_ret);
	if (!*elf_ret) {
		err = drgn_error_libelf();
		goto err_fd;
	}
	if (elf_kind(*elf_ret) != ELF_K_ELF) {
		err = drgn_error_create(DRGN_ERROR_OTHER, "not an ELF file");
		goto err_elf;
	}
	return NULL;

err_elf:
	elf_end(*elf_ret);
	/* Don't hand an ended handle back to the caller. */
	*elf_ret = NULL;
err_fd:
	close(*fd_ret);
	/* Likewise, don't hand back a descriptor that is already closed. */
	*fd_ret = -1;
	return err;
}
|
|
|
|
|
|
|
|
/*
 * Try a list of candidate paths and return the first one that is an ELF file.
 *
 * path_formats is a NULL-terminated array of printf-style format strings.
 * Each one is expanded with the same variadic arguments to produce a
 * candidate path. Candidates that cannot be opened, or for which
 * dwelf_elf_begin() fails, are skipped silently.
 *
 * If a candidate is found, NULL is returned and the vasprintf()-allocated
 * path, the open file descriptor, and the Elf handle are passed back through
 * *path_ret, *fd_ret, and *elf_ret.
 *
 * If no candidate could be opened, NULL is still returned, with *path_ret set
 * to NULL, *fd_ret to -1, and *elf_ret to NULL.
 *
 * An error is returned (and the out-parameters are not written) if formatting
 * a path fails or if a candidate opens but is not of kind ELF_K_ELF.
 */
struct drgn_error *find_elf_file(char **path_ret, int *fd_ret, Elf **elf_ret,
				 const char * const *path_formats, ...)
{
	const char * const *format;

	for (format = path_formats; *format; format++) {
		struct drgn_error *err;
		va_list args;
		char *candidate;
		Elf *elf;
		int len;
		int fd;

		/* Restart the argument list for every format string. */
		va_start(args, path_formats);
		len = vasprintf(&candidate, *format, args);
		va_end(args);
		if (len == -1)
			return &drgn_enomem;

		fd = open(candidate, O_RDONLY);
		if (fd == -1)
			goto next_candidate;

		elf = dwelf_elf_begin(fd);
		if (!elf)
			goto close_candidate;

		if (elf_kind(elf) == ELF_K_ELF) {
			*path_ret = candidate;
			*fd_ret = fd;
			*elf_ret = elf;
			return NULL;
		}

		/*
		 * The file exists and libelf accepted it, but it isn't an ELF
		 * file: give up instead of moving on to the next candidate.
		 * The error must be formatted before the path is freed.
		 */
		err = drgn_error_format(DRGN_ERROR_OTHER,
					"%s: not an ELF file", candidate);
		elf_end(elf);
		close(fd);
		free(candidate);
		return err;

close_candidate:
		close(fd);
next_candidate:
		free(candidate);
	}

	/* Nothing could be opened; this is not reported as an error. */
	*path_ret = NULL;
	*fd_ret = -1;
	*elf_ret = NULL;
	return NULL;
}
|
|
|
|
|
Rewrite drgn core in C
The current mixed Python/C implementation works well, but it has a
couple of important limitations:
- It's too slow for some common use cases, like iterating over large
data structures.
- It can't be reused in utilities written in other languages.
This replaces the internals with a new library written in C, libdrgn. It
includes Python bindings with mostly the same public interface as
before, with some important improvements:
- Types are now represented by a single Type class rather than the messy
polymorphism in the Python implementation.
- Qualifiers are a bitmask instead of a set of strings.
- Bit fields are not considered a separate type.
- The lvalue/rvalue terminology is replaced with reference/value.
- Structure, union, and array values are better supported.
- Function objects are supported.
- Program distinguishes between lookups of variables, constants, and
functions.
The C rewrite is about 6x as fast as the original Python when using the
Python bindings, and about 8x when using the C API directly.
Currently, the exposed API in C is fairly conservative. In the future,
the memory reader, type index, and object index APIs will probably be
exposed for more flexibility.
2019-03-22 23:27:46 +00:00
|
|
|
/*
 * Get the data of an ELF section.
 *
 * If the section header has SHF_COMPRESSED set, elf_compress() is first
 * called on the section with a type of 0 (per the libelf API, a request to
 * decompress it). The section's first data descriptor is then fetched with
 * elf_getdata() and stored in *ret.
 *
 * Returns NULL on success. If any libelf call fails, the error from
 * drgn_error_libelf() is returned and *ret is not written.
 */
struct drgn_error *read_elf_section(Elf_Scn *scn, Elf_Data **ret)
{
	GElf_Shdr header_buf;
	GElf_Shdr *header;
	Elf_Data *contents;

	header = gelf_getshdr(scn, &header_buf);
	if (!header)
		return drgn_error_libelf();

	if (header->sh_flags & SHF_COMPRESSED) {
		if (elf_compress(scn, 0, 0) < 0)
			return drgn_error_libelf();
	}

	contents = elf_getdata(scn, NULL);
	if (!contents)
		return drgn_error_libelf();

	*ret = contents;
	return NULL;
}
|
2019-09-25 01:13:53 +01:00
|
|
|
|
|
|
|
/*
 * Compute the address range covered by the PT_LOAD segments of an ELF file.
 *
 * bias is added to the virtual address of every segment. On success, NULL is
 * returned, *start_ret is the lowest biased segment start, and *end_ret is
 * the highest biased segment end (exclusive).
 *
 * Segments that are not PT_LOAD, that have a virtual address of zero, whose
 * biased start overflows 64 bits, or that end up empty are ignored. A segment
 * whose end overflows 64 bits is truncated to end at UINT64_MAX.
 *
 * An error is returned if a libelf call fails or if no segment contributed to
 * the range; the out-parameters are not written in that case.
 */
struct drgn_error *elf_address_range(Elf *elf, uint64_t bias,
				     uint64_t *start_ret, uint64_t *end_ret)
{
	uint64_t lowest = UINT64_MAX;
	uint64_t highest = 0;
	size_t num_phdrs;
	size_t idx;

	if (elf_getphdrnum(elf, &num_phdrs) != 0)
		return drgn_error_libelf();

	for (idx = 0; idx < num_phdrs; idx++) {
		GElf_Phdr phdr_buf;
		GElf_Phdr *phdr;
		uint64_t lo;
		uint64_t hi;

		phdr = gelf_getphdr(elf, idx, &phdr_buf);
		if (!phdr)
			return drgn_error_libelf();

		if (phdr->p_type != PT_LOAD)
			continue;
		if (phdr->p_vaddr == 0)
			continue;

		/* A segment starting beyond UINT64_MAX is ignored entirely. */
		if (__builtin_add_overflow(phdr->p_vaddr, bias, &lo))
			continue;
		/* A segment ending beyond UINT64_MAX is truncated. */
		if (__builtin_add_overflow(lo, phdr->p_memsz, &hi))
			hi = UINT64_MAX;

		/* Empty segments don't contribute to the range. */
		if (lo >= hi)
			continue;

		if (lo < lowest)
			lowest = lo;
		if (hi > highest)
			highest = hi;
	}

	/* Still at the initial sentinels: no segment was usable. */
	if (lowest >= highest) {
		return drgn_error_create(DRGN_ERROR_OTHER,
					 "ELF file has no loadable segments");
	}

	*start_ret = lowest;
	*end_ret = highest;
	return NULL;
}
|