drgn/libdrgn/dwarf_index.c

// Copyright 2018-2019 - Omar Sandoval
// SPDX-License-Identifier: GPL-3.0+

#include <assert.h>
#include <dwarf.h>
#include <elfutils/libdw.h>
#include <elfutils/libdwelf.h>
#include <fcntl.h>
#include <gelf.h>
#include <inttypes.h>
#include <libelf.h>
#include <stdbool.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <sys/stat.h>
#include <sys/types.h>

#include "internal.h"
#include "dwarf_index.h"
#include "read.h"
#include "siphash.h"
#include "string_builder.h"

DEFINE_VECTOR_FUNCTIONS(dwfl_module_vector)
DEFINE_VECTOR_FUNCTIONS(drgn_dwarf_module_vector)

static inline struct hash_pair
drgn_dwarf_module_hash(const struct drgn_dwarf_module_key *key)
{
	size_t hash;

	hash = cityhash_size_t(key->build_id, key->build_id_len);
	hash = hash_combine(hash, key->start);
	hash = hash_combine(hash, key->end);
	return hash_pair_from_avalanching_hash(hash);
}
static inline bool drgn_dwarf_module_eq(const struct drgn_dwarf_module_key *a,
					const struct drgn_dwarf_module_key *b)
{
	return (a->build_id_len == b->build_id_len &&
		(a->build_id_len == 0 ||
		 memcmp(a->build_id, b->build_id, a->build_id_len) == 0) &&
		a->start == b->start && a->end == b->end);
}
DEFINE_HASH_TABLE_FUNCTIONS(drgn_dwarf_module_table, drgn_dwarf_module_hash,
			    drgn_dwarf_module_eq)

DEFINE_HASH_TABLE_FUNCTIONS(c_string_set, c_string_hash, c_string_eq)

/**
 * @c Dwfl_Callbacks::find_elf() implementation.
 *
 * Ideally we'd use @c dwfl_report_elf() instead, but that doesn't take an @c
 * Elf handle, which we need for a couple of reasons:
 *
 * - We usually already have the @c Elf handle open in order to identify the
 *   file.
 * - For kernel modules, we set the section addresses in the @c Elf handle
 *   ourselves instead of using @c Dwfl_Callbacks::section_address().
 *
 * Additionally, there's a special case for vmlinux. It is usually an @c ET_EXEC
 * ELF file, but when KASLR is enabled, it needs to be handled like an @c ET_DYN
 * file. libdwfl has a hack for this when @c dwfl_report_module() is used, but
 * @ref dwfl_report_elf() bypasses this hack.
 *
 * So, we're stuck using @c dwfl_report_module() and this dummy callback.
 */
static int drgn_dwfl_find_elf(Dwfl_Module *dwfl_module, void **userdatap,
			      const char *name, Dwarf_Addr base,
			      char **file_name, Elf **elfp)
{
	struct drgn_dwfl_module_userdata *userdata = *userdatap;
	int fd;

	/*
	 * libdwfl consumes the returned path, file descriptor, and ELF handle,
	 * so clear the fields.
	 */
	*file_name = userdata->path;
	fd = userdata->fd;
	*elfp = userdata->elf;
	userdata->path = NULL;
	userdata->fd = -1;
	userdata->elf = NULL;
	return fd;
}

/*
 * Uses drgn_dwfl_find_elf() if the ELF file was reported directly and falls
 * back to dwfl_linux_proc_find_elf() otherwise.
 */
static int drgn_dwfl_linux_proc_find_elf(Dwfl_Module *dwfl_module,
					 void **userdatap, const char *name,
					 Dwarf_Addr base, char **file_name,
					 Elf **elfp)
{
	struct drgn_dwfl_module_userdata *userdata = *userdatap;

	if (userdata->elf) {
		return drgn_dwfl_find_elf(dwfl_module, userdatap, name, base,
					  file_name, elfp);
	}
	return dwfl_linux_proc_find_elf(dwfl_module, userdatap, name, base,
					file_name, elfp);
}

/*
 * Uses drgn_dwfl_find_elf() if the ELF file was reported directly and falls
 * back to dwfl_build_id_find_elf() otherwise.
 */
static int drgn_dwfl_build_id_find_elf(Dwfl_Module *dwfl_module,
				       void **userdatap, const char *name,
				       Dwarf_Addr base, char **file_name,
				       Elf **elfp)
{
	struct drgn_dwfl_module_userdata *userdata = *userdatap;

	if (userdata->elf) {
		return drgn_dwfl_find_elf(dwfl_module, userdatap, name, base,
					  file_name, elfp);
	}
	return dwfl_build_id_find_elf(dwfl_module, userdatap, name, base,
				      file_name, elfp);
}

/**
 * @c Dwfl_Callbacks::section_address() implementation.
 *
 * We set the section header @c sh_addr in memory instead of using this, but
 * libdwfl requires the callback pointer to be non-@c NULL. It will be called
 * for any sections that still have a zero @c sh_addr, meaning they are not
 * present in memory.
 */
static int drgn_dwfl_section_address(Dwfl_Module *module, void **userdatap,
				     const char *name, Dwarf_Addr base,
				     const char *secname, Elf32_Word shndx,
				     const GElf_Shdr *shdr, Dwarf_Addr *addr)
{
	*addr = -1;
	return DWARF_CB_OK;
}

const Dwfl_Callbacks drgn_dwfl_callbacks = {
	.find_elf = drgn_dwfl_find_elf,
	.find_debuginfo = dwfl_standard_find_debuginfo,
	.section_address = drgn_dwfl_section_address,
};

const Dwfl_Callbacks drgn_linux_proc_dwfl_callbacks = {
	.find_elf = drgn_dwfl_linux_proc_find_elf,
	.find_debuginfo = dwfl_standard_find_debuginfo,
	.section_address = drgn_dwfl_section_address,
};

const Dwfl_Callbacks drgn_userspace_core_dump_dwfl_callbacks = {
	.find_elf = drgn_dwfl_build_id_find_elf,
	.find_debuginfo = dwfl_standard_find_debuginfo,
	.section_address = drgn_dwfl_section_address,
};

enum {
	SECTION_DEBUG_INFO,
	SECTION_DEBUG_ABBREV,
	SECTION_DEBUG_STR,
	SECTION_DEBUG_LINE,
	DRGN_DWARF_INDEX_NUM_SECTIONS,
};

static const char * const section_name[DRGN_DWARF_INDEX_NUM_SECTIONS] = {
	[SECTION_DEBUG_INFO] = ".debug_info",
	[SECTION_DEBUG_ABBREV] = ".debug_abbrev",
	[SECTION_DEBUG_STR] = ".debug_str",
	[SECTION_DEBUG_LINE] = ".debug_line",
};

/*
 * The DWARF abbreviation table gets translated into a series of instructions.
 * An instruction <= INSN_MAX_SKIP indicates a number of bytes to be skipped
 * over. The next few instructions mean that the corresponding attribute can be
 * skipped over. The remaining instructions indicate that the corresponding
 * attribute should be parsed. Finally, every sequence of instructions
 * corresponding to a DIE is terminated by a zero byte followed by a bitmask of
 * TAG_FLAG_* bits combined with the DWARF tag (which may be set to zero if the
 * tag is not of interest).
 */
enum {
	INSN_MAX_SKIP = 229,
	ATTRIB_BLOCK1,
	ATTRIB_BLOCK2,
	ATTRIB_BLOCK4,
	ATTRIB_EXPRLOC,
	ATTRIB_LEB128,
	ATTRIB_STRING,
	ATTRIB_SIBLING_REF1,
	ATTRIB_SIBLING_REF2,
	ATTRIB_SIBLING_REF4,
	ATTRIB_SIBLING_REF8,
	ATTRIB_SIBLING_REF_UDATA,
	ATTRIB_NAME_STRP4,
	ATTRIB_NAME_STRP8,
	ATTRIB_NAME_STRING,
	ATTRIB_STMT_LIST_LINEPTR4,
	ATTRIB_STMT_LIST_LINEPTR8,
	ATTRIB_DECL_FILE_DATA1,
	ATTRIB_DECL_FILE_DATA2,
	ATTRIB_DECL_FILE_DATA4,
	ATTRIB_DECL_FILE_DATA8,
	ATTRIB_DECL_FILE_UDATA,
	ATTRIB_SPECIFICATION_REF1,
	ATTRIB_SPECIFICATION_REF2,
	ATTRIB_SPECIFICATION_REF4,
	ATTRIB_SPECIFICATION_REF8,
	ATTRIB_SPECIFICATION_REF_UDATA,
	ATTRIB_MAX_INSN = ATTRIB_SPECIFICATION_REF_UDATA,
};

enum {
	/* Maximum number of bits used by the tags we care about. */
	TAG_BITS = 6,
	TAG_MASK = (1 << TAG_BITS) - 1,
	/* The remaining bits can be used for other purposes. */
	TAG_FLAG_DECLARATION = 0x40,
	TAG_FLAG_CHILDREN = 0x80,
};

DEFINE_VECTOR(uint8_vector, uint8_t)
DEFINE_VECTOR(uint32_vector, uint32_t)
DEFINE_VECTOR(uint64_vector, uint64_t)

struct abbrev_table {
	/*
	 * This is indexed on the DWARF abbreviation code minus one. It maps the
	 * abbreviation code to an index in insns where the instruction stream
	 * for that code begins.
	 *
	 * Technically, abbreviation codes don't have to be sequential. In
	 * practice, GCC seems to always generate sequential codes starting at
	 * one, so we can get away with a flat array.
	 */
	struct uint32_vector decls;
	struct uint8_vector insns;
};

static void abbrev_table_init(struct abbrev_table *abbrev)
{
	uint32_vector_init(&abbrev->decls);
	uint8_vector_init(&abbrev->insns);
}

static void abbrev_table_deinit(struct abbrev_table *abbrev)
{
	uint8_vector_deinit(&abbrev->insns);
	uint32_vector_deinit(&abbrev->decls);
}

struct compilation_unit {
	Dwfl_Module *module;
	Elf_Data *sections[DRGN_DWARF_INDEX_NUM_SECTIONS];
	const char *ptr;
	uint64_t unit_length;
	uint64_t debug_abbrev_offset;
	uint8_t address_size;
	bool is_64_bit;
	bool bswap;
};

static inline const char *section_ptr(Elf_Data *data, size_t offset)
{
	return &((char *)data->d_buf)[offset];
}

static inline const char *section_end(Elf_Data *data)
{
	return section_ptr(data, data->d_size);
}

/*
 * An indexed DIE.
 *
 * DIEs with the same name but different tags or files are considered distinct.
 * We only compare the hash of the file name, not the string value, because a
 * 64-bit collision is unlikely enough, especially when also considering the
 * name and tag.
 */
struct drgn_dwarf_index_die {
	uint64_t tag;
	uint64_t file_name_hash;
	/*
	 * The next DIE with the same name (as an index into
	 * drgn_dwarf_index_shard::dies), or SIZE_MAX if this is the last DIE.
	 */
	size_t next;
	Dwfl_Module *module;
	uint64_t offset;
};

/*
 * The key is the DIE name. The value is the first DIE with that name (as an
 * index into drgn_dwarf_index_shard::dies).
 */
DEFINE_HASH_TABLE_FUNCTIONS(drgn_dwarf_index_die_map, string_hash, string_eq)
DEFINE_VECTOR_FUNCTIONS(drgn_dwarf_index_die_vector)

static inline size_t hash_pair_to_shard(struct hash_pair hp)
{
	/*
	 * The 8 most significant bits of the hash are used as the F14 tag, so
	 * we don't want to use those for sharding.
	 */
	return ((hp.first >>
		 (8 * sizeof(size_t) - 8 - DRGN_DWARF_INDEX_SHARD_BITS)) &
		(((size_t)1 << DRGN_DWARF_INDEX_SHARD_BITS) - 1));
}

static inline struct drgn_error *drgn_eof(void)
{
	return drgn_error_create(DRGN_ERROR_OTHER,
				 "debug information is truncated");
}

static inline bool skip_leb128(const char **ptr, const char *end)
{
	for (;;) {
		if (*ptr >= end)
			return false;
		if (!(*(const uint8_t *)(*ptr)++ & 0x80))
			return true;
	}
}

static inline struct drgn_error *read_uleb128(const char **ptr, const char *end,
					      uint64_t *value)
{
	int shift = 0;
	uint8_t byte;

	*value = 0;
	for (;;) {
		if (*ptr >= end)
			return drgn_eof();
		byte = *(const uint8_t *)*ptr;
		(*ptr)++;
		if (shift == 63 && byte > 1) {
			return drgn_error_create(DRGN_ERROR_OVERFLOW,
						 "ULEB128 overflowed unsigned 64-bit integer");
		}
		*value |= (uint64_t)(byte & 0x7f) << shift;
		shift += 7;
		if (!(byte & 0x80))
			break;
	}
	return NULL;
}

static inline struct drgn_error *read_uleb128_into_size_t(const char **ptr,
							  const char *end,
							  size_t *value)
{
	struct drgn_error *err;
	uint64_t tmp;

	if ((err = read_uleb128(ptr, end, &tmp)))
		return err;

	if (tmp > SIZE_MAX)
		return drgn_eof();
	*value = tmp;
	return NULL;
}

static void free_shards(struct drgn_dwarf_index *dindex, size_t n)
{
	size_t i;

	for (i = 0; i < n; i++) {
		drgn_dwarf_index_die_vector_deinit(&dindex->shards[i].dies);
		drgn_dwarf_index_die_map_deinit(&dindex->shards[i].map);
		omp_destroy_lock(&dindex->shards[i].lock);
	}
}

static void drgn_dwarf_module_destroy(struct drgn_dwarf_module *module)
{
	if (module) {
		dwfl_module_vector_deinit(&module->dwfl_modules);
		free(module->name);
		free(module->build_id);
		free(module);
	}
}

static void
drgn_dwfl_module_userdata_destroy(struct drgn_dwfl_module_userdata *userdata)
{
	if (userdata) {
		elf_end(userdata->elf);
		if (userdata->fd != -1)
			close(userdata->fd);
		free(userdata->path);
		free(userdata);
	}
}

struct drgn_dwfl_module_removed_arg {
	Dwfl *dwfl;
	bool finish_indexing;
	bool free_all;
};

static int drgn_dwfl_module_removed(Dwfl_Module *dwfl_module, void *userdatap,
				    const char *name, Dwarf_Addr base,
				    void *_arg)
{
	struct drgn_dwfl_module_removed_arg *arg = _arg;
	/*
	 * userdatap is actually a void ** like for the other libdwfl callbacks,
	 * but dwfl_report_end() has the wrong signature for the removed
	 * callback.
	 */
	struct drgn_dwfl_module_userdata *userdata = *(void **)userdatap;

	if (arg->finish_indexing && userdata &&
	    userdata->state == DRGN_DWARF_MODULE_INDEXING)
		userdata->state = DRGN_DWARF_MODULE_INDEXED;
	if (arg->free_all || !userdata ||
	    userdata->state != DRGN_DWARF_MODULE_INDEXED) {
		drgn_dwfl_module_userdata_destroy(userdata);
	} else {
		Dwarf_Addr end;

		/*
		 * The module was already indexed. Report it again so libdwfl
		 * doesn't remove it.
		 */
		dwfl_module_info(dwfl_module, NULL, NULL, &end, NULL, NULL,
				 NULL, NULL);
		dwfl_report_module(arg->dwfl, name, base, end);
	}
	return DWARF_CB_OK;
}

static void drgn_dwarf_module_finish_indexing(struct drgn_dwarf_index *dindex,
					      struct drgn_dwarf_module *module)
{
	module->state = DRGN_DWARF_MODULE_INDEXED;
	/*
	 * We don't need this anymore (but reinitialize it to empty so that
	 * drgn_dwarf_index_get_unindexed() skips this module).
	 */
	dwfl_module_vector_deinit(&module->dwfl_modules);
	dwfl_module_vector_init(&module->dwfl_modules);
	if (module->name) {
		int ret;

		ret = c_string_set_insert(&dindex->names,
					  (const char **)&module->name, NULL);
		/* drgn_dwarf_index_get_unindexed() should've reserved enough for us. */
		assert(ret != -1);
	}
}

static void drgn_dwarf_index_free_modules(struct drgn_dwarf_index *dindex,
					  bool finish_indexing, bool free_all)
{
	struct drgn_dwfl_module_removed_arg arg = {
		.dwfl = dindex->dwfl,
		.finish_indexing = finish_indexing,
		.free_all = free_all,
	};
	struct drgn_dwarf_module_table_iterator it;
	size_t i;

	for (it = drgn_dwarf_module_table_first(&dindex->module_table);
	     it.entry; ) {
		struct drgn_dwarf_module *module = *it.entry;

		if (finish_indexing &&
		    module->state == DRGN_DWARF_MODULE_INDEXING)
			drgn_dwarf_module_finish_indexing(dindex, module);
		if (free_all || module->state != DRGN_DWARF_MODULE_INDEXED) {
			it = drgn_dwarf_module_table_delete_iterator(&dindex->module_table,
								     it);
			drgn_dwarf_module_destroy(module);
		} else {
			it = drgn_dwarf_module_table_next(it);
		}
	}

	for (i = dindex->no_build_id.size; i-- > 0; ) {
		struct drgn_dwarf_module *module = dindex->no_build_id.data[i];

		if (finish_indexing &&
		    module->state == DRGN_DWARF_MODULE_INDEXING)
			drgn_dwarf_module_finish_indexing(dindex, module);
		if (free_all || module->state != DRGN_DWARF_MODULE_INDEXED) {
			dindex->no_build_id.size--;
			if (i != dindex->no_build_id.size) {
				dindex->no_build_id.data[i] =
					dindex->no_build_id.data[dindex->no_build_id.size];
			}
			drgn_dwarf_module_destroy(module);
		}
	}

	dwfl_report_begin(dindex->dwfl);
	dwfl_report_end(dindex->dwfl, drgn_dwfl_module_removed, &arg);
}

struct drgn_error *drgn_dwarf_index_init(struct drgn_dwarf_index *dindex,
					 const Dwfl_Callbacks *callbacks)
{
	size_t i;
	char *max_errors;

	dindex->dwfl = dwfl_begin(callbacks);
	if (!dindex->dwfl)
		return drgn_error_libdwfl();
	for (i = 0; i < ARRAY_SIZE(dindex->shards); i++) {
		struct drgn_dwarf_index_shard *shard = &dindex->shards[i];

		omp_init_lock(&shard->lock);
		drgn_dwarf_index_die_map_init(&shard->map);
		drgn_dwarf_index_die_vector_init(&shard->dies);
	}
	memset(&dindex->errors, 0, sizeof(dindex->errors));
	dindex->num_errors = 0;
	max_errors = getenv("DRGN_MAX_DEBUG_INFO_ERRORS");
	if (max_errors)
		dindex->max_errors = atoi(max_errors);
	else
		dindex->max_errors = 5;
	drgn_dwarf_module_table_init(&dindex->module_table);
	drgn_dwarf_module_vector_init(&dindex->no_build_id);
	c_string_set_init(&dindex->names);
	return NULL;
}

void drgn_dwarf_index_deinit(struct drgn_dwarf_index *dindex)
{
	if (!dindex)
		return;
	c_string_set_deinit(&dindex->names);
	drgn_dwarf_index_free_modules(dindex, false, true);
	assert(dindex->no_build_id.size == 0);
	assert(drgn_dwarf_module_table_size(&dindex->module_table) == 0);
	drgn_dwarf_module_vector_deinit(&dindex->no_build_id);
	drgn_dwarf_module_table_deinit(&dindex->module_table);
	free_shards(dindex, ARRAY_SIZE(dindex->shards));
	dwfl_end(dindex->dwfl);
}

void drgn_dwarf_index_report_begin(struct drgn_dwarf_index *dindex)
{
	dwfl_report_begin_add(dindex->dwfl);
}

struct drgn_error *
drgn_dwarf_index_report_error(struct drgn_dwarf_index *dindex, const char *name,
			      const char *message, struct drgn_error *err)
{
	if (err && err->code == DRGN_ERROR_NO_MEMORY) {
		/* Always fail hard if we're out of memory. */
		goto err;
	}
	if (dindex->num_errors == 0 &&
	    !string_builder_append(&dindex->errors,
				   "could not get debugging information for:"))
		goto err;
	if (dindex->num_errors < dindex->max_errors) {
		if (!string_builder_line_break(&dindex->errors))
			goto err;
		if (name && !string_builder_append(&dindex->errors, name))
			goto err;
		if (name && (message || err) &&
		    !string_builder_append(&dindex->errors, " ("))
			goto err;
		if (message && !string_builder_append(&dindex->errors, message))
			goto err;
		if (message && err &&
		    !string_builder_append(&dindex->errors, ": "))
			goto err;
		if (err && !string_builder_append_error(&dindex->errors, err))
			goto err;
		if (name && (message || err) &&
		    !string_builder_appendc(&dindex->errors, ')'))
			goto err;
	}
	dindex->num_errors++;
	drgn_error_destroy(err);
	return NULL;

err:
	drgn_error_destroy(err);
	return &drgn_enomem;
}

static void drgn_dwarf_index_reset_errors(struct drgn_dwarf_index *dindex)
{
	dindex->errors.len = 0;
	dindex->num_errors = 0;
}

static struct drgn_error *
drgn_dwarf_index_finalize_errors(struct drgn_dwarf_index *dindex)
{
	struct drgn_error *err;

	if (dindex->num_errors > dindex->max_errors &&
	    (!string_builder_line_break(&dindex->errors) ||
	     !string_builder_appendf(&dindex->errors, "... %u more",
				     dindex->num_errors - dindex->max_errors))) {
		drgn_dwarf_index_reset_errors(dindex);
		return &drgn_enomem;
	}
	if (dindex->num_errors) {
		err = drgn_error_from_string_builder(DRGN_ERROR_MISSING_DEBUG_INFO,
						     &dindex->errors);
		memset(&dindex->errors, 0, sizeof(dindex->errors));
		dindex->num_errors = 0;
		return err;
	} else {
		return NULL;
	}
}

static struct drgn_error *
drgn_dwarf_index_insert_module(struct drgn_dwarf_index *dindex,
			       const void *build_id, size_t build_id_len,
			       uint64_t start, uint64_t end, const char *name,
			       struct drgn_dwarf_module **ret)
{
	struct hash_pair hp;
	struct drgn_dwarf_module_table_iterator it;
	struct drgn_dwarf_module *module;

	if (build_id_len) {
		struct drgn_dwarf_module_key key = {
			.build_id = build_id,
			.build_id_len = build_id_len,
			.start = start,
			.end = end,
		};

		hp = drgn_dwarf_module_table_hash(&key);
		it = drgn_dwarf_module_table_search_hashed(&dindex->module_table,
							   &key, hp);
		if (it.entry) {
			module = *it.entry;
			goto out;
		}
	}

	module = malloc(sizeof(*module));
	if (!module)
		return &drgn_enomem;
	module->start = start;
	module->end = end;
	if (name) {
		module->name = strdup(name);
		if (!module->name)
			goto err_module;
	} else {
		module->name = NULL;
	}
	module->build_id_len = build_id_len;
	if (build_id_len) {
		module->build_id = malloc(build_id_len);
		if (!module->build_id)
			goto err_name;
		memcpy(module->build_id, build_id, build_id_len);
		if (drgn_dwarf_module_table_insert_searched(&dindex->module_table,
							    &module, hp,
							    &it) == -1) {
			free(module->build_id);
err_name:
			free(module->name);
err_module:
			free(module);
			return &drgn_enomem;
		}
	} else {
		module->build_id = NULL;
		if (!drgn_dwarf_module_vector_append(&dindex->no_build_id,
						     &module))
			goto err_name;
	}
	module->state = DRGN_DWARF_MODULE_NEW;
	dwfl_module_vector_init(&module->dwfl_modules);
out:
	*ret = module;
	return NULL;
}

struct drgn_error *drgn_dwarf_index_report_elf(struct drgn_dwarf_index *dindex,
					       const char *path, int fd,
					       Elf *elf, uint64_t start,
					       uint64_t end, const char *name,
					       bool *new_ret)
{
	struct drgn_error *err;
	const void *build_id;
	ssize_t build_id_len;
	struct drgn_dwarf_module *module;
	char *path_key = NULL;
	Dwfl_Module *dwfl_module;
	void **userdatap;
	struct drgn_dwfl_module_userdata *userdata;

	if (new_ret)
		*new_ret = false;

	build_id_len = dwelf_elf_gnu_build_id(elf, &build_id);
	if (build_id_len == -1) {
		err = drgn_dwarf_index_report_error(dindex, path, NULL,
						    drgn_error_libdwfl());
		goto free;
	}

	err = drgn_dwarf_index_insert_module(dindex, build_id, build_id_len,
					     start, end, name, &module);
	if (err)
		goto free;
	if (module->state == DRGN_DWARF_MODULE_INDEXED) {
		/* We've already indexed this module. */
		err = NULL;
		goto free;
	}

	path_key = realpath(path, NULL);
	if (!path_key) {
		path_key = strdup(path);
		if (!path_key) {
			err = &drgn_enomem;
			goto free;
		}
	}
	dwfl_module = dwfl_report_module(dindex->dwfl, path_key, start, end);
	if (!dwfl_module) {
		err = drgn_error_libdwfl();
		goto free;
	}

	dwfl_module_info(dwfl_module, &userdatap, NULL, NULL, NULL, NULL, NULL,
			 NULL);
	if (*userdatap) {
		/* We've already reported this file at this offset. */
		err = NULL;
		goto free;
	}

	userdata = malloc(sizeof(*userdata));
	if (!userdata) {
		err = &drgn_enomem;
		goto free;
	}
	userdata->path = path_key;
	userdata->fd = fd;
	userdata->elf = elf;
	userdata->state = DRGN_DWARF_MODULE_NEW;
	*userdatap = userdata;
	if (new_ret)
		*new_ret = true;

	if (!dwfl_module_vector_append(&module->dwfl_modules, &dwfl_module)) {
		/*
		 * NB: not goto free now that we're referencing the file from a
		 * Dwfl_Module.
		 */
		return &drgn_enomem;
	}
	return NULL;

free:
	elf_end(elf);
	close(fd);
	free(path_key);
	return err;
}

static int drgn_dwarf_index_report_dwfl_module(Dwfl_Module *dwfl_module,
					       void **userdatap,
					       const char *name,
					       Dwarf_Addr base, void *arg)
{
	struct drgn_error *err;
	struct drgn_dwarf_index *dindex = arg;
	struct drgn_dwfl_module_userdata *userdata = *userdatap;
	const unsigned char *build_id;
	int build_id_len;
	GElf_Addr build_id_vaddr;
	Dwarf_Addr end;
	struct drgn_dwarf_module *module;

	if (userdata) {
		/*
		 * This was either reported from
		 * drgn_dwarf_index_report_module() or already indexed.
		 */
		return DWARF_CB_OK;
	}

	build_id_len = dwfl_module_build_id(dwfl_module, &build_id,
					    &build_id_vaddr);
	if (build_id_len == -1) {
		err = drgn_dwarf_index_report_error(dindex, name, NULL,
						    drgn_error_libdwfl());
		if (err) {
			drgn_error_destroy(err);
			return DWARF_CB_ABORT;
		}
		return DWARF_CB_OK;
	}
	dwfl_module_info(dwfl_module, NULL, NULL, &end, NULL, NULL, NULL, NULL);

	err = drgn_dwarf_index_insert_module(dindex, build_id, build_id_len,
					     base, end, NULL, &module);
	if (err) {
		drgn_error_destroy(err);
		return DWARF_CB_ABORT;
	}

	userdata = malloc(sizeof(*userdata));
	if (!userdata)
		return DWARF_CB_ABORT;
	*userdatap = userdata;
	userdata->path = NULL;
	userdata->fd = -1;
	userdata->elf = NULL;
	if (module->state == DRGN_DWARF_MODULE_INDEXED) {
		/*
		 * We've already indexed this module. Don't index it again, but
		 * keep the Dwfl_Module.
		 */
		userdata->state = DRGN_DWARF_MODULE_INDEXING;
	} else {
		userdata->state = DRGN_DWARF_MODULE_NEW;
		if (!dwfl_module_vector_append(&module->dwfl_modules,
					       &dwfl_module))
			return DWARF_CB_ABORT;
	}
	return DWARF_CB_OK;
}

static struct drgn_error *
append_unindexed_module(struct drgn_dwarf_module *module,
			struct drgn_dwarf_module_vector *unindexed,
			size_t *num_names)
{
	if (!module->dwfl_modules.size) {
		/* This was either already indexed or had no new files. */
		return NULL;
	}
	if (!drgn_dwarf_module_vector_append(unindexed, &module))
		return &drgn_enomem;
	*num_names += 1;
	return NULL;
}

static struct drgn_error *
drgn_dwarf_index_get_unindexed(struct drgn_dwarf_index *dindex,
			       struct drgn_dwarf_module_vector *unindexed)
{
	struct drgn_error *err;
	size_t num_names = 0;
	struct drgn_dwarf_module_table_iterator it;
	size_t i;

	/*
	 * Walk the module table and no build ID lists, but skip modules with no
	 * Dwfl_Module (which may be because they were already indexed or
	 * because the files were already reported).
	 */
	for (it = drgn_dwarf_module_table_first(&dindex->module_table);
	     it.entry; it = drgn_dwarf_module_table_next(it)) {
		err = append_unindexed_module(*it.entry, unindexed, &num_names);
		if (err)
			return err;
	}
	for (i = dindex->no_build_id.size; i-- > 0; ) {
		struct drgn_dwarf_module *module = dindex->no_build_id.data[i];

		if (module->state == DRGN_DWARF_MODULE_INDEXED) {
			/*
			 * If this module is indexed, then every module before
			 * it must be indexed, so we can stop looking.
			 */
			break;
		}
		err = append_unindexed_module(module, unindexed, &num_names);
		if (err)
			return err;
	}
	if (num_names &&
	    !c_string_set_reserve(&dindex->names,
				  c_string_set_size(&dindex->names) + num_names))
		return &drgn_enomem;
	return NULL;
}

static struct drgn_error *apply_relocation(Elf_Data *data, uint64_t r_offset,
					   uint32_t r_type, int64_t r_addend,
					   uint64_t st_value)
{
	char *p;

	p = (char *)data->d_buf + r_offset;
	switch (r_type) {
	case R_X86_64_NONE:
		break;
	case R_X86_64_32:
		if (r_offset > SIZE_MAX - sizeof(uint32_t) ||
		    r_offset + sizeof(uint32_t) > data->d_size) {
			return drgn_error_create(DRGN_ERROR_OTHER,
						 "invalid relocation offset");
		}
		*(uint32_t *)p = st_value + r_addend;
		break;
	case R_X86_64_64:
		if (r_offset > SIZE_MAX - sizeof(uint64_t) ||
		    r_offset + sizeof(uint64_t) > data->d_size) {
			return drgn_error_create(DRGN_ERROR_OTHER,
						 "invalid relocation offset");
		}
		*(uint64_t *)p = st_value + r_addend;
		break;
	default:
		return drgn_error_format(DRGN_ERROR_OTHER,
					 "unimplemented relocation type %" PRIu32,
					 r_type);
	}
	return NULL;
}

static struct drgn_error *relocate_section(Elf_Scn *scn, Elf_Scn *rela_scn,
					   Elf_Scn *symtab_scn,
					   uint64_t *sh_addrs, size_t shdrnum)
{
	struct drgn_error *err;
	Elf_Data *data, *rela_data, *symtab_data;
	const Elf64_Rela *relocs;
	const Elf64_Sym *syms;
	size_t num_relocs, num_syms;
	size_t i;
	GElf_Shdr *shdr, shdr_mem;

	err = read_elf_section(scn, &data);
	if (err)
		return err;
	err = read_elf_section(rela_scn, &rela_data);
	if (err)
		return err;
	err = read_elf_section(symtab_scn, &symtab_data);
	if (err)
		return err;

	relocs = (Elf64_Rela *)rela_data->d_buf;
	num_relocs = rela_data->d_size / sizeof(Elf64_Rela);
	syms = (Elf64_Sym *)symtab_data->d_buf;
	num_syms = symtab_data->d_size / sizeof(Elf64_Sym);

	for (i = 0; i < num_relocs; i++) {
		const Elf64_Rela *reloc = &relocs[i];
		uint32_t r_sym, r_type;
		uint16_t st_shndx;
		uint64_t sh_addr;

		r_sym = ELF64_R_SYM(reloc->r_info);
		r_type = ELF64_R_TYPE(reloc->r_info);

		if (r_sym >= num_syms) {
			return drgn_error_create(DRGN_ERROR_OTHER,
						 "invalid relocation symbol");
		}
		st_shndx = syms[r_sym].st_shndx;
		if (st_shndx == 0) {
			sh_addr = 0;
		} else if (st_shndx < shdrnum) {
			sh_addr = sh_addrs[st_shndx - 1];
		} else {
			return drgn_error_create(DRGN_ERROR_OTHER,
						 "invalid symbol section index");
		}
		err = apply_relocation(data, reloc->r_offset, r_type,
				       reloc->r_addend,
				       sh_addr + syms[r_sym].st_value);
		if (err)
			return err;
	}

	/*
	 * Mark the relocation section as empty so that libdwfl doesn't try to
	 * apply it again.
	 */
	shdr = gelf_getshdr(rela_scn, &shdr_mem);
	if (!shdr)
		return drgn_error_libelf();
	shdr->sh_size = 0;
	if (!gelf_update_shdr(rela_scn, shdr))
		return drgn_error_libelf();
	rela_data->d_size = 0;
	return NULL;
}

/*
 * Before the debugging information in a relocatable ELF file (e.g., Linux
 * kernel module) can be used, it must have ELF relocations applied. This is
 * usually done by libdwfl. However, libdwfl is relatively slow at it. This is a
 * much faster implementation. It is only implemented for x86-64; for other
 * architectures, we can fall back to libdwfl.
 */
static struct drgn_error *apply_elf_relocations(Elf *elf)
{
	struct drgn_error *err;
	GElf_Ehdr ehdr_mem, *ehdr;
	size_t shdrnum, shstrndx;
	uint64_t *sh_addrs;
	Elf_Scn *scn;

	ehdr = gelf_getehdr(elf, &ehdr_mem);
	if (!ehdr)
		return drgn_error_libelf();

	if (ehdr->e_type != ET_REL ||
	    ehdr->e_machine != EM_X86_64 ||
	    ehdr->e_ident[EI_CLASS] != ELFCLASS64 ||
	    ehdr->e_ident[EI_DATA] !=
	    (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ ?
	     ELFDATA2LSB : ELFDATA2MSB)) {
		/* Unsupported; fall back to libdwfl. */
		return NULL;
	}

	if (elf_getshdrnum(elf, &shdrnum))
		return drgn_error_libelf();
	if (shdrnum > 1) {
		sh_addrs = calloc(shdrnum - 1, sizeof(*sh_addrs));
		if (!sh_addrs)
			return &drgn_enomem;

		scn = NULL;
		while ((scn = elf_nextscn(elf, scn))) {
			size_t ndx;

			ndx = elf_ndxscn(scn);
			if (ndx > 0 && ndx < shdrnum) {
				GElf_Shdr *shdr, shdr_mem;

				shdr = gelf_getshdr(scn, &shdr_mem);
				if (!shdr) {
					err = drgn_error_libelf();
					goto out;
				}
				sh_addrs[ndx - 1] = shdr->sh_addr;
			}
		}
	} else {
		sh_addrs = NULL;
	}

	if (elf_getshdrstrndx(elf, &shstrndx)) {
		err = drgn_error_libelf();
		goto out;
	}

	scn = NULL;
	while ((scn = elf_nextscn(elf, scn))) {
		GElf_Shdr *shdr, shdr_mem;
		const char *scnname;

		shdr = gelf_getshdr(scn, &shdr_mem);
		if (!shdr) {
			err = drgn_error_libelf();
			goto out;
		}

		if (shdr->sh_type != SHT_RELA)
			continue;

		scnname = elf_strptr(elf, shstrndx, shdr->sh_name);
		if (!scnname)
			continue;

		if (strstartswith(scnname, ".rela.debug_")) {
			Elf_Scn *info_scn, *link_scn;

			info_scn = elf_getscn(elf, shdr->sh_info);
			if (!info_scn) {
				err = drgn_error_libelf();
				goto out;
			}

			link_scn = elf_getscn(elf, shdr->sh_link);
			if (!link_scn) {
				err = drgn_error_libelf();
				goto out;
			}

			err = relocate_section(info_scn, scn, link_scn,
					       sh_addrs, shdrnum);
			if (err)
				goto out;
		}
	}
out:
	free(sh_addrs);
	return NULL;
}

static struct drgn_error *get_debug_sections(Elf *elf, Elf_Data **sections)
{
	struct drgn_error *err;
	size_t shstrndx;
	Elf_Scn *scn = NULL;
	size_t i;
	Elf_Data *debug_str;

	if (elf_getshdrstrndx(elf, &shstrndx))
		return drgn_error_libelf();

	while ((scn = elf_nextscn(elf, scn))) {
		GElf_Shdr *shdr, shdr_mem;
		const char *scnname;

		shdr = gelf_getshdr(scn, &shdr_mem);
		if (!shdr)
			return drgn_error_libelf();

		if (shdr->sh_type == SHT_NOBITS || (shdr->sh_flags & SHF_GROUP))
			continue;

		scnname = elf_strptr(elf, shstrndx, shdr->sh_name);
		if (!scnname)
			continue;

		for (i = 0; i < DRGN_DWARF_INDEX_NUM_SECTIONS; i++) {
			if (sections[i])
				continue;

			if (strcmp(scnname, section_name[i]) != 0)
				continue;

			err = read_elf_section(scn, &sections[i]);
			if (err)
				return err;
		}
	}

	for (i = 0; i < DRGN_DWARF_INDEX_NUM_SECTIONS; i++) {
		if (i != SECTION_DEBUG_LINE && !sections[i]) {
			return drgn_error_format(DRGN_ERROR_OTHER,
						 "no %s section",
						 section_name[i]);
		}
	}

	debug_str = sections[SECTION_DEBUG_STR];
	if (debug_str->d_size == 0 ||
	    ((char *)debug_str->d_buf)[debug_str->d_size - 1] != '\0') {
		return drgn_error_create(DRGN_ERROR_OTHER,
					 ".debug_str is not null terminated");
	}
	return NULL;
}

static struct drgn_error *read_compilation_unit_header(const char *ptr,
						       const char *end,
						       struct compilation_unit *cu)
{
	uint32_t tmp;
	uint16_t version;

	if (!read_u32(&ptr, end, cu->bswap, &tmp))
		return drgn_eof();
	cu->is_64_bit = tmp == UINT32_C(0xffffffff);
	if (cu->is_64_bit) {
		if (!read_u64(&ptr, end, cu->bswap, &cu->unit_length))
			return drgn_eof();
	} else {
		cu->unit_length = tmp;
	}

	if (!read_u16(&ptr, end, cu->bswap, &version))
		return drgn_eof();
	if (version != 2 && version != 3 && version != 4) {
		return drgn_error_format(DRGN_ERROR_OTHER,
					 "unknown DWARF CU version %" PRIu16,
					 version);
	}

	if (cu->is_64_bit) {
		if (!read_u64(&ptr, end, cu->bswap, &cu->debug_abbrev_offset))
			return drgn_eof();
	} else {
		if (!read_u32_into_u64(&ptr, end, cu->bswap,
				       &cu->debug_abbrev_offset))
			return drgn_eof();
	}

	if (!read_u8(&ptr, end, &cu->address_size))
		return drgn_eof();

	return NULL;
}

DEFINE_VECTOR(compilation_unit_vector, struct compilation_unit)

static struct drgn_error *
read_dwfl_module_cus(Dwfl_Module *dwfl_module,
		     struct drgn_dwfl_module_userdata *userdata,
		     struct compilation_unit_vector *cus)
{
	struct drgn_error *err;
	Dwarf *dwarf;
	Dwarf_Addr bias;
	Elf *elf;
	Elf_Data *sections[DRGN_DWARF_INDEX_NUM_SECTIONS] = {};
	bool bswap;
	const char *ptr, *end;

	if (userdata->elf) {
		err = apply_elf_relocations(userdata->elf);
		if (err)
			return err;
	}

	/*
	 * Note: not dwfl_module_getelf(), because then libdwfl applies
	 * ELF relocations to all sections, not just debug sections.
	 */
	dwarf = dwfl_module_getdwarf(dwfl_module, &bias);
	if (!dwarf)
		return drgn_error_libdwfl();

	elf = dwarf_getelf(dwarf);
	if (!elf)
		return drgn_error_libdw();

	err = get_debug_sections(elf, sections);
	if (err)
		return err;

	bswap = (elf_getident(elf, NULL)[EI_DATA] !=
		 (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ ?
		  ELFDATA2LSB : ELFDATA2MSB));

	ptr = section_ptr(sections[SECTION_DEBUG_INFO], 0);
	end = section_end(sections[SECTION_DEBUG_INFO]);
	while (ptr < end) {
		struct compilation_unit *cu;

		cu = compilation_unit_vector_append_entry(cus);
		if (!cu)
			return &drgn_enomem;
		cu->module = dwfl_module;
		memcpy(cu->sections, sections, sizeof(cu->sections));
		cu->ptr = ptr;
		cu->bswap = bswap;
		err = read_compilation_unit_header(ptr, end, cu);
		if (err)
			return err;

		ptr += (cu->is_64_bit ? 12 : 4) + cu->unit_length;
	}
	return NULL;
}

static struct drgn_error *read_module_cus(struct drgn_dwarf_module *module,
					  struct compilation_unit_vector *cus,
					  const char **name_ret)
{
	struct drgn_error *err;
	const size_t orig_cus_size = cus->size;
	size_t i;

	for (i = 0; i < module->dwfl_modules.size; i++) {
		Dwfl_Module *dwfl_module;
		void **userdatap;
		struct drgn_dwfl_module_userdata *userdata;

		dwfl_module = module->dwfl_modules.data[i];
		*name_ret = dwfl_module_info(dwfl_module, &userdatap, NULL,
					     NULL, NULL, NULL, NULL, NULL);
		userdata = *userdatap;
		err = read_dwfl_module_cus(dwfl_module, userdata, cus);
		if (err) {
			/*
			 * Ignore the error unless we have no more Dwfl_Modules
			 * to try.
			 */
			if (i == module->dwfl_modules.size - 1)
				return err;
			drgn_error_destroy(err);
			cus->size = orig_cus_size;
			continue;
		}
		userdata->state = DRGN_DWARF_MODULE_INDEXING;
		module->state = DRGN_DWARF_MODULE_INDEXING;
		return NULL;
	}
	DRGN_UNREACHABLE();
}

static struct drgn_error *read_cus(struct drgn_dwarf_index *dindex,
				   struct drgn_dwarf_module **unindexed,
				   size_t num_unindexed,
				   struct compilation_unit_vector *all_cus)
{
	struct drgn_error *err = NULL;

	#pragma omp parallel
	{
		struct compilation_unit_vector cus;
		size_t i;

		compilation_unit_vector_init(&cus);
		#pragma omp for schedule(dynamic)
		for (i = 0; i < num_unindexed; i++) {
			struct drgn_error *module_err;
			const char *name;

			if (err)
				continue;

			module_err = read_module_cus(unindexed[i], &cus, &name);
			if (module_err) {
				#pragma omp critical(drgn_read_cus)
				if (err) {
					drgn_error_destroy(module_err);
				} else {
					err = drgn_dwarf_index_report_error(dindex,
									    name,
									    NULL,
									    module_err);
				}
				continue;
			}
		}

		if (cus.size) {
			#pragma omp critical(drgn_read_cus)
			if (!err) {
				if (compilation_unit_vector_reserve(all_cus,
								    all_cus->size + cus.size)) {
					memcpy(all_cus->data + all_cus->size,
					       cus.data,
					       cus.size * sizeof(*cus.data));
					all_cus->size += cus.size;
				} else {
					err = &drgn_enomem;
				}
			}
		}
		compilation_unit_vector_deinit(&cus);
	}
	return err;
}

static struct drgn_error *read_abbrev_decl(const char **ptr, const char *end,
					   const struct compilation_unit *cu,
					   struct abbrev_table *abbrev)
{
	struct drgn_error *err;
	uint64_t code;
	uint32_t insn_index;
	uint64_t tag;
	uint8_t children;
	uint8_t die_flags;
	bool should_index;
	bool first = true;
	uint8_t insn;

	static_assert(ATTRIB_MAX_INSN == UINT8_MAX,
		      "maximum DWARF attribute instruction is invalid");

	if ((err = read_uleb128(ptr, end, &code)))
		return err;
	if (code == 0)
		return &drgn_stop;
	if (code != abbrev->decls.size + 1) {
		return drgn_error_create(DRGN_ERROR_OTHER,
					 "DWARF abbreviation table is not sequential");
	}

	insn_index = abbrev->insns.size;
	if (!uint32_vector_append(&abbrev->decls, &insn_index))
		return &drgn_enomem;

	if ((err = read_uleb128(ptr, end, &tag)))
		return err;

	switch (tag) {
	/* Types. */
	case DW_TAG_base_type:
	case DW_TAG_class_type:
	case DW_TAG_enumeration_type:
	case DW_TAG_structure_type:
	case DW_TAG_typedef:
	case DW_TAG_union_type:
	/* Variables. */
	case DW_TAG_variable:
	/* Constants. */
	case DW_TAG_enumerator:
	/* Functions. */
	case DW_TAG_subprogram:
		should_index = true;
		break;
	default:
		should_index = false;
		break;
	}

	if (should_index || tag == DW_TAG_compile_unit)
		die_flags = tag;
	else
		die_flags = 0;

	if (!read_u8(ptr, end, &children))
		return drgn_eof();
	if (children)
		die_flags |= TAG_FLAG_CHILDREN;

	for (;;) {
		uint64_t name, form;

		if ((err = read_uleb128(ptr, end, &name)))
			return err;
		if ((err = read_uleb128(ptr, end, &form)))
			return err;
		if (name == 0 && form == 0)
			break;

		if (name == DW_AT_sibling && tag != DW_TAG_enumeration_type) {
			/*
			 * If we are indexing enumerators, we must descend into
			 * DW_TAG_enumeration_type to find the DW_TAG_enumerator
			 * children instead of skipping to the sibling DIE.
			 */
			switch (form) {
			case DW_FORM_ref1:
				insn = ATTRIB_SIBLING_REF1;
				goto append_insn;
			case DW_FORM_ref2:
				insn = ATTRIB_SIBLING_REF2;
				goto append_insn;
			case DW_FORM_ref4:
				insn = ATTRIB_SIBLING_REF4;
				goto append_insn;
			case DW_FORM_ref8:
				insn = ATTRIB_SIBLING_REF8;
				goto append_insn;
			case DW_FORM_ref_udata:
				insn = ATTRIB_SIBLING_REF_UDATA;
				goto append_insn;
			default:
				break;
			}
		} else if (name == DW_AT_name && should_index) {
			switch (form) {
			case DW_FORM_strp:
				if (cu->is_64_bit)
					insn = ATTRIB_NAME_STRP8;
				else
					insn = ATTRIB_NAME_STRP4;
				goto append_insn;
			case DW_FORM_string:
				insn = ATTRIB_NAME_STRING;
				goto append_insn;
			default:
				break;
			}
		} else if (name == DW_AT_stmt_list &&
			   tag == DW_TAG_compile_unit &&
			   cu->sections[SECTION_DEBUG_LINE]) {
			switch (form) {
			case DW_FORM_data4:
				insn = ATTRIB_STMT_LIST_LINEPTR4;
				goto append_insn;
			case DW_FORM_data8:
				insn = ATTRIB_STMT_LIST_LINEPTR8;
				goto append_insn;
			case DW_FORM_sec_offset:
				if (cu->is_64_bit)
					insn = ATTRIB_STMT_LIST_LINEPTR8;
				else
					insn = ATTRIB_STMT_LIST_LINEPTR4;
				goto append_insn;
			default:
				break;
			}
		} else if (name == DW_AT_decl_file && should_index) {
			switch (form) {
			case DW_FORM_data1:
				insn = ATTRIB_DECL_FILE_DATA1;
				goto append_insn;
			case DW_FORM_data2:
				insn = ATTRIB_DECL_FILE_DATA2;
				goto append_insn;
			case DW_FORM_data4:
				insn = ATTRIB_DECL_FILE_DATA4;
				goto append_insn;
			case DW_FORM_data8:
				insn = ATTRIB_DECL_FILE_DATA8;
				goto append_insn;
			/*
			 * decl_file must be positive, so if the compiler uses
			 * DW_FORM_sdata for some reason, just treat it as
			 * udata.
			 */
			case DW_FORM_sdata:
			case DW_FORM_udata:
				insn = ATTRIB_DECL_FILE_UDATA;
				goto append_insn;
			default:
				break;
			}
		} else if (name == DW_AT_declaration) {
			/*
			 * In theory, this could be DW_FORM_flag with a value of
			 * zero, but in practice, GCC always uses
			 * DW_FORM_flag_present.
			 */
			die_flags |= TAG_FLAG_DECLARATION;
		} else if (name == DW_AT_specification && should_index) {
			switch (form) {
			case DW_FORM_ref1:
				insn = ATTRIB_SPECIFICATION_REF1;
				goto append_insn;
			case DW_FORM_ref2:
				insn = ATTRIB_SPECIFICATION_REF2;
				goto append_insn;
			case DW_FORM_ref4:
				insn = ATTRIB_SPECIFICATION_REF4;
				goto append_insn;
			case DW_FORM_ref8:
				insn = ATTRIB_SPECIFICATION_REF8;
				goto append_insn;
			case DW_FORM_ref_udata:
				insn = ATTRIB_SPECIFICATION_REF_UDATA;
				goto append_insn;
			default:
				break;
			}
		}

		switch (form) {
		case DW_FORM_addr:
			insn = cu->address_size;
			break;
		case DW_FORM_data1:
		case DW_FORM_ref1:
		case DW_FORM_flag:
			insn = 1;
			break;
		case DW_FORM_data2:
		case DW_FORM_ref2:
			insn = 2;
			break;
		case DW_FORM_data4:
		case DW_FORM_ref4:
			insn = 4;
			break;
		case DW_FORM_data8:
		case DW_FORM_ref8:
		case DW_FORM_ref_sig8:
			insn = 8;
			break;
		case DW_FORM_block1:
			insn = ATTRIB_BLOCK1;
			goto append_insn;
		case DW_FORM_block2:
			insn = ATTRIB_BLOCK2;
			goto append_insn;
		case DW_FORM_block4:
			insn = ATTRIB_BLOCK4;
			goto append_insn;
		case DW_FORM_exprloc:
			insn = ATTRIB_EXPRLOC;
			goto append_insn;
		case DW_FORM_sdata:
		case DW_FORM_udata:
		case DW_FORM_ref_udata:
			insn = ATTRIB_LEB128;
			goto append_insn;
		case DW_FORM_ref_addr:
		case DW_FORM_sec_offset:
		case DW_FORM_strp:
			insn = cu->is_64_bit ? 8 : 4;
			break;
		case DW_FORM_string:
			insn = ATTRIB_STRING;
			goto append_insn;
		case DW_FORM_flag_present:
			continue;
		case DW_FORM_indirect:
			return drgn_error_create(DRGN_ERROR_OTHER,
						 "DW_FORM_indirect is not implemented");
		default:
			return drgn_error_format(DRGN_ERROR_OTHER,
						 "unknown attribute form %" PRIu64,
						 form);
		}

		if (!first) {
			uint8_t last_insn;

			last_insn = abbrev->insns.data[abbrev->insns.size - 1];
			if (last_insn + insn <= INSN_MAX_SKIP) {
				abbrev->insns.data[abbrev->insns.size - 1] += insn;
				continue;
			} else if (last_insn < INSN_MAX_SKIP) {
				insn = last_insn + insn - INSN_MAX_SKIP;
				abbrev->insns.data[abbrev->insns.size - 1] =
					INSN_MAX_SKIP;
			}
		}

append_insn:
		first = false;
		if (!uint8_vector_append(&abbrev->insns, &insn))
			return &drgn_enomem;
	}
	insn = 0;
	if (!uint8_vector_append(&abbrev->insns, &insn) ||
	    !uint8_vector_append(&abbrev->insns, &die_flags))
		return &drgn_enomem;
	return NULL;
}

static struct drgn_error *read_abbrev_table(const char *ptr, const char *end,
					    const struct compilation_unit *cu,
					    struct abbrev_table *abbrev)
{
	struct drgn_error *err;

	for (;;) {
		err = read_abbrev_decl(&ptr, end, cu, abbrev);
		if (err && err->code == DRGN_ERROR_STOP)
			break;
		else if (err)
			return err;
	}
	return NULL;
}

static struct drgn_error *skip_lnp_header(struct compilation_unit *cu,
					  const char **ptr, const char *end)
{
	uint32_t tmp;
	bool is_64_bit;
	uint16_t version;
	uint8_t opcode_base;

	if (!read_u32(ptr, end, cu->bswap, &tmp))
		return drgn_eof();
	is_64_bit = tmp == UINT32_C(0xffffffff);
	if (is_64_bit)
		*ptr += sizeof(uint64_t);

	if (!read_u16(ptr, end, cu->bswap, &version))
		return drgn_eof();
	if (version != 2 && version != 3 && version != 4) {
		return drgn_error_format(DRGN_ERROR_OTHER,
					 "unknown DWARF LNP version %" PRIu16,
					 version);
	}

	/*
	 * header_length
	 * minimum_instruction_length
	 * maximum_operations_per_instruction (DWARF 4 only)
	 * default_is_stmt
	 * line_base
	 * line_range
	 */
	*ptr += (is_64_bit ? 8 : 4) + 4 + (version >= 4);

	if (!read_u8(ptr, end, &opcode_base))
		return drgn_eof();
	/* standard_opcode_lengths */
	*ptr += opcode_base - 1;

	return NULL;
}

/*
 * Hash the canonical path of a directory. Components are hashed in reverse
 * order. We always include a trailing slash.
 */
static void hash_directory(struct siphash *hash, const char *path,
			   size_t path_len)
{
	struct path_iterator it = {
		.components = (struct path_iterator_component []){
			{ path, path_len, },
		},
		.num_components = 1,
	};
	const char *component;
	size_t component_len;

	while (path_iterator_next(&it, &component, &component_len)) {
		siphash_update(hash, component, component_len);
		siphash_update(hash, "/", 1);
	}
}

DEFINE_VECTOR(siphash_vector, struct siphash)

static struct drgn_error *
read_file_name_table(struct drgn_dwarf_index *dindex,
		     struct compilation_unit *cu, size_t stmt_list,
		     struct uint64_vector *file_name_table)
{
	/*
	 * We don't care about hash flooding attacks, so don't bother with the
	 * random key.
	 */
	static const uint64_t siphash_key[2];
	struct drgn_error *err;
	Elf_Data *debug_line = cu->sections[SECTION_DEBUG_LINE];
	const char *ptr = section_ptr(debug_line, stmt_list);
	const char *end = section_end(debug_line);
	struct siphash_vector directories;

	siphash_vector_init(&directories);

	err = skip_lnp_header(cu, &ptr, end);
	if (err)
		return err;

	for (;;) {
		struct siphash *hash;
		const char *path;
		size_t path_len;

		if (!read_string(&ptr, end, &path, &path_len))
			return drgn_eof();
		if (!path_len)
			break;

		hash = siphash_vector_append_entry(&directories);
		if (!hash) {
			err = &drgn_enomem;
			goto out;
		}
		siphash_init(hash, siphash_key);
		hash_directory(hash, path, path_len);
	}

	for (;;) {
		const char *path;
		size_t path_len;
		uint64_t directory_index;
		struct siphash hash;
		uint64_t file_name_hash;

		if (!read_string(&ptr, end, &path, &path_len)) {
			err = drgn_eof();
			goto out;
		}
		if (!path_len)
			break;

		if ((err = read_uleb128(&ptr, end, &directory_index)))
			goto out;
		/* mtime, size */
		if (!skip_leb128(&ptr, end) || !skip_leb128(&ptr, end)) {
			err = drgn_eof();
			goto out;
		}

		if (directory_index > directories.size) {
			err = drgn_error_format(DRGN_ERROR_OTHER,
						"directory index %" PRIu64 " is invalid",
						directory_index);
			goto out;
		}

		if (directory_index)
			hash = directories.data[directory_index - 1];
		else
			siphash_init(&hash, siphash_key);
		siphash_update(&hash, path, path_len);

		file_name_hash = siphash_final(&hash);
		if (!uint64_vector_append(file_name_table, &file_name_hash)) {
			err = &drgn_enomem;
			goto out;
		}
	}

	err = NULL;
out:
	siphash_vector_deinit(&directories);
	return err;
}

static bool append_die_entry(struct drgn_dwarf_index_shard *shard, uint64_t tag,
			     uint64_t file_name_hash, Dwfl_Module *module,
			     uint64_t offset)
{
	struct drgn_dwarf_index_die *die;

	die = drgn_dwarf_index_die_vector_append_entry(&shard->dies);
	if (!die)
		return false;
	die->tag = tag;
	die->file_name_hash = file_name_hash;
	die->module = module;
	die->offset = offset;
	die->next = SIZE_MAX;
	return true;
}

static struct drgn_error *index_die(struct drgn_dwarf_index *dindex,
				    const char *name, uint64_t tag,
				    uint64_t file_name_hash,
				    Dwfl_Module *module, uint64_t offset)
{
	struct drgn_error *err;
	struct drgn_dwarf_index_die_map_entry entry = {
		.key = {
			.str = name,
			.len = strlen(name),
		},
	};
	struct hash_pair hp;
	struct drgn_dwarf_index_shard *shard;
	struct drgn_dwarf_index_die_map_iterator it;
	size_t index;
	struct drgn_dwarf_index_die *die;

	hp = drgn_dwarf_index_die_map_hash(&entry.key);
	shard = &dindex->shards[hash_pair_to_shard(hp)];
	omp_set_lock(&shard->lock);
	it = drgn_dwarf_index_die_map_search_hashed(&shard->map, &entry.key,
						    hp);
	if (!it.entry) {
		if (!append_die_entry(shard, tag, file_name_hash, module,
				      offset)) {
			err = &drgn_enomem;
			goto out;
		}
		entry.value = shard->dies.size - 1;
		if (drgn_dwarf_index_die_map_insert_searched(&shard->map,
							     &entry, hp,
							     NULL) == 1)
			err = NULL;
		else
			err = &drgn_enomem;
		goto out;
	}

	die = &shard->dies.data[it.entry->value];
	for (;;) {
		if (die->tag == tag &&
		    die->file_name_hash == file_name_hash) {
			err = NULL;
			goto out;
		}

		if (die->next == SIZE_MAX)
			break;
		die = &shard->dies.data[die->next];
	}

	index = die - shard->dies.data;
	if (!append_die_entry(shard, tag, file_name_hash, module, offset)) {
		err = &drgn_enomem;
		goto out;
	}
	shard->dies.data[index].next = shard->dies.size - 1;
	err = NULL;
out:
	omp_unset_lock(&shard->lock);
	return err;
}

struct die {
	const char *sibling;
	const char *name;
	size_t stmt_list;
	size_t decl_file;
	const char *specification;
	uint8_t flags;
};

static struct drgn_error *read_die(struct compilation_unit *cu,
				   const struct abbrev_table *abbrev,
				   const char **ptr, const char *end,
				   const char *debug_str_buffer,
				   const char *debug_str_end, struct die *die)
{
	struct drgn_error *err;
	uint64_t code;
	uint8_t *insnp;
	uint8_t insn;

	if ((err = read_uleb128(ptr, end, &code)))
		return err;
	if (code == 0)
		return &drgn_stop;

	if (code < 1 || code > abbrev->decls.size) {
		return drgn_error_format(DRGN_ERROR_OTHER,
					 "unknown abbreviation code %" PRIu64,
					 code);
	}
	insnp = &abbrev->insns.data[abbrev->decls.data[code - 1]];

	while ((insn = *insnp++)) {
		size_t skip, tmp;

		switch (insn) {
		case ATTRIB_BLOCK1:
			if (!read_u8_into_size_t(ptr, end, &skip))
				return drgn_eof();
			goto skip;
		case ATTRIB_BLOCK2:
			if (!read_u16_into_size_t(ptr, end, cu->bswap, &skip))
				return drgn_eof();
			goto skip;
		case ATTRIB_BLOCK4:
			if (!read_u32_into_size_t(ptr, end, cu->bswap, &skip))
				return drgn_eof();
			goto skip;
		case ATTRIB_EXPRLOC:
			if ((err = read_uleb128_into_size_t(ptr, end, &skip)))
				return err;
			goto skip;
		case ATTRIB_LEB128:
			if (!skip_leb128(ptr, end))
				return drgn_eof();
			break;
		case ATTRIB_NAME_STRING:
			die->name = *ptr;
			/* fallthrough */
		case ATTRIB_STRING:
			if (!skip_string(ptr, end))
				return drgn_eof();
			break;
		case ATTRIB_SIBLING_REF1:
			if (!read_u8_into_size_t(ptr, end, &tmp))
				return drgn_eof();
			goto sibling;
		case ATTRIB_SIBLING_REF2:
			if (!read_u16_into_size_t(ptr, end, cu->bswap, &tmp))
				return drgn_eof();
			goto sibling;
		case ATTRIB_SIBLING_REF4:
			if (!read_u32_into_size_t(ptr, end, cu->bswap, &tmp))
				return drgn_eof();
			goto sibling;
		case ATTRIB_SIBLING_REF8:
			if (!read_u64_into_size_t(ptr, end, cu->bswap, &tmp))
				return drgn_eof();
			goto sibling;
		case ATTRIB_SIBLING_REF_UDATA:
			if ((err = read_uleb128_into_size_t(ptr, end, &tmp)))
				return err;
sibling:
			if (!read_in_bounds(cu->ptr, end, tmp))
				return drgn_eof();
			die->sibling = &cu->ptr[tmp];
			__builtin_prefetch(die->sibling);
			break;
		case ATTRIB_NAME_STRP4:
			if (!read_u32_into_size_t(ptr, end, cu->bswap, &tmp))
				return drgn_eof();
			goto strp;
		case ATTRIB_NAME_STRP8:
			if (!read_u64_into_size_t(ptr, end, cu->bswap, &tmp))
				return drgn_eof();
strp:
			if (!read_in_bounds(debug_str_buffer, debug_str_end,
					    tmp))
				return drgn_eof();
			die->name = &debug_str_buffer[tmp];
			__builtin_prefetch(die->name);
			break;
		case ATTRIB_STMT_LIST_LINEPTR4:
			if (!read_u32_into_size_t(ptr, end, cu->bswap,
						  &die->stmt_list))
				return drgn_eof();
			break;
		case ATTRIB_STMT_LIST_LINEPTR8:
			if (!read_u64_into_size_t(ptr, end, cu->bswap,
						  &die->stmt_list))
				return drgn_eof();
			break;
		case ATTRIB_DECL_FILE_DATA1:
			if (!read_u8_into_size_t(ptr, end, &die->decl_file))
				return drgn_eof();
			break;
		case ATTRIB_DECL_FILE_DATA2:
			if (!read_u16_into_size_t(ptr, end, cu->bswap,
						  &die->decl_file))
				return drgn_eof();
			break;
		case ATTRIB_DECL_FILE_DATA4:
			if (!read_u32_into_size_t(ptr, end, cu->bswap,
						  &die->decl_file))
				return drgn_eof();
			break;
		case ATTRIB_DECL_FILE_DATA8:
			if (!read_u64_into_size_t(ptr, end, cu->bswap,
						  &die->decl_file))
				return drgn_eof();
			break;
		case ATTRIB_DECL_FILE_UDATA:
			if ((err = read_uleb128_into_size_t(ptr, end,
							    &die->decl_file)))
				return err;
			break;
		case ATTRIB_SPECIFICATION_REF1:
			if (!read_u8_into_size_t(ptr, end, &tmp))
				return drgn_eof();
			goto specification;
		case ATTRIB_SPECIFICATION_REF2:
			if (!read_u16_into_size_t(ptr, end, cu->bswap, &tmp))
				return drgn_eof();
			goto specification;
		case ATTRIB_SPECIFICATION_REF4:
			if (!read_u32_into_size_t(ptr, end, cu->bswap, &tmp))
				return drgn_eof();
			goto specification;
		case ATTRIB_SPECIFICATION_REF8:
			if (!read_u64_into_size_t(ptr, end, cu->bswap, &tmp))
				return drgn_eof();
			goto specification;
		case ATTRIB_SPECIFICATION_REF_UDATA:
			if ((err = read_uleb128_into_size_t(ptr, end, &tmp)))
				return err;
specification:
			if (!read_in_bounds(cu->ptr, end, tmp))
				return drgn_eof();
			die->specification = &cu->ptr[tmp];
			__builtin_prefetch(die->specification);
			break;
		default:
			skip = insn;
skip:
			if (!read_in_bounds(*ptr, end, skip))
				return drgn_eof();
			*ptr += skip;
			break;
		}
	}

	die->flags = *insnp;

	return NULL;
}

static struct drgn_error *index_cu(struct drgn_dwarf_index *dindex,
				   struct compilation_unit *cu)
{
	struct drgn_error *err;
	struct abbrev_table abbrev;
	struct uint64_vector file_name_table;
	Elf_Data *debug_abbrev = cu->sections[SECTION_DEBUG_ABBREV];
	const char *debug_abbrev_end = section_end(debug_abbrev);
	const char *ptr = &cu->ptr[cu->is_64_bit ? 23 : 11];
	const char *end = &cu->ptr[(cu->is_64_bit ? 12 : 4) + cu->unit_length];
	Elf_Data *debug_info = cu->sections[SECTION_DEBUG_INFO];
	const char *debug_info_buffer = section_ptr(debug_info, 0);
	Elf_Data *debug_str = cu->sections[SECTION_DEBUG_STR];
	const char *debug_str_buffer = section_ptr(debug_str, 0);
	const char *debug_str_end = section_end(debug_str);
	unsigned int depth = 0;
	uint64_t enum_die_offset = 0;

	abbrev_table_init(&abbrev);
	uint64_vector_init(&file_name_table);

	if ((err = read_abbrev_table(section_ptr(debug_abbrev,
						 cu->debug_abbrev_offset),
				     debug_abbrev_end, cu, &abbrev)))
		goto out;

	for (;;) {
		struct die die = {
			.stmt_list = SIZE_MAX,
		};
		uint64_t die_offset = ptr - debug_info_buffer;
		uint64_t tag;

		err = read_die(cu, &abbrev, &ptr, end, debug_str_buffer,
			       debug_str_end, &die);
		if (err && err->code == DRGN_ERROR_STOP) {
			depth--;
			if (depth == 1)
				enum_die_offset = 0;
			else if (depth == 0)
				break;
			continue;
		} else if (err) {
			goto out;
		}

		tag = die.flags & TAG_MASK;
		if (tag == DW_TAG_compile_unit) {
			if (depth == 0 && die.stmt_list != SIZE_MAX &&
			    (err = read_file_name_table(dindex, cu,
							die.stmt_list,
							&file_name_table)))
				goto out;
		} else if (tag && !(die.flags & TAG_FLAG_DECLARATION)) {
			uint64_t file_name_hash;

			/*
			 * NB: the enumerator name points to the
			 * enumeration_type DIE instead of the enumerator DIE.
			 */
			if (depth == 1 && tag == DW_TAG_enumeration_type)
				enum_die_offset = die_offset;
			else if (depth == 2 && tag == DW_TAG_enumerator &&
				 enum_die_offset)
				die_offset = enum_die_offset;
			else if (depth != 1)
				goto next;

			if (die.specification && (!die.name || !die.decl_file)) {
				struct die decl = {};
				const char *decl_ptr = die.specification;

				if ((err = read_die(cu, &abbrev, &decl_ptr, end,
						    debug_str_buffer,
						    debug_str_end, &decl)))
					goto out;
				if (!die.name && decl.name)
					die.name = decl.name;
				if (!die.decl_file && decl.decl_file)
					die.decl_file = decl.decl_file;
			}

			if (die.name) {
				if (die.decl_file > file_name_table.size) {
					err = drgn_error_format(DRGN_ERROR_OTHER,
								"invalid DW_AT_decl_file %zu",
								die.decl_file);
					goto out;
				}
				if (die.decl_file)
					file_name_hash = file_name_table.data[die.decl_file - 1];
				else
					file_name_hash = 0;
				if ((err = index_die(dindex, die.name, tag,
						     file_name_hash, cu->module,
						     die_offset)))
					goto out;
			}
		}

next:
		if (die.flags & TAG_FLAG_CHILDREN) {
			if (die.sibling)
				ptr = die.sibling;
			else
				depth++;
		} else if (depth == 0) {
			break;
		}
	}

	err = NULL;
out:
	uint64_vector_deinit(&file_name_table);
	abbrev_table_deinit(&abbrev);
	return err;
}

static void rollback_dwarf_index(struct drgn_dwarf_index *dindex)
{
	size_t i;

	for (i = 0; i < ARRAY_SIZE(dindex->shards); i++) {
		struct drgn_dwarf_index_shard *shard;
		struct drgn_dwarf_index_die *die;
		struct drgn_dwarf_index_die_map_iterator it;
		size_t index;

		shard = &dindex->shards[i];

		/*
		 * Because we're deleting everything that was added since the
		 * last update, we can just shrink the dies array to the first
		 * entry that was added for this update.
		 */
		while (shard->dies.size) {
			void **userdatap;
			struct drgn_dwfl_module_userdata *userdata;

			die = &shard->dies.data[shard->dies.size - 1];
			dwfl_module_info(die->module, &userdatap, NULL,
					 NULL, NULL, NULL, NULL, NULL);
			userdata = *userdatap;
			if (userdata->state == DRGN_DWARF_MODULE_INDEXED)
				break;
			else
				shard->dies.size--;
		}

		/*
		 * The new entries may be chained off of existing entries;
		 * unchain them. Note that any entries chained off of the new
		 * entries must also be new, so there's no need to preserve
		 * them.
		 */
		for (index = 0; index < shard->dies.size; i++) {
			die = &shard->dies.data[index];
			if (die->next != SIZE_MAX &&
			    die->next >= shard->dies.size)
				die->next = SIZE_MAX;
		}

		/* Finally, delete the new entries in the map. */
		for (it = drgn_dwarf_index_die_map_first(&shard->map);
		     it.entry; ) {
			if (it.entry->value >= shard->dies.size) {
				it = drgn_dwarf_index_die_map_delete_iterator(&shard->map,
									      it);
			} else {
				it = drgn_dwarf_index_die_map_next(it);
			}
		}
	}
}

static struct drgn_error *index_cus(struct drgn_dwarf_index *dindex,
				    struct compilation_unit *cus,
				    size_t num_cus)
{
	struct drgn_error *err = NULL;
	size_t i;

	#pragma omp parallel for schedule(dynamic)
	for (i = 0; i < num_cus; i++) {
		struct drgn_error *cu_err;

		if (err)
			continue;

		cu_err = index_cu(dindex, &cus[i]);
		if (cu_err) {
			#pragma omp critical(drgn_index_cus)
			if (err)
				drgn_error_destroy(cu_err);
			else
				err = cu_err;
		}
	}
	return err;
}

/*
 * Like drgn_dwarf_index_report_end(), but doesn't finalize reported errors or
 * free unindexed modules on success.
 */
static struct drgn_error *
drgn_dwarf_index_report_end_internal(struct drgn_dwarf_index *dindex,
				     bool report_from_dwfl)
{
	struct drgn_error *err;
	struct drgn_dwarf_module_vector unindexed;
	struct compilation_unit_vector cus;

	dwfl_report_end(dindex->dwfl, NULL, NULL);
	if (report_from_dwfl &&
	    dwfl_getmodules(dindex->dwfl, drgn_dwarf_index_report_dwfl_module,
			    dindex, 0)) {
		err = &drgn_enomem;
		goto err;
	}
	drgn_dwarf_module_vector_init(&unindexed);
	compilation_unit_vector_init(&cus);
	err = drgn_dwarf_index_get_unindexed(dindex, &unindexed);
	if (err)
		goto err;
	err = read_cus(dindex, unindexed.data, unindexed.size, &cus);
	if (err)
		goto err;
	/*
	 * After this point, if we hit an error, then we have to roll back the
	 * index.
	 */
	err = index_cus(dindex, cus.data, cus.size);
	if (err) {
		rollback_dwarf_index(dindex);
		goto err;
	}

out:
	compilation_unit_vector_deinit(&cus);
	drgn_dwarf_module_vector_deinit(&unindexed);
	return err;

err:
	drgn_dwarf_index_free_modules(dindex, false, false);
	drgn_dwarf_index_reset_errors(dindex);
	goto out;
}

struct drgn_error *drgn_dwarf_index_report_end(struct drgn_dwarf_index *dindex,
					       bool report_from_dwfl)
{
	struct drgn_error *err;

	err = drgn_dwarf_index_report_end_internal(dindex, report_from_dwfl);
	if (err)
		return err;
	err = drgn_dwarf_index_finalize_errors(dindex);
	if (err && err->code != DRGN_ERROR_MISSING_DEBUG_INFO) {
		rollback_dwarf_index(dindex);
		drgn_dwarf_index_free_modules(dindex, false, false);
		return err;
	}
	drgn_dwarf_index_free_modules(dindex, true, false);
	return err;
}

struct drgn_error *drgn_dwarf_index_flush(struct drgn_dwarf_index *dindex,
					  bool report_from_dwfl)
{
	struct drgn_error *err;

	err = drgn_dwarf_index_report_end_internal(dindex, report_from_dwfl);
	if (err)
		return err;
	drgn_dwarf_index_free_modules(dindex, true, false);
	drgn_dwarf_index_report_begin(dindex);
	return NULL;
}

void drgn_dwarf_index_report_abort(struct drgn_dwarf_index *dindex)
{
	dwfl_report_end(dindex->dwfl, NULL, NULL);
	drgn_dwarf_index_free_modules(dindex, false, false);
	drgn_dwarf_index_reset_errors(dindex);
}

bool drgn_dwarf_index_is_indexed(struct drgn_dwarf_index *dindex,
				 const char *name)
{
	return c_string_set_search(&dindex->names, &name).entry != NULL;
}

void drgn_dwarf_index_iterator_init(struct drgn_dwarf_index_iterator *it,
				    struct drgn_dwarf_index *dindex,
				    const char *name, size_t name_len,
				    const uint64_t *tags, size_t num_tags)
{
	it->dindex = dindex;
	if (name) {
		struct string key = {
			.str = name,
			.len = name_len,
		};
		struct hash_pair hp;
		struct drgn_dwarf_index_shard *shard;
		struct drgn_dwarf_index_die_map_iterator map_it;

		hp = drgn_dwarf_index_die_map_hash(&key);
		it->shard = hash_pair_to_shard(hp);
		shard = &dindex->shards[it->shard];
		map_it = drgn_dwarf_index_die_map_search_hashed(&shard->map,
								&key, hp);
		it->index = map_it.entry ? map_it.entry->value : SIZE_MAX;
		it->any_name = false;
	} else {
		it->index = 0;
		for (it->shard = 0; it->shard < ARRAY_SIZE(dindex->shards);
		     it->shard++) {
			if (dindex->shards[it->shard].dies.size)
				break;
		}
		it->any_name = true;
	}
	it->tags = tags;
	it->num_tags = num_tags;
}

static inline bool
drgn_dwarf_index_iterator_matches_tag(struct drgn_dwarf_index_iterator *it,
				      struct drgn_dwarf_index_die *die)
{
	size_t i;

	if (it->num_tags == 0)
		return true;
	for (i = 0; i < it->num_tags; i++) {
		if (die->tag == it->tags[i])
			return true;
	}
	return false;
}

struct drgn_error *
drgn_dwarf_index_iterator_next(struct drgn_dwarf_index_iterator *it,
			       Dwarf_Die *die_ret, uint64_t *bias_ret)
{
	struct drgn_dwarf_index *dindex = it->dindex;
	struct drgn_dwarf_index_die *die;
	Dwarf *dwarf;
	Dwarf_Addr bias;

	if (it->any_name) {
		for (;;) {
			struct drgn_dwarf_index_shard *shard;

			if (it->shard >= ARRAY_SIZE(dindex->shards))
				return &drgn_stop;

			shard = &dindex->shards[it->shard];
			die = &shard->dies.data[it->index];

			if (++it->index >= shard->dies.size) {
				it->index = 0;
				while (++it->shard < ARRAY_SIZE(dindex->shards)) {
					if (dindex->shards[it->shard].dies.size)
						break;
				}
			}

			if (drgn_dwarf_index_iterator_matches_tag(it, die))
				break;
		}
	} else {
		for (;;) {
			struct drgn_dwarf_index_shard *shard;

			if (it->index == SIZE_MAX)
				return &drgn_stop;

			shard = &dindex->shards[it->shard];
			die = &shard->dies.data[it->index];

			it->index = die->next;

			if (drgn_dwarf_index_iterator_matches_tag(it, die))
				break;
		}
	}

	dwarf = dwfl_module_getdwarf(die->module, &bias);
	if (!dwarf)
		return drgn_error_libdwfl();
	if (!dwarf_offdie(dwarf, die->offset, die_ret))
		return drgn_error_libdw();
	if (bias_ret)
		*bias_ret = bias;
	return NULL;
}