// Copyright (c) Meta Platforms, Inc. and affiliates.
// SPDX-License-Identifier: LGPL-2.1-or-later
/**
* @file
*
* DWARF and .eh_frame support.
*
* See @ref DebugInfo.
*/
#ifndef DRGN_DEBUG_INFO_DWARF_H
#define DRGN_DEBUG_INFO_DWARF_H
/**
* @ingroup DebugInfo
*
* @{
*/
#include <elfutils/libdw.h>
#include "cfi.h"
#include "drgn.h"
#include "hash_table.h"
#include "vector.h"
struct drgn_debug_info;
struct drgn_elf_file;
struct drgn_module;
struct drgn_register_state;
/** DWARF Frame Description Entry. */
struct drgn_dwarf_fde {
uint64_t initial_location;
uint64_t address_range;
/* CIE for this FDE as an index into drgn_dwarf_cfi::cies. */
size_t cie;
const char *instructions;
size_t instructions_size;
};
/** DWARF Call Frame Information. */
struct drgn_dwarf_cfi {
/** Array of DWARF Common Information Entries. */
struct drgn_dwarf_cie *cies;
/**
* Array of DWARF Frame Description Entries sorted by initial_location.
*/
struct drgn_dwarf_fde *fdes;
/** Number of elements in @ref drgn_dwarf_cfi::fdes. */
size_t num_fdes;
};
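/*
 * Illustrative sketch, not part of drgn: because drgn_dwarf_cfi::fdes is
 * sorted by initial_location, the FDE covering a program counter can be
 * located with a binary search like this. The _sketch name is hypothetical.
 */
static inline struct drgn_dwarf_fde *
drgn_dwarf_cfi_find_fde_sketch(struct drgn_dwarf_cfi *cfi, uint64_t pc)
{
	size_t lo = 0, hi = cfi->num_fdes;
	while (lo < hi) {
		size_t mid = lo + (hi - lo) / 2;
		if (pc < cfi->fdes[mid].initial_location)
			hi = mid;
		else
			lo = mid + 1;
	}
	/* lo is now the first FDE starting after pc; check the one before it. */
	if (lo == 0)
		return NULL;
	struct drgn_dwarf_fde *fde = &cfi->fdes[lo - 1];
	return pc - fde->initial_location < fde->address_range ? fde : NULL;
}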
/** DWARF debugging information for a @ref drgn_module. */
struct drgn_module_dwarf_info {
/** Call Frame Information from .debug_frame. */
struct drgn_dwarf_cfi debug_frame;
/** Call Frame Information from .eh_frame. */
struct drgn_dwarf_cfi eh_frame;
/** Base for `DW_EH_PE_pcrel`. */
uint64_t pcrel_base;
/** Base for `DW_EH_PE_textrel`. */
uint64_t textrel_base;
/** Base for `DW_EH_PE_datarel`. */
uint64_t datarel_base;
};
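/*
 * Illustrative sketch, not part of drgn: how the *_base fields above would be
 * selected for an .eh_frame pointer encoding. The 0x70 application mask and
 * the 0x10/0x20/0x30 values follow the standard DW_EH_PE_pcrel/textrel/datarel
 * encoding; the _sketch name is hypothetical.
 */
static inline uint64_t
drgn_dwarf_eh_pe_base_sketch(const struct drgn_module_dwarf_info *dwarf,
			     uint8_t encoding)
{
	switch (encoding & 0x70) {
	case 0x10: /* DW_EH_PE_pcrel */
		return dwarf->pcrel_base;
	case 0x20: /* DW_EH_PE_textrel */
		return dwarf->textrel_base;
	case 0x30: /* DW_EH_PE_datarel */
		return dwarf->datarel_base;
	default: /* DW_EH_PE_absptr and other encodings need no base here */
		return 0;
	}
}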
void drgn_module_dwarf_info_deinit(struct drgn_module *module);
DEFINE_VECTOR_TYPE(drgn_dwarf_index_die_vector, uintptr_t,
vector_inline_minimal, uint32_t);
DEFINE_HASH_MAP_TYPE(drgn_dwarf_index_die_map, struct nstring,
struct drgn_dwarf_index_die_vector);
DEFINE_HASH_TABLE_TYPE(drgn_namespace_table,
struct drgn_namespace_dwarf_index *);
/* DWARF tags that we index. */
#define DRGN_DWARF_INDEX_TAGS \
X(structure_type) \
X(class_type) \
X(union_type) \
X(namespace) \
X(enumeration_type) \
X(typedef) \
X(enumerator) \
X(subprogram) \
X(variable) \
X(base_type)
enum drgn_dwarf_index_tag {
#define X(name) DRGN_DWARF_INDEX_##name,
DRGN_DWARF_INDEX_TAGS
#undef X
};
#define X(_) + 1
enum { DRGN_DWARF_INDEX_NUM_TAGS = DRGN_DWARF_INDEX_TAGS };
#undef X
_Static_assert(DRGN_DWARF_INDEX_base_type == DRGN_DWARF_INDEX_NUM_TAGS - 1,
"base_type must be last");
enum { DRGN_DWARF_INDEX_MAP_SIZE = DRGN_DWARF_INDEX_NUM_TAGS - 1 };
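/*
 * Illustrative sanity checks (hypothetical, not drgn code): with X(_)
 * expanding to "+ 1", DRGN_DWARF_INDEX_TAGS counts the ten indexed tags, and
 * base_type is excluded from the per-namespace maps because it is indexed
 * globally in drgn_dwarf_info::base_types.
 */
_Static_assert(DRGN_DWARF_INDEX_NUM_TAGS == 10, "expected ten indexed tags");
_Static_assert(DRGN_DWARF_INDEX_MAP_SIZE == DRGN_DWARF_INDEX_base_type,
	       "every tag except base_type gets a per-namespace map");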
/** DWARF information for a namespace. */
struct drgn_namespace_dwarf_index {
/** Debugging information cache that owns this index. */
struct drgn_debug_info *dbinfo;
/** (Null-terminated) name of this namespace. */
const char *name;
/** Length of @ref name. */
size_t name_len;
/** Parent namespace, or @c NULL if it is the global namespace. */
struct drgn_namespace_dwarf_index *parent;
/** Children namespaces indexed by name. */
struct drgn_namespace_table children;
/**
* Mapping for each @ref drgn_dwarf_index_tag from name to a list of
* matching DIE addresses.
*
* This has a few quirks:
*
* - `base_type` DIEs are in @ref drgn_dwarf_info::base_types, not here.
* - `enumerator` entries store the addresses of the parent
* `enumeration_type` DIEs instead.
*/
struct drgn_dwarf_index_die_map map[DRGN_DWARF_INDEX_MAP_SIZE];
/**
* Number of CUs that were indexed the last time that this namespace was
* indexed.
*/
size_t cus_indexed;
/**
* Number of `DW_TAG_namespace` DIEs in the parent's index that were
* indexed the last time that this namespace was indexed.
*/
uint32_t dies_indexed;
/** Saved error from a previous index. */
struct drgn_error *saved_err;
};
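/*
 * Illustrative helper, not part of drgn: which per-namespace map a lookup for
 * a given indexed tag would consult. base_type has no slot here (see
 * drgn_dwarf_info::base_types below), and entries found under enumerator hold
 * the parent enumeration_type DIE address. The _sketch name is hypothetical.
 */
static inline struct drgn_dwarf_index_die_map *
drgn_namespace_map_for_tag_sketch(struct drgn_namespace_dwarf_index *ns,
				  enum drgn_dwarf_index_tag tag)
{
	if (tag == DRGN_DWARF_INDEX_base_type)
		return NULL; /* indexed globally by name, not per namespace */
	return &ns->map[tag];
}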
/** Cached type in a @ref drgn_debug_info. */
struct drgn_dwarf_type {
struct drgn_type *type;
enum drgn_qualifiers qualifiers;
/**
* Whether this is an incomplete array type or a typedef of one.
*
* This is used to work around a GCC bug; see @ref
* drgn_type_from_dwarf_internal().
*/
bool is_incomplete_array;
};
DEFINE_HASH_MAP_TYPE(drgn_dwarf_base_type_map, struct nstring, uintptr_t);
DEFINE_HASH_MAP_TYPE(drgn_dwarf_specification_map, uintptr_t, uintptr_t);
DEFINE_VECTOR_TYPE(drgn_dwarf_index_cu_vector, struct drgn_dwarf_index_cu);
DEFINE_HASH_MAP_TYPE(drgn_dwarf_type_map, const void *, struct drgn_dwarf_type);
/** DWARF debugging information for a program/@ref drgn_debug_info. */
struct drgn_dwarf_info {
/** Global namespace index. */
struct drgn_namespace_dwarf_index global;
/**
* Mapping from name to `DW_TAG_base_type` DIE address with that name.
*
* Unlike user-defined types and variables, there can only be one base
* type with a given name in the entire program, so we don't store them
* in a @ref drgn_dwarf_index_die_map.
*/
struct drgn_dwarf_base_type_map base_types;
/**
* Map from the address of a (usually non-defining) DIE to the address
* of a DIE with a DW_AT_specification attribute that references it.
* This is used to resolve DIEs with DW_AT_declaration to their
* definition.
*/
struct drgn_dwarf_specification_map specifications;
/** Indexed compilation units. */
struct drgn_dwarf_index_cu_vector index_cus;
/**
* Cache of parsed types.
*
* The key is the address of the DIE (@c Dwarf_Die::addr). The value is
* a @ref drgn_dwarf_type.
*/
struct drgn_dwarf_type_map types;
/**
* Cache of parsed types which appear to be incomplete array types but
* can't be.
*
* See @ref drgn_type_from_dwarf_internal().
*/
struct drgn_dwarf_type_map cant_be_incomplete_array_types;
/** Current parsing recursion depth. */
int depth;
};
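/*
 * Illustrative helper, not part of drgn: the base_types and specifications
 * maps above store DIE addresses as uintptr_t, and the type caches are keyed
 * by Dwarf_Die::addr, so a DIE's key is simply its address. The _sketch name
 * is hypothetical.
 */
static inline uintptr_t
drgn_dwarf_die_addr_key_sketch(Dwarf_Die *die)
{
	return (uintptr_t)die->addr;
}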
void drgn_dwarf_info_init(struct drgn_debug_info *dbinfo);
void drgn_dwarf_info_deinit(struct drgn_debug_info *dbinfo);
/**
* State tracked while indexing new DWARF information in a @ref drgn_dwarf_info.
*/
struct drgn_dwarf_index_state {
struct drgn_debug_info *dbinfo;
/** Per-thread arrays of CUs to be indexed. */
struct drgn_dwarf_index_cu_vector *cus;
int max_threads;
};
/**
* Initialize state for indexing new DWARF information.
*
* @return @c true on success, @c false on failure to allocate memory.
*/
bool drgn_dwarf_index_state_init(struct drgn_dwarf_index_state *state,
struct drgn_debug_info *dbinfo);
/** Deinitialize state for indexing new DWARF information. */
void drgn_dwarf_index_state_deinit(struct drgn_dwarf_index_state *state);
/** Read a @ref drgn_module to index its DWARF information. */
struct drgn_error *
drgn_dwarf_index_read_module(struct drgn_dwarf_index_state *state,
struct drgn_module *module);
/**
* Index new DWARF information.
*
* This should be called once all modules have been read with @ref
* drgn_dwarf_index_read_module() to finish indexing those modules.
*/
struct drgn_error *
drgn_dwarf_info_update_index(struct drgn_dwarf_index_state *state);
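/*
 * Hypothetical usage sketch, not part of drgn: the intended call sequence for
 * the indexing API above. The modules array, the oom_ret out-parameter, and
 * the _sketch name are assumptions for illustration; real callers iterate
 * drgn's module list and report allocation failure as a drgn_error.
 */
static inline struct drgn_error *
drgn_dwarf_index_modules_sketch(struct drgn_debug_info *dbinfo,
				struct drgn_module **modules,
				size_t num_modules, bool *oom_ret)
{
	struct drgn_dwarf_index_state state;
	*oom_ret = !drgn_dwarf_index_state_init(&state, dbinfo);
	if (*oom_ret)
		return NULL;
	struct drgn_error *err = NULL;
	for (size_t i = 0; !err && i < num_modules; i++)
		err = drgn_dwarf_index_read_module(&state, modules[i]);
	if (!err)
		err = drgn_dwarf_info_update_index(&state);
	drgn_dwarf_index_state_deinit(&state);
	return err;
}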
/**
* Find the DWARF DIEs in a @ref drgn_module for the scope containing a given
* program counter.
*
* @param[in] module Module containing @p pc.
* @param[in] pc Program counter.
* @param[out] bias_ret Returned difference between addresses in the loaded
* module and addresses in the returned DIEs.
* @param[out] dies_ret Returned DIEs. `(*dies_ret)[*length_ret - 1]` is the
* innermost DIE containing @p pc, `(*dies_ret)[*length_ret - 2]` is its parent
* (which may not contain @p pc itself), `(*dies_ret)[*length_ret - 3]` is its
* grandparent, etc. Must be freed with @c free().
* @param[out] length_ret Returned length of @p dies_ret.
*/
struct drgn_error *drgn_module_find_dwarf_scopes(struct drgn_module *module,
uint64_t pc,
uint64_t *bias_ret,
Dwarf_Die **dies_ret,
size_t *length_ret)
__attribute__((__nonnull__(1, 3, 4, 5)));
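/*
 * Hypothetical usage sketch, not part of drgn: getting the innermost DIE
 * containing a PC from the scope array returned above. Assumes <stdlib.h>
 * (for free()) is reachable via the existing includes; the _sketch name is
 * hypothetical, and handling of the "no scope found" case is left to the
 * caller.
 */
static inline struct drgn_error *
drgn_innermost_scope_at_pc_sketch(struct drgn_module *module, uint64_t pc,
				  Dwarf_Die *die_ret, bool *found_ret)
{
	uint64_t bias;
	Dwarf_Die *dies;
	size_t length;
	struct drgn_error *err =
		drgn_module_find_dwarf_scopes(module, pc, &bias, &dies, &length);
	if (err)
		return err;
	*found_ret = length > 0;
	if (*found_ret)
		*die_ret = dies[length - 1]; /* innermost scope containing pc */
	free(dies);
	return NULL;
}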
/**
* Find the ancestors of a DWARF DIE.
*
* This finds the parent, grandparent, etc., of a DWARF DIE in the tree of DIEs.
*
* @param[in] die DIE to find.
* @param[out] dies_ret Returned DIEs. `(*dies_ret)[*length_ret]` is the DIE,
* `(*dies_ret)[*length_ret - 1]` is its parent, `(*dies_ret)[*length_ret - 2]`
* is its grandparent, etc., and `(*dies_ret)[0]` is the top-level unit DIE.
* @param[out] length_ret Returned number of ancestors in @p dies_ret.
*/
struct drgn_error *drgn_find_die_ancestors(Dwarf_Die *die, Dwarf_Die **dies_ret,
size_t *length_ret)
__attribute__((__nonnull__(2, 3)));
/**
* Get an array of names of `DW_TAG_variable` and `DW_TAG_formal_parameter` DIEs
* in local scopes.
 *
 * @param[in] scopes Array of scopes to search.
 * @param[in] num_scopes Number of scopes in @p scopes.
* @param[out] names_ret Returned array of names. On success, must be freed with
* @c free(). The individual strings should not be freed.
* @param[out] count_ret Returned number of names in @p names_ret.
*/
struct drgn_error *drgn_dwarf_scopes_names(Dwarf_Die *scopes,
size_t num_scopes,
const char ***names_ret,
size_t *count_ret);
/**
* Find an object DIE in an array of DWARF scopes.
*
* @param[in] scopes Array of scopes, from outermost to innermost.
 * @param[in] num_scopes Number of scopes in @p scopes.
 * @param[in] name Name of the object to find.
* @param[out] die_ret Returned object DIE.
* @param[out] type_ret If @p die_ret is a `DW_TAG_enumerator` DIE, its parent.
* Otherwise, undefined.
*/
struct drgn_error *drgn_find_in_dwarf_scopes(Dwarf_Die *scopes,
size_t num_scopes,
const char *name,
Dwarf_Die *die_ret,
Dwarf_Die *type_ret);
/**
* Create a @ref drgn_object from a `Dwarf_Die`.
*
* @param[in] die Object DIE (e.g., `DW_TAG_subprogram`, `DW_TAG_variable`,
* `DW_TAG_formal_parameter`, `DW_TAG_enumerator`,
* `DW_TAG_template_value_parameter`).
* @param[in] type_die DIE of object's type. If @c NULL, use the `DW_AT_type`
* attribute of @p die. If @p die is a `DW_TAG_enumerator` DIE, this should be
* its parent.
* @param[in] function_die DIE of current function. @c NULL if not in function
* context.
* @param[in] regs Registers of current stack frame. @c NULL if not in stack
* frame context.
* @param[out] ret Returned object.
*/
struct drgn_error *
drgn_object_from_dwarf(struct drgn_debug_info *dbinfo,
struct drgn_elf_file *file, Dwarf_Die *die,
Dwarf_Die *type_die, Dwarf_Die *function_die,
const struct drgn_register_state *regs,
struct drgn_object *ret);
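/*
 * Hypothetical sketch, not part of drgn, tying the two declarations above
 * together: look a name up in an array of scopes, then build an object from
 * the resulting DIE, passing type_ret as type_die only for enumerators as
 * documented. The _sketch name is hypothetical, the 0x28 literal stands in
 * for DW_TAG_enumerator (to avoid requiring <dwarf.h> here), and the "not
 * found" case is not handled.
 */
static inline struct drgn_error *
drgn_object_by_name_in_scopes_sketch(struct drgn_debug_info *dbinfo,
				     struct drgn_elf_file *file,
				     Dwarf_Die *scopes, size_t num_scopes,
				     const char *name, Dwarf_Die *function_die,
				     const struct drgn_register_state *regs,
				     struct drgn_object *ret)
{
	Dwarf_Die die, type_die;
	struct drgn_error *err = drgn_find_in_dwarf_scopes(scopes, num_scopes,
							   name, &die,
							   &type_die);
	if (err)
		return err;
	/* Only enumerators use type_ret; otherwise fall back to DW_AT_type. */
	bool is_enumerator = dwarf_tag(&die) == 0x28; /* DW_TAG_enumerator */
	return drgn_object_from_dwarf(dbinfo, file, &die,
				      is_enumerator ? &type_die : NULL,
				      function_die, regs, ret);
}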
struct drgn_error *
drgn_module_find_dwarf_cfi(struct drgn_module *module, uint64_t pc,
struct drgn_cfi_row **row_ret, bool *interrupted_ret,
drgn_register_number *ret_addr_regno_ret);
struct drgn_error *
drgn_module_find_eh_cfi(struct drgn_module *module, uint64_t pc,
struct drgn_cfi_row **row_ret, bool *interrupted_ret,
drgn_register_number *ret_addr_regno_ret);
struct drgn_error *
drgn_eval_cfi_dwarf_expression(struct drgn_program *prog,
struct drgn_elf_file *file,
const struct drgn_cfi_rule *rule,
const struct drgn_register_state *regs,
void *buf, size_t size);
/** @} */
#endif /* DRGN_DEBUG_INFO_DWARF_H */