mirror of
https://github.com/JakeHillion/drgn.git
synced 2024-12-22 01:03:07 +00:00
55a3ebca6c
We've addressed all of the smaller differences with GNU Debug Fission and split DWARF 5, so now all that remains is the DWARF index. The general approach is: in drgn_dwarf_index_read_cus(), for each CU, ask libdw for the "sub-DIE". For skeleton CUs, this is the split CU DIE from the .dwo file. From that Dwarf_Die, we can get the Dwarf_CU and then the Dwarf handle. Then, we wrap that in a struct drgn_elf_file (cached in a hash table in the struct drgn_module), which the DWARF index can work with from there. Additionally, a couple of places (.debug_addr parsing and stack trace local variable lookup) need to be updated to use the correct drgn_elf_file. Signed-off-by: Omar Sandoval <osandov@osandov.com>
411 lines
13 KiB
Python
411 lines
13 KiB
Python
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
|
# SPDX-License-Identifier: LGPL-2.1-or-later
|
|
|
|
from collections import OrderedDict
|
|
import os.path
|
|
from typing import Any, NamedTuple, Optional, Sequence, Union
|
|
import zlib
|
|
|
|
from tests.assembler import _append_sleb128, _append_uleb128
|
|
from tests.dwarf import DW_AT, DW_FORM, DW_LNCT, DW_TAG, DW_UT
|
|
from tests.elf import ET, SHT
|
|
from tests.elfwriter import ElfSection, create_elf_file
|
|
|
|
|
|
class DwarfAttrib(NamedTuple):
    """One attribute of a DIE: which attribute it is, how it is encoded, and its value."""

    # Attribute name; emitted as ULEB128 in the abbreviation declaration.
    name: DW_AT
    # Encoding of the value. Emitted in the abbreviation declaration, or inline
    # before the value when DW_FORM.indirect is in use.
    form: DW_FORM
    # Payload to encode; the Python type expected depends on ``form`` (int,
    # bytes-like, str, or a label name for DW_FORM.ref4). For DW_AT.decl_file
    # this is a file path: the line table writer splits it into directory and
    # file name, and the DIE writer replaces it with a per-unit running file
    # number.
    value: Any
|
|
|
|
|
|
class DwarfLabel(NamedTuple):
    """
    Named position marker that can be placed among a DIE's children.

    A label emits no bytes. The DIE writer records the buffer offset at the
    point where the label appears so that DW_FORM.ref4 attribute values and
    DwarfUnit.type_offset can refer to that position by name.
    """

    # Label name; must be unique across all units passed to the writer.
    name: str
|
|
|
|
|
|
class DwarfDie(NamedTuple):
    """Debugging information entry: a tag, its attributes, and nested children."""

    # DWARF tag of the entry.
    tag: DW_TAG
    # Attributes in emission order.
    attribs: Sequence[DwarfAttrib]
    # Child entries in emission order; labels may be interleaved with DIEs.
    children: Sequence[Union["DwarfDie", DwarfLabel]] = ()
|
|
|
|
|
|
class DwarfUnit(NamedTuple):
    """A DWARF unit: the unit type, its root DIE, and type-specific header fields."""

    # Unit type; written to the unit header for DWARF version >= 5.
    type: DW_UT
    # Root DIE of the unit.
    die: DwarfDie
    # 8-byte DWO ID. Required for skeleton and split compile units when the
    # version is >= 5; must be None otherwise.
    dwo_id: Optional[int] = None
    # 8-byte type signature. Required for type and split type units; must be
    # None otherwise.
    type_signature: Optional[int] = None
    # Name of the DwarfLabel marking the type DIE. Required for type and split
    # type units; must be None otherwise.
    type_offset: Optional[str] = None
|
|
|
|
|
|
def _compile_debug_abbrev(units, use_dw_form_indirect):
    """
    Build the abbreviation table shared by all of the given units.

    Every DIE gets its own abbreviation declaration. Codes are assigned
    sequentially starting at 1, in pre-order across all units, which is the
    same order in which the DIE writer numbers the DIEs it emits.

    :param units: Iterable of DwarfUnit.
    :param use_dw_form_indirect: If true, declare every attribute with
        DW_FORM.indirect instead of its real form.
    :return: bytearray with the table contents.
    """
    table = bytearray()
    next_code = 1

    def declare(node):
        nonlocal next_code
        if isinstance(node, DwarfLabel):
            # Labels only mark a position; they have no abbreviation.
            return

        _append_uleb128(table, next_code)
        next_code += 1
        _append_uleb128(table, node.tag)
        # Has-children flag.
        table.append(bool(node.children))
        for attribute in node.attribs:
            _append_uleb128(table, attribute.name)
            declared_form = (
                DW_FORM.indirect if use_dw_form_indirect else attribute.form
            )
            _append_uleb128(table, declared_form)
        # A (0, 0) name/form pair ends the attribute list.
        table.extend(b"\0\0")

        for child in node.children or ():
            declare(child)

    for unit in units:
        declare(unit.die)
    # A zero abbreviation code ends the table.
    table.append(0)
    return table
|
|
|
|
|
|
def _compile_debug_info(units, little_endian, bits, version, use_dw_form_indirect):
    """
    Serialize units into .debug_info (and .debug_types) contents.

    Each unit is written as a header followed by its DIE tree. Abbreviation
    codes are assigned sequentially starting at 1, in pre-order across all
    units. DW_AT.decl_file values are replaced by a running file number that
    restarts at 1 for every unit.

    :param units: Iterable of DwarfUnit.
    :param little_endian: Whether multi-byte fields are little-endian.
    :param bits: Address width in bits; ``bits // 8`` is the address size.
    :param version: DWARF version. Selects the unit header layout and, for
        version 4, routes type units to .debug_types.
    :param use_dw_form_indirect: If true, write each attribute's form inline
        before its value.
    :return: Tuple ``(debug_info, debug_types)`` of bytearrays.
    :raises ValueError: If a label name is used more than once.
    """
    byteorder = "little" if little_endian else "big"
    address_size = bits // 8

    # Labels are only resolvable within a unit for now, but their names must
    # still be unique across all units.
    seen_labels = set()
    unit_labels = {}
    # (buffer offset, label name) pairs for 4-byte fields to patch once the
    # whole unit has been written.
    pending_refs = []
    abbrev_code = 1
    file_number = 1

    # Forms written as a fixed-width integer in the target byte order.
    fixed_widths = {
        DW_FORM.addr: address_size,
        DW_FORM.data2: 2,
        DW_FORM.data4: 4,
        DW_FORM.data8: 8,
        DW_FORM.ref_sig8: 8,
    }

    def emit(out, node):
        nonlocal abbrev_code, file_number

        if isinstance(node, DwarfLabel):
            if node.name in seen_labels:
                raise ValueError(f"duplicate label {node.name!r}")
            seen_labels.add(node.name)
            unit_labels[node.name] = len(out)
            return

        _append_uleb128(out, abbrev_code)
        abbrev_code += 1

        for attribute in node.attribs:
            form = attribute.form
            if use_dw_form_indirect:
                _append_uleb128(out, form)

            if attribute.name == DW_AT.decl_file:
                value = file_number
                file_number += 1
            else:
                value = attribute.value

            width = fixed_widths.get(form)
            if width is not None:
                out.extend(value.to_bytes(width, byteorder))
            elif form == DW_FORM.data1:
                out.append(value)
            elif form == DW_FORM.udata:
                _append_uleb128(out, value)
            elif form == DW_FORM.sdata:
                _append_sleb128(out, value)
            elif form in (DW_FORM.block, DW_FORM.exprloc):
                # ULEB128 length followed by the raw bytes.
                _append_uleb128(out, len(value))
                out.extend(value)
            elif form == DW_FORM.block1:
                out.append(len(value))
                out.extend(value)
            elif form == DW_FORM.string:
                out.extend(value.encode())
                out.append(0)
            elif form == DW_FORM.ref4:
                # The value is a label name; patched after the unit is done.
                pending_refs.append((len(out), value))
                out.extend(b"\0\0\0\0")
            elif form == DW_FORM.sec_offset:
                # NOTE(review): the attribute value is ignored and a zero
                # offset is always written -- confirm this is intended.
                out.extend(b"\0\0\0\0")
            elif form == DW_FORM.flag_present:
                # Presence alone carries the information; no bytes.
                pass
            else:
                assert False, form

        if node.children:
            for child in node.children:
                emit(out, child)
            # Null entry ending the sibling chain.
            out.append(0)

    debug_info = bytearray()
    debug_types = bytearray()
    for unit in units:
        unit_labels.clear()
        pending_refs.clear()
        file_number = 1

        is_type_unit = unit.type in (DW_UT.type, DW_UT.split_type)
        section = debug_types if version == 4 and is_type_unit else debug_info

        unit_start = len(section)
        section.extend(b"\0\0\0\0")  # unit_length, patched below
        section.extend(version.to_bytes(2, byteorder))  # version
        if version >= 5:
            section.append(unit.type)  # unit_type
            section.append(address_size)  # address_size
            section.extend(b"\0\0\0\0")  # debug_abbrev_offset
        else:
            section.extend(b"\0\0\0\0")  # debug_abbrev_offset
            section.append(address_size)  # address_size

        if version >= 5 and unit.type in (DW_UT.skeleton, DW_UT.split_compile):
            section.extend(unit.dwo_id.to_bytes(8, byteorder))  # dwo_id
        else:
            assert unit.dwo_id is None

        if is_type_unit:
            section.extend(
                unit.type_signature.to_bytes(8, byteorder)
            )  # type_signature
            pending_refs.append((len(section), unit.type_offset))
            section.extend(b"\0\0\0\0")  # type_offset
        else:
            assert unit.type_signature is None
            assert unit.type_offset is None

        emit(section, unit.die)

        # unit_length does not count the length field itself.
        unit_length = len(section) - unit_start - 4
        section[unit_start : unit_start + 4] = unit_length.to_bytes(4, byteorder)

        # Resolve label references to offsets relative to the unit start.
        for position, label in pending_refs:
            relative = unit_labels[label] - unit_start
            section[position : position + 4] = relative.to_bytes(4, byteorder)

    return debug_info, debug_types
|
|
|
|
|
|
def _compile_debug_line(units, little_endian, bits, version):
    """
    Build .debug_line contents with one line number program header per unit.

    Only headers are produced; no line number program follows them. The
    directory and file tables are derived from the DW_AT.decl_file paths found
    in the unit's DIE tree, after a fixed first directory "/usr/src" and a
    fixed first file "main.c".

    As a side effect, attributes are appended to each unit's root DIE:
    DW_AT.stmt_list always, and DW_AT.name and DW_AT.comp_dir for compile,
    partial and skeleton units. ``unit.die.attribs`` must therefore be a
    mutable list.

    :param units: Sequence of DwarfUnit. If empty, a single placeholder
        compile unit is used so that one header is still emitted.
    :param little_endian: Whether multi-byte fields are little-endian.
    :param bits: Address width in bits (only written for version >= 5).
    :param version: DWARF version; selects the header layout.
    :return: bytearray with the section contents.
    """
    byteorder = "little" if little_endian else "big"
    is_v5 = version >= 5

    if not units:
        units = [DwarfUnit(DW_UT.compile, DwarfDie(DW_TAG.compile_unit, []))]

    def decl_file_paths(node):
        # Yield DW_AT.decl_file values in pre-order: a DIE's own attributes
        # first, then each child's in turn.
        if isinstance(node, DwarfLabel):
            return
        for attribute in node.attribs:
            if attribute.name == DW_AT.decl_file:
                yield attribute.value
        for child in node.children:
            yield from decl_file_paths(child)

    section = bytearray()
    for unit in units:
        root_attribs = unit.die.attribs
        root_attribs.append(
            DwarfAttrib(DW_AT.stmt_list, DW_FORM.sec_offset, len(section))
        )
        if unit.type in (DW_UT.compile, DW_UT.partial, DW_UT.skeleton):
            root_attribs.append(DwarfAttrib(DW_AT.name, DW_FORM.string, "main.c"))
            root_attribs.append(
                DwarfAttrib(DW_AT.comp_dir, DW_FORM.string, "/usr/src")
            )

        unit_length_at = len(section)
        section.extend(b"\0\0\0\0")  # unit_length, patched below
        section.extend(version.to_bytes(2, byteorder))  # version
        if is_v5:
            section.append(bits // 8)  # address_size
            section.append(0)  # segment_selector_size
        header_length_at = len(section)
        section.extend(b"\0\0\0\0")  # header_length, patched below
        # minimum_instruction_length, maximum_operations_per_instruction,
        # default_is_stmt, line_base, line_range, opcode_base: all 1.
        # opcode_base of 1 means standard_opcode_lengths is empty.
        section.extend(b"\1" * 6)
        if is_v5:
            section.append(1)  # directory_entry_format_count
            # directory_entry_format
            _append_uleb128(section, DW_LNCT.path)
            _append_uleb128(section, DW_FORM.string)

        paths = list(decl_file_paths(unit.die))

        # Directory name -> index, in first-seen order.
        directories = OrderedDict([("/usr/src", 0)])
        for path in paths:
            dirname = os.path.dirname(path)
            if dirname:
                directories.setdefault(dirname, len(directories))

        if is_v5:
            _append_uleb128(section, len(directories))  # directories_count

        # directories (include_directories in version <= 4, where the entry
        # with index 0 is not written)
        for directory, index in directories.items():
            if is_v5 or index > 0:
                section.extend(directory.encode("ascii"))
                section.append(0)
        if not is_v5:
            section.append(0)  # end of include_directories

        if is_v5:
            section.append(2)  # file_name_entry_format_count
            # file_name_entry_format
            for code in (
                DW_LNCT.path,
                DW_FORM.string,
                DW_LNCT.directory_index,
                DW_FORM.udata,
            ):
                _append_uleb128(section, code)

        file_names = [("main.c", 0)]
        for path in paths:
            dirname, basename = os.path.split(path)
            file_names.append((basename, directories[dirname] if dirname else 0))

        if is_v5:
            _append_uleb128(section, len(file_names))  # file_names_count

        # file_names (the leading "main.c" entry is only written for
        # version >= 5)
        first_listed = 0 if is_v5 else 1
        for basename, directory_index in file_names[first_listed:]:
            section.extend(basename.encode("ascii"))  # path
            section.append(0)
            _append_uleb128(section, directory_index)  # directory_index
            if not is_v5:
                _append_uleb128(section, 0)  # mtime
                _append_uleb128(section, 0)  # size
        if not is_v5:
            section.append(0)  # end of file_names

        # Both lengths count the bytes that follow their own field.
        end = len(section)
        section[unit_length_at : unit_length_at + 4] = (
            end - (unit_length_at + 4)
        ).to_bytes(4, byteorder)
        section[header_length_at : header_length_at + 4] = (
            end - (header_length_at + 4)
        ).to_bytes(4, byteorder)

    return section
|
|
|
|
|
|
# Tags reserved for the root DIE of a unit. dwarf_sections() asserts that
# every unit's root DIE has one of these tags, and that bare DIEs passed to it
# (which it wraps in a compile unit itself) do not.
_UNIT_TAGS = frozenset({DW_TAG.type_unit, DW_TAG.compile_unit})
|
|
|
|
|
|
def dwarf_sections(
    units_or_dies,
    little_endian=True,
    bits=64,
    *,
    version=4,
    lang=None,
    use_dw_form_indirect=False,
    compress=None,
    split=None,
):
    """
    Create the ELF sections holding DWARF debugging information.

    :param units_or_dies: A single DwarfDie or DwarfUnit, a sequence of
        DwarfUnit, or a sequence of DwarfDie/DwarfLabel. Bare DIEs and labels
        are wrapped as the children of one compile unit and must not
        themselves use a unit tag.
    :param little_endian: Whether multi-byte fields are little-endian.
    :param bits: Address width in bits.
    :param version: DWARF version.
    :param lang: If not None, a DW_AT.language value added to every unit's
        root DIE.
    :param use_dw_form_indirect: Encode every attribute with
        DW_FORM.indirect.
    :param compress: None, "zlib-gnu" (section renamed to ".zdebug*" and
        prefixed with "ZLIB" plus the 8-byte big-endian uncompressed size), or
        "zlib-gabi" (section flagged as compressed for the ELF writer).
    :param split: None, or "dwo" to append ".dwo" to the section names and
        omit .debug_line.
    :return: List of ElfSection: .debug_abbrev, .debug_info, .debug_str, then
        .debug_line unless ``split``, then .debug_types if any type units were
        written there.
    """
    assert compress in (None, "zlib-gnu", "zlib-gabi")
    assert split in (None, "dwo")

    if isinstance(units_or_dies, (DwarfDie, DwarfUnit)):
        units_or_dies = (units_or_dies,)
    if not units_or_dies or isinstance(units_or_dies[0], DwarfUnit):
        units = units_or_dies
    else:
        assert all(isinstance(die, (DwarfDie, DwarfLabel)) for die in units_or_dies)
        assert all(
            not isinstance(die, DwarfDie) or die.tag not in _UNIT_TAGS
            for die in units_or_dies
        )
        units = (
            DwarfUnit(DW_UT.compile, DwarfDie(DW_TAG.compile_unit, (), units_or_dies)),
        )
    assert all(isinstance(unit, DwarfUnit) for unit in units)
    assert all(unit.die.tag in _UNIT_TAGS for unit in units)

    unit_attribs = []
    if lang is not None:
        unit_attribs.append(DwarfAttrib(DW_AT.language, DW_FORM.data1, lang))

    # Give every unit a fresh, mutable attribute list: _compile_debug_line()
    # appends to it, and the caller's DIEs must not be modified.
    units = [
        unit._replace(
            die=unit.die._replace(attribs=list(unit.die.attribs) + unit_attribs)
        )
        for unit in units
    ]

    # TODO: line number information for a split file is in the skeleton file.
    # We don't have any test cases yet that use line number information from a
    # split file, but when we do, we'll have to add a way to include the split
    # file's line number information in the skeleton file.
    if not split:
        # Must run before _compile_debug_info() so that the attributes it
        # appends to the root DIEs are serialized.
        debug_line = _compile_debug_line(units, little_endian, bits, version)

    debug_info, debug_types = _compile_debug_info(
        units, little_endian, bits, version, use_dw_form_indirect
    )

    def debug_section(name, data):
        # Wrap raw section contents in an ElfSection, applying the requested
        # compression and split naming.
        assert name.startswith(".debug")
        if compress == "zlib-gnu":
            name = ".z" + name[1:]
            compressed_data = bytearray(b"ZLIB")
            compressed_data.extend(len(data).to_bytes(8, "big"))
            compressed_data.extend(zlib.compress(data))
            data = compressed_data
        if split:
            name += ".dwo"
        return ElfSection(
            name=name,
            sh_type=SHT.PROGBITS,
            data=data,
            compressed=(compress == "zlib-gabi"),
        )

    sections = [
        debug_section(
            ".debug_abbrev", _compile_debug_abbrev(units, use_dw_form_indirect)
        ),
        debug_section(".debug_info", debug_info),
        debug_section(".debug_str", b"\0"),
    ]
    if not split:
        sections.append(debug_section(".debug_line", debug_line))
    if debug_types:
        sections.append(debug_section(".debug_types", debug_types))
    return sections
|
|
|
|
|
|
def compile_dwarf(
    dies,
    little_endian=True,
    bits=64,
    *,
    version=4,
    lang=None,
    use_dw_form_indirect=False,
    compress=None,
    split=None,
):
    """
    Build an ET.EXEC ELF file containing DWARF sections for ``dies``.

    All arguments are forwarded unchanged to dwarf_sections(); the resulting
    sections are handed to create_elf_file() along with the same byte order
    and address width.

    :return: Whatever create_elf_file() returns for those sections.
    """
    elf_type = ET.EXEC
    sections = dwarf_sections(
        dies,
        little_endian=little_endian,
        bits=bits,
        version=version,
        lang=lang,
        use_dw_form_indirect=use_dw_form_indirect,
        compress=compress,
        split=split,
    )
    return create_elf_file(
        elf_type,
        sections,
        little_endian=little_endian,
        bits=bits,
    )
|