From 72b9af2b8262eafbe76dbcb9bb69c3a910578934 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Fri, 25 Aug 2017 23:24:38 -0700 Subject: [PATCH] Add address range table parsing --- drgn/cli/dump.py | 76 ++++---- drgn/cli/probe.py | 106 ++++------- drgn/dwarf/__init__.py | 1 + drgn/dwarf/defs.py | 345 +---------------------------------- drgn/dwarf/file.py | 123 ++++++++++--- drgn/dwarf/program.py | 105 +++++++++++ drgn/dwarfdefs.py | 344 ++++++++++++++++++++++++++++++++++ drgn/elf.py | 17 +- lldwarf/arange.c | 323 ++++++++++++++++++++++++++++++++ lldwarf/lldwarf.h | 26 +++ lldwarf/module.c | 83 ++++++++- setup.py | 5 +- tests/lldwarf/test_arange.py | 125 +++++++++++++ 13 files changed, 1199 insertions(+), 480 deletions(-) create mode 100644 drgn/dwarf/program.py create mode 100644 drgn/dwarfdefs.py create mode 100644 lldwarf/arange.c create mode 100644 tests/lldwarf/test_arange.py diff --git a/drgn/cli/dump.py b/drgn/cli/dump.py index 3d5fbf5d..f38e3074 100644 --- a/drgn/cli/dump.py +++ b/drgn/cli/dump.py @@ -139,38 +139,49 @@ def dump_line_number_matrix(cu, lnp, matrix, *, indent=0): print(f'{prefix}}}') +def dump_cus(dwarf_file, args): + for cu in dwarf_file.cu_headers(): + die = dwarf_file.cu_die(cu) + try: + cu_name = dwarf_file.die_name(die).decode() + except KeyError: + cu_name = '' + for pattern in args.cu: + if fnmatch.fnmatch(cu_name, pattern): + break + else: + continue + + dump_cu(dwarf_file, cu, cu_name) + if args.die: + if args.recursive: + dwarf_file.parse_die_children(cu, die, recurse=True) + dump_die(dwarf_file, cu, die, indent=2, recurse=args.recursive) + if (args.include_directories or args.file_names or args.lines or + args.line_number_program): + lnp = dwarf_file.cu_line_number_program_header(cu, die) + if args.include_directories: + dump_lnp_include_directories(lnp, indent=2) + if args.file_names: + dump_lnp_file_names(lnp, indent=2) + if args.lines: + matrix = dwarf_file.execute_line_number_program(lnp) + dump_line_number_matrix(cu, lnp, matrix, indent=2) + if args.line_number_program: + dump_lnp_header(dwarf_file, lnp, indent=2) + dump_lnp_ops(dwarf_file, lnp, indent=4) + + def cmd_dump(args): with DwarfFile(args.file) as dwarf_file: - for cu in dwarf_file.cu_headers(): - die = dwarf_file.cu_die(cu) - try: - cu_name = dwarf_file.die_name(die).decode() - except KeyError: - cu_name = '' - for pattern in args.cu: - if fnmatch.fnmatch(cu_name, pattern): - break - else: - continue - - dump_cu(dwarf_file, cu, cu_name) - if args.die: - if args.recursive: - dwarf_file.parse_die_children(cu, die, recurse=True) - dump_die(dwarf_file, cu, die, indent=2, recurse=args.recursive) - if (args.include_directories or args.file_names or args.lines or - args.line_number_program): - lnp = dwarf_file.cu_line_number_program_header(cu, die) - if args.include_directories: - dump_lnp_include_directories(lnp, indent=2) - if args.file_names: - dump_lnp_file_names(lnp, indent=2) - if args.lines: - matrix = dwarf_file.execute_line_number_program(lnp) - dump_line_number_matrix(cu, lnp, matrix, indent=2) - if args.line_number_program: - dump_lnp_header(dwarf_file, lnp, indent=2) - dump_lnp_ops(dwarf_file, lnp, indent=4) + if args.cu: + dump_cus(dwarf_file, args) + if args.symtab: + symbols = sorted(dwarf_file.symbols().items()) + for name, syms in symbols: + print(name) + for sym in syms: + print(f' value=0x{sym.st_value:x} size=0x{sym.st_size:x}') def register(subparsers): subparser = subparsers.add_parser( @@ -191,8 +202,7 @@ def register(subparsers): subparser.add_argument( '--line-number-program', '--lnp', action='store_true', help='also dump the line number program') subparser.add_argument( - 'file', help='file to dump') + '--symtab', action='store_true', help='dump the symbol table') subparser.add_argument( - 'cu', nargs='+', metavar='glob', - help='pattern matching names of compilation units to dump') + 'file', help='file to dump') subparser.set_defaults(func=cmd_dump) diff --git a/drgn/cli/probe.py b/drgn/cli/probe.py index 92b7b93a..89c59186 100644 --- a/drgn/cli/probe.py +++ b/drgn/cli/probe.py @@ -1,74 +1,38 @@ -from drgn.dwarf import DwarfFile -from drgn.dwarf.defs import DW_TAG +from drgn.dwarf import DwarfProgram from drgn.ftrace import Kprobe, FtraceInstance +import re import os -import signal -def find_cu_by_name(dwarf_file, name): - for cu in dwarf_file.cu_headers(): - die = dwarf_file.cu_die(cu) - try: - cu_name = dwarf_file.die_name(die).decode() - except KeyError: - continue - if cu_name == name: - return cu, die - else: - raise ValueError('CU not found') - - -def find_addresses_for_line(dwarf_file, filename, lineno): - cu, die = find_cu_by_name(dwarf_file, filename) - lnp = dwarf_file.cu_line_number_program_header(cu, die) - matrix = dwarf_file.execute_line_number_program(lnp) - - rows = [] - for row in matrix: - if (dwarf_file.line_number_row_name(cu, lnp, row) == filename and - row.line == lineno): - rows.append(row) - return cu, die, rows - - -def best_breakpoint_address(rows): - for row in rows: - if row.is_stmt: - return row - return rows[0] - - -def find_subprogram_containing_address(dwarf_file, cu, die, address): - dwarf_file.parse_die_children(cu, die) - for child in die.children: - if child.tag != DW_TAG.subprogram: - continue - if dwarf_file.die_contains_address(child, address): - return child - assert False # XXX - - -def create_probe(dwarf_file, filename, lineno): - cu, die, rows = find_addresses_for_line(dwarf_file, filename, lineno) - row = best_breakpoint_address(rows) - subprogram = find_subprogram_containing_address(dwarf_file, cu, die, row.address) - subprogram_name = dwarf_file.die_name(subprogram).decode() - subprogram_address = dwarf_file.die_address(subprogram) - - name = f'drgn/{subprogram_name}_{os.getpid()}' - location = f'{subprogram_name}+{row.address - subprogram_address}' - return name, location +def sanitize_probe_name(name: str) -> str: + name = re.sub('[^0-9A-Za-z]', '_', name) + if name[0].isdigit(): + name = '_' + name + return name def cmd_probe(args): - # XXX check in argparse - filename, lineno = args.line.rsplit(':', 1) - lineno = int(lineno) - with DwarfFile(args.vmlinux) as dwarf_file: - name, location = create_probe(dwarf_file, filename, lineno) + if args.line or (not args.function and ':' in args.location): + function = None + filename, lineno = args.location.rsplit(':', 1) + # TODO: catch ValueError + lineno = int(lineno) + probe_name = sanitize_probe_name(f'{filename}_{lineno}') + else: + function = args.location + probe_name = sanitize_probe_name(function) + probe_name = f'drgn/{probe_name}' - with Kprobe(name, location) as probe, \ - FtraceInstance(f'drgn_{os.getpid()}') as instance: + binary = f'/lib/modules/{os.uname().release}/build/vmlinux' + with DwarfProgram(binary) as dwarf_program: + if function is not None: + probe_location = function + else: + probe_location = dwarf_program.find_breakpoint_location(filename, lineno) + + # TODO: deal with probe name collisions + with FtraceInstance(f'drgn_{os.getpid()}') as instance, \ + Kprobe(probe_name, probe_location) as probe: probe.enable(instance) try: import subprocess @@ -80,9 +44,17 @@ def cmd_probe(args): def register(subparsers): subparser = subparsers.add_parser( 'probe') + subparser.add_argument( - '--line', '-l', metavar='FILE:LINE', - help='probe a source location') - subparser.add_argument( - 'vmlinux', help='vmlinux file to use') + 'location', metavar='LOCATION', + help='location to probe; either a function name or file:line') + + group = subparser.add_mutually_exclusive_group() + group.add_argument( + '--line', '-l', action='store_true', + help='force location to be treated as file:line') + group.add_argument( + '--function', '-f', action='store_true', + help='force location to be treated as function name') + subparser.set_defaults(func=cmd_probe) diff --git a/drgn/dwarf/__init__.py b/drgn/dwarf/__init__.py index 6a4b069d..5f7b7c30 100644 --- a/drgn/dwarf/__init__.py +++ b/drgn/dwarf/__init__.py @@ -1 +1,2 @@ from drgn.dwarf.file import DwarfFile +from drgn.dwarf.program import DwarfProgram diff --git a/drgn/dwarf/defs.py b/drgn/dwarf/defs.py index 32fed7aa..56e08f36 100644 --- a/drgn/dwarf/defs.py +++ b/drgn/dwarf/defs.py @@ -1,344 +1 @@ -import enum - -class DW_CHILDREN(enum.IntEnum): - no = 0 - yes = 1 - - -class DW_TAG(enum.IntEnum): - array_type = 0x01 - class_type = 0x02 - entry_point = 0x03 - enumeration_type = 0x04 - formal_parameter = 0x05 - imported_declaration = 0x08 - label = 0x0a - lexical_block = 0x0b - member = 0x0d - pointer_type = 0x0f - reference_type = 0x10 - compile_unit = 0x11 - string_type = 0x12 - structure_type = 0x13 - subroutine_type = 0x15 - typedef = 0x16 - union_type = 0x17 - unspecified_parameters = 0x18 - variant = 0x19 - common_block = 0x1a - common_inclusion = 0x1b - inheritance = 0x1c - inlined_subroutine = 0x1d - module = 0x1e - ptr_to_member_type = 0x1f - set_type = 0x20 - subrange_type = 0x21 - with_stmt = 0x22 - access_declaration = 0x23 - base_type = 0x24 - catch_block = 0x25 - const_type = 0x26 - constant = 0x27 - enumerator = 0x28 - file_type = 0x29 - friend = 0x2a - namelist = 0x2b - namelist_item = 0x2c - packed_type = 0x2d - subprogram = 0x2e - template_type_parameter = 0x2f - template_value_parameter = 0x30 - thrown_type = 0x31 - try_block = 0x32 - variant_part = 0x33 - variable = 0x34 - volatile_type = 0x35 - dwarf_procedure = 0x36 - restrict_type = 0x37 - interface_type = 0x38 - namespace = 0x39 - imported_module = 0x3a - unspecified_type = 0x3b - partial_unit = 0x3c - imported_unit = 0x3d - # 0x3e reserved - condition = 0x3f - shared_type = 0x40 - type_unit = 0x41 - rvalue_reference_type = 0x42 - template_alias = 0x43 - - # DWARF 5 - atomic_type = 0x47 - - lo_user = 0x4080 - - MIPS_loop = 0x4081 - format_label = 0x4101 - function_template = 0x4102 - class_template = 0x4103 - - GNU_BINCL = 0x4104 - GNU_EINCL = 0x4105 - - GNU_template_template_param = 0x4106 - GNU_template_parameter_pack = 0x4107 - GNU_formal_parameter_pack = 0x4108 - GNU_call_site = 0x4109 - GNU_call_site_parameter = 0x410a - - hi_user = 0xffff - - -class DW_AT(enum.IntEnum): - sibling = 0x01 - location = 0x02 - name = 0x03 - ordering = 0x09 - subscr_data = 0x0a - byte_size = 0x0b - bit_offset = 0x0c - bit_size = 0x0d - element_list = 0x0f - stmt_list = 0x10 - low_pc = 0x11 - high_pc = 0x12 - language = 0x13 - member = 0x14 - discr = 0x15 - discr_value = 0x16 - visibility = 0x17 - import_ = 0x18 - string_length = 0x19 - common_reference = 0x1a - comp_dir = 0x1b - const_value = 0x1c - containing_type = 0x1d - default_value = 0x1e - inline = 0x20 - is_optional = 0x21 - lower_bound = 0x22 - producer = 0x25 - prototyped = 0x27 - return_addr = 0x2a - start_scope = 0x2c - bit_stride = 0x2e - upper_bound = 0x2f - abstract_origin = 0x31 - accessibility = 0x32 - address_class = 0x33 - artificial = 0x34 - base_types = 0x35 - calling_convention = 0x36 - count = 0x37 - data_member_location = 0x38 - decl_column = 0x39 - decl_file = 0x3a - decl_line = 0x3b - declaration = 0x3c - discr_list = 0x3d - encoding = 0x3e - external = 0x3f - frame_base = 0x40 - friend = 0x41 - identifier_case = 0x42 - macro_info = 0x43 - namelist_item = 0x44 - priority = 0x45 - segment = 0x46 - specification = 0x47 - static_link = 0x48 - type = 0x49 - use_location = 0x4a - variable_parameter = 0x4b - virtuality = 0x4c - vtable_elem_location = 0x4d - allocated = 0x4e - associated = 0x4f - data_location = 0x50 - byte_stride = 0x51 - entry_pc = 0x52 - use_UTF8 = 0x53 - extension = 0x54 - ranges = 0x55 - trampoline = 0x56 - call_column = 0x57 - call_file = 0x58 - call_line = 0x59 - description = 0x5a - binary_scale = 0x5b - decimal_scale = 0x5c - small = 0x5d - decimal_sign = 0x5e - digit_count = 0x5f - picture_string = 0x60 - mutable = 0x61 - threads_scaled = 0x62 - explicit = 0x63 - object_pointer = 0x64 - endianity = 0x65 - elemental = 0x66 - pure = 0x67 - recursive = 0x68 - signature = 0x69 - main_subprogram = 0x6a - data_bit_offset = 0x6b - const_expr = 0x6c - enum_class = 0x6d - linkage_name = 0x6e - - # DWARF5 - noreturn = 0x87 - - lo_user = 0x2000 - - MIPS_fde = 0x2001 - MIPS_loop_begin = 0x2002 - MIPS_tail_loop_begin = 0x2003 - MIPS_epilog_begin = 0x2004 - MIPS_loop_unroll_factor = 0x2005 - MIPS_software_pipeline_depth = 0x2006 - MIPS_linkage_name = 0x2007 - MIPS_stride = 0x2008 - MIPS_abstract_name = 0x2009 - MIPS_clone_origin = 0x200a - MIPS_has_inlines = 0x200b - MIPS_stride_byte = 0x200c - MIPS_stride_elem = 0x200d - MIPS_ptr_dopetype = 0x200e - MIPS_allocatable_dopetype = 0x200f - MIPS_assumed_shape_dopetype = 0x2010 - MIPS_assumed_size = 0x2011 - - # GNU extensions - sf_names = 0x2101 - src_info = 0x2102 - mac_info = 0x2103 - src_coords = 0x2104 - body_begin = 0x2105 - body_end = 0x2106 - GNU_vector = 0x2107 - GNU_guarded_by = 0x2108 - GNU_pt_guarded_by = 0x2109 - GNU_guarded = 0x210a - GNU_pt_guarded = 0x210b - GNU_locks_excluded = 0x210c - GNU_exclusive_locks_required = 0x210d - GNU_shared_locks_required = 0x210e - GNU_odr_signature = 0x210f - GNU_template_name = 0x2110 - GNU_call_site_value = 0x2111 - GNU_call_site_data_value = 0x2112 - GNU_call_site_target = 0x2113 - GNU_call_site_target_clobbered = 0x2114 - GNU_tail_call = 0x2115 - GNU_all_tail_call_sites = 0x2116 - GNU_all_call_sites = 0x2117 - GNU_all_source_call_sites = 0x2118 - GNU_macros = 0x2119 - GNU_deleted = 0x211a - - hi_user = 0x3fff - - -class DW_FORM(enum.IntEnum): - addr = 0x01 - block2 = 0x03 - block4 = 0x04 - data2 = 0x05 - data4 = 0x06 - data8 = 0x07 - string = 0x08 - block = 0x09 - block1 = 0x0a - data1 = 0x0b - flag = 0x0c - sdata = 0x0d - strp = 0x0e - udata = 0x0f - ref_addr = 0x10 - ref1 = 0x11 - ref2 = 0x12 - ref4 = 0x13 - ref8 = 0x14 - ref_udata = 0x15 - indirect = 0x16 - sec_offset = 0x17 - exprloc = 0x18 - flag_present = 0x19 - ref_sig8 = 0x20 - - -class DW_LNS(enum.IntEnum): - copy = 1 - advance_pc = 2 - advance_line = 3 - set_file = 4 - set_column = 5 - negate_stmt = 6 - set_basic_block = 7 - const_add_pc = 8 - fixed_advance_pc = 9 - set_prologue_end = 10 - set_epilogue_begin = 11 - set_isa = 12 - - -class DW_LNE(enum.IntEnum): - end_sequence = 1 - set_address = 2 - define_file = 3 - set_discriminator = 4 - - lo_user = 128 - hi_user = 255 - - -def at_name(at): - try: - return f'DW_AT_{DW_AT(at).name}' - except ValueError: - return str(at) - - -def at_class_constant(at): - return (at == DW_FORM.data1 or at == DW_FORM.data2 or - at == DW_FORM.data4 or at == DW_FORM.data8 or - at == DW_FORM.udata or at == DW_FORM.sdata) - - -def at_class_constant_bytes(at): - return (at == DW_FORM.data1 or at == DW_FORM.data2 or - at == DW_FORM.data4 or at == DW_FORM.data8) - - -def at_class_constant_int(at): - return at == DW_FORM.udata or at == DW_FORM.sdata - - -def form_name(form): - try: - return f'DW_FORM_{DW_FORM(form).name}' - except ValueError: - return str(form) - - -def tag_name(tag): - try: - return f'DW_TAG_{DW_TAG(tag).name}' - except ValueError: - return str(tag) - - -def lns_name(lns): - try: - return f'DW_LNS_{DW_LNS(lns).name}' - except ValueError: - return str(lns) - - -def lne_name(lne): - try: - return f'DW_LNE_{DW_LNE(lne).name}' - except ValueError: - return str(lne) +from drgn.dwarfdefs import * diff --git a/drgn/dwarf/file.py b/drgn/dwarf/file.py index 3b893c0c..a47a458b 100644 --- a/drgn/dwarf/file.py +++ b/drgn/dwarf/file.py @@ -1,13 +1,27 @@ import mmap import drgn.lldwarf as lldwarf from drgn.dwarf.defs import * -from drgn.elf import parse_elf_header, parse_elf_sections +from drgn.elf import parse_elf_header, parse_elf_sections, parse_elf_symtab import os.path import sys +from typing import List class DwarfFile: + """DWARF file parser + + A DwarfFile manages parsing a single DWARF file, abstracting away the + details of reading the file. + """ + def __init__(self, path): + """ + DwarfFile(path) -> new DWARF file parser + Create a new DWARF file parser. + + Arguments: + path -- file path + """ self._closed = False self._file = open(path, 'rb') self._mmap = mmap.mmap(self._file.fileno(), 0, access=mmap.ACCESS_READ) @@ -16,6 +30,8 @@ class DwarfFile: self._sections = parse_elf_sections(self._mmap, self._ehdr) self._abbrev_tables = {} + self._cu_dies = {} + self._symbols = None def close(self): if not self._closed: @@ -34,21 +50,49 @@ class DwarfFile: def __exit__(self, exc_type, exc_value, traceback): self.close() - def section(self, name): + def section(self, name: str): return self._sections[name] - def at_string(self, form, value): + def string_at(self, offset): + nul = self._mmap.find(b'\0', offset) + assert nul != -1 # XXX + return self._mmap[offset:nul] + + def symbols(self): + if self._symbols is None: + symtab = self.section('.symtab') + strtab = self.section('.strtab') + symbols = {} + for sym in parse_elf_symtab(self._mmap, symtab): + if sym.st_name: + sym_name = self.string_at(strtab.sh_offset + sym.st_name).decode() + else: + sym_name = '' + try: + symbols[sym_name].append(sym) + except KeyError: + symbols[sym_name] = [sym] + self._symbols = symbols + return self._symbols + + def symbol(self, name: str, *, all: bool=False): + syms = self.symbols()[name] + if all: + return syms + else: + if len(syms) > 1: + raise ValueError('multiple symbols with given name') + return syms[0] + + def at_string(self, form: DW_FORM, value) -> bytes: if form == DW_FORM.string: return self._mmap[value[0]:value[0] + value[1]] else: assert form == DW_FORM.strp debug_str = self.section('.debug_str') - offset = debug_str.sh_offset + value - nul = self._mmap.find(b'\0', offset) - assert nul != -1 # XXX - return self._mmap[offset:nul] + return self.string_at(debug_str.sh_offset + value) - def at_sec_offset(self, form, value): + def at_sec_offset(self, form: DW_FORM, value) -> int: if form == DW_FORM.data4: # DWARF 2 and 3 return int.from_bytes(value, sys.byteorder) @@ -57,7 +101,7 @@ class DwarfFile: assert form == DW_FORM.sec_offset return value - def abbrev_table(self, offset): + def abbrev_table(self, offset: int) -> lldwarf.AbbrevDecl: try: return self._abbrev_tables[offset] except KeyError: @@ -78,13 +122,29 @@ class DwarfFile: yield cu offset = cu.next_offset() - def cu_die(self, cu, *, recurse=False): + def cu_header(self, offset: int): + debug_info = self.section('.debug_info') + offset += debug_info.sh_offset + return lldwarf.parse_compilation_unit_header(self._mmap, offset) + + # Debugging information entries + + def cu_die(self, cu: lldwarf.CompilationUnitHeader, *, + recurse: bool=False) -> lldwarf.DwarfDie: + try: + return self._cu_dies[cu.offset] + except KeyError: + pass + debug_info = self.section('.debug_info') abbrev_table = self.abbrev_table(cu.debug_abbrev_offset) - return lldwarf.parse_die(cu, abbrev_table, self._mmap, cu.die_offset(), - recurse=recurse) + die = lldwarf.parse_die(cu, abbrev_table, self._mmap, cu.die_offset(), + recurse=recurse) + self._cu_dies[cu.offset] = die + return die - def parse_die_children(self, cu, die, *, recurse=False): + def parse_die_children(self, cu: lldwarf.CompilationUnitHeader, + die: lldwarf.DwarfDie, *, recurse: bool=False) -> None: if not hasattr(die, 'children'): debug_info = self.section('.debug_info') abbrev_table = self.abbrev_table(cu.debug_abbrev_offset) @@ -93,7 +153,7 @@ class DwarfFile: offset=die.offset + die.die_length, recurse=recurse) - def die_contains_address(self, die, address): + def die_contains_address(self, die: lldwarf.DwarfDie, address: int) -> bool: try: ranges_form, ranges_value = die.find(DW_AT.ranges) assert False @@ -114,11 +174,11 @@ class DwarfFile: high_pc = high_pc_value return low_pc <= address < high_pc - def die_name(self, die): + def die_name(self, die: lldwarf.DwarfDie) -> bytes: form, value = die.find(DW_AT.name) return self.at_string(form, value) - def die_address(self, die): + def die_address(self, die: lldwarf.DwarfDie) -> int: try: ranges_form, ranges_value = die.find(DW_AT.ranges) assert False @@ -128,8 +188,27 @@ class DwarfFile: assert form == DW_FORM.addr return value - def cu_line_number_program_header(self, cu, die): + # Address range tables + + def arange_table_headers(self): + debug_aranges = self.section('.debug_aranges') + offset = debug_aranges.sh_offset + end = debug_aranges.sh_offset + debug_aranges.sh_size + while offset < end: + art = lldwarf.parse_arange_table_header(self._mmap, offset) + yield art + offset = art.next_offset() + + def arange_table(self, art: lldwarf.ArangeTableHeader): + assert art.version == 2 + return lldwarf.parse_arange_table(art.segment_size, art.address_size, + self._mmap, art.table_offset()) + + # Line number programs + + def cu_line_number_program_header(self, cu: lldwarf.CompilationUnitHeader) -> lldwarf.LineNumberProgramHeader: debug_line = self.section('.debug_line') + die = self.cu_die(cu) try: form, value = die.find(DW_AT.stmt_list) except KeyError: @@ -137,13 +216,15 @@ class DwarfFile: offset = debug_line.sh_offset + self.at_sec_offset(form, value) return lldwarf.parse_line_number_program_header(self._mmap, offset) - def execute_line_number_program(self, lnp): + def execute_line_number_program(self, lnp: lldwarf.LineNumberProgramHeader) -> List[lldwarf.LineNumberRow]: return lldwarf.execute_line_number_program(lnp, self._mmap, lnp.program_offset()) - def line_number_row_name(self, cu, lnp, row): + def line_number_row_name(self, cu: lldwarf.CompilationUnitHeader, + lnp: lldwarf.LineNumberProgramHeader, + row: lldwarf.LineNumberRow) -> str: if row.file == 0: - return cu_name(cu) + return self.die_name(self.cu_die(cu)).decode() file_name, directory_index, mtime, file_size = lnp.file_names[row.file - 1] file_name = file_name.decode() @@ -153,7 +234,7 @@ class DwarfFile: else: return file_name - def decode_line_number_program(self, lnp): + def decode_line_number_program(self, lnp: lldwarf.LineNumberProgramHeader): offset = lnp.program_offset() end = lnp.end_offset() while offset < end: diff --git a/drgn/dwarf/program.py b/drgn/dwarf/program.py new file mode 100644 index 00000000..f35f223d --- /dev/null +++ b/drgn/dwarf/program.py @@ -0,0 +1,105 @@ +import drgn.lldwarf as lldwarf +from drgn.dwarf.defs import * +from drgn.dwarf.file import DwarfFile +from typing import List, Tuple + + +class DwarfProgram: + def __init__(self, path): + self._closed = False + self._file = DwarfFile(path) + self._files = {path: self._file} + + def close(self): + if hasattr(self, '_files'): + for file in self._files.values(): + file.close() + + def __del__(self): + self.close() + + def __enter__(self): + return self + + def __exit__(self, exc_type, exc_value, traceback): + self.close() + + def find_cu_by_name(self, name: str) -> Tuple[DwarfFile, lldwarf.CompilationUnitHeader]: + for cu in self._file.cu_headers(): + die = self._file.cu_die(cu) + try: + cu_name = self._file.die_name(die).decode() + except KeyError: + continue + if cu_name == name: + return self._file, cu + else: + raise ValueError('CU not found') + + def find_cu_by_addr(self, addr: int) -> Tuple[DwarfFile, lldwarf.CompilationUnitHeader]: + dwarf_file = self._file + for art in dwarf_file.arange_table_headers(): + for arange in dwarf_file.arange_table(art): + if arange.address <= addr <= arange.address + arange.length: + return dwarf_file, dwarf_file.cu_header(art.debug_info_offset) + else: + raise ValueError('CU containing address not found') + + + def find_subprogram_by_name(self, name: str): + dwarf_file = self._file + symbol = dwarf_file.symbol(name) + dwarf_file, cu = self.find_cu_by_addr(symbol.st_value) + die = self._file.cu_die(cu) + dwarf_file.parse_die_children(cu, die) + for child in die.children: + if (child.tag == DW_TAG.subprogram and + dwarf_file.die_name(child).decode() == name): + return child + else: + raise ValueError('subprogram not found') + + @staticmethod + def _best_breakpoint_row(dwarf_file: DwarfFile, + cu: lldwarf.CompilationUnitHeader, + lnp: lldwarf.LineNumberProgramHeader, + matrix: List[lldwarf.LineNumberRow], + filename: str, + lineno: int) -> lldwarf.LineNumberRow: + # Find the first row which is a statement, or the first row if none are + # statements. + first_row = None + for row in matrix: + if (dwarf_file.line_number_row_name(cu, lnp, row) == filename and row.line == lineno): + if row.is_stmt: + return row + if first_row is None: + first_row = row + else: + assert first_row is not None # XXX + return first_row + + @staticmethod + def _find_subprogram_containing_address(dwarf_file: DwarfFile, + cu: lldwarf.CompilationUnitHeader, + addr: int) -> lldwarf.DwarfDie: + die = dwarf_file.cu_die(cu) + dwarf_file.parse_die_children(cu, die) + for child in die.children: + if (child.tag == DW_TAG.subprogram and + dwarf_file.die_contains_address(child, addr)): + return child + assert False # XXX + + def find_breakpoint_location(self, filename: str, lineno: int) -> str: + dwarf_file, cu = self.find_cu_by_name(filename) + lnp = dwarf_file.cu_line_number_program_header(cu) + matrix = dwarf_file.execute_line_number_program(lnp) + + row = self._best_breakpoint_row(dwarf_file, cu, lnp, matrix, filename, lineno) + + subprogram = self._find_subprogram_containing_address(dwarf_file, cu, row.address) + subprogram_name = dwarf_file.die_name(subprogram).decode() + subprogram_address = dwarf_file.die_address(subprogram) + assert row.address >= subprogram_address + return f'{subprogram_name}+0x{row.address - subprogram_address:x}' diff --git a/drgn/dwarfdefs.py b/drgn/dwarfdefs.py new file mode 100644 index 00000000..32fed7aa --- /dev/null +++ b/drgn/dwarfdefs.py @@ -0,0 +1,344 @@ +import enum + +class DW_CHILDREN(enum.IntEnum): + no = 0 + yes = 1 + + +class DW_TAG(enum.IntEnum): + array_type = 0x01 + class_type = 0x02 + entry_point = 0x03 + enumeration_type = 0x04 + formal_parameter = 0x05 + imported_declaration = 0x08 + label = 0x0a + lexical_block = 0x0b + member = 0x0d + pointer_type = 0x0f + reference_type = 0x10 + compile_unit = 0x11 + string_type = 0x12 + structure_type = 0x13 + subroutine_type = 0x15 + typedef = 0x16 + union_type = 0x17 + unspecified_parameters = 0x18 + variant = 0x19 + common_block = 0x1a + common_inclusion = 0x1b + inheritance = 0x1c + inlined_subroutine = 0x1d + module = 0x1e + ptr_to_member_type = 0x1f + set_type = 0x20 + subrange_type = 0x21 + with_stmt = 0x22 + access_declaration = 0x23 + base_type = 0x24 + catch_block = 0x25 + const_type = 0x26 + constant = 0x27 + enumerator = 0x28 + file_type = 0x29 + friend = 0x2a + namelist = 0x2b + namelist_item = 0x2c + packed_type = 0x2d + subprogram = 0x2e + template_type_parameter = 0x2f + template_value_parameter = 0x30 + thrown_type = 0x31 + try_block = 0x32 + variant_part = 0x33 + variable = 0x34 + volatile_type = 0x35 + dwarf_procedure = 0x36 + restrict_type = 0x37 + interface_type = 0x38 + namespace = 0x39 + imported_module = 0x3a + unspecified_type = 0x3b + partial_unit = 0x3c + imported_unit = 0x3d + # 0x3e reserved + condition = 0x3f + shared_type = 0x40 + type_unit = 0x41 + rvalue_reference_type = 0x42 + template_alias = 0x43 + + # DWARF 5 + atomic_type = 0x47 + + lo_user = 0x4080 + + MIPS_loop = 0x4081 + format_label = 0x4101 + function_template = 0x4102 + class_template = 0x4103 + + GNU_BINCL = 0x4104 + GNU_EINCL = 0x4105 + + GNU_template_template_param = 0x4106 + GNU_template_parameter_pack = 0x4107 + GNU_formal_parameter_pack = 0x4108 + GNU_call_site = 0x4109 + GNU_call_site_parameter = 0x410a + + hi_user = 0xffff + + +class DW_AT(enum.IntEnum): + sibling = 0x01 + location = 0x02 + name = 0x03 + ordering = 0x09 + subscr_data = 0x0a + byte_size = 0x0b + bit_offset = 0x0c + bit_size = 0x0d + element_list = 0x0f + stmt_list = 0x10 + low_pc = 0x11 + high_pc = 0x12 + language = 0x13 + member = 0x14 + discr = 0x15 + discr_value = 0x16 + visibility = 0x17 + import_ = 0x18 + string_length = 0x19 + common_reference = 0x1a + comp_dir = 0x1b + const_value = 0x1c + containing_type = 0x1d + default_value = 0x1e + inline = 0x20 + is_optional = 0x21 + lower_bound = 0x22 + producer = 0x25 + prototyped = 0x27 + return_addr = 0x2a + start_scope = 0x2c + bit_stride = 0x2e + upper_bound = 0x2f + abstract_origin = 0x31 + accessibility = 0x32 + address_class = 0x33 + artificial = 0x34 + base_types = 0x35 + calling_convention = 0x36 + count = 0x37 + data_member_location = 0x38 + decl_column = 0x39 + decl_file = 0x3a + decl_line = 0x3b + declaration = 0x3c + discr_list = 0x3d + encoding = 0x3e + external = 0x3f + frame_base = 0x40 + friend = 0x41 + identifier_case = 0x42 + macro_info = 0x43 + namelist_item = 0x44 + priority = 0x45 + segment = 0x46 + specification = 0x47 + static_link = 0x48 + type = 0x49 + use_location = 0x4a + variable_parameter = 0x4b + virtuality = 0x4c + vtable_elem_location = 0x4d + allocated = 0x4e + associated = 0x4f + data_location = 0x50 + byte_stride = 0x51 + entry_pc = 0x52 + use_UTF8 = 0x53 + extension = 0x54 + ranges = 0x55 + trampoline = 0x56 + call_column = 0x57 + call_file = 0x58 + call_line = 0x59 + description = 0x5a + binary_scale = 0x5b + decimal_scale = 0x5c + small = 0x5d + decimal_sign = 0x5e + digit_count = 0x5f + picture_string = 0x60 + mutable = 0x61 + threads_scaled = 0x62 + explicit = 0x63 + object_pointer = 0x64 + endianity = 0x65 + elemental = 0x66 + pure = 0x67 + recursive = 0x68 + signature = 0x69 + main_subprogram = 0x6a + data_bit_offset = 0x6b + const_expr = 0x6c + enum_class = 0x6d + linkage_name = 0x6e + + # DWARF5 + noreturn = 0x87 + + lo_user = 0x2000 + + MIPS_fde = 0x2001 + MIPS_loop_begin = 0x2002 + MIPS_tail_loop_begin = 0x2003 + MIPS_epilog_begin = 0x2004 + MIPS_loop_unroll_factor = 0x2005 + MIPS_software_pipeline_depth = 0x2006 + MIPS_linkage_name = 0x2007 + MIPS_stride = 0x2008 + MIPS_abstract_name = 0x2009 + MIPS_clone_origin = 0x200a + MIPS_has_inlines = 0x200b + MIPS_stride_byte = 0x200c + MIPS_stride_elem = 0x200d + MIPS_ptr_dopetype = 0x200e + MIPS_allocatable_dopetype = 0x200f + MIPS_assumed_shape_dopetype = 0x2010 + MIPS_assumed_size = 0x2011 + + # GNU extensions + sf_names = 0x2101 + src_info = 0x2102 + mac_info = 0x2103 + src_coords = 0x2104 + body_begin = 0x2105 + body_end = 0x2106 + GNU_vector = 0x2107 + GNU_guarded_by = 0x2108 + GNU_pt_guarded_by = 0x2109 + GNU_guarded = 0x210a + GNU_pt_guarded = 0x210b + GNU_locks_excluded = 0x210c + GNU_exclusive_locks_required = 0x210d + GNU_shared_locks_required = 0x210e + GNU_odr_signature = 0x210f + GNU_template_name = 0x2110 + GNU_call_site_value = 0x2111 + GNU_call_site_data_value = 0x2112 + GNU_call_site_target = 0x2113 + GNU_call_site_target_clobbered = 0x2114 + GNU_tail_call = 0x2115 + GNU_all_tail_call_sites = 0x2116 + GNU_all_call_sites = 0x2117 + GNU_all_source_call_sites = 0x2118 + GNU_macros = 0x2119 + GNU_deleted = 0x211a + + hi_user = 0x3fff + + +class DW_FORM(enum.IntEnum): + addr = 0x01 + block2 = 0x03 + block4 = 0x04 + data2 = 0x05 + data4 = 0x06 + data8 = 0x07 + string = 0x08 + block = 0x09 + block1 = 0x0a + data1 = 0x0b + flag = 0x0c + sdata = 0x0d + strp = 0x0e + udata = 0x0f + ref_addr = 0x10 + ref1 = 0x11 + ref2 = 0x12 + ref4 = 0x13 + ref8 = 0x14 + ref_udata = 0x15 + indirect = 0x16 + sec_offset = 0x17 + exprloc = 0x18 + flag_present = 0x19 + ref_sig8 = 0x20 + + +class DW_LNS(enum.IntEnum): + copy = 1 + advance_pc = 2 + advance_line = 3 + set_file = 4 + set_column = 5 + negate_stmt = 6 + set_basic_block = 7 + const_add_pc = 8 + fixed_advance_pc = 9 + set_prologue_end = 10 + set_epilogue_begin = 11 + set_isa = 12 + + +class DW_LNE(enum.IntEnum): + end_sequence = 1 + set_address = 2 + define_file = 3 + set_discriminator = 4 + + lo_user = 128 + hi_user = 255 + + +def at_name(at): + try: + return f'DW_AT_{DW_AT(at).name}' + except ValueError: + return str(at) + + +def at_class_constant(at): + return (at == DW_FORM.data1 or at == DW_FORM.data2 or + at == DW_FORM.data4 or at == DW_FORM.data8 or + at == DW_FORM.udata or at == DW_FORM.sdata) + + +def at_class_constant_bytes(at): + return (at == DW_FORM.data1 or at == DW_FORM.data2 or + at == DW_FORM.data4 or at == DW_FORM.data8) + + +def at_class_constant_int(at): + return at == DW_FORM.udata or at == DW_FORM.sdata + + +def form_name(form): + try: + return f'DW_FORM_{DW_FORM(form).name}' + except ValueError: + return str(form) + + +def tag_name(tag): + try: + return f'DW_TAG_{DW_TAG(tag).name}' + except ValueError: + return str(tag) + + +def lns_name(lns): + try: + return f'DW_LNS_{DW_LNS(lns).name}' + except ValueError: + return str(lns) + + +def lne_name(lne): + try: + return f'DW_LNE_{DW_LNE(lne).name}' + except ValueError: + return str(lne) diff --git a/drgn/elf.py b/drgn/elf.py index 2799fdf5..1fdfcd28 100644 --- a/drgn/elf.py +++ b/drgn/elf.py @@ -191,19 +191,12 @@ def parse_elf_sections(buffer, ehdr): return sections +def parse_elf_symtab(buffer, shdr): + symnum = shdr.sh_size // ctypes.sizeof(Elf64_Sym) + return (Elf64_Sym * symnum).from_buffer_copy(buffer, shdr.sh_offset) + + """ - def symtab(self): - try: - return self._symtab - except AttributeError: - pass - - shdr = self.section(b'.symtab') - - symnum = shdr.sh_size // ctypes.sizeof(Elf64_Sym) - self._symtab = (Elf64_Sym * symnum).from_buffer_copy(self._mm, shdr.sh_offset) - return self._symtab - def symbol(self, name, *, all=False): try: syms = self._symtab_by_name[name] diff --git a/lldwarf/arange.c b/lldwarf/arange.c new file mode 100644 index 00000000..3875b15a --- /dev/null +++ b/lldwarf/arange.c @@ -0,0 +1,323 @@ +#include "lldwarf.h" + +static void ArangeTableHeader_dealloc(ArangeTableHeader *self) +{ + Py_TYPE(self)->tp_free((PyObject *)self); +} + +PyObject *ArangeTableHeader_table_offset(ArangeTableHeader *self) +{ + uint64_t header_length = self->is_64_bit ? 24 : 12; + Py_ssize_t ret, alignment; + + if (__builtin_add_overflow(self->offset, header_length, &ret)) { + PyErr_SetString(PyExc_OverflowError, "table offset too large"); + return NULL; + } + + alignment = self->segment_size + 2 * self->address_size; + if (ret % alignment && + __builtin_add_overflow(ret, alignment - ret % alignment, &ret)) { + PyErr_SetString(PyExc_OverflowError, "table offset too large"); + return NULL; + } + + return PyLong_FromSsize_t(ret); +} + +PyObject *ArangeTableHeader_next_offset(ArangeTableHeader *self) +{ + uint64_t unit_length_length = self->is_64_bit ? 12 : 4; + uint64_t unit_length; + Py_ssize_t ret; + + if (__builtin_add_overflow(self->unit_length, unit_length_length, &unit_length) || + __builtin_add_overflow(self->offset, unit_length, &ret)) { + PyErr_SetString(PyExc_OverflowError, "next offset too large"); + return NULL; + } + + return PyLong_FromSsize_t(ret); +} + +PyObject *LLDwarf_ParseArangeTableHeader(Py_buffer *buffer, Py_ssize_t *offset) +{ + ArangeTableHeader *art; + uint32_t length; + + art = PyObject_New(ArangeTableHeader, &ArangeTableHeader_type); + if (!art) + return NULL; + + art->offset = *offset; + + if (read_u32(buffer, offset, &length) == -1) + goto err; + + art->is_64_bit = length == UINT32_C(0xffffffff); + if (art->is_64_bit) { + if (read_u64(buffer, offset, &art->unit_length) == -1) + goto err; + } else { + art->unit_length = length; + } + + if (read_u16(buffer, offset, &art->version) == -1) + goto err; + + if (art->is_64_bit) { + if (read_u64(buffer, offset, &art->debug_info_offset) == -1) + goto err; + } else { + unsigned int debug_info_offset; + + if (read_u32(buffer, offset, &debug_info_offset) == -1) + goto err; + art->debug_info_offset = debug_info_offset; + } + + if (read_u8(buffer, offset, &art->address_size) == -1) + goto err; + + if (read_u8(buffer, offset, &art->segment_size) == -1) + goto err; + + return (PyObject *)art; + +err: + PyErr_SetString(PyExc_ValueError, + "address range table header is truncated"); + Py_DECREF(art); + return NULL; +} + +static PyMethodDef ArangeTableHeader_methods[] = { + {"table_offset", (PyCFunction)ArangeTableHeader_table_offset, + METH_NOARGS, + "table_offset() -> int\n\n" + "Get the offset into the buffer where the address range table itself\n" + "begins. This is the starting offset of the arange table header plus\n" + "the length of the header, aligned up to a multiple of the address\n" + "range tuple size."}, + {"next_offset", (PyCFunction)ArangeTableHeader_next_offset, + METH_NOARGS, + "next_offset() -> int\n\n" + "Get the offset into the buffer where the next address range table\n" + "starts. This is the starting offset of the CU plus the length of\n" + "the unit, including the header. If this is the last address range\n" + "table, this offset is the end of the .debug_aranges section."}, + {}, +}; + +static PyMemberDef ArangeTableHeader_members[] = { + {"offset", T_PYSSIZET, offsetof(ArangeTableHeader, offset), 0, + "offset into the buffer where this arange table starts"}, + {"unit_length", T_UINT64T, offsetof(ArangeTableHeader, unit_length), 0, + "length of this arange table, not including the unit_length field"}, + {"version", T_UINT16T, offsetof(ArangeTableHeader, version), 0, + "format version of this arange table"}, + {"debug_info_offset", T_UINT64T, offsetof(ArangeTableHeader, debug_info_offset), 0, + "location of this arange table's compilation unit as an offset into the .debug_info section"}, + {"address_size", T_UINT8T, offsetof(ArangeTableHeader, address_size), 0, + "size of an address in this arange table"}, + {"segment_size", T_UINT8T, offsetof(ArangeTableHeader, segment_size), 0, + "size of a segment selector in this arange table"}, + {"is_64_bit", T_BOOL, offsetof(ArangeTableHeader, is_64_bit), 0, + "whether this CU is using the 64-bit format"}, + {}, +}; + +#define ArangeTableHeader_DOC \ + "ArangeTableHeader(offset, unit_length, version, debug_info_offset,\n" \ + " address_size, segment_size,\n" \ + " is_64_bit) -> new address range table header\n\n" \ + "Create a new DWARF address range table header.\n\n" \ + "Arguments:\n" \ + "offset -- integer offset\n" \ + "unit_length -- integer length\n" \ + "version -- integer format version\n" \ + "debug_info_offset -- integer offset\n" \ + "address_size -- integer size\n" \ + "segment_size -- integer size\n" \ + "is_64_bit -- boolean" + +PyTypeObject ArangeTableHeader_type = { + PyVarObject_HEAD_INIT(NULL, 0) + "drgn.lldwarf.ArangeTableHeader", /* tp_name */ + sizeof(ArangeTableHeader), /* tp_basicsize */ + 0, /* tp_itemsize */ + (destructor)ArangeTableHeader_dealloc, /* tp_dealloc */ + NULL, /* tp_print */ + NULL, /* tp_getattr */ + NULL, /* tp_setattr */ + NULL, /* tp_as_async */ + LLDwarfObject_repr, /* tp_repr */ + NULL, /* tp_as_number */ + NULL, /* tp_as_sequence */ + NULL, /* tp_as_mapping */ + NULL, /* tp_hash */ + NULL, /* tp_call */ + NULL, /* tp_str */ + NULL, /* tp_getattro */ + NULL, /* tp_setattro */ + NULL, /* tp_as_buffer */ + Py_TPFLAGS_DEFAULT, /* tp_flags */ + ArangeTableHeader_DOC, /* tp_doc */ + NULL, /* tp_traverse */ + NULL, /* tp_clear */ + LLDwarfObject_richcompare, /* tp_richcompare */ + 0, /* tp_weaklistoffset */ + NULL, /* tp_iter */ + NULL, /* tp_iternext */ + ArangeTableHeader_methods, /* tp_methods */ + ArangeTableHeader_members, /* tp_members */ + NULL, /* tp_getset */ + NULL, /* tp_base */ + NULL, /* tp_dict */ + NULL, /* tp_descr_get */ + NULL, /* tp_descr_set */ + 0, /* tp_dictoffset */ + LLDwarfObject_init, /* tp_init */ +}; + +static void AddressRange_dealloc(AddressRange *self) +{ + Py_TYPE(self)->tp_free((PyObject *)self); +} + +PyObject *LLDwarf_ParseArangeTable(Py_buffer *buffer, Py_ssize_t *offset, + Py_ssize_t segment_size, + Py_ssize_t address_size) +{ + PyObject *arange_table; + + arange_table = PyList_New(0); + if (!arange_table) + return NULL; + + for (;;) { + AddressRange *arange; + uint64_t segment, address, length; + uint32_t tmp; + int ret; + + switch (segment_size) { + case 4: + if (read_u32(buffer, offset, &tmp) == -1) + goto err; + segment = tmp; + break; + case 8: + if (read_u64(buffer, offset, &segment) == -1) + goto err; + break; + case 0: + segment = 0; + break; + default: + PyErr_Format(PyExc_ValueError, "unsupported segment size %ld", + (long)segment_size); + goto err; + } + + switch (address_size) { + case 4: + if (read_u32(buffer, offset, &tmp) == -1) + goto err; + address = tmp; + if (read_u32(buffer, offset, &tmp) == -1) + goto err; + length = tmp; + break; + case 8: + if (read_u64(buffer, offset, &address) == -1) + goto err; + if (read_u64(buffer, offset, &length) == -1) + goto err; + break; + default: + PyErr_Format(PyExc_ValueError, "unsupported address size %ld", + (long)address_size); + goto err; + } + + if (segment == 0 && address == 0 && length == 0) + break; + + arange = PyMem_Malloc(sizeof(AddressRange)); + if (!arange) + goto err; + PyObject_Init((PyObject *)arange, &AddressRange_type); + arange->segment = segment; + arange->address = address; + arange->length = length; + + ret = PyList_Append(arange_table, (PyObject *)arange); + Py_DECREF((PyObject *)arange); + if (ret == -1) + goto err; + } + + return arange_table; + +err: + Py_DECREF(arange_table); + return NULL; +} + +static PyMemberDef AddressRange_members[] = { + {"segment", T_UINT64T, offsetof(AddressRange, segment), 0, + "segment selector of the address range"}, + {"address", T_UINT64T, offsetof(AddressRange, address), 0, + "starting address of the address range"}, + {"length", T_UINT64T, offsetof(AddressRange, length), 0, + "length of the address range"}, + {}, +}; + +#define AddressRange_DOC \ + "AddressRange(segment, address, length) -> new address range\n" \ + "Create a new address range.\n\n" \ + "Arguments:\n" \ + "segment -- integer segment selector\n" \ + "address -- integer start address\n" \ + "length -- integer range length\n" + +PyTypeObject AddressRange_type = { + PyVarObject_HEAD_INIT(NULL, 0) + "drgn.lldwarf.AddressRange", /* tp_name */ + sizeof(AddressRange), /* tp_basicsize */ + 0, /* tp_itemsize */ + (destructor)AddressRange_dealloc, /* tp_dealloc */ + NULL, /* tp_print */ + NULL, /* tp_getattr */ + NULL, /* tp_setattr */ + NULL, /* tp_as_async */ + LLDwarfObject_repr, /* tp_repr */ + NULL, /* tp_as_number */ + NULL, /* tp_as_sequence */ + NULL, /* tp_as_mapping */ + NULL, /* tp_hash */ + NULL, /* tp_call */ + NULL, /* tp_str */ + NULL, /* tp_getattro */ + NULL, /* tp_setattro */ + NULL, /* tp_as_buffer */ + Py_TPFLAGS_DEFAULT, /* tp_flags */ + AddressRange_DOC, /* tp_doc */ + NULL, /* tp_traverse */ + NULL, /* tp_clear */ + LLDwarfObject_richcompare, /* tp_richcompare */ + 0, /* tp_weaklistoffset */ + NULL, /* tp_iter */ + NULL, /* tp_iternext */ + NULL, /* tp_methods */ + AddressRange_members, /* tp_members */ + NULL, /* tp_getset */ + NULL, /* tp_base */ + NULL, /* tp_dict */ + NULL, /* tp_descr_get */ + NULL, /* tp_descr_set */ + 0, /* tp_dictoffset */ + LLDwarfObject_init, /* tp_init */ +}; diff --git a/lldwarf/lldwarf.h b/lldwarf/lldwarf.h index 1122f9ea..f29d5364 100644 --- a/lldwarf/lldwarf.h +++ b/lldwarf/lldwarf.h @@ -23,6 +23,28 @@ typedef struct { extern PyTypeObject AbbrevDecl_type; +typedef struct { + PyObject_VAR_HEAD + uint64_t segment; + uint64_t address; + uint64_t length; +} AddressRange; + +extern PyTypeObject AddressRange_type; + +typedef struct { + PyObject_VAR_HEAD + Py_ssize_t offset; + uint64_t unit_length; + uint16_t version; + uint64_t debug_info_offset; + uint8_t address_size; + uint8_t segment_size; + bool is_64_bit; +} ArangeTableHeader; + +extern PyTypeObject ArangeTableHeader_type; + typedef struct { PyObject_HEAD Py_ssize_t offset; @@ -123,6 +145,10 @@ int LLDwarfObject_RichCompareBool(PyObject *self, PyObject *other, int op); PyObject *LLDwarfObject_richcompare(PyObject *self, PyObject *other, int op); PyObject *LLDwarf_ParseAbbrevTable(Py_buffer *buffer, Py_ssize_t *offset); +PyObject *LLDwarf_ParseArangeTable(Py_buffer *buffer, Py_ssize_t *offset, + Py_ssize_t segment_size, + Py_ssize_t address_size); +PyObject *LLDwarf_ParseArangeTableHeader(Py_buffer *buffer, Py_ssize_t *offset); PyObject *LLDwarf_ParseCompilationUnitHeader(Py_buffer *buffer, Py_ssize_t *offset); PyObject *LLDwarf_ParseDie(Py_buffer *buffer, Py_ssize_t *offset, diff --git a/lldwarf/module.c b/lldwarf/module.c index e8ee7e97..3b17edc5 100644 --- a/lldwarf/module.c +++ b/lldwarf/module.c @@ -176,6 +176,57 @@ static PyObject *parse_abbrev_table(PyObject *self, PyObject *args, return ret; } +static PyObject *parse_arange_table(PyObject *self, PyObject *args, + PyObject *kwds) +{ + static char *keywords[] = { + "segment_size", "address_size", "buffer", "offset", NULL + }; + Py_ssize_t segment_size; + Py_ssize_t address_size; + Py_buffer buffer; + Py_ssize_t offset = 0; + PyObject *ret; + + if (!PyArg_ParseTupleAndKeywords(args, kwds, "nny*|n:parse_arange_table", + keywords, &segment_size, &address_size, + &buffer, &offset)) + return NULL; + + if (offset < 0) { + PyErr_SetString(PyExc_ValueError, "offset cannot be negative"); + PyBuffer_Release(&buffer); + return NULL; + } + + ret = LLDwarf_ParseArangeTable(&buffer, &offset, segment_size, address_size); + PyBuffer_Release(&buffer); + return ret; +} + +static PyObject *parse_arange_table_header(PyObject *self, PyObject *args, + PyObject *kwds) +{ + static char *keywords[] = {"buffer", "offset", NULL}; + Py_buffer buffer; + Py_ssize_t offset = 0; + PyObject *ret; + + if (!PyArg_ParseTupleAndKeywords(args, kwds, "y*|n:parse_arange_table_header", + keywords, &buffer, &offset)) + return NULL; + + if (offset < 0) { + PyErr_SetString(PyExc_ValueError, "offset cannot be negative"); + PyBuffer_Release(&buffer); + return NULL; + } + + ret = LLDwarf_ParseArangeTableHeader(&buffer, &offset); + PyBuffer_Release(&buffer); + return ret; +} + static PyObject *parse_compilation_unit_header(PyObject *self, PyObject *args, PyObject *kwds) { @@ -343,11 +394,27 @@ static PyMethodDef lldwarf_methods[] = { "offset -- optional offset into the buffer"}, {"parse_abbrev_table", (PyCFunction)parse_abbrev_table, METH_VARARGS | METH_KEYWORDS, - "parse_abbrev_table(buffer, offset=0) -> dict[code]: AbbrevDecl \n\n" + "parse_abbrev_table(buffer, offset=0) -> dict[code]: AbbrevDecl\n\n" "Parse an abbreviation table.\n\n" "Arguments:\n" "buffer -- readable source buffer\n" "offset -- optional offset into the buffer"}, + {"parse_arange_table", (PyCFunction)parse_arange_table, + METH_VARARGS | METH_KEYWORDS, + "parse_arange_table(segment_size, address_size, buffer, offset=0) -> list of AddressRange\n\n" + "Parse an address range table.\n\n" + "Arguments:\n" + "segment_size -- size of a segment selector in this arange table\n" + "address_size -- size of an address in this arange table\n" + "buffer -- readable source buffer\n" + "offset -- optional offset into the buffer"}, + {"parse_arange_table_header", (PyCFunction)parse_arange_table_header, + METH_VARARGS | METH_KEYWORDS, + "parse_arange_table_header(buffer, offset=0) -> dict[code]: ArangeTableHeader\n\n" + "Parse an address range table header.\n\n" + "Arguments:\n" + "buffer -- readable source buffer\n" + "offset -- optional offset into the buffer"}, {"parse_compilation_unit_header", (PyCFunction)parse_compilation_unit_header, METH_VARARGS | METH_KEYWORDS, @@ -412,6 +479,14 @@ PyInit_lldwarf(void) if (PyType_Ready(&AbbrevDecl_type) < 0) return NULL; + AddressRange_type.tp_new = PyType_GenericNew; + if (PyType_Ready(&AddressRange_type) < 0) + return NULL; + + ArangeTableHeader_type.tp_new = PyType_GenericNew; + if (PyType_Ready(&ArangeTableHeader_type) < 0) + return NULL; + CompilationUnitHeader_type.tp_new = PyType_GenericNew; if (PyType_Ready(&CompilationUnitHeader_type) < 0) return NULL; @@ -440,6 +515,12 @@ PyInit_lldwarf(void) Py_INCREF(&AbbrevDecl_type); PyModule_AddObject(m, "AbbrevDecl", (PyObject *)&AbbrevDecl_type); + Py_INCREF(&AddressRange_type); + PyModule_AddObject(m, "AddressRange", (PyObject *)&AddressRange_type); + + Py_INCREF(&ArangeTableHeader_type); + PyModule_AddObject(m, "ArangeTableHeader", (PyObject *)&ArangeTableHeader_type); + Py_INCREF(&CompilationUnitHeader_type); PyModule_AddObject(m, "CompilationUnitHeader", (PyObject *)&CompilationUnitHeader_type); diff --git a/setup.py b/setup.py index 58d2b47c..fc15f99a 100755 --- a/setup.py +++ b/setup.py @@ -15,7 +15,7 @@ def out_of_date(dependencies, target): def gen_header(): - import drgn.dwarf.defs as defs + import drgn.dwarfdefs as defs def write_enum(e): f.write('enum {\n') @@ -39,7 +39,7 @@ def gen_header(): class my_build_ext(build_ext): def run(self): - if out_of_date(['drgn/dwarf/defs.py', 'setup.py'], 'lldwarf/dwarfdefs.h'): + if out_of_date(['drgn/dwarfdefs.py', 'setup.py'], 'lldwarf/dwarfdefs.h'): try: gen_header() except Exception as e: @@ -57,6 +57,7 @@ module = Extension( 'lldwarf/module.c', 'lldwarf/object.c', 'lldwarf/abbrev.c', + 'lldwarf/arange.c', 'lldwarf/cu.c', 'lldwarf/die.c', 'lldwarf/line.c', diff --git a/tests/lldwarf/test_arange.py b/tests/lldwarf/test_arange.py new file mode 100644 index 00000000..2111727b --- /dev/null +++ b/tests/lldwarf/test_arange.py @@ -0,0 +1,125 @@ +import drgn.lldwarf as lldwarf +import unittest + + +class TestArangeTableHeaderObject(unittest.TestCase): + def test_offset(self): + header = lldwarf.ArangeTableHeader( + offset=70, + unit_length=200, + version=2, + debug_info_offset=0, + address_size=8, + segment_size=0, + is_64_bit=False, + ) + + self.assertEqual(header.table_offset(), 96) + self.assertEqual(header.next_offset(), 274) + + header.is_64_bit = True + self.assertEqual(header.table_offset(), 96) + self.assertEqual(header.next_offset(), 282) + + def test_offset_overflow(self): + header = lldwarf.ArangeTableHeader( + offset=2**63 - 12, + unit_length=2**64 - 4, + version=2, + debug_info_offset=0, + address_size=8, + segment_size=0, + is_64_bit=False, + ) + with self.assertRaises(OverflowError): + header.table_offset() + with self.assertRaises(OverflowError): + header.next_offset() + + header.offset = 2**63 - 8 + header.unit_length = 4 + with self.assertRaises(OverflowError): + header.next_offset() + + header.offset = 2**63 - 24 + header.unit_length = 2**64 - 12 + header.is_64_bit = True + with self.assertRaises(OverflowError): + header.table_offset() + with self.assertRaises(OverflowError): + header.next_offset() + + header.offset = 2**63 - 16 + header.unit_length = 4 + with self.assertRaises(OverflowError): + header.next_offset() + + +class TestParseArangeTableHeader(unittest.TestCase): + def test_negative_offset(self): + with self.assertRaises(ValueError): + lldwarf.parse_arange_table_header(b'', -1) + + def test_32bit(self): + buf = (b'\xc8\x00\x00\x00' # unit_length + b'\x02\x00' # version + b'\x00\x00\x00\x00' # debug_info_offset + b'\x08' # address_size + b'\x00') # segment_size + header = lldwarf.ArangeTableHeader( + offset=0, + unit_length=200, + version=2, + debug_info_offset=0, + address_size=8, + segment_size=0, + is_64_bit=False, + ) + + for i in range(len(buf)): + with self.assertRaisesRegex(ValueError, 'address range table header is truncated'): + lldwarf.parse_arange_table_header(buf[:i]) + + self.assertEqual(lldwarf.parse_arange_table_header(buf), header) + + def test_64bit(self): + buf = (b'\xff\xff\xff\xff' + b'\xc8\x00\x00\x00\x00\x00\x00\x00' # unit_length + b'\x02\x00' # version + b'\x00\x00\x00\x00\x00\x00\x00\x00' # debug_info_offset + b'\x08' # address_size + b'\x00') # segment_size + header = lldwarf.ArangeTableHeader( + offset=0, + unit_length=200, + version=2, + debug_info_offset=0, + address_size=8, + segment_size=0, + is_64_bit=True, + ) + + for i in range(len(buf)): + with self.assertRaisesRegex(ValueError, 'address range table header is truncated'): + lldwarf.parse_arange_table_header(buf[:i]) + + self.assertEqual(lldwarf.parse_arange_table_header(buf), header) + + def test_offset(self): + buf = (b'\x01' # padding + b'\xc8\x00\x00\x00' # unit_length + b'\x02\x00' # version + b'\x00\x00\x00\x00' # debug_info_offset + b'\x08' # address_size + b'\x00') # segment_size + header = lldwarf.ArangeTableHeader( + offset=1, + unit_length=200, + version=2, + debug_info_offset=0, + address_size=8, + segment_size=0, + is_64_bit=False, + ) + + self.assertEqual(lldwarf.parse_arange_table_header(buf, 1), header)