Add address range table parsing

This commit is contained in:
Omar Sandoval 2017-08-25 23:24:38 -07:00
parent bb5881243f
commit 72b9af2b82
13 changed files with 1199 additions and 480 deletions

View File

@ -139,38 +139,49 @@ def dump_line_number_matrix(cu, lnp, matrix, *, indent=0):
print(f'{prefix}}}')
def dump_cus(dwarf_file, args):
    """Dump every compilation unit whose name matches a pattern in args.cu.

    Arguments:
    dwarf_file -- DwarfFile to read compilation units from
    args -- parsed command line arguments; reads cu (list of fnmatch
    patterns), die, recursive, include_directories, file_names, lines and
    line_number_program
    """
    wants_lnp = (args.include_directories or args.file_names or args.lines or
                 args.line_number_program)
    for cu in dwarf_file.cu_headers():
        die = dwarf_file.cu_die(cu)
        try:
            cu_name = dwarf_file.die_name(die).decode()
        except KeyError:
            # The CU DIE has no name attribute; match it as the empty string.
            cu_name = ''
        if not any(fnmatch.fnmatch(cu_name, pattern) for pattern in args.cu):
            continue

        dump_cu(dwarf_file, cu, cu_name)

        if args.die:
            if args.recursive:
                dwarf_file.parse_die_children(cu, die, recurse=True)
            dump_die(dwarf_file, cu, die, indent=2, recurse=args.recursive)

        if wants_lnp:
            # cu_line_number_program_header() looks up the CU DIE itself and
            # accepts only the CU header; passing the DIE as well raises
            # TypeError.
            lnp = dwarf_file.cu_line_number_program_header(cu)
            if args.include_directories:
                dump_lnp_include_directories(lnp, indent=2)
            if args.file_names:
                dump_lnp_file_names(lnp, indent=2)
            if args.lines:
                matrix = dwarf_file.execute_line_number_program(lnp)
                dump_line_number_matrix(cu, lnp, matrix, indent=2)
            if args.line_number_program:
                dump_lnp_header(dwarf_file, lnp, indent=2)
                dump_lnp_ops(dwarf_file, lnp, indent=4)
def cmd_dump(args):
with DwarfFile(args.file) as dwarf_file:
for cu in dwarf_file.cu_headers():
die = dwarf_file.cu_die(cu)
try:
cu_name = dwarf_file.die_name(die).decode()
except KeyError:
cu_name = ''
for pattern in args.cu:
if fnmatch.fnmatch(cu_name, pattern):
break
else:
continue
dump_cu(dwarf_file, cu, cu_name)
if args.die:
if args.recursive:
dwarf_file.parse_die_children(cu, die, recurse=True)
dump_die(dwarf_file, cu, die, indent=2, recurse=args.recursive)
if (args.include_directories or args.file_names or args.lines or
args.line_number_program):
lnp = dwarf_file.cu_line_number_program_header(cu, die)
if args.include_directories:
dump_lnp_include_directories(lnp, indent=2)
if args.file_names:
dump_lnp_file_names(lnp, indent=2)
if args.lines:
matrix = dwarf_file.execute_line_number_program(lnp)
dump_line_number_matrix(cu, lnp, matrix, indent=2)
if args.line_number_program:
dump_lnp_header(dwarf_file, lnp, indent=2)
dump_lnp_ops(dwarf_file, lnp, indent=4)
if args.cu:
dump_cus(dwarf_file, args)
if args.symtab:
symbols = sorted(dwarf_file.symbols().items())
for name, syms in symbols:
print(name)
for sym in syms:
print(f' value=0x{sym.st_value:x} size=0x{sym.st_size:x}')
def register(subparsers):
subparser = subparsers.add_parser(
@ -191,8 +202,7 @@ def register(subparsers):
subparser.add_argument(
'--line-number-program', '--lnp', action='store_true', help='also dump the line number program')
subparser.add_argument(
'file', help='file to dump')
'--symtab', action='store_true', help='dump the symbol table')
subparser.add_argument(
'cu', nargs='+', metavar='glob',
help='pattern matching names of compilation units to dump')
'file', help='file to dump')
subparser.set_defaults(func=cmd_dump)

View File

@ -1,74 +1,38 @@
from drgn.dwarf import DwarfFile
from drgn.dwarf.defs import DW_TAG
from drgn.dwarf import DwarfProgram
from drgn.ftrace import Kprobe, FtraceInstance
import re
import os
import signal
def find_cu_by_name(dwarf_file, name):
for cu in dwarf_file.cu_headers():
die = dwarf_file.cu_die(cu)
try:
cu_name = dwarf_file.die_name(die).decode()
except KeyError:
continue
if cu_name == name:
return cu, die
else:
raise ValueError('CU not found')
def find_addresses_for_line(dwarf_file, filename, lineno):
cu, die = find_cu_by_name(dwarf_file, filename)
lnp = dwarf_file.cu_line_number_program_header(cu, die)
matrix = dwarf_file.execute_line_number_program(lnp)
rows = []
for row in matrix:
if (dwarf_file.line_number_row_name(cu, lnp, row) == filename and
row.line == lineno):
rows.append(row)
return cu, die, rows
def best_breakpoint_address(rows):
for row in rows:
if row.is_stmt:
return row
return rows[0]
def find_subprogram_containing_address(dwarf_file, cu, die, address):
dwarf_file.parse_die_children(cu, die)
for child in die.children:
if child.tag != DW_TAG.subprogram:
continue
if dwarf_file.die_contains_address(child, address):
return child
assert False # XXX
def create_probe(dwarf_file, filename, lineno):
cu, die, rows = find_addresses_for_line(dwarf_file, filename, lineno)
row = best_breakpoint_address(rows)
subprogram = find_subprogram_containing_address(dwarf_file, cu, die, row.address)
subprogram_name = dwarf_file.die_name(subprogram).decode()
subprogram_address = dwarf_file.die_address(subprogram)
name = f'drgn/{subprogram_name}_{os.getpid()}'
location = f'{subprogram_name}+{row.address - subprogram_address}'
return name, location
def sanitize_probe_name(name: str) -> str:
    """Turn an arbitrary string into a name made of ASCII letters, digits
    and underscores that does not start with a digit.

    Every character other than 0-9, A-Z and a-z is replaced with '_'. If the
    result is empty or begins with a digit, a leading '_' is added.
    """
    name = re.sub(r'[^0-9A-Za-z]', '_', name)
    # Check for emptiness first: indexing name[0] on '' raises IndexError.
    if not name or name[0].isdigit():
        name = '_' + name
    return name
def cmd_probe(args):
# XXX check in argparse
filename, lineno = args.line.rsplit(':', 1)
lineno = int(lineno)
with DwarfFile(args.vmlinux) as dwarf_file:
name, location = create_probe(dwarf_file, filename, lineno)
if args.line or (not args.function and ':' in args.location):
function = None
filename, lineno = args.location.rsplit(':', 1)
# TODO: catch ValueError
lineno = int(lineno)
probe_name = sanitize_probe_name(f'{filename}_{lineno}')
else:
function = args.location
probe_name = sanitize_probe_name(function)
probe_name = f'drgn/{probe_name}'
with Kprobe(name, location) as probe, \
FtraceInstance(f'drgn_{os.getpid()}') as instance:
binary = f'/lib/modules/{os.uname().release}/build/vmlinux'
with DwarfProgram(binary) as dwarf_program:
if function is not None:
probe_location = function
else:
probe_location = dwarf_program.find_breakpoint_location(filename, lineno)
# TODO: deal with probe name collisions
with FtraceInstance(f'drgn_{os.getpid()}') as instance, \
Kprobe(probe_name, probe_location) as probe:
probe.enable(instance)
try:
import subprocess
@ -80,9 +44,17 @@ def cmd_probe(args):
def register(subparsers):
subparser = subparsers.add_parser(
'probe')
subparser.add_argument(
'--line', '-l', metavar='FILE:LINE',
help='probe a source location')
subparser.add_argument(
'vmlinux', help='vmlinux file to use')
'location', metavar='LOCATION',
help='location to probe; either a function name or file:line')
group = subparser.add_mutually_exclusive_group()
group.add_argument(
'--line', '-l', action='store_true',
help='force location to be treated as file:line')
group.add_argument(
'--function', '-f', action='store_true',
help='force location to be treated as function name')
subparser.set_defaults(func=cmd_probe)

View File

@ -1 +1,2 @@
from drgn.dwarf.file import DwarfFile
from drgn.dwarf.program import DwarfProgram

View File

@ -1,344 +1 @@
import enum
class DW_CHILDREN(enum.IntEnum):
no = 0
yes = 1
class DW_TAG(enum.IntEnum):
array_type = 0x01
class_type = 0x02
entry_point = 0x03
enumeration_type = 0x04
formal_parameter = 0x05
imported_declaration = 0x08
label = 0x0a
lexical_block = 0x0b
member = 0x0d
pointer_type = 0x0f
reference_type = 0x10
compile_unit = 0x11
string_type = 0x12
structure_type = 0x13
subroutine_type = 0x15
typedef = 0x16
union_type = 0x17
unspecified_parameters = 0x18
variant = 0x19
common_block = 0x1a
common_inclusion = 0x1b
inheritance = 0x1c
inlined_subroutine = 0x1d
module = 0x1e
ptr_to_member_type = 0x1f
set_type = 0x20
subrange_type = 0x21
with_stmt = 0x22
access_declaration = 0x23
base_type = 0x24
catch_block = 0x25
const_type = 0x26
constant = 0x27
enumerator = 0x28
file_type = 0x29
friend = 0x2a
namelist = 0x2b
namelist_item = 0x2c
packed_type = 0x2d
subprogram = 0x2e
template_type_parameter = 0x2f
template_value_parameter = 0x30
thrown_type = 0x31
try_block = 0x32
variant_part = 0x33
variable = 0x34
volatile_type = 0x35
dwarf_procedure = 0x36
restrict_type = 0x37
interface_type = 0x38
namespace = 0x39
imported_module = 0x3a
unspecified_type = 0x3b
partial_unit = 0x3c
imported_unit = 0x3d
# 0x3e reserved
condition = 0x3f
shared_type = 0x40
type_unit = 0x41
rvalue_reference_type = 0x42
template_alias = 0x43
# DWARF 5
atomic_type = 0x47
lo_user = 0x4080
MIPS_loop = 0x4081
format_label = 0x4101
function_template = 0x4102
class_template = 0x4103
GNU_BINCL = 0x4104
GNU_EINCL = 0x4105
GNU_template_template_param = 0x4106
GNU_template_parameter_pack = 0x4107
GNU_formal_parameter_pack = 0x4108
GNU_call_site = 0x4109
GNU_call_site_parameter = 0x410a
hi_user = 0xffff
class DW_AT(enum.IntEnum):
sibling = 0x01
location = 0x02
name = 0x03
ordering = 0x09
subscr_data = 0x0a
byte_size = 0x0b
bit_offset = 0x0c
bit_size = 0x0d
element_list = 0x0f
stmt_list = 0x10
low_pc = 0x11
high_pc = 0x12
language = 0x13
member = 0x14
discr = 0x15
discr_value = 0x16
visibility = 0x17
import_ = 0x18
string_length = 0x19
common_reference = 0x1a
comp_dir = 0x1b
const_value = 0x1c
containing_type = 0x1d
default_value = 0x1e
inline = 0x20
is_optional = 0x21
lower_bound = 0x22
producer = 0x25
prototyped = 0x27
return_addr = 0x2a
start_scope = 0x2c
bit_stride = 0x2e
upper_bound = 0x2f
abstract_origin = 0x31
accessibility = 0x32
address_class = 0x33
artificial = 0x34
base_types = 0x35
calling_convention = 0x36
count = 0x37
data_member_location = 0x38
decl_column = 0x39
decl_file = 0x3a
decl_line = 0x3b
declaration = 0x3c
discr_list = 0x3d
encoding = 0x3e
external = 0x3f
frame_base = 0x40
friend = 0x41
identifier_case = 0x42
macro_info = 0x43
namelist_item = 0x44
priority = 0x45
segment = 0x46
specification = 0x47
static_link = 0x48
type = 0x49
use_location = 0x4a
variable_parameter = 0x4b
virtuality = 0x4c
vtable_elem_location = 0x4d
allocated = 0x4e
associated = 0x4f
data_location = 0x50
byte_stride = 0x51
entry_pc = 0x52
use_UTF8 = 0x53
extension = 0x54
ranges = 0x55
trampoline = 0x56
call_column = 0x57
call_file = 0x58
call_line = 0x59
description = 0x5a
binary_scale = 0x5b
decimal_scale = 0x5c
small = 0x5d
decimal_sign = 0x5e
digit_count = 0x5f
picture_string = 0x60
mutable = 0x61
threads_scaled = 0x62
explicit = 0x63
object_pointer = 0x64
endianity = 0x65
elemental = 0x66
pure = 0x67
recursive = 0x68
signature = 0x69
main_subprogram = 0x6a
data_bit_offset = 0x6b
const_expr = 0x6c
enum_class = 0x6d
linkage_name = 0x6e
# DWARF5
noreturn = 0x87
lo_user = 0x2000
MIPS_fde = 0x2001
MIPS_loop_begin = 0x2002
MIPS_tail_loop_begin = 0x2003
MIPS_epilog_begin = 0x2004
MIPS_loop_unroll_factor = 0x2005
MIPS_software_pipeline_depth = 0x2006
MIPS_linkage_name = 0x2007
MIPS_stride = 0x2008
MIPS_abstract_name = 0x2009
MIPS_clone_origin = 0x200a
MIPS_has_inlines = 0x200b
MIPS_stride_byte = 0x200c
MIPS_stride_elem = 0x200d
MIPS_ptr_dopetype = 0x200e
MIPS_allocatable_dopetype = 0x200f
MIPS_assumed_shape_dopetype = 0x2010
MIPS_assumed_size = 0x2011
# GNU extensions
sf_names = 0x2101
src_info = 0x2102
mac_info = 0x2103
src_coords = 0x2104
body_begin = 0x2105
body_end = 0x2106
GNU_vector = 0x2107
GNU_guarded_by = 0x2108
GNU_pt_guarded_by = 0x2109
GNU_guarded = 0x210a
GNU_pt_guarded = 0x210b
GNU_locks_excluded = 0x210c
GNU_exclusive_locks_required = 0x210d
GNU_shared_locks_required = 0x210e
GNU_odr_signature = 0x210f
GNU_template_name = 0x2110
GNU_call_site_value = 0x2111
GNU_call_site_data_value = 0x2112
GNU_call_site_target = 0x2113
GNU_call_site_target_clobbered = 0x2114
GNU_tail_call = 0x2115
GNU_all_tail_call_sites = 0x2116
GNU_all_call_sites = 0x2117
GNU_all_source_call_sites = 0x2118
GNU_macros = 0x2119
GNU_deleted = 0x211a
hi_user = 0x3fff
class DW_FORM(enum.IntEnum):
addr = 0x01
block2 = 0x03
block4 = 0x04
data2 = 0x05
data4 = 0x06
data8 = 0x07
string = 0x08
block = 0x09
block1 = 0x0a
data1 = 0x0b
flag = 0x0c
sdata = 0x0d
strp = 0x0e
udata = 0x0f
ref_addr = 0x10
ref1 = 0x11
ref2 = 0x12
ref4 = 0x13
ref8 = 0x14
ref_udata = 0x15
indirect = 0x16
sec_offset = 0x17
exprloc = 0x18
flag_present = 0x19
ref_sig8 = 0x20
class DW_LNS(enum.IntEnum):
copy = 1
advance_pc = 2
advance_line = 3
set_file = 4
set_column = 5
negate_stmt = 6
set_basic_block = 7
const_add_pc = 8
fixed_advance_pc = 9
set_prologue_end = 10
set_epilogue_begin = 11
set_isa = 12
class DW_LNE(enum.IntEnum):
end_sequence = 1
set_address = 2
define_file = 3
set_discriminator = 4
lo_user = 128
hi_user = 255
def at_name(at):
try:
return f'DW_AT_{DW_AT(at).name}'
except ValueError:
return str(at)
def at_class_constant(at):
return (at == DW_FORM.data1 or at == DW_FORM.data2 or
at == DW_FORM.data4 or at == DW_FORM.data8 or
at == DW_FORM.udata or at == DW_FORM.sdata)
def at_class_constant_bytes(at):
return (at == DW_FORM.data1 or at == DW_FORM.data2 or
at == DW_FORM.data4 or at == DW_FORM.data8)
def at_class_constant_int(at):
return at == DW_FORM.udata or at == DW_FORM.sdata
def form_name(form):
try:
return f'DW_FORM_{DW_FORM(form).name}'
except ValueError:
return str(form)
def tag_name(tag):
try:
return f'DW_TAG_{DW_TAG(tag).name}'
except ValueError:
return str(tag)
def lns_name(lns):
try:
return f'DW_LNS_{DW_LNS(lns).name}'
except ValueError:
return str(lns)
def lne_name(lne):
try:
return f'DW_LNE_{DW_LNE(lne).name}'
except ValueError:
return str(lne)
from drgn.dwarfdefs import *

View File

@ -1,13 +1,27 @@
import mmap
import drgn.lldwarf as lldwarf
from drgn.dwarf.defs import *
from drgn.elf import parse_elf_header, parse_elf_sections
from drgn.elf import parse_elf_header, parse_elf_sections, parse_elf_symtab
import os.path
import sys
from typing import List
class DwarfFile:
"""DWARF file parser
A DwarfFile manages parsing a single DWARF file, abstracting away the
details of reading the file.
"""
def __init__(self, path):
"""
DwarfFile(path) -> new DWARF file parser
Create a new DWARF file parser.
Arguments:
path -- file path
"""
self._closed = False
self._file = open(path, 'rb')
self._mmap = mmap.mmap(self._file.fileno(), 0, access=mmap.ACCESS_READ)
@ -16,6 +30,8 @@ class DwarfFile:
self._sections = parse_elf_sections(self._mmap, self._ehdr)
self._abbrev_tables = {}
self._cu_dies = {}
self._symbols = None
def close(self):
if not self._closed:
@ -34,21 +50,49 @@ class DwarfFile:
def __exit__(self, exc_type, exc_value, traceback):
self.close()
def section(self, name):
def section(self, name: str):
return self._sections[name]
def at_string(self, form, value):
def string_at(self, offset):
nul = self._mmap.find(b'\0', offset)
assert nul != -1 # XXX
return self._mmap[offset:nul]
def symbols(self):
if self._symbols is None:
symtab = self.section('.symtab')
strtab = self.section('.strtab')
symbols = {}
for sym in parse_elf_symtab(self._mmap, symtab):
if sym.st_name:
sym_name = self.string_at(strtab.sh_offset + sym.st_name).decode()
else:
sym_name = ''
try:
symbols[sym_name].append(sym)
except KeyError:
symbols[sym_name] = [sym]
self._symbols = symbols
return self._symbols
def symbol(self, name: str, *, all: bool=False):
syms = self.symbols()[name]
if all:
return syms
else:
if len(syms) > 1:
raise ValueError('multiple symbols with given name')
return syms[0]
def at_string(self, form: DW_FORM, value) -> bytes:
if form == DW_FORM.string:
return self._mmap[value[0]:value[0] + value[1]]
else:
assert form == DW_FORM.strp
debug_str = self.section('.debug_str')
offset = debug_str.sh_offset + value
nul = self._mmap.find(b'\0', offset)
assert nul != -1 # XXX
return self._mmap[offset:nul]
return self.string_at(debug_str.sh_offset + value)
def at_sec_offset(self, form, value):
def at_sec_offset(self, form: DW_FORM, value) -> int:
if form == DW_FORM.data4:
# DWARF 2 and 3
return int.from_bytes(value, sys.byteorder)
@ -57,7 +101,7 @@ class DwarfFile:
assert form == DW_FORM.sec_offset
return value
def abbrev_table(self, offset):
def abbrev_table(self, offset: int) -> lldwarf.AbbrevDecl:
try:
return self._abbrev_tables[offset]
except KeyError:
@ -78,13 +122,29 @@ class DwarfFile:
yield cu
offset = cu.next_offset()
def cu_die(self, cu, *, recurse=False):
def cu_header(self, offset: int):
    """Parse the compilation unit header located `offset` bytes into the
    .debug_info section."""
    section_start = self.section('.debug_info').sh_offset
    return lldwarf.parse_compilation_unit_header(self._mmap,
                                                 section_start + offset)
# Debugging information entries
def cu_die(self, cu: lldwarf.CompilationUnitHeader, *,
recurse: bool=False) -> lldwarf.DwarfDie:
try:
return self._cu_dies[cu.offset]
except KeyError:
pass
debug_info = self.section('.debug_info')
abbrev_table = self.abbrev_table(cu.debug_abbrev_offset)
return lldwarf.parse_die(cu, abbrev_table, self._mmap, cu.die_offset(),
recurse=recurse)
die = lldwarf.parse_die(cu, abbrev_table, self._mmap, cu.die_offset(),
recurse=recurse)
self._cu_dies[cu.offset] = die
return die
def parse_die_children(self, cu, die, *, recurse=False):
def parse_die_children(self, cu: lldwarf.CompilationUnitHeader,
die: lldwarf.DwarfDie, *, recurse: bool=False) -> None:
if not hasattr(die, 'children'):
debug_info = self.section('.debug_info')
abbrev_table = self.abbrev_table(cu.debug_abbrev_offset)
@ -93,7 +153,7 @@ class DwarfFile:
offset=die.offset + die.die_length,
recurse=recurse)
def die_contains_address(self, die, address):
def die_contains_address(self, die: lldwarf.DwarfDie, address: int) -> bool:
try:
ranges_form, ranges_value = die.find(DW_AT.ranges)
assert False
@ -114,11 +174,11 @@ class DwarfFile:
high_pc = high_pc_value
return low_pc <= address < high_pc
def die_name(self, die):
def die_name(self, die: lldwarf.DwarfDie) -> bytes:
form, value = die.find(DW_AT.name)
return self.at_string(form, value)
def die_address(self, die):
def die_address(self, die: lldwarf.DwarfDie) -> int:
try:
ranges_form, ranges_value = die.find(DW_AT.ranges)
assert False
@ -128,8 +188,27 @@ class DwarfFile:
assert form == DW_FORM.addr
return value
def cu_line_number_program_header(self, cu, die):
# Address range tables
def arange_table_headers(self):
    """Generate the header of each address range table in .debug_aranges,
    walking from the start of the section to its end."""
    section = self.section('.debug_aranges')
    offset = section.sh_offset
    end = section.sh_offset + section.sh_size
    while offset < end:
        header = lldwarf.parse_arange_table_header(self._mmap, offset)
        yield header
        # Each header reports where the following table begins.
        offset = header.next_offset()
def arange_table(self, art: lldwarf.ArangeTableHeader):
    """Parse the address range tuples that follow the table header `art`.

    Only version 2 tables are accepted (enforced with an assertion).
    """
    assert art.version == 2
    segment_size = art.segment_size
    address_size = art.address_size
    return lldwarf.parse_arange_table(segment_size, address_size, self._mmap,
                                      art.table_offset())
# Line number programs
def cu_line_number_program_header(self, cu: lldwarf.CompilationUnitHeader) -> lldwarf.LineNumberProgramHeader:
debug_line = self.section('.debug_line')
die = self.cu_die(cu)
try:
form, value = die.find(DW_AT.stmt_list)
except KeyError:
@ -137,13 +216,15 @@ class DwarfFile:
offset = debug_line.sh_offset + self.at_sec_offset(form, value)
return lldwarf.parse_line_number_program_header(self._mmap, offset)
def execute_line_number_program(self, lnp):
def execute_line_number_program(self, lnp: lldwarf.LineNumberProgramHeader) -> List[lldwarf.LineNumberRow]:
return lldwarf.execute_line_number_program(lnp, self._mmap,
lnp.program_offset())
def line_number_row_name(self, cu, lnp, row):
def line_number_row_name(self, cu: lldwarf.CompilationUnitHeader,
lnp: lldwarf.LineNumberProgramHeader,
row: lldwarf.LineNumberRow) -> str:
if row.file == 0:
return cu_name(cu)
return self.die_name(self.cu_die(cu)).decode()
file_name, directory_index, mtime, file_size = lnp.file_names[row.file - 1]
file_name = file_name.decode()
@ -153,7 +234,7 @@ class DwarfFile:
else:
return file_name
def decode_line_number_program(self, lnp):
def decode_line_number_program(self, lnp: lldwarf.LineNumberProgramHeader):
offset = lnp.program_offset()
end = lnp.end_offset()
while offset < end:

105
drgn/dwarf/program.py Normal file
View File

@ -0,0 +1,105 @@
import drgn.lldwarf as lldwarf
from drgn.dwarf.defs import *
from drgn.dwarf.file import DwarfFile
from typing import List, Tuple
class DwarfProgram:
    """Program-level queries on top of DwarfFile.

    A DwarfProgram owns the DwarfFile for the binary at the given path and
    answers lookups such as "which compilation unit covers this address" or
    "where should a breakpoint for file:line go". It can be used as a
    context manager; the underlying files are closed on exit.
    """

    def __init__(self, path):
        """
        DwarfProgram(path) -> new DWARF program

        Arguments:
        path -- path of the file to open with DwarfFile
        """
        self._closed = False
        self._file = DwarfFile(path)
        self._files = {path: self._file}

    def close(self):
        """Close every DwarfFile owned by this program."""
        # __del__ can run on a partially constructed object (DwarfFile(path)
        # raised in __init__), so _files may not exist.
        if hasattr(self, '_files'):
            for file in self._files.values():
                file.close()
        self._closed = True

    def __del__(self):
        self.close()

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        self.close()

    def find_cu_by_name(self, name: str) -> Tuple[DwarfFile, lldwarf.CompilationUnitHeader]:
        """Return (file, CU header) for the compilation unit whose DIE name
        equals `name` exactly.

        Compilation units without a name are skipped. Raises ValueError if
        no unit matches.
        """
        for cu in self._file.cu_headers():
            die = self._file.cu_die(cu)
            try:
                cu_name = self._file.die_name(die).decode()
            except KeyError:
                continue
            if cu_name == name:
                return self._file, cu
        raise ValueError('CU not found')

    def find_cu_by_addr(self, addr: int) -> Tuple[DwarfFile, lldwarf.CompilationUnitHeader]:
        """Return (file, CU header) for the compilation unit whose address
        range table contains `addr`.

        Raises ValueError if no range contains the address.
        """
        dwarf_file = self._file
        for art in dwarf_file.arange_table_headers():
            for arange in dwarf_file.arange_table(art):
                # A range covers [address, address + length); the end is
                # exclusive, otherwise the first address of an adjacent
                # range would match the wrong unit.
                if arange.address <= addr < arange.address + arange.length:
                    return dwarf_file, dwarf_file.cu_header(art.debug_info_offset)
        raise ValueError('CU containing address not found')

    def find_subprogram_by_name(self, name: str):
        """Return the subprogram DIE called `name`.

        The symbol table gives the address of `name`, the address range
        tables give the compilation unit for that address, and the direct
        children of that unit's DIE are searched for a subprogram with a
        matching name.

        Raises KeyError if there is no symbol called `name`, and ValueError
        if the symbol is ambiguous, no unit covers its address, or the unit
        has no matching subprogram.
        """
        symbol = self._file.symbol(name)
        dwarf_file, cu = self.find_cu_by_addr(symbol.st_value)
        # Use the file that find_cu_by_addr() returned rather than
        # self._file so the CU and its DIE always come from the same file.
        die = dwarf_file.cu_die(cu)
        dwarf_file.parse_die_children(cu, die)
        for child in die.children:
            if child.tag != DW_TAG.subprogram:
                continue
            try:
                child_name = dwarf_file.die_name(child).decode()
            except KeyError:
                # Subprogram DIE without a name attribute; it cannot be the
                # one we are looking for.
                continue
            if child_name == name:
                return child
        raise ValueError('subprogram not found')

    @staticmethod
    def _best_breakpoint_row(dwarf_file: DwarfFile,
                             cu: lldwarf.CompilationUnitHeader,
                             lnp: lldwarf.LineNumberProgramHeader,
                             matrix: List[lldwarf.LineNumberRow],
                             filename: str,
                             lineno: int) -> lldwarf.LineNumberRow:
        """Pick the line number row to break on for filename:lineno.

        Returns the first matching row that is a statement, or the first
        matching row if none are statements. Raises ValueError if no row
        matches.
        """
        first_row = None
        for row in matrix:
            if (dwarf_file.line_number_row_name(cu, lnp, row) == filename and
                    row.line == lineno):
                if row.is_stmt:
                    return row
                if first_row is None:
                    first_row = row
        if first_row is None:
            # An assert here would be stripped under -O and let None leak
            # out to the caller.
            raise ValueError('line not found in line number program')
        return first_row

    @staticmethod
    def _find_subprogram_containing_address(dwarf_file: DwarfFile,
                                            cu: lldwarf.CompilationUnitHeader,
                                            addr: int) -> lldwarf.DwarfDie:
        """Return the subprogram DIE, among the direct children of the CU
        DIE, that contains `addr`. Raises ValueError if there is none."""
        die = dwarf_file.cu_die(cu)
        dwarf_file.parse_die_children(cu, die)
        for child in die.children:
            if (child.tag == DW_TAG.subprogram and
                    dwarf_file.die_contains_address(child, addr)):
                return child
        raise ValueError('subprogram containing address not found')

    def find_breakpoint_location(self, filename: str, lineno: int) -> str:
        """Return a 'function+0xoffset' location string for filename:lineno.

        The compilation unit is found by name, its line number program is
        executed to choose a row for the line, and the row's address is
        expressed as an offset from the start of the enclosing subprogram.
        Raises ValueError if the unit, the line or the subprogram cannot be
        found.
        """
        dwarf_file, cu = self.find_cu_by_name(filename)
        lnp = dwarf_file.cu_line_number_program_header(cu)
        matrix = dwarf_file.execute_line_number_program(lnp)

        row = self._best_breakpoint_row(dwarf_file, cu, lnp, matrix, filename, lineno)

        subprogram = self._find_subprogram_containing_address(dwarf_file, cu, row.address)
        subprogram_name = dwarf_file.die_name(subprogram).decode()
        subprogram_address = dwarf_file.die_address(subprogram)
        # Invariant: the subprogram was selected because it contains the
        # address.
        assert row.address >= subprogram_address
        return f'{subprogram_name}+0x{row.address - subprogram_address:x}'

344
drgn/dwarfdefs.py Normal file
View File

@ -0,0 +1,344 @@
import enum
class DW_CHILDREN(enum.IntEnum):
    """Integer values of the DWARF DW_CHILDREN_* constants, named without
    the DW_CHILDREN_ prefix."""
    no = 0
    yes = 1
class DW_TAG(enum.IntEnum):
    """Integer values of the DWARF DW_TAG_* constants, named without the
    DW_TAG_ prefix (tag_name() adds it back)."""
    array_type = 0x01
    class_type = 0x02
    entry_point = 0x03
    enumeration_type = 0x04
    formal_parameter = 0x05
    imported_declaration = 0x08
    label = 0x0a
    lexical_block = 0x0b
    member = 0x0d
    pointer_type = 0x0f
    reference_type = 0x10
    compile_unit = 0x11
    string_type = 0x12
    structure_type = 0x13
    subroutine_type = 0x15
    typedef = 0x16
    union_type = 0x17
    unspecified_parameters = 0x18
    variant = 0x19
    common_block = 0x1a
    common_inclusion = 0x1b
    inheritance = 0x1c
    inlined_subroutine = 0x1d
    module = 0x1e
    ptr_to_member_type = 0x1f
    set_type = 0x20
    subrange_type = 0x21
    with_stmt = 0x22
    access_declaration = 0x23
    base_type = 0x24
    catch_block = 0x25
    const_type = 0x26
    constant = 0x27
    enumerator = 0x28
    file_type = 0x29
    friend = 0x2a
    namelist = 0x2b
    namelist_item = 0x2c
    packed_type = 0x2d
    subprogram = 0x2e
    template_type_parameter = 0x2f
    template_value_parameter = 0x30
    thrown_type = 0x31
    try_block = 0x32
    variant_part = 0x33
    variable = 0x34
    volatile_type = 0x35
    dwarf_procedure = 0x36
    restrict_type = 0x37
    interface_type = 0x38
    namespace = 0x39
    imported_module = 0x3a
    unspecified_type = 0x3b
    partial_unit = 0x3c
    imported_unit = 0x3d
    # 0x3e reserved
    condition = 0x3f
    shared_type = 0x40
    type_unit = 0x41
    rvalue_reference_type = 0x42
    template_alias = 0x43
    # DWARF 5
    atomic_type = 0x47
    # Values from lo_user up to hi_user follow; presumably the
    # vendor-extension range (confirm against the DWARF standard).
    lo_user = 0x4080
    MIPS_loop = 0x4081
    format_label = 0x4101
    function_template = 0x4102
    class_template = 0x4103
    GNU_BINCL = 0x4104
    GNU_EINCL = 0x4105
    GNU_template_template_param = 0x4106
    GNU_template_parameter_pack = 0x4107
    GNU_formal_parameter_pack = 0x4108
    GNU_call_site = 0x4109
    GNU_call_site_parameter = 0x410a
    hi_user = 0xffff
class DW_AT(enum.IntEnum):
    """Integer values of the DWARF DW_AT_* constants, named without the
    DW_AT_ prefix (at_name() adds it back)."""
    sibling = 0x01
    location = 0x02
    name = 0x03
    ordering = 0x09
    subscr_data = 0x0a
    byte_size = 0x0b
    bit_offset = 0x0c
    bit_size = 0x0d
    element_list = 0x0f
    stmt_list = 0x10
    low_pc = 0x11
    high_pc = 0x12
    language = 0x13
    member = 0x14
    discr = 0x15
    discr_value = 0x16
    visibility = 0x17
    # Trailing underscore because "import" is a Python keyword.
    import_ = 0x18
    string_length = 0x19
    common_reference = 0x1a
    comp_dir = 0x1b
    const_value = 0x1c
    containing_type = 0x1d
    default_value = 0x1e
    inline = 0x20
    is_optional = 0x21
    lower_bound = 0x22
    producer = 0x25
    prototyped = 0x27
    return_addr = 0x2a
    start_scope = 0x2c
    bit_stride = 0x2e
    upper_bound = 0x2f
    abstract_origin = 0x31
    accessibility = 0x32
    address_class = 0x33
    artificial = 0x34
    base_types = 0x35
    calling_convention = 0x36
    count = 0x37
    data_member_location = 0x38
    decl_column = 0x39
    decl_file = 0x3a
    decl_line = 0x3b
    declaration = 0x3c
    discr_list = 0x3d
    encoding = 0x3e
    external = 0x3f
    frame_base = 0x40
    friend = 0x41
    identifier_case = 0x42
    macro_info = 0x43
    namelist_item = 0x44
    priority = 0x45
    segment = 0x46
    specification = 0x47
    static_link = 0x48
    type = 0x49
    use_location = 0x4a
    variable_parameter = 0x4b
    virtuality = 0x4c
    vtable_elem_location = 0x4d
    allocated = 0x4e
    associated = 0x4f
    data_location = 0x50
    byte_stride = 0x51
    entry_pc = 0x52
    use_UTF8 = 0x53
    extension = 0x54
    ranges = 0x55
    trampoline = 0x56
    call_column = 0x57
    call_file = 0x58
    call_line = 0x59
    description = 0x5a
    binary_scale = 0x5b
    decimal_scale = 0x5c
    small = 0x5d
    decimal_sign = 0x5e
    digit_count = 0x5f
    picture_string = 0x60
    mutable = 0x61
    threads_scaled = 0x62
    explicit = 0x63
    object_pointer = 0x64
    endianity = 0x65
    elemental = 0x66
    pure = 0x67
    recursive = 0x68
    signature = 0x69
    main_subprogram = 0x6a
    data_bit_offset = 0x6b
    const_expr = 0x6c
    enum_class = 0x6d
    linkage_name = 0x6e
    # DWARF5
    noreturn = 0x87
    # Values from lo_user up to hi_user follow; presumably the
    # vendor-extension range (confirm against the DWARF standard).
    lo_user = 0x2000
    MIPS_fde = 0x2001
    MIPS_loop_begin = 0x2002
    MIPS_tail_loop_begin = 0x2003
    MIPS_epilog_begin = 0x2004
    MIPS_loop_unroll_factor = 0x2005
    MIPS_software_pipeline_depth = 0x2006
    MIPS_linkage_name = 0x2007
    MIPS_stride = 0x2008
    MIPS_abstract_name = 0x2009
    MIPS_clone_origin = 0x200a
    MIPS_has_inlines = 0x200b
    MIPS_stride_byte = 0x200c
    MIPS_stride_elem = 0x200d
    MIPS_ptr_dopetype = 0x200e
    MIPS_allocatable_dopetype = 0x200f
    MIPS_assumed_shape_dopetype = 0x2010
    MIPS_assumed_size = 0x2011
    # GNU extensions
    sf_names = 0x2101
    src_info = 0x2102
    mac_info = 0x2103
    src_coords = 0x2104
    body_begin = 0x2105
    body_end = 0x2106
    GNU_vector = 0x2107
    GNU_guarded_by = 0x2108
    GNU_pt_guarded_by = 0x2109
    GNU_guarded = 0x210a
    GNU_pt_guarded = 0x210b
    GNU_locks_excluded = 0x210c
    GNU_exclusive_locks_required = 0x210d
    GNU_shared_locks_required = 0x210e
    GNU_odr_signature = 0x210f
    GNU_template_name = 0x2110
    GNU_call_site_value = 0x2111
    GNU_call_site_data_value = 0x2112
    GNU_call_site_target = 0x2113
    GNU_call_site_target_clobbered = 0x2114
    GNU_tail_call = 0x2115
    GNU_all_tail_call_sites = 0x2116
    GNU_all_call_sites = 0x2117
    GNU_all_source_call_sites = 0x2118
    GNU_macros = 0x2119
    GNU_deleted = 0x211a
    hi_user = 0x3fff
class DW_FORM(enum.IntEnum):
    """Integer values of the DWARF DW_FORM_* constants, named without the
    DW_FORM_ prefix (form_name() adds it back)."""
    addr = 0x01
    block2 = 0x03
    block4 = 0x04
    data2 = 0x05
    data4 = 0x06
    data8 = 0x07
    string = 0x08
    block = 0x09
    block1 = 0x0a
    data1 = 0x0b
    flag = 0x0c
    sdata = 0x0d
    strp = 0x0e
    udata = 0x0f
    ref_addr = 0x10
    ref1 = 0x11
    ref2 = 0x12
    ref4 = 0x13
    ref8 = 0x14
    ref_udata = 0x15
    indirect = 0x16
    sec_offset = 0x17
    exprloc = 0x18
    flag_present = 0x19
    ref_sig8 = 0x20
class DW_LNS(enum.IntEnum):
    """Integer values of the DWARF DW_LNS_* constants, named without the
    DW_LNS_ prefix (lns_name() adds it back)."""
    copy = 1
    advance_pc = 2
    advance_line = 3
    set_file = 4
    set_column = 5
    negate_stmt = 6
    set_basic_block = 7
    const_add_pc = 8
    fixed_advance_pc = 9
    set_prologue_end = 10
    set_epilogue_begin = 11
    set_isa = 12
class DW_LNE(enum.IntEnum):
    """Integer values of the DWARF DW_LNE_* constants, named without the
    DW_LNE_ prefix (lne_name() adds it back)."""
    end_sequence = 1
    set_address = 2
    define_file = 3
    set_discriminator = 4
    lo_user = 128
    hi_user = 255
def at_name(at):
    """Return 'DW_AT_<name>' for a value defined in DW_AT, or str(at) for
    any other value."""
    try:
        known = DW_AT(at)
    except ValueError:
        return str(at)
    return f'DW_AT_{known.name}'
def at_class_constant(at):
    """Return whether `at` is one of the forms data1, data2, data4, data8,
    udata or sdata.

    Despite the parameter name, the value is compared against DW_FORM
    codes.
    """
    return at in (DW_FORM.data1, DW_FORM.data2, DW_FORM.data4,
                  DW_FORM.data8, DW_FORM.udata, DW_FORM.sdata)
def at_class_constant_bytes(at):
    """Return whether `at` is one of the forms data1, data2, data4 or
    data8.

    Despite the parameter name, the value is compared against DW_FORM
    codes.
    """
    return at in (DW_FORM.data1, DW_FORM.data2, DW_FORM.data4,
                  DW_FORM.data8)
def at_class_constant_int(at):
    """Return whether `at` is the udata or the sdata form.

    Despite the parameter name, the value is compared against DW_FORM
    codes.
    """
    return at in (DW_FORM.udata, DW_FORM.sdata)
def form_name(form):
    """Return 'DW_FORM_<name>' for a value defined in DW_FORM, or
    str(form) for any other value."""
    try:
        known = DW_FORM(form)
    except ValueError:
        return str(form)
    return f'DW_FORM_{known.name}'
def tag_name(tag):
    """Return 'DW_TAG_<name>' for a value defined in DW_TAG, or str(tag)
    for any other value."""
    try:
        known = DW_TAG(tag)
    except ValueError:
        return str(tag)
    return f'DW_TAG_{known.name}'
def lns_name(lns):
    """Return 'DW_LNS_<name>' for a value defined in DW_LNS, or str(lns)
    for any other value."""
    try:
        known = DW_LNS(lns)
    except ValueError:
        return str(lns)
    return f'DW_LNS_{known.name}'
def lne_name(lne):
    """Return 'DW_LNE_<name>' for a value defined in DW_LNE, or str(lne)
    for any other value."""
    try:
        known = DW_LNE(lne)
    except ValueError:
        return str(lne)
    return f'DW_LNE_{known.name}'

View File

@ -191,19 +191,12 @@ def parse_elf_sections(buffer, ehdr):
return sections
def parse_elf_symtab(buffer, shdr):
    """Copy the symbol table described by section header `shdr` out of
    `buffer` and return it as a ctypes array of Elf64_Sym.

    The number of entries is sh_size divided by the size of one Elf64_Sym,
    rounded down.
    """
    entry_count = shdr.sh_size // ctypes.sizeof(Elf64_Sym)
    symtab_type = Elf64_Sym * entry_count
    return symtab_type.from_buffer_copy(buffer, shdr.sh_offset)
"""
def symtab(self):
try:
return self._symtab
except AttributeError:
pass
shdr = self.section(b'.symtab')
symnum = shdr.sh_size // ctypes.sizeof(Elf64_Sym)
self._symtab = (Elf64_Sym * symnum).from_buffer_copy(self._mm, shdr.sh_offset)
return self._symtab
def symbol(self, name, *, all=False):
try:
syms = self._symtab_by_name[name]

323
lldwarf/arange.c Normal file
View File

@ -0,0 +1,323 @@
#include "lldwarf.h"
/*
 * tp_dealloc for ArangeTableHeader: hands the object's memory back through
 * its type's tp_free slot. No fields are released individually here.
 */
static void ArangeTableHeader_dealloc(ArangeTableHeader *self)
{
	Py_TYPE(self)->tp_free((PyObject *)self);
}
/*
 * table_offset() -> int
 *
 * Return the buffer offset of the first address range tuple: the start of
 * this table plus the header length, with the header length padded up to a
 * multiple of the tuple size (segment_size + 2 * address_size).
 *
 * Raises ValueError if the tuple size is zero and OverflowError if the
 * result does not fit in a Py_ssize_t.
 */
PyObject *ArangeTableHeader_table_offset(ArangeTableHeader *self)
{
	/*
	 * Header fields as read by LLDwarf_ParseArangeTableHeader():
	 * unit_length (4 bytes, or 4 + 8 in the 64-bit format), version (2),
	 * debug_info_offset (4 or 8), address_size (1), segment_size (1).
	 */
	uint64_t header_length = self->is_64_bit ? 24 : 12;
	uint64_t tuple_size;
	Py_ssize_t ret;

	/*
	 * Both sizes come straight from the file; if they are both zero the
	 * modulo below would divide by zero and crash the interpreter.
	 */
	tuple_size = (uint64_t)self->segment_size +
		     2 * (uint64_t)self->address_size;
	if (tuple_size == 0) {
		PyErr_SetString(PyExc_ValueError,
				"address range tuple size is zero");
		return NULL;
	}

	/*
	 * Pad relative to the start of this table rather than the absolute
	 * buffer offset, so the result does not depend on where the section
	 * happens to sit in the file. This cannot overflow: header_length is
	 * at most 24 and tuple_size at most 765.
	 */
	if (header_length % tuple_size)
		header_length += tuple_size - header_length % tuple_size;

	if (__builtin_add_overflow(self->offset, header_length, &ret)) {
		PyErr_SetString(PyExc_OverflowError, "table offset too large");
		return NULL;
	}

	return PyLong_FromSsize_t(ret);
}
/*
 * next_offset() -> int
 *
 * Return the buffer offset just past this table: its starting offset plus
 * unit_length plus the size of the unit_length field itself. Raises
 * OverflowError if the sum does not fit.
 */
PyObject *ArangeTableHeader_next_offset(ArangeTableHeader *self)
{
	/* The length field takes 4 bytes, or 12 in the 64-bit format. */
	const uint64_t length_field_size = self->is_64_bit ? 12 : 4;
	uint64_t total_size;
	Py_ssize_t next;

	if (__builtin_add_overflow(self->unit_length, length_field_size,
				   &total_size))
		goto overflow;
	if (__builtin_add_overflow(self->offset, total_size, &next))
		goto overflow;

	return PyLong_FromSsize_t(next);

overflow:
	PyErr_SetString(PyExc_OverflowError, "next offset too large");
	return NULL;
}
/*
 * Parse an address range table header from buffer starting at *offset and
 * return it as a new ArangeTableHeader.
 *
 * The read_*() helpers are handed the offset pointer; presumably they
 * advance it past each field (confirm against their definitions). Returns
 * NULL with an exception set on failure.
 */
PyObject *LLDwarf_ParseArangeTableHeader(Py_buffer *buffer, Py_ssize_t *offset)
{
	ArangeTableHeader *art;
	uint32_t length;

	art = PyObject_New(ArangeTableHeader, &ArangeTableHeader_type);
	if (!art)
		return NULL;

	art->offset = *offset;

	if (read_u32(buffer, offset, &length) == -1)
		goto err;

	/*
	 * An initial length of 0xffffffff selects the 64-bit format, in which
	 * the real length follows as a 64-bit value.
	 */
	art->is_64_bit = length == UINT32_C(0xffffffff);
	if (art->is_64_bit) {
		if (read_u64(buffer, offset, &art->unit_length) == -1)
			goto err;
	} else {
		art->unit_length = length;
	}

	if (read_u16(buffer, offset, &art->version) == -1)
		goto err;

	if (art->is_64_bit) {
		if (read_u64(buffer, offset, &art->debug_info_offset) == -1)
			goto err;
	} else {
		/*
		 * Use the exact-width type, matching the other read_u32()
		 * call above, instead of relying on unsigned int being 32
		 * bits wide.
		 */
		uint32_t debug_info_offset;

		if (read_u32(buffer, offset, &debug_info_offset) == -1)
			goto err;
		art->debug_info_offset = debug_info_offset;
	}

	if (read_u8(buffer, offset, &art->address_size) == -1)
		goto err;

	if (read_u8(buffer, offset, &art->segment_size) == -1)
		goto err;

	return (PyObject *)art;

err:
	/* Every read failure is reported as a truncated header. */
	PyErr_SetString(PyExc_ValueError,
			"address range table header is truncated");
	Py_DECREF(art);
	return NULL;
}
/* Methods exposed on ArangeTableHeader objects; ends with an empty sentinel. */
static PyMethodDef ArangeTableHeader_methods[] = {
	{"table_offset", (PyCFunction)ArangeTableHeader_table_offset,
	 METH_NOARGS,
	 "table_offset() -> int\n\n"
	 "Get the offset into the buffer where the address range table itself\n"
	 "begins. This is the starting offset of the arange table header plus\n"
	 "the length of the header, aligned up to a multiple of the address\n"
	 "range tuple size."},
	{"next_offset", (PyCFunction)ArangeTableHeader_next_offset,
	 METH_NOARGS,
	 "next_offset() -> int\n\n"
	 "Get the offset into the buffer where the next address range table\n"
	 "starts. This is the starting offset of this address range table plus\n"
	 "the length of the unit, including the unit_length field. If this is\n"
	 "the last address range table, this offset is the end of the\n"
	 ".debug_aranges section."},
	{},
};
/*
 * Attributes exposed on ArangeTableHeader objects; each entry maps a Python
 * attribute name onto the struct field of the same name. All entries use
 * flags 0.
 */
static PyMemberDef ArangeTableHeader_members[] = {
	{"offset", T_PYSSIZET, offsetof(ArangeTableHeader, offset), 0,
	 "offset into the buffer where this arange table starts"},
	{"unit_length", T_UINT64T, offsetof(ArangeTableHeader, unit_length), 0,
	 "length of this arange table, not including the unit_length field"},
	{"version", T_UINT16T, offsetof(ArangeTableHeader, version), 0,
	 "format version of this arange table"},
	{"debug_info_offset", T_UINT64T, offsetof(ArangeTableHeader, debug_info_offset), 0,
	 "location of this arange table's compilation unit as an offset into the .debug_info section"},
	{"address_size", T_UINT8T, offsetof(ArangeTableHeader, address_size), 0,
	 "size of an address in this arange table"},
	{"segment_size", T_UINT8T, offsetof(ArangeTableHeader, segment_size), 0,
	 "size of a segment selector in this arange table"},
	/* Describes the table's own format, so the text says so. */
	{"is_64_bit", T_BOOL, offsetof(ArangeTableHeader, is_64_bit), 0,
	 "whether this arange table is using the 64-bit format"},
	{},	/* sentinel */
};
/*
 * Docstring of the ArangeTableHeader Python type: the constructor signature
 * followed by one line per argument. The argument names are the same as the
 * attribute names in ArangeTableHeader_members.
 */
#define ArangeTableHeader_DOC \
"ArangeTableHeader(offset, unit_length, version, debug_info_offset,\n" \
" address_size, segment_size,\n" \
" is_64_bit) -> new address range table header\n\n" \
"Create a new DWARF address range table header.\n\n" \
"Arguments:\n" \
"offset -- integer offset\n" \
"unit_length -- integer length\n" \
"version -- integer format version\n" \
"debug_info_offset -- integer offset\n" \
"address_size -- integer size\n" \
"segment_size -- integer size\n" \
"is_64_bit -- boolean"
PyTypeObject ArangeTableHeader_type = {
PyVarObject_HEAD_INIT(NULL, 0)
"drgn.lldwarf.ArangeTableHeader", /* tp_name */
sizeof(ArangeTableHeader), /* tp_basicsize */
0, /* tp_itemsize */
(destructor)ArangeTableHeader_dealloc, /* tp_dealloc */
NULL, /* tp_print */
NULL, /* tp_getattr */
NULL, /* tp_setattr */
NULL, /* tp_as_async */
LLDwarfObject_repr, /* tp_repr */
NULL, /* tp_as_number */
NULL, /* tp_as_sequence */
NULL, /* tp_as_mapping */
NULL, /* tp_hash */
NULL, /* tp_call */
NULL, /* tp_str */
NULL, /* tp_getattro */
NULL, /* tp_setattro */
NULL, /* tp_as_buffer */
Py_TPFLAGS_DEFAULT, /* tp_flags */
ArangeTableHeader_DOC, /* tp_doc */
NULL, /* tp_traverse */
NULL, /* tp_clear */
LLDwarfObject_richcompare, /* tp_richcompare */
0, /* tp_weaklistoffset */
NULL, /* tp_iter */
NULL, /* tp_iternext */
ArangeTableHeader_methods, /* tp_methods */
ArangeTableHeader_members, /* tp_members */
NULL, /* tp_getset */
NULL, /* tp_base */
NULL, /* tp_dict */
NULL, /* tp_descr_get */
NULL, /* tp_descr_set */
0, /* tp_dictoffset */
LLDwarfObject_init, /* tp_init */
};
/*
 * Destructor for AddressRange objects. The struct holds only plain integers,
 * so the only work is handing the memory back through the type's tp_free.
 */
static void AddressRange_dealloc(AddressRange *self)
{
	PyTypeObject *type = Py_TYPE(self);

	type->tp_free((PyObject *)self);
}
/*
 * Parse the tuples of an address range table into a list of AddressRange
 * objects.
 *
 * buffer       -- source buffer to read from
 * offset       -- position in the buffer of the first tuple; passed to every
 *                 read_*() call
 * segment_size -- size of a segment selector: 0 (no selector is read and the
 *                 segment is taken as 0), 4 or 8
 * address_size -- size of an address and of a length: 4 or 8
 *
 * Tuples are read until one whose segment, address and length are all zero;
 * that terminator is not included in the result.
 *
 * Returns a new list, or NULL on error. Any other segment_size or
 * address_size raises ValueError.
 *
 * NOTE(review): a failed read_*() jumps to err without setting an exception
 * here; presumably read_*() sets one itself -- confirm against its
 * definition.
 */
PyObject *LLDwarf_ParseArangeTable(Py_buffer *buffer, Py_ssize_t *offset,
				   Py_ssize_t segment_size,
				   Py_ssize_t address_size)
{
	PyObject *arange_table;

	arange_table = PyList_New(0);
	if (!arange_table)
		return NULL;

	for (;;) {
		AddressRange *arange;
		uint64_t segment, address, length;
		uint32_t tmp;
		int ret;

		/* Optional segment selector. */
		switch (segment_size) {
		case 4:
			if (read_u32(buffer, offset, &tmp) == -1)
				goto err;
			segment = tmp;
			break;
		case 8:
			if (read_u64(buffer, offset, &segment) == -1)
				goto err;
			break;
		case 0:
			segment = 0;
			break;
		default:
			PyErr_Format(PyExc_ValueError, "unsupported segment size %ld",
				     (long)segment_size);
			goto err;
		}

		/* The address and the length are both address_size bytes wide. */
		switch (address_size) {
		case 4:
			if (read_u32(buffer, offset, &tmp) == -1)
				goto err;
			address = tmp;
			if (read_u32(buffer, offset, &tmp) == -1)
				goto err;
			length = tmp;
			break;
		case 8:
			if (read_u64(buffer, offset, &address) == -1)
				goto err;
			if (read_u64(buffer, offset, &length) == -1)
				goto err;
			break;
		default:
			PyErr_Format(PyExc_ValueError, "unsupported address size %ld",
				     (long)address_size);
			goto err;
		}

		/* An all-zero tuple terminates the table. */
		if (segment == 0 && address == 0 && length == 0)
			break;

		/*
		 * Allocate with PyObject_New() rather than PyMem_Malloc() +
		 * PyObject_Init(): the object is later released through
		 * tp_free (see AddressRange_dealloc), so it has to come from
		 * the object allocator, and PyObject_New() also sets
		 * MemoryError when the allocation fails.
		 */
		arange = PyObject_New(AddressRange, &AddressRange_type);
		if (!arange)
			goto err;
		arange->segment = segment;
		arange->address = address;
		arange->length = length;

		ret = PyList_Append(arange_table, (PyObject *)arange);
		/* Drop our reference whether or not the append succeeded. */
		Py_DECREF((PyObject *)arange);
		if (ret == -1)
			goto err;
	}

	return arange_table;

err:
	Py_DECREF(arange_table);
	return NULL;
}
static PyMemberDef AddressRange_members[] = {
{"segment", T_UINT64T, offsetof(AddressRange, segment), 0,
"segment selector of the address range"},
{"address", T_UINT64T, offsetof(AddressRange, address), 0,
"starting address of the address range"},
{"length", T_UINT64T, offsetof(AddressRange, length), 0,
"length of the address range"},
{},
};
/*
 * Docstring of the AddressRange Python type: the constructor signature
 * followed by one line per argument. The argument names are the same as the
 * attribute names in AddressRange_members.
 */
#define AddressRange_DOC \
"AddressRange(segment, address, length) -> new address range\n" \
"Create a new address range.\n\n" \
"Arguments:\n" \
"segment -- integer segment selector\n" \
"address -- integer start address\n" \
"length -- integer range length\n"
PyTypeObject AddressRange_type = {
PyVarObject_HEAD_INIT(NULL, 0)
"drgn.lldwarf.AddressRange", /* tp_name */
sizeof(AddressRange), /* tp_basicsize */
0, /* tp_itemsize */
(destructor)AddressRange_dealloc, /* tp_dealloc */
NULL, /* tp_print */
NULL, /* tp_getattr */
NULL, /* tp_setattr */
NULL, /* tp_as_async */
LLDwarfObject_repr, /* tp_repr */
NULL, /* tp_as_number */
NULL, /* tp_as_sequence */
NULL, /* tp_as_mapping */
NULL, /* tp_hash */
NULL, /* tp_call */
NULL, /* tp_str */
NULL, /* tp_getattro */
NULL, /* tp_setattro */
NULL, /* tp_as_buffer */
Py_TPFLAGS_DEFAULT, /* tp_flags */
AddressRange_DOC, /* tp_doc */
NULL, /* tp_traverse */
NULL, /* tp_clear */
LLDwarfObject_richcompare, /* tp_richcompare */
0, /* tp_weaklistoffset */
NULL, /* tp_iter */
NULL, /* tp_iternext */
NULL, /* tp_methods */
AddressRange_members, /* tp_members */
NULL, /* tp_getset */
NULL, /* tp_base */
NULL, /* tp_dict */
NULL, /* tp_descr_get */
NULL, /* tp_descr_set */
0, /* tp_dictoffset */
LLDwarfObject_init, /* tp_init */
};

View File

@ -23,6 +23,28 @@ typedef struct {
extern PyTypeObject AbbrevDecl_type;
typedef struct {
PyObject_VAR_HEAD
uint64_t segment;
uint64_t address;
uint64_t length;
} AddressRange;
extern PyTypeObject AddressRange_type;
typedef struct {
PyObject_VAR_HEAD
Py_ssize_t offset;
uint64_t unit_length;
uint16_t version;
uint64_t debug_info_offset;
uint8_t address_size;
uint8_t segment_size;
bool is_64_bit;
} ArangeTableHeader;
extern PyTypeObject ArangeTableHeader_type;
typedef struct {
PyObject_HEAD
Py_ssize_t offset;
@ -123,6 +145,10 @@ int LLDwarfObject_RichCompareBool(PyObject *self, PyObject *other, int op);
PyObject *LLDwarfObject_richcompare(PyObject *self, PyObject *other, int op);
PyObject *LLDwarf_ParseAbbrevTable(Py_buffer *buffer, Py_ssize_t *offset);
/*
 * Parse address range tuples starting at *offset into a list of AddressRange
 * objects, stopping at the all-zero terminating tuple. segment_size must be
 * 0, 4 or 8 and address_size must be 4 or 8; anything else raises ValueError.
 * Returns a new list, or NULL on error.
 */
PyObject *LLDwarf_ParseArangeTable(Py_buffer *buffer, Py_ssize_t *offset,
Py_ssize_t segment_size,
Py_ssize_t address_size);
/*
 * Parse an address range table header starting at *offset. Returns a new
 * ArangeTableHeader, or NULL with an exception set (ValueError if the header
 * is truncated).
 */
PyObject *LLDwarf_ParseArangeTableHeader(Py_buffer *buffer, Py_ssize_t *offset);
PyObject *LLDwarf_ParseCompilationUnitHeader(Py_buffer *buffer,
Py_ssize_t *offset);
PyObject *LLDwarf_ParseDie(Py_buffer *buffer, Py_ssize_t *offset,

View File

@ -176,6 +176,57 @@ static PyObject *parse_abbrev_table(PyObject *self, PyObject *args,
return ret;
}
/*
 * Module-level parse_arange_table(segment_size, address_size, buffer,
 * offset=0): unpack the Python arguments, reject a negative offset with
 * ValueError and hand the rest to LLDwarf_ParseArangeTable(). The buffer
 * obtained from the arguments is released on every path after parsing them.
 */
static PyObject *parse_arange_table(PyObject *self, PyObject *args,
				    PyObject *kwds)
{
	static char *keywords[] = {
		"segment_size", "address_size", "buffer", "offset", NULL
	};
	Py_ssize_t segment_size, address_size;
	Py_ssize_t offset = 0;
	Py_buffer buffer;
	PyObject *table = NULL;

	if (!PyArg_ParseTupleAndKeywords(args, kwds, "nny*|n:parse_arange_table",
					 keywords, &segment_size, &address_size,
					 &buffer, &offset))
		return NULL;

	if (offset >= 0) {
		table = LLDwarf_ParseArangeTable(&buffer, &offset, segment_size,
						 address_size);
	} else {
		PyErr_SetString(PyExc_ValueError, "offset cannot be negative");
	}

	PyBuffer_Release(&buffer);
	return table;
}
/*
 * Module-level parse_arange_table_header(buffer, offset=0): unpack the Python
 * arguments, reject a negative offset with ValueError and hand the rest to
 * LLDwarf_ParseArangeTableHeader(). The buffer obtained from the arguments is
 * released on every path after parsing them.
 */
static PyObject *parse_arange_table_header(PyObject *self, PyObject *args,
					   PyObject *kwds)
{
	static char *keywords[] = {"buffer", "offset", NULL};
	Py_ssize_t offset = 0;
	Py_buffer buffer;
	PyObject *header = NULL;

	if (!PyArg_ParseTupleAndKeywords(args, kwds, "y*|n:parse_arange_table_header",
					 keywords, &buffer, &offset))
		return NULL;

	if (offset >= 0)
		header = LLDwarf_ParseArangeTableHeader(&buffer, &offset);
	else
		PyErr_SetString(PyExc_ValueError, "offset cannot be negative");

	PyBuffer_Release(&buffer);
	return header;
}
static PyObject *parse_compilation_unit_header(PyObject *self, PyObject *args,
PyObject *kwds)
{
@ -343,11 +394,27 @@ static PyMethodDef lldwarf_methods[] = {
"offset -- optional offset into the buffer"},
{"parse_abbrev_table", (PyCFunction)parse_abbrev_table,
METH_VARARGS | METH_KEYWORDS,
"parse_abbrev_table(buffer, offset=0) -> dict[code]: AbbrevDecl \n\n"
"parse_abbrev_table(buffer, offset=0) -> dict[code]: AbbrevDecl\n\n"
"Parse an abbreviation table.\n\n"
"Arguments:\n"
"buffer -- readable source buffer\n"
"offset -- optional offset into the buffer"},
/*
 * parse_arange_table(): accepts positional and keyword arguments; the
 * docstring below is what Python shows for the function.
 */
{"parse_arange_table", (PyCFunction)parse_arange_table,
METH_VARARGS | METH_KEYWORDS,
"parse_arange_table(segment_size, address_size, buffer, offset=0) -> list of AddressRange\n\n"
"Parse an address range table.\n\n"
"Arguments:\n"
"segment_size -- size of a segment selector in this arange table\n"
"address_size -- size of an address in this arange table\n"
"buffer -- readable source buffer\n"
"offset -- optional offset into the buffer"},
/*
 * parse_arange_table_header(): accepts positional and keyword arguments. It
 * returns a single ArangeTableHeader object, so the signature line in the
 * docstring says exactly that rather than describing a dict.
 */
{"parse_arange_table_header", (PyCFunction)parse_arange_table_header,
 METH_VARARGS | METH_KEYWORDS,
 "parse_arange_table_header(buffer, offset=0) -> ArangeTableHeader\n\n"
 "Parse an address range table header.\n\n"
 "Arguments:\n"
 "buffer -- readable source buffer\n"
 "offset -- optional offset into the buffer"},
{"parse_compilation_unit_header",
(PyCFunction)parse_compilation_unit_header,
METH_VARARGS | METH_KEYWORDS,
@ -412,6 +479,14 @@ PyInit_lldwarf(void)
if (PyType_Ready(&AbbrevDecl_type) < 0)
return NULL;
AddressRange_type.tp_new = PyType_GenericNew;
if (PyType_Ready(&AddressRange_type) < 0)
return NULL;
ArangeTableHeader_type.tp_new = PyType_GenericNew;
if (PyType_Ready(&ArangeTableHeader_type) < 0)
return NULL;
CompilationUnitHeader_type.tp_new = PyType_GenericNew;
if (PyType_Ready(&CompilationUnitHeader_type) < 0)
return NULL;
@ -440,6 +515,12 @@ PyInit_lldwarf(void)
Py_INCREF(&AbbrevDecl_type);
PyModule_AddObject(m, "AbbrevDecl", (PyObject *)&AbbrevDecl_type);
Py_INCREF(&AddressRange_type);
PyModule_AddObject(m, "AddressRange", (PyObject *)&AddressRange_type);
Py_INCREF(&ArangeTableHeader_type);
PyModule_AddObject(m, "ArangeTableHeader", (PyObject *)&ArangeTableHeader_type);
Py_INCREF(&CompilationUnitHeader_type);
PyModule_AddObject(m, "CompilationUnitHeader",
(PyObject *)&CompilationUnitHeader_type);

View File

@ -15,7 +15,7 @@ def out_of_date(dependencies, target):
def gen_header():
import drgn.dwarf.defs as defs
import drgn.dwarfdefs as defs
def write_enum(e):
f.write('enum {\n')
@ -39,7 +39,7 @@ def gen_header():
class my_build_ext(build_ext):
def run(self):
if out_of_date(['drgn/dwarf/defs.py', 'setup.py'], 'lldwarf/dwarfdefs.h'):
if out_of_date(['drgn/dwarfdefs.py', 'setup.py'], 'lldwarf/dwarfdefs.h'):
try:
gen_header()
except Exception as e:
@ -57,6 +57,7 @@ module = Extension(
'lldwarf/module.c',
'lldwarf/object.c',
'lldwarf/abbrev.c',
'lldwarf/arange.c',
'lldwarf/cu.c',
'lldwarf/die.c',
'lldwarf/line.c',

View File

@ -0,0 +1,125 @@
import drgn.lldwarf as lldwarf
import unittest
class TestArangeTableHeaderObject(unittest.TestCase):
    """Offset arithmetic on ArangeTableHeader objects built field by field."""

    def test_offset(self):
        """table_offset() and next_offset() in the 32-bit and 64-bit formats."""
        fields = {
            'offset': 70,
            'unit_length': 200,
            'version': 2,
            'debug_info_offset': 0,
            'address_size': 8,
            'segment_size': 0,
            'is_64_bit': False,
        }
        hdr = lldwarf.ArangeTableHeader(**fields)

        self.assertEqual(hdr.table_offset(), 96)
        self.assertEqual(hdr.next_offset(), 274)

        # Same fields, now treated as the 64-bit format.
        hdr.is_64_bit = True
        self.assertEqual(hdr.table_offset(), 96)
        self.assertEqual(hdr.next_offset(), 282)

    def test_offset_overflow(self):
        """Offsets that do not fit must raise OverflowError."""
        hdr = lldwarf.ArangeTableHeader(
            offset=2**63 - 12,
            unit_length=2**64 - 4,
            version=2,
            debug_info_offset=0,
            address_size=8,
            segment_size=0,
            is_64_bit=False,
        )

        # 32-bit format.
        self.assertRaises(OverflowError, hdr.table_offset)
        self.assertRaises(OverflowError, hdr.next_offset)

        hdr.offset = 2**63 - 8
        hdr.unit_length = 4
        self.assertRaises(OverflowError, hdr.next_offset)

        # 64-bit format.
        hdr.offset = 2**63 - 24
        hdr.unit_length = 2**64 - 12
        hdr.is_64_bit = True
        self.assertRaises(OverflowError, hdr.table_offset)
        self.assertRaises(OverflowError, hdr.next_offset)

        hdr.offset = 2**63 - 16
        hdr.unit_length = 4
        self.assertRaises(OverflowError, hdr.next_offset)
class TestParseArangeTableHeader(unittest.TestCase):
    """parse_arange_table_header() on raw header bytes."""

    def test_negative_offset(self):
        """A negative offset is rejected with ValueError."""
        self.assertRaises(
            ValueError, lldwarf.parse_arange_table_header, b'', -1)

    def test_32bit(self):
        """A 32-bit header parses; every proper prefix of it is truncated."""
        buf = (b'\xc8\x00\x00\x00'  # unit_length
               b'\x02\x00'          # version
               b'\x00\x00\x00\x00'  # debug_info_offset
               b'\x08'              # address_size
               b'\x00')             # segment_size
        expected = lldwarf.ArangeTableHeader(
            offset=0,
            unit_length=200,
            version=2,
            debug_info_offset=0,
            address_size=8,
            segment_size=0,
            is_64_bit=False,
        )

        message = 'address range table header is truncated'
        for cut in range(len(buf)):
            self.assertRaisesRegex(
                ValueError, message,
                lldwarf.parse_arange_table_header, buf[:cut])

        parsed = lldwarf.parse_arange_table_header(buf)
        self.assertEqual(parsed, expected)

    def test_64bit(self):
        """A 64-bit header parses; every proper prefix of it is truncated."""
        buf = (b'\xff\xff\xff\xff'
               b'\xc8\x00\x00\x00\x00\x00\x00\x00'  # unit_length
               b'\x02\x00'                          # version
               b'\x00\x00\x00\x00\x00\x00\x00\x00'  # debug_info_offset
               b'\x08'                              # address_size
               b'\x00')                             # segment_size
        expected = lldwarf.ArangeTableHeader(
            offset=0,
            unit_length=200,
            version=2,
            debug_info_offset=0,
            address_size=8,
            segment_size=0,
            is_64_bit=True,
        )

        message = 'address range table header is truncated'
        for cut in range(len(buf)):
            self.assertRaisesRegex(
                ValueError, message,
                lldwarf.parse_arange_table_header, buf[:cut])

        parsed = lldwarf.parse_arange_table_header(buf)
        self.assertEqual(parsed, expected)

    def test_offset(self):
        """Parsing from offset 1 skips the leading padding byte."""
        buf = (b'\x01'              # padding
               b'\xc8\x00\x00\x00'  # unit_length
               b'\x02\x00'          # version
               b'\x00\x00\x00\x00'  # debug_info_offset
               b'\x08'              # address_size
               b'\x00')             # segment_size
        expected = lldwarf.ArangeTableHeader(
            offset=1,
            unit_length=200,
            version=2,
            debug_info_offset=0,
            address_size=8,
            segment_size=0,
            is_64_bit=False,
        )

        parsed = lldwarf.parse_arange_table_header(buf, 1)
        self.assertEqual(parsed, expected)