Initial commit

I wrote all of this code a few months back and am just now getting
around to committing it. The low-level DWARF parsing library is pretty
solid, although it only implements a subset of DWARF so far. The CLI and
higher-level interface are experimental.
This commit is contained in:
Omar Sandoval 2017-08-24 22:46:16 -07:00
commit 1fc2b6e89b
27 changed files with 5494 additions and 0 deletions

9
.gitignore vendored Normal file
View File

@ -0,0 +1,9 @@
__pycache__
*.pyc
*.o
*.so
/build
/drgn.egg-info
/coverage.info
/out
/lldwarf/dwarfdefs.h

0
drgn/__init__.py Normal file
View File

0
drgn/cli/__init__.py Normal file
View File

22
drgn/cli/__main__.py Normal file
View File

@ -0,0 +1,22 @@
import argparse
import drgn.cli.dump
import drgn.cli.probe
def main():
    """Entry point for the ``drgn`` command-line interface.

    Builds the top-level parser, lets every subcommand module register its
    own subparser, then calls the handler the chosen subcommand stored in
    ``func``.
    """
    cli = argparse.ArgumentParser(prog='drgn')
    commands = cli.add_subparsers(
        title='command', description='command to run', dest='command')
    # Set after creation so that omitting the subcommand is an error.
    commands.required = True
    for module in (drgn.cli.dump, drgn.cli.probe):
        module.register(commands)
    namespace = cli.parse_args()
    namespace.func(namespace)
# Run the command-line interface when this module is executed as a script.
if __name__ == '__main__':
    main()

198
drgn/cli/dump.py Normal file
View File

@ -0,0 +1,198 @@
from drgn.dwarf import DwarfFile
from drgn.dwarf.defs import *
import fnmatch
import os.path
def dump_cu(dwarf_file, cu, cu_name, *, indent=0):
    """Print the header fields of one compilation unit.

    The unit is identified by its offset relative to the start of the
    ``.debug_info`` section, followed by its name (when non-empty) and one
    line per header field.

    dwarf_file -- object whose ``section()`` returns the ``.debug_info`` header
    cu         -- compilation unit header to print
    cu_name    -- name of the unit; an empty value suppresses the name
    indent     -- number of spaces to prefix every line with
    """
    pad = ' ' * indent
    section_start = dwarf_file.section('.debug_info').sh_offset
    heading = f'{pad}<{cu.offset - section_start}> compilation unit'
    if cu_name:
        heading += f' ({cu_name!r})'
    print(heading)
    for field in ('unit_length', 'version', 'debug_abbrev_offset',
                  'address_size', 'is_64_bit'):
        print(f'{pad} {field} = {getattr(cu, field)}')
def dump_die(dwarf_file, cu, die, *, indent=0, recurse=False):
    """Print one debugging information entry and, optionally, its subtree.

    The entry is labelled with its offset relative to the compilation unit
    and its tag name, followed by one line per attribute.

    dwarf_file -- used to resolve string attribute values
    cu         -- compilation unit header that contains ``die``
    die        -- entry to print; iterating it yields (name, form, value)
    indent     -- number of spaces to prefix every line with
    recurse    -- also print already-parsed children, indented two more spaces
    """
    pad = ' ' * indent
    print(f'{pad}<{die.offset - cu.offset}> {tag_name(die.tag)}')
    fixed_size_forms = {DW_FORM.data1, DW_FORM.data2, DW_FORM.data4,
                        DW_FORM.data8}
    for name, form, value in die:
        if form in (DW_FORM.string, DW_FORM.strp):
            # [1:] drops the first character of the repr (the ``b`` prefix
            # of a bytes object).
            value = repr(dwarf_file.at_string(form, value))[1:]
        elif form in fixed_size_forms:
            # Presumably these values are raw bytes as well -- TODO confirm.
            value = repr(value)[1:]
        print(f'{pad} {at_name(name)} ({form_name(form)}) = {value}')

    if not recurse:
        return
    # Children only exist once they have been parsed and attached to the DIE.
    children = getattr(die, 'children', None)
    if children is None:
        return
    for child in children:
        dump_die(dwarf_file, cu, child, indent=indent + 2, recurse=True)
def dump_lnp_include_directories(lnp, *, indent=0):
    """Print the include directory table of a line number program header.

    lnp    -- header whose ``include_directories`` holds bytes entries
    indent -- number of spaces to prefix every line with
    """
    pad = ' ' * indent
    print(f'{pad}include_directories = {{')
    for raw in lnp.include_directories:
        print(f'{pad} {raw.decode()!r},')
    print(f'{pad}}}')
def dump_lnp_file_names(lnp, *, indent=0):
    """Print the file name table of a line number program header.

    Each entry of ``lnp.file_names`` is a 4-tuple whose first two items are
    the file name (bytes) and a 1-based index into
    ``lnp.include_directories``; an index of 0 means the name is printed
    without a directory.

    lnp    -- line number program header
    indent -- number of spaces to prefix every line with
    """
    pad = ' ' * indent
    print(f'{pad}file_names = {{')
    for raw_name, dir_index, _mtime, _size in lnp.file_names:
        path = raw_name.decode()
        if dir_index > 0:
            parent = lnp.include_directories[dir_index - 1].decode()
            path = os.path.join(parent, path)
        print(f'{pad} {path!r},')
    print(f'{pad}}}')
def dump_lnp_header(dwarf_file, lnp, *, indent=0):
    """Print the fixed fields of a line number program header.

    The program is identified by its offset relative to the start of the
    ``.debug_line`` section.

    dwarf_file -- object whose ``section()`` returns the ``.debug_line`` header
    lnp        -- line number program header to print
    indent     -- number of spaces to prefix every line with
    """
    pad = ' ' * indent
    section_start = dwarf_file.section('.debug_line').sh_offset
    print(f'{pad}<{lnp.offset - section_start}> line number program')
    fields = (
        'unit_length',
        'version',
        'header_length',
        'minimum_instruction_length',
        'maximum_operations_per_instruction',
        'default_is_stmt',
        'line_base',
        'line_range',
        'opcode_base',
        'standard_opcode_lengths',
        'is_64_bit',
    )
    for field in fields:
        print(f'{pad} {field} = {getattr(lnp, field)}')
def dump_lnp_ops(dwarf_file, lnp, *, indent=0):
    """Print a disassembly of a line number program.

    Every decoded operation is printed on its own line:

    * standard opcodes by name, followed by their operand (one operand) or
      the whole operand list (several operands);
    * extended opcodes by name, followed by their raw payload when it is
      non-empty;
    * special opcodes as ``special op+=N line+=M`` / ``line-=M``.

    dwarf_file -- object providing ``decode_line_number_program(lnp)``
    lnp        -- line number program header
    indent     -- number of spaces to prefix every line with
    """
    pad = ' ' * indent
    print(f'{pad}opcodes = {{')
    for type_, opcode, args in dwarf_file.decode_line_number_program(lnp):
        print(f'{pad} ', end='')
        if type_ == 'standard':
            # Was "> 2", which silently dropped the operands of an opcode
            # that takes exactly two of them.
            if len(args) > 1:
                print(f'{lns_name(opcode)} {args}')
            elif len(args) == 1:
                print(f'{lns_name(opcode)} {args[0]}')
            else:
                print(lns_name(opcode))
        elif type_ == 'extended':
            if args[0]:
                # [1:] drops the ``b`` prefix of the bytes repr.
                print(f'{lne_name(opcode)} {repr(args[0])[1:]}')
            else:
                print(f'{lne_name(opcode)}')
        else:
            assert type_ == 'special'
            op_advance, line_delta = args[0], args[1]
            print(f'special op+={op_advance} ', end='')
            if line_delta < 0:
                print(f'line-={-line_delta}')
            else:
                print(f'line+={line_delta}')
    print(f'{pad}}}')
def dump_line_number_matrix(cu, lnp, matrix, *, indent=0, cu_name=None):
    """Print the rows of an executed line number program.

    Each row that is not an end-of-sequence marker is printed as
    ``0xADDRESS is PATH:LINE`` followed by the names of the flags set on it.

    cu      -- compilation unit header the matrix belongs to (not read here)
    lnp     -- line number program header supplying the file/directory tables
    matrix  -- iterable of line number rows
    indent  -- number of spaces to prefix every line with
    cu_name -- name (bytes or str) to show for rows whose file index is 0;
               when omitted such rows are shown as ``<unknown>``.  The
               previous code called an undefined ``cu_name()`` function for
               these rows and died with NameError.
    """
    pad = ' ' * indent
    if cu_name is None:
        primary_path = b'<unknown>'
    elif isinstance(cu_name, str):
        primary_path = cu_name.encode()
    else:
        primary_path = cu_name

    flag_names = ('is_stmt', 'basic_block', 'prologue_end', 'epilogue_begin')
    print(f'{pad}lines = {{')
    for row in matrix:
        if row.end_sequence:
            continue
        if row.file == 0:
            path = primary_path
        else:
            # File numbers in the matrix are 1-based indexes into file_names.
            file_name, directory_index, _mtime, _size = lnp.file_names[row.file - 1]
            if directory_index > 0:
                directory = lnp.include_directories[directory_index - 1]
                path = directory + b'/' + file_name
            else:
                path = file_name
        flags = [name for name in flag_names if getattr(row, name)]
        suffix = f" ({', '.join(flags)})" if flags else ''
        # repr(path)[2:-1] strips the b'...' wrapper while keeping escapes.
        print(f'{pad} 0x{row.address:016x} is {repr(path)[2:-1]}:{row.line}{suffix}')
    print(f'{pad}}}')
def cmd_dump(args):
    """Implement the ``dump`` subcommand.

    Opens ``args.file`` and, for every compilation unit whose name matches at
    least one glob in ``args.cu``, prints the unit header plus whatever extra
    sections the command-line flags requested.  Units without a name are
    matched as the empty string.
    """
    wants_line_info = (args.include_directories or args.file_names or
                       args.lines or args.line_number_program)
    with DwarfFile(args.file) as dwarf_file:
        for cu in dwarf_file.cu_headers():
            die = dwarf_file.cu_die(cu)
            try:
                cu_name = dwarf_file.die_name(die).decode()
            except KeyError:
                cu_name = ''
            if not any(fnmatch.fnmatch(cu_name, pattern) for pattern in args.cu):
                continue

            dump_cu(dwarf_file, cu, cu_name)
            if args.die:
                if args.recursive:
                    dwarf_file.parse_die_children(cu, die, recurse=True)
                dump_die(dwarf_file, cu, die, indent=2, recurse=args.recursive)

            if not wants_line_info:
                continue
            lnp = dwarf_file.cu_line_number_program_header(cu, die)
            if lnp is None:
                # The unit has no line number program; there is nothing to
                # print, and the dumpers below would crash on None.
                continue
            if args.include_directories:
                dump_lnp_include_directories(lnp, indent=2)
            if args.file_names:
                dump_lnp_file_names(lnp, indent=2)
            if args.lines:
                matrix = dwarf_file.execute_line_number_program(lnp)
                dump_line_number_matrix(cu, lnp, matrix, indent=2)
            if args.line_number_program:
                dump_lnp_header(dwarf_file, lnp, indent=2)
                dump_lnp_ops(dwarf_file, lnp, indent=4)
def register(subparsers):
    """Add the ``dump`` subcommand and its options to ``subparsers``.

    The handler is stored as the ``func`` default so the dispatcher can call
    it with the parsed arguments.
    """
    subparser = subparsers.add_parser(
        'dump', help='dump raw debugging information')

    # NOTE(review): this option and the positional ``cu`` below both store
    # into ``args.cu`` -- confirm that sharing the destination is intended.
    subparser.add_argument(
        '--cu', action='append', metavar='GLOB',
        help='dump compilation units with names matching the given pattern (may be specified multiple times)')

    boolean_flags = (
        (('--die',),
         "also dump each compilation unit's debugging information entry"),
        (('--recursive',),
         'dump debugging information entries recursively'),
        (('--include-directories',),
         "also dump each compilation unit's include directories"),
        (('--file-names',),
         "also dump each compilation unit's source files"),
        (('--lines',),
         'also dump the line number matrix'),
        (('--line-number-program', '--lnp'),
         'also dump the line number program'),
    )
    for option_strings, help_text in boolean_flags:
        subparser.add_argument(
            *option_strings, action='store_true', help=help_text)

    subparser.add_argument('file', help='file to dump')
    subparser.add_argument(
        'cu', nargs='+', metavar='glob',
        help='pattern matching names of compilation units to dump')
    subparser.set_defaults(func=cmd_dump)

88
drgn/cli/probe.py Normal file
View File

@ -0,0 +1,88 @@
from drgn.dwarf import DwarfFile
from drgn.dwarf.defs import DW_TAG
from drgn.ftrace import Kprobe, FtraceInstance
import os
import signal
def find_cu_by_name(dwarf_file, name):
    """Return ``(cu, die)`` for the first compilation unit named ``name``.

    Units whose DIE has no name (``die_name`` raises KeyError) are skipped.

    Raises ValueError if no unit matches.
    """
    for cu in dwarf_file.cu_headers():
        die = dwarf_file.cu_die(cu)
        try:
            candidate = dwarf_file.die_name(die).decode()
        except KeyError:
            continue
        if candidate == name:
            return cu, die
    raise ValueError('CU not found')
def find_addresses_for_line(dwarf_file, filename, lineno):
    """Collect the line number rows for ``filename:lineno``.

    The compilation unit named ``filename`` is located, its line number
    program is executed, and every row attributed to that file and line is
    kept.

    Returns ``(cu, die, rows)``; ``rows`` may be empty.
    """
    cu, die = find_cu_by_name(dwarf_file, filename)
    lnp = dwarf_file.cu_line_number_program_header(cu, die)
    matrix = dwarf_file.execute_line_number_program(lnp)
    matching = [
        row for row in matrix
        if (dwarf_file.line_number_row_name(cu, lnp, row) == filename and
            row.line == lineno)
    ]
    return cu, die, matching
def best_breakpoint_address(rows):
    """Pick the row to place a breakpoint on.

    The first row flagged ``is_stmt`` wins; if none is flagged, the first row
    is returned.  ``rows`` must not be empty (IndexError otherwise).
    """
    statement_rows = [row for row in rows if row.is_stmt]
    if statement_rows:
        return statement_rows[0]
    return rows[0]
def find_subprogram_containing_address(dwarf_file, cu, die, address):
    """Return the subprogram DIE directly under ``die`` that covers ``address``.

    The children of ``die`` are parsed on demand, and only children tagged
    ``DW_TAG.subprogram`` are considered.

    Raises ValueError if no subprogram contains the address.  (This used to
    be ``assert False``, which disappears under ``python -O`` and then lets
    the function return None.)
    """
    dwarf_file.parse_die_children(cu, die)
    # ``or ()`` tolerates a DIE whose children attribute is None -- presumably
    # possible for a childless entry; TODO confirm.
    for child in die.children or ():
        if child.tag != DW_TAG.subprogram:
            continue
        if dwarf_file.die_contains_address(child, address):
            return child
    raise ValueError('subprogram not found')
def create_probe(dwarf_file, filename, lineno):
    """Work out the kprobe name and location for a source line.

    Returns ``(name, location)`` where ``name`` is
    ``drgn/<function>_<pid>`` and ``location`` is ``<function>+<offset>``,
    the offset being the distance of the chosen row's address from the
    start address of the enclosing subprogram.
    """
    cu, die, rows = find_addresses_for_line(dwarf_file, filename, lineno)
    target = best_breakpoint_address(rows)
    function = find_subprogram_containing_address(
        dwarf_file, cu, die, target.address)
    function_name = dwarf_file.die_name(function).decode()
    offset = target.address - dwarf_file.die_address(function)
    return (f'drgn/{function_name}_{os.getpid()}',
            f'{function_name}+{offset}')
def cmd_probe(args):
    """Implement the ``probe`` subcommand.

    Resolves ``args.line`` (``FILE:LINE``) to a kprobe location using the
    debugging information in ``args.vmlinux``, installs the probe in a
    private ftrace instance, and streams that instance's ``trace_pipe`` to
    standard output until ``cat`` exits.  The probe is disabled again on the
    way out.

    Raises ValueError if no source location was given or it is malformed.
    """
    # --line is optional as far as argparse is concerned, so it may be None.
    if args.line is None:
        raise ValueError('no source location given (use --line FILE:LINE)')
    filename, lineno = args.line.rsplit(':', 1)
    lineno = int(lineno)
    with DwarfFile(args.vmlinux) as dwarf_file:
        name, location = create_probe(dwarf_file, filename, lineno)
    with Kprobe(name, location) as probe, \
         FtraceInstance(f'drgn_{os.getpid()}') as instance:
        probe.enable(instance)
        try:
            import subprocess
            subprocess.call(['cat', f'/sys/kernel/debug/tracing/instances/{instance.name}/trace_pipe'])
        finally:
            probe.disable(instance)
def register(subparsers):
    """Add the ``probe`` subcommand and its arguments to ``subparsers``.

    The handler is stored as the ``func`` default so the dispatcher can call
    it with the parsed arguments.
    """
    probe_parser = subparsers.add_parser('probe')
    probe_parser.add_argument(
        '--line', '-l', metavar='FILE:LINE', help='probe a source location')
    probe_parser.add_argument('vmlinux', help='vmlinux file to use')
    probe_parser.set_defaults(func=cmd_probe)

1
drgn/dwarf/__init__.py Normal file
View File

@ -0,0 +1 @@
from drgn.dwarf.file import DwarfFile

344
drgn/dwarf/defs.py Normal file
View File

@ -0,0 +1,344 @@
import enum
class DW_CHILDREN(enum.IntEnum):
    """Values of the "has children" byte of an abbreviation declaration."""

    no = 0x00
    yes = 0x01
class DW_TAG(enum.IntEnum):
    """DWARF tag codes (``DW_TAG_*``) identifying the kind of a DIE."""

    array_type = 0x01
    class_type = 0x02
    entry_point = 0x03
    enumeration_type = 0x04
    formal_parameter = 0x05
    imported_declaration = 0x08
    label = 0x0a
    lexical_block = 0x0b
    member = 0x0d
    pointer_type = 0x0f
    reference_type = 0x10
    compile_unit = 0x11
    string_type = 0x12
    structure_type = 0x13
    subroutine_type = 0x15
    typedef = 0x16
    union_type = 0x17
    unspecified_parameters = 0x18
    variant = 0x19
    common_block = 0x1a
    common_inclusion = 0x1b
    inheritance = 0x1c
    inlined_subroutine = 0x1d
    module = 0x1e
    ptr_to_member_type = 0x1f
    set_type = 0x20
    subrange_type = 0x21
    with_stmt = 0x22
    access_declaration = 0x23
    base_type = 0x24
    catch_block = 0x25
    const_type = 0x26
    constant = 0x27
    enumerator = 0x28
    file_type = 0x29
    friend = 0x2a
    namelist = 0x2b
    namelist_item = 0x2c
    packed_type = 0x2d
    subprogram = 0x2e
    template_type_parameter = 0x2f
    template_value_parameter = 0x30
    thrown_type = 0x31
    try_block = 0x32
    variant_part = 0x33
    variable = 0x34
    volatile_type = 0x35
    dwarf_procedure = 0x36
    restrict_type = 0x37
    interface_type = 0x38
    namespace = 0x39
    imported_module = 0x3a
    unspecified_type = 0x3b
    partial_unit = 0x3c
    imported_unit = 0x3d
    # 0x3e reserved
    condition = 0x3f
    shared_type = 0x40
    type_unit = 0x41
    rvalue_reference_type = 0x42
    template_alias = 0x43

    # DWARF 5
    atomic_type = 0x47

    # Vendor extensions
    lo_user = 0x4080
    MIPS_loop = 0x4081
    format_label = 0x4101
    function_template = 0x4102
    class_template = 0x4103
    GNU_BINCL = 0x4104
    GNU_EINCL = 0x4105
    GNU_template_template_param = 0x4106
    GNU_template_parameter_pack = 0x4107
    GNU_formal_parameter_pack = 0x4108
    GNU_call_site = 0x4109
    GNU_call_site_parameter = 0x410a
    hi_user = 0xffff
class DW_AT(enum.IntEnum):
    """DWARF attribute name codes (``DW_AT_*``)."""

    sibling = 0x01
    location = 0x02
    name = 0x03
    ordering = 0x09
    subscr_data = 0x0a
    byte_size = 0x0b
    bit_offset = 0x0c
    bit_size = 0x0d
    element_list = 0x0f
    stmt_list = 0x10
    low_pc = 0x11
    high_pc = 0x12
    language = 0x13
    member = 0x14
    discr = 0x15
    discr_value = 0x16
    visibility = 0x17
    import_ = 0x18
    string_length = 0x19
    common_reference = 0x1a
    comp_dir = 0x1b
    const_value = 0x1c
    containing_type = 0x1d
    default_value = 0x1e
    inline = 0x20
    is_optional = 0x21
    lower_bound = 0x22
    producer = 0x25
    prototyped = 0x27
    return_addr = 0x2a
    start_scope = 0x2c
    bit_stride = 0x2e
    upper_bound = 0x2f
    abstract_origin = 0x31
    accessibility = 0x32
    address_class = 0x33
    artificial = 0x34
    base_types = 0x35
    calling_convention = 0x36
    count = 0x37
    data_member_location = 0x38
    decl_column = 0x39
    decl_file = 0x3a
    decl_line = 0x3b
    declaration = 0x3c
    discr_list = 0x3d
    encoding = 0x3e
    external = 0x3f
    frame_base = 0x40
    friend = 0x41
    identifier_case = 0x42
    macro_info = 0x43
    namelist_item = 0x44
    priority = 0x45
    segment = 0x46
    specification = 0x47
    static_link = 0x48
    type = 0x49
    use_location = 0x4a
    variable_parameter = 0x4b
    virtuality = 0x4c
    vtable_elem_location = 0x4d
    allocated = 0x4e
    associated = 0x4f
    data_location = 0x50
    byte_stride = 0x51
    entry_pc = 0x52
    use_UTF8 = 0x53
    extension = 0x54
    ranges = 0x55
    trampoline = 0x56
    call_column = 0x57
    call_file = 0x58
    call_line = 0x59
    description = 0x5a
    binary_scale = 0x5b
    decimal_scale = 0x5c
    small = 0x5d
    decimal_sign = 0x5e
    digit_count = 0x5f
    picture_string = 0x60
    mutable = 0x61
    threads_scaled = 0x62
    explicit = 0x63
    object_pointer = 0x64
    endianity = 0x65
    elemental = 0x66
    pure = 0x67
    recursive = 0x68
    signature = 0x69
    main_subprogram = 0x6a
    data_bit_offset = 0x6b
    const_expr = 0x6c
    enum_class = 0x6d
    linkage_name = 0x6e

    # DWARF5
    noreturn = 0x87

    # Vendor extensions
    lo_user = 0x2000
    MIPS_fde = 0x2001
    MIPS_loop_begin = 0x2002
    MIPS_tail_loop_begin = 0x2003
    MIPS_epilog_begin = 0x2004
    MIPS_loop_unroll_factor = 0x2005
    MIPS_software_pipeline_depth = 0x2006
    MIPS_linkage_name = 0x2007
    MIPS_stride = 0x2008
    MIPS_abstract_name = 0x2009
    MIPS_clone_origin = 0x200a
    MIPS_has_inlines = 0x200b
    MIPS_stride_byte = 0x200c
    MIPS_stride_elem = 0x200d
    MIPS_ptr_dopetype = 0x200e
    MIPS_allocatable_dopetype = 0x200f
    MIPS_assumed_shape_dopetype = 0x2010
    MIPS_assumed_size = 0x2011

    # GNU extensions
    sf_names = 0x2101
    src_info = 0x2102
    mac_info = 0x2103
    src_coords = 0x2104
    body_begin = 0x2105
    body_end = 0x2106
    GNU_vector = 0x2107
    GNU_guarded_by = 0x2108
    GNU_pt_guarded_by = 0x2109
    GNU_guarded = 0x210a
    GNU_pt_guarded = 0x210b
    GNU_locks_excluded = 0x210c
    GNU_exclusive_locks_required = 0x210d
    GNU_shared_locks_required = 0x210e
    GNU_odr_signature = 0x210f
    GNU_template_name = 0x2110
    GNU_call_site_value = 0x2111
    GNU_call_site_data_value = 0x2112
    GNU_call_site_target = 0x2113
    GNU_call_site_target_clobbered = 0x2114
    GNU_tail_call = 0x2115
    GNU_all_tail_call_sites = 0x2116
    GNU_all_call_sites = 0x2117
    GNU_all_source_call_sites = 0x2118
    GNU_macros = 0x2119
    GNU_deleted = 0x211a
    hi_user = 0x3fff
class DW_FORM(enum.IntEnum):
    """DWARF attribute form codes (``DW_FORM_*``).

    The form of an attribute determines how its value is encoded.
    """

    addr = 0x01
    block2 = 0x03
    block4 = 0x04
    data2 = 0x05
    data4 = 0x06
    data8 = 0x07
    string = 0x08
    block = 0x09
    block1 = 0x0a
    data1 = 0x0b
    flag = 0x0c
    sdata = 0x0d
    strp = 0x0e
    udata = 0x0f
    ref_addr = 0x10
    ref1 = 0x11
    ref2 = 0x12
    ref4 = 0x13
    ref8 = 0x14
    ref_udata = 0x15
    indirect = 0x16
    sec_offset = 0x17
    exprloc = 0x18
    flag_present = 0x19
    ref_sig8 = 0x20
class DW_LNS(enum.IntEnum):
    """Standard opcodes of the line number program (``DW_LNS_*``)."""

    copy = 0x01
    advance_pc = 0x02
    advance_line = 0x03
    set_file = 0x04
    set_column = 0x05
    negate_stmt = 0x06
    set_basic_block = 0x07
    const_add_pc = 0x08
    fixed_advance_pc = 0x09
    set_prologue_end = 0x0a
    set_epilogue_begin = 0x0b
    set_isa = 0x0c
class DW_LNE(enum.IntEnum):
    """Extended opcodes of the line number program (``DW_LNE_*``)."""

    end_sequence = 0x01
    set_address = 0x02
    define_file = 0x03
    set_discriminator = 0x04
    lo_user = 0x80
    hi_user = 0xff
def at_name(at):
    """Return ``DW_AT_<name>`` for a known attribute code, else ``str(at)``."""
    try:
        member = DW_AT(at)
    except ValueError:
        return str(at)
    return f'DW_AT_{member.name}'
def at_class_constant(at):
    """Return whether a form encodes a constant.

    Despite the parameter name, the argument is compared against
    ``DW_FORM`` codes: the fixed-size ``data1/2/4/8`` forms and the LEB128
    ``udata``/``sdata`` forms.
    """
    return at in (DW_FORM.data1, DW_FORM.data2, DW_FORM.data4,
                  DW_FORM.data8, DW_FORM.udata, DW_FORM.sdata)
def at_class_constant_bytes(at):
    """Return whether a form is one of the fixed-size ``data1/2/4/8`` forms.

    Despite the parameter name, the argument is compared against
    ``DW_FORM`` codes.
    """
    return at in (DW_FORM.data1, DW_FORM.data2, DW_FORM.data4,
                  DW_FORM.data8)
def at_class_constant_int(at):
    """Return whether a form is ``udata`` or ``sdata``.

    Despite the parameter name, the argument is compared against
    ``DW_FORM`` codes.
    """
    return at in (DW_FORM.udata, DW_FORM.sdata)
def form_name(form):
    """Return ``DW_FORM_<name>`` for a known form code, else ``str(form)``."""
    try:
        member = DW_FORM(form)
    except ValueError:
        return str(form)
    return f'DW_FORM_{member.name}'
def tag_name(tag):
    """Return ``DW_TAG_<name>`` for a known tag code, else ``str(tag)``."""
    try:
        member = DW_TAG(tag)
    except ValueError:
        return str(tag)
    return f'DW_TAG_{member.name}'
def lns_name(lns):
    """Return ``DW_LNS_<name>`` for a known standard opcode, else ``str(lns)``."""
    try:
        member = DW_LNS(lns)
    except ValueError:
        return str(lns)
    return f'DW_LNS_{member.name}'
def lne_name(lne):
    """Return ``DW_LNE_<name>`` for a known extended opcode, else ``str(lne)``."""
    try:
        member = DW_LNE(lne)
    except ValueError:
        return str(lne)
    return f'DW_LNE_{member.name}'

183
drgn/dwarf/file.py Normal file
View File

@ -0,0 +1,183 @@
import mmap
import drgn.lldwarf as lldwarf
from drgn.dwarf.defs import *
from drgn.elf import parse_elf_header, parse_elf_sections
import os.path
import sys
class DwarfFile:
    """Read-only view of the DWARF debugging information in an ELF file.

    The file is memory-mapped and parsed with the low-level ``lldwarf``
    helpers.  Instances are context managers; leaving the ``with`` block
    unmaps and closes the file.
    """

    def __init__(self, path):
        """Open and map ``path`` and parse its ELF header and section table."""
        self._closed = False
        self._file = open(path, 'rb')
        try:
            self._mmap = mmap.mmap(self._file.fileno(), 0, access=mmap.ACCESS_READ)
            self._ehdr = parse_elf_header(self._mmap)
            self._sections = parse_elf_sections(self._mmap, self._ehdr)
        except Exception:
            # Do not leak the descriptor and mapping when the file turns out
            # to be unusable.
            self.close()
            raise
        # Parsed abbreviation tables keyed by offset within .debug_abbrev.
        self._abbrev_tables = {}

    def close(self):
        """Unmap and close the file.  Safe to call more than once."""
        if not self._closed:
            # The attributes may be missing if __init__ failed part-way.
            if hasattr(self, '_mmap'):
                self._mmap.close()
            if hasattr(self, '_file'):
                self._file.close()
            self._closed = True

    def __del__(self):
        self.close()

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        self.close()

    def section(self, name):
        """Return the section header named ``name`` (KeyError if absent)."""
        return self._sections[name]

    def at_string(self, form, value):
        """Return the bytes of a string attribute.

        For ``DW_FORM.string`` the value is an ``(offset, length)`` pair
        into the mapped file; for ``DW_FORM.strp`` it is an offset into
        ``.debug_str`` of a NUL-terminated string.

        Raises ValueError if a ``.debug_str`` string is not terminated.
        """
        if form == DW_FORM.string:
            return self._mmap[value[0]:value[0] + value[1]]
        else:
            assert form == DW_FORM.strp
            debug_str = self.section('.debug_str')
            offset = debug_str.sh_offset + value
            nul = self._mmap.find(b'\0', offset)
            if nul == -1:
                raise ValueError('unterminated string in .debug_str')
            return self._mmap[offset:nul]

    def at_sec_offset(self, form, value):
        """Return a section offset attribute as an integer."""
        if form == DW_FORM.data4:
            # DWARF 2 and 3
            return int.from_bytes(value, sys.byteorder)
        else:
            # DWARF 4
            assert form == DW_FORM.sec_offset
            return value

    def abbrev_table(self, offset):
        """Return the abbreviation table at ``offset`` within ``.debug_abbrev``.

        Tables are parsed once and cached.
        """
        try:
            return self._abbrev_tables[offset]
        except KeyError:
            pass
        debug_abbrev = self.section('.debug_abbrev')
        table = lldwarf.parse_abbrev_table(self._mmap,
                                           debug_abbrev.sh_offset + offset)
        # Cache under the same key used for the lookup above.  The table used
        # to be stored under the file offset, so the cache never hit.
        self._abbrev_tables[offset] = table
        return table

    def cu_headers(self):
        """Yield every compilation unit header in ``.debug_info``, in order."""
        debug_info = self.section('.debug_info')
        offset = debug_info.sh_offset
        end = debug_info.sh_offset + debug_info.sh_size
        while offset < end:
            cu = lldwarf.parse_compilation_unit_header(self._mmap, offset)
            yield cu
            offset = cu.next_offset()

    def cu_die(self, cu, *, recurse=False):
        """Parse and return the top-level DIE of compilation unit ``cu``."""
        abbrev_table = self.abbrev_table(cu.debug_abbrev_offset)
        return lldwarf.parse_die(cu, abbrev_table, self._mmap, cu.die_offset(),
                                 recurse=recurse)

    def parse_die_children(self, cu, die, *, recurse=False):
        """Parse the children of ``die`` and attach them as ``die.children``.

        Does nothing if the DIE already has a ``children`` attribute.
        """
        if not hasattr(die, 'children'):
            abbrev_table = self.abbrev_table(cu.debug_abbrev_offset)
            die.children = lldwarf.parse_die_siblings(cu, abbrev_table,
                                                      self._mmap,
                                                      offset=die.offset + die.die_length,
                                                      recurse=recurse)

    @staticmethod
    def _reject_ranges(die):
        """Raise NotImplementedError if ``die`` has a DW_AT_ranges attribute."""
        try:
            die.find(DW_AT.ranges)
        except KeyError:
            return
        # This used to be ``assert False``, which vanishes under ``python -O``.
        raise NotImplementedError('DW_AT_ranges is not supported')

    def die_contains_address(self, die, address):
        """Return whether ``address`` lies in ``[low_pc, high_pc)`` of ``die``.

        A DIE without ``DW_AT_low_pc`` contains no address.  ``DW_AT_high_pc``
        is an offset from ``low_pc`` when it has a constant form and an
        absolute address otherwise.

        Raises NotImplementedError for DIEs that use ``DW_AT_ranges``.
        """
        self._reject_ranges(die)
        try:
            low_pc_form, low_pc = die.find(DW_AT.low_pc)
        except KeyError:
            return False
        high_pc_form, high_pc_value = die.find(DW_AT.high_pc)
        assert low_pc_form == DW_FORM.addr
        if at_class_constant_int(high_pc_form):
            high_pc = low_pc + high_pc_value
        elif at_class_constant_bytes(high_pc_form):
            high_pc = low_pc + int.from_bytes(high_pc_value, sys.byteorder)
        else:
            assert high_pc_form == DW_FORM.addr
            high_pc = high_pc_value
        return low_pc <= address < high_pc

    def die_name(self, die):
        """Return the ``DW_AT_name`` of ``die`` as bytes (KeyError if absent)."""
        form, value = die.find(DW_AT.name)
        return self.at_string(form, value)

    def die_address(self, die):
        """Return the ``DW_AT_low_pc`` of ``die``.

        Raises NotImplementedError for DIEs that use ``DW_AT_ranges`` and
        KeyError if the DIE has no ``DW_AT_low_pc``.
        """
        self._reject_ranges(die)
        form, value = die.find(DW_AT.low_pc)
        assert form == DW_FORM.addr
        return value

    def cu_line_number_program_header(self, cu, die):
        """Parse the line number program header referenced by ``die``.

        Returns None if the DIE has no ``DW_AT_stmt_list`` attribute.
        """
        debug_line = self.section('.debug_line')
        try:
            form, value = die.find(DW_AT.stmt_list)
        except KeyError:
            return None
        offset = debug_line.sh_offset + self.at_sec_offset(form, value)
        return lldwarf.parse_line_number_program_header(self._mmap, offset)

    def execute_line_number_program(self, lnp):
        """Run the line number program and return the resulting matrix."""
        return lldwarf.execute_line_number_program(lnp, self._mmap,
                                                   lnp.program_offset())

    def line_number_row_name(self, cu, lnp, row):
        """Return the source path (str) that ``row`` refers to.

        File index 0 is taken to mean the compilation unit's own name; any
        other index is looked up in the program's file table and joined with
        its include directory, if it has one.
        """
        if row.file == 0:
            # Used to call an undefined ``cu_name()`` helper (NameError).
            return self.die_name(self.cu_die(cu)).decode()
        file_name, directory_index, mtime, file_size = lnp.file_names[row.file - 1]
        file_name = file_name.decode()
        if directory_index > 0:
            directory = lnp.include_directories[directory_index - 1].decode()
            return os.path.join(directory, file_name)
        else:
            return file_name

    def decode_line_number_program(self, lnp):
        """Yield ``(kind, opcode, args)`` for each operation of the program.

        ``kind`` is ``'extended'`` (args is a one-element list holding the
        raw payload), ``'standard'`` (args is the list of decoded operands)
        or ``'special'`` (opcode is the adjusted opcode and args is
        ``(operation_advance, line_increment)``).
        """
        offset = lnp.program_offset()
        end = lnp.end_offset()
        while offset < end:
            opcode = self._mmap[offset]
            offset += 1
            if opcode == 0:
                # Extended opcode: ULEB128 length, opcode byte, then payload.
                length, offset = lldwarf.parse_uleb128_offset(self._mmap, offset)
                opcode = self._mmap[offset]
                length -= 1
                offset += 1
                yield 'extended', opcode, [self._mmap[offset:offset + length]]
                offset += length
            elif opcode < lnp.opcode_base:
                if opcode == DW_LNS.fixed_advance_pc:
                    # The only standard opcode with a fixed-size operand.
                    args = [int.from_bytes(self._mmap[offset:offset + 2], sys.byteorder)]
                    offset += 2
                else:
                    # NOTE(review): every operand is decoded as unsigned
                    # LEB128; confirm whether signed operands (such as the
                    # one taken by advance_line) need separate handling.
                    args = []
                    for i in range(lnp.standard_opcode_lengths[opcode - 1]):
                        arg, offset = lldwarf.parse_uleb128_offset(self._mmap, offset)
                        args.append(arg)
                yield 'standard', opcode, args
            else:
                opcode -= lnp.opcode_base
                operation_advance = opcode // lnp.line_range
                line_increment = lnp.line_base + (opcode % lnp.line_range)
                yield 'special', opcode, (operation_advance, line_increment)

233
drgn/elf.py Normal file
View File

@ -0,0 +1,233 @@
"""
Minimal ELF format parser implementing only what's needed for DWARF.
"""
import ctypes
from collections import namedtuple, OrderedDict
# Indexes into the e_ident identification array.
# e_ident
EI_MAG0 = 0 # File identification
EI_MAG1 = 1 # File identification
EI_MAG2 = 2 # File identification
EI_MAG3 = 3 # File identification
EI_CLASS = 4 # File class
EI_DATA = 5 # Data encoding
EI_VERSION = 6 # File version
EI_PAD = 7 # Start of padding byte
EI_NIDENT = 16
# Magic number bytes.
# e_ident[EI_MAG*]
ELFMAG0 = 0x7f
ELFMAG1 = ord('E')
ELFMAG2 = ord('L')
ELFMAG3 = ord('F')
# e_ident[EI_CLASS]
ELFCLASSNONE = 0 # Invalid class
ELFCLASS32 = 1 # 32-bit objects
ELFCLASS64 = 2 # 64-bit objects
# e_ident[EI_DATA]
ELFDATANONE = 0 # Invalid data encoding
ELFDATA2LSB = 1 # Little-endian
ELFDATA2MSB = 2 # Big-endian
# e_type
ET_NONE = 0 # No file type
ET_REL = 1 # Relocatable file
ET_EXEC = 2 # Executable file
ET_DYN = 3 # Shared object file
ET_CORE = 4 # Core file
ET_LOPROC = 0xff00 # Processor-specific
ET_HIPROC = 0xffff # Processor-specific
# e_machine
EM_NONE = 0 # No machine
EM_M32 = 1 # AT&T WE 32100
EM_SPARC = 2 # SPARC
EM_386 = 3 # Intel 80386
EM_68K = 4 # Motorola 68000
EM_88K = 5 # Motorola 88000
EM_860 = 7 # Intel 80860
EM_MIPS = 8 # MIPS RS3000
# e_version
EV_NONE = 0 # Invalid version
EV_CURRENT = 1 # Current version
# Special section indexes.
SHN_UNDEF = 0
SHN_LORESERVE = 0xff00
SHN_LOPROC = 0xff00
SHN_HIPROC = 0xff1f
SHN_LOOS = 0xff20
SHN_HIOS = 0xff3f
SHN_ABS = 0xfff1
SHN_COMMON = 0xfff2
SHN_XINDEX = 0xffff
SHN_HIRESERVE = 0xffff
# sh_type
SHT_NULL = 0
SHT_PROGBITS = 1
SHT_SYMTAB = 2
SHT_STRTAB = 3
SHT_RELA = 4
SHT_HASH = 5
SHT_DYNAMIC = 6
SHT_NOTE = 7
SHT_NOBITS = 8
SHT_REL = 9
SHT_SHLIB = 10
SHT_DYNSYM = 11
SHT_INIT_ARRAY = 14
SHT_FINI_ARRAY = 15
SHT_PREINIT_ARRAY = 16
SHT_GROUP = 17
SHT_SYMTAB_SHNDX = 18
SHT_LOOS = 0x60000000
SHT_HIOS = 0x6fffffff
SHT_LOPROC = 0x70000000
SHT_HIPROC = 0x7fffffff
SHT_LOUSER = 0x80000000
SHT_HIUSER = 0xffffffff
# ctypes equivalents of the 64-bit ELF scalar types.
Elf64_Addr = ctypes.c_uint64
Elf64_Half = ctypes.c_uint16
Elf64_Off = ctypes.c_uint64
Elf64_Sword = ctypes.c_int32
Elf64_Word = ctypes.c_uint32
Elf64_Sxword = ctypes.c_int64
Elf64_Xword = ctypes.c_uint64
Elf64_Section = ctypes.c_uint16
class Elf64_Ehdr(ctypes.Structure):
    """ctypes layout of the 64-bit ELF file header.

    Field order and types define the binary layout; do not reorder.
    """

    _fields_ = [
        ('e_ident', ctypes.c_ubyte * EI_NIDENT),  # magic, class, data encoding, version
        ('e_type', Elf64_Half),
        ('e_machine', Elf64_Half),
        ('e_version', Elf64_Word),
        ('e_entry', Elf64_Addr),
        ('e_phoff', Elf64_Off),
        ('e_shoff', Elf64_Off),  # file offset of the section header table
        ('e_flags', Elf64_Word),
        ('e_ehsize', Elf64_Half),
        ('e_phentsize', Elf64_Half),
        ('e_phnum', Elf64_Half),
        ('e_shentsize', Elf64_Half),  # size of one section header
        ('e_shnum', Elf64_Half),  # number of section headers (0: see section 0)
        ('e_shstrndx', Elf64_Half),  # index of the section name string table
    ]
class Elf64_Shdr(ctypes.Structure):
    """ctypes layout of a 64-bit ELF section header.

    Field order and types define the binary layout; do not reorder.
    """

    _fields_ = [
        ('sh_name', Elf64_Word),  # offset of the name in the section name string table
        ('sh_type', Elf64_Word),  # one of the SHT_* values
        ('sh_flags', Elf64_Xword),
        ('sh_addr', Elf64_Addr),
        ('sh_offset', Elf64_Off),  # file offset of the section contents
        ('sh_size', Elf64_Xword),  # size of the section contents in bytes
        ('sh_link', Elf64_Word),
        ('sh_info', Elf64_Word),
        ('sh_addralign', Elf64_Xword),
        ('sh_entsize', Elf64_Xword),
    ]
class Elf64_Sym(ctypes.Structure):
    """ctypes layout of a 64-bit ELF symbol table entry.

    Field order and types define the binary layout; do not reorder.
    """

    _fields_ = [
        ('st_name', Elf64_Word),
        ('st_info', ctypes.c_ubyte),
        ('st_other', ctypes.c_ubyte),
        ('st_shndx', Elf64_Section),
        ('st_value', Elf64_Addr),
        ('st_size', Elf64_Xword),
    ]
def parse_elf_header(buffer):
    """Parse and validate the ELF header at the start of ``buffer``.

    Only 64-bit, little-endian, current-version ELF files whose section
    header entries have the expected size are accepted.

    buffer -- bytes-like object holding the file contents

    Returns an ``Elf64_Ehdr`` copied out of the buffer.

    Raises ValueError if the buffer is not an ELF file or uses an
    unsupported format.  (These checks used to be ``assert`` statements,
    which are skipped under ``python -O``.)
    """
    e_ident = buffer[:EI_NIDENT]
    if (len(e_ident) < EI_NIDENT or
        e_ident[EI_MAG0] != ELFMAG0 or e_ident[EI_MAG1] != ELFMAG1 or
        e_ident[EI_MAG2] != ELFMAG2 or e_ident[EI_MAG3] != ELFMAG3):
        raise ValueError('not an ELF file')
    # from_buffer_copy() itself raises ValueError if the buffer is too short.
    ehdr = Elf64_Ehdr.from_buffer_copy(buffer)
    if ehdr.e_ident[EI_CLASS] != ELFCLASS64:
        raise ValueError('only 64-bit ELF files are supported')
    if ehdr.e_ident[EI_DATA] != ELFDATA2LSB:
        raise ValueError('only little-endian ELF files are supported')
    if ehdr.e_ident[EI_VERSION] != EV_CURRENT:
        raise ValueError('unsupported ELF version')
    if ehdr.e_shentsize != ctypes.sizeof(Elf64_Shdr):
        raise ValueError('unexpected ELF section header size')
    return ehdr
def parse_elf_sections(buffer, ehdr):
    """Parse the section header table into a name -> header mapping.

    buffer -- bytes-like object holding the file contents
    ehdr   -- header returned by parse_elf_header()

    Returns an ``OrderedDict`` mapping each section name (``''`` for unnamed
    sections) to its ``Elf64_Shdr``, in section-table order.

    Raises ValueError if the section table or the section name string table
    is missing or malformed, or if two sections share a name.  (These checks
    used to be ``assert`` statements, which are skipped under ``python -O``.)
    """
    if ehdr.e_shoff == 0:
        raise ValueError('ELF file has no section header table')
    if ehdr.e_shnum == 0:
        # Extended numbering: the real count is stored in sh_size of
        # section header 0.
        shnum = Elf64_Shdr.from_buffer_copy(buffer, ehdr.e_shoff).sh_size
    else:
        shnum = ehdr.e_shnum
    shdrs = (Elf64_Shdr * shnum).from_buffer_copy(buffer, ehdr.e_shoff)

    if ehdr.e_shstrndx == SHN_UNDEF:
        raise ValueError('ELF file has no section name string table')
    if ehdr.e_shstrndx == SHN_XINDEX:
        # Extended numbering: the real index is stored in sh_link of
        # section header 0.
        strtab_section = shdrs[shdrs[0].sh_link]
    else:
        if ehdr.e_shstrndx >= SHN_LORESERVE:
            raise ValueError('invalid section name string table index')
        strtab_section = shdrs[ehdr.e_shstrndx]
    if strtab_section.sh_type != SHT_STRTAB or strtab_section.sh_size == 0:
        raise ValueError('invalid section name string table')
    strtab_offset = strtab_section.sh_offset

    sections = OrderedDict()
    for shdr in shdrs:
        if shdr.sh_name:
            offset = strtab_offset + shdr.sh_name
            nul = buffer.find(b'\0', offset)
            if nul == -1:
                # Slicing with -1 would silently drop the last byte instead.
                raise ValueError('unterminated section name')
            section_name = buffer[offset:nul].decode('ascii')
        else:
            section_name = ''
        if section_name in sections:
            raise ValueError(f'duplicate section name {section_name!r}')
        sections[section_name] = shdr
    return sections
"""
def symtab(self):
try:
return self._symtab
except AttributeError:
pass
shdr = self.section(b'.symtab')
symnum = shdr.sh_size // ctypes.sizeof(Elf64_Sym)
self._symtab = (Elf64_Sym * symnum).from_buffer_copy(self._mm, shdr.sh_offset)
return self._symtab
def symbol(self, name, *, all=False):
try:
syms = self._symtab_by_name[name]
if all:
return syms
else:
if len(syms) > 1:
raise ValueError('multiple symbols with given name')
return syms[0]
except AttributeError:
pass
strtab_offset = self.section(b'.strtab').sh_offset
symtab_by_name = {}
for sym in self.symtab():
if sym.st_name:
sym_name = string_at(self._mm, strtab_offset + sym.st_name)
else:
sym_name = b''
try:
symtab_by_name[sym_name].append(sym)
except KeyError:
symtab_by_name[sym_name] = [sym]
self._symtab_by_name = symtab_by_name
return self.symbol(name, all=all)
"""

74
drgn/ftrace.py Normal file
View File

@ -0,0 +1,74 @@
import os
import os.path
# Directory under which all tracing control files are accessed.
TRACEFS = '/sys/kernel/debug/tracing'
def write_tracefs(path, contents):
    """Write ``contents`` (bytes) to the control file ``path`` under TRACEFS.

    The file is opened write-only with no other flags, and the data is
    expected to be accepted in a single write.
    """
    target = os.path.join(TRACEFS, path)
    descriptor = os.open(target, os.O_WRONLY)
    try:
        written = os.write(descriptor, contents)
        assert written == len(contents)
    finally:
        os.close(descriptor)
def append_tracefs(path, contents):
    """Append ``contents`` (bytes) to the control file ``path`` under TRACEFS.

    Same as a plain write except that the file is opened with ``O_APPEND``.
    """
    target = os.path.join(TRACEFS, path)
    descriptor = os.open(target, os.O_WRONLY | os.O_APPEND)
    try:
        written = os.write(descriptor, contents)
        assert written == len(contents)
    finally:
        os.close(descriptor)
class _Probe:
def __init__(self, probe_name):
self.probe_name = probe_name
def __enter__(self):
return self
def __exit__(self, exc_type, exc_value, traceback):
s = f'-:{self.probe_name}\n'
append_tracefs('kprobe_events', s.encode())
def enable(self, instance=None):
if instance is None:
write_tracefs(f'events/{self.probe_name}/enable', b'1')
else:
write_tracefs(f'instances/{instance.name}/events/{self.probe_name}/enable', b'1')
def disable(self, instance=None):
if instance is None:
write_tracefs(f'events/{self.probe_name}/enable', b'0')
else:
write_tracefs(f'instances/{instance.name}/events/{self.probe_name}/enable', b'0')
class Kprobe(_Probe):
    """A kprobe created on entering the ``with`` block.

    probe_name -- name of the probe event
    location   -- where to place the probe, as written to ``kprobe_events``
    fetchargs  -- optional iterable of fetch argument strings, joined with
                  spaces
    """

    def __init__(self, probe_name, location, fetchargs=None):
        super().__init__(probe_name)
        self.location = location
        self.fetchargs = '' if fetchargs is None else ' '.join(fetchargs)

    def __enter__(self):
        definition = f'p:{self.probe_name} {self.location} {self.fetchargs}\n'
        append_tracefs('kprobe_events', definition.encode())
        return self
class FtraceInstance:
    """A tracing instance that exists for the duration of a ``with`` block.

    Entering creates the directory ``instances/<name>`` under TRACEFS and
    leaving removes it again.
    """

    def __init__(self, name):
        self.name = name

    def _directory(self):
        """Return the path of this instance's directory."""
        return f'{TRACEFS}/instances/{self.name}'

    def __enter__(self):
        os.mkdir(self._directory())
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        os.rmdir(self._directory())

368
lldwarf/abbrev.c Normal file
View File

@ -0,0 +1,368 @@
#include "lldwarf.h"
#include "dwarfdefs.h"
/*
 * AbbrevDecl(tag, children, attributes)
 *
 * Create an abbreviation declaration from Python. "attributes" is any
 * iterable of (name, form) pairs; each is stored in the variable-size tail of
 * the object.
 *
 * Returns a new reference, or NULL with an exception set. An integer that
 * does not fit in 64 bits is reported as OverflowError with a field-specific
 * message; any other conversion error (e.g., TypeError) is propagated as-is.
 */
static PyObject *AbbrevDecl_new(PyTypeObject *subtype, PyObject *args,
				PyObject *kwds)
{
	static char *keywords[] = {"tag", "children", "attributes", NULL};
	PyObject *tag;
	int children;
	PyObject *attribs;
	PyObject *tmp = NULL;
	AbbrevDecl *decl = NULL;
	Py_ssize_t i, len;

	if (!PyArg_ParseTupleAndKeywords(args, kwds, "OpO:AbbrevDecl", keywords,
					 &tag, &children, &attribs))
		return NULL;

	/* Materialize the iterable so that its length is known up front. */
	tmp = PySequence_Tuple(attribs);
	if (!tmp)
		goto err;

	len = PyTuple_GET_SIZE(tmp);
	decl = (AbbrevDecl *)subtype->tp_alloc(subtype, len);
	if (!decl)
		goto err;

	decl->tag = PyLong_AsUint64_t(tag);
	if (PyErr_Occurred()) {
		/*
		 * Only reword overflow errors, as is done for name and form
		 * below. This used to replace every error, including
		 * TypeError, with OverflowError.
		 */
		if (PyErr_ExceptionMatches(PyExc_OverflowError))
			PyErr_SetString(PyExc_OverflowError, "tag too big");
		goto err;
	}
	decl->children = children;

	for (i = 0; i < len; i++) {
		PyObject *item;

		item = PySequence_Tuple(PyTuple_GET_ITEM(tmp, i));
		if (!item)
			goto err;

		if (PyTuple_GET_SIZE(item) != 2) {
			PyErr_SetString(PyExc_ValueError, "attribute must be pair");
			Py_DECREF(item);
			goto err;
		}

		decl->attribs[i].name = PyLong_AsUint64_t(PyTuple_GET_ITEM(item, 0));
		if (PyErr_Occurred()) {
			if (PyErr_ExceptionMatches(PyExc_OverflowError))
				PyErr_SetString(PyExc_OverflowError, "name too big");
			Py_DECREF(item);
			goto err;
		}
		decl->attribs[i].form = PyLong_AsUint64_t(PyTuple_GET_ITEM(item, 1));
		if (PyErr_Occurred()) {
			if (PyErr_ExceptionMatches(PyExc_OverflowError))
				PyErr_SetString(PyExc_OverflowError, "form too big");
			Py_DECREF(item);
			goto err;
		}
		Py_DECREF(item);
	}

	Py_DECREF(tmp);
	return (PyObject *)decl;

err:
	Py_XDECREF(decl);
	Py_XDECREF(tmp);
	return NULL;
}
/* Release an AbbrevDecl through its type's tp_free slot. */
static void AbbrevDecl_dealloc(AbbrevDecl *self)
{
	PyTypeObject *type = Py_TYPE(self);

	type->tp_free((PyObject *)self);
}
/*
 * repr(AbbrevDecl): "AbbrevDecl(tag=..., children=..., attributes=(...))",
 * where attributes is rendered as a tuple of (name, form) pairs.
 *
 * Returns a new reference, or NULL with an exception set.
 */
static PyObject *AbbrevDecl_repr(AbbrevDecl *self)
{
	PyObject *tmp, *ret;
	Py_ssize_t i, len;

	len = Py_SIZE(self);
	tmp = PyTuple_New(len);
	if (!tmp)
		return NULL;

	for (i = 0; i < len; i++) {
		PyObject *item;

		item = Py_BuildValue("KK",
				     (unsigned long long)self->attribs[i].name,
				     (unsigned long long)self->attribs[i].form);
		if (!item) {
			Py_DECREF(tmp);
			return NULL;
		}
		/* PyTuple_SET_ITEM steals the reference to item. */
		PyTuple_SET_ITEM(tmp, i, item);
	}

	/*
	 * %llu consumes an unsigned long long, so cast the tag explicitly as
	 * is done for the "K" arguments above.
	 */
	ret = PyUnicode_FromFormat("AbbrevDecl(tag=%llu, children=%s, attributes=%R)",
				   (unsigned long long)self->tag,
				   self->children ? "True" : "False",
				   tmp);
	Py_DECREF(tmp);
	return ret;
}
/*
 * Equality comparison for AbbrevDecl; only == and != are supported, any other
 * operator raises TypeError.
 *
 * Two declarations are equal if the generic object comparison says so and
 * they carry the same attribute specifications in the same order.
 */
static PyObject *AbbrevDecl_richcompare(AbbrevDecl *self, PyObject *other_, int op)
{
	int equal;

	if (op != Py_EQ && op != Py_NE) {
		PyErr_SetString(PyExc_TypeError, "not supported");
		return NULL;
	}

	equal = LLDwarfObject_RichCompareBool((PyObject *)self, other_, Py_EQ);
	if (equal == -1)
		return NULL;

	if (equal) {
		/*
		 * Presumably the generic comparison only succeeds for objects
		 * of the same type, which makes this cast safe -- TODO confirm.
		 */
		AbbrevDecl *other = (AbbrevDecl *)other_;

		equal = (Py_SIZE(self) == Py_SIZE(other) &&
			 !memcmp(self->attribs, other->attribs,
				 Py_SIZE(self) * sizeof(struct AttribSpec)));
	}

	if (op == Py_NE)
		equal = !equal;

	if (equal) {
		Py_RETURN_TRUE;
	} else {
		Py_RETURN_FALSE;
	}
}
/* len(AbbrevDecl): the number of attribute specifications. */
static Py_ssize_t AbbrevDecl_length(AbbrevDecl *self)
{
	Py_ssize_t count = Py_SIZE(self);

	return count;
}
/*
 * AbbrevDecl[i]: the i-th attribute specification as a (name, form) tuple.
 *
 * Raises IndexError if i is negative or past the end.
 */
static PyObject *AbbrevDecl_item(AbbrevDecl *self, Py_ssize_t i)
{
	unsigned long long name, form;

	if (i < 0 || i >= Py_SIZE(self)) {
		PyErr_SetString(PyExc_IndexError, "index out of range");
		return NULL;
	}

	name = (unsigned long long)self->attribs[i].name;
	form = (unsigned long long)self->attribs[i].form;
	return Py_BuildValue("KK", name, form);
}
/*
 * Resize the raw storage behind *decl so that it can hold capacity attribute
 * specifications after the fixed AbbrevDecl header. *decl may be NULL, in
 * which case fresh storage is allocated.
 *
 * The storage is later turned into a Python object whose memory is released
 * through the type's tp_free. AbbrevDecl_type does not set tp_free, so it is
 * inherited (PyObject_Free for a non-GC type). Memory must be freed by the
 * same allocator family that produced it, so this uses PyObject_Realloc
 * rather than PyMem_Realloc.
 *
 * Returns 0 on success. On failure, sets MemoryError, leaves *decl untouched
 * and returns -1.
 */
static int AbbrevDecl_Realloc(AbbrevDecl **decl, size_t capacity)
{
	AbbrevDecl *tmp;
	size_t specsize, size;

	if (__builtin_mul_overflow(capacity, sizeof(struct AttribSpec), &specsize) ||
	    __builtin_add_overflow(sizeof(AbbrevDecl), specsize, &size)) {
		PyErr_NoMemory();
		return -1;
	}

	tmp = PyObject_Realloc(*decl, size);
	if (!tmp) {
		PyErr_NoMemory();
		return -1;
	}

	*decl = tmp;
	return 0;
}

/*
 * Parse one abbreviation declaration from buffer at *offset, advancing
 * *offset past what was consumed. The abbreviation code is stored in *code.
 *
 * Returns a new AbbrevDecl. Returns NULL *without* an exception set when the
 * code is 0 (the caller uses this to detect the end of the table), and NULL
 * with an exception set on error.
 */
static PyObject *LLDwarf_ParseAbbrevDecl(Py_buffer *buffer, Py_ssize_t *offset,
					 uint64_t *code)
{
	AbbrevDecl *decl = NULL;
	uint8_t children;
	size_t num = 0, capacity = 1;

	if (read_uleb128(buffer, offset, code) == -1) {
		if (PyErr_ExceptionMatches(PyExc_EOFError)) {
			PyErr_SetString(PyExc_ValueError,
					"abbreviation declaration code is truncated");
		}
		return NULL;
	}
	if (*code == 0)
		return NULL;

	if (AbbrevDecl_Realloc(&decl, capacity) == -1)
		return NULL;

	if (read_uleb128(buffer, offset, &decl->tag) == -1) {
		if (PyErr_ExceptionMatches(PyExc_EOFError)) {
			PyErr_SetString(PyExc_ValueError,
					"abbreviation declaration tag is truncated");
		}
		goto err;
	}

	if (read_u8(buffer, offset, &children)) {
		PyErr_SetString(PyExc_ValueError,
				"abbreviation declaration children flag is truncated");
		goto err;
	}
	decl->children = children != DW_CHILDREN_no;

	for (;;) {
		uint64_t name, form;

		if (read_uleb128(buffer, offset, &name) == -1) {
			if (PyErr_ExceptionMatches(PyExc_EOFError)) {
				PyErr_SetString(PyExc_ValueError,
						"abbreviation specification name is truncated");
			}
			goto err;
		}
		if (read_uleb128(buffer, offset, &form) == -1) {
			if (PyErr_ExceptionMatches(PyExc_EOFError)) {
				PyErr_SetString(PyExc_ValueError,
						"abbreviation specification form is truncated");
			}
			goto err;
		}
		/* A (0, 0) pair terminates the attribute specification list. */
		if (name == 0 && form == 0)
			break;

		/* Grow geometrically so appends are amortized O(1). */
		if (num >= capacity) {
			capacity *= 2;
			if (AbbrevDecl_Realloc(&decl, capacity) == -1)
				goto err;
		}

		decl->attribs[num].name = name;
		decl->attribs[num].form = form;
		num++;
	}

	/* Trim the storage to the number of specifications actually read. */
	if (AbbrevDecl_Realloc(&decl, num) == -1)
		goto err;

	return (PyObject *)PyObject_InitVar((PyVarObject *)decl,
					    &AbbrevDecl_type, num);

err:
	/* decl is still raw storage here, not an initialized object. */
	PyObject_Free(decl);
	return NULL;
}
/*
 * Parse an abbreviation table from buffer at *offset, advancing *offset.
 *
 * Declarations are read until LLDwarf_ParseAbbrevDecl returns NULL without an
 * exception set, which ends the table.
 *
 * Returns a new dict mapping abbreviation code (int) to AbbrevDecl, or NULL
 * with an exception set. A code that appears twice raises ValueError.
 */
PyObject *LLDwarf_ParseAbbrevTable(Py_buffer *buffer, Py_ssize_t *offset)
{
	PyObject *table;

	table = PyDict_New();
	if (!table)
		return NULL;

	for (;;) {
		PyObject *key, *value;
		uint64_t code;
		int ret;

		value = LLDwarf_ParseAbbrevDecl(buffer, offset, &code);
		if (!value) {
			if (PyErr_Occurred())
				goto err;
			else
				break;
		}

		key = PyLong_FromUnsignedLongLong(code);
		if (key == NULL) {
			Py_DECREF(value);
			goto err;
		}

		if (PyDict_GetItem(table, key) != NULL) {
			Py_DECREF(key);
			Py_DECREF(value);
			PyErr_Format(PyExc_ValueError, "duplicate abbreviation code %llu\n",
				     (unsigned long long)code);
			goto err;
		}

		/*
		 * PyDict_SetItem does not steal references, so both key and
		 * value must be released here whether or not it succeeded.
		 * (Previously the key was leaked on every successful insert.)
		 */
		ret = PyDict_SetItem(table, key, value);
		Py_DECREF(key);
		Py_DECREF(value);
		if (ret == -1)
			goto err;
	}

	return table;

err:
	Py_DECREF(table);
	return NULL;
}
static PySequenceMethods AbbrevDecl_as_sequence = {
(lenfunc)AbbrevDecl_length, /* sq_length */
NULL, /* sq_concat */
NULL, /* sq_repeat */
(ssizeargfunc)AbbrevDecl_item, /* sq_item */
};
static PyMemberDef AbbrevDecl_members[] = {
{"tag", T_UINT64T, offsetof(AbbrevDecl, tag), 0,
"tag of this entry (DW_TAG_*)"},
{"children", T_BOOL, offsetof(AbbrevDecl, children), 0,
"whether this entry has child entries"},
{},
};
/* Docstring for the AbbrevDecl type; used as tp_doc of AbbrevDecl_type. */
#define AbbrevDecl_DOC \
"AbbrevDecl(tag, children, attribs) -> new abbreviation declaration\n\n" \
"Create a new DWARF abbreviation declaration. len(decl) is the number of\n" \
"attributes and decl[i] is the ith attribute specification.\n\n" \
"Arguments:\n" \
"tag -- integer tag of the abbreviation declaration\n" \
"children -- boolean specifying whether this entry has child entries\n" \
"attribs -- iterable of (name, form) pairs"
PyTypeObject AbbrevDecl_type = {
PyVarObject_HEAD_INIT(NULL, 0)
"drgn.lldwarf.AbbrevDecl", /* tp_name */
sizeof(AbbrevDecl), /* tp_basicsize */
sizeof(struct AttribSpec), /* tp_itemsize */
(destructor)AbbrevDecl_dealloc, /* tp_dealloc */
NULL, /* tp_print */
NULL, /* tp_getattr */
NULL, /* tp_setattr */
NULL, /* tp_as_async */
(reprfunc)AbbrevDecl_repr, /* tp_repr */
NULL, /* tp_as_number */
&AbbrevDecl_as_sequence, /* tp_as_sequence */
NULL, /* tp_as_mapping */
NULL, /* tp_hash */
NULL, /* tp_call */
NULL, /* tp_str */
NULL, /* tp_getattro */
NULL, /* tp_setattro */
NULL, /* tp_as_buffer */
Py_TPFLAGS_DEFAULT, /* tp_flags */
AbbrevDecl_DOC, /* tp_doc */
NULL, /* tp_traverse */
NULL, /* tp_clear */
(richcmpfunc)AbbrevDecl_richcompare, /* tp_richcompare */
0, /* tp_weaklistoffset */
NULL, /* tp_iter */
NULL, /* tp_iternext */
NULL, /* tp_methods */
AbbrevDecl_members, /* tp_members */
NULL, /* tp_getset */
NULL, /* tp_base */
NULL, /* tp_dict */
NULL, /* tp_descr_get */
NULL, /* tp_descr_set */
0, /* tp_dictoffset */
NULL, /* tp_init */
NULL, /* tp_alloc */
(newfunc)AbbrevDecl_new, /* tp_new */
};

167
lldwarf/cu.c Normal file
View File

@ -0,0 +1,167 @@
#include "lldwarf.h"
/* tp_dealloc for CompilationUnitHeader: release the object via tp_free. */
static void CompilationUnitHeader_dealloc(CompilationUnitHeader *self)
{
	PyTypeObject *type = Py_TYPE(self);

	type->tp_free((PyObject *)self);
}
/*
 * die_offset() method: offset of the first DIE of this CU, i.e. the CU's
 * starting offset plus the size of its header.
 *
 * The header is 11 bytes in the 32-bit format (4 + 2 + 4 + 1) and 23 bytes
 * in the 64-bit format (12 + 2 + 8 + 1), matching the fields read by
 * LLDwarf_ParseCompilationUnitHeader.
 *
 * Returns a new int, or NULL with OverflowError set.
 */
PyObject *CompilationUnitHeader_die_offset(CompilationUnitHeader *self)
{
	Py_ssize_t header_size;
	Py_ssize_t die_offset;

	if (self->is_64_bit)
		header_size = 23;
	else
		header_size = 11;

	if (__builtin_add_overflow(self->offset, header_size, &die_offset)) {
		PyErr_SetString(PyExc_OverflowError, "DIE offset too large");
		return NULL;
	}

	return PyLong_FromSsize_t(die_offset);
}
/*
 * next_offset() method: offset just past this CU, i.e. the CU's starting
 * offset plus the size of the unit_length field (4 bytes, or 12 in the
 * 64-bit format) plus unit_length itself.
 *
 * Returns a new int, or NULL with OverflowError set.
 */
PyObject *CompilationUnitHeader_next_offset(CompilationUnitHeader *self)
{
	uint64_t length_field_size;
	uint64_t total_length;
	Py_ssize_t next;

	if (self->is_64_bit)
		length_field_size = 12;
	else
		length_field_size = 4;

	if (__builtin_add_overflow(self->unit_length, length_field_size,
				   &total_length) ||
	    __builtin_add_overflow(self->offset, total_length, &next)) {
		PyErr_SetString(PyExc_OverflowError, "next offset too large");
		return NULL;
	}

	return PyLong_FromSsize_t(next);
}
/*
 * Parse a compilation unit header from buffer at *offset, advancing *offset
 * past each field that is read.
 *
 * Returns a new CompilationUnitHeader, or NULL with an exception set. Any
 * failed field read is reported as ValueError ("... is truncated").
 */
PyObject *LLDwarf_ParseCompilationUnitHeader(Py_buffer *buffer,
					     Py_ssize_t *offset)
{
	CompilationUnitHeader *header;
	uint32_t initial_length;

	header = PyObject_New(CompilationUnitHeader, &CompilationUnitHeader_type);
	if (header == NULL)
		return NULL;

	header->offset = *offset;

	if (read_u32(buffer, offset, &initial_length) == -1)
		goto truncated;

	/* An initial length of 0xffffffff selects the 64-bit format. */
	header->is_64_bit = initial_length == UINT32_C(0xffffffff);
	if (!header->is_64_bit) {
		header->unit_length = initial_length;
	} else {
		if (read_u64(buffer, offset, &header->unit_length) == -1)
			goto truncated;
	}

	if (read_u16(buffer, offset, &header->version) == -1)
		goto truncated;

	/* debug_abbrev_offset is 8 bytes in the 64-bit format, 4 otherwise. */
	if (!header->is_64_bit) {
		unsigned int abbrev_offset32;

		if (read_u32(buffer, offset, &abbrev_offset32) == -1)
			goto truncated;
		header->debug_abbrev_offset = abbrev_offset32;
	} else {
		if (read_u64(buffer, offset, &header->debug_abbrev_offset) == -1)
			goto truncated;
	}

	if (read_u8(buffer, offset, &header->address_size) == -1)
		goto truncated;

	return (PyObject *)header;

truncated:
	PyErr_SetString(PyExc_ValueError,
			"compilation unit header is truncated");
	Py_DECREF(header);
	return NULL;
}
static PyMethodDef CompilationUnitHeader_methods[] = {
{"die_offset", (PyCFunction)CompilationUnitHeader_die_offset,
METH_NOARGS,
"die_offset() -> int\n\n"
"Get the offset into the buffer where the DIE for this CU begins. This\n"
"is the starting offset of the CU plus the length of the header."},
{"next_offset", (PyCFunction)CompilationUnitHeader_next_offset,
METH_NOARGS,
"next_offset() -> int\n\n"
"Get the offset into the buffer where the next CU starts. This\n"
"is the starting offset of the CU plus the length of the unit,\n"
"including the header. If this is the last CU, this offset is the\n"
"end of the .debug_info section."},
{},
};
static PyMemberDef CompilationUnitHeader_members[] = {
{"offset", T_PYSSIZET, offsetof(CompilationUnitHeader, offset), 0,
"offset into the buffer where this CU starts"},
{"unit_length", T_UINT64T, offsetof(CompilationUnitHeader, unit_length), 0,
"length of this CU, not including the unit_length field"},
{"version", T_UINT16T, offsetof(CompilationUnitHeader, version), 0,
"format version of this CU"},
{"debug_abbrev_offset", T_UINT64T, offsetof(CompilationUnitHeader, debug_abbrev_offset), 0,
"location of this CU's abbreviation table as an offset into the .debug_abbrev section"},
{"address_size", T_UINT8T, offsetof(CompilationUnitHeader, address_size), 0,
"size of an address in this CU"},
{"is_64_bit", T_BOOL, offsetof(CompilationUnitHeader, is_64_bit), 0,
"whether this CU is using the 64-bit format"},
{},
};
/*
 * Docstring for the CompilationUnitHeader type; used as tp_doc of
 * CompilationUnitHeader_type.
 */
#define CompilationUnitHeader_DOC \
"CompilationUnitHeader(offset, unit_length, version,\n" \
" debug_abbrev_offset, address_size,\n" \
" is_64_bit) -> new compilation unit header\n\n" \
"Create a new DWARF compilation unit header.\n\n" \
"Arguments:\n" \
"offset -- integer offset\n" \
"unit_length -- integer length\n" \
"version -- integer format version\n" \
"debug_abbrev_offset -- integer offset\n" \
"address_size -- integer size\n" \
"is_64_bit -- boolean"
PyTypeObject CompilationUnitHeader_type = {
PyVarObject_HEAD_INIT(NULL, 0)
"drgn.lldwarf.CompilationUnitHeader", /* tp_name */
sizeof(CompilationUnitHeader), /* tp_basicsize */
0, /* tp_itemsize */
(destructor)CompilationUnitHeader_dealloc, /* tp_dealloc */
NULL, /* tp_print */
NULL, /* tp_getattr */
NULL, /* tp_setattr */
NULL, /* tp_as_async */
LLDwarfObject_repr, /* tp_repr */
NULL, /* tp_as_number */
NULL, /* tp_as_sequence */
NULL, /* tp_as_mapping */
NULL, /* tp_hash */
NULL, /* tp_call */
NULL, /* tp_str */
NULL, /* tp_getattro */
NULL, /* tp_setattro */
NULL, /* tp_as_buffer */
Py_TPFLAGS_DEFAULT, /* tp_flags */
CompilationUnitHeader_DOC, /* tp_doc */
NULL, /* tp_traverse */
NULL, /* tp_clear */
LLDwarfObject_richcompare, /* tp_richcompare */
0, /* tp_weaklistoffset */
NULL, /* tp_iter */
NULL, /* tp_iternext */
CompilationUnitHeader_methods, /* tp_methods */
CompilationUnitHeader_members, /* tp_members */
NULL, /* tp_getset */
NULL, /* tp_base */
NULL, /* tp_dict */
NULL, /* tp_descr_get */
NULL, /* tp_descr_set */
0, /* tp_dictoffset */
LLDwarfObject_init, /* tp_init */
};

700
lldwarf/die.c Normal file
View File

@ -0,0 +1,700 @@
#include "lldwarf.h"
#include "dwarfdefs.h"
/*
 * Fill in the value of attrib from a Python object, interpreting the object
 * according to attrib->form (which the caller must already have set):
 *
 * - integer-valued forms: an int, stored in attrib->u (attrib->s for
 *   DW_FORM_sdata)
 * - block, exprloc and string forms: an (offset, length) pair
 * - DW_FORM_data1/2/4/8: a buffer object of exactly that many bytes
 * - DW_FORM_flag: any object, converted by truthiness
 * - DW_FORM_flag_present: the object is ignored
 *
 * Returns 0 on success, or -1 with an exception set. An unrecognized form
 * raises ValueError.
 */
static int DwarfDie_AttribFromObject(struct DwarfAttrib *attrib, PyObject *object)
{
	PyObject *value;
	Py_buffer buffer;
	Py_ssize_t len;
	int truth;

	switch (attrib->form) {
	case DW_FORM_addr:
	case DW_FORM_udata:
	case DW_FORM_ref_addr:	/* accepted by LLDwarf_ParseAttrib, so accept it here too */
	case DW_FORM_ref_udata:
	case DW_FORM_ref1:
	case DW_FORM_ref2:
	case DW_FORM_ref4:
	case DW_FORM_ref8:
	case DW_FORM_ref_sig8:
	case DW_FORM_sec_offset:
	case DW_FORM_strp:
		attrib->u = PyLong_AsUint64_t(object);
		if (PyErr_Occurred())
			return -1;
		return 0;
	case DW_FORM_sdata:
		attrib->s = PyLong_AsInt64_t(object);
		if (PyErr_Occurred())
			return -1;
		return 0;
	case DW_FORM_block1:
	case DW_FORM_block2:
	case DW_FORM_block4:
	case DW_FORM_block:
	case DW_FORM_exprloc:
	case DW_FORM_string:
		value = PySequence_Tuple(object);
		if (!value)
			return -1;
		if (PyTuple_GET_SIZE(value) != 2) {
			PyErr_SetString(PyExc_ValueError, "attribute value must be pair");
			Py_DECREF(value);
			return -1;
		}
		attrib->offset = PyLong_AsSsize_t(PyTuple_GET_ITEM(value, 0));
		if (PyErr_Occurred()) {
			if (PyErr_ExceptionMatches(PyExc_OverflowError))
				PyErr_SetString(PyExc_OverflowError, "offset too big");
			Py_DECREF(value);
			return -1;
		}
		attrib->length = PyLong_AsSsize_t(PyTuple_GET_ITEM(value, 1));
		if (PyErr_Occurred()) {
			if (PyErr_ExceptionMatches(PyExc_OverflowError))
				PyErr_SetString(PyExc_OverflowError, "length too big");
			Py_DECREF(value);
			return -1;
		}
		Py_DECREF(value);
		return 0;
	case DW_FORM_data1:
		len = 1;
		goto data;
	case DW_FORM_data2:
		len = 2;
		goto data;
	case DW_FORM_data4:
		len = 4;
		goto data;
	case DW_FORM_data8:
		len = 8;
data:
		if (PyObject_GetBuffer(object, &buffer, PyBUF_SIMPLE) == -1)
			return -1;
		if (buffer.len != len) {
			PyErr_Format(PyExc_ValueError, "DW_FORM_data%zd buffer must have length %zd",
				     len, len);
			PyBuffer_Release(&buffer);
			return -1;
		}
		memcpy(attrib->data, buffer.buf, len);
		PyBuffer_Release(&buffer);
		return 0;
	case DW_FORM_flag:
		/*
		 * PyObject_IsTrue returns -1 on error; do not store that as
		 * the flag value and report success.
		 */
		truth = PyObject_IsTrue(object);
		if (truth == -1)
			return -1;
		attrib->u = truth;
		return 0;
	case DW_FORM_flag_present:
		attrib->u = 1;
		return 0;
	default:
		PyErr_Format(PyExc_ValueError, "unknown form %llu",
			     (unsigned long long)attrib->form);
		return -1;
	}
}
/*
 * tp_new for DwarfDie: DwarfDie(offset, die_length, tag, children, attributes).
 *
 * attributes is converted to a tuple of (name, form, value) triples, one
 * per variable-size item of the new object. children is either None or any
 * iterable, which is copied into a new list.
 *
 * Returns the new object, or NULL with an exception set.
 */
static PyObject *DwarfDie_new(PyTypeObject *subtype, PyObject *args,
			      PyObject *kwds)
{
	static char *keywords[] = {
		"offset", "die_length", "tag", "children", "attributes", NULL
	};
	PyObject *offset_obj, *length_obj, *tag_obj;
	PyObject *children_obj, *attribs_obj;
	PyObject *attrib_tuple;
	PyObject *triple = NULL;
	DwarfDie *die = NULL;
	Py_ssize_t idx, count;

	if (!PyArg_ParseTupleAndKeywords(args, kwds, "OOOOO:DwarfDie", keywords,
					 &offset_obj, &length_obj, &tag_obj,
					 &children_obj, &attribs_obj))
		return NULL;

	attrib_tuple = PySequence_Tuple(attribs_obj);
	if (attrib_tuple == NULL)
		return NULL;

	count = PyTuple_GET_SIZE(attrib_tuple);
	die = (DwarfDie *)subtype->tp_alloc(subtype, count);
	if (die == NULL)
		goto fail;

	die->offset = PyLong_AsSsize_t(offset_obj);
	if (PyErr_Occurred()) {
		if (PyErr_ExceptionMatches(PyExc_OverflowError))
			PyErr_SetString(PyExc_OverflowError, "offset too big");
		goto fail;
	}

	die->die_length = PyLong_AsSsize_t(length_obj);
	if (PyErr_Occurred()) {
		if (PyErr_ExceptionMatches(PyExc_OverflowError))
			PyErr_SetString(PyExc_OverflowError, "die_length too big");
		goto fail;
	}

	die->tag = PyLong_AsUint64_t(tag_obj);
	if (PyErr_Occurred()) {
		if (PyErr_ExceptionMatches(PyExc_OverflowError))
			PyErr_SetString(PyExc_OverflowError, "tag too big");
		goto fail;
	}

	if (children_obj != Py_None) {
		die->children = PySequence_List(children_obj);
		if (die->children == NULL)
			goto fail;
	} else {
		Py_INCREF(Py_None);
		die->children = Py_None;
	}

	/* Start from all-zero attributes before filling each one in. */
	memset(die->attribs, 0, count * sizeof(die->attribs[0]));

	for (idx = 0; idx < count; idx++) {
		struct DwarfAttrib *attrib = &die->attribs[idx];

		triple = PySequence_Tuple(PyTuple_GET_ITEM(attrib_tuple, idx));
		if (triple == NULL)
			goto fail;

		if (PyTuple_GET_SIZE(triple) != 3) {
			PyErr_SetString(PyExc_ValueError, "attribute must be triple");
			goto fail_triple;
		}

		attrib->name = PyLong_AsUint64_t(PyTuple_GET_ITEM(triple, 0));
		if (PyErr_Occurred()) {
			if (PyErr_ExceptionMatches(PyExc_OverflowError))
				PyErr_SetString(PyExc_OverflowError, "name too big");
			goto fail_triple;
		}

		attrib->form = PyLong_AsUint64_t(PyTuple_GET_ITEM(triple, 1));
		if (PyErr_Occurred()) {
			if (PyErr_ExceptionMatches(PyExc_OverflowError))
				PyErr_SetString(PyExc_OverflowError, "form too big");
			goto fail_triple;
		}

		/* The form is set now, so the value can be interpreted. */
		if (DwarfDie_AttribFromObject(attrib,
					      PyTuple_GET_ITEM(triple, 2)) == -1)
			goto fail_triple;

		Py_DECREF(triple);
	}

	Py_DECREF(attrib_tuple);
	return (PyObject *)die;

fail_triple:
	Py_DECREF(triple);
fail:
	Py_XDECREF(die);
	Py_XDECREF(attrib_tuple);
	return NULL;
}
/*
 * tp_dealloc for DwarfDie: drop the reference to the children object (which
 * may be NULL) and release the object via tp_free.
 */
static void DwarfDie_dealloc(DwarfDie *self)
{
	PyTypeObject *type = Py_TYPE(self);

	Py_XDECREF(self->children);
	type->tp_free((PyObject *)self);
}
/* tp_traverse for DwarfDie: children is the only object reference it holds. */
static int DwarfDie_traverse(DwarfDie *self, visitproc visit, void *arg)
{
	/* Py_VISIT returns early from this function if visit() returns non-zero. */
	Py_VISIT(self->children);

	return 0;
}
/*
 * tp_repr for DwarfDie. Py_ReprEnter/Py_ReprLeave guard against recursion
 * through the children list; a DIE that is already being formatted is shown
 * as "DwarfDie(...)".
 *
 * Returns a new str object, or NULL with an exception set.
 */
static PyObject *DwarfDie_repr(DwarfDie *self)
{
	PyObject *attribs;
	PyObject *result = NULL;
	int status;

	status = Py_ReprEnter((PyObject *)self);
	if (status != 0) {
		if (status == -1)
			return NULL;
		return PyUnicode_FromString("DwarfDie(...)");
	}

	/* The attributes are formatted as a tuple of the DIE's own items. */
	attribs = PySequence_Tuple((PyObject *)self);
	if (attribs != NULL) {
		/* XXX: children = NULL? */
		result = PyUnicode_FromFormat("DwarfDie(offset=%zd, die_length=%zd, tag=%llu, children=%R, attributes=%R)",
					      self->offset, self->die_length,
					      (unsigned long long)self->tag,
					      self->children, attribs);
		Py_DECREF(attribs);
	}

	Py_ReprLeave((PyObject *)self);
	return result;
}
/*
 * tp_richcompare for DwarfDie. Only == and != are supported; any other
 * operator raises TypeError.
 *
 * The generic LLDwarfObject comparison runs first; only when it reports
 * equality are the attribute arrays compared, by length and then bytewise.
 */
static PyObject *DwarfDie_richcompare(DwarfDie *self, PyObject *other_, int op)
{
	int equal;

	if (op != Py_EQ && op != Py_NE) {
		PyErr_SetString(PyExc_TypeError, "not supported");
		return NULL;
	}

	equal = LLDwarfObject_RichCompareBool((PyObject *)self, other_, Py_EQ);
	if (equal == -1)
		return NULL;

	if (equal) {
		/*
		 * NOTE(review): the cast presumes the generic comparison only
		 * reports equality for another DwarfDie -- confirm against
		 * LLDwarfObject_RichCompareBool.
		 */
		DwarfDie *rhs = (DwarfDie *)other_;

		equal = (Py_SIZE(self) == Py_SIZE(rhs) &&
			 memcmp(self->attribs, rhs->attribs,
				Py_SIZE(self) * sizeof(struct DwarfAttrib)) == 0);
	}

	if (op == Py_NE)
		equal = !equal;
	if (!equal)
		Py_RETURN_FALSE;
	Py_RETURN_TRUE;
}
/* sq_length for DwarfDie: the number of attributes. */
static Py_ssize_t DwarfDie_length(DwarfDie *self)
{
	Py_ssize_t count = Py_SIZE(self);

	return count;
}
static PyObject *DwarfDie_ObjectFromAttrib(struct DwarfAttrib *attrib)
{
switch (attrib->form) {
case DW_FORM_addr:
case DW_FORM_udata:
case DW_FORM_ref_udata:
case DW_FORM_ref1:
case DW_FORM_ref2:
case DW_FORM_ref4:
case DW_FORM_ref8:
case DW_FORM_ref_sig8:
case DW_FORM_sec_offset:
case DW_FORM_strp:
return PyLong_FromUnsignedLongLong(attrib->u);
case DW_FORM_block1:
case DW_FORM_block2:
case DW_FORM_block4:
case DW_FORM_block:
case DW_FORM_exprloc:
case DW_FORM_string:
return Py_BuildValue("nn", attrib->offset, attrib->length);
case DW_FORM_data1:
return PyBytes_FromStringAndSize(attrib->data, 1);
case DW_FORM_data2:
return PyBytes_FromStringAndSize(attrib->data, 2);
case DW_FORM_data4:
return PyBytes_FromStringAndSize(attrib->data, 4);
case DW_FORM_data8:
return PyBytes_FromStringAndSize(attrib->data, 8);
case DW_FORM_sdata:
return PyLong_FromLongLong(attrib->s);
case DW_FORM_flag:
return PyBool_FromLong(attrib->u ? 1 : 0);
case DW_FORM_flag_present:
Py_RETURN_TRUE;
default:
PyErr_Format(PyExc_ValueError, "unknown form %llu",
attrib->form);
return NULL;
}
}
/*
 * sq_item for DwarfDie: return attribute i as a new (name, form, value)
 * tuple, or raise IndexError if i is out of range.
 */
static PyObject *DwarfDie_item(DwarfDie *self, Py_ssize_t i)
{
	struct DwarfAttrib *attrib;
	PyObject *value_obj;
	PyObject *triple;

	if (i < 0 || i >= Py_SIZE(self)) {
		PyErr_SetString(PyExc_IndexError, "index out of range");
		return NULL;
	}

	attrib = &self->attribs[i];
	value_obj = DwarfDie_ObjectFromAttrib(attrib);
	if (value_obj == NULL)
		return NULL;

	/* "O" takes its own reference, so ours is dropped afterwards. */
	triple = Py_BuildValue("KKO", (unsigned long long)attrib->name,
			       (unsigned long long)attrib->form, value_obj);
	Py_DECREF(value_obj);
	return triple;
}
/*
 * find(name) method: look up the first attribute whose name (DW_AT_*)
 * equals name.
 *
 * Returns a new (form, value) tuple. Raises KeyError if the DIE has no such
 * attribute, and propagates the conversion error if name cannot be
 * converted to an unsigned 64-bit integer.
 */
static PyObject *DwarfDie_find(DwarfDie *self, PyObject *args, PyObject *kwds)
{
	static char *keywords[] = {"name", NULL};
	struct DwarfAttrib *attrib;
	PyObject *value, *ret;
	PyObject *name_obj;
	uint64_t name;
	Py_ssize_t i, len;

	if (!PyArg_ParseTupleAndKeywords(args, kwds, "O:find", keywords,
					 &name_obj))
		return NULL;

	/*
	 * Detect failure through the error indicator, as every other
	 * PyLong_AsUint64_t caller in this file does. Testing the result for
	 * zero was wrong both ways: a legitimate name of 0 returned NULL with
	 * no exception set, and a failed conversion went unnoticed.
	 */
	name = PyLong_AsUint64_t(name_obj);
	if (PyErr_Occurred())
		return NULL;

	len = Py_SIZE(self);
	for (i = 0; i < len; i++) {
		if (self->attribs[i].name == name)
			break;
	}
	if (i == len) {
		PyErr_SetString(PyExc_KeyError, "no attribute with that name");
		return NULL;
	}

	attrib = &self->attribs[i];
	value = DwarfDie_ObjectFromAttrib(attrib);
	if (!value)
		return NULL;

	ret = Py_BuildValue("KO", (unsigned long long)attrib->form, value);
	Py_DECREF(value);
	return ret;
}
/*
 * Look up the abbreviation declaration for code in abbrev_table.
 *
 * Returns a new reference to the value stored under code, cast to
 * AbbrevDecl * (the value's type is not checked here), or NULL with an
 * exception set. A missing code is reported as ValueError; any other
 * lookup failure is propagated unchanged.
 */
static AbbrevDecl *get_decl(PyObject *abbrev_table, uint64_t code)
{
	PyObject *key;
	PyObject *value;

	key = PyLong_FromUnsignedLongLong(code);
	if (!key)
		return NULL;

	value = PyObject_GetItem(abbrev_table, key);
	Py_DECREF(key);
	/*
	 * Only a KeyError means the code is unknown. Rewriting every failure
	 * would hide unrelated errors (e.g. abbrev_table not being
	 * subscriptable) behind a misleading message.
	 */
	if (!value && PyErr_ExceptionMatches(PyExc_KeyError)) {
		PyErr_Format(PyExc_ValueError, "unknown abbreviation code %llu\n",
			     (unsigned long long)code);
	}
	return (AbbrevDecl *)value;
}
/*
 * Parse the value of one attribute from buffer at *offset, advancing
 * *offset past it. attrib->form must already be set and selects the
 * encoding; cu supplies the address size and 32/64-bit format where the
 * encoding depends on them.
 *
 * Block, exprloc and string values are not copied: only their offset and
 * length within the buffer are recorded.
 *
 * Returns 0 on success, or -1 with an exception set. DW_FORM_indirect and
 * unrecognized forms raise ValueError.
 */
static int LLDwarf_ParseAttrib(Py_buffer *buffer, Py_ssize_t *offset,
			       CompilationUnitHeader *cu,
			       struct DwarfAttrib *attrib)
{
	uint8_t u8;
	uint16_t u16;
	uint32_t u32;
	uint64_t u64;

	switch (attrib->form) {
	/* address */
	case DW_FORM_addr:
		switch (cu->address_size) {
		case 4:
			if (read_u32(buffer, offset, &u32) == -1)
				return -1;
			attrib->u = u32;
			return 0;
		case 8:
			return read_u64(buffer, offset, &attrib->u);
		default:
			PyErr_Format(PyExc_ValueError, "unsupported address size %u",
				     (unsigned int)cu->address_size);
			return -1;
		}
	/* block */
	case DW_FORM_block1:
		if (read_u8(buffer, offset, &u8) == -1)
			return -1;
		attrib->length = u8;
		goto block;
	case DW_FORM_block2:
		if (read_u16(buffer, offset, &u16) == -1)
			return -1;
		attrib->length = u16;
		goto block;
	case DW_FORM_block4:
		if (read_u32(buffer, offset, &u32) == -1)
			return -1;
		attrib->length = u32;
		goto block;
	case DW_FORM_block:
	/* exprloc */
	case DW_FORM_exprloc:
		if (read_uleb128(buffer, offset, &u64) == -1)
			return -1;
		if (u64 > PY_SSIZE_T_MAX) {
			PyErr_SetString(PyExc_ValueError, "attribute length too big");
			return -1;
		}
		attrib->length = u64;
block:
		/* Check the block lies within the buffer, then skip over it. */
		if (read_check_bounds(buffer, *offset, attrib->length) == -1)
			return -1;
		attrib->offset = *offset;
		*offset += attrib->length;
		return 0;
	/* constant */
	case DW_FORM_data1:
		return read_buffer(buffer, offset, &attrib->data, 1);
	case DW_FORM_data2:
		return read_buffer(buffer, offset, &attrib->data, 2);
	case DW_FORM_data4:
		return read_buffer(buffer, offset, &attrib->data, 4);
	case DW_FORM_data8:
		return read_buffer(buffer, offset, &attrib->data, 8);
	case DW_FORM_sdata:
		return read_sleb128(buffer, offset, &attrib->s);
	case DW_FORM_udata:
	/* reference */
	case DW_FORM_ref_udata:
		return read_uleb128(buffer, offset, &attrib->u);
	case DW_FORM_ref_addr:
	/* lineptr, loclistptr, macptr, rangelistptr */
	case DW_FORM_sec_offset:
	/* string */
	case DW_FORM_strp:
		/* These are 8 bytes wide in the 64-bit format, 4 otherwise. */
		if (cu->is_64_bit) {
			return read_u64(buffer, offset, &attrib->u);
		} else {
			if (read_u32(buffer, offset, &u32) == -1)
				return -1;
			attrib->u = u32;
			return 0;
		}
	case DW_FORM_string:
		attrib->offset = *offset;
		if (read_strlen(buffer, offset, &attrib->length) == -1)
			return -1;
		return 0;
	/* flag */
	case DW_FORM_flag_present:
		attrib->u = 1;
		return 0;
	case DW_FORM_flag:
	/* reference */
	case DW_FORM_ref1:
		if (read_u8(buffer, offset, &u8) == -1)
			return -1;
		attrib->u = u8;
		return 0;
	case DW_FORM_ref2:
		if (read_u16(buffer, offset, &u16) == -1)
			return -1;
		attrib->u = u16;
		return 0;
	case DW_FORM_ref4:
		if (read_u32(buffer, offset, &u32) == -1)
			return -1;
		attrib->u = u32;
		return 0;
	case DW_FORM_ref8:
	case DW_FORM_ref_sig8:
		return read_u64(buffer, offset, &attrib->u);
	case DW_FORM_indirect:
		PyErr_Format(PyExc_ValueError, "DW_FORM_indirect is not supported");
		return -1;
	default:
		/*
		 * The value is printed in decimal, so it must not carry a
		 * "0x" prefix (the old "0x%llu" rendered form 33 as "0x33").
		 * This now matches the "unknown form %llu" message used by
		 * the conversion helpers in this file.
		 */
		PyErr_Format(PyExc_ValueError, "unknown form %llu",
			     (unsigned long long)attrib->form);
		return -1;
	}
}
/*
 * Parse consecutive sibling DIEs from buffer at *offset until
 * LLDwarf_ParseDie returns NULL without an exception set.
 *
 * Each DIE is parsed with jump_to_sibling enabled; recurse is passed
 * through unchanged.
 *
 * Returns a new list of DwarfDie objects, or NULL with an exception set.
 */
PyObject *LLDwarf_ParseDieSiblings(Py_buffer *buffer, Py_ssize_t *offset,
				   CompilationUnitHeader *cu,
				   PyObject *abbrev_table, bool recurse)
{
	PyObject *siblings;

	siblings = PyList_New(0);
	if (siblings == NULL)
		return NULL;

	for (;;) {
		PyObject *die;
		int status;

		die = LLDwarf_ParseDie(buffer, offset, cu, abbrev_table,
				       recurse, true);
		if (PyErr_Occurred())
			goto fail;
		/* NULL without an exception marks the end of the siblings. */
		if (die == NULL)
			break;

		/* The list takes its own reference; ours is dropped either way. */
		status = PyList_Append(siblings, die);
		Py_DECREF(die);
		if (status == -1)
			goto fail;
	}

	return siblings;

fail:
	Py_DECREF(siblings);
	return NULL;
}
/*
 * Parse one debugging information entry from buffer at *offset, advancing
 * *offset past it.
 *
 * Arguments:
 * cu -- header of the CU the DIE belongs to
 * abbrev_table -- mapping from abbreviation code to AbbrevDecl
 * recurse -- parse the DIE's children (and their descendants) as well
 * jump_to_sibling -- leave *offset at the next sibling: by jumping to the
 *   DW_AT_sibling offset when the DIE has one, otherwise by parsing (and
 *   keeping) the children
 *
 * Returns a new DwarfDie. Returns NULL *without* an exception set for a
 * null entry (abbreviation code 0), and NULL with an exception set on error.
 * If the DIE has children that were not parsed, its children field is left
 * NULL.
 */
PyObject *LLDwarf_ParseDie(Py_buffer *buffer, Py_ssize_t *offset,
			   CompilationUnitHeader *cu, PyObject *abbrev_table,
			   bool recurse, bool jump_to_sibling)
{
	Py_ssize_t orig_offset;
	DwarfDie *die;
	AbbrevDecl *decl;
	uint64_t code;
	Py_ssize_t i, len;
	uint64_t sibling = 0;

	orig_offset = *offset;

	if (read_uleb128(buffer, offset, &code) == -1) {
		if (PyErr_ExceptionMatches(PyExc_EOFError)) {
			PyErr_SetString(PyExc_ValueError,
					"DIE abbreviation code is truncated");
		}
		return NULL;
	}
	if (code == 0)
		return NULL;

	decl = get_decl(abbrev_table, code);
	if (!decl)
		return NULL;

	len = Py_SIZE(decl);
	die = PyObject_NewVar(DwarfDie, &DwarfDie_type, len);
	if (!die) {
		Py_DECREF(decl);
		return NULL;
	}

	die->offset = orig_offset;
	die->tag = decl->tag;
	/*
	 * PyObject_NewVar does not zero the object; clear children before any
	 * error path can reach the destructor.
	 */
	die->children = NULL;
	memset(die->attribs, 0, len * sizeof(die->attribs[0]));

	for (i = 0; i < len; i++) {
		die->attribs[i].name = decl->attribs[i].name;
		die->attribs[i].form = decl->attribs[i].form;
		if (LLDwarf_ParseAttrib(buffer, offset, cu, &die->attribs[i]) == -1)
			goto err;
		if (die->attribs[i].name == DW_AT_sibling)
			sibling = die->attribs[i].u;
	}

	die->die_length = *offset - orig_offset;

	if (!decl->children) {
		Py_INCREF(Py_None);
		die->children = Py_None;
	} else if (recurse || (jump_to_sibling && !sibling)) {
		die->children = LLDwarf_ParseDieSiblings(buffer, offset, cu,
							 abbrev_table, true);
		if (!die->children)
			goto err;
	} else if (jump_to_sibling) {
		Py_ssize_t sibling_offset;

		/*
		 * sibling comes straight from the (untrusted) input and is
		 * relative to the start of the CU; reject values that do not
		 * fit in an offset instead of overflowing.
		 */
		if (__builtin_add_overflow(cu->offset, sibling, &sibling_offset)) {
			PyErr_SetString(PyExc_OverflowError,
					"sibling offset too large");
			goto err;
		}
		*offset = sibling_offset;
	}

	Py_DECREF(decl);
	return (PyObject *)die;

err:
	Py_DECREF(die);
	Py_DECREF(decl);
	return NULL;
}
static PySequenceMethods DwarfDie_as_sequence = {
(lenfunc)DwarfDie_length, /* sq_length */
NULL, /* sq_concat */
NULL, /* sq_repeat */
(ssizeargfunc)DwarfDie_item, /* sq_item */
};
static PyMethodDef DwarfDie_methods[] = {
{"find", (PyCFunction)DwarfDie_find, METH_VARARGS | METH_KEYWORDS,
"find(name) -> (form, value)\n\n"
"Find an attribute.\n\n"
"Arguments:\n"
"name -- attribute name (DW_AT_*)"},
{},
};
static PyMemberDef DwarfDie_members[] = {
{"offset", T_UINT64T, offsetof(DwarfDie, offset), 0,
"offset into the buffer where this DIE starts"},
{"die_length", T_UINT64T, offsetof(DwarfDie, die_length), 0,
"length of this DIE"},
{"tag", T_UINT64T, offsetof(DwarfDie, tag), 0,
"this DIE's tag (DW_TAG_*)"},
{"children", T_OBJECT_EX, offsetof(DwarfDie, children), 0,
"list of this DIE's children, or None; this attribute may be\n"
"missing if the DIE was parsed non-recursively"},
{},
};
/*
 * Docstring for the DwarfDie type; used as tp_doc of DwarfDie_type. The
 * argument names match the keyword names accepted by DwarfDie_new.
 */
#define DwarfDie_DOC \
	"DwarfDie(offset, die_length, tag, children, attributes) -> new debugging information entry\n\n" \
	"Create a new DWARF debugging information entry. len(die) is the\n" \
	"number of attributes and die[i] is the ith attribute.\n\n" \
	"Arguments:\n" \
	"offset -- integer offset\n" \
	"die_length -- integer length\n" \
	"tag -- integer tag of the DIE\n" \
	"children -- list of children DIEs\n" \
	"attributes -- iterable of (name, form, value) triples"
PyTypeObject DwarfDie_type = {
PyVarObject_HEAD_INIT(NULL, 0)
"drgn.lldwarf.DwarfDie", /* tp_name */
sizeof(DwarfDie), /* tp_basicsize */
sizeof(struct DwarfAttrib), /* tp_itemsize */
(destructor)DwarfDie_dealloc, /* tp_dealloc */
NULL, /* tp_print */
NULL, /* tp_getattr */
NULL, /* tp_setattr */
NULL, /* tp_as_async */
(reprfunc)DwarfDie_repr, /* tp_repr */
NULL, /* tp_as_number */
&DwarfDie_as_sequence, /* tp_as_sequence */
NULL, /* tp_as_mapping */
NULL, /* tp_hash */
NULL, /* tp_call */
NULL, /* tp_str */
NULL, /* tp_getattro */
NULL, /* tp_setattro */
NULL, /* tp_as_buffer */
Py_TPFLAGS_DEFAULT, /* tp_flags */
DwarfDie_DOC, /* tp_doc */
(traverseproc)DwarfDie_traverse, /* tp_traverse */
NULL, /* tp_clear */
(richcmpfunc)DwarfDie_richcompare, /* tp_richcompare */
0, /* tp_weaklistoffset */
NULL, /* tp_iter */
NULL, /* tp_iternext */
DwarfDie_methods, /* tp_methods */
DwarfDie_members, /* tp_members */
NULL, /* tp_getset */
NULL, /* tp_base */
NULL, /* tp_dict */
NULL, /* tp_descr_get */
NULL, /* tp_descr_set */
0, /* tp_dictoffset */
NULL, /* tp_init */
NULL, /* tp_alloc */
(newfunc)DwarfDie_new, /* tp_new */
};

713
lldwarf/line.c Normal file
View File

@ -0,0 +1,713 @@
#include "lldwarf.h"
#include "dwarfdefs.h"
/*
 * tp_dealloc for LineNumberProgramHeader: drop the three owned object
 * references (each may be NULL) and release the object via tp_free.
 */
static void LineNumberProgramHeader_dealloc(LineNumberProgramHeader *self)
{
	PyTypeObject *type = Py_TYPE(self);

	Py_XDECREF(self->standard_opcode_lengths);
	Py_XDECREF(self->include_directories);
	Py_XDECREF(self->file_names);
	type->tp_free((PyObject *)self);
}
/*
 * tp_traverse for LineNumberProgramHeader: visit each owned object
 * reference. Py_VISIT returns early if visit() returns non-zero.
 */
static int LineNumberProgramHeader_traverse(LineNumberProgramHeader *self,
					    visitproc visit, void *arg)
{
	Py_VISIT(self->standard_opcode_lengths);
	Py_VISIT(self->include_directories);
	Py_VISIT(self->file_names);

	return 0;
}
/*
 * program_offset() method: offset where the line number program itself
 * starts, i.e. the header's starting offset plus the bytes up to and
 * including the header_length field plus header_length.
 *
 * Those leading bytes total 10 in the 32-bit format (4 + 2 + 4) and 22 in
 * the 64-bit format (12 + 2 + 8), matching the fields read by
 * LLDwarf_ParseLineNumberProgramHeader.
 *
 * Returns a new int, or NULL with OverflowError set.
 */
PyObject *LineNumberProgramHeader_program_offset(LineNumberProgramHeader *self)
{
	uint64_t prefix_size;
	uint64_t total_header;
	Py_ssize_t program_start;

	if (self->is_64_bit)
		prefix_size = 22;
	else
		prefix_size = 10;

	if (__builtin_add_overflow(self->header_length, prefix_size,
				   &total_header) ||
	    __builtin_add_overflow(self->offset, total_header, &program_start)) {
		PyErr_SetString(PyExc_OverflowError, "program offset too large");
		return NULL;
	}

	return PyLong_FromSsize_t(program_start);
}
/*
 * Compute the offset just past this line number program: its starting
 * offset plus the size of the unit_length field (4 bytes, or 12 in the
 * 64-bit format) plus unit_length.
 *
 * Returns the offset, or -1 with OverflowError set.
 */
static Py_ssize_t lnp_end_offset(LineNumberProgramHeader *self)
{
	uint64_t length_field_size;
	uint64_t total_length;
	Py_ssize_t end;

	if (self->is_64_bit)
		length_field_size = 12;
	else
		length_field_size = 4;

	if (__builtin_add_overflow(self->unit_length, length_field_size,
				   &total_length) ||
	    __builtin_add_overflow(self->offset, total_length, &end)) {
		PyErr_SetString(PyExc_OverflowError, "end offset too large");
		return -1;
	}

	return end;
}
/*
 * end_offset() method: Python wrapper around lnp_end_offset().
 *
 * Returns a new int, or NULL when lnp_end_offset() reports failure (-1).
 */
PyObject *LineNumberProgramHeader_end_offset(LineNumberProgramHeader *self)
{
	Py_ssize_t end = lnp_end_offset(self);

	if (end == -1)
		return NULL;
	return PyLong_FromSsize_t(end);
}
/*
 * Read the standard_opcode_lengths array: opcode_base - 1 single-byte
 * entries starting at *offset, advancing *offset past them.
 *
 * Returns a new list of ints, or NULL with an exception set. An
 * opcode_base of 0 raises ValueError.
 */
static PyObject *parse_standard_opcode_lengths(Py_buffer *buffer,
					       Py_ssize_t *offset,
					       uint8_t opcode_base)
{
	PyObject *result;
	Py_ssize_t count, idx;

	if (opcode_base == 0) {
		PyErr_SetString(PyExc_ValueError, "opcode_base is 0");
		return NULL;
	}

	count = opcode_base - 1;
	result = PyList_New(count);
	if (result == NULL)
		return NULL;

	for (idx = 0; idx < count; idx++) {
		uint8_t operand_count;
		PyObject *entry;

		if (read_u8(buffer, offset, &operand_count) == -1)
			goto fail;
		entry = PyLong_FromUnsignedLong(operand_count);
		if (entry == NULL)
			goto fail;
		/* PyList_SET_ITEM steals the reference to entry. */
		PyList_SET_ITEM(result, idx, entry);
	}

	return result;

fail:
	Py_DECREF(result);
	return NULL;
}
/*
 * Read the include_directories table: a sequence of strings starting at
 * *offset, ended by an empty string. *offset is advanced past the table.
 *
 * Returns a new list of bytes objects, or NULL with an exception set.
 */
static PyObject *parse_include_directories(Py_buffer *buffer,
					   Py_ssize_t *offset)
{
	PyObject *result;

	result = PyList_New(0);
	if (result == NULL)
		return NULL;

	for (;;) {
		/* Remember where the string starts before read_strlen moves on. */
		const char *start = (char *)buffer->buf + *offset;
		Py_ssize_t length;
		PyObject *entry;
		int status;

		if (read_strlen(buffer, offset, &length) == -1)
			goto fail;
		/* An empty string terminates the table. */
		if (length == 0)
			break;

		entry = PyBytes_FromStringAndSize(start, length);
		if (entry == NULL)
			goto fail;

		status = PyList_Append(result, entry);
		Py_DECREF(entry);
		if (status == -1)
			goto fail;
	}

	return result;

fail:
	Py_DECREF(result);
	return NULL;
}
/*
 * Read the file_names table: a sequence of entries starting at *offset,
 * ended by an empty file name. Each entry is a string followed by three
 * ULEB128 values (directory index, mtime, file size). *offset is advanced
 * past the table.
 *
 * Returns a new list of (name bytes, directory index, mtime, file size)
 * tuples, or NULL with an exception set.
 */
static PyObject *parse_file_names(Py_buffer *buffer, Py_ssize_t *offset)
{
	PyObject *file_names;

	file_names = PyList_New(0);
	/*
	 * This check was missing: on allocation failure the loop went on to
	 * use (and the error path to Py_DECREF) a NULL pointer.
	 */
	if (!file_names)
		return NULL;

	for (;;) {
		const char *str;
		Py_ssize_t len;
		uint64_t directory_index;
		uint64_t mtime;
		uint64_t file_size;
		PyObject *item;

		/* Remember where the name starts before read_strlen moves on. */
		str = (char *)buffer->buf + *offset;
		if (read_strlen(buffer, offset, &len) == -1)
			goto err;
		/* An empty name terminates the table. */
		if (len == 0)
			break;

		if (read_uleb128(buffer, offset, &directory_index) == -1)
			goto err;
		if (read_uleb128(buffer, offset, &mtime) == -1)
			goto err;
		if (read_uleb128(buffer, offset, &file_size) == -1)
			goto err;

		/*
		 * NOTE(review): "y#" consumes a Py_ssize_t length only when
		 * PY_SSIZE_T_CLEAN is defined before Python.h is included --
		 * confirm that lldwarf.h does so.
		 */
		item = Py_BuildValue("y#KKK", str, len,
				     (unsigned long long)directory_index,
				     (unsigned long long)mtime,
				     (unsigned long long)file_size);
		if (!item)
			goto err;

		if (PyList_Append(file_names, item) == -1) {
			Py_DECREF(item);
			goto err;
		}
		Py_DECREF(item);
	}

	return file_names;

err:
	Py_DECREF(file_names);
	return NULL;
}
/*
 * Parse a line number program header from buffer at *offset, advancing
 * *offset past each field that is read.
 *
 * maximum_operations_per_instruction is only present in the input for
 * version >= 4; for older versions it is set to 1.
 *
 * Returns a new LineNumberProgramHeader, or NULL with an exception set.
 * Running out of input is reported as ValueError ("... is truncated");
 * other failures (e.g. MemoryError, or an opcode_base of 0) keep their own
 * exception.
 */
PyObject *LLDwarf_ParseLineNumberProgramHeader(Py_buffer *buffer,
					       Py_ssize_t *offset)
{
	LineNumberProgramHeader *lnp;
	uint32_t length;
	uint8_t default_is_stmt;

	lnp = PyObject_New(LineNumberProgramHeader, &LineNumberProgramHeader_type);
	if (!lnp)
		return NULL;

	/*
	 * PyObject_New does not zero the object. The destructor releases
	 * these three fields with Py_XDECREF, so they must be NULL before any
	 * error path can drop the last reference; otherwise it would
	 * decrement garbage pointers.
	 */
	lnp->standard_opcode_lengths = NULL;
	lnp->include_directories = NULL;
	lnp->file_names = NULL;

	lnp->offset = *offset;

	if (read_u32(buffer, offset, &length) == -1)
		goto err;

	/* An initial length of 0xffffffff selects the 64-bit format. */
	lnp->is_64_bit = length == UINT32_C(0xffffffff);
	if (lnp->is_64_bit) {
		if (read_u64(buffer, offset, &lnp->unit_length) == -1)
			goto err;
	} else {
		lnp->unit_length = length;
	}

	if (read_u16(buffer, offset, &lnp->version) == -1)
		goto err;

	/* header_length is 8 bytes in the 64-bit format, 4 otherwise. */
	if (lnp->is_64_bit) {
		if (read_u64(buffer, offset, &lnp->header_length) == -1)
			goto err;
	} else {
		if (read_u32(buffer, offset, &length) == -1)
			goto err;
		lnp->header_length = length;
	}

	if (read_u8(buffer, offset, &lnp->minimum_instruction_length) == -1)
		goto err;

	if (lnp->version >= 4) {
		if (read_u8(buffer, offset, &lnp->maximum_operations_per_instruction) == -1)
			goto err;
	} else {
		lnp->maximum_operations_per_instruction = 1;
	}

	if (read_u8(buffer, offset, &default_is_stmt) == -1)
		goto err;
	lnp->default_is_stmt = (bool)default_is_stmt;

	if (read_s8(buffer, offset, &lnp->line_base) == -1)
		goto err;

	if (read_u8(buffer, offset, &lnp->line_range) == -1)
		goto err;

	if (read_u8(buffer, offset, &lnp->opcode_base) == -1)
		goto err;

	lnp->standard_opcode_lengths =
		parse_standard_opcode_lengths(buffer, offset, lnp->opcode_base);
	if (!lnp->standard_opcode_lengths)
		goto err;

	lnp->include_directories = parse_include_directories(buffer, offset);
	if (!lnp->include_directories)
		goto err;

	lnp->file_names = parse_file_names(buffer, offset);
	if (!lnp->file_names)
		goto err;

	return (PyObject *)lnp;

err:
	/*
	 * Only describe the failure as truncation when it is one. The
	 * abbreviation parser in this project treats EOFError from the read
	 * helpers as "ran out of input"; the same is assumed here. Anything
	 * else is left as raised.
	 */
	if (!PyErr_Occurred() || PyErr_ExceptionMatches(PyExc_EOFError)) {
		PyErr_SetString(PyExc_ValueError,
				"line number program header is truncated");
	}
	Py_DECREF(lnp);
	return NULL;
}
static PyMethodDef LineNumberProgramHeader_methods[] = {
{"program_offset", (PyCFunction)LineNumberProgramHeader_program_offset,
METH_NOARGS,
"program_offset() -> int\n\n"
"Get the offset into the file where the line number program itself\n"
"starts. This is the starting offset of the line number program\n"
"header plus the length of the header."},
{"end_offset", (PyCFunction)LineNumberProgramHeader_end_offset,
METH_NOARGS,
"end_offset() -> int\n\n"
"Get the offset into the file where the line number program ends.\n"
"This is the starting offset of the line number program header plus\n"
"the length of the unit, including the header."},
{},
};
/*
 * Attribute table for LineNumberProgramHeader objects. Each entry maps a
 * Python attribute name onto the C struct field of the same name; the flags
 * field is 0 for every entry. The array is terminated by an empty sentinel.
 */
static PyMemberDef LineNumberProgramHeader_members[] = {
	{"offset", T_PYSSIZET,
	 offsetof(LineNumberProgramHeader, offset), 0,
	 "offset into the file where this line number program starts"},
	{"unit_length", T_UINT64T,
	 offsetof(LineNumberProgramHeader, unit_length), 0,
	 "length of this line number program, not including the unit_length field"},
	{"version", T_UINT16T, offsetof(LineNumberProgramHeader, version), 0,
	 "format version of this line number program"},
	{"header_length", T_UINT64T,
	 offsetof(LineNumberProgramHeader, header_length), 0,
	 "length of this line number program header, not including the\n"
	 "unit_length, version, or header_length fields"},
	{"minimum_instruction_length", T_UINT8T,
	 offsetof(LineNumberProgramHeader, minimum_instruction_length), 0,
	 "size of the smallest target machine instruction"},
	{"maximum_operations_per_instruction", T_UINT8T,
	 offsetof(LineNumberProgramHeader, maximum_operations_per_instruction), 0,
	 "maximum number of operations that may be encoded in an instruction"},
	{"default_is_stmt", T_BOOL,
	 offsetof(LineNumberProgramHeader, default_is_stmt), 0,
	 "initial value of the is_stmt register"},
	{"line_base", T_INT8T, offsetof(LineNumberProgramHeader, line_base), 0,
	 "parameter for special opcodes"},
	{"line_range", T_UINT8T,
	 offsetof(LineNumberProgramHeader, line_range), 0,
	 "parameter for special opcodes"},
	{"opcode_base", T_UINT8T,
	 offsetof(LineNumberProgramHeader, opcode_base), 0,
	 "number assigned to the first special opcode"},
	{"standard_opcode_lengths", T_OBJECT,
	 offsetof(LineNumberProgramHeader, standard_opcode_lengths), 0,
	 "list of number of operands for each standard opcode"},
	{"include_directories", T_OBJECT,
	 offsetof(LineNumberProgramHeader, include_directories), 0,
	 "list of path names that were searched for included source files"},
	{"file_names", T_OBJECT,
	 offsetof(LineNumberProgramHeader, file_names), 0,
	 "list of (path name, directory index, mtime, file size)"},
	/* This describes the line number program, not a compilation unit. */
	{"is_64_bit", T_BOOL, offsetof(LineNumberProgramHeader, is_64_bit), 0,
	 "whether this line number program is using the 64-bit format"},
	/* Sentinel. */
	{},
};
/*
 * Docstring for the LineNumberProgramHeader type (used as tp_doc). The
 * arguments are listed in the same order as the entries of
 * LineNumberProgramHeader_members.
 */
#define LineNumberProgramHeader_DOC \
"LineNumberProgramHeader(offset, unit_length, version, header_length,\n"\
" minimum_instruction_length,\n" \
" maximum_operations_per_instruction,\n" \
" default_is_stmt, line_base, line_range,\n" \
" opcode_base, standard_opcode_lengths,\n" \
" include_directories, file_names,\n" \
" is_64_bit) -> new line number program header\n\n" \
"Create a new DWARF line number program header.\n\n" \
"Arguments:\n" \
"offset -- integer offset\n" \
"unit_length -- integer length\n" \
"version -- integer format version\n" \
"header_length -- integer length\n" \
"minimum_instruction_length -- integer length\n" \
"maximum_operations_per_instruction -- integer\n" \
"default_is_stmt -- boolean\n" \
"line_base -- integer\n" \
"line_range -- integer\n" \
"opcode_base -- integer\n" \
"standard_opcode_lengths -- list of integers\n" \
"include_directories -- list of strings\n" \
"file_names -- list of (string, integer, integer, integer)\n" \
"is_64_bit -- boolean"
/*
 * Type object for LineNumberProgramHeader. Slots are filled positionally, so
 * the order of the entries below must follow the PyTypeObject layout. repr,
 * rich comparison and __init__ are provided by the shared LLDwarfObject_*
 * helpers.
 */
PyTypeObject LineNumberProgramHeader_type = {
PyVarObject_HEAD_INIT(NULL, 0)
"drgn.lldwarf.LineNumberProgramHeader", /* tp_name */
sizeof(LineNumberProgramHeader), /* tp_basicsize */
0, /* tp_itemsize */
(destructor)LineNumberProgramHeader_dealloc, /* tp_dealloc */
NULL, /* tp_print */
NULL, /* tp_getattr */
NULL, /* tp_setattr */
NULL, /* tp_as_async */
LLDwarfObject_repr, /* tp_repr */
NULL, /* tp_as_number */
NULL, /* tp_as_sequence */
NULL, /* tp_as_mapping */
NULL, /* tp_hash */
NULL, /* tp_call */
NULL, /* tp_str */
NULL, /* tp_getattro */
NULL, /* tp_setattro */
NULL, /* tp_as_buffer */
/*
 * NOTE(review): tp_traverse is set below but Py_TPFLAGS_HAVE_GC is not part
 * of tp_flags. Confirm whether GC support was intended; enabling it would
 * also require GC-aware allocation and deallocation.
 */
Py_TPFLAGS_DEFAULT, /* tp_flags */
LineNumberProgramHeader_DOC, /* tp_doc */
(traverseproc)LineNumberProgramHeader_traverse, /* tp_traverse */
NULL, /* tp_clear */
LLDwarfObject_richcompare, /* tp_richcompare */
0, /* tp_weaklistoffset */
NULL, /* tp_iter */
NULL, /* tp_iternext */
LineNumberProgramHeader_methods, /* tp_methods */
LineNumberProgramHeader_members, /* tp_members */
NULL, /* tp_getset */
NULL, /* tp_base */
NULL, /* tp_dict */
NULL, /* tp_descr_get */
NULL, /* tp_descr_set */
0, /* tp_dictoffset */
LLDwarfObject_init, /* tp_init */
};
/*
 * tp_dealloc for LineNumberRow: the row owns no references, so the only work
 * is handing the memory back through the type's tp_free slot.
 */
static void LineNumberRow_dealloc(PyObject *self)
{
	freefunc release = Py_TYPE(self)->tp_free;

	release(self);
}
/*
 * Put the line number state machine registers into their initial state.
 * Everything is a fixed constant except is_stmt, which is taken from the
 * program header's default_is_stmt. The object header of *state is not
 * touched.
 */
static void init_state(LineNumberProgramHeader *lnp, LineNumberRow *state)
{
	/* Position registers. */
	state->address = 0;
	state->op_index = 0;
	state->file = 1;
	state->line = 1;
	state->column = 0;
	state->isa = 0;
	state->discriminator = 0;

	/* Flag registers. */
	state->is_stmt = lnp->default_is_stmt;
	state->basic_block = false;
	state->end_sequence = false;
	state->prologue_end = false;
	state->epilogue_begin = false;
}
/*
 * Clear the registers that only apply to a single row. Called after a row
 * has been appended to the matrix.
 */
static void reset_state(LineNumberRow *state)
{
	state->discriminator = 0;
	state->basic_block = false;
	state->prologue_end = false;
	state->epilogue_begin = false;
}
/*
 * Append a snapshot of the current state machine registers to the matrix.
 *
 * A new LineNumberRow is allocated, filled with a copy of *state and appended
 * to the list.
 *
 * Returns 0 on success, or -1 with an exception set on failure.
 */
static int append_row(PyObject *matrix, LineNumberRow *state)
{
	LineNumberRow *row;
	int ret;

	/*
	 * Rows are released through tp_free (see LineNumberRow_dealloc), so
	 * they must come from the object allocator rather than PyMem_Malloc().
	 */
	row = PyObject_Malloc(sizeof(LineNumberRow));
	if (!row) {
		/* The raw allocators do not set an exception themselves. */
		PyErr_NoMemory();
		return -1;
	}
	/*
	 * The struct copy also copies the object header of *state, so the
	 * header must be (re)initialized afterwards.
	 */
	*row = *state;
	PyObject_Init((PyObject *)row, &LineNumberRow_type);
	ret = PyList_Append(matrix, (PyObject *)row);
	/* The list holds its own reference on success. */
	Py_DECREF((PyObject *)row);
	return ret;
}
/*
 * Execute one extended opcode. The leading 0 byte has already been consumed
 * by the caller; *offset points at the ULEB128 length of the instruction.
 *
 * lnp -- program header, used to reinitialize the state after end_sequence
 * state -- state machine registers, updated in place
 * matrix -- list that receives the row emitted by DW_LNE_end_sequence
 * buffer, offset -- input buffer and current position, advanced on success
 *
 * Returns 0 on success, or -1 with an exception set: EOFError if the
 * instruction runs past the end of the buffer, ValueError for an unsupported
 * address size or unknown opcode, NotImplementedError for DW_LNE_define_file.
 */
static int execute_extended_opcode(LineNumberProgramHeader *lnp,
				   LineNumberRow *state, PyObject *matrix,
				   Py_buffer *buffer, Py_ssize_t *offset)
{
	Py_ssize_t end;
	uint64_t length;
	uint8_t opcode;

	if (read_uleb128(buffer, offset, &length) == -1)
		return -1;
	/*
	 * The length comes straight from the input. A value that does not fit
	 * in Py_ssize_t would turn negative when narrowed and slip past the
	 * bounds check, so reject it as out of bounds up front.
	 */
	if (length > (uint64_t)PY_SSIZE_T_MAX) {
		PyErr_SetString(PyExc_EOFError, "");
		return -1;
	}
	if (read_check_bounds(buffer, *offset, (Py_ssize_t)length) == -1)
		return -1;
	end = *offset + (Py_ssize_t)length;
	if (read_u8(buffer, offset, &opcode) == -1)
		return -1;
	switch (opcode) {
	case DW_LNE_end_sequence:
		state->end_sequence = true;
		if (append_row(matrix, state) == -1)
			return -1;
		init_state(lnp, state);
		return 0;
	case DW_LNE_set_address:
		/* length includes the opcode byte, so 9 and 5 mean 8- and 4-byte addresses. */
		if (length == 9) {
			if (read_u64(buffer, offset, &state->address) == -1)
				return -1;
		} else if (length == 5) {
			uint32_t address;

			if (read_u32(buffer, offset, &address) == -1)
				return -1;
			state->address = address;
		} else {
			PyErr_Format(PyExc_ValueError, "unsupported address size %llu",
				     (unsigned long long)(length - 1));
			return -1;
		}
		state->op_index = 0;
		return 0;
	case DW_LNE_define_file:
		PyErr_Format(PyExc_NotImplementedError, "DW_LNE_define_file is not implemented");
		/* Skip the instruction before reporting the error. */
		*offset = end;
		return -1;
	case DW_LNE_set_discriminator:
		return read_uleb128(buffer, offset, &state->discriminator);
	default:
		PyErr_Format(PyExc_ValueError, "unknown extended opcode %u",
			     (unsigned int)opcode);
		return -1;
	}
}
/*
 * Advance the address and op_index registers by operation_advance
 * operations.
 *
 * The caller must make sure lnp->maximum_operations_per_instruction is not
 * zero; it is used as a divisor here.
 */
static void advance_pc(LineNumberProgramHeader *lnp, LineNumberRow *state,
		       uint64_t operation_advance)
{
	uint64_t total_ops = state->op_index + operation_advance;
	uint64_t instructions = total_ops / lnp->maximum_operations_per_instruction;

	state->address += lnp->minimum_instruction_length * instructions;
	state->op_index = total_ops % lnp->maximum_operations_per_instruction;
}
/*
 * Execute one standard opcode (an opcode in the range 1..opcode_base - 1),
 * reading any operands from the buffer at *offset.
 *
 * Returns 0 on success, or -1 with an exception set. An opcode that is not
 * one of the DW_LNS_* values handled here raises ValueError.
 */
static int execute_standard_opcode(LineNumberProgramHeader *lnp,
				   LineNumberRow *state, PyObject *matrix,
				   uint8_t opcode, Py_buffer *buffer,
				   Py_ssize_t *offset)
{
	uint64_t advance;
	int64_t line_delta;
	uint16_t fixed_advance;

	switch (opcode) {
	case DW_LNS_copy:
		if (append_row(matrix, state) == -1)
			return -1;
		reset_state(state);
		break;
	case DW_LNS_advance_pc:
		if (read_uleb128(buffer, offset, &advance) == -1)
			return -1;
		advance_pc(lnp, state, advance);
		break;
	case DW_LNS_advance_line:
		if (read_sleb128(buffer, offset, &line_delta) == -1)
			return -1;
		state->line += line_delta;
		break;
	case DW_LNS_set_file:
		return read_uleb128(buffer, offset, &state->file);
	case DW_LNS_set_column:
		return read_uleb128(buffer, offset, &state->column);
	case DW_LNS_negate_stmt:
		state->is_stmt = !state->is_stmt;
		break;
	case DW_LNS_set_basic_block:
		state->basic_block = true;
		break;
	case DW_LNS_const_add_pc:
		/* Same address advance as special opcode 255, without emitting a row. */
		advance_pc(lnp, state,
			   (255 - lnp->opcode_base) / lnp->line_range);
		break;
	case DW_LNS_fixed_advance_pc:
		if (read_u16(buffer, offset, &fixed_advance) == -1)
			return -1;
		state->address += fixed_advance;
		state->op_index = 0;
		break;
	case DW_LNS_set_prologue_end:
		state->prologue_end = true;
		break;
	case DW_LNS_set_epilogue_begin:
		state->epilogue_begin = true;
		break;
	case DW_LNS_set_isa:
		return read_uleb128(buffer, offset, &state->isa);
	default:
		PyErr_Format(PyExc_ValueError, "unknown standard opcode %u",
			     (unsigned int)opcode);
		return -1;
	}
	return 0;
}
/*
 * Execute a special opcode (opcode >= opcode_base): advance the address and
 * the line according to the header's line_base/line_range parameters, then
 * emit a row.
 *
 * The caller must make sure lnp->line_range is not zero.
 *
 * Returns 0 on success, or -1 with an exception set if the row could not be
 * appended.
 */
static int execute_special_opcode(LineNumberProgramHeader *lnp,
				  LineNumberRow *state, PyObject *matrix,
				  uint8_t opcode)
{
	uint8_t adjusted = opcode - lnp->opcode_base;
	uint8_t ops = adjusted / lnp->line_range;
	uint8_t line_step = adjusted % lnp->line_range;

	advance_pc(lnp, state, ops);
	state->line += lnp->line_base + line_step;

	if (append_row(matrix, state) == -1)
		return -1;
	reset_state(state);
	return 0;
}
/*
 * Dispatch a single opcode: 0 introduces an extended opcode, values below
 * opcode_base are standard opcodes, everything else is a special opcode.
 *
 * Returns whatever the selected handler returns (0, or -1 with an exception
 * set).
 */
static int execute_opcode(LineNumberProgramHeader *lnp, LineNumberRow *state,
			  PyObject *matrix, uint8_t opcode, Py_buffer *buffer,
			  Py_ssize_t *offset)
{
	if (opcode == 0)
		return execute_extended_opcode(lnp, state, matrix, buffer, offset);

	if (opcode >= lnp->opcode_base)
		return execute_special_opcode(lnp, state, matrix, opcode);

	return execute_standard_opcode(lnp, state, matrix, opcode, buffer,
				       offset);
}
/*
 * Execute a line number program and return the resulting matrix.
 *
 * lnp -- header describing the program
 * buffer -- buffer containing the program
 * offset -- in: where execution starts; out: advanced past every opcode that
 *           was consumed
 *
 * Opcodes are executed until *offset reaches the end offset computed by
 * lnp_end_offset().
 *
 * Returns a new list of LineNumberRow objects, or NULL with an exception set.
 * ValueError is raised up front if the header contains a zero line_range or
 * a zero maximum_operations_per_instruction, since both are used as divisors
 * while executing the program.
 */
PyObject *LLDwarf_ExecuteLineNumberProgram(LineNumberProgramHeader *lnp,
					   Py_buffer *buffer,
					   Py_ssize_t *offset)
{
	LineNumberRow state = {};
	Py_ssize_t end_offset;
	PyObject *matrix;

	if (lnp->line_range == 0) {
		PyErr_SetString(PyExc_ValueError, "line_range is 0");
		return NULL;
	}
	/* advance_pc() divides by this; zero would crash the interpreter. */
	if (lnp->maximum_operations_per_instruction == 0) {
		PyErr_SetString(PyExc_ValueError,
				"maximum_operations_per_instruction is 0");
		return NULL;
	}

	init_state(lnp, &state);

	end_offset = lnp_end_offset(lnp);
	if (end_offset == -1)
		return NULL;

	matrix = PyList_New(0);
	if (!matrix)
		return NULL;

	while (*offset < end_offset) {
		uint8_t opcode;

		if (read_u8(buffer, offset, &opcode) == -1)
			goto err;
		if (execute_opcode(lnp, &state, matrix, opcode, buffer, offset) == -1)
			goto err;
	}

	return matrix;

err:
	Py_DECREF(matrix);
	return NULL;
}
/*
 * Attribute table for LineNumberRow objects. Each entry maps a Python
 * attribute name onto the C struct field of the same name; the flags field is
 * 0 for every entry. The array is terminated by an empty sentinel.
 *
 * NOTE(review): unlike LineNumberProgramHeader_members this table is not
 * declared static; confirm whether external linkage is intended.
 */
PyMemberDef LineNumberRow_members[] = {
{"address", T_UINT64T, offsetof(LineNumberRow, address), 0,
"the program counter value of this instruction"},
{"op_index", T_UINT8T, offsetof(LineNumberRow, op_index), 0,
"index of an operation within a VLIW instruction"},
{"file", T_UINT64T, offsetof(LineNumberRow, file), 0,
"source file as an index into file_names list"},
{"line", T_UINT64T, offsetof(LineNumberRow, line), 0,
"source line number, or 0 if the instruction cannot be attributed\n"
"to a source line"},
{"column", T_UINT64T, offsetof(LineNumberRow, column), 0,
"column number within a source line, or 0 for the left edge"},
{"is_stmt", T_BOOL, offsetof(LineNumberRow, is_stmt), 0,
"whether the instruction represents a line or statement and thus\n"
"is a recommended breakpoint location"},
{"basic_block", T_BOOL, offsetof(LineNumberRow, basic_block), 0,
"whether the instruction is the beginning of a basic block"},
{"end_sequence", T_BOOL, offsetof(LineNumberRow, end_sequence), 0,
"whether this instruction address is the first byte after the\n"
"end of a sequence of instructions; if this is true, only the\n"
"address is meaningful"},
{"prologue_end", T_BOOL, offsetof(LineNumberRow, prologue_end), 0,
"whether this instruction is a recommended function entry\n"
"breakpoint location"},
{"epilogue_begin", T_BOOL, offsetof(LineNumberRow, epilogue_begin), 0,
"whether this instruction is a recommended function exit\n"
"breakpoint location"},
{"isa", T_UINT64T, offsetof(LineNumberRow, isa), 0,
"the instruction set architecture of the current instruction"},
{"discriminator", T_UINT64T, offsetof(LineNumberRow, discriminator), 0,
"arbitrary identifier of the block to which the instruction\n"
"belongs, or 0 if only one block exists for the given source position"},
/* Sentinel. */
{},
};
/*
 * Docstring for the LineNumberRow type (used as tp_doc). Every argument named
 * in the signature line is described in the "Arguments" section, in the same
 * order as the entries of LineNumberRow_members.
 */
#define LineNumberRow_DOC						\
	"LineNumberRow(address, op_index, file, line, column,\n"	\
	" is_stmt, basic_block, end_sequence,\n"			\
	" prologue_end, epilogue_begin, isa,\n"				\
	" discriminator) -> new line number matrix row\n\n"		\
	"Create a new DWARF line number matrix row.\n\n"		\
	"Arguments:\n"							\
	"address -- integer address\n"					\
	"op_index -- integer index\n"					\
	"file -- integer file index\n"					\
	"line -- integer line number\n"					\
	"column -- integer column number\n"				\
	"is_stmt -- boolean\n"						\
	"basic_block -- boolean\n"					\
	"end_sequence -- boolean\n"					\
	"prologue_end -- boolean\n"					\
	"epilogue_begin -- boolean\n"					\
	"isa -- integer\n"						\
	"discriminator -- integer"
/*
 * Type object for LineNumberRow. Slots are filled positionally, so the order
 * of the entries below must follow the PyTypeObject layout. repr, rich
 * comparison and __init__ are provided by the shared LLDwarfObject_* helpers.
 *
 * tp_name uses the same "drgn.lldwarf." prefix as LineNumberProgramHeader so
 * that both types report the same module.
 */
PyTypeObject LineNumberRow_type = {
	PyVarObject_HEAD_INIT(NULL, 0)
	"drgn.lldwarf.LineNumberRow",	/* tp_name */
	sizeof(LineNumberRow),		/* tp_basicsize */
	0,				/* tp_itemsize */
	LineNumberRow_dealloc,		/* tp_dealloc */
	NULL,				/* tp_print */
	NULL,				/* tp_getattr */
	NULL,				/* tp_setattr */
	NULL,				/* tp_as_async */
	LLDwarfObject_repr,		/* tp_repr */
	NULL,				/* tp_as_number */
	NULL,				/* tp_as_sequence */
	NULL,				/* tp_as_mapping */
	NULL,				/* tp_hash */
	NULL,				/* tp_call */
	NULL,				/* tp_str */
	NULL,				/* tp_getattro */
	NULL,				/* tp_setattro */
	NULL,				/* tp_as_buffer */
	Py_TPFLAGS_DEFAULT,		/* tp_flags */
	LineNumberRow_DOC,		/* tp_doc */
	NULL,				/* tp_traverse */
	NULL,				/* tp_clear */
	LLDwarfObject_richcompare,	/* tp_richcompare */
	0,				/* tp_weaklistoffset */
	NULL,				/* tp_iter */
	NULL,				/* tp_iternext */
	NULL,				/* tp_methods */
	LineNumberRow_members,		/* tp_members */
	NULL,				/* tp_getset */
	NULL,				/* tp_base */
	NULL,				/* tp_dict */
	NULL,				/* tp_descr_get */
	NULL,				/* tp_descr_set */
	0,				/* tp_dictoffset */
	LLDwarfObject_init,		/* tp_init */
};

327
lldwarf/lldwarf.h Normal file
View File

@ -0,0 +1,327 @@
#ifndef LLDWARF_H
#define LLDWARF_H
#define PY_SSIZE_T_CLEAN
#include <limits.h>
#include <stdbool.h>
#include <stdint.h>
#include <Python.h>
#include "structmember.h"
/* One (name, form) pair of an abbreviation declaration's attribute list. */
struct AttribSpec {
uint64_t name;
uint64_t form;
};
/*
 * Abbreviation declaration object. This is a variable-size Python object:
 * the attribute specifications are stored inline in the trailing flexible
 * array (presumably ob_size entries -- confirm against the allocator).
 */
typedef struct {
PyObject_VAR_HEAD
uint64_t tag;
bool children;
struct AttribSpec attribs[];
} AbbrevDecl;
extern PyTypeObject AbbrevDecl_type;
/*
 * Compilation unit header object. The fields mirror the header fields of the
 * same names; offset and is_64_bit are bookkeeping kept alongside them
 * (presumably filled in by LLDwarf_ParseCompilationUnitHeader() -- confirm).
 */
typedef struct {
PyObject_HEAD
Py_ssize_t offset;
uint64_t unit_length;
uint16_t version;
uint64_t debug_abbrev_offset;
uint8_t address_size;
bool is_64_bit;
} CompilationUnitHeader;
extern PyTypeObject CompilationUnitHeader_type;
/*
 * One attribute of a DIE. Which member of the anonymous union is valid
 * depends on form, as described by the comments on each member.
 */
struct DwarfAttrib {
uint64_t name;
uint64_t form;
union {
/*
* DW_FORM_addr, DW_FORM_udata, DW_FORM_flag{,_present},
* DW_FORM_sec_offset, DW_FORM_ref{1,2,4,8,_sig8,_udata}, and
* DW_FORM_strp. For DW_FORM_flag_present, always 1.
*/
uint64_t u;
/* DW_FORM_sdata. */
int64_t s;
/* DW_FORM_data{1,2,4,8} */
char data[8];
/*
* DW_FORM_block{,1,2,4}, DW_FORM_exprloc, and DW_FORM_string.
* Offset from the beginning of the buffer that the DIE was
* parsed from.
*/
struct {
Py_ssize_t offset;
Py_ssize_t length;
};
};
};
/*
 * Debugging information entry object. This is a variable-size Python object:
 * the attributes are stored inline in the trailing flexible array. children
 * is an object reference (its exact type is not visible from this header).
 */
typedef struct {
PyObject_VAR_HEAD
Py_ssize_t offset;
Py_ssize_t die_length;
uint64_t tag;
PyObject *children;
struct DwarfAttrib attribs[];
} DwarfDie;
extern PyTypeObject DwarfDie_type;
/*
 * Line number program header object. The three PyObject * fields are
 * references to Python objects (documented as lists in the type's member
 * table); the remaining fields hold the scalar header values.
 */
typedef struct {
PyObject_HEAD
/* Offset into the file where this line number program starts. */
Py_ssize_t offset;
/* Length of the program, not including the unit_length field itself. */
uint64_t unit_length;
uint16_t version;
uint64_t header_length;
uint8_t minimum_instruction_length;
uint8_t maximum_operations_per_instruction;
/* Initial value of the is_stmt register. */
bool default_is_stmt;
/* line_base, line_range and opcode_base parameterize the special opcodes. */
int8_t line_base;
uint8_t line_range;
uint8_t opcode_base;
PyObject *standard_opcode_lengths;
PyObject *include_directories;
PyObject *file_names;
bool is_64_bit;
} LineNumberProgramHeader;
extern PyTypeObject LineNumberProgramHeader_type;
/*
 * One row of the line number matrix. The same struct is also used as the
 * state machine's register set while a line number program is executed.
 * It holds no object references.
 */
typedef struct {
PyObject_HEAD
uint64_t address;
/* Index into the header's file_names list. */
uint64_t file;
uint64_t line;
uint64_t column;
uint64_t isa;
uint64_t discriminator;
uint8_t op_index;
bool is_stmt;
bool basic_block;
bool end_sequence;
bool prologue_end;
bool epilogue_begin;
} LineNumberRow;
extern PyTypeObject LineNumberRow_type;
/* Extra type that is only built when testing the generic object helpers. */
#ifdef TEST_LLDWARFOBJECT
extern PyTypeObject TestObject_type;
#endif
/* Generic tp_init/tp_repr/tp_richcompare implementations shared by the types. */
int LLDwarfObject_init(PyObject *self, PyObject *args, PyObject *kwds);
PyObject *LLDwarfObject_repr(PyObject *self);
int LLDwarfObject_RichCompareBool(PyObject *self, PyObject *other, int op);
PyObject *LLDwarfObject_richcompare(PyObject *self, PyObject *other, int op);
/*
 * Parsers. Each takes the buffer to read from and a pointer to the current
 * offset into it.
 */
PyObject *LLDwarf_ParseAbbrevTable(Py_buffer *buffer, Py_ssize_t *offset);
PyObject *LLDwarf_ParseCompilationUnitHeader(Py_buffer *buffer,
Py_ssize_t *offset);
PyObject *LLDwarf_ParseDie(Py_buffer *buffer, Py_ssize_t *offset,
CompilationUnitHeader *cu, PyObject *abbrev_table,
bool recurse, bool jump_to_sibling);
PyObject *LLDwarf_ParseDieSiblings(Py_buffer *buffer, Py_ssize_t *offset,
CompilationUnitHeader *cu,
PyObject *abbrev_table, bool recurse);
PyObject *LLDwarf_ParseLineNumberProgramHeader(Py_buffer *buffer,
Py_ssize_t *offset);
PyObject *LLDwarf_ExecuteLineNumberProgram(LineNumberProgramHeader *lnp,
Py_buffer *buffer,
Py_ssize_t *offset);
/*
 * Low-level readers. They return 0 on success and -1 with an exception set
 * on failure, and advance *offset past what was read.
 */
int read_uleb128(Py_buffer *buffer, Py_ssize_t *offset, uint64_t *ret);
int read_sleb128(Py_buffer *buffer, Py_ssize_t *offset, int64_t *ret);
int read_strlen(Py_buffer *buffer, Py_ssize_t *offset, Py_ssize_t *len);
/*
 * Check that the size bytes starting at offset lie entirely inside buffer.
 *
 * Negative offsets and sizes are rejected as well: callers may pass values
 * that were decoded from the input, and a negative size would otherwise pass
 * the comparison below.
 *
 * Returns 0 if the range is valid, or -1 with EOFError set (with an empty
 * message) if it is not.
 */
static inline int read_check_bounds(Py_buffer *buffer, Py_ssize_t offset,
				    Py_ssize_t size)
{
	/* Written as a subtraction so that offset + size cannot overflow. */
	if (offset < 0 || size < 0 || buffer->len < size ||
	    offset > buffer->len - size) {
		PyErr_SetString(PyExc_EOFError, "");
		return -1;
	}
	return 0;
}
/*
 * Copy size bytes from buffer at *offset into ret and advance *offset.
 *
 * Returns 0 on success, or -1 with the exception from read_check_bounds()
 * set if the range is out of bounds; *offset is left alone in that case.
 */
static inline int read_buffer(Py_buffer *buffer, Py_ssize_t *offset,
			      void *ret, Py_ssize_t size)
{
	const char *src;

	if (read_check_bounds(buffer, *offset, size) != 0)
		return -1;

	src = (char *)buffer->buf + *offset;
	memcpy(ret, src, size);
	*offset += size;
	return 0;
}
/*
 * Generate read_<name>(): read one fixed-size value of the given type from
 * the buffer via read_buffer(). The bytes are copied as-is, so the value is
 * interpreted in host byte order.
 */
#define read_type(name, type) \
static inline int read_##name(Py_buffer *buffer, Py_ssize_t *offset, \
type *ret) \
{ \
return read_buffer(buffer, offset, ret, sizeof(*ret)); \
}
/* read_u8(), read_u16(), read_u32(), read_u64() and their signed counterparts. */
read_type(u8, uint8_t)
read_type(u16, uint16_t)
read_type(u32, uint32_t)
read_type(u64, uint64_t)
read_type(s8, int8_t)
read_type(s16, int16_t)
read_type(s32, int32_t)
read_type(s64, int64_t)
/*
 * Convert a Python int to a C char. OverflowError is set if the value is
 * outside [CHAR_MIN, CHAR_MAX]. Callers must check PyErr_Occurred(), since
 * the (truncated) value is returned either way.
 */
static inline char PyLong_AsChar(PyObject *pylong)
{
	long value = PyLong_AsLong(pylong);

	if (!PyErr_Occurred() && (value < CHAR_MIN || value > CHAR_MAX))
		PyErr_SetString(PyExc_OverflowError, "int too big to convert");
	return value;
}
/*
 * Convert a Python int to a C unsigned char. OverflowError is set if the
 * value is greater than UCHAR_MAX. Callers must check PyErr_Occurred(), since
 * the (truncated) value is returned either way.
 */
static inline unsigned char PyLong_AsUnsignedChar(PyObject *pylong)
{
	unsigned long value = PyLong_AsUnsignedLong(pylong);

	if (!PyErr_Occurred() && value > UCHAR_MAX)
		PyErr_SetString(PyExc_OverflowError, "int too big to convert");
	return value;
}
/*
 * Convert a Python int to a C short. OverflowError is set if the value is
 * outside [SHRT_MIN, SHRT_MAX]. Callers must check PyErr_Occurred(), since
 * the (truncated) value is returned either way.
 */
static inline short PyLong_AsShort(PyObject *pylong)
{
	long value = PyLong_AsLong(pylong);

	if (!PyErr_Occurred() && (value < SHRT_MIN || value > SHRT_MAX))
		PyErr_SetString(PyExc_OverflowError, "int too big to convert");
	return value;
}
/*
 * Convert a Python int to a C unsigned short. OverflowError is set if the
 * value is greater than USHRT_MAX. Callers must check PyErr_Occurred(), since
 * the (truncated) value is returned either way.
 */
static inline unsigned short PyLong_AsUnsignedShort(PyObject *pylong)
{
	unsigned long value = PyLong_AsUnsignedLong(pylong);

	if (!PyErr_Occurred() && value > USHRT_MAX)
		PyErr_SetString(PyExc_OverflowError, "int too big to convert");
	return value;
}
/*
 * Convert a Python int to a C int. OverflowError is set if the value is
 * outside [INT_MIN, INT_MAX]. Callers must check PyErr_Occurred(), since the
 * (truncated) value is returned either way.
 *
 * NOTE(review): newer CPython releases export a function with this name;
 * confirm there is no clash with the targeted Python versions.
 */
static inline int PyLong_AsInt(PyObject *pylong)
{
	long value = PyLong_AsLong(pylong);

	if (!PyErr_Occurred() && (value < INT_MIN || value > INT_MAX))
		PyErr_SetString(PyExc_OverflowError, "int too big to convert");
	return value;
}
/*
 * Convert a Python int to a C unsigned int. OverflowError is set if the
 * value is greater than UINT_MAX. Callers must check PyErr_Occurred(), since
 * the (truncated) value is returned either way.
 */
static inline unsigned int PyLong_AsUnsignedInt(PyObject *pylong)
{
	unsigned long value = PyLong_AsUnsignedLong(pylong);

	if (!PyErr_Occurred() && value > UINT_MAX)
		PyErr_SetString(PyExc_OverflowError, "int too big to convert");
	return value;
}
/*
 * The T_* Python constants haven't caught up to stdint.h, so map each
 * fixed-width type used in the member tables onto a structmember.h constant.
 * NOTE(review): these mappings assume char/short/int/long long are
 * 8/16/32/64 bits wide on the target platform.
 */
#define T_INT8T T_BYTE
#define T_UINT8T T_UBYTE
#define T_UINT16T T_USHORT
#define T_UINT32T T_UINT
#define T_UINT64T T_ULONGLONG
/*
 * Convert a Python int to a uint8_t. OverflowError is set if the value is
 * greater than UINT8_MAX. Callers must check PyErr_Occurred(), since the
 * (truncated) value is returned either way.
 */
static inline uint8_t PyLong_AsUint8_t(PyObject *pylong)
{
	unsigned long value = PyLong_AsUnsignedLong(pylong);

	if (!PyErr_Occurred() && value > UINT8_MAX)
		PyErr_SetString(PyExc_OverflowError, "int too big to convert");
	return value;
}
/*
 * Convert a Python int to an int8_t. OverflowError is set if the value is
 * outside [INT8_MIN, INT8_MAX]. Callers must check PyErr_Occurred(), since
 * the (truncated) value is returned either way.
 */
static inline int8_t PyLong_AsInt8_t(PyObject *pylong)
{
	long value = PyLong_AsLong(pylong);

	if (!PyErr_Occurred() && (value < INT8_MIN || value > INT8_MAX))
		PyErr_SetString(PyExc_OverflowError, "int too big to convert");
	return value;
}
/*
 * Convert a Python int to a uint16_t. OverflowError is set if the value is
 * greater than UINT16_MAX. Callers must check PyErr_Occurred(), since the
 * (truncated) value is returned either way.
 */
static inline uint16_t PyLong_AsUint16_t(PyObject *pylong)
{
	unsigned long value = PyLong_AsUnsignedLong(pylong);

	if (!PyErr_Occurred() && value > UINT16_MAX)
		PyErr_SetString(PyExc_OverflowError, "int too big to convert");
	return value;
}
/*
 * Convert a Python int to a uint64_t by way of PyLong_AsUnsignedLongLong().
 * No extra range check is done here; errors are reported by that call.
 */
static inline uint64_t PyLong_AsUint64_t(PyObject *pylong)
{
	unsigned long long value = PyLong_AsUnsignedLongLong(pylong);

	return value;
}
/*
 * Convert a Python int to an int64_t by way of PyLong_AsLongLong(). No extra
 * range check is done here; errors are reported by that call.
 */
static inline int64_t PyLong_AsInt64_t(PyObject *pylong)
{
	long long value = PyLong_AsLongLong(pylong);

	return value;
}
#endif /* LLDWARF_H */

463
lldwarf/module.c Normal file
View File

@ -0,0 +1,463 @@
#include "lldwarf.h"
/*
 * Decode an unsigned LEB128 value from buffer at *offset into *ret and
 * advance *offset past it.
 *
 * Returns 0 on success, or -1 with an exception set: the error from read_u8()
 * if the buffer ends early, or OverflowError if the value does not fit in 64
 * bits. *ret may hold a partial value on failure.
 */
int read_uleb128(Py_buffer *buffer, Py_ssize_t *offset, uint64_t *ret)
{
	uint8_t byte;
	int shift;

	*ret = 0;
	shift = 0;
	do {
		if (read_u8(buffer, offset, &byte) == -1)
			return -1;
		/* Only one bit of the tenth byte still fits in the result. */
		if (shift == 63 && byte > 1) {
			PyErr_SetString(PyExc_OverflowError,
					"ULEB128 overflowed unsigned 64-bit integer");
			return -1;
		}
		*ret |= (byte & UINT64_C(0x7f)) << shift;
		shift += 7;
	} while (byte & 0x80);

	return 0;
}
/*
 * Decode a signed LEB128 value from buffer at *offset into *ret and advance
 * *offset past it.
 *
 * The value is accumulated in an unsigned integer so that none of the shifts
 * overflow a signed type (which is undefined behavior in C); it is converted
 * to int64_t only once at the end.
 *
 * Returns 0 on success, or -1 with an exception set: the error from read_u8()
 * if the buffer ends early, or OverflowError if the value does not fit in 64
 * bits. *ret is 0 on failure.
 */
int read_sleb128(Py_buffer *buffer, Py_ssize_t *offset, int64_t *ret)
{
	uint64_t value = 0;
	int shift = 0;
	uint8_t byte;

	*ret = 0;
	for (;;) {
		if (read_u8(buffer, offset, &byte) == -1)
			return -1;
		/*
		 * The tenth byte contributes a single bit; it must be a pure
		 * sign extension (all zeros or all ones) and end the value.
		 */
		if (shift == 63 && byte != 0 && byte != 0x7f) {
			PyErr_SetString(PyExc_OverflowError,
					"SLEB128 overflowed signed 64-bit integer");
			return -1;
		}
		value |= (byte & UINT64_C(0x7f)) << shift;
		shift += 7;
		if (!(byte & 0x80))
			break;
	}
	/* Sign-extend if the sign bit of the last byte is set. */
	if (shift < 64 && (byte & 0x40))
		value |= ~UINT64_C(0) << shift;
	*ret = (int64_t)value;
	return 0;
}
/*
 * Measure the NUL-terminated string starting at *offset.
 *
 * On success, *len is set to the length of the string without the terminator
 * and *offset is advanced past the terminator.
 *
 * Returns 0 on success, or -1 with ValueError set if *offset is at or past
 * the end of the buffer or no terminator is found.
 */
int read_strlen(Py_buffer *buffer, Py_ssize_t *offset, Py_ssize_t *len)
{
	char *start, *terminator;

	if (buffer->len <= *offset) {
		PyErr_Format(PyExc_ValueError,
			     "unexpected EOF while parsing string");
		return -1;
	}

	start = (char *)buffer->buf + *offset;
	terminator = memchr(start, 0, buffer->len - *offset);
	if (terminator == NULL) {
		PyErr_Format(PyExc_ValueError, "unterminated string");
		return -1;
	}

	*len = terminator - start;
	*offset += *len + 1;
	return 0;
}
/*
 * parse_uleb128(buffer, offset=0) -> int
 *
 * Python entry point for read_uleb128(). A negative offset raises ValueError,
 * and an EOFError from the reader is reported as ValueError instead.
 */
static PyObject *parse_uleb128(PyObject *self, PyObject *args, PyObject *kwds)
{
	static char *kwlist[] = {"buffer", "offset", NULL};
	PyObject *result = NULL;
	Py_buffer view;
	Py_ssize_t pos = 0;
	uint64_t decoded;

	if (!PyArg_ParseTupleAndKeywords(args, kwds, "y*|n:parse_uleb128",
					 kwlist, &view, &pos))
		return NULL;

	if (pos < 0) {
		PyErr_SetString(PyExc_ValueError, "offset cannot be negative");
		goto out;
	}
	if (read_uleb128(&view, &pos, &decoded) == -1) {
		if (PyErr_ExceptionMatches(PyExc_EOFError))
			PyErr_SetString(PyExc_ValueError, "ULEB128 is truncated");
		goto out;
	}
	result = PyLong_FromUnsignedLongLong(decoded);

out:
	PyBuffer_Release(&view);
	return result;
}
/*
 * parse_sleb128(buffer, offset=0) -> int
 *
 * Python entry point for read_sleb128(). A negative offset raises ValueError,
 * and an EOFError from the reader is reported as ValueError instead.
 */
static PyObject *parse_sleb128(PyObject *self, PyObject *args, PyObject *kwds)
{
	static char *kwlist[] = {"buffer", "offset", NULL};
	PyObject *result = NULL;
	Py_buffer view;
	Py_ssize_t pos = 0;
	int64_t decoded;

	if (!PyArg_ParseTupleAndKeywords(args, kwds, "y*|n:parse_sleb128",
					 kwlist, &view, &pos))
		return NULL;

	if (pos < 0) {
		PyErr_SetString(PyExc_ValueError, "offset cannot be negative");
		goto out;
	}
	if (read_sleb128(&view, &pos, &decoded) == -1) {
		if (PyErr_ExceptionMatches(PyExc_EOFError))
			PyErr_SetString(PyExc_ValueError, "SLEB128 is truncated");
		goto out;
	}
	result = PyLong_FromLongLong(decoded);

out:
	PyBuffer_Release(&view);
	return result;
}
/*
 * parse_uleb128_offset(buffer, offset=0) -> (int, int)
 *
 * Like parse_uleb128(), but returns a tuple of the decoded value and the
 * offset just past it. A negative offset raises ValueError, and an EOFError
 * from the reader is reported as ValueError instead.
 */
static PyObject *parse_uleb128_offset(PyObject *self, PyObject *args, PyObject *kwds)
{
	static char *keywords[] = {"buffer", "offset", NULL};
	Py_buffer buffer;
	Py_ssize_t offset = 0;
	uint64_t value;

	if (!PyArg_ParseTupleAndKeywords(args, kwds, "y*|n:parse_uleb128_offset",
					 keywords, &buffer, &offset))
		return NULL;

	if (offset < 0) {
		PyErr_SetString(PyExc_ValueError, "offset cannot be negative");
		PyBuffer_Release(&buffer);
		return NULL;
	}

	if (read_uleb128(&buffer, &offset, &value) == -1) {
		if (PyErr_ExceptionMatches(PyExc_EOFError))
			PyErr_SetString(PyExc_ValueError, "ULEB128 is truncated");
		PyBuffer_Release(&buffer);
		return NULL;
	}

	PyBuffer_Release(&buffer);
	/*
	 * "K" consumes an unsigned long long from the variadic arguments;
	 * uint64_t is not guaranteed to be that exact type, so convert
	 * explicitly.
	 */
	return Py_BuildValue("Kn", (unsigned long long)value, offset);
}
/*
 * parse_abbrev_table(buffer, offset=0)
 *
 * Python entry point for LLDwarf_ParseAbbrevTable(). A negative offset raises
 * ValueError; otherwise the parser's result is returned unchanged.
 */
static PyObject *parse_abbrev_table(PyObject *self, PyObject *args,
				    PyObject *kwds)
{
	static char *kwlist[] = {"buffer", "offset", NULL};
	PyObject *table = NULL;
	Py_buffer view;
	Py_ssize_t pos = 0;

	if (!PyArg_ParseTupleAndKeywords(args, kwds, "y*|n:parse_abbrev_table",
					 kwlist, &view, &pos))
		return NULL;

	if (pos >= 0)
		table = LLDwarf_ParseAbbrevTable(&view, &pos);
	else
		PyErr_SetString(PyExc_ValueError, "offset cannot be negative");

	PyBuffer_Release(&view);
	return table;
}
/*
 * parse_compilation_unit_header(buffer, offset=0)
 *
 * Python entry point for LLDwarf_ParseCompilationUnitHeader(). A negative
 * offset raises ValueError; otherwise the parser's result is returned
 * unchanged.
 */
static PyObject *parse_compilation_unit_header(PyObject *self, PyObject *args,
					       PyObject *kwds)
{
	static char *kwlist[] = {"buffer", "offset", NULL};
	PyObject *header = NULL;
	Py_buffer view;
	Py_ssize_t pos = 0;

	if (!PyArg_ParseTupleAndKeywords(args, kwds,
					 "y*|n:parse_compilation_unit_header",
					 kwlist, &view, &pos))
		return NULL;

	if (pos >= 0)
		header = LLDwarf_ParseCompilationUnitHeader(&view, &pos);
	else
		PyErr_SetString(PyExc_ValueError, "offset cannot be negative");

	PyBuffer_Release(&view);
	return header;
}
/*
 * parse_die(cu, abbrev_table, buffer, offset=0, recurse=False)
 *
 * Python entry point for LLDwarf_ParseDie(). cu must be a
 * CompilationUnitHeader and abbrev_table a dict. A negative offset raises
 * ValueError. If the parser returns NULL without setting an exception, None
 * is returned to Python.
 */
static PyObject *parse_die(PyObject *self, PyObject *args, PyObject *kwds)
{
	static char *kwlist[] = {
		"cu", "abbrev_table", "buffer", "offset", "recurse", NULL,
	};
	PyObject *cu_obj, *abbrevs;
	PyObject *die = NULL;
	Py_buffer view;
	Py_ssize_t pos = 0;
	int recurse = 0;

	if (!PyArg_ParseTupleAndKeywords(args, kwds, "O!O!y*|np:parse_die",
					 kwlist,
					 (PyObject *)&CompilationUnitHeader_type, &cu_obj,
					 (PyObject *)&PyDict_Type, &abbrevs,
					 &view, &pos, &recurse))
		return NULL;

	if (pos < 0) {
		PyErr_SetString(PyExc_ValueError, "offset cannot be negative");
		goto out;
	}

	die = LLDwarf_ParseDie(&view, &pos, (CompilationUnitHeader *)cu_obj,
			       abbrevs, recurse, false);
	if (die == NULL && !PyErr_Occurred()) {
		/* NULL without an exception is not an error: hand back None. */
		die = Py_None;
		Py_INCREF(die);
	}

out:
	PyBuffer_Release(&view);
	return die;
}
/*
 * parse_die_siblings(cu, abbrev_table, buffer, offset=0, recurse=False)
 *
 * Python entry point for LLDwarf_ParseDieSiblings(). cu must be a
 * CompilationUnitHeader and abbrev_table a dict. A negative offset raises
 * ValueError; otherwise the parser's result is returned unchanged.
 */
static PyObject *parse_die_siblings(PyObject *self, PyObject *args,
				    PyObject *kwds)
{
	static char *kwlist[] = {
		"cu", "abbrev_table", "buffer", "offset", "recurse", NULL,
	};
	PyObject *cu_obj, *abbrevs;
	PyObject *siblings = NULL;
	Py_buffer view;
	Py_ssize_t pos = 0;
	int recurse = 0;

	if (!PyArg_ParseTupleAndKeywords(args, kwds, "O!O!y*|np:parse_die_siblings",
					 kwlist,
					 (PyObject *)&CompilationUnitHeader_type, &cu_obj,
					 (PyObject *)&PyDict_Type, &abbrevs,
					 &view, &pos, &recurse))
		return NULL;

	if (pos >= 0)
		siblings = LLDwarf_ParseDieSiblings(&view, &pos,
						    (CompilationUnitHeader *)cu_obj,
						    abbrevs, recurse);
	else
		PyErr_SetString(PyExc_ValueError, "offset cannot be negative");

	PyBuffer_Release(&view);
	return siblings;
}
/*
 * parse_line_number_program_header(buffer, offset=0)
 *
 * Python entry point for LLDwarf_ParseLineNumberProgramHeader(). A negative
 * offset raises ValueError; otherwise the parser's result is returned
 * unchanged.
 */
static PyObject *parse_line_number_program_header(PyObject *self,
						  PyObject *args,
						  PyObject *kwds)
{
	static char *kwlist[] = {"buffer", "offset", NULL};
	PyObject *header = NULL;
	Py_buffer view;
	Py_ssize_t pos = 0;

	if (!PyArg_ParseTupleAndKeywords(args, kwds,
					 "y*|n:parse_line_number_program_header",
					 kwlist, &view, &pos))
		return NULL;

	if (pos >= 0)
		header = LLDwarf_ParseLineNumberProgramHeader(&view, &pos);
	else
		PyErr_SetString(PyExc_ValueError, "offset cannot be negative");

	PyBuffer_Release(&view);
	return header;
}
/*
 * execute_line_number_program(lnp, buffer, offset=0)
 *
 * Python entry point for LLDwarf_ExecuteLineNumberProgram(). lnp must be a
 * LineNumberProgramHeader. A negative offset raises ValueError; otherwise the
 * result of executing the program is returned unchanged.
 */
static PyObject *execute_line_number_program(PyObject *self, PyObject *args,
					     PyObject *kwds)
{
	static char *kwlist[] = {"lnp", "buffer", "offset", NULL};
	PyObject *header;
	PyObject *matrix = NULL;
	Py_buffer view;
	Py_ssize_t pos = 0;

	if (!PyArg_ParseTupleAndKeywords(args, kwds,
					 "O!y*|n:execute_line_number_program",
					 kwlist,
					 (PyObject *)&LineNumberProgramHeader_type,
					 &header, &view, &pos))
		return NULL;

	if (pos >= 0)
		matrix = LLDwarf_ExecuteLineNumberProgram((LineNumberProgramHeader *)header,
							  &view, &pos);
	else
		PyErr_SetString(PyExc_ValueError, "offset cannot be negative");

	PyBuffer_Release(&view);
	return matrix;
}
/*
 * Module-level function table. Every function accepts positional and
 * keyword arguments; the array is terminated by an empty sentinel entry.
 */
static PyMethodDef lldwarf_methods[] = {
{"parse_uleb128", (PyCFunction)parse_uleb128,
METH_VARARGS | METH_KEYWORDS,
"parse_uleb128(buffer, offset=0) -> int\n\n"
"Parse an unsigned LEB128-encoded integer.\n\n"
"Arguments:\n"
"buffer -- readable source buffer\n"
"offset -- optional offset into the buffer"},
{"parse_sleb128", (PyCFunction)parse_sleb128,
METH_VARARGS | METH_KEYWORDS,
"parse_sleb128(buffer, offset=0) -> int\n\n"
"Parse a signed LEB128-encoded integer.\n\n"
"Arguments:\n"
"buffer -- readable source buffer\n"
"offset -- optional offset into the buffer"},
{"parse_uleb128_offset", (PyCFunction)parse_uleb128_offset,
METH_VARARGS | METH_KEYWORDS,
"parse_uleb128_offset(buffer, offset=0) -> (int, int)\n\n"
"Like parse_uleb128() but also returns the ending offset in the\n"
"buffer.\n\n"
"Arguments:\n"
"buffer -- readable source buffer\n"
"offset -- optional offset into the buffer"},
{"parse_abbrev_table", (PyCFunction)parse_abbrev_table,
METH_VARARGS | METH_KEYWORDS,
"parse_abbrev_table(buffer, offset=0) -> dict[code]: AbbrevDecl \n\n"
"Parse an abbreviation table.\n\n"
"Arguments:\n"
"buffer -- readable source buffer\n"
"offset -- optional offset into the buffer"},
{"parse_compilation_unit_header",
(PyCFunction)parse_compilation_unit_header,
METH_VARARGS | METH_KEYWORDS,
"parse_compilation_unit_header(buffer, offset=0) -> CompilationUnitHeader\n\n"
"Parse a compilation unit header.\n\n"
"Arguments:\n"
"buffer -- readable source buffer\n"
"offset -- optional offset into the buffer"},
{"parse_die", (PyCFunction)parse_die, METH_VARARGS | METH_KEYWORDS,
"parse_die(cu, abbrev_table, buffer, offset=0, recurse=False) -> DwarfDie\n\n"
"Parse a debugging information entry.\n\n"
"Arguments:\n"
"cu -- compilation unit header\n"
"abbrev_table -- abbreviation table\n"
"buffer -- readable source buffer\n"
"offset -- optional offset into the buffer\n"
"recurse -- boolean specifying whether to also parse the DIE's children"},
/*
 * NOTE(review): the signature line below says "-> DwarfDie" while the summary
 * says a list of siblings is parsed; confirm the actual return type and
 * adjust the docstring if needed.
 */
{"parse_die_siblings", (PyCFunction)parse_die_siblings,
METH_VARARGS | METH_KEYWORDS,
"parse_die_siblings(cu, abbrev_table, buffer, offset=0, recurse=False) -> DwarfDie\n\n"
"Parse a list of sibling debugging information entries.\n\n"
"Arguments:\n"
"cu -- compilation unit header\n"
"abbrev_table -- abbreviation table\n"
"buffer -- readable source buffer\n"
"offset -- optional offset into the buffer\n"
"recurse -- boolean specifying whether to also parse the DIEs' children"},
{"parse_line_number_program_header",
(PyCFunction)parse_line_number_program_header,
METH_VARARGS | METH_KEYWORDS,
"parse_line_number_program_header(buffer, offset=0) -> LineNumberProgramHeader\n\n"
"Parse a line number program header.\n\n"
"Arguments:\n"
"buffer -- readable source buffer\n"
"offset -- optional offset into the buffer"},
{"execute_line_number_program",
(PyCFunction)execute_line_number_program,
METH_VARARGS | METH_KEYWORDS,
"execute_line_number_program(lnp, buffer, offset=0) -> list of LineNumberRow\n\n"
"Execute a line number program to reconstruct the line number\n"
"information matrix.\n\n"
"Arguments:\n"
"lnp -- line number program header\n"
"buffer -- readable source buffer\n"
"offset -- optional offset into the buffer"},
/* Sentinel. */
{},
};
/*
 * Module definition: name, docstring, per-module state size (-1, i.e. the
 * module keeps its state in globals) and the function table.
 */
static struct PyModuleDef lldwarfmodule = {
PyModuleDef_HEAD_INIT,
"lldwarf",
"Low-level DWARF debugging format library",
-1,
lldwarf_methods,
};
PyMODINIT_FUNC
PyInit_lldwarf(void)
{
PyObject *m;
if (PyType_Ready(&AbbrevDecl_type) < 0)
return NULL;
CompilationUnitHeader_type.tp_new = PyType_GenericNew;
if (PyType_Ready(&CompilationUnitHeader_type) < 0)
return NULL;
if (PyType_Ready(&DwarfDie_type) < 0)
return NULL;
LineNumberProgramHeader_type.tp_new = PyType_GenericNew;
if (PyType_Ready(&LineNumberProgramHeader_type) < 0)
return NULL;
LineNumberRow_type.tp_new = PyType_GenericNew;
if (PyType_Ready(&LineNumberRow_type) < 0)
return NULL;
#ifdef TEST_LLDWARFOBJECT
TestObject_type.tp_new = PyType_GenericNew;
if (PyType_Ready(&TestObject_type) < 0)
return NULL;
#endif
m = PyModule_Create(&lldwarfmodule);
if (m == NULL)
return NULL;
Py_INCREF(&AbbrevDecl_type);
PyModule_AddObject(m, "AbbrevDecl", (PyObject *)&AbbrevDecl_type);
Py_INCREF(&CompilationUnitHeader_type);
PyModule_AddObject(m, "CompilationUnitHeader",
(PyObject *)&CompilationUnitHeader_type);
Py_INCREF(&DwarfDie_type);
PyModule_AddObject(m, "DwarfDie", (PyObject *)&DwarfDie_type);
Py_INCREF(&LineNumberProgramHeader_type);
PyModule_AddObject(m, "LineNumberProgramHeader",
(PyObject *)&LineNumberProgramHeader_type);
Py_INCREF(&LineNumberRow_type);
PyModule_AddObject(m, "LineNumberRow", (PyObject *)&LineNumberRow_type);
#ifdef TEST_LLDWARFOBJECT
Py_INCREF(&TestObject_type);
PyModule_AddObject(m, "_TestObject", (PyObject *)&TestObject_type);
#endif
return m;
}

560
lldwarf/object.c Normal file
View File

@ -0,0 +1,560 @@
#include "lldwarf.h"
/*
 * Return the unqualified name of a type: the part of tp_name following its
 * last '.', or all of tp_name when it contains no '.'. The returned pointer
 * points into tp_name itself.
 */
static const char *type_name(PyTypeObject *type)
{
	const char *last_dot = strrchr(type->tp_name, '.');

	return last_dot ? last_dot + 1 : type->tp_name;
}
/*
 * Store var into the struct member described by member. The width of the
 * write is taken from var's own type (typeof is a GNU C extension).
 */
#define CONVERTARG(self, member, var)	\
	*(typeof(var) *)((char *)(self) + (member)->offset) = (var);

/*
 * Convert the Python object arg to the C type named by member->type and
 * store it at member->offset inside self.
 *
 * Returns 0 on success and -1 with an exception set on failure.
 * T_FLOAT, T_DOUBLE, T_STRING and unknown member types raise
 * NotImplementedError.
 *
 * NOTE(review): PyLong_AsShort(), PyLong_AsInt(), PyLong_AsChar() and the
 * unsigned variants are not defined in this file; presumably they are
 * range-checking helpers declared via lldwarf.h -- confirm.
 */
static int convertarg(PyObject *self, PyMemberDef *member, PyObject *arg)
{
	switch (member->type) {
	case T_SHORT:
	{
		short tmp = PyLong_AsShort(arg);

		if (PyErr_Occurred())
			return -1;
		CONVERTARG(self, member, tmp);
		return 0;
	}
	case T_INT:
	{
		int tmp = PyLong_AsInt(arg);

		if (PyErr_Occurred())
			return -1;
		CONVERTARG(self, member, tmp);
		return 0;
	}
	case T_LONG:
	{
		long tmp = PyLong_AsLong(arg);

		if (PyErr_Occurred())
			return -1;
		CONVERTARG(self, member, tmp);
		return 0;
	}
	case T_FLOAT:
		PyErr_SetString(PyExc_NotImplementedError,
				"T_FLOAT init not implemented");
		return -1;
	case T_DOUBLE:
		PyErr_SetString(PyExc_NotImplementedError,
				"T_DOUBLE init not implemented");
		return -1;
	case T_STRING:
		PyErr_SetString(PyExc_NotImplementedError,
				"T_STRING init not implemented");
		return -1;
	case T_OBJECT:
	case T_OBJECT_EX:
	{
		PyObject **slot = (PyObject **)((char *)self + member->offset);
		PyObject *old = *slot;

		/*
		 * __init__() can be called again on an initialized object, so
		 * release whatever the slot held before. This relies on the
		 * slot being NULL in a freshly allocated object.
		 */
		Py_INCREF(arg);
		*slot = arg;
		Py_XDECREF(old);
		return 0;
	}
	case T_CHAR:
	{
		Py_UCS4 tmp;

		if (!PyUnicode_Check(arg)) {
			/* Returning -1 without an exception is an error. */
			PyErr_SetString(PyExc_TypeError,
					"expected a character");
			return -1;
		}
		if (PyUnicode_READY(arg) == -1)
			return -1;
		if (PyUnicode_GET_LENGTH(arg) != 1) {
			PyErr_SetString(PyExc_ValueError,
					"expected a character");
			return -1;
		}
		tmp = PyUnicode_READ_CHAR(arg, 0);
		/* Only ASCII fits in the single char that is stored. */
		if (tmp > 0x7f) {
			PyErr_SetString(PyExc_ValueError,
					"character out of range");
			return -1;
		}
		*((char *)self + member->offset) = tmp;
		return 0;
	}
	case T_BYTE:
	{
		char tmp = PyLong_AsChar(arg);

		if (PyErr_Occurred())
			return -1;
		CONVERTARG(self, member, tmp);
		return 0;
	}
	case T_UBYTE:
	{
		unsigned char tmp = PyLong_AsUnsignedChar(arg);

		if (PyErr_Occurred())
			return -1;
		CONVERTARG(self, member, tmp);
		return 0;
	}
	case T_UINT:
	{
		unsigned int tmp = PyLong_AsUnsignedInt(arg);

		if (PyErr_Occurred())
			return -1;
		CONVERTARG(self, member, tmp);
		return 0;
	}
	case T_USHORT:
	{
		unsigned short tmp = PyLong_AsUnsignedShort(arg);

		if (PyErr_Occurred())
			return -1;
		CONVERTARG(self, member, tmp);
		return 0;
	}
	case T_ULONG:
	{
		unsigned long tmp = PyLong_AsUnsignedLong(arg);

		if (PyErr_Occurred())
			return -1;
		CONVERTARG(self, member, tmp);
		return 0;
	}
	case T_BOOL:
	{
		/* PyObject_IsTrue() returns -1 if the truth test raised. */
		int truth = PyObject_IsTrue(arg);
		char tmp;

		if (truth == -1)
			return -1;
		/* Stored as a char, which is how the rest of this file reads it. */
		tmp = truth;
		CONVERTARG(self, member, tmp);
		return 0;
	}
	case T_LONGLONG:
	{
		long long tmp = PyLong_AsLongLong(arg);

		if (PyErr_Occurred())
			return -1;
		CONVERTARG(self, member, tmp);
		return 0;
	}
	case T_ULONGLONG:
	{
		unsigned long long tmp = PyLong_AsUnsignedLongLong(arg);

		if (PyErr_Occurred())
			return -1;
		CONVERTARG(self, member, tmp);
		return 0;
	}
	case T_PYSSIZET:
	{
		Py_ssize_t tmp = PyLong_AsSsize_t(arg);

		if (PyErr_Occurred())
			return -1;
		CONVERTARG(self, member, tmp);
		return 0;
	}
	default:
		PyErr_Format(PyExc_NotImplementedError, "member type %d not implemented",
			     member->type);
		return -1;
	}
}
/*
 * Generic tp_init: initialize every entry of the type's tp_members table from
 * the call's arguments.
 *
 * Members are matched to positional arguments in table order; members beyond
 * the positional arguments are looked up by name in kwds. Every member is
 * required.
 *
 * Returns 0 on success and -1 with an exception set on failure. Members
 * converted before a failure keep their new values.
 */
int LLDwarfObject_init(PyObject *self, PyObject *args, PyObject *kwds)
{
	PyTypeObject *type = (PyTypeObject *)Py_TYPE(self);
	Py_ssize_t nmembers, nargs, nkwargs, i;

	/* Never return -1 without an exception set. */
	if (!PyTuple_Check(args)) {
		PyErr_SetString(PyExc_TypeError, "args must be a tuple");
		return -1;
	}
	if (kwds && !PyDict_Check(kwds)) {
		PyErr_SetString(PyExc_TypeError, "kwds must be a dict");
		return -1;
	}
	if (kwds && !PyArg_ValidateKeywordArguments(kwds))
		return -1;

	/* tp_members is terminated by an entry with a NULL name. */
	nmembers = 0;
	while (type->tp_members[nmembers].name)
		nmembers++;

	nargs = PyTuple_GET_SIZE(args);
	nkwargs = kwds ? PyDict_Size(kwds) : 0;
	if (nargs + nkwargs > nmembers) {
		PyErr_Format(PyExc_TypeError, "%s() takes at most %zd argument%s (%zd given)",
			     type_name(type), nmembers,
			     nmembers == 1 ? "" : "s", nargs + nkwargs);
		return -1;
	}

	for (i = 0; i < nmembers; i++) {
		PyMemberDef *member = &type->tp_members[i];
		PyObject *arg;

		if (i < nargs) {
			arg = PyTuple_GET_ITEM(args, i);
		} else if (nkwargs) {
			/* Borrowed reference; NULL if the keyword is absent. */
			arg = PyDict_GetItemString(kwds, member->name);
			if (arg)
				nkwargs--;
		} else {
			arg = NULL;
		}

		if (!arg) {
			/* i is a Py_ssize_t, so it must be formatted with %zd. */
			PyErr_Format(PyExc_TypeError, "Required argument '%s' (pos %zd) not found",
				     member->name, i + 1);
			return -1;
		}

		if (convertarg(self, member, arg) == -1)
			return -1;
	}

	return 0;
}
/* Access the struct member described by member as an lvalue of the given type. */
#define MEMBER(self, member, type)	\
	*(type *)((char *)(self) + (member)->offset)

/*
 * Build the "name=value" string for one member of self.
 *
 * Returns a new str object, or NULL with an exception set. T_FLOAT, T_DOUBLE
 * and T_STRING raise NotImplementedError; an unknown member type raises
 * ValueError.
 */
static PyObject *repr_member(PyObject *self, PyMemberDef *member)
{
	const char *name = member->name;

	switch (member->type) {
	case T_SHORT:
		return PyUnicode_FromFormat("%s=%d", name,
					    (int)MEMBER(self, member, short));
	case T_INT:
		return PyUnicode_FromFormat("%s=%d", name,
					    MEMBER(self, member, int));
	case T_LONG:
		return PyUnicode_FromFormat("%s=%ld", name,
					    MEMBER(self, member, long));
	case T_FLOAT:
		PyErr_SetString(PyExc_NotImplementedError,
				"T_FLOAT repr not implemented");
		return NULL;
	case T_DOUBLE:
		PyErr_SetString(PyExc_NotImplementedError,
				"T_DOUBLE repr not implemented");
		return NULL;
	case T_STRING:
		PyErr_SetString(PyExc_NotImplementedError,
				"T_STRING repr not implemented");
		return NULL;
	case T_OBJECT:
	case T_OBJECT_EX:
	{
		PyObject *value = MEMBER(self, member, PyObject *);

		/* An unset (NULL) object slot is shown as None. */
		if (!value)
			return PyUnicode_FromFormat("%s=None", name);
		return PyUnicode_FromFormat("%s=%R", name, value);
	}
	case T_CHAR:
	{
		PyObject *ch, *text;

		/* Wrap the single char in a str so that %R quotes it. */
		ch = PyUnicode_FromStringAndSize((char *)self + member->offset,
						 1);
		if (!ch)
			return NULL;
		text = PyUnicode_FromFormat("%s=%R", name, ch);
		Py_DECREF(ch);
		return text;
	}
	case T_BYTE:
		return PyUnicode_FromFormat("%s=%d", name,
					    (int)MEMBER(self, member, char));
	case T_UBYTE:
		return PyUnicode_FromFormat("%s=%u", name,
					    (unsigned int)MEMBER(self, member, unsigned char));
	case T_UINT:
		return PyUnicode_FromFormat("%s=%u", name,
					    MEMBER(self, member, unsigned int));
	case T_USHORT:
		return PyUnicode_FromFormat("%s=%u", name,
					    (unsigned int)MEMBER(self, member, unsigned short));
	case T_ULONG:
		return PyUnicode_FromFormat("%s=%lu", name,
					    MEMBER(self, member, unsigned long));
	case T_BOOL:
		if (!MEMBER(self, member, char))
			return PyUnicode_FromFormat("%s=False", name);
		return PyUnicode_FromFormat("%s=True", name);
	case T_LONGLONG:
		return PyUnicode_FromFormat("%s=%lld", name,
					    MEMBER(self, member, long long));
	case T_ULONGLONG:
		return PyUnicode_FromFormat("%s=%llu", name,
					    MEMBER(self, member, unsigned long long));
	case T_PYSSIZET:
		return PyUnicode_FromFormat("%s=%zd", name,
					    MEMBER(self, member, Py_ssize_t));
	default:
		PyErr_Format(PyExc_ValueError, "unknown member type %d",
			     member->type);
		return NULL;
	}
}
/*
 * Generic tp_repr: format self as "TypeName(member=value, ...)" with one entry
 * per tp_members entry, in table order.
 *
 * A recursive repr of the same object yields "TypeName(...)". Returns a new
 * str object, or NULL with an exception set.
 */
PyObject *LLDwarfObject_repr(PyObject *self)
{
	PyTypeObject *type = (PyTypeObject *)Py_TYPE(self);
	PyObject *parts = NULL, *comma = NULL, *joined = NULL, *result = NULL;
	Py_ssize_t count, idx;
	int status;

	status = Py_ReprEnter(self);
	if (status == -1)
		return NULL;
	/* Already inside this object's repr: no Py_ReprLeave() is owed here. */
	if (status)
		return PyUnicode_FromFormat("%s(...)", type_name(type));

	for (count = 0; type->tp_members[count].name; count++)
		;

	parts = PyTuple_New(count);
	if (!parts)
		goto done;
	for (idx = 0; idx < count; idx++) {
		PyObject *item = repr_member(self, &type->tp_members[idx]);

		if (!item)
			goto done;
		/* The tuple takes over the reference to item. */
		PyTuple_SET_ITEM(parts, idx, item);
	}

	comma = PyUnicode_FromString(", ");
	if (!comma)
		goto done;
	joined = PyUnicode_Join(comma, parts);
	if (!joined)
		goto done;
	result = PyUnicode_FromFormat("%s(%S)", type_name(type), joined);

done:
	Py_XDECREF(joined);
	Py_XDECREF(comma);
	Py_XDECREF(parts);
	Py_ReprLeave(self);
	return result;
}
/*
 * Compare one member of self with the same member of other.
 *
 * Object members are compared with ==; a NULL slot only equals another NULL
 * slot. All other member types are compared bytewise over the size of their C
 * type.
 *
 * Returns 1 if the members are equal, 0 if they differ, and -1 with an
 * exception set on error (unknown member type, or the object comparison
 * raised).
 */
static int member_cmp(PyObject *self, PyObject *other, PyMemberDef *member)
{
	PyObject *self_obj, *other_obj;
	size_t size;

	switch (member->type) {
	case T_SHORT:
		size = sizeof(short);
		break;
	case T_INT:
		size = sizeof(int);
		break;
	case T_LONG:
		size = sizeof(long);
		break;
	case T_FLOAT:
		size = sizeof(float);
		break;
	case T_DOUBLE:
		size = sizeof(double);
		break;
	case T_STRING:
		/* Compares the pointers, not the strings they point to. */
		size = sizeof(char *);
		break;
	case T_OBJECT:
	case T_OBJECT_EX:
		self_obj = MEMBER(self, member, PyObject *);
		other_obj = MEMBER(other, member, PyObject *);
		if (!self_obj || !other_obj)
			return !self_obj && !other_obj;
		return PyObject_RichCompareBool(self_obj, other_obj, Py_EQ);
	case T_CHAR:
	case T_BYTE:
	case T_BOOL:
		size = sizeof(char);
		break;
	case T_UBYTE:
		size = sizeof(unsigned char);
		break;
	case T_UINT:
		size = sizeof(unsigned int);
		break;
	case T_USHORT:
		size = sizeof(unsigned short);
		break;
	case T_ULONG:
		size = sizeof(unsigned long);
		/*
		 * This break was missing, so the size fell through to
		 * sizeof(long long) and the compare read past the member
		 * wherever long is narrower than long long.
		 */
		break;
	case T_LONGLONG:
		size = sizeof(long long);
		break;
	case T_ULONGLONG:
		size = sizeof(unsigned long long);
		break;
	case T_PYSSIZET:
		size = sizeof(Py_ssize_t);
		break;
	default:
		PyErr_Format(PyExc_ValueError, "unknown member type %d",
			     member->type);
		return -1;
	}

	return !memcmp((char *)self + member->offset,
		       (char *)other + member->offset, size);
}
/*
 * Compare self and other member by member.
 *
 * Only Py_EQ and Py_NE are supported; any other op raises TypeError. The
 * objects are equal when other is an instance of self's type and every
 * tp_members entry compares equal.
 *
 * Returns 1 if the comparison holds, 0 if it does not, and -1 with an
 * exception set on error.
 */
int LLDwarfObject_RichCompareBool(PyObject *self, PyObject *other, int op)
{
	PyTypeObject *type = (PyTypeObject *)Py_TYPE(self);
	PyMemberDef *member;
	int equal;

	if (op != Py_EQ && op != Py_NE) {
		PyErr_SetString(PyExc_TypeError, "not supported");
		return -1;
	}

	equal = PyObject_IsInstance(other, (PyObject *)type);
	if (equal == -1)
		return -1;

	/* Walk the members only while everything so far has matched. */
	for (member = type->tp_members; equal && member->name; member++) {
		equal = member_cmp(self, other, member);
		if (equal == -1)
			return -1;
	}

	return op == Py_NE ? !equal : equal;
}
/*
 * Generic tp_richcompare: wrap LLDwarfObject_RichCompareBool() and return its
 * verdict as a new reference to True or False, or NULL with an exception set
 * on error.
 */
PyObject *LLDwarfObject_richcompare(PyObject *self, PyObject *other, int op)
{
	PyObject *result;
	int verdict = LLDwarfObject_RichCompareBool(self, other, op);

	if (verdict == -1)
		return NULL;

	result = verdict ? Py_True : Py_False;
	Py_INCREF(result);
	return result;
}
#ifdef TEST_LLDWARFOBJECT
/*
 * Test-only object with one member per member type handled by the generic
 * init/repr/compare helpers above. Field names mirror the T_* constant used
 * for them in TestObject_members.
 */
typedef struct {
	PyObject_HEAD
	short m_short;
	int m_int;
	long m_long;
	/* The next three are disabled: init and repr raise NotImplementedError for them. */
	/* float m_float; */
	/* double m_double; */
	/* char *m_string; */
	PyObject *m_object;	/* owned reference, may be NULL */
	PyObject *m_object_ex;	/* owned reference, may be NULL */
	char m_char;
	char m_byte;
	unsigned char m_ubyte;
	unsigned int m_uint;
	unsigned short m_ushort;
	unsigned long m_ulong;
	char m_bool;		/* T_BOOL is read back as a char by repr/compare */
	long long m_longlong;
	unsigned long long m_ulonglong;
	Py_ssize_t m_pyssizet;
} TestObject;
/* tp_traverse: report the object references held by self to the collector. */
static int TestObject_traverse(TestObject *self, visitproc visit, void *arg)
{
	Py_VISIT(self->m_object);
	Py_VISIT(self->m_object_ex);
	return 0;
}

/* tp_clear: drop the object references held by self to break cycles. */
static int TestObject_clear(TestObject *self)
{
	Py_CLEAR(self->m_object);
	Py_CLEAR(self->m_object_ex);
	return 0;
}

/* tp_dealloc: release the held references and free the object. */
static void TestObject_dealloc(TestObject *self)
{
	/* A GC-tracked object must be untracked before it is torn down. */
	PyObject_GC_UnTrack(self);
	TestObject_clear(self);
	Py_TYPE(self)->tp_free((PyObject *)self);
}

/* One member per supported T_* type; the table ends with an empty entry. */
static PyMemberDef TestObject_members[] = {
	{"m_short", T_SHORT, offsetof(TestObject, m_short), 0, ""},
	{"m_int", T_INT, offsetof(TestObject, m_int), 0, ""},
	{"m_long", T_LONG, offsetof(TestObject, m_long), 0, ""},
	/* {"m_float", T_FLOAT, offsetof(TestObject, m_float), 0, ""}, */
	/* {"m_double", T_DOUBLE, offsetof(TestObject, m_double), 0, ""}, */
	/* {"m_string", T_STRING, offsetof(TestObject, m_string), 0, ""}, */
	{"m_object", T_OBJECT, offsetof(TestObject, m_object), 0, ""},
	{"m_object_ex", T_OBJECT_EX, offsetof(TestObject, m_object_ex), 0, ""},
	{"m_char", T_CHAR, offsetof(TestObject, m_char), 0, ""},
	{"m_byte", T_BYTE, offsetof(TestObject, m_byte), 0, ""},
	{"m_ubyte", T_UBYTE, offsetof(TestObject, m_ubyte), 0, ""},
	{"m_uint", T_UINT, offsetof(TestObject, m_uint), 0, ""},
	{"m_ushort", T_USHORT, offsetof(TestObject, m_ushort), 0, ""},
	{"m_ulong", T_ULONG, offsetof(TestObject, m_ulong), 0, ""},
	{"m_bool", T_BOOL, offsetof(TestObject, m_bool), 0, ""},
	{"m_longlong", T_LONGLONG, offsetof(TestObject, m_longlong), 0, ""},
	{"m_ulonglong", T_ULONGLONG, offsetof(TestObject, m_ulonglong), 0, ""},
	{"m_pyssizet", T_PYSSIZET, offsetof(TestObject, m_pyssizet), 0, ""},
	{},
};

/*
 * Type object for the test-only _TestObject. It holds arbitrary object
 * references, so it takes part in cyclic garbage collection: without
 * Py_TPFLAGS_HAVE_GC the tp_traverse slot is never used and a reference cycle
 * through m_object or m_object_ex would never be freed.
 */
PyTypeObject TestObject_type = {
	PyVarObject_HEAD_INIT(NULL, 0)
	"drgn.lldwarf._TestObject",	/* tp_name */
	sizeof(TestObject),		/* tp_basicsize */
	0,				/* tp_itemsize */
	(destructor)TestObject_dealloc,	/* tp_dealloc */
	NULL,				/* tp_print */
	NULL,				/* tp_getattr */
	NULL,				/* tp_setattr */
	NULL,				/* tp_as_async */
	LLDwarfObject_repr,		/* tp_repr */
	NULL,				/* tp_as_number */
	NULL,				/* tp_as_sequence */
	NULL,				/* tp_as_mapping */
	NULL,				/* tp_hash  */
	NULL,				/* tp_call */
	NULL,				/* tp_str */
	NULL,				/* tp_getattro */
	NULL,				/* tp_setattro */
	NULL,				/* tp_as_buffer */
	Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,	/* tp_flags */
	"Test object",			/* tp_doc */
	(traverseproc)TestObject_traverse,	/* tp_traverse */
	(inquiry)TestObject_clear,	/* tp_clear */
	LLDwarfObject_richcompare,	/* tp_richcompare */
	0,				/* tp_weaklistoffset */
	NULL,				/* tp_iter */
	NULL,				/* tp_iternext */
	NULL,				/* tp_methods */
	TestObject_members,		/* tp_members */
	NULL,				/* tp_getset */
	NULL,				/* tp_base */
	NULL,				/* tp_dict */
	NULL,				/* tp_descr_get */
	NULL,				/* tp_descr_set */
	0,				/* tp_dictoffset */
	LLDwarfObject_init,		/* tp_init */
};
#endif /* TEST_LLDWARFOBJECT */

76
setup.py Executable file
View File

@ -0,0 +1,76 @@
#!/usr/bin/env python3
from setuptools import setup, find_packages, Extension
from setuptools.command.build_ext import build_ext
import os.path
def out_of_date(dependencies, target):
    """Return True if target needs to be rebuilt from dependencies.

    target is out of date when it cannot be stat'ed or when any dependency
    was modified at or after target's modification time. A dependency that
    cannot be stat'ed raises OSError.
    """
    # The dependencies are stat'ed first, so a missing dependency is reported
    # even when the target does not exist yet.
    stamps = [os.path.getmtime(path) for path in dependencies]
    try:
        built_at = os.path.getmtime(target)
    except OSError:
        return True
    for stamp in stamps:
        if stamp >= built_at:
            return True
    return False
def gen_header():
    """Generate lldwarf/dwarfdefs.h from the enums in drgn.dwarf.defs.

    Each enum becomes an anonymous C enum whose constants are named
    <EnumName>_<member> and written in hexadecimal. The whole file is wrapped
    in a DWARFDEFS_H include guard.
    """
    import drgn.dwarf.defs as defs

    enum_names = ('DW_CHILDREN', 'DW_TAG', 'DW_AT', 'DW_FORM', 'DW_LNS', 'DW_LNE')

    with open('lldwarf/dwarfdefs.h', 'w') as f:
        f.write('#ifndef DWARFDEFS_H\n')
        f.write('#define DWARFDEFS_H\n\n')
        for enum_name in enum_names:
            # Looked up one at a time, in output order.
            e = getattr(defs, enum_name)
            f.write('enum {\n')
            for name, value in e.__members__.items():
                f.write(f'\t{e.__name__}_{name} = 0x{value:x},\n')
            f.write('};\n\n')
        f.write('#endif /* DWARFDEFS_H */\n')
class my_build_ext(build_ext):
    """build_ext command that regenerates lldwarf/dwarfdefs.h when stale."""

    def run(self):
        """Regenerate the header if needed, then run the normal build."""
        stale = out_of_date(['drgn/dwarf/defs.py', 'setup.py'], 'lldwarf/dwarfdefs.h')
        if stale:
            self._regenerate_header()
        super().run()

    def _regenerate_header(self):
        """Run gen_header(); on failure remove the header and re-raise."""
        try:
            gen_header()
        except Exception as e:
            # Best effort: the header may never have been created at all.
            try:
                os.remove('lldwarf/dwarfdefs.h')
            except OSError:
                pass
            raise e
# The low-level DWARF parser, built as the C extension drgn.lldwarf.
module = Extension(
    name='drgn.lldwarf',
    sources=[
        'lldwarf/module.c',
        'lldwarf/object.c',
        'lldwarf/abbrev.c',
        'lldwarf/cu.c',
        'lldwarf/die.c',
        'lldwarf/line.c',
    ],
    # Compiles in the code guarded by TEST_LLDWARFOBJECT (the _TestObject type).
    extra_compile_args=['-DTEST_LLDWARFOBJECT'],
)

setup(
    name='drgn',
    entry_points={
        # main() lives in drgn/cli/__main__.py; the package's __init__.py is
        # empty, so 'drgn.cli:main' could not be resolved.
        'console_scripts': ['drgn=drgn.cli.__main__:main'],
    },
    cmdclass={'build_ext': my_build_ext},
    ext_modules=[module],
    packages=find_packages(),
    test_suite='tests',
)

0
tests/__init__.py Normal file
View File

View File

View File

@ -0,0 +1,112 @@
import drgn.lldwarf as lldwarf
import unittest
class TestAbbrevObject(unittest.TestCase):
    """Tests for constructing and inspecting lldwarf.AbbrevDecl objects."""

    def test_object(self):
        """tag and children are readable and the attribute specs are indexable."""
        decl = lldwarf.AbbrevDecl(0x11, True, ((0x03, 0x08), (0x0c, 0x0b)))
        self.assertEqual(decl.tag, 0x11)
        self.assertEqual(decl.children, True)
        self.assertEqual(decl[0], (0x03, 0x08))
        self.assertEqual(decl[1], (0x0c, 0x0b))
        # Negative indices count from the end.
        self.assertEqual(decl[-1], (0x0c, 0x0b))
        self.assertEqual(decl[-2], (0x03, 0x08))
        with self.assertRaises(IndexError):
            decl[2]
        with self.assertRaises(IndexError):
            decl[-3]

    def test_init_errors(self):
        """Malformed attribute arguments raise TypeError or ValueError."""
        with self.assertRaises(TypeError):
            lldwarf.AbbrevDecl(0x11, False, None)
        with self.assertRaises(TypeError):
            lldwarf.AbbrevDecl(0x11, False, (None,))
        with self.assertRaises(TypeError):
            lldwarf.AbbrevDecl(0x11, False, ((None, None),))
        with self.assertRaisesRegex(ValueError, 'pair'):
            lldwarf.AbbrevDecl(0x11, False, ((1, 2, 3),))

    def test_init_overflow(self):
        """2**64 for tag, name or form raises OverflowError naming the field."""
        with self.assertRaisesRegex(OverflowError, 'tag'):
            lldwarf.AbbrevDecl(2**64, False, ())
        with self.assertRaisesRegex(OverflowError, 'name'):
            lldwarf.AbbrevDecl(0x11, False, ((2**64, 0x08),))
        with self.assertRaisesRegex(OverflowError, 'form'):
            lldwarf.AbbrevDecl(0x11, False, ((0x03, 2**64),))

    def test_repr(self):
        """repr() shows tag, children and attributes in constructor order."""
        decl = lldwarf.AbbrevDecl(0x11, False, ())
        self.assertEqual(repr(decl), 'AbbrevDecl(tag=17, children=False, attributes=())')
        decl = lldwarf.AbbrevDecl(0x11, False, ((0x03, 0x08),))
        self.assertEqual(repr(decl), 'AbbrevDecl(tag=17, children=False, attributes=((3, 8),))')
class TestParseAbbrev(unittest.TestCase):
    """Tests for lldwarf.parse_abbrev_table() on hand-built byte buffers."""

    def test_negative_offset(self):
        """A negative offset raises ValueError."""
        with self.assertRaises(ValueError):
            lldwarf.parse_abbrev_table(b'', -1)

    def test_empty_table(self):
        """A buffer holding a single null byte parses to an empty table."""
        buf = b'\0'
        abbrev_table = {}
        self.assertEqual(lldwarf.parse_abbrev_table(buf), abbrev_table)

    def test_empty_decl(self):
        """A declaration with no attribute specs parses to empty attributes."""
        buf = (b'\x01'  # code = 1
               b'\x11'  # tag = 0x11 (DW_TAG_compile_unit)
               b'\0'    # DW_CHILDREN_no
               b'\0\0'  # null attribute spec
               b'\0')   # null attribute declaration
        abbrev_table = {
            1: lldwarf.AbbrevDecl(0x11, False, ()),
        }
        self.assertEqual(lldwarf.parse_abbrev_table(buf), abbrev_table)

    def test_one_attrib(self):
        """A declaration with one attribute spec."""
        buf = (b'\x01'      # code = 1
               b'\x11'      # tag = 0x11 (DW_TAG_compile_unit)
               b'\0'        # DW_CHILDREN_no
               b'\x03\x08'  # name = 0x03 (DW_AT_name), form = 0x08 (DW_FORM_string)
               b'\0\0'      # null attribute spec
               b'\0')       # null attribute declaration
        abbrev_table = {
            1: lldwarf.AbbrevDecl(0x11, False, ((0x03, 0x08),)),
        }
        self.assertEqual(lldwarf.parse_abbrev_table(buf), abbrev_table)

    def test_two_attribs(self):
        """A declaration with children and two attribute specs."""
        buf = (b'\x01'      # code = 1
               b'\x11'      # tag = 0x11 (DW_TAG_compile_unit)
               b'\x01'      # DW_CHILDREN_yes
               b'\x03\x08'  # name = 0x03 (DW_AT_name), form = 0x08 (DW_FORM_string)
               b'\x0c\x0b'  # name = 0x0c (DW_AT_bit_offset), form = 0x0b (DW_FORM_data1)
               b'\0\0'      # null attribute spec
               b'\0')       # null attribute declaration
        abbrev_table = {
            1: lldwarf.AbbrevDecl(0x11, True, ((0x03, 0x08), (0x0c, 0x0b))),
        }
        self.assertEqual(lldwarf.parse_abbrev_table(buf), abbrev_table)

    def test_duplicate_code(self):
        """Two declarations with the same code raise ValueError."""
        buf = (b'\x01'  # code = 1
               b'\x11'  # tag = 0x11 (DW_TAG_compile_unit)
               b'\0'    # DW_CHILDREN_no
               b'\0\0'  # null attribute spec
               b'\x01'  # code = 1
               b'\x11'  # tag = 0x11 (DW_TAG_compile_unit)
               b'\0'    # DW_CHILDREN_no
               b'\0\0'  # null attribute spec
               b'\0')   # null attribute declaration
        with self.assertRaisesRegex(ValueError, 'duplicate abbreviation code'):
            lldwarf.parse_abbrev_table(buf)

    def test_truncated(self):
        """Every proper prefix of a valid table raises a 'truncated' ValueError."""
        buf = (b'\x01'  # code = 1
               b'\x11'  # tag = 0x11 (DW_TAG_compile_unit)
               b'\0'    # DW_CHILDREN_no
               b'\0\0'  # null attribute spec
               b'\0')   # null attribute declaration
        for i in range(len(buf)):
            with self.assertRaisesRegex(ValueError, 'abbreviation .* truncated'):
                lldwarf.parse_abbrev_table(buf[:i])

117
tests/lldwarf/test_cu.py Normal file
View File

@ -0,0 +1,117 @@
import drgn.lldwarf as lldwarf
import unittest
class TestCompilationUnitHeaderObject(unittest.TestCase):
    """Tests for the offset helpers of lldwarf.CompilationUnitHeader."""

    def test_offset(self):
        """die_offset() and next_offset() for the 32-bit and 64-bit formats."""
        header = lldwarf.CompilationUnitHeader(
            offset=70,
            unit_length=200,
            version=2,
            debug_abbrev_offset=0,
            address_size=8,
            is_64_bit=False,
        )
        self.assertEqual(header.die_offset(), 81)
        self.assertEqual(header.next_offset(), 274)
        # The same header read as 64-bit DWARF gives larger offsets.
        header.is_64_bit = True
        self.assertEqual(header.die_offset(), 93)
        self.assertEqual(header.next_offset(), 282)

    def test_offset_overflow(self):
        """Offsets near 2**63 raise OverflowError from both helpers."""
        header = lldwarf.CompilationUnitHeader(
            offset=2**63 - 11,
            unit_length=2**64 - 4,
            version=2,
            debug_abbrev_offset=0,
            address_size=8,
            is_64_bit=False,
        )
        with self.assertRaises(OverflowError):
            header.die_offset()
        with self.assertRaises(OverflowError):
            header.next_offset()

        header.offset = 2**63 - 8
        header.unit_length = 4
        with self.assertRaises(OverflowError):
            header.next_offset()

        # Repeat with the 64-bit format.
        header.offset = 2**63 - 23
        header.unit_length = 2**64 - 12
        header.is_64_bit = True
        with self.assertRaises(OverflowError):
            header.die_offset()
        with self.assertRaises(OverflowError):
            header.next_offset()

        header.offset = 2**63 - 16
        header.unit_length = 4
        with self.assertRaises(OverflowError):
            header.next_offset()
class TestParseCompilationUnitHeader(unittest.TestCase):
    """Tests for lldwarf.parse_compilation_unit_header() on raw bytes."""

    def test_negative_offset(self):
        """A negative offset raises ValueError."""
        with self.assertRaises(ValueError):
            lldwarf.parse_compilation_unit_header(b'', -1)

    def test_32bit(self):
        """Parse a 32-bit header; every proper prefix is reported as truncated."""
        buf = (b'\xc8\x00\x00\x00'  # unit_length
               b'\x02\x00'          # version
               b'\x00\x00\x00\x00'  # debug_abbrev_offset
               b'\x08')             # address_size
        header = lldwarf.CompilationUnitHeader(
            offset=0,
            unit_length=200,
            version=2,
            debug_abbrev_offset=0,
            address_size=8,
            is_64_bit=False,
        )
        for i in range(len(buf)):
            with self.assertRaisesRegex(ValueError, 'compilation unit header is truncated'):
                lldwarf.parse_compilation_unit_header(buf[:i])
        self.assertEqual(lldwarf.parse_compilation_unit_header(buf), header)

    def test_64bit(self):
        """Parse a 64-bit header; every proper prefix is reported as truncated."""
        # The leading 0xffffffff precedes the 8-byte unit_length; the parsed
        # header is expected to have is_64_bit=True.
        buf = (b'\xff\xff\xff\xff'
               b'\xc8\x00\x00\x00\x00\x00\x00\x00'  # unit_length
               b'\x02\x00'                          # version
               b'\x00\x00\x00\x00\x00\x00\x00\x00'  # debug_abbrev_offset
               b'\x08')                             # address_size
        header = lldwarf.CompilationUnitHeader(
            offset=0,
            unit_length=200,
            version=2,
            debug_abbrev_offset=0,
            address_size=8,
            is_64_bit=True,
        )
        for i in range(len(buf)):
            with self.assertRaisesRegex(ValueError, 'compilation unit header is truncated'):
                lldwarf.parse_compilation_unit_header(buf[:i])
        self.assertEqual(lldwarf.parse_compilation_unit_header(buf), header)

    def test_offset(self):
        """Parsing at offset 1 skips the first byte and records offset=1."""
        buf = (b'\x01'              # padding
               b'\xc8\x00\x00\x00'  # unit_length
               b'\x02\x00'          # version
               b'\x00\x00\x00\x00'  # debug_abbrev_offset
               b'\x08')             # address_size
        header = lldwarf.CompilationUnitHeader(
            offset=1,
            unit_length=200,
            version=2,
            debug_abbrev_offset=0,
            address_size=8,
            is_64_bit=False,
        )
        self.assertEqual(lldwarf.parse_compilation_unit_header(buf, 1), header)

349
tests/lldwarf/test_die.py Normal file
View File

@ -0,0 +1,349 @@
import drgn.lldwarf as lldwarf
from drgn.dwarf.defs import DW_TAG, DW_FORM, DW_AT
import unittest
class TestDieObject(unittest.TestCase):
    """Tests for constructing and inspecting lldwarf.DwarfDie objects.

    DwarfDie is called here as
    DwarfDie(offset, die_length, tag, children, attributes), where attributes
    is a sequence of (name, form, value) triples.
    """

    def test_find(self):
        """find() returns (form, value) for a present name, KeyError otherwise."""
        die = lldwarf.DwarfDie(0, 10, DW_TAG.lo_user, None, ((DW_AT.lo_user, DW_FORM.sdata, -99),))
        self.assertEqual(die.find(DW_AT.lo_user), (DW_FORM.sdata, -99))
        with self.assertRaises(KeyError):
            die.find(DW_AT.name)

    def test_init_errors(self):
        """Malformed attribute arguments raise TypeError or ValueError."""
        with self.assertRaises(TypeError):
            lldwarf.DwarfDie(0, 10, DW_TAG.lo_user, None, None)
        with self.assertRaises(TypeError):
            lldwarf.DwarfDie(0, 10, DW_TAG.lo_user, None, (None,))
        with self.assertRaises(TypeError):
            lldwarf.DwarfDie(0, 10, DW_TAG.lo_user, None, ((None, None, None),))
        with self.assertRaisesRegex(ValueError, 'triple'):
            lldwarf.DwarfDie(0, 10, DW_TAG.lo_user, None, ((1, 2, 3, 4),))

    def test_init_overflow(self):
        """Out-of-range integer fields raise OverflowError naming the field."""
        with self.assertRaisesRegex(OverflowError, 'offset'):
            lldwarf.DwarfDie(2**63, 10, DW_TAG.lo_user, None, ())
        with self.assertRaisesRegex(OverflowError, 'die_length'):
            lldwarf.DwarfDie(0, 2**63, DW_TAG.lo_user, None, ())
        with self.assertRaisesRegex(OverflowError, 'tag'):
            lldwarf.DwarfDie(0, 10, 2**64, None, ())
        with self.assertRaisesRegex(OverflowError, 'name'):
            lldwarf.DwarfDie(0, 10, DW_TAG.lo_user, None, ((2**64, DW_FORM.flag_present, True),))
        with self.assertRaisesRegex(OverflowError, 'form'):
            lldwarf.DwarfDie(0, 10, DW_TAG.lo_user, None, ((DW_AT.lo_user, 2**64, True),))

    def test_udata(self):
        """udata values round-trip; 2**64 overflows; a str is a TypeError."""
        die = lldwarf.DwarfDie(0, 10, DW_TAG.lo_user, None, ((DW_AT.lo_user, DW_FORM.udata, 2**33),))
        self.assertEqual(die[0], (DW_AT.lo_user, DW_FORM.udata, 2**33))
        with self.assertRaises(OverflowError):
            lldwarf.DwarfDie(0, 10, DW_TAG.lo_user, None, ((DW_AT.lo_user, DW_FORM.udata, 2**64),))
        with self.assertRaises(TypeError):
            lldwarf.DwarfDie(0, 10, DW_TAG.lo_user, None, ((DW_AT.lo_user, DW_FORM.udata, 'foo'),))

    def test_sdata(self):
        """sdata values round-trip; 2**63 overflows; a str is a TypeError."""
        die = lldwarf.DwarfDie(0, 10, DW_TAG.lo_user, None, ((DW_AT.lo_user, DW_FORM.sdata, -2**33),))
        self.assertEqual(die[0], (DW_AT.lo_user, DW_FORM.sdata, -2**33))
        with self.assertRaises(OverflowError):
            lldwarf.DwarfDie(0, 10, DW_TAG.lo_user, None, ((DW_AT.lo_user, DW_FORM.sdata, 2**63),))
        with self.assertRaises(TypeError):
            lldwarf.DwarfDie(0, 10, DW_TAG.lo_user, None, ((DW_AT.lo_user, DW_FORM.sdata, 'foo'),))

    def test_string(self):
        """string values are pairs; the error regexes name them offset and length."""
        die = lldwarf.DwarfDie(0, 10, DW_TAG.lo_user, None, ((DW_AT.lo_user, DW_FORM.string, (0, 20)),))
        self.assertEqual(die[0], (DW_AT.lo_user, DW_FORM.string, (0, 20)))
        with self.assertRaises(TypeError):
            lldwarf.DwarfDie(0, 10, DW_TAG.lo_user, None, ((DW_AT.lo_user, DW_FORM.string, None),))
        with self.assertRaises(TypeError):
            lldwarf.DwarfDie(0, 10, DW_TAG.lo_user, None, ((DW_AT.lo_user, DW_FORM.string, (None, None)),))
        with self.assertRaises(ValueError):
            lldwarf.DwarfDie(0, 10, DW_TAG.lo_user, None, ((DW_AT.lo_user, DW_FORM.string, (1, 2, 3)),))
        with self.assertRaisesRegex(OverflowError, 'offset'):
            lldwarf.DwarfDie(0, 10, DW_TAG.lo_user, None, ((DW_AT.lo_user, DW_FORM.string, (2**63, 1)),))
        with self.assertRaisesRegex(OverflowError, 'length'):
            lldwarf.DwarfDie(0, 10, DW_TAG.lo_user, None, ((DW_AT.lo_user, DW_FORM.string, (0, 2**63)),))

    def test_data(self):
        """dataN values must be bytes; a wrong-length bytes value is a ValueError."""
        with self.assertRaises(TypeError):
            lldwarf.DwarfDie(0, 0, DW_TAG.lo_user, None, ((DW_AT.lo_user, DW_FORM.data1, 64),))
        with self.assertRaises(ValueError):
            lldwarf.DwarfDie(0, 0, DW_TAG.lo_user, None, ((DW_AT.lo_user, DW_FORM.data1, b'aa'),))
        with self.assertRaises(ValueError):
            lldwarf.DwarfDie(0, 0, DW_TAG.lo_user, None, ((DW_AT.lo_user, DW_FORM.data2, b'aaa'),))
        with self.assertRaises(ValueError):
            lldwarf.DwarfDie(0, 0, DW_TAG.lo_user, None, ((DW_AT.lo_user, DW_FORM.data4, b'aaa'),))
        with self.assertRaises(ValueError):
            lldwarf.DwarfDie(0, 0, DW_TAG.lo_user, None, ((DW_AT.lo_user, DW_FORM.data8, b''),))

    def test_flag(self):
        """flag values read back as booleans of the value passed in."""
        die = lldwarf.DwarfDie(0, 10, DW_TAG.lo_user, None, ((DW_AT.lo_user, DW_FORM.flag, True),))
        self.assertEqual(die[0], (DW_AT.lo_user, DW_FORM.flag, True))
        die = lldwarf.DwarfDie(0, 10, DW_TAG.lo_user, None, ((DW_AT.lo_user, DW_FORM.flag, 0),))
        self.assertEqual(die[0], (DW_AT.lo_user, DW_FORM.flag, False))

    def test_flag_present(self):
        """flag_present reads back as True whatever value was passed in."""
        die = lldwarf.DwarfDie(0, 10, DW_TAG.lo_user, None, ((DW_AT.lo_user, DW_FORM.flag_present, True),))
        self.assertEqual(die[0], (DW_AT.lo_user, DW_FORM.flag_present, True))
        die = lldwarf.DwarfDie(0, 10, DW_TAG.lo_user, None, ((DW_AT.lo_user, DW_FORM.flag_present, 0),))
        self.assertEqual(die[0], (DW_AT.lo_user, DW_FORM.flag_present, True))

    def test_unknown_form(self):
        """An unrecognized form raises ValueError that includes the form number."""
        with self.assertRaisesRegex(ValueError, f'unknown form {2**64 - 1}'):
            lldwarf.DwarfDie(0, 10, DW_TAG.lo_user, None, ((DW_AT.lo_user, 2**64 - 1, None),))

    def test_repr(self):
        """repr() lists every field in constructor order, enums as plain ints."""
        die = lldwarf.DwarfDie(0, 10, DW_TAG.lo_user, None, ())
        self.assertEqual(repr(die), f'DwarfDie(offset=0, die_length=10, tag={DW_TAG.lo_user.value}, children=None, attributes=())')
        die = lldwarf.DwarfDie(0, 10, DW_TAG.lo_user, None, ((DW_AT.lo_user, DW_FORM.flag, True),))
        self.assertEqual(repr(die), f'DwarfDie(offset=0, die_length=10, tag={DW_TAG.lo_user.value}, children=None, attributes=(({DW_AT.lo_user.value}, {DW_FORM.flag.value}, True),))')

    def test_recursive_repr(self):
        """A DIE that is its own child is shown as DwarfDie(...) when nested."""
        die = lldwarf.DwarfDie(0, 10, DW_TAG.lo_user, None, ())
        die.children = [die]
        self.assertEqual(repr(die), f'DwarfDie(offset=0, die_length=10, tag={DW_TAG.lo_user.value}, children=[DwarfDie(...)], attributes=())')
# Shared compilation unit header fixtures for the parsing tests below.

# 32-bit DWARF format with 8-byte addresses.
header = lldwarf.CompilationUnitHeader(
    offset=0,
    unit_length=200,
    version=2,
    debug_abbrev_offset=0,
    address_size=8,
    is_64_bit=False,
)

# 32-bit DWARF format with 4-byte addresses.
header32addr = lldwarf.CompilationUnitHeader(
    offset=0,
    unit_length=200,
    version=2,
    debug_abbrev_offset=0,
    address_size=4,
    is_64_bit=False,
)

# 64-bit DWARF format with 8-byte addresses.
header64 = lldwarf.CompilationUnitHeader(
    offset=0,
    unit_length=200,
    version=2,
    debug_abbrev_offset=0,
    address_size=8,
    is_64_bit=True,
)
class TestParseDie(unittest.TestCase):
def test_negative_offset(self):
with self.assertRaises(ValueError):
lldwarf.parse_die(header, {}, b'', -1)
with self.assertRaises(ValueError):
lldwarf.parse_die_siblings(header, {}, b'', -1)
def test_bad_cu(self):
with self.assertRaises(TypeError):
lldwarf.parse_die(None, {}, b'')
def test_bad_abbrev_table(self):
with self.assertRaises(TypeError):
lldwarf.parse_die(header, None, b'')
def test_null(self):
self.assertIsNone(lldwarf.parse_die(header, {}, b'\0'))
def test_unknown_abbreviation(self):
with self.assertRaisesRegex(ValueError, 'unknown abbreviation code'):
lldwarf.parse_die(header, {}, b'\x01\xff')
def assertDie(self, header, abbrev_table, buf, die_args):
tag, children, attribs = die_args
die = lldwarf.DwarfDie(0, len(buf), tag, children, attribs)
self.assertEqual(tuple(die), tuple(attribs))
self.assertEqual(lldwarf.parse_die(header, abbrev_table, buf), die)
def test_address(self):
abbrev_table = {
1: lldwarf.AbbrevDecl(DW_TAG.lo_user, False, ((DW_AT.lo_user, DW_FORM.addr),)),
}
self.assertDie(header, abbrev_table, b'\x01\xff\xff\xff\xff\xff\xff\xff\x7f',
(DW_TAG.lo_user, None, ((DW_AT.lo_user, DW_FORM.addr, 2**63 - 1),)))
self.assertDie(header32addr, abbrev_table, b'\x01\xff\xff\xff\x7f',
(DW_TAG.lo_user, None, ((DW_AT.lo_user, DW_FORM.addr, 2**31 - 1),)))
bogus_header = lldwarf.CompilationUnitHeader(
offset=0,
unit_length=200,
version=2,
debug_abbrev_offset=0,
address_size=1,
is_64_bit=False,
)
with self.assertRaisesRegex(ValueError, 'unsupported address size'):
lldwarf.parse_die(bogus_header, abbrev_table, b'\x01\xff')
def test_block(self):
abbrev_table = {
1: lldwarf.AbbrevDecl(DW_TAG.lo_user, False, ((DW_AT.lo_user, DW_FORM.block1),)),
2: lldwarf.AbbrevDecl(DW_TAG.lo_user, False, ((DW_AT.lo_user, DW_FORM.block2),)),
3: lldwarf.AbbrevDecl(DW_TAG.lo_user, False, ((DW_AT.lo_user, DW_FORM.block4),)),
4: lldwarf.AbbrevDecl(DW_TAG.lo_user, False, ((DW_AT.lo_user, DW_FORM.block),)),
5: lldwarf.AbbrevDecl(DW_TAG.lo_user, False, ((DW_AT.lo_user, DW_FORM.exprloc),)),
}
self.assertDie(header, abbrev_table, b'\x01\x04aaaa',
(DW_TAG.lo_user, None, ((DW_AT.lo_user, DW_FORM.block1, (2, 4)),)))
self.assertDie(header, abbrev_table, b'\x02\x01\x00b',
(DW_TAG.lo_user, None, ((DW_AT.lo_user, DW_FORM.block2, (3, 1)),)))
self.assertDie(header, abbrev_table, b'\x03\x10\x00\x00\x00' + b'z' * 16,
(DW_TAG.lo_user, None, ((DW_AT.lo_user, DW_FORM.block4, (5, 16)),)))
self.assertDie(header, abbrev_table, b'\x04\x03xyz',
(DW_TAG.lo_user, None, ((DW_AT.lo_user, DW_FORM.block, (2, 3)),)))
self.assertDie(header, abbrev_table, b'\x05\x0f012345678901234',
(DW_TAG.lo_user, None, ((DW_AT.lo_user, DW_FORM.exprloc, (2, 15)),)))
with self.assertRaisesRegex(ValueError, 'attribute length too big'):
lldwarf.parse_die(header, abbrev_table, b'\x05\x80\x80\x80\x80\x80\x80\x80\x80\x80\x01')
def test_data(self):
abbrev_table = {
1: lldwarf.AbbrevDecl(DW_TAG.lo_user, False, ((DW_AT.lo_user, DW_FORM.data1),)),
2: lldwarf.AbbrevDecl(DW_TAG.lo_user, False, ((DW_AT.lo_user, DW_FORM.data2),)),
3: lldwarf.AbbrevDecl(DW_TAG.lo_user, False, ((DW_AT.lo_user, DW_FORM.data4),)),
4: lldwarf.AbbrevDecl(DW_TAG.lo_user, False, ((DW_AT.lo_user, DW_FORM.data8),)),
}
self.assertDie(header, abbrev_table, b'\x01a',
(DW_TAG.lo_user, None, ((DW_AT.lo_user, DW_FORM.data1, b'a'),)))
self.assertDie(header, abbrev_table, b'\x02ab',
(DW_TAG.lo_user, None, ((DW_AT.lo_user, DW_FORM.data2, b'ab'),)))
self.assertDie(header, abbrev_table, b'\x03abcd',
(DW_TAG.lo_user, None, ((DW_AT.lo_user, DW_FORM.data4, b'abcd'),)))
self.assertDie(header, abbrev_table, b'\x04abcdefgh',
(DW_TAG.lo_user, None, ((DW_AT.lo_user, DW_FORM.data8, b'abcdefgh'),)))
def test_constant(self):
abbrev_table = {
1: lldwarf.AbbrevDecl(DW_TAG.lo_user, False, ((DW_AT.lo_user, DW_FORM.udata),)),
2: lldwarf.AbbrevDecl(DW_TAG.lo_user, False, ((DW_AT.lo_user, DW_FORM.sdata),)),
}
self.assertDie(header, abbrev_table, b'\x01\x64',
(DW_TAG.lo_user, None, ((DW_AT.lo_user, DW_FORM.udata, 100),)))
self.assertDie(header, abbrev_table, b'\x02\x7f',
(DW_TAG.lo_user, None, ((DW_AT.lo_user, DW_FORM.sdata, -1),)))
with self.assertRaises(OverflowError):
lldwarf.DwarfDie(0, 0, DW_TAG.lo_user, None, ((DW_AT.lo_user, DW_FORM.udata, 2**64),))
with self.assertRaises(OverflowError):
lldwarf.DwarfDie(0, 0, DW_TAG.lo_user, None, ((DW_AT.lo_user, DW_FORM.sdata, 2**63),))
with self.assertRaises(OverflowError):
lldwarf.DwarfDie(0, 0, DW_TAG.lo_user, None, ((DW_AT.lo_user, DW_FORM.sdata, -2**63 - 1),))
def test_flag(self):
abbrev_table = {
1: lldwarf.AbbrevDecl(DW_TAG.lo_user, False, ((DW_AT.lo_user, DW_FORM.flag),)),
2: lldwarf.AbbrevDecl(DW_TAG.lo_user, False, ((DW_AT.lo_user, DW_FORM.flag_present),)),
}
self.assertDie(header, abbrev_table, b'\x01\x01',
(DW_TAG.lo_user, None, ((DW_AT.lo_user, DW_FORM.flag, True),)))
self.assertDie(header, abbrev_table, b'\x01\x00',
(DW_TAG.lo_user, None, ((DW_AT.lo_user, DW_FORM.flag, False),)))
self.assertDie(header, abbrev_table, b'\x02',
(DW_TAG.lo_user, None, ((DW_AT.lo_user, DW_FORM.flag_present, True),)))
def test_reference(self):
abbrev_table = {
1: lldwarf.AbbrevDecl(DW_TAG.lo_user, False, ((DW_AT.lo_user, DW_FORM.ref1),)),
2: lldwarf.AbbrevDecl(DW_TAG.lo_user, False, ((DW_AT.lo_user, DW_FORM.ref2),)),
3: lldwarf.AbbrevDecl(DW_TAG.lo_user, False, ((DW_AT.lo_user, DW_FORM.ref4),)),
4: lldwarf.AbbrevDecl(DW_TAG.lo_user, False, ((DW_AT.lo_user, DW_FORM.ref8),)),
5: lldwarf.AbbrevDecl(DW_TAG.lo_user, False, ((DW_AT.lo_user, DW_FORM.ref_sig8),)),
6: lldwarf.AbbrevDecl(DW_TAG.lo_user, False, ((DW_AT.lo_user, DW_FORM.ref_udata),)),
}
self.assertDie(header, abbrev_table, b'\x01\xff',
(DW_TAG.lo_user, None, ((DW_AT.lo_user, DW_FORM.ref1, 255),)))
self.assertDie(header, abbrev_table, b'\x02\x10\x27',
(DW_TAG.lo_user, None, ((DW_AT.lo_user, DW_FORM.ref2, 10000),)))
self.assertDie(header, abbrev_table, b'\x03\x00\x00\x00\x80',
(DW_TAG.lo_user, None, ((DW_AT.lo_user, DW_FORM.ref4, 2**31),)))
self.assertDie(header, abbrev_table, b'\x04\x00\x00\x00\x00\x00\x00\x00\x80',
(DW_TAG.lo_user, None, ((DW_AT.lo_user, DW_FORM.ref8, 2**63),)))
self.assertDie(header, abbrev_table, b'\x05\x00\x00\x00\x00\x00\x00\x00\x80',
(DW_TAG.lo_user, None, ((DW_AT.lo_user, DW_FORM.ref_sig8, 2**63),)))
self.assertDie(header, abbrev_table, b'\x06\x00',
(DW_TAG.lo_user, None, ((DW_AT.lo_user, DW_FORM.ref_udata, 0),)))
def test_sec_offset(self):
    # DW_FORM.sec_offset: the payload is 4 bytes when parsed with `header`
    # and 8 bytes when parsed with `header64`; both are little-endian.
    abbrev_table = {
        1: lldwarf.AbbrevDecl(DW_TAG.lo_user, False, ((DW_AT.lo_user, DW_FORM.sec_offset),)),
    }
    # (unit header, encoded DIE, expected offset)
    variants = (
        (header, b'\x01\xff\xff\xff\x7f', 2**31 - 1),
        (header64, b'\x01\xff\xff\xff\xff\xff\xff\xff\x7f', 2**63 - 1),
    )
    for unit_header, encoded, offset in variants:
        attribs = ((DW_AT.lo_user, DW_FORM.sec_offset, offset),)
        self.assertDie(unit_header, abbrev_table, encoded,
                       (DW_TAG.lo_user, None, attribs))
def test_strp(self):
    # DW_FORM.strp: the payload is 4 bytes when parsed with `header` and
    # 8 bytes when parsed with `header64`; the expected attribute value is
    # the raw little-endian number, not a resolved string.
    abbrev_table = {
        1: lldwarf.AbbrevDecl(DW_TAG.lo_user, False, ((DW_AT.lo_user, DW_FORM.strp),)),
    }
    # (unit header, encoded DIE, expected value)
    variants = (
        (header, b'\x01\xff\xff\xff\x7f', 2**31 - 1),
        (header64, b'\x01\xff\xff\xff\xff\xff\xff\xff\x7f', 2**63 - 1),
    )
    for unit_header, encoded, value in variants:
        attribs = ((DW_AT.lo_user, DW_FORM.strp, value),)
        self.assertDie(unit_header, abbrev_table, encoded,
                       (DW_TAG.lo_user, None, attribs))
def test_string(self):
    # DW_FORM.string: inline NUL-terminated strings. The expected values
    # are pairs that line up with (offset in buffer, length without the
    # terminator): 'foo' starts at byte 1 and 'asdf' at byte 5.
    two_strings = lldwarf.AbbrevDecl(
        DW_TAG.lo_user, False,
        ((DW_AT.lo_user, DW_FORM.string),
         (DW_AT.lo_user, DW_FORM.string)))
    abbrev_table = {1: two_strings}
    expected_attribs = (
        (DW_AT.lo_user, DW_FORM.string, (1, 3)),
        (DW_AT.lo_user, DW_FORM.string, (5, 4)),
    )
    self.assertDie(header, abbrev_table, b'\x01foo\0asdf\0',
                   (DW_TAG.lo_user, None, expected_attribs))

    # A buffer that ends before the terminating NUL must be rejected.
    with self.assertRaisesRegex(ValueError, 'unterminated string'):
        lldwarf.parse_die(header, abbrev_table, b'\x01foo')
def test_recursive(self):
    # With recurse=True, a DIE whose declaration has children set is
    # expected to come back with its child attached. The buffer holds the
    # parent (code 1, udata 1), one child (code 2, sdata 2) and a 0 byte
    # that ends the child list.
    parent_decl = lldwarf.AbbrevDecl(DW_TAG.lo_user, True, ((DW_AT.lo_user, DW_FORM.udata),))
    child_decl = lldwarf.AbbrevDecl(DW_TAG.lo_user + 1, False, ((DW_AT.lo_user + 1, DW_FORM.sdata),))
    abbrev_table = {1: parent_decl, 2: child_decl}

    die = lldwarf.parse_die(header, abbrev_table, b'\x01\x01\x02\x02\x00', recurse=True)

    expected_child = lldwarf.DwarfDie(
        2, 2, DW_TAG.lo_user + 1, None,
        ((DW_AT.lo_user + 1, DW_FORM.sdata, 2),))
    expected_parent = lldwarf.DwarfDie(
        0, 2, DW_TAG.lo_user, [expected_child],
        ((DW_AT.lo_user, DW_FORM.udata, 1),))
    self.assertEqual(die, expected_parent)
def test_siblings(self):
    # Two childless DIEs back to back, followed by a terminating 0 byte,
    # are expected to be returned as a flat list in buffer order.
    first_decl = lldwarf.AbbrevDecl(DW_TAG.lo_user, False, ((DW_AT.lo_user, DW_FORM.udata),))
    second_decl = lldwarf.AbbrevDecl(DW_TAG.lo_user + 1, False, ((DW_AT.lo_user + 1, DW_FORM.sdata),))
    abbrev_table = {1: first_decl, 2: second_decl}

    siblings = lldwarf.parse_die_siblings(header, abbrev_table, b'\x01\x01\x02\x02\x00')

    expected = [
        lldwarf.DwarfDie(0, 2, DW_TAG.lo_user, None,
                         ((DW_AT.lo_user, DW_FORM.udata, 1),)),
        lldwarf.DwarfDie(2, 2, DW_TAG.lo_user + 1, None,
                         ((DW_AT.lo_user + 1, DW_FORM.sdata, 2),)),
    ]
    self.assertEqual(siblings, expected)
def test_siblings_skip(self):
    # The first DIE has children and a DW_AT.sibling attribute of 4. The
    # expected result contains only that DIE and the one at offset 4 (sdata
    # 3); the DIE encoded at offset 2 (sdata 2) does not appear, i.e. the
    # parser is expected to jump over the children using the sibling value.
    parent_decl = lldwarf.AbbrevDecl(DW_TAG.lo_user, True, ((DW_AT.sibling, DW_FORM.udata),))
    other_decl = lldwarf.AbbrevDecl(DW_TAG.lo_user + 1, False, ((DW_AT.lo_user + 1, DW_FORM.sdata),))
    abbrev_table = {1: parent_decl, 2: other_decl}

    siblings = lldwarf.parse_die_siblings(header, abbrev_table, b'\x01\x04\x02\x02\x02\x03\x00')

    # The expected parent has its children attribute removed rather than
    # set to None or a list.
    skipped_parent = lldwarf.DwarfDie(
        0, 2, DW_TAG.lo_user, None,
        ((DW_AT.sibling, DW_FORM.udata, 4),))
    del skipped_parent.children
    next_sibling = lldwarf.DwarfDie(
        4, 2, DW_TAG.lo_user + 1, None,
        ((DW_AT.lo_user + 1, DW_FORM.sdata, 3),))
    self.assertEqual(siblings, [skipped_parent, next_sibling])

View File

@ -0,0 +1,98 @@
import drgn.lldwarf as lldwarf
import unittest
def encode_uleb128(value):
    """Encode a non-negative integer as unsigned LEB128.

    Reference encoder for producing parser inputs: the value is emitted
    seven bits at a time, least-significant group first, with the high bit
    of every byte except the last one set.

    Args:
        value: The integer to encode; must be >= 0.

    Returns:
        A bytearray holding the encoding (a single zero byte for 0).

    Raises:
        ValueError: If value is negative. Right-shifting a negative int
            never reaches zero, so the loop below would not terminate.
    """
    if value < 0:
        raise ValueError('ULEB128 cannot encode a negative value')
    encoded = bytearray()
    while True:
        byte = value & 0x7f
        value >>= 7
        if value:
            # More groups follow: set the continuation bit.
            byte |= 0x80
        encoded.append(byte)
        if not value:
            return encoded
class TestLeb128(unittest.TestCase):
    """Tests for the LEB128 decoding functions exported by lldwarf."""

    def test_negative_offset(self):
        # A negative offset argument must be rejected by every decoder.
        for func_name in ('parse_uleb128', 'parse_uleb128_offset', 'parse_sleb128'):
            with self.assertRaises(ValueError):
                getattr(lldwarf, func_name)(b'', -1)

    def test_truncated(self):
        # An empty buffer, and a buffer whose only byte has the continuation
        # bit set, both end before the value does.
        for case in (b'', b'\x80'):
            with self.subTest(case=case, signed=False):
                with self.assertRaisesRegex(ValueError, 'ULEB128 is truncated'):
                    lldwarf.parse_uleb128(case)
            with self.subTest(case=case, signed=True):
                with self.assertRaisesRegex(ValueError, 'SLEB128 is truncated'):
                    lldwarf.parse_sleb128(case)

    def test_uleb128(self):
        # (encoding, expected value); the last rows walk up to the largest
        # value accepted, 2**64 - 1, including a non-minimal encoding.
        vectors = (
            (b'\x00', 0),
            (b'\x02', 2),
            (b'\x7f', 127),
            (b'\x80\x01', 128),
            (b'\x81\x01', 129),
            (b'\x82\x01', 130),
            (b'\xb9\x64', 12857),
            (b'\xbf\x84\x3d', 999999),
            (b'\x95\x9a\xef\x3a', 123456789),
            (b'\xff\xff\xff\xff\x0f', 0xffffffff),
            (b'\x90\xf1\xd9\xa2\xa3\x02', 0x1234567890),
            (b'\xff\xff\xff\xff\xff\xff\xff\xff\x7f', 2**63 - 1),
            (b'\xff\xff\xff\xff\xff\xff\xff\xff\xff\x00', 2**63 - 1),
            (b'\x80\x80\x80\x80\x80\x80\x80\x80\x80\x01', 2**63),
            (b'\xff\xff\xff\xff\xff\xff\xff\xff\xff\x01', 2**64 - 1),
        )
        for encoded, expected in vectors:
            self.assertEqual(lldwarf.parse_uleb128(encoded), expected)

    def test_uleb128_overflow(self):
        # Encodings of values that need more than 64 bits.
        too_big = (
            b'\x80\x80\x80\x80\x80\x80\x80\x80\x80\x02',  # 2**64
            b'\xff\xff\xff\xff\xff\xff\xff\xff\xff\x03',  # 2**65 - 1
            b'\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\x03',  # 2**72 - 1
        )
        for encoded in too_big:
            with self.subTest(encoded=encoded):
                with self.assertRaises(OverflowError):
                    lldwarf.parse_uleb128(encoded)

    def test_sleb128(self):
        # (encoding, expected value); covers sign extension on both sides of
        # zero and the extremes -2**63 and 2**63 - 1.
        vectors = (
            (b'\x00', 0),
            (b'\x02', 2),
            (b'\x7e', -2),
            (b'\xff\x00', 127),
            (b'\x81\x7f', -127),
            (b'\x80\x01', 128),
            (b'\x80\x7f', -128),
            (b'\x81\x01', 129),
            (b'\xff\x7e', -129),
            (b'\xff\xff\xff\xff\x07', 2**31 - 1),
            (b'\x80\x80\x80\x80\x78', -2**31),
            (b'\xff\xff\xff\xff\xff\xff\xff\xff\xff\x00', 2**63 - 1),
            (b'\x80\x80\x80\x80\x80\x80\x80\x80\x80\x7f', -2**63),
            (b'\xff\xff\xff\xff\xff\xff\xff\xff\xff\x7f', -1),
        )
        for encoded, expected in vectors:
            self.assertEqual(lldwarf.parse_sleb128(encoded), expected)

    def test_sleb128_overflow(self):
        # Encodings of values just past the signed 64-bit maximum.
        too_big = (
            b'\x80\x80\x80\x80\x80\x80\x80\x80\x80\x01',  # 2**63
            b'\xff\xff\xff\xff\xff\xff\xff\xff\xff\x01',  # 2**64 - 1
        )
        for encoded in too_big:
            with self.subTest(encoded=encoded):
                with self.assertRaises(OverflowError):
                    lldwarf.parse_sleb128(encoded)

118
tests/lldwarf/test_line.py Normal file
View File

@ -0,0 +1,118 @@
import drgn.lldwarf as lldwarf
import unittest
def header(offset=0, unit_length=8192, version=2, header_length=57,
           minimum_instruction_length=1, maximum_operations_per_instruction=1,
           default_is_stmt=True, line_base=-5, line_range=14, opcode_base=13,
           standard_opcode_lengths=None, include_directories=None,
           file_names=None, is_64_bit=False):
    """Build a lldwarf.LineNumberProgramHeader with overridable defaults.

    Every field has a default so a test only spells out what it varies. The
    three list-valued fields default to None and get a fresh list on each
    call, so no list object is shared between calls.
    """
    opcode_lengths = (
        [0, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 1]
        if standard_opcode_lengths is None else standard_opcode_lengths)
    directories = (
        [b'include'] if include_directories is None else include_directories)
    files = (
        [(b'main.c', 0, 1, 2), (b'defs.h', 1, 2, 3)]
        if file_names is None else file_names)
    return lldwarf.LineNumberProgramHeader(
        offset, unit_length, version, header_length,
        minimum_instruction_length, maximum_operations_per_instruction,
        default_is_stmt, line_base, line_range, opcode_base,
        opcode_lengths, directories, files, is_64_bit)
def row(address=0, op_index=0, file=1, line=1, column=0, is_stmt=True,
        basic_block=False, end_sequence=False, prologue_end=False,
        epilogue_begin=False, isa=0, discriminator=0):
    """Build a lldwarf.LineNumberRow, passing the fields positionally.

    All fields have defaults, so callers only name the ones they change.
    """
    fields = (
        address, op_index, file, line, column,
        is_stmt, basic_block, end_sequence,
        prologue_end, epilogue_begin, isa,
        discriminator,
    )
    return lldwarf.LineNumberRow(*fields)
class TestLineNumberProgramHeaderObject(unittest.TestCase):
    """Tests for the offset helpers on LineNumberProgramHeader objects."""

    def test_offset(self):
        # With the defaults from header() (offset 0, unit_length 8192,
        # header_length 57) the expected values are consistent with
        #   program_offset: 4 + 2 + 4 + 57 = 67, or 12 + 2 + 8 + 57 = 79 in 64-bit
        #   end_offset:     4 + 8192 = 8196,     or 12 + 8192 = 8204 in 64-bit
        # (method name, is_64_bit, expected result)
        expectations = (
            ('program_offset', False, 67),
            ('program_offset', True, 79),
            ('end_offset', False, 8196),
            ('end_offset', True, 8204),
        )
        for method, wide, expected in expectations:
            lnp = header(is_64_bit=wide)
            self.assertEqual(getattr(lnp, method)(), expected)

    def test_offset_overflow(self):
        # Field combinations whose computed offset must raise OverflowError.
        # The header itself is built outside the assertion: only the offset
        # computation is expected to fail.
        overflowing = (
            ({'header_length': 2**64 - 10}, 'program_offset'),
            ({'offset': 2**63 - 100, 'header_length': 90}, 'program_offset'),
            ({'unit_length': 2**64 - 4}, 'end_offset'),
            ({'offset': 2**63 - 8192}, 'end_offset'),
        )
        for fields, method in overflowing:
            lnp = header(**fields)
            with self.assertRaises(OverflowError):
                getattr(lnp, method)()
class TestParseLineNumberProgramHeader(unittest.TestCase):
    """Tests for lldwarf.parse_line_number_program_header()."""

    def test_v2(self):
        # 32-bit, version 2: no maximum_operations_per_instruction byte.
        fields = (
            b'\x00\x20\x00\x00',  # unit_length
            b'\x02\x00',  # version
            b'\x39\x00\x00\x00',  # header_length
            b'\x01',  # minimum_instruction_length
            b'\x01',  # default_is_stmt
            b'\xfb',  # line_base
            b'\x0e',  # line_range
            b'\x0d',  # opcode_base
            b'\x00\x01\x01\x01\x01\x00\x00\x00\x01\x00\x00\x01',  # standard_opcode_lengths
            b'include\x00\x00',  # include_directories
            b'main.c\x00\x00\x01\x02',
            b'defs.h\x00\x01\x02\x03',  # file_names
            b'\x00',
        )
        parsed = lldwarf.parse_line_number_program_header(b''.join(fields))
        self.assertEqual(parsed, header())

    def test_v4(self):
        # 32-bit, version 4: same as version 2 plus the
        # maximum_operations_per_instruction byte.
        fields = (
            b'\x00\x20\x00\x00',  # unit_length
            b'\x04\x00',  # version
            b'\x39\x00\x00\x00',  # header_length
            b'\x01',  # minimum_instruction_length
            b'\x01',  # maximum_operations_per_instruction
            b'\x01',  # default_is_stmt
            b'\xfb',  # line_base
            b'\x0e',  # line_range
            b'\x0d',  # opcode_base
            b'\x00\x01\x01\x01\x01\x00\x00\x00\x01\x00\x00\x01',  # standard_opcode_lengths
            b'include\x00\x00',  # include_directories
            b'main.c\x00\x00\x01\x02',
            b'defs.h\x00\x01\x02\x03',  # file_names
            b'\x00',
        )
        parsed = lldwarf.parse_line_number_program_header(b''.join(fields))
        self.assertEqual(parsed, header(version=4))

    def test_64bit(self):
        # 64-bit: unit_length is 0xffffffff followed by an 8-byte length,
        # and header_length is 8 bytes wide.
        fields = (
            b'\xff\xff\xff\xff\x00\x20\x00\x00\x00\x00\x00\x00',  # unit_length
            b'\x02\x00',  # version
            b'\x39\x00\x00\x00\x00\x00\x00\x00',  # header_length
            b'\x01',  # minimum_instruction_length
            b'\x01',  # default_is_stmt
            b'\xfb',  # line_base
            b'\x0e',  # line_range
            b'\x0d',  # opcode_base
            b'\x00\x01\x01\x01\x01\x00\x00\x00\x01\x00\x00\x01',  # standard_opcode_lengths
            b'include\x00\x00',  # include_directories
            b'main.c\x00\x00\x01\x02',
            b'defs.h\x00\x01\x02\x03',  # file_names
            b'\x00',
        )
        parsed = lldwarf.parse_line_number_program_header(b''.join(fields))
        self.assertEqual(parsed, header(is_64_bit=True))

    def test_bad_opcode_base(self):
        # An opcode_base of 0 must be rejected with ValueError.
        fields = (
            b'\x00\x20\x00\x00',  # unit_length
            b'\x02\x00',  # version
            b'\x00\x00\x00\x00',  # header_length
            b'\x01',  # minimum_instruction_length
            b'\x01',  # default_is_stmt
            b'\xfb',  # line_base
            b'\x0e',  # line_range
            b'\x00',  # opcode_base
        )
        buf = b''.join(fields)
        with self.assertRaises(ValueError):
            lldwarf.parse_line_number_program_header(buf)

View File

@ -0,0 +1,174 @@
from collections import OrderedDict
import ctypes
import unittest
try:
from drgn.lldwarf import _TestObject
except ImportError:
pass
# Constructor arguments for _TestObject, keyed by attribute name. The tests
# below pass these positionally (via ARGS.values()) as well as by keyword, so
# the insertion order of this OrderedDict is significant.
# NOTE(review): the order presumably mirrors the member order declared by
# _TestObject on the C side -- confirm there before reordering.
ARGS = OrderedDict([
    ('m_short', -16),
    ('m_int', -32),
    ('m_long', -48),
    # The next three entries are disabled; no test in this file uses them.
    # ('m_float', 4.0),
    # ('m_double', 8.0),
    # ('m_string', 'asdf'),
    ('m_object', []),
    ('m_object_ex', {}),
    ('m_char', '@'),
    ('m_byte', -8),
    ('m_ubyte', 8),
    ('m_uint', 32),
    ('m_ushort', 16),
    ('m_ulong', 48),
    ('m_bool', True),
    ('m_longlong', -64),
    ('m_ulonglong', 64),
    ('m_pyssizet', -63),
])
@unittest.skipIf('_TestObject' not in globals(), '_TestObject not enabled')
class TestLLDwarfObject(unittest.TestCase):
    """Construction, comparison, range-checking and repr tests for _TestObject.

    The class is skipped when the module has no _TestObject global, which is
    the case when the import from drgn.lldwarf at the top of the file failed.
    """

    def test_args(self):
        # All members supplied positionally, in ARGS order.
        obj = _TestObject(*ARGS.values())
        for attr, val in ARGS.items():
            self.assertEqual(getattr(obj, attr), val)

    def test_kwargs(self):
        # All members supplied by keyword.
        obj = _TestObject(**ARGS)
        for attr, val in ARGS.items():
            self.assertEqual(getattr(obj, attr), val)

    def test_mixed(self):
        # First half of the members positionally, the rest by keyword.
        args = []
        kwds = {}
        for i, (key, value) in enumerate(ARGS.items()):
            if i < len(ARGS) // 2:
                args.append(value)
            else:
                kwds[key] = value
        obj = _TestObject(*args, **kwds)
        for attr, val in ARGS.items():
            self.assertEqual(getattr(obj, attr), val)

    def test_extra_args(self):
        # One argument too many is a TypeError, positionally or by keyword.
        args = ARGS.copy()
        args['m_foo'] = 5
        with self.assertRaises(TypeError):
            _TestObject(*args.values())
        with self.assertRaises(TypeError):
            _TestObject(**args)

    def test_missing_args(self):
        # One argument too few is a TypeError, positionally or by keyword.
        args = ARGS.copy()
        del args['m_pyssizet']
        with self.assertRaises(TypeError):
            _TestObject(*args.values())
        with self.assertRaises(TypeError):
            _TestObject(**args)

    def test_cmp(self):
        # Equality is supported; ordering comparisons are not.
        obj1 = _TestObject(**ARGS)
        obj2 = _TestObject(**ARGS)
        self.assertEqual(obj1, obj2)
        self.assertTrue(obj1 == obj2)
        self.assertFalse(obj1 != obj2)
        with self.assertRaises(TypeError):
            obj1 < obj2

    def _test_int(self, attr, min, max):
        # Shared checks for an integer member with inclusive range [min, max]:
        # both bounds are accepted and read back unchanged, one step outside
        # either bound raises OverflowError, and two objects that differ only
        # in this member compare unequal.
        args = ARGS.copy()
        args[attr] = min
        self.assertEqual(getattr(_TestObject(**args), attr), min)

        args = ARGS.copy()
        args[attr] = max
        self.assertEqual(getattr(_TestObject(**args), attr), max)

        args = ARGS.copy()
        args[attr] = min - 1
        with self.assertRaises(OverflowError):
            _TestObject(**args)

        args = ARGS.copy()
        args[attr] = max + 1
        with self.assertRaises(OverflowError):
            _TestObject(**args)

        args = ARGS.copy()
        args[attr] = min
        obj1 = _TestObject(**args)
        args = ARGS.copy()
        args[attr] = max
        obj2 = _TestObject(**args)
        self.assertNotEqual(obj1, obj2)

    def test_short(self):
        self._test_int('m_short', -2**15, 2**15 - 1)

    def test_int(self):
        self._test_int('m_int', -2**31, 2**31 - 1)

    def test_long(self):
        # The width of C long is platform-dependent, so ask ctypes.
        bits = 8 * ctypes.sizeof(ctypes.c_long) - 1
        self._test_int('m_long', -2**bits, 2**bits - 1)

    # test_object

    def test_char(self):
        # m_char takes exactly one character, and '\x80' is out of range.
        args = ARGS.copy()
        args['m_char'] = 'ab'
        with self.assertRaisesRegex(ValueError, 'expected a character'):
            _TestObject(**args)

        args = ARGS.copy()
        args['m_char'] = '\x80'
        with self.assertRaisesRegex(ValueError, 'character out of range'):
            _TestObject(**args)

    def test_byte(self):
        self._test_int('m_byte', -2**7, 2**7 - 1)

    def test_ubyte(self):
        self._test_int('m_ubyte', 0, 2**8 - 1)

    def test_uint(self):
        self._test_int('m_uint', 0, 2**32 - 1)

    def test_ushort(self):
        self._test_int('m_ushort', 0, 2**16 - 1)

    def test_ulong(self):
        # Size the range from the unsigned type this member is named after.
        bits = 8 * ctypes.sizeof(ctypes.c_ulong)
        self._test_int('m_ulong', 0, 2**bits - 1)

    def test_bool(self):
        args = ARGS.copy()
        args['m_bool'] = True
        self.assertEqual(_TestObject(**args).m_bool, True)

        args = ARGS.copy()
        args['m_bool'] = False
        self.assertEqual(_TestObject(**args).m_bool, False)

    def test_longlong(self):
        self._test_int('m_longlong', -2**63, 2**63 - 1)

    def test_ulonglong(self):
        self._test_int('m_ulonglong', 0, 2**64 - 1)

    def test_pyssizet(self):
        # m_pyssizet is presumably a Py_ssize_t member, whose width follows
        # the platform's ssize_t. Derive the range from ctypes, as test_long
        # does, instead of assuming 64 bits; the hard-coded bounds made this
        # test fail on platforms where ssize_t is 32 bits wide.
        bits = 8 * ctypes.sizeof(ctypes.c_ssize_t) - 1
        self._test_int('m_pyssizet', -2**bits, 2**bits - 1)

    def test_repr(self):
        # repr lists every member as name=value in ARGS order.
        args_repr = ', '.join(f'{key}={value!r}' for key, value in ARGS.items())
        obj = _TestObject(*ARGS.values())
        self.assertEqual(repr(obj), f'_TestObject({args_repr})')

    def test_recursive_repr(self):
        # An object reachable from its own m_object list is rendered as
        # _TestObject(...) at the point of recursion.
        args_repr = ', '.join(f'{key}={value!r}' for key, value in ARGS.items())
        args_repr = args_repr.replace('[]', '[_TestObject(...)]')
        obj = _TestObject(*ARGS.values())
        obj.m_object.append(obj)
        self.assertEqual(repr(obj), f'_TestObject({args_repr})')