From 87d9058ba38e2b1f04bdfec5280f4f5541c7f730 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Thu, 5 Apr 2018 23:40:28 -0700 Subject: [PATCH] Add type annotations everywhere --- cli.py | 24 +- drgn/coredump.py | 50 ++- drgn/dwarfindex.pyi | 8 + drgn/elf.py | 847 +++----------------------------------------- drgn/type.py | 212 ++++++----- drgn/typename.py | 132 ++++--- drgn/util.py | 7 +- 7 files changed, 318 insertions(+), 962 deletions(-) create mode 100644 drgn/dwarfindex.pyi diff --git a/cli.py b/cli.py index 3c395185..9c4ff532 100644 --- a/cli.py +++ b/cli.py @@ -6,16 +6,19 @@ import os.path import platform import runpy import sys +from typing import List, Tuple, Union from drgn.coredump import Coredump, CoredumpObject from drgn.dwarf import DW_TAG from drgn.dwarfindex import DwarfIndex -from drgn.elf import ElfFile +from drgn.elf import parse_elf_phdrs import drgn.type +from drgn.type import Type +from drgn.typename import TypeName from drgn.util import parse_symbol_file -def find_vmlinux(release): +def find_vmlinux(release: str) -> str: paths = [ f'/usr/lib/debug/lib/modules/{release}/vmlinux', f'/boot/vmlinux-{release}', @@ -28,7 +31,7 @@ def find_vmlinux(release): raise ValueError() -def find_modules(release): +def find_modules(release: str) -> List[str]: patterns = [ f'/usr/lib/debug/lib/modules/{release}/kernel/**/*.ko.debug', f'/lib/modules/{release}/kernel/**/*.ko', @@ -41,7 +44,7 @@ def find_modules(release): return [] -def main(): +def main() -> None: parser = argparse.ArgumentParser(prog='drgn') parser.add_argument( '-k', '--kernel', action='store_true', @@ -80,19 +83,19 @@ def main(): symbols = parse_symbol_file(f) with open('/proc/kcore', 'rb') as core_file: - core_elf_file = ElfFile(core_file) + phdrs = parse_elf_phdrs(core_file) - def lookup_type(type_name): + def lookup_type(type_name: Union[str, TypeName]) -> Type: return drgn.type.from_dwarf_type_name(dwarf_index, type_name) - def lookup_variable(name): + def lookup_variable(name: str) -> Tuple[int, Type]: address = symbols[name][-1] dwarf_type = dwarf_index.find(name, DW_TAG.variable).type() type_ = drgn.type.from_dwarf_type(dwarf_index, dwarf_type) return address, type_ - def read_memory(address, size): - for phdr in core_elf_file.phdrs(): + def read_memory(address: int, size: int) -> bytes: + for phdr in phdrs: if phdr.p_vaddr <= address <= phdr.p_vaddr + phdr.p_memsz: break else: @@ -113,7 +116,8 @@ def main(): else: init_globals['__name__'] = '__main__' init_globals['__doc__'] = None - code.interact(banner='', exitmsg='', local=init_globals) + code.interact(banner='', exitmsg='', local=init_globals) # type: ignore + # typeshed issue #2024 if __name__ == '__main__': main() diff --git a/drgn/coredump.py b/drgn/coredump.py index 75c8de11..cd74bf72 100644 --- a/drgn/coredump.py +++ b/drgn/coredump.py @@ -5,7 +5,9 @@ from drgn.type import ( Type, TypedefType, ) +from drgn.type import TypeName import itertools +from typing import Any, Callable, Iterable, Tuple, Union class CoredumpObject: @@ -31,7 +33,7 @@ class CoredumpObject: conflict. """ - def __init__(self, coredump, address, type): + def __init__(self, coredump: 'Coredump', address: int, type: Type) -> None: self.coredump_ = coredump self.address_ = address self.type_ = type @@ -39,33 +41,38 @@ class CoredumpObject: type = type.type self._real_type = type - def __getattr__(self, name): + def __getattr__(self, name: str) -> 'CoredumpObject': """Implement self.name. Shortcurt for self.member_(name)""" return self.member_(name) - def __getitem__(self, idx): + def __getitem__(self, idx: Any) -> 'CoredumpObject': """ Implement self[idx]. Return a CoredumpObject representing an array element at the given index. This is only valid for pointers and arrays. """ + try: + i = idx.__index__() + except AttributeError: + raise TypeError('index must be integer') if isinstance(self._real_type, PointerType): buffer = self.coredump_.read(self.address_, self._real_type.sizeof()) address = self._real_type.read(buffer) - offset = idx.__index__() * self._real_type.type.sizeof() + type_ = self._real_type.type elif isinstance(self._real_type, ArrayType): address = self.address_ - offset = idx.__index__() * self._real_type.type.sizeof() + # Duplicated here to work around mypy issue #4864. + type_ = self._real_type.type else: raise ValueError('not an array or pointer') - return CoredumpObject(self.coredump_, address + offset, - self._real_type.type) + offset = i * type_.sizeof() + return CoredumpObject(self.coredump_, address + offset, type_) - def __repr__(self): + def __repr__(self) -> str: return f'CoredumpObject(address=0x{self.address_:x}, type=<{self.type_.type_name()}>)' - def __str__(self): + def __str__(self) -> str: """ Implement str(self). Return a string representation of the value of this object in C syntax. @@ -73,7 +80,7 @@ class CoredumpObject: buffer = self.coredump_.read(self.address_, self._real_type.sizeof()) return self._real_type.format(buffer) - def value_(self): + def value_(self) -> Any: """ Return the value of this object as a Python object. @@ -86,7 +93,7 @@ class CoredumpObject: buffer = self.coredump_.read(self.address_, self._real_type.sizeof()) return self._real_type.read(buffer) - def string_(self): + def string_(self) -> bytes: """ Return the null-terminated string pointed to by this object as bytes. @@ -94,7 +101,7 @@ class CoredumpObject: """ if isinstance(self._real_type, PointerType): - addresses = itertools.count(self.value_()) + addresses: Iterable[int] = itertools.count(self.value_()) elif isinstance(self._real_type, ArrayType): if self._real_type.size is None: addresses = itertools.count(self.address_) @@ -110,7 +117,7 @@ class CoredumpObject: b.append(byte) return bytes(b) - def member_(self, name): + def member_(self, name: str) -> 'CoredumpObject': """ Return a CoredumpObject representing the given structure or union member. @@ -133,7 +140,7 @@ class CoredumpObject: offset = type_.offsetof(name) return CoredumpObject(self.coredump_, address + offset, member_type) - def cast_(self, type): + def cast_(self, type: Union[str, Type, TypeName]) -> 'CoredumpObject': """ Return a copy of this object casted to another type. The given type is usually a string, but it can also be a Type or TypeName object. @@ -142,7 +149,8 @@ class CoredumpObject: type = self.coredump_.find_type(type) return CoredumpObject(self.coredump_, self.address_, type) - def container_of_(self, type, member): + def container_of_(self, type: Union[str, Type, TypeName], + member: str) -> 'CoredumpObject': """ Return the containing object of the object pointed to by this object. The given type is the type of the containing object, and the given @@ -153,6 +161,8 @@ class CoredumpObject: """ if not isinstance(type, Type): type = self.coredump_.find_type(type) + if not isinstance(type, CompoundType): + raise ValueError('containerof is only valid with struct or union types') if not isinstance(self._real_type, PointerType): raise ValueError('containerof is only valid on pointers') address = self.value_() - type.offsetof(member) @@ -164,12 +174,14 @@ class Coredump: A Coredump object represents a crashed or running program to be debugged. """ - def __init__(self, *, lookup_type_fn, lookup_variable_fn, read_memory_fn): + def __init__(self, *, lookup_type_fn: Callable[[Union[str, TypeName]], Type], + lookup_variable_fn: Callable[[str], Tuple[int, Type]], + read_memory_fn: Callable[[int, int], bytes]) -> None: self._lookup_type = lookup_type_fn self._read_memory = read_memory_fn self._lookup_variable = lookup_variable_fn - def __getitem__(self, name): + def __getitem__(self, name: str) -> CoredumpObject: """ Implement self[name]. Return a CoredumpObject representing the variable with the given name. @@ -180,14 +192,14 @@ class Coredump: address, type_ = self._lookup_variable(name) return CoredumpObject(self, address, type_) - def find_type(self, name): + def find_type(self, name: Union[str, TypeName]) -> Type: """ Return a Type object for the type with the given name. The name is usually a string, but it can also be a TypeName object. """ return self._lookup_type(name) - def read(self, address, size): + def read(self, address: int, size: int) -> bytes: """ Return size bytes of memory starting at address in the coredump. diff --git a/drgn/dwarfindex.pyi b/drgn/dwarfindex.pyi new file mode 100644 index 00000000..6482b8e3 --- /dev/null +++ b/drgn/dwarfindex.pyi @@ -0,0 +1,8 @@ +from drgn.dwarf import Die +from typing import List + + +class DwarfIndex: + address_size: int + def __init__(self, paths: List[str]) -> None: ... + def find(self, name: str, tag: int) -> Die: ... diff --git a/drgn/elf.py b/drgn/elf.py index 0688c6ae..3858f2bc 100644 --- a/drgn/elf.py +++ b/drgn/elf.py @@ -1,13 +1,11 @@ -from collections import namedtuple import struct +from typing import BinaryIO, List, NamedTuple class ElfFormatError(Exception): pass -# Automatically generated from elf.h -EI_NIDENT = (16) EI_MAG0 = 0 ELFMAG0 = 0x7f EI_MAG1 = 1 @@ -27,805 +25,74 @@ ELFDATA2LSB = 1 ELFDATA2MSB = 2 ELFDATANUM = 3 EI_VERSION = 6 -EI_OSABI = 7 -ELFOSABI_NONE = 0 -ELFOSABI_SYSV = 0 -ELFOSABI_HPUX = 1 -ELFOSABI_NETBSD = 2 -ELFOSABI_GNU = 3 -ELFOSABI_LINUX = ELFOSABI_GNU -ELFOSABI_SOLARIS = 6 -ELFOSABI_AIX = 7 -ELFOSABI_IRIX = 8 -ELFOSABI_FREEBSD = 9 -ELFOSABI_TRU64 = 10 -ELFOSABI_MODESTO = 11 -ELFOSABI_OPENBSD = 12 -ELFOSABI_ARM_AEABI = 64 -ELFOSABI_ARM = 97 -ELFOSABI_STANDALONE = 255 -EI_ABIVERSION = 8 -EI_PAD = 9 -ET_NONE = 0 -ET_REL = 1 -ET_EXEC = 2 -ET_DYN = 3 -ET_CORE = 4 -ET_NUM = 5 -ET_LOOS = 0xfe00 -ET_HIOS = 0xfeff -ET_LOPROC = 0xff00 -ET_HIPROC = 0xffff -EM_NONE = 0 -EM_M32 = 1 -EM_SPARC = 2 -EM_386 = 3 -EM_68K = 4 -EM_88K = 5 -EM_IAMCU = 6 -EM_860 = 7 -EM_MIPS = 8 -EM_S370 = 9 -EM_MIPS_RS3_LE = 10 -EM_PARISC = 15 -EM_VPP500 = 17 -EM_SPARC32PLUS = 18 -EM_960 = 19 -EM_PPC = 20 -EM_PPC64 = 21 -EM_S390 = 22 -EM_SPU = 23 -EM_V800 = 36 -EM_FR20 = 37 -EM_RH32 = 38 -EM_RCE = 39 -EM_ARM = 40 -EM_FAKE_ALPHA = 41 -EM_SH = 42 -EM_SPARCV9 = 43 -EM_TRICORE = 44 -EM_ARC = 45 -EM_H8_300 = 46 -EM_H8_300H = 47 -EM_H8S = 48 -EM_H8_500 = 49 -EM_IA_64 = 50 -EM_MIPS_X = 51 -EM_COLDFIRE = 52 -EM_68HC12 = 53 -EM_MMA = 54 -EM_PCP = 55 -EM_NCPU = 56 -EM_NDR1 = 57 -EM_STARCORE = 58 -EM_ME16 = 59 -EM_ST100 = 60 -EM_TINYJ = 61 -EM_X86_64 = 62 -EM_PDSP = 63 -EM_PDP10 = 64 -EM_PDP11 = 65 -EM_FX66 = 66 -EM_ST9PLUS = 67 -EM_ST7 = 68 -EM_68HC16 = 69 -EM_68HC11 = 70 -EM_68HC08 = 71 -EM_68HC05 = 72 -EM_SVX = 73 -EM_ST19 = 74 -EM_VAX = 75 -EM_CRIS = 76 -EM_JAVELIN = 77 -EM_FIREPATH = 78 -EM_ZSP = 79 -EM_MMIX = 80 -EM_HUANY = 81 -EM_PRISM = 82 -EM_AVR = 83 -EM_FR30 = 84 -EM_D10V = 85 -EM_D30V = 86 -EM_V850 = 87 -EM_M32R = 88 -EM_MN10300 = 89 -EM_MN10200 = 90 -EM_PJ = 91 -EM_OPENRISC = 92 -EM_ARC_COMPACT = 93 -EM_XTENSA = 94 -EM_VIDEOCORE = 95 -EM_TMM_GPP = 96 -EM_NS32K = 97 -EM_TPC = 98 -EM_SNP1K = 99 -EM_ST200 = 100 -EM_IP2K = 101 -EM_MAX = 102 -EM_CR = 103 -EM_F2MC16 = 104 -EM_MSP430 = 105 -EM_BLACKFIN = 106 -EM_SE_C33 = 107 -EM_SEP = 108 -EM_ARCA = 109 -EM_UNICORE = 110 -EM_EXCESS = 111 -EM_DXP = 112 -EM_ALTERA_NIOS2 = 113 -EM_CRX = 114 -EM_XGATE = 115 -EM_C166 = 116 -EM_M16C = 117 -EM_DSPIC30F = 118 -EM_CE = 119 -EM_M32C = 120 -EM_TSK3000 = 131 -EM_RS08 = 132 -EM_SHARC = 133 -EM_ECOG2 = 134 -EM_SCORE7 = 135 -EM_DSP24 = 136 -EM_VIDEOCORE3 = 137 -EM_LATTICEMICO32 = 138 -EM_SE_C17 = 139 -EM_TI_C6000 = 140 -EM_TI_C2000 = 141 -EM_TI_C5500 = 142 -EM_TI_ARP32 = 143 -EM_TI_PRU = 144 -EM_MMDSP_PLUS = 160 -EM_CYPRESS_M8C = 161 -EM_R32C = 162 -EM_TRIMEDIA = 163 -EM_QDSP6 = 164 -EM_8051 = 165 -EM_STXP7X = 166 -EM_NDS32 = 167 -EM_ECOG1X = 168 -EM_MAXQ30 = 169 -EM_XIMO16 = 170 -EM_MANIK = 171 -EM_CRAYNV2 = 172 -EM_RX = 173 -EM_METAG = 174 -EM_MCST_ELBRUS = 175 -EM_ECOG16 = 176 -EM_CR16 = 177 -EM_ETPU = 178 -EM_SLE9X = 179 -EM_L10M = 180 -EM_K10M = 181 -EM_AARCH64 = 183 -EM_AVR32 = 185 -EM_STM8 = 186 -EM_TILE64 = 187 -EM_TILEPRO = 188 -EM_MICROBLAZE = 189 -EM_CUDA = 190 -EM_TILEGX = 191 -EM_CLOUDSHIELD = 192 -EM_COREA_1ST = 193 -EM_COREA_2ND = 194 -EM_ARC_COMPACT2 = 195 -EM_OPEN8 = 196 -EM_RL78 = 197 -EM_VIDEOCORE5 = 198 -EM_78KOR = 199 -EM_56800EX = 200 -EM_BA1 = 201 -EM_BA2 = 202 -EM_XCORE = 203 -EM_MCHP_PIC = 204 -EM_KM32 = 210 -EM_KMX32 = 211 -EM_EMX16 = 212 -EM_EMX8 = 213 -EM_KVARC = 214 -EM_CDP = 215 -EM_COGE = 216 -EM_COOL = 217 -EM_NORC = 218 -EM_CSR_KALIMBA = 219 -EM_Z80 = 220 -EM_VISIUM = 221 -EM_FT32 = 222 -EM_MOXIE = 223 -EM_AMDGPU = 224 -EM_RISCV = 243 -EM_BPF = 247 -EM_NUM = 248 -EM_ARC_A5 = EM_ARC_COMPACT -EM_ALPHA = 0x9026 -EV_NONE = 0 EV_CURRENT = 1 -EV_NUM = 2 -SHN_UNDEF = 0 -SHN_LORESERVE = 0xff00 -SHN_LOPROC = 0xff00 -SHN_BEFORE = 0xff00 -SHN_AFTER = 0xff01 -SHN_HIPROC = 0xff1f -SHN_LOOS = 0xff20 -SHN_HIOS = 0xff3f -SHN_ABS = 0xfff1 -SHN_COMMON = 0xfff2 -SHN_XINDEX = 0xffff -SHN_HIRESERVE = 0xffff -SHT_NULL = 0 -SHT_PROGBITS = 1 -SHT_SYMTAB = 2 -SHT_STRTAB = 3 -SHT_RELA = 4 -SHT_HASH = 5 -SHT_DYNAMIC = 6 -SHT_NOTE = 7 -SHT_NOBITS = 8 -SHT_REL = 9 -SHT_SHLIB = 10 -SHT_DYNSYM = 11 -SHT_INIT_ARRAY = 14 -SHT_FINI_ARRAY = 15 -SHT_PREINIT_ARRAY = 16 -SHT_GROUP = 17 -SHT_SYMTAB_SHNDX = 18 -SHT_NUM = 19 -SHT_LOOS = 0x60000000 -SHT_GNU_ATTRIBUTES = 0x6ffffff5 -SHT_GNU_HASH = 0x6ffffff6 -SHT_GNU_LIBLIST = 0x6ffffff7 -SHT_CHECKSUM = 0x6ffffff8 -SHT_LOSUNW = 0x6ffffffa -SHT_SUNW_move = 0x6ffffffa -SHT_SUNW_COMDAT = 0x6ffffffb -SHT_SUNW_syminfo = 0x6ffffffc -SHT_GNU_verdef = 0x6ffffffd -SHT_GNU_verneed = 0x6ffffffe -SHT_GNU_versym = 0x6fffffff -SHT_HISUNW = 0x6fffffff -SHT_HIOS = 0x6fffffff -SHT_LOPROC = 0x70000000 -SHT_HIPROC = 0x7fffffff -SHT_LOUSER = 0x80000000 -SHT_HIUSER = 0x8fffffff -SHF_WRITE = (1 << 0) -SHF_ALLOC = (1 << 1) -SHF_EXECINSTR = (1 << 2) -SHF_MERGE = (1 << 4) -SHF_STRINGS = (1 << 5) -SHF_INFO_LINK = (1 << 6) -SHF_LINK_ORDER = (1 << 7) -SHF_OS_NONCONFORMING = (1 << 8) -SHF_GROUP = (1 << 9) -SHF_TLS = (1 << 10) -SHF_COMPRESSED = (1 << 11) -SHF_MASKOS = 0x0ff00000 -SHF_MASKPROC = 0xf0000000 -SHF_ORDERED = (1 << 30) -SHF_EXCLUDE = (1 << 31) -ELFCOMPRESS_ZLIB = 1 -ELFCOMPRESS_LOOS = 0x60000000 -ELFCOMPRESS_HIOS = 0x6fffffff -ELFCOMPRESS_LOPROC = 0x70000000 -ELFCOMPRESS_HIPROC = 0x7fffffff -GRP_COMDAT = 0x1 -STB_LOCAL = 0 -STB_GLOBAL = 1 -STB_WEAK = 2 -STB_NUM = 3 -STB_LOOS = 10 -STB_GNU_UNIQUE = 10 -STB_HIOS = 12 -STB_LOPROC = 13 -STB_HIPROC = 15 -STT_NOTYPE = 0 -STT_OBJECT = 1 -STT_FUNC = 2 -STT_SECTION = 3 -STT_FILE = 4 -STT_COMMON = 5 -STT_TLS = 6 -STT_NUM = 7 -STT_LOOS = 10 -STT_GNU_IFUNC = 10 -STT_HIOS = 12 -STT_LOPROC = 13 -STT_HIPROC = 15 -STV_DEFAULT = 0 -STV_INTERNAL = 1 -STV_HIDDEN = 2 -STV_PROTECTED = 3 -PT_NULL = 0 -PT_LOAD = 1 -PT_DYNAMIC = 2 -PT_INTERP = 3 -PT_NOTE = 4 -PT_SHLIB = 5 -PT_PHDR = 6 -PT_TLS = 7 -PT_NUM = 8 -PT_LOOS = 0x60000000 -PT_GNU_EH_FRAME = 0x6474e550 -PT_GNU_STACK = 0x6474e551 -PT_GNU_RELRO = 0x6474e552 -PT_LOSUNW = 0x6ffffffa -PT_SUNWBSS = 0x6ffffffa -PT_SUNWSTACK = 0x6ffffffb -PT_HISUNW = 0x6fffffff -PT_HIOS = 0x6fffffff -PT_LOPROC = 0x70000000 -PT_HIPROC = 0x7fffffff -PF_X = (1 << 0) -PF_W = (1 << 1) -PF_R = (1 << 2) -PF_MASKOS = 0x0ff00000 -PF_MASKPROC = 0xf0000000 -DT_NULL = 0 -DT_NEEDED = 1 -DT_PLTRELSZ = 2 -DT_PLTGOT = 3 -DT_HASH = 4 -DT_STRTAB = 5 -DT_SYMTAB = 6 -DT_RELA = 7 -DT_RELASZ = 8 -DT_RELAENT = 9 -DT_STRSZ = 10 -DT_SYMENT = 11 -DT_INIT = 12 -DT_FINI = 13 -DT_SONAME = 14 -DT_RPATH = 15 -DT_SYMBOLIC = 16 -DT_REL = 17 -DT_RELSZ = 18 -DT_RELENT = 19 -DT_PLTREL = 20 -DT_DEBUG = 21 -DT_TEXTREL = 22 -DT_JMPREL = 23 -DT_BIND_NOW = 24 -DT_INIT_ARRAY = 25 -DT_FINI_ARRAY = 26 -DT_INIT_ARRAYSZ = 27 -DT_FINI_ARRAYSZ = 28 -DT_RUNPATH = 29 -DT_FLAGS = 30 -DT_ENCODING = 32 -DT_PREINIT_ARRAY = 32 -DT_PREINIT_ARRAYSZ = 33 -DT_NUM = 34 -DT_LOOS = 0x6000000d -DT_HIOS = 0x6ffff000 -DT_LOPROC = 0x70000000 -DT_HIPROC = 0x7fffffff -DT_PROCNUM = 0x36 -DT_VALRNGLO = 0x6ffffd00 -DT_GNU_PRELINKED = 0x6ffffdf5 -DT_GNU_CONFLICTSZ = 0x6ffffdf6 -DT_GNU_LIBLISTSZ = 0x6ffffdf7 -DT_CHECKSUM = 0x6ffffdf8 -DT_PLTPADSZ = 0x6ffffdf9 -DT_MOVEENT = 0x6ffffdfa -DT_MOVESZ = 0x6ffffdfb -DT_FEATURE_1 = 0x6ffffdfc -DT_POSFLAG_1 = 0x6ffffdfd -DT_SYMINSZ = 0x6ffffdfe -DT_SYMINENT = 0x6ffffdff -DT_VALRNGHI = 0x6ffffdff -DT_VALNUM = 12 -DT_ADDRRNGLO = 0x6ffffe00 -DT_GNU_HASH = 0x6ffffef5 -DT_TLSDESC_PLT = 0x6ffffef6 -DT_TLSDESC_GOT = 0x6ffffef7 -DT_GNU_CONFLICT = 0x6ffffef8 -DT_GNU_LIBLIST = 0x6ffffef9 -DT_CONFIG = 0x6ffffefa -DT_DEPAUDIT = 0x6ffffefb -DT_AUDIT = 0x6ffffefc -DT_PLTPAD = 0x6ffffefd -DT_MOVETAB = 0x6ffffefe -DT_SYMINFO = 0x6ffffeff -DT_ADDRRNGHI = 0x6ffffeff -DT_ADDRNUM = 11 -DT_VERSYM = 0x6ffffff0 -DT_RELACOUNT = 0x6ffffff9 -DT_RELCOUNT = 0x6ffffffa -DT_FLAGS_1 = 0x6ffffffb -DT_VERDEF = 0x6ffffffc -DT_VERDEFNUM = 0x6ffffffd -DT_VERNEED = 0x6ffffffe -DT_VERNEEDNUM = 0x6fffffff -DT_VERSIONTAGNUM = 16 -DT_AUXILIARY = 0x7ffffffd -DT_FILTER = 0x7fffffff -DT_EXTRANUM = 3 -STT_SPARC_REGISTER = 13 -DT_SPARC_REGISTER = 0x70000001 -DT_SPARC_NUM = 2 -SHN_MIPS_ACOMMON = 0xff00 -SHN_MIPS_TEXT = 0xff01 -SHN_MIPS_DATA = 0xff02 -SHN_MIPS_SCOMMON = 0xff03 -SHN_MIPS_SUNDEFINED = 0xff04 -SHT_MIPS_LIBLIST = 0x70000000 -SHT_MIPS_MSYM = 0x70000001 -SHT_MIPS_CONFLICT = 0x70000002 -SHT_MIPS_GPTAB = 0x70000003 -SHT_MIPS_UCODE = 0x70000004 -SHT_MIPS_DEBUG = 0x70000005 -SHT_MIPS_REGINFO = 0x70000006 -SHT_MIPS_PACKAGE = 0x70000007 -SHT_MIPS_PACKSYM = 0x70000008 -SHT_MIPS_RELD = 0x70000009 -SHT_MIPS_IFACE = 0x7000000b -SHT_MIPS_CONTENT = 0x7000000c -SHT_MIPS_OPTIONS = 0x7000000d -SHT_MIPS_SHDR = 0x70000010 -SHT_MIPS_FDESC = 0x70000011 -SHT_MIPS_EXTSYM = 0x70000012 -SHT_MIPS_DENSE = 0x70000013 -SHT_MIPS_PDESC = 0x70000014 -SHT_MIPS_LOCSYM = 0x70000015 -SHT_MIPS_AUXSYM = 0x70000016 -SHT_MIPS_OPTSYM = 0x70000017 -SHT_MIPS_LOCSTR = 0x70000018 -SHT_MIPS_LINE = 0x70000019 -SHT_MIPS_RFDESC = 0x7000001a -SHT_MIPS_DELTASYM = 0x7000001b -SHT_MIPS_DELTAINST = 0x7000001c -SHT_MIPS_DELTACLASS = 0x7000001d -SHT_MIPS_DWARF = 0x7000001e -SHT_MIPS_DELTADECL = 0x7000001f -SHT_MIPS_SYMBOL_LIB = 0x70000020 -SHT_MIPS_EVENTS = 0x70000021 -SHT_MIPS_TRANSLATE = 0x70000022 -SHT_MIPS_PIXIE = 0x70000023 -SHT_MIPS_XLATE = 0x70000024 -SHT_MIPS_XLATE_DEBUG = 0x70000025 -SHT_MIPS_WHIRL = 0x70000026 -SHT_MIPS_EH_REGION = 0x70000027 -SHT_MIPS_XLATE_OLD = 0x70000028 -SHT_MIPS_PDR_EXCEPTION = 0x70000029 -SHF_MIPS_GPREL = 0x10000000 -SHF_MIPS_MERGE = 0x20000000 -SHF_MIPS_ADDR = 0x40000000 -SHF_MIPS_STRINGS = 0x80000000 -SHF_MIPS_NOSTRIP = 0x08000000 -SHF_MIPS_LOCAL = 0x04000000 -SHF_MIPS_NAMES = 0x02000000 -SHF_MIPS_NODUPE = 0x01000000 -STB_MIPS_SPLIT_COMMON = 13 -PT_MIPS_REGINFO = 0x70000000 -PT_MIPS_RTPROC = 0x70000001 -PT_MIPS_OPTIONS = 0x70000002 -PT_MIPS_ABIFLAGS = 0x70000003 -PF_MIPS_LOCAL = 0x10000000 -DT_MIPS_RLD_VERSION = 0x70000001 -DT_MIPS_TIME_STAMP = 0x70000002 -DT_MIPS_ICHECKSUM = 0x70000003 -DT_MIPS_IVERSION = 0x70000004 -DT_MIPS_FLAGS = 0x70000005 -DT_MIPS_BASE_ADDRESS = 0x70000006 -DT_MIPS_MSYM = 0x70000007 -DT_MIPS_CONFLICT = 0x70000008 -DT_MIPS_LIBLIST = 0x70000009 -DT_MIPS_LOCAL_GOTNO = 0x7000000a -DT_MIPS_CONFLICTNO = 0x7000000b -DT_MIPS_LIBLISTNO = 0x70000010 -DT_MIPS_SYMTABNO = 0x70000011 -DT_MIPS_UNREFEXTNO = 0x70000012 -DT_MIPS_GOTSYM = 0x70000013 -DT_MIPS_HIPAGENO = 0x70000014 -DT_MIPS_RLD_MAP = 0x70000016 -DT_MIPS_DELTA_CLASS = 0x70000017 -DT_MIPS_DELTA_CLASS_NO = 0x70000018 -DT_MIPS_DELTA_INSTANCE = 0x70000019 -DT_MIPS_DELTA_INSTANCE_NO = 0x7000001a -DT_MIPS_DELTA_RELOC = 0x7000001b -DT_MIPS_DELTA_RELOC_NO = 0x7000001c -DT_MIPS_DELTA_SYM = 0x7000001d -DT_MIPS_DELTA_SYM_NO = 0x7000001e -DT_MIPS_DELTA_CLASSSYM = 0x70000020 -DT_MIPS_DELTA_CLASSSYM_NO = 0x70000021 -DT_MIPS_CXX_FLAGS = 0x70000022 -DT_MIPS_PIXIE_INIT = 0x70000023 -DT_MIPS_SYMBOL_LIB = 0x70000024 -DT_MIPS_LOCALPAGE_GOTIDX = 0x70000025 -DT_MIPS_LOCAL_GOTIDX = 0x70000026 -DT_MIPS_HIDDEN_GOTIDX = 0x70000027 -DT_MIPS_PROTECTED_GOTIDX = 0x70000028 -DT_MIPS_OPTIONS = 0x70000029 -DT_MIPS_INTERFACE = 0x7000002a -DT_MIPS_DYNSTR_ALIGN = 0x7000002b -DT_MIPS_INTERFACE_SIZE = 0x7000002c -DT_MIPS_RLD_TEXT_RESOLVE_ADDR = 0x7000002d -DT_MIPS_PERF_SUFFIX = 0x7000002e -DT_MIPS_COMPACT_SIZE = 0x7000002f -DT_MIPS_GP_VALUE = 0x70000030 -DT_MIPS_AUX_DYNAMIC = 0x70000031 -DT_MIPS_PLTGOT = 0x70000032 -DT_MIPS_RWPLT = 0x70000034 -DT_MIPS_RLD_MAP_REL = 0x70000035 -DT_MIPS_NUM = 0x36 -SHN_PARISC_ANSI_COMMON = 0xff00 -SHN_PARISC_HUGE_COMMON = 0xff01 -SHT_PARISC_EXT = 0x70000000 -SHT_PARISC_UNWIND = 0x70000001 -SHT_PARISC_DOC = 0x70000002 -SHF_PARISC_SHORT = 0x20000000 -SHF_PARISC_HUGE = 0x40000000 -SHF_PARISC_SBP = 0x80000000 -STT_PARISC_MILLICODE = 13 -STT_HP_OPAQUE = (STT_LOOS + 0x1) -STT_HP_STUB = (STT_LOOS + 0x2) -PT_HP_TLS = (PT_LOOS + 0x0) -PT_HP_CORE_NONE = (PT_LOOS + 0x1) -PT_HP_CORE_VERSION = (PT_LOOS + 0x2) -PT_HP_CORE_KERNEL = (PT_LOOS + 0x3) -PT_HP_CORE_COMM = (PT_LOOS + 0x4) -PT_HP_CORE_PROC = (PT_LOOS + 0x5) -PT_HP_CORE_LOADABLE = (PT_LOOS + 0x6) -PT_HP_CORE_STACK = (PT_LOOS + 0x7) -PT_HP_CORE_SHM = (PT_LOOS + 0x8) -PT_HP_CORE_MMF = (PT_LOOS + 0x9) -PT_HP_PARALLEL = (PT_LOOS + 0x10) -PT_HP_FASTBIND = (PT_LOOS + 0x11) -PT_HP_OPT_ANNOT = (PT_LOOS + 0x12) -PT_HP_HSL_ANNOT = (PT_LOOS + 0x13) -PT_HP_STACK = (PT_LOOS + 0x14) -PT_PARISC_ARCHEXT = 0x70000000 -PT_PARISC_UNWIND = 0x70000001 -PF_PARISC_SBP = 0x08000000 -PF_HP_PAGE_SIZE = 0x00100000 -PF_HP_FAR_SHARED = 0x00200000 -PF_HP_NEAR_SHARED = 0x00400000 -PF_HP_CODE = 0x01000000 -PF_HP_MODIFY = 0x02000000 -PF_HP_LAZYSWAP = 0x04000000 -PF_HP_SBP = 0x08000000 -SHT_ALPHA_DEBUG = 0x70000001 -SHT_ALPHA_REGINFO = 0x70000002 -SHF_ALPHA_GPREL = 0x10000000 -DT_ALPHA_PLTRO = (DT_LOPROC + 0) -DT_ALPHA_NUM = 1 -DT_PPC_GOT = (DT_LOPROC + 0) -DT_PPC_OPT = (DT_LOPROC + 1) -DT_PPC_NUM = 2 -DT_PPC64_GLINK = (DT_LOPROC + 0) -DT_PPC64_OPD = (DT_LOPROC + 1) -DT_PPC64_OPDSZ = (DT_LOPROC + 2) -DT_PPC64_OPT = (DT_LOPROC + 3) -DT_PPC64_NUM = 4 -STT_ARM_TFUNC = STT_LOPROC -STT_ARM_16BIT = STT_HIPROC -SHF_ARM_ENTRYSECT = 0x10000000 -SHF_ARM_COMDEF = 0x80000000 -PF_ARM_SB = 0x10000000 -PF_ARM_PI = 0x20000000 -PF_ARM_ABS = 0x40000000 -PT_ARM_EXIDX = (PT_LOPROC + 1) -SHT_ARM_EXIDX = (SHT_LOPROC + 1) -SHT_ARM_PREEMPTMAP = (SHT_LOPROC + 2) -SHT_ARM_ATTRIBUTES = (SHT_LOPROC + 3) -PT_IA_64_ARCHEXT = (PT_LOPROC + 0) -PT_IA_64_UNWIND = (PT_LOPROC + 1) -PT_IA_64_HP_OPT_ANOT = (PT_LOOS + 0x12) -PT_IA_64_HP_HSL_ANOT = (PT_LOOS + 0x13) -PT_IA_64_HP_STACK = (PT_LOOS + 0x14) -PF_IA_64_NORECOV = 0x80000000 -SHT_IA_64_EXT = (SHT_LOPROC + 0) -SHT_IA_64_UNWIND = (SHT_LOPROC + 1) -SHF_IA_64_SHORT = 0x10000000 -SHF_IA_64_NORECOV = 0x20000000 -DT_IA_64_PLT_RESERVE = (DT_LOPROC + 0) -DT_IA_64_NUM = 1 -DT_NIOS2_GP = 0x70000002 -Elf_Ehdr = namedtuple('Elf_Ehdr', [ - 'e_ident', - 'e_type', - 'e_machine', - 'e_version', - 'e_entry', - 'e_phoff', - 'e_shoff', - 'e_flags', - 'e_ehsize', - 'e_phentsize', - 'e_phnum', - 'e_shentsize', - 'e_shnum', - 'e_shstrndx', -]) +class Elf_Ehdr(NamedTuple): + e_ident: bytes + e_type: int + e_machine: int + e_version: int + e_entry: int + e_phoff: int + e_shoff: int + e_flags: int + e_ehsize: int + e_phentsize: int + e_phnum: int + e_shentsize: int + e_shnum: int + e_shstrndx: int -Elf_Shdr = namedtuple('Elf_Shdr', [ - 'sh_name', - 'sh_type', - 'sh_flags', - 'sh_addr', - 'sh_offset', - 'sh_size', - 'sh_link', - 'sh_info', - 'sh_addralign', - 'sh_entsize', -]) +class Elf_Phdr(NamedTuple): + p_type: int + p_flags: int + p_offset: int + p_vaddr: int + p_paddr: int + p_filesz: int + p_memsz: int + p_align: int -Elf_Sym = namedtuple('Elf_Sym', [ - 'st_name', - 'st_info', - 'st_other', - 'st_shndx', - 'st_value', - 'st_size', -]) +def parse_elf_phdrs(file: BinaryIO) -> List[Elf_Phdr]: + file.seek(0) + buf = file.read(64) # sizeof(struct Elf64_Ehdr) + if (buf[EI_MAG0] != ELFMAG0 or buf[EI_MAG1] != ELFMAG1 or + buf[EI_MAG2] != ELFMAG2 or buf[EI_MAG3] != ELFMAG3): + raise ValueError('not an ELF file') -Elf_Phdr = namedtuple('Elf_Phdr', [ - 'p_type', - 'p_flags', - 'p_offset', - 'p_vaddr', - 'p_paddr', - 'p_filesz', - 'p_memsz', - 'p_align', -]) + if buf[EI_VERSION] != EV_CURRENT: + raise ValueError('ELF version is not EV_CURRENT') + if buf[EI_DATA] == ELFDATA2LSB: + fmt = '<' + elif buf[EI_DATA] == ELFDATA2MSB: + fmt = '>' + else: + raise ValueError(f'unknown ELF data encoding {buf[EI_DATA]}') -class ElfFile: - def __init__(self, file): - self.file = file - self._ehdr = None - self._shdrs = None - self._shstrtab_shdr = None - self._shdrs_by_name = None - self._phdrs = None - self._symbols = None - self._symbols_by_name = None + if buf[EI_CLASS] == ELFCLASS64: + fmt += '16sHHLQQQLHHHHHH' + elif buf[EI_CLASS] == ELFCLASS32: + raise NotImplementedError('32-bit ELF is not implemented') + else: + raise ValueError(f'unknown ELF class {buf[EI_CLASS]}') - def ehdr(self): - if self._ehdr is None: - self.file.seek(0) - buf = self.file.read(64) # sizeof(struct Elf64_Ehdr) + ehdr = Elf_Ehdr._make(struct.unpack_from(fmt, buf)) - if (buf[EI_MAG0] != ELFMAG0 or buf[EI_MAG1] != ELFMAG1 or - buf[EI_MAG2] != ELFMAG2 or buf[EI_MAG3] != ELFMAG3): - raise ValueError('not an ELF file') + file.seek(ehdr.e_phoff) + buf = file.read(ehdr.e_phnum * ehdr.e_phentsize) - if buf[EI_VERSION] != EV_CURRENT: - raise ValueError('ELF version is not EV_CURRENT') + if ehdr.e_ident[EI_DATA] == ELFDATA2LSB: + fmt = '<' + else: + fmt = '>' - if buf[EI_DATA] == ELFDATA2LSB: - fmt = '<' - elif buf[EI_DATA] == ELFDATA2MSB: - fmt = '>' - else: - raise ValueError(f'unknown ELF data encoding {buf[EI_DATA]}') - - if buf[EI_CLASS] == ELFCLASS64: - fmt += '16sHHLQQQLHHHHHH' - elif buf[EI_CLASS] == ELFCLASS32: - raise NotImplementedError('32-bit ELF is not implemented') - else: - raise ValueError(f'unknown ELF class {buf[EI_CLASS]}') - self._ehdr = Elf_Ehdr._make(struct.unpack_from(fmt, buf)) - return self._ehdr - - def shdrs(self): - if self._shdrs is None: - ehdr = self.ehdr() - self.file.seek(ehdr.e_shoff) - # TODO: e_shnum == 0 - buf = self.file.read(ehdr.e_shnum * ehdr.e_shentsize) - - if ehdr.e_ident[EI_DATA] == ELFDATA2LSB: - fmt = '<' - else: - fmt = '>' - - if ehdr.e_ident[EI_CLASS] == ELFCLASS64: - fmt += 'LLQQQQLLQQ' - else: - assert False - self._shdrs = [Elf_Shdr._make(x) for x in struct.iter_unpack(fmt, buf)] - return self._shdrs - - def shstrtab_shdr(self): - if self._shstrtab_shdr is None: - ehdr = self.ehdr() - shdrs = self.shdrs() - if ehdr.e_shstrndx == SHN_UNDEF: - raise ValueError('no string table index in ELF header') - elif ehdr.e_shstrndx == SHN_XINDEX: - shdr = shdrs[shdrs[0].sh_link] - else: - if ehdr.e_shstrndx >= SHN_LORESERVE: - raise ValueError('invalid string table index in ELF header') - shdr = shdrs[ehdr.e_shstrndx] - if shdr.sh_type != SHT_STRTAB or shdr.sh_size == 0: - raise ValueError('invalid string table section') - self._shstrtab_shdr = shdr - return self._shstrtab_shdr - - def shdrs_by_name(self): - if self._shdrs_by_name is None: - shstrtab_shdr = self.shstrtab_shdr() - self.file.seek(shstrtab_shdr.sh_offset) - shstrtab = self.file.read(shstrtab_shdr.sh_size) - shdrs = self.shdrs() - shdrs_by_name = {} - for shdr in shdrs: - if not shdr.sh_name: - continue - end = shstrtab.index(b'\0', shdr.sh_name) - name = shstrtab[shdr.sh_name:end].decode() - if name in shdrs_by_name: - raise ValueError(f'duplicate section name {name!r}') - shdrs_by_name[name] = shdr - self._shdrs_by_name = shdrs_by_name - return self._shdrs_by_name - - def shdr(self, name): - return self.shdrs_by_name()[name] - - def phdrs(self): - if self._phdrs is None: - ehdr = self.ehdr() - self.file.seek(ehdr.e_phoff) - buf = self.file.read(ehdr.e_phnum * ehdr.e_phentsize) - - if ehdr.e_ident[EI_DATA] == ELFDATA2LSB: - fmt = '<' - else: - fmt = '>' - - if ehdr.e_ident[EI_CLASS] == ELFCLASS64: - fmt += 'LLQQQQQQ' - else: - assert False - self._phdrs = [Elf_Phdr._make(x) for x in struct.iter_unpack(fmt, buf)] - return self._phdrs - - def symbols(self): - if self._symbols is None: - ehdr = self.ehdr() - shdr = self.shdr('.symtab') - self.file.seek(shdr.sh_offset) - buf = self.file.read(shdr.sh_size) - - if ehdr.e_ident[EI_DATA] == ELFDATA2LSB: - fmt = '<' - else: - fmt = '>' - - if ehdr.e_ident[EI_CLASS] == ELFCLASS64: - fmt += 'LBBHQQ' - else: - assert False - self._symbols = [Elf_Sym._make(x) for x in struct.iter_unpack(fmt, buf)] - return self._symbols - - def symbols_by_name(self): - if self._symbols_by_name is None: - strtab_shdr = self.shdr('.strtab') - self.file.seek(strtab_shdr.sh_offset) - strtab = self.file.read(strtab_shdr.sh_size) - symbols = self.symbols() - symbols_by_name = {} - for symbol in symbols_by_name: - if symbol.st_name: - end = strtab.index(b'\0', symbol.st_name) - name = strtab[symbol.st_name:end].decode() - else: - name = '' - try: - symbols_by_name[name].append(symbol) - except KeyError: - symbols_by_name[name] = [symbol] - self._symbols_by_name = symbols_by_name - return self._symbols_by_name + if ehdr.e_ident[EI_CLASS] == ELFCLASS64: + fmt += 'LLQQQQQQ' + else: + assert False + return [Elf_Phdr._make(x) for x in struct.iter_unpack(fmt, buf)] diff --git a/drgn/type.py b/drgn/type.py index 5099a1b8..ed534b36 100644 --- a/drgn/type.py +++ b/drgn/type.py @@ -1,5 +1,13 @@ from collections import OrderedDict -from drgn.dwarf import DwarfAttribNotFoundError, DW_AT, DW_ATE, DW_TAG +from drgn.dwarfindex import DwarfIndex +from drgn.dwarf import ( + Die, + DwarfAttribNotFoundError, + DwarfFormatError, + DW_AT, + DW_ATE, + DW_TAG, +) from drgn.typename import ( parse_type_name, ArrayTypeName, @@ -17,15 +25,16 @@ import functools import re import struct import sys +from typing import Any, Callable, Dict, List, Optional, Set, Tuple, Union class Type: - def __init__(self, qualifiers=None): + def __init__(self, qualifiers: Optional[Set[str]] = None) -> None: if qualifiers is None: qualifiers = set() self.qualifiers = qualifiers - def __repr__(self): + def __repr__(self) -> str: parts = [self.__class__.__name__, '('] if self.qualifiers: parts.append(', ') @@ -33,47 +42,50 @@ class Type: parts.append(')') return ''.join(parts) - def __str__(self): + def __str__(self) -> str: return str(self.type_name()) - def __eq__(self, other): + def __eq__(self, other: Any) -> bool: return (isinstance(other, self.__class__) and self.__dict__ == other.__dict__) - def type_name(self): + def type_name(self) -> TypeName: raise NotImplementedError() - def sizeof(self): + def sizeof(self) -> int: raise NotImplementedError() - def read(self, buffer, offset=0): + def read(self, buffer: bytes, offset: int = 0) -> Any: raise NotImplementedError() - def format(self, buffer, offset=0, *, cast=True): + def format(self, buffer: bytes, offset: int = 0, *, + cast: bool = True) -> str: raise NotImplementedError() class VoidType(Type): - def type_name(self): + def type_name(self) -> VoidTypeName: return VoidTypeName(self.qualifiers) - def sizeof(self): + def sizeof(self) -> int: raise ValueError("can't get size of void") - def read(self, buffer, offset=0): + def read(self, buffer: bytes, offset: int = 0) -> Any: raise ValueError("can't read void") - def format(self, buffer, offset=0, *, cast=True): + def format(self, buffer: bytes, offset: int = 0, *, + cast: bool = True) -> str: raise ValueError("can't read void") class BasicType(Type): - def __init__(self, name, size, qualifiers=None): + def __init__(self, name: str, size: int, + qualifiers: Optional[Set[str]] = None) -> None: super().__init__(qualifiers) self.name = name self.size = size - def __repr__(self): + def __repr__(self) -> str: parts = [ self.__class__.__name__, '(', repr(self.name), ', ', @@ -85,13 +97,14 @@ class BasicType(Type): parts.append(')') return ''.join(parts) - def type_name(self): + def type_name(self) -> BasicTypeName: return BasicTypeName(self.name, self.qualifiers) - def sizeof(self): + def sizeof(self) -> int: return self.size - def format(self, buffer, offset=0, *, cast=True): + def format(self, buffer: bytes, offset: int = 0, *, + cast: bool = True) -> str: if cast: parts = ['(', str(self.type_name()), ')'] else: @@ -101,11 +114,12 @@ class BasicType(Type): class IntType(BasicType): - def __init__(self, name, size, signed, qualifiers=None): + def __init__(self, name: str, size: int, signed: bool, + qualifiers: Optional[Set[str]] = None) -> None: super().__init__(name, size, qualifiers) self.signed = signed - def __repr__(self): + def __repr__(self) -> str: parts = [ self.__class__.__name__, '(', repr(self.name), ', ', @@ -118,7 +132,7 @@ class IntType(BasicType): parts.append(')') return ''.join(parts) - def read(self, buffer, offset=0): + def read(self, buffer: bytes, offset: int = 0) -> int: if len(buffer) - offset < self.size: raise ValueError(f'buffer must be at least {self.size} bytes') return int.from_bytes(buffer[offset:offset + self.size], sys.byteorder, @@ -126,13 +140,14 @@ class IntType(BasicType): class BoolType(BasicType): - def read(self, buffer, offset=0): + def read(self, buffer: bytes, offset: int = 0) -> bool: if len(buffer) - offset < self.size: raise ValueError(f'buffer must be at least {self.size} bytes') return bool(int.from_bytes(buffer[offset:offset + self.size], sys.byteorder)) - def format(self, buffer, offset=0, *, cast=True): + def format(self, buffer: bytes, offset: int = 0, *, + cast: bool = True) -> str: if cast: parts = ['(', str(self), ')'] else: @@ -142,7 +157,7 @@ class BoolType(BasicType): class FloatType(BasicType): - def read(self, buffer, offset=0): + def read(self, buffer: bytes, offset: int = 0) -> float: if len(buffer) - offset < self.size: raise ValueError(f'buffer must be at least {self.size} bytes') if self.size == 4: @@ -155,12 +170,13 @@ class FloatType(BasicType): # Not a real C type, but it needs a separate representation. class BitFieldType(Type): - def __init__(self, type, bit_offset, bit_size): + def __init__(self, type: IntType, bit_offset: int, bit_size: int, + qualifiers: Optional[Set[str]] = None) -> None: self.type = type self.bit_offset = bit_offset self.bit_size = bit_size - def __repr__(self): + def __repr__(self) -> str: parts = [ self.__class__.__name__, '(', repr(self.type), ', ', @@ -169,18 +185,18 @@ class BitFieldType(Type): ] return ''.join(parts) - def __str__(self): + def __str__(self) -> str: parts = [str(self.type.type_name()), ':', repr(self.bit_size)] return ' '.join(parts) - def type_name(self): + def type_name(self) -> TypeName: raise ValueError("can't get type of bit field") - def sizeof(self): + def sizeof(self) -> int: # Not really, but for convenience. return (self.bit_offset + self.bit_size + 7) // 8 - def read(self, buffer, offset=0): + def read(self, buffer: bytes, offset: int = 0) -> int: if len(buffer) - offset < self.sizeof(): raise ValueError(f'buffer must be at least {self.sizeof()} bytes') @@ -196,7 +212,8 @@ class BitFieldType(Type): value -= (1 << self.bit_size) return value - def format(self, buffer, offset=0, *, cast=True): + def format(self, buffer: bytes, offset: int = 0, *, + cast: bool = True) -> str: if cast: parts = ['(', str(self.type.type_name()), ')'] else: @@ -205,8 +222,13 @@ class BitFieldType(Type): return ''.join(parts) +_TypeThunk = Callable[[], Type] + + class CompoundType(Type): - def __init__(self, name, size, members, qualifiers=None): + def __init__(self, name: str, size: int, + members: Optional[List[Tuple[str, int, _TypeThunk]]], + qualifiers: Optional[Set[str]] = None) -> None: super().__init__(qualifiers) # List of name, offset, type_thunk. type_thunk is a callable taking no # parameters which returns the type of the member. This lets us lazily @@ -215,11 +237,12 @@ class CompoundType(Type): self.name = name self.size = size self._members = members - self._members_by_name = OrderedDict() + # XXX + self._members_by_name: Dict[str, Tuple[int, _TypeThunk]] = OrderedDict() if members: self._index_members_by_name(members, 0) - def _index_members_by_name(self, members, offset): + def _index_members_by_name(self, members: Any, offset: int) -> None: for name, member_offset, type_thunk in members: if name: self._members_by_name[name] = (offset + member_offset, type_thunk) @@ -227,7 +250,7 @@ class CompoundType(Type): self._index_members_by_name(type_thunk()._members, offset + member_offset) - def _eager_members(self): + def _eager_members(self) -> Optional[List[Tuple[str, int, Type]]]: if self._members is None: return None return [ @@ -235,7 +258,7 @@ class CompoundType(Type): self._members ] - def __repr__(self): + def __repr__(self) -> str: parts = [ self.__class__.__name__, '(', repr(self.name), ', ', @@ -248,7 +271,7 @@ class CompoundType(Type): parts.append(')') return ''.join(parts) - def __str__(self): + def __str__(self) -> str: parts = [str(self.type_name())] if self._members is not None: parts.append(' {\n') @@ -263,7 +286,7 @@ class CompoundType(Type): parts.append(name) else: if isinstance(member_type, BitFieldType): - member_type_name = member_type.type.type_name() + member_type_name: TypeName = member_type.type.type_name() else: member_type_name = member_type.type_name() parts.append('\t') @@ -275,7 +298,7 @@ class CompoundType(Type): parts.append('}') return ''.join(parts) - def _dict_for_eq(self): + def _dict_for_eq(self) -> Dict: # Compare the result of the type thunks rather than the thunks # themselves. __eq__ is only used for testing, so it's okay to eagerly # evaluate the struct member types. @@ -284,16 +307,17 @@ class CompoundType(Type): del d['_members_by_name'] return d - def __eq__(self, other): + def __eq__(self, other: Any) -> bool: return (isinstance(other, self.__class__) and - self._dict_for_eq() == other._dict_for_eq()) + self._dict_for_eq() == other._dict_for_eq()) # type: ignore + # mypy issue #3061 - def sizeof(self): + def sizeof(self) -> int: if self.size is None: raise ValueError("can't get size of incomplete type") return self.size - def read(self, buffer, offset=0): + def read(self, buffer: bytes, offset: int = 0) -> Dict: if len(buffer) - offset < self.size: raise ValueError(f'buffer must be at least {self.size} bytes') return OrderedDict([ @@ -301,7 +325,8 @@ class CompoundType(Type): for name, (member_offset, type_thunk) in self._members_by_name.items() ]) - def format(self, buffer, offset=0, *, cast=True): + def format(self, buffer: bytes, offset: int = 0, *, + cast: bool = True) -> str: if cast and self.name: parts = ['(', str(self.type_name()), ')'] else: @@ -317,28 +342,30 @@ class CompoundType(Type): parts.append('}') return ''.join(parts) - def members(self): + def members(self) -> List[str]: return list(self._members_by_name) - def offsetof(self, member): + def offsetof(self, member: str) -> int: return self._members_by_name[member][0] - def typeof(self, member): + def typeof(self, member: str) -> Type: return self._members_by_name[member][1]() class StructType(CompoundType): - def type_name(self): + def type_name(self) -> StructTypeName: return StructTypeName(self.name, self.qualifiers) class UnionType(CompoundType): - def type_name(self): + def type_name(self) -> UnionTypeName: return UnionTypeName(self.name, self.qualifiers) class EnumType(Type): - def __init__(self, name, size, signed, enumerators, qualifiers=None): + def __init__(self, name: str, size: int, signed: bool, + enumerators: Optional[List[Tuple[str, int]]], + qualifiers: Optional[Set[str]] = None) -> None: super().__init__(qualifiers) self.name = name self.size = size @@ -346,10 +373,10 @@ class EnumType(Type): if enumerators is None: self._enum = None else: - self._enum = enum.IntEnum('' if name is None else name, - OrderedDict(enumerators)) + self._enum = enum.IntEnum('' if name is None else name, enumerators) # type: ignore + # mypy issue #4865. - def __repr__(self): + def __repr__(self) -> str: parts = [ self.__class__.__name__, '(', repr(self.name), ', ', @@ -363,7 +390,7 @@ class EnumType(Type): parts.append(')') return ''.join(parts) - def __str__(self): + def __str__(self) -> str: parts = [str(self.type_name())] if self._enum is not None: parts.append(' {\n') @@ -376,25 +403,26 @@ class EnumType(Type): parts.append('}') return ''.join(parts) - def _dict_for_eq(self): + def _dict_for_eq(self) -> Dict: d = dict(self.__dict__) if d['_enum'] is not None: d['_enum'] = d['_enum'].__members__ return d - def __eq__(self, other): + def __eq__(self, other: Any) -> bool: return (isinstance(other, self.__class__) and - self._dict_for_eq() == other._dict_for_eq()) + self._dict_for_eq() == other._dict_for_eq()) # type: ignore + # mypy issue #3061 - def type_name(self): + def type_name(self) -> EnumTypeName: return EnumTypeName(self.name, self.qualifiers) - def sizeof(self): + def sizeof(self) -> int: if self.size is None: raise ValueError("can't get size of incomplete type") return self.size - def read(self, buffer, offset=0): + def read(self, buffer: bytes, offset: int = 0) -> Union[enum.IntEnum, int]: if self._enum is None: raise ValueError("can't read incomplete enum type") if len(buffer) - offset < self.size: @@ -406,7 +434,8 @@ class EnumType(Type): except ValueError: return value - def format(self, buffer, offset=0, *, cast=True): + def format(self, buffer: bytes, offset: int = 0, *, + cast: bool = True) -> str: if cast: parts = ['(', str(self.type_name()), ')'] else: @@ -420,12 +449,13 @@ class EnumType(Type): class TypedefType(Type): - def __init__(self, name, type, qualifiers=None): + def __init__(self, name: str, type: Type, + qualifiers: Optional[Set[str]] = None) -> None: super().__init__(qualifiers) self.name = name self.type = type - def __repr__(self): + def __repr__(self) -> str: parts = [ self.__class__.__name__, '(', repr(self.name), ', ', @@ -437,22 +467,23 @@ class TypedefType(Type): parts.append(')') return ''.join(parts) - def __str__(self): + def __str__(self) -> str: parts = sorted(self.qualifiers) # Not real C syntax, but it gets the point across parts.append('typedef') parts.append(self.type.type_name().declaration(self.name)) return ' '.join(parts) - def type_name(self): + def type_name(self) -> TypedefTypeName: return TypedefTypeName(self.name, self.qualifiers) - def sizeof(self): + def sizeof(self) -> int: return self.type.sizeof() - def read(self, buffer, offset=0): + def read(self, buffer: bytes, offset: int = 0) -> Any: return self.type.read(buffer, offset) - def format(self, buffer, offset=0, *, cast=True): + def format(self, buffer: bytes, offset: int = 0, *, + cast: bool = True) -> str: if cast: parts = ['(', str(self.type_name()), ')'] else: @@ -462,12 +493,13 @@ class TypedefType(Type): class PointerType(Type): - def __init__(self, size, type, qualifiers=None): + def __init__(self, size: int, type: Type, + qualifiers: Optional[Set[str]] = None) -> None: super().__init__(qualifiers) self.size = size self.type = type - def __repr__(self): + def __repr__(self) -> str: parts = [ self.__class__.__name__, '(', repr(self.type), ', ', @@ -479,18 +511,19 @@ class PointerType(Type): parts.append(')') return ''.join(parts) - def type_name(self): + def type_name(self) -> PointerTypeName: return PointerTypeName(self.type.type_name(), self.qualifiers) - def sizeof(self): + def sizeof(self) -> int: return self.size - def read(self, buffer, offset=0): + def read(self, buffer: bytes, offset: int = 0) -> int: if len(buffer) - offset < self.size: raise ValueError(f'buffer must be at least {self.size} bytes') return int.from_bytes(buffer[offset:offset + self.size], sys.byteorder) - def format(self, buffer, offset=0, *, cast=True): + def format(self, buffer: bytes, offset: int = 0, *, + cast: bool = True) -> str: if cast: parts = ['(', str(self), ')'] else: @@ -500,22 +533,22 @@ class PointerType(Type): class ArrayType(Type): - def __init__(self, type, size=None): + def __init__(self, type: Type, size: Optional[int] = None) -> None: self.type = type self.size = size - def __repr__(self): + def __repr__(self) -> str: return f'{self.__class__.__name__}({self.type!r}, {self.size!r})' - def type_name(self): + def type_name(self) -> ArrayTypeName: return ArrayTypeName(self.type.type_name(), self.size) - def sizeof(self): + def sizeof(self) -> int: if self.size is None: raise ValueError("can't get size of incomplete array type") return self.size * self.type.sizeof() - def read(self, buffer, offset=0): + def read(self, buffer: bytes, offset: int = 0) -> List: if self.size is None: raise ValueError("can't read incomplete array type") element_size = self.type.sizeof() @@ -527,7 +560,8 @@ class ArrayType(Type): for i in range(self.size) ] - def format(self, buffer, offset=0, *, cast=True): + def format(self, buffer: bytes, offset: int = 0, *, + cast: bool = True) -> str: if cast: parts = ['(', str(self.type_name()), ')'] else: @@ -564,8 +598,10 @@ class ArrayType(Type): return ''.join(parts) -def _from_dwarf_bit_field(dwarf_index, die): +def _from_dwarf_bit_field(dwarf_index: DwarfIndex, die: Die) -> Type: type_ = from_dwarf_type(dwarf_index, die.type()) + if not isinstance(type_, IntType): + raise DwarfFormatError('bit field type is not integer') bit_size = die.find_constant(DW_AT.bit_size) try: bit_offset = die.find_constant(DW_AT.data_bit_offset) @@ -575,7 +611,8 @@ def _from_dwarf_bit_field(dwarf_index, die): return BitFieldType(type_, bit_offset, bit_size) -def from_dwarf_type(dwarf_index, dwarf_type, qualifiers=None): +def from_dwarf_type(dwarf_index: DwarfIndex, dwarf_type: Die, + qualifiers: Optional[Set[str]] = None) -> Type: if qualifiers is None: qualifiers = set() else: @@ -645,9 +682,11 @@ def from_dwarf_type(dwarf_index, dwarf_type, qualifiers=None): except DwarfAttribNotFoundError: name = None if dwarf_type.tag == DW_TAG.structure_type: - return StructType(name, size, members, qualifiers) + return StructType(name, size, members, qualifiers) # type: ignore + # mypy issue #1484 else: - return UnionType(name, size, members, qualifiers) + return UnionType(name, size, members, qualifiers) # type: ignore + # mypy issue #1484 elif dwarf_type.tag == DW_TAG.enumeration_type: if dwarf_type.find_flag(DW_AT.declaration): size = None @@ -683,7 +722,7 @@ def from_dwarf_type(dwarf_index, dwarf_type, qualifiers=None): try: deref_type = dwarf_type.type() except DwarfAttribNotFoundError: - type_ = VoidType() + type_: Type = VoidType() else: type_ = from_dwarf_type(dwarf_index, deref_type) return PointerType(size, type_, qualifiers) @@ -704,7 +743,8 @@ def from_dwarf_type(dwarf_index, dwarf_type, qualifiers=None): raise NotImplementedError(DW_TAG.str(dwarf_type.tag)) -def from_dwarf_type_name(dwarf_index, type_name): +def from_dwarf_type_name(dwarf_index: DwarfIndex, + type_name: Union[str, TypeName]) -> Type: if not isinstance(type_name, TypeName): type_name = parse_type_name(type_name) if isinstance(type_name, VoidTypeName): diff --git a/drgn/typename.py b/drgn/typename.py index bcefda1d..b63c4bdc 100644 --- a/drgn/typename.py +++ b/drgn/typename.py @@ -1,15 +1,17 @@ from collections import namedtuple import re +from typing import Any, Dict, List, NamedTuple, Optional, Set, Tuple, Union class TypeName: - def __init__(self, name, qualifiers=None): + def __init__(self, name: str, + qualifiers: Optional[Set[str]] = None) -> None: self.name = name if qualifiers is None: qualifiers = set() self.qualifiers = qualifiers - def __repr__(self): + def __repr__(self) -> str: parts = [self.__class__.__name__, '(', repr(self.name)] if self.qualifiers: parts.append(', ') @@ -17,14 +19,14 @@ class TypeName: parts.append(')') return ''.join(parts) - def __str__(self): + def __str__(self) -> str: return self.declaration('') - def __eq__(self, other): + def __eq__(self, other: Any) -> bool: return (isinstance(other, self.__class__) and self.__dict__ == other.__dict__) - def declaration(self, name): + def declaration(self, name: str) -> str: parts = sorted(self.qualifiers) parts.append(self.name) if name: @@ -33,7 +35,8 @@ class TypeName: class VoidTypeName(TypeName): - def __init__(self, qualifiers=None): + def __init__(self, + qualifiers: Optional[Set[str]] = None) -> None: super().__init__('void', qualifiers) @@ -41,7 +44,8 @@ class BasicTypeName(TypeName): pass -def _tagged_declaration(keyword, tag, name, qualifiers): +def _tagged_declaration(keyword: str, tag: str, name: str, + qualifiers: Set[str]) -> str: parts = sorted(qualifiers) parts.append(keyword) if tag: @@ -52,17 +56,17 @@ def _tagged_declaration(keyword, tag, name, qualifiers): class StructTypeName(TypeName): - def declaration(self, name): + def declaration(self, name: str) -> str: return _tagged_declaration('struct', self.name, name, self.qualifiers) class UnionTypeName(TypeName): - def declaration(self, name): + def declaration(self, name: str) -> str: return _tagged_declaration('union', self.name, name, self.qualifiers) class EnumTypeName(TypeName): - def declaration(self, name): + def declaration(self, name: str) -> str: return _tagged_declaration('enum', self.name, name, self.qualifiers) @@ -71,13 +75,14 @@ class TypedefTypeName(TypeName): class PointerTypeName(TypeName): - def __init__(self, type, qualifiers=None): + def __init__(self, type: TypeName, + qualifiers: Optional[Set[str]] = None) -> None: self.type = type if qualifiers is None: qualifiers = set() self.qualifiers = qualifiers - def __repr__(self): + def __repr__(self) -> str: parts = ['PointerTypeName(', repr(self.type)] if self.qualifiers: parts.append(', ') @@ -85,7 +90,7 @@ class PointerTypeName(TypeName): parts.append(')') return ''.join(parts) - def declaration(self, name): + def declaration(self, name: str) -> str: if self.qualifiers: if name: name = ' ' + name @@ -98,11 +103,11 @@ class PointerTypeName(TypeName): class ArrayTypeName(TypeName): - def __init__(self, type, size=None): + def __init__(self, type: TypeName, size: Optional[int] = None) -> None: self.type = type self.size = size - def __repr__(self): + def __repr__(self) -> str: parts = ['ArrayTypeName(', repr(self.type)] if self.size is not None: parts.append(', ') @@ -110,7 +115,7 @@ class ArrayTypeName(TypeName): parts.append(')') return ''.join(parts) - def declaration(self, name): + def declaration(self, name: str) -> str: if self.size is None: name += '[]' else: @@ -118,28 +123,34 @@ class ArrayTypeName(TypeName): return self.type.declaration(name) +_TOKEN_REGEX = re.compile('|'.join('(?P<%s>%s)' % pair for pair in [ + ('SPECIFIER', r'void|char|short|int|long|float|double|signed|unsigned|_Bool|_Complex'), + ('QUALIFIER', r'const|restrict|volatile|_Atomic'), + ('TAG', r'enum|struct|union'), + ('IDENTIFIER', r'[a-zA-Z_][a-zA-Z0-9_]*'), + ('NUMBER', r'(?:0x)?[0-9]+'), + ('LPAREN', r'\('), + ('RPAREN', r'\)'), + ('LBRACKET', r'\['), + ('RBRACKET', r']'), + ('ASTERISK', r'\*'), + ('SKIP', r'[ \t\n\r\f\v]+'), + ('MISMATCH', r'.'), +])) + + +class _Token(NamedTuple): + kind: str + value: Union[str, int] + + class _TypeNameLexer: - TOKEN_REGEX = re.compile('|'.join('(?P<%s>%s)' % pair for pair in [ - ('SPECIFIER', r'void|char|short|int|long|float|double|signed|unsigned|_Bool|_Complex'), - ('QUALIFIER', r'const|restrict|volatile|_Atomic'), - ('TAG', r'enum|struct|union'), - ('IDENTIFIER', r'[a-zA-Z_][a-zA-Z0-9_]*'), - ('NUMBER', r'(?:0x)?[0-9]+'), - ('LPAREN', r'\('), - ('RPAREN', r'\)'), - ('LBRACKET', r'\['), - ('RBRACKET', r']'), - ('ASTERISK', r'\*'), - ('SKIP', r'[ \t\n\r\f\v]+'), - ('MISMATCH', r'.'), - ])) - Token = namedtuple('Token', ['kind', 'value']) - def __init__(self, str): - self._tokens = _TypeNameLexer.TOKEN_REGEX.finditer(str) - self._stack = [] + def __init__(self, s: str) -> None: + self._tokens = _TOKEN_REGEX.finditer(s) + self._stack: List[_Token] = [] - def pop(self): + def pop(self) -> _Token: if self._stack: return self._stack.pop() @@ -147,7 +158,7 @@ class _TypeNameLexer: try: match = next(self._tokens) except StopIteration: - return _TypeNameLexer.Token('EOF', None) + return _Token('EOF', None) kind = match.lastgroup value = match.group(kind) if kind == 'SKIP': @@ -157,27 +168,29 @@ class _TypeNameLexer: else: if kind == 'NUMBER': if value.startswith('0x'): - value = int(value, 16) + number = int(value, 16) elif value.startswith('0'): - value = int(value, 8) + number = int(value, 8) else: - value = int(value, 10) - return _TypeNameLexer.Token(kind, value) + number = int(value, 10) + return _Token(kind, number) + else: + return _Token(kind, value) - def push(self, token): + def push(self, token: _Token) -> None: self._stack.append(token) - def peek(self): + def peek(self) -> _Token: token = self.pop() self.push(token) return token class _TypeNameParser: - def __init__(self, lexer): + def __init__(self, lexer: _TypeNameLexer) -> None: self._lexer = lexer - def parse(self): + def parse(self) -> TypeName: type_name = self._parse_specifier_qualifier_list() if self._lexer.peek().kind != 'EOF': type_name = self._parse_abstract_declarator(type_name)[0] @@ -186,11 +199,11 @@ class _TypeNameParser: return type_name @staticmethod - def _specifier_error(old_specifier, new_specifier): + def _specifier_error(old_specifier: str, new_specifier: str) -> Exception: return ValueError(f"cannot combine {new_specifier!r} with {old_specifier!r}") @staticmethod - def _add_specifier(specifiers, specifier): + def _add_specifier(specifiers: Dict[str, Any], specifier: str) -> None: data_type = specifiers.get('data_type') size = specifiers.get('size') sign = specifiers.get('sign') @@ -222,7 +235,8 @@ class _TypeNameParser: specifiers['data_type'] = specifier @staticmethod - def _type_name_from_specifiers(specifiers, is_typedef): + def _type_name_from_specifiers(specifiers: Dict[str, Any], + is_typedef: bool) -> TypeName: data_type = specifiers['data_type'] qualifiers = specifiers.get('qualifiers') if data_type.startswith('struct '): @@ -245,8 +259,8 @@ class _TypeNameParser: parts.append(data_type) return BasicTypeName(' '.join(parts), qualifiers) - def _parse_specifier_qualifier_list(self): - specifiers = {} + def _parse_specifier_qualifier_list(self) -> TypeName: + specifiers: Dict[str, Any] = {} is_typedef = False while True: token = self._lexer.peek() @@ -260,9 +274,11 @@ class _TypeNameParser: # type-specifier elif token.kind == 'SPECIFIER': self._lexer.pop() + assert isinstance(token.value, str) _TypeNameParser._add_specifier(specifiers, token.value) elif token.kind == 'IDENTIFIER': self._lexer.pop() + assert isinstance(token.value, str) _TypeNameParser._add_specifier(specifiers, token.value) is_typedef = True elif token.kind == 'TAG': @@ -270,6 +286,8 @@ class _TypeNameParser: token2 = self._lexer.pop() if token2.kind != 'IDENTIFIER': raise ValueError(f'expected identifier after {token.value}') + assert isinstance(token.value, str) + assert isinstance(token2.value, str) _TypeNameParser._add_specifier(specifiers, token.value + ' ' + token2.value) else: break @@ -279,7 +297,8 @@ class _TypeNameParser: specifiers['data_type'] = 'int' return _TypeNameParser._type_name_from_specifiers(specifiers, is_typedef) - def _parse_abstract_declarator(self, type_name): + def _parse_abstract_declarator( + self, type_name: TypeName) -> Tuple[TypeName, Union[ArrayTypeName, PointerTypeName, None]]: if self._lexer.peek().kind == 'ASTERISK': type_name, inner_type = self._parse_pointer(type_name) token = self._lexer.peek() @@ -289,7 +308,7 @@ class _TypeNameParser: else: return self._parse_direct_abstract_declarator(type_name) - def _parse_pointer(self, type_name): + def _parse_pointer(self, type_name: TypeName) -> Tuple[TypeName, Optional[PointerTypeName]]: if self._lexer.peek().kind != 'ASTERISK': raise ValueError("expected '*'") inner_type = None @@ -301,17 +320,19 @@ class _TypeNameParser: inner_type = type_name return type_name, inner_type - def _parse_optional_type_qualifier_list(self): + def _parse_optional_type_qualifier_list(self) -> Set[str]: qualifiers = set() while True: token = self._lexer.peek() if token.kind != 'QUALIFIER': break self._lexer.pop() + assert isinstance(token.value, str) qualifiers.add(token.value) return qualifiers - def _parse_direct_abstract_declarator(self, type_name): + def _parse_direct_abstract_declarator( + self, type_name: TypeName) -> Tuple[TypeName, Union[ArrayTypeName, PointerTypeName, None]]: inner_type = None token = self._lexer.peek() if token.kind == 'LPAREN': @@ -333,6 +354,7 @@ class _TypeNameParser: token = self._lexer.peek() if token.kind == 'NUMBER': self._lexer.pop() + assert isinstance(token.value, int) size = token.value else: size = None @@ -351,5 +373,5 @@ class _TypeNameParser: return type_name, inner_type -def parse_type_name(str): - return _TypeNameParser(_TypeNameLexer(str)).parse() +def parse_type_name(s: str) -> TypeName: + return _TypeNameParser(_TypeNameLexer(s)).parse() diff --git a/drgn/util.py b/drgn/util.py index 3948eaa4..24504087 100644 --- a/drgn/util.py +++ b/drgn/util.py @@ -1,5 +1,8 @@ -def parse_symbol_file(file): - symbols = {} +from typing import Dict, List, TextIO + + +def parse_symbol_file(file: TextIO) -> Dict[str, List[int]]: + symbols: Dict[str, List[int]] = {} for line in file: fields = line.split() name = fields[2]