diff --git a/.gitignore b/.gitignore index 391e0fbe..aea92663 100644 --- a/.gitignore +++ b/.gitignore @@ -5,6 +5,7 @@ /build /coverage.info /dist +/docs/_build /drgn.egg-info /htmlcov __pycache__ diff --git a/.readthedocs.yml b/.readthedocs.yml new file mode 100644 index 00000000..4e368ddf --- /dev/null +++ b/.readthedocs.yml @@ -0,0 +1,3 @@ +version: 2 +sphinx: + configuration: docs/conf.py diff --git a/MANIFEST.in b/MANIFEST.in index b562f73d..a9c44507 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -1 +1,2 @@ +recursive-include docs *.py *.rst recursive-include tests *.py diff --git a/README.md b/README.md deleted file mode 100644 index 2d20326c..00000000 --- a/README.md +++ /dev/null @@ -1,95 +0,0 @@ -drgn -==== - -`drgn` is a debugger-as-a-library. In contrast to existing debuggers like -[GDB](https://www.gnu.org/software/gdb/) which excel in breakpoint-based -debugging, drgn focuses on live introspection. drgn exposes the types and data -in a program for easy, expressive scripting. - -drgn was developed for debugging the Linux kernel (as an alternative to the -[crash](http://people.redhat.com/anderson/) utility), but it can also debug -userspace programs written in C. C++ support is planned. - -Python is the main interface for drgn, although an experimental C library, -`libdrgn`, is also provided. - -Installation ------------- - -drgn is built with setuptools. Build it like so: - -``` -$ python3 setup.py build_ext -i -``` - -Then, you can either run it locally: - -``` -$ python3 -m drgn --help -``` - -Or install it and run it: - -``` -$ sudo python3 setup.py install -$ drgn --help -``` - -Or, pick your favorite Python package installation method. - -Getting Started ---------------- - -drgn can be used as a command line tool or as a library. The rest of this -section describes using the CLI; the CLI is basically a wrapper around the -library which provides a nice interface, including history and tab completion. 
- -To debug the running kernel, run `sudo drgn -k`. To debug a running program, -run `sudo drgn -p $PID`. To debug a core dump (either a kernel vmcore or a -userspace core dump), run `drgn -c $PATH`. - -The drgn CLI has an interactive mode and a script mode. If no arguments are -passed, drgn runs in interactive mode; otherwise, the given script is run with -the given arguments. The drgn CLI is actually just the Python interpreter -initialized with a `prog` object representing the debugged program: - -``` -$ sudo drgn -k ->>> prog.type('struct list_head') -struct list_head { - struct list_head *next; - struct list_head *prev; -} ->>> prog['modules'] -(struct list_head){ - .next = (struct list_head *)0xffffffffc0b91048, - .prev = (struct list_head *)0xffffffffc0066148, -} ->>> prog['init_task'].pid -(pid_t)0 ->>> from drgn.helpers.linux import list_for_each_entry ->>> for mod in list_for_each_entry('struct module', prog['modules'].address_of_(), 'list'): -... if mod.refcnt.counter > 10: -... print(mod.name) -... -(char [56])"snd" -(char [56])"evdev" -(char [56])"i915" -``` - -See the in-program documentation in interactive mode with `help(drgn)` for more -information. See `examples` and `drgn/helpers` for some examples. - -License -------- - -Copyright 2018-2019 - Omar Sandoval - -Licensed under the GPLv3 or later - -Acknowledgements ----------------- - -drgn is named after -[this](https://giraffesgiraffes.bandcamp.com/track/drgnfkr-2) because dragons -eat [dwarves](http://dwarfstd.org/). diff --git a/README.rst b/README.rst new file mode 100644 index 00000000..25b9f00a --- /dev/null +++ b/README.rst @@ -0,0 +1,99 @@ +drgn +==== + +.. image:: https://readthedocs.org/projects/drgn/badge/?version=latest + :target: https://drgn.readthedocs.io/en/latest/?badge=latest + :alt: Documentation Status + +.. start-introduction + +drgn is a debugger-as-a-library. 
In contrast to existing debuggers like `GDB +<https://www.gnu.org/software/gdb/>`_ which focus on breakpoint-based +debugging, drgn excels in live introspection. drgn exposes the types and +variables in a program for easy, expressive scripting in Python. For example, +you can debug the Linux kernel: + +.. code-block:: pycon + + >>> from drgn.helpers.linux import list_for_each_entry + >>> for mod in list_for_each_entry('struct module', + ... prog['modules'].address_of_(), + ... 'list'): + ... if mod.refcnt.counter > 10: + ... print(mod.name) + ... + (char [56])"snd" + (char [56])"evdev" + (char [56])"i915" + +drgn was developed for debugging the Linux kernel (as an alternative to the +`crash <http://people.redhat.com/anderson/>`_ utility), but it can also debug +userspace programs written in C. C++ support is planned. + +.. end-introduction + +Documentation can be found at `drgn.readthedocs.io +<https://drgn.readthedocs.io>`_. + +Installation +------------ + +Install the following dependencies: + +* Python 3.6 or newer +* elfutils development libraries (libelf and libdw) +* GNU autotools (autoconf, automake, and libtool) and pkgconf + +Then, run: + +.. code-block:: console + + $ git clone https://github.com/osandov/drgn.git + $ cd drgn + $ python3 setup.py build + $ sudo python3 setup.py install + +See the `installation documentation +<https://drgn.readthedocs.io/en/latest/installation.html>`_ for more details. + +Quick Start +----------- + +.. start-quick-start + +To debug the running kernel, run ``sudo drgn -k``. To debug a running program, +run ``sudo drgn -p $PID``. To debug a core dump (either a kernel vmcore or a +userspace core dump), run ``drgn -c $PATH``. The program must have debugging +symbols available. + +Then, you can access variables in the program with ``prog['name']``, access +structure members with ``.``, use various predefined helpers, and more: + +..
code-block:: pycon + + $ sudo drgn -k + >>> prog['init_task'].comm + (char [16])"swapper/0" + >>> d_path(fget(find_task(prog, 1), 0).f_path.address_of_()) + b'/dev/null' + >>> max(task.stime for task in for_each_task(prog)) + (u64)4192109975952 + >>> sum(disk.gendisk.part0.nr_sects for disk in for_each_disk(prog)) + (sector_t)999705952 + +.. end-quick-start + +See the `user guide `_ +for more information. + +License +------- + +.. start-license + +Copyright 2018-2019 Omar Sandoval + +drgn is licensed under the `GPLv3 +`_ or later. + +.. end-license diff --git a/docs/api_reference.rst b/docs/api_reference.rst new file mode 100644 index 00000000..0aeba020 --- /dev/null +++ b/docs/api_reference.rst @@ -0,0 +1,892 @@ +API Reference +============= + +.. module:: drgn + +Programs +-------- + +.. class:: Program + + A ``Program`` represents a crashed or running program. It can be used to + lookup type definitions, access variables, and read arbitrary memory. + + The main functionality of a ``Program`` is looking up objects (i.e., + variables, constants, or functions). This is usually done with the + :meth:`[] <__getitem__>` operator. + + This class cannot be constructed directly. Instead, use one of the + :ref:`api-program-constructors`. + + .. attribute:: flags + + Flags which apply to this program. + + :vartype: ProgramFlags + + .. attribute:: word_size + + Size of a word in this program in bytes. + + :vartype: int + + .. attribute:: byteorder + + Byte order (a.k.a. endianness) in this program (either ``'little'`` + or ``'big'``). + + :vartype: str + + .. attribute:: __getitem__(name) + + Implement ``self[name]``. Get the object (variable, constant, or + function) with the given name. + + If there are multiple objects with the same name, one is returned + arbitrarily. In this case, the :meth:`variable()`, :meth:`constant()`, + or :meth:`function()` methods can be used instead. 
+ + >>> prog['jiffies'] + Object(prog, 'volatile unsigned long', address=0xffffffff94c05000) + + :param str name: The object name. + :rtype: Object + + .. attribute:: variable(name, filename=None) + + Get the variable with the given name. + + >>> prog.variable('jiffies') + Object(prog, 'volatile unsigned long', address=0xffffffff94c05000) + + :param str name: The variable name. + :param filename: The source code file that contains the definition. See + :ref:`api-filenames`. + :type filename: str or None + :rtype: Object + :raises LookupError: if no variables with the given name are found in + the given file + + .. attribute:: constant(name, filename=None) + + Get the constant (e.g., enumeration constant) with the given name. + + Note that support for macro constants is not yet implemented for DWARF + files, and most compilers don't generate macro debugging information + by default anyways. + + >>> prog.constant('PIDTYPE_MAX') + Object(prog, 'enum pid_type', value=4) + + :param str name: The constant name. + :param filename: The source code file that contains the definition. See + :ref:`api-filenames`. + :type filename: str or None + :rtype: Object + :raises LookupError: if no constants with the given name are found in + the given file + + .. attribute:: function(name, filename=None) + + Get the function with the given name. + + >>> prog.function('schedule') + Object(prog, 'void (void)', address=0xffffffff94392370) + + :param str name: The function name. + :param filename: The source code file that contains the definition. See + :ref:`api-filenames`. + :type filename: str or None + :rtype: Object + :raises LookupError: if no functions with the given name are found in + the given file + + .. attribute:: type(name, filename=None) + + Get the type with the given name. + + >>> prog.type('long') + int_type(name='long', size=8, is_signed=True) + + :param str name: The type name. + :param filename: The source code file that contains the definition. 
See + :ref:`api-filenames`. + :type filename: str or None + :rtype: Type + :raises LookupError: if no types with the given name are found in + the given file + + .. attribute:: read(address, size, physical=False) + + Read *size* bytes of memory starting at *address* in the program. The + address may be virtual (the default) or physical if the program + supports it. + + >>> prog.read(0xffffffffbe012b40, 16) + b'swapper/0\x00\x00\x00\x00\x00\x00\x00' + + :param int address: The starting address. + :param int size: The number of bytes to read. + :param bool physical: Whether *address* is a physical memory address. + If ``False``, then it is a virtual memory address. Physical memory + can usually only be read when the program is an operating system + kernel. + :rtype: bytes + :raises FaultError: if the address range is invalid or the type of + address (physical or virtual) is not supported by the program + :raises ValueError: if *size* is negative + +.. class:: ProgramFlags + + ``ProgramFlags`` is an :class:`enum.IntFlag` of flags that can apply to a + :class:`Program` (e.g., about what kind of program it is). + + .. attribute:: IS_LINUX_KERNEL + + The program is the Linux kernel. + +.. _api-filenames: + +Filenames +^^^^^^^^^ + +The :meth:`Program.type()`, :meth:`Program.variable()`, +:meth:`Program.constant()`, and :meth:`Program.function()` methods all take a +*filename* parameter to distinguish between multiple definitions with the same +name. The filename refers to the source code file that contains the definition. +``None`` matches any definition. Otherwise, the filename is matched from right +to left, so ``'stdio.h'``, ``'include/stdio.h'``, ``'usr/include/stdio.h'``, +and ``'/usr/include/stdio.h'`` would all match a definition in +``/usr/include/stdio.h``. If multiple definitions match, one is returned +arbitrarily. + +.. 
_api-program-constructors: + +Program Constructors +^^^^^^^^^^^^^^^^^^^^ + +The drgn command line interface automatically creates a :class:`Program` named +``prog``. However, drgn may also be used as a library without the CLI, in which +case a ``Program`` must be created manually. + +.. function:: program_from_core_dump(path, verbose=False) + + Create a :class:`Program` from a core dump file. The type of program (e.g., + userspace or kernel) is determined automatically. + + :param str path: Core dump file path. + :param bool verbose: Whether to print non-fatal errors to stderr (e.g., + about not being able to find debugging symbols). + :rtype: Program + +.. function:: program_from_kernel(verbose=False) + + Create a :class:`Program` from the running operating system kernel. This + requires root privileges. + + :param bool verbose: Whether to print non-fatal errors to stderr (e.g., + about not being able to find kernel modules or debugging symbols). + :rtype: Program + +.. function:: program_from_pid(pid) + + Create a :class:`Program` from a running program with the given PID. This + requires appropriate permissions (on Linux, :manpage:`ptrace(2)` attach + permissions). + + :param int pid: Process ID of the program to debug. + :rtype: Program + +Objects +------- + +.. class:: Object(prog, type=None, *, address=None, value=None, byteorder=None, bit_offset=None, bit_field_size=None) + + An ``Object`` represents a symbol or value in a program. An object may + exist in the memory of the program (a *reference*), or it may be a + temporary computed value (a *value*). + + All instances of this class have two attributes: :attr:`prog_`, the program + that the object is from; and :attr:`type_`, the type of the object. + Reference objects also have an :attr:`address_` attribute. Objects may also + have a :attr:`byteorder_`, :attr:`bit_offset_`, and + :attr:`bit_field_size_`. 
+ + :func:`repr()` of an object returns a Python representation of the object: + + >>> print(repr(prog['jiffies'])) + Object(prog, 'volatile long unsigned int', address=0xffffffffbf005000) + + :class:`str() <str>` returns a representation of the object in programming + language syntax: + + >>> print(prog['jiffies']) + (volatile long unsigned int)4326237045 + + Note that the drgn CLI is set up so that objects are displayed with + ``str()`` instead of ``repr()`` (the latter is the default behavior of + Python's interactive mode). This means that in the drgn CLI, the call to + ``print()`` in the second example above is not necessary. + + Objects support the following operators: + + * Arithmetic operators: ``+``, ``-``, ``*``, ``/``, ``%`` + * Bitwise operators: ``<<``, ``>>``, ``&``, ``|``, ``^``, ``~`` + * Relational operators: ``==``, ``!=``, ``<``, ``>``, ``<=``, ``>=`` + * Subscripting: :meth:`[] <__getitem__>` (Python does not have a unary + ``*`` operator, so pointers are dereferenced with ``ptr[0]``) + * Member access: :meth:`. <__getattribute__>` (Python does not have a + ``->`` operator, so ``.`` is also used to access members of pointers to + structures) + * The address-of operator: :meth:`drgn.Object.address_of_()` (this is a + method because Python does not have a unary ``&`` operator) + * Array length: :meth:`len() <__len__>` + + These operators all have the semantics of the program's programming + language.
For example, adding two objects from a program written in C + results in an object with a type and value according to the rules of C: + + >>> Object(prog, 'unsigned long', value=2**64 - 1) + Object(prog, 'int', value=1) + Object(prog, 'unsigned long', value=0) + + If only one operand to a binary operator is an object, the other operand + will be converted to an object according to the language's rules for + literals: + + >>> Object(prog, 'char', value=0) - 1 + Object(prog, 'int', value=-1) + + The standard :class:`int() <int>`, :class:`float() <float>`, and + :class:`bool() <bool>` functions convert an object to that Python type. + Conversion to ``bool`` uses the programming language's notion of + "truthiness". Additionally, certain Python functions will automatically + coerce an object to the appropriate Python type (e.g., :func:`hex()`, + :func:`round()`, and :meth:`list subscripting <object.__index__>`). + + Object attributes and methods are named with a trailing underscore to avoid + conflicting with structure or union members. The attributes and methods + always take precedence; use :meth:`member_()` if there is a conflict. + + Objects are usually obtained directly from a :class:`Program`, but they can + be constructed manually, as well (for example, if you got a variable + address from a log file). + + :param Program prog: The program to create this object in. + :param type: The type of the object. If omitted, this is deduced from + *value* according to the language's rules for literals. + :type type: str or Type + :param int address: The address of this object in the program. Either this + or *value* must be given, but not both. + :param value: The value of this object. See :meth:`value_()`. + :param byteorder: Byte order of the object. This should be ``'little'`` or + ``'big'``. The default is ``None``, which indicates the program byte + order. This must be ``None`` for primitive values.
+ :type byteorder: str or None + :param bit_offset: Offset in bits from the object's address to the + beginning of the object. The default is ``None``, which means no + offset. This must be ``None`` for primitive values. + :type bit_offset: int or None + :param bit_field_size: Size in bits of this object if it is a bit field. + The default is ``None``, which means the object is not a bit field. + :type bit_field_size: int or None + + .. attribute:: prog_ + + Program that this object is from. + + :vartype: Program + + .. attribute:: type_ + + Type of this object. + + :vartype: Type + + .. attribute:: address_ + + Address of this object if it is a reference, ``None`` if it is a value. + + :vartype: int or None + + .. attribute:: byteorder_ + + Byte order of this object (either ``'little'`` or ``'big'``) if it is a + reference or a non-primitive value, ``None`` otherwise. + + :vartype: str or None + + .. attribute:: bit_offset_ + + Offset in bits from this object's address to the beginning of the + object if it is a reference or a non-primitive value, ``None`` + otherwise. + + :vartype: int or None + + .. attribute:: bit_field_size_ + + Size in bits of this object if it is a bit field, ``None`` if it is + not. + + :vartype: int or None + + .. method:: __getattribute__(name) + + Implement ``self.name``. + + If *name* is an attribute of the :class:`Object` class, then this + returns that attribute. Otherwise, it is equivalent to + :meth:`member_()`. + + >>> print(prog['init_task'].pid) + (pid_t)0 + + :param str name: Attribute name. + + .. method:: __getitem__(idx) + + Implement ``self[idx]``. Get the array element at the given index. + + >>> print(prog['init_task'].comm[0]) + (char)115 + + This is only valid for pointers and arrays. + + :param int idx: The array index. + :rtype: Object + :raises TypeError: if this object is not a pointer or array + + .. method:: __len__() + + Implement ``len(self)``. Get the number of elements in this object. 
+ + >>> len(prog['init_task'].comm) + 16 + + This is only valid for arrays. + + :rtype: int + :raises TypeError: if this object is not an array with complete type + + .. method:: value_() + + Get the value of this object as a Python object. + + For basic types (integer, floating-point, boolean), this returns an + object of the directly corresponding Python type (``int``, ``float``, + ``bool``). For pointers, this returns the address value of the pointer. + For enums, this returns an ``int``. For structures and unions, this + returns a ``dict`` of members. For arrays, this returns a ``list`` of + values. + + :raises FaultError: if reading the object causes a bad memory access + :raises TypeError: if this object has an unreadable type (e.g., + ``void``) + + .. method:: string_() + + Read a null-terminated string pointed to by this object. + + This is only valid for pointers and arrays. The element type is + ignored; this operates byte-by-byte. + + For pointers and flexible arrays, this stops at the first null byte. + + For complete arrays, this stops at the first null byte or at the end of + the array. + + :rtype: bytes + :raises FaultError: if reading the string causes a bad memory access + :raises TypeError: if this object is not a pointer or array + + .. method:: member_(name) + + Get a member of this object. + + This is valid for structures, unions, and pointers to either. + + Normally the dot operator (``.``) can be used to accomplish the same + thing, but this method can be used if there is a name conflict with an + Object member or method. + + :param str name: Name of the member. + :rtype: Object + :raises TypeError: if this object is not a structure, union, or a + pointer to either + :raises LookupError: if this object does not have a member with the + given name + + .. method:: address_of_() + + Get a pointer to this object. + + This corresponds to the address-of (``&``) operator in C. 
It is only + possible for reference objects, as value objects don't have an address + in the program. + + As opposed to :attr:`address_`, this returns an ``Object``, not an + ``int``. + + :rtype: Object + :raises ValueError: if this object is a value + + .. method:: read_() + + Read this object (which may be a reference or a value) and return it as + a value object. + + This is useful if the object can change in the running program (but of + course nothing stops the program from modifying the object while it is + being read). + + As opposed to :meth:`value_()`, this returns an ``Object``, not a + standard Python type. + + :rtype: Object + :raises FaultError: if reading this object causes a bad memory access + :raises TypeError: if this object has an unreadable type (e.g., + ``void``) + +.. function:: NULL(prog, type) + + Get an object representing ``NULL`` casted to the given type. + + This is equivalent to ``Object(prog, type, value=0)``. + + :param Program prog: The program. + :param type: The type. + :type type: str or Type + :rtype: Object + +.. function:: cast(type, obj) + + Get the value of the given object casted to another type. + + Objects with a scalar type (integer, boolean, enumerated, floating-point, + or pointer) can be casted to a different scalar type. Other objects can + only be casted to the same type. This always results in a value object. See + also :func:`drgn.reinterpret()`. + + :param type: The type to cast to. + :type type: str or Type + :param Object obj: The object to cast. + :rtype: Object + +.. function:: reinterpret(type, obj, byteorder=None) + + Get a copy of the given object reinterpreted as another type and/or byte + order. + + This reinterprets the raw memory of the object, so an object can be + reinterpreted as any other type. However, value objects with a scalar type + cannot be reinterpreted, as their memory layout in the program is not + known. 
Reinterpreting a reference results in a reference, and + reinterpreting a value results in a value. See also :func:`drgn.cast()`. + + :param type: The type to reinterpret as. + :type type: str or Type + :param Object obj: The object to reinterpret. + :param byteorder: The byte order to reinterpret as. This should be + ``'little'`` or ``'big'``. The default is ``None``, which indicates the + program byte order. + :type byteorder: str or None + :rtype: Object + +.. function:: container_of(ptr, type, member) + + Get the containing object of a pointer object. + + This corresponds to the ``container_of()`` macro in C. + + :param Object ptr: The pointer. + :param type: The type of the containing object. + :type type: str or Type + :param str member: The name of the member in ``type``. + :raises TypeError: if the object is not a pointer or the type is not a + structure or union type + :raises LookupError: if the type does not have a member with the given name + +.. _api-reference-types: + +Types +----- + +.. class:: Type + + A ``Type`` object describes a type in a program. Each kind of type (e.g., + integer, structure) has different attributes (e.g., name, size). Types can + also have qualifiers (e.g., constant, atomic). Accessing an attribute which + does not apply to a type raises an :exc:`AttributeError`. + + :func:`repr()` of a Type returns a Python representation of the type: + + >>> print(repr(prog.type('sector_t'))) + typedef_type(name='sector_t', type=int_type(name='unsigned long', size=8, is_signed=False)) + + :class:`str() <str>` returns a representation of the type in programming + language syntax: + + >>> print(prog.type('sector_t')) + typedef unsigned long sector_t + + The drgn CLI is set up so that types are displayed with ``str()`` instead + of ``repr()`` by default. + + This class cannot be constructed directly. Instead, use one of the + :ref:`api-type-constructors`. + + .. attribute:: kind + + Kind of this type. + + :vartype: TypeKind + + ..
attribute:: qualifiers + + Bitmask of this type's qualifiers. + + :vartype: Qualifiers + + .. attribute:: name + + Name of this type. This is present for integer, boolean, + floating-point, complex, and typedef types. + + :vartype: str + + .. attribute:: tag + + Tag of this type, or ``None`` if this is an anonymous type. This is + present for structure, union, and enumerated types. + + :vartype: str or None + + .. attribute:: size + + Size of this type in bytes, or ``None`` if this is an incomplete type. + This is present for integer, boolean, floating-point, complex, + structure, union, and pointer types. + + :vartype: int or None + + .. attribute:: length + + Number of elements in this type, or ``None`` if this is an incomplete + type. This is only present for array types. + + :vartype: int or None + + .. attribute:: is_signed + + Whether this type is signed. This is only present for integer types. + + :vartype: bool + + .. attribute:: type + + Type underlying this type, defined as follows: + + * For complex types, the corresponding real type. + * For typedef types, the aliased type. + * For enumerated types, the compatible integer type, which is ``None`` + if this is an incomplete type. + * For pointer types, the referenced type. + * For array types, the element type. + * For function types, the return type. + + For other types, this attribute is not present. + + :vartype: Type or None + + .. attribute:: members + + List of members of this type, or ``None`` if this is an incomplete + type. This is present for structure and union types. + + Each member is a (type, name, bit offset, bit field size) tuple. The + name is ``None`` if the member is unnamed; the bit field size is zero + if the member is not a bit field. + + :vartype: list[tuple(Type, str or None, int, int)] or None + + .. attribute:: enumerators + + List of enumeration constants of this type, or ``None`` if this is an + incomplete type. This is only present for enumerated types.
+ + Each enumeration constant is a (name, value) tuple. + + :vartype: list[tuple(str, int)] or None + + .. attribute:: parameters + + List of parameters of this type. This is only present for function + types. + + Each parameter is a (type, name) tuple. The name is ``None`` if the + parameter is unnamed. + + :vartype: list[tuple(Type, str or None)] + + .. attribute:: is_variadic + + Whether this type takes a variable number of arguments. This is only + present for function types. + + :vartype: bool + + .. method:: type_name() + + Get a descriptive full name of this type. + + :rtype: str + + .. method:: is_complete() + + Get whether this type is complete (i.e., the type definition is known). + This is always ``False`` for void types. It may be ``False`` for + structure, union, enumerated, and array types, as well as typedef types + where the underlying type is one of those. Otherwise, it is always + ``True``. + + :rtype: bool + + .. method:: qualified(qualifiers) + + Get a copy of this type with different qualifiers. + + Note that the original qualifiers are replaced, not added to. + + :param qualifiers: New type qualifiers. + :type qualifiers: Qualifiers or None + :rtype: Type + + .. method:: unqualified() + + Get a copy of this type with no qualifiers. + + :rtype: Type + +.. class:: TypeKind + + ``TypeKind`` is an :class:`enum.Enum` of the different kinds of types. + + .. attribute:: VOID + + Void type. + + .. attribute:: INT + + Integer type. + + .. attribute:: BOOL + + Boolean type. + + .. attribute:: FLOAT + + Floating-point type. + + .. attribute:: COMPLEX + + Complex type. + + .. attribute:: STRUCT + + Structure type. + + .. attribute:: UNION + + Union type. + + .. attribute:: ENUM + + Enumerated type. + + .. attribute:: TYPEDEF + + Type definition (a.k.a. alias) type. + + .. attribute:: POINTER + + Pointer type. + + .. attribute:: ARRAY + + Array type. + + .. attribute:: FUNCTION + + Function type. + +.. 
class:: Qualifiers + + ``Qualifiers`` is an :class:`enum.IntFlag` of type qualifiers. + + .. attribute:: CONST + + Constant type. + + .. attribute:: VOLATILE + + Volatile type. + + .. attribute:: RESTRICT + + `Restrict `_ type. + + .. attribute:: ATOMIC + + Atomic type. + +.. _api-type-constructors: + +Type Constructors +^^^^^^^^^^^^^^^^^ + +Custom drgn types can be created with the following factory functions. These +can be used just like types obtained from :meth:`Program.type()`. + +.. function:: void_type(qualifiers=None) + + Create a new void type. It has kind :attr:`TypeKind.VOID`. + + :param qualifiers: :attr:`Type.qualifiers` + :type qualifiers: Qualifiers or None + :rtype: Type + +.. function:: int_type(name, size, is_signed, qualifiers=None) + + Create a new integer type. It has kind :attr:`TypeKind.INT`. + + :param str name: :attr:`Type.name` + :param int size: :attr:`Type.size` + :param bool is_signed: :attr:`Type.is_signed` + :param qualifiers: :attr:`Type.qualifiers` + :type qualifiers: Qualifiers or None + :rtype: Type + +.. function:: bool_type(name, size, qualifiers=None) + + Create a new boolean type. It has kind :attr:`TypeKind.BOOL`. + + :param str name: :attr:`Type.name` + :param int size: :attr:`Type.size` + :param qualifiers: :attr:`Type.qualifiers` + :type qualifiers: Qualifiers or None + :rtype: Type + +.. function:: float_type(name, size, qualifiers=None) + + Create a new floating-point type. It has kind :attr:`TypeKind.FLOAT`. + + :param str name: :attr:`Type.name` + :param int size: :attr:`Type.size` + :param qualifiers: :attr:`Type.qualifiers` + :type qualifiers: Qualifiers or None + :rtype: Type + +.. function:: complex_type(name, size, type, qualifiers=None) + + Create a new complex type. It has kind :attr:`TypeKind.COMPLEX`. 
+ + :param str name: :attr:`Type.name` + :param int size: :attr:`Type.size` + :param Type type: The corresponding real type (:attr:`Type.type`) + :param qualifiers: :attr:`Type.qualifiers` + :type qualifiers: Qualifiers or None + :rtype: Type + +.. function:: struct_type(tag, size, members, qualifiers=None) + + Create a new structure type. It has kind :attr:`TypeKind.STRUCT`. + + :param tag: :attr:`Type.tag` + :type tag: str or None + :param int size: :attr:`Type.size` + :param list[tuple] members: :attr:`Type.members`. The type of a member may + be given as a callable returning a ``Type``; it will be called the + first time that the member is accessed. The name, bit offset, and bit + field size may be omitted; they default to ``None``, 0, and 0, + respectively. + :param qualifiers: :attr:`Type.qualifiers` + :type qualifiers: Qualifiers or None + :rtype: Type + +.. function:: union_type(tag, size, members, qualifiers=None) + + Create a new union type. It has kind :attr:`TypeKind.UNION`. Otherwise, + this is the same as :func:`struct_type()`. + +.. function:: enum_type(tag, type, enumerators, qualifiers=None) + + Create a new enumerated type. It has kind :attr:`TypeKind.ENUM`. + + :param tag: :attr:`Type.tag` + :type tag: str or None + :param type: The compatible integer type (:attr:`Type.type`) + :type type: Type or None + :param enumerators: :attr:`Type.enumerators` + :type enumerators: list[tuple] or None + :param qualifiers: :attr:`Type.qualifiers` + :type qualifiers: Qualifiers or None + :rtype: Type + +.. function:: typedef_type(name, type, qualifiers=None) + + Create a new typedef type. It has kind :attr:`TypeKind.TYPEDEF`. + + :param str name: :attr:`Type.name` + :param Type type: The aliased type (:attr:`Type.type`) + :param qualifiers: :attr:`Type.qualifiers` + :type qualifiers: Qualifiers or None + :rtype: Type + +.. function:: pointer_type(size, type, qualifiers=None) + + Create a new pointer type.
It has kind :attr:`TypeKind.POINTER`. + + :param int size: :attr:`Type.size` + :param Type type: The referenced type (:attr:`Type.type`) + :param qualifiers: :attr:`Type.qualifiers` + :type qualifiers: Qualifiers or None + :rtype: Type + +.. function:: array_type(length, type, qualifiers=None) + + Create a new array type. It has kind :attr:`TypeKind.ARRAY`. + + :param length: :attr:`Type.length` + :type length: int or None + :param Type type: The element type (:attr:`Type.type`) + :param qualifiers: :attr:`Type.qualifiers` + :type qualifiers: Qualifiers or None + :rtype: Type + +.. function:: function_type(type, parameters, is_variadic=False, qualifiers=None) + + Create a new function type. It has kind :attr:`TypeKind.FUNCTION`. + + :param Type type: The return type (:attr:`Type.type`) + :param list[tuple] parameters: :attr:`Type.parameters`. The type of a + parameter may be given as a callable returning a ``Type``; it will be + called the first time that the parameter is accessed. The name may be + omitted and defaults to ``None``. + :param bool is_variadic: :attr:`Type.is_variadic` + :param qualifiers: :attr:`Type.qualifiers` + :type qualifiers: Qualifiers or None + :rtype: Type + +Exceptions +---------- + +.. exception:: FaultError + + This error is raised when a bad memory access is attempted (i.e., when + accessing a memory address which is not valid in a program, or when + accessing out of bounds of a value object). + +.. exception:: FileFormatError + + This error is raised when a file cannot be parsed according to its expected + format (e.g., ELF or DWARF).
diff --git a/docs/conf.py b/docs/conf.py new file mode 100644 index 00000000..f01d2c26 --- /dev/null +++ b/docs/conf.py @@ -0,0 +1,38 @@ +import os.path +import sys + +sys.path.append(os.path.abspath('..')) +sys.path.append(os.path.abspath('exts')) + +master_doc = 'index' + +extensions = [ + 'autopackage', + 'setuptools_config', + 'sphinx.ext.autodoc', + 'sphinx.ext.extlinks', + 'sphinx.ext.intersphinx', + 'sphinx.ext.viewcode', +] + +autodoc_mock_imports = ['_drgn'] + +extlinks = { + 'linux': ('https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/%s', ''), +} + +intersphinx_mapping = { + 'python': ('https://docs.python.org/3', None), +} + +manpages_url = 'http://man7.org/linux/man-pages/man{section}/{page}.{section}.html' + +html_theme = 'alabaster' + +html_theme_options = { + 'description': 'Debugger-as-a-library', + 'github_user': 'osandov', + 'github_repo': 'drgn', + 'github_button': True, + 'github_type': 'star', +} diff --git a/docs/exts/autopackage.py b/docs/exts/autopackage.py new file mode 100644 index 00000000..1f7f7c90 --- /dev/null +++ b/docs/exts/autopackage.py @@ -0,0 +1,65 @@ +# Copyright 2018-2019 - Omar Sandoval +# SPDX-License-Identifier: GPL-3.0+ + +import docutils.nodes +from docutils.statemachine import StringList +import importlib +import pkgutil +import sphinx.ext.autodoc +from sphinx.util.docutils import SphinxDirective +from sphinx.util.nodes import nested_parse_with_titles + + +# sphinx.ext.autodoc doesn't recursively document packages, so we need our own +# directive to do that. +class AutopackageDirective(SphinxDirective): + required_arguments = 1 + optional_arguments = 0 + + def run(self): + sourcename = '' + def aux(name): + module = importlib.import_module(name) + + contents = StringList() + contents.append(f'.. 
automodule:: {name}', sourcename) + if hasattr(module, '__all__'): + module_attrs = [ + attr_name for attr_name in module.__all__ + if getattr(module, attr_name).__module__ == name + ] + if module_attrs: + contents.append(f" :members: {', '.join(module_attrs)}", + sourcename) + else: + contents.append(' :members:', sourcename) + contents.append('', sourcename) + + node = docutils.nodes.section() + nested_parse_with_titles(self.state, contents, node) + + # If this module defines any sections, then submodules should go + # inside of the last one. + section = node + for child in node.children: + if isinstance(child, docutils.nodes.section): + section = child + + if hasattr(module, '__path__'): + submodules = sorted(module_info.name for module_info in + pkgutil.iter_modules(module.__path__, + prefix=name + '.')) + for submodule in submodules: + section.extend(aux(submodule)) + + return node.children + + with sphinx.ext.autodoc.mock(self.env.config.autodoc_mock_imports): + return aux(self.arguments[0]) + + +def setup(app): + app.setup_extension('sphinx.ext.autodoc') + app.add_directive('autopackage', AutopackageDirective) + + return {'parallel_read_safe': True} diff --git a/docs/exts/setuptools_config.py b/docs/exts/setuptools_config.py new file mode 100644 index 00000000..9c9c4470 --- /dev/null +++ b/docs/exts/setuptools_config.py @@ -0,0 +1,63 @@ +# https://pypi.org/project/jaraco.packaging/ +# +# Copyright Jason R. 
Coombs +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. 
+ +from __future__ import unicode_literals + +import os +import sys +import subprocess + + +if 'check_output' not in dir(subprocess): + import subprocess32 as subprocess + + +def setup(app): + app.add_config_value('package_url', '', '') + app.connect('builder-inited', load_config_from_setup) + app.connect('html-page-context', add_package_url) + + +def load_config_from_setup(app): + """ + Replace values in app.config from package metadata + """ + # for now, assume project root is one level up + root = os.path.join(app.confdir, '..') + setup_script = os.path.join(root, 'setup.py') + fields = ['--name', '--version', '--url', '--author'] + dist_info_cmd = [sys.executable, setup_script] + fields + output = subprocess.check_output( + dist_info_cmd, + cwd=root, + universal_newlines=True, + ) + outputs = output.strip().split('\n') + project, version, url, author = outputs + app.config.project = project + app.config.version = app.config.release = version + app.config.package_url = url + app.config.author = app.config.copyright = author + + +def add_package_url(app, pagename, templatename, context, doctree): + context['package_url'] = app.config.package_url diff --git a/docs/helpers.rst b/docs/helpers.rst new file mode 100644 index 00000000..054a4ab6 --- /dev/null +++ b/docs/helpers.rst @@ -0,0 +1 @@ +.. autopackage:: drgn.helpers diff --git a/docs/index.rst b/docs/index.rst new file mode 100644 index 00000000..02d73b05 --- /dev/null +++ b/docs/index.rst @@ -0,0 +1,37 @@ +drgn +==== + +.. include:: ../README.rst + :start-after: start-introduction + :end-before: end-introduction + +In addition to the main Python API, an experimental C library, ``libdrgn``, is +also available. + +See the :doc:`installation` instructions. Then, start with the +:doc:`user_guide`. + +License +------- + +.. include:: ../README.rst + :start-after: start-license + :end-before: end-license + +Acknowledgements +---------------- + +drgn is named after `this +<https://giraffesgiraffes.bandcamp.com/track/drgnfkr-2>`_ because dragons eat +`dwarves <http://dwarfstd.org/>`_.
+ +Table of Contents +----------------- + +.. toctree:: + :maxdepth: 3 + + installation + user_guide + api_reference + helpers diff --git a/docs/installation.rst b/docs/installation.rst new file mode 100644 index 00000000..6313abdb --- /dev/null +++ b/docs/installation.rst @@ -0,0 +1,47 @@ +Installation +============ + +.. highlight:: console + +drgn depends on `Python `_ 3.6 or newer as well as +`elfutils `_. The build requires `GCC +`_ or `Clang `_, `pkgconf +`_, and `setuptools +`_. A build from a Git checkout also +requires the GNU Autotools (`autoconf +`_, `automake +`_, and `libtool +`_). Install those +dependencies: + +Arch Linux:: + + $ sudo pacman -S --needed autoconf automake libtool gcc pkgconf libelf python python-setuptools + +Debian/Ubuntu:: + + $ sudo apt-get install autoconf automake libtool gcc pkgconf libelf-dev libdw-dev python3 python3-setuptools + +Note that Debian, Ubuntu Trusty, and Ubuntu Xenial ship Python versions which +are too old, so a newer version must be installed manually. + +Due to a packaging `bug +`_, the following may +also be required:: + + $ sudo apt-get install liblzma-dev zlib1g-dev + +Fedora:: + + $ sudo dnf install autoconf automake libtool gcc pkgconf elfutils-devel python3 python3-setuptools + +Then, drgn can be built and installed:: + + $ python3 setup.py build + $ sudo python3 setup.py install + $ drgn --help + +Or, it can be built and run locally:: + + $ python3 setup.py build_ext -i + $ python3 -m drgn --help diff --git a/docs/user_guide.rst b/docs/user_guide.rst new file mode 100644 index 00000000..b84393a3 --- /dev/null +++ b/docs/user_guide.rst @@ -0,0 +1,284 @@ +User Guide +========== + +Quick Start +----------- + +.. include:: ../README.rst + :start-after: start-quick-start + :end-before: end-quick-start + +Core Concepts +------------- + +.. highlight:: pycon + +Programs +^^^^^^^^ + +A program being debugged is represented by an instance of the +:class:`drgn.Program` class.
The drgn CLI is initialized with a ``Program`` +named ``prog``; unless you are using the drgn library directly, this is usually +the only ``Program`` you will need. + +A ``Program`` is used to look up type definitions, access variables, and read +arbitrary memory:: + + >>> prog.type('unsigned long') + int_type(name='unsigned long', size=8, is_signed=False) + >>> prog['jiffies'] + Object(prog, 'volatile unsigned long', address=0xffffffffbe405000) + >>> prog.read(0xffffffffbe411e10, 16) + b'swapper/0\x00\x00\x00\x00\x00\x00\x00' + +The :meth:`drgn.Program.type()`, :meth:`drgn.Program.variable()`, +:meth:`drgn.Program.constant()`, and :meth:`drgn.Program.function()` methods +look up those various things in a program. :meth:`drgn.Program.read()` reads +memory from the program's address space. The :meth:`[] +` operator looks up a variable, constant, or +function:: + + >>> prog['jiffies'] == prog.variable('jiffies') + True + +It is usually more convenient to use the ``[]`` operator rather than the +``variable()``, ``constant()``, or ``function()`` methods unless the program +has multiple objects with the same name, in which case the methods provide more +control. + +Objects +^^^^^^^ + +Variables, constants, functions, and computed values are all called *objects* +in drgn. Objects are represented by the :class:`drgn.Object` class. An object +may exist in the memory of the program (a *reference*):: + + >>> Object(prog, 'int', address=0xffffffffc09031a0) + +Or, an object may be a temporary computed value (a *value*):: + + >>> Object(prog, 'int', value=4) + +What makes drgn scripts expressive is that objects can be used almost exactly +like they would be in the program's own source code. 
For example, structure +members can be accessed with the dot (``.``) operator, arrays can be +subscripted with ``[]``, arithmetic can be performed, and objects can be +compared:: + + >>> print(prog['init_task'].comm[0]) + (char)115 + >>> print(repr(prog['init_task'].nsproxy.mnt_ns.mounts + 1)) + Object(prog, 'unsigned int', value=34) + >>> prog['init_task'].nsproxy.mnt_ns.pending_mounts > 0 + False + +A common use case is converting a ``drgn.Object`` to a Python value so it can +be used by a standard Python library. There are a few ways to do this: + +* The :meth:`drgn.Object.value_()` method gets the value of the object with the + directly corresponding Python type (i.e., integers and pointers become + ``int``, floating-point types become ``float``, booleans become ``bool``, + arrays become ``list``, structures and unions become ``dict``). +* The :meth:`drgn.Object.string_()` method gets a null-terminated string as + ``bytes`` from an array or pointer. +* The :class:`int() <int>`, :class:`float() <float>`, and :class:`bool() + <bool>` functions do an explicit conversion to that Python type. + +Objects have several attributes; the most important are +:attr:`drgn.Object.prog_` and :attr:`drgn.Object.type_`. The former is the +:class:`drgn.Program` that the object is from, and the latter is the +:class:`drgn.Type` of the object. + +Note that all attributes and methods of the ``Object`` class end with an +underscore (``_``) in order to avoid conflicting with structure or union +members. The ``Object`` attributes and methods always take precedence; use +:meth:`drgn.Object.member_()` if there is a conflict. + +References vs. Values +""""""""""""""""""""" + +The main difference between reference objects and value objects is how they are +evaluated.
References are read from the program's memory every time they are +evaluated; values simply return the stored value (:meth:`drgn.Object.read_()` +reads a reference object and returns it as a value object):: + + >>> import time + >>> jiffies = prog['jiffies'] + >>> jiffies.value_() + 4391639989 + >>> time.sleep(1) + >>> jiffies.value_() + 4391640290 + >>> jiffies2 = jiffies.read_() + >>> jiffies2.value_() + 4391640291 + >>> time.sleep(1) + >>> jiffies2.value_() + 4391640291 + >>> jiffies.value_() + 4391640593 + +References have a :attr:`drgn.Object.address_` attribute, which is the object's +address as a Python ``int``. This is slightly different from the +:meth:`drgn.Object.address_of_()` method, which returns the address as a +``drgn.Object``. Of course, both references and values can have a pointer type; +``address_`` refers to the address of the pointer object itself, and +:meth:`drgn.Object.value_()` refers to the value of the pointer (i.e., the +address it points to):: + + >>> address = prog['jiffies'].address_ + >>> type(address) + <class 'int'> + >>> print(hex(address)) + 0xffffffffbe405000 + >>> jiffiesp = prog['jiffies'].address_of_() + >>> jiffiesp + Object(prog, 'volatile unsigned long *', value=0xffffffffbe405000) + >>> print(hex(jiffiesp.value_())) + 0xffffffffbe405000 + +Types +^^^^^ + +drgn automatically obtains type definitions from the program. Types are +represented by the :class:`drgn.Type` class and created by various factory +functions like :func:`int_type()`:: + + >>> prog.type('int') + int_type(name='int', size=4, is_signed=True) + +You won't usually need to work with types directly, but see +:ref:`api-reference-types` if you do. + +Helpers +^^^^^^^ + +Some programs have common data structures that you may want to examine. For +example, consider linked lists in the Linux kernel: + +..
code-block:: c + + struct list_head { + struct list_head *next, *prev; + }; + + #define list_for_each(pos, head) \ + for (pos = (head)->next; pos != (head); pos = pos->next) + +When working with these lists, you'd probably want to define a function: + +.. code-block:: python3 + + def list_for_each(head): + pos = head.next + while pos != head: + yield pos + pos = pos.next + +Then, you could use it like so for any list you need to look at:: + + >>> for pos in list_for_each(head): + ... do_something_with(pos) + +Of course, it would be a waste of time and effort for everyone to have to +define these helpers for themselves, so drgn includes a collection of helpers +for many use cases. See :doc:`helpers`. + +Command Line Interface +---------------------- + +The drgn CLI is basically a wrapper around the drgn library which automatically +creates a :class:`drgn.Program`. The CLI can be run in interactive mode or +script mode. + +Script Mode +^^^^^^^^^^^ + +Script mode is useful for reusable scripts. Simply pass the path to the script +along with any arguments: + +.. code-block:: console + + $ cat script.py + import sys + from drgn.helpers.linux import find_task + + pid = int(sys.argv[1]) + uid = find_task(prog, pid).cred.uid.val.value_() + print(f'PID {pid} is being run by UID {uid}') + $ sudo drgn -k script.py 601 + PID 601 is being run by UID 1000 + +It's even possible to run drgn scripts directly with the proper `shebang +`_:: + + $ cat script2.py + #!/usr/bin/drgn -k + + mounts = prog['init_task'].nsproxy.mnt_ns.mounts.value_() + print(f'You have {mounts} filesystems mounted') + $ sudo ./script2.py + You have 36 filesystems mounted + +You usually shouldn't depend on an executable being installed at a specific +absolute path. With newer versions of GNU coreutils (since v8.30), you can +use |env -S|_: + +.. |env -S| replace:: ``env --split-string`` +.. 
_env -S: https://www.gnu.org/software/coreutils/manual/html_node/env-invocation.html#g_t_002dS_002f_002d_002dsplit_002dstring-usage-in-scripts + +.. code-block:: sh + + #!/usr/bin/env -S drgn -k + +Interactive Mode +^^^^^^^^^^^^^^^^ + +Interactive mode uses the Python interpreter's `interactive mode +`_ and +adds a few nice features, including: + +* History +* Tab completion +* Automatic import of relevant helpers +* Pretty printing of objects and types + +The default behavior of the Python `REPL +`_ is to +print the output of :func:`repr()`. For :class:`drgn.Object` and +:class:`drgn.Type`, this is a raw representation:: + + >>> print(repr(prog['jiffies'])) + Object(prog, 'volatile unsigned long', address=0xffffffffbe405000) + >>> print(repr(prog.type('atomic_t'))) + typedef_type(name='atomic_t', type=struct_type(tag=None, size=4, members=((int_type(name='int', size=4, is_signed=True), 'counter', 0, 0),))) + +The standard :func:`print()` function uses the output of :func:`str()`. For +drgn objects and types, this is a representation in programming language +syntax:: + + >>> print(prog['jiffies']) + (volatile unsigned long)4395387628 + >>> print(prog.type('atomic_t')) + typedef struct { + int counter; + } atomic_t + +In interactive mode, the drgn CLI automatically uses ``str()`` instead of +``repr()`` for objects and types, so you don't need to call ``print()`` +explicitly:: + + $ sudo drgn -k + >>> prog['jiffies'] + (volatile unsigned long)4395387628 + >>> prog.type('atomic_t') + typedef struct { + int counter; + } atomic_t + +Next Steps +---------- + +Refer to the :doc:`api_reference`. Look through the :doc:`helpers`. Browse +through the official `examples +`_. diff --git a/drgn/__init__.py b/drgn/__init__.py index 8cb16a3a..5ece46bb 100644 --- a/drgn/__init__.py +++ b/drgn/__init__.py @@ -40,18 +40,17 @@ that package should be considered implementation details and should not be used. 
""" -from typing import Union - from _drgn import ( - __version__, FaultError, FileFormatError, + NULL, Object, Program, ProgramFlags, Qualifiers, Type, TypeKind, + __version__, array_type, bool_type, cast, @@ -102,13 +101,3 @@ __all__ = [ 'union_type', 'void_type', ] - - -def NULL(prog: Program, type: Union[str, Type]) -> Object: - """ - Return an Object representing NULL cast to the given type. The type can - be a string or a Type object. - - This is equivalent to Object(prog, type, value=0). - """ - return Object(prog, type, value=0) diff --git a/drgn/helpers/__init__.py b/drgn/helpers/__init__.py index b3ce56bb..d8b8ea42 100644 --- a/drgn/helpers/__init__.py +++ b/drgn/helpers/__init__.py @@ -1,11 +1,18 @@ -# Copyright 2018 - Omar Sandoval +# Copyright 2018-2019 - Omar Sandoval # SPDX-License-Identifier: GPL-3.0+ """ -Common program helpers +Helpers +------- -This package contains subpackages which provide helpers for working with -particular types of programs. +The ``drgn.helpers`` package contains subpackages which provide helpers for +working with particular types of programs. Currently, there are only helpers +for the Linux kernel. In the future, there may be helpers for, e.g., glibc and +libstdc++. + +Parameter types and return types are :class:`drgn.Object` unless noted +otherwise. Many helpers include a C function signature indicating the expected +object types. """ from typing import Iterable diff --git a/drgn/helpers/linux/__init__.py b/drgn/helpers/linux/__init__.py index ec20ca2a..3d83f7d6 100644 --- a/drgn/helpers/linux/__init__.py +++ b/drgn/helpers/linux/__init__.py @@ -1,21 +1,33 @@ -# Copyright 2018 - Omar Sandoval +# Copyright 2018-2019 - Omar Sandoval # SPDX-License-Identifier: GPL-3.0+ """ -Linux kernel helpers +Linux Kernel +------------ -This package contains several modules for working with data structures and -subsystems in the Linux kernel. 
The helpers are available from the individual -modules in which they are defined and from this top-level package. E.g., the -following are both valid: +The ``drgn.helpers.linux`` package contains several modules for working with +data structures and subsystems in the Linux kernel. The helpers are available +from the individual modules in which they are defined and from this top-level +package. E.g., the following are both valid: >>> from drgn.helpers.linux.list import list_for_each_entry >>> from drgn.helpers.linux import list_for_each_entry -In interactive mode, the following is done automatically when debugging the -Linux kernel: +Iterator macros (``for_each_foo``) are a common idiom in the Linux kernel. The +equivalent drgn helpers are implemented as Python :ref:`generators +`. For example, the following code in C: ->>> from drgn.helpers.linux import * +.. code-block:: c + + list_for_each(pos, head) + do_something_with(pos); + +Translates to the following code in Python: + +.. code-block:: python3 + + for pos in list_for_each(head): + do_something_with(pos) """ from drgn.helpers.linux.block import * diff --git a/drgn/helpers/linux/block.py b/drgn/helpers/linux/block.py index 41f78bd1..42794fdf 100644 --- a/drgn/helpers/linux/block.py +++ b/drgn/helpers/linux/block.py @@ -1,92 +1,110 @@ -# Copyright 2018 - Omar Sandoval +# Copyright 2018-2019 - Omar Sandoval # SPDX-License-Identifier: GPL-3.0+ """ -Linux kernel block layer helpers +Block Layer +----------- -This module provides helpers for working with the Linux block layer, including -disks (struct gendisk) and partitions (struct hd_struct). +The ``drgn.helpers.linux.block`` module provides helpers for working with the +Linux block layer, including disks (``struct gendisk``) and partitions +(``struct hd_struct``). 
""" -import typing - from drgn import Object, container_of from drgn.helpers import escape_string -from drgn.helpers.linux.device import MAJOR, MINOR +from drgn.helpers.linux.device import MAJOR, MINOR, MKDEV from drgn.helpers.linux.list import list_for_each_entry __all__ = [ - 'Disk', - 'Partition', + 'disk_devt', + 'disk_name', 'for_each_disk', 'print_disks', + 'part_devt', + 'part_name', 'for_each_partition', 'print_partitions', ] -class Disk(typing.NamedTuple): - """A disk. gendisk is a struct gendisk * object.""" - major: int - minor: int - name: bytes - gendisk: Object +def disk_devt(disk): + """ + .. c:function:: dev_t disk_devt(struct gendisk *disk) + + Get a disk's device number. + """ + return MKDEV(disk.major, disk.first_minor) + + +def disk_name(disk): + """ + .. c:function:: char *disk_name(struct gendisk *disk) + + Get the name of a disk (e.g., ``sda``). + + :rtype: bytes + """ + return disk.disk_name.string_() def for_each_disk(prog): """ - for_each_disk() -> Iterator[Disk] + Iterate over all disks in the system. - Return an iterator over all disks in the system. + :return: Iterator of ``struct gendisk *`` objects. 
""" devices = prog['block_class'].p.klist_devices.k_list.address_of_() disk_type = prog['disk_type'].address_of_() - for device in list_for_each_entry('struct device', devices, 'knode_class.n_node'): + for device in list_for_each_entry('struct device', devices, + 'knode_class.n_node'): if device.type == disk_type: - obj = container_of(device, 'struct gendisk', 'part0.__dev') - dev = device.devt.value_() - yield Disk(MAJOR(dev), MINOR(dev), device.kobj.name.string_(), obj) + yield container_of(device, 'struct gendisk', 'part0.__dev') def print_disks(prog): + """Print all of the disks in the system.""" + for disk in for_each_disk(prog): + major = disk.major.value_() + minor = disk.first_minor.value_() + name = escape_string(disk_name(disk), escape_backslash=True) + print(f'{major}:{minor} {name} ({disk.type_.type_name()})0x{disk.value_():x}') + + +def part_devt(part): """ - print_disks() + .. c:function:: dev_t part_devt(struct hd_struct *part) - Print all of the disks in the system. + Get a partition's device number. """ - for major, minor, name, obj in for_each_disk(prog): - name = escape_string(name, escape_backslash=True) - print(f'{major}:{minor} {name} ({obj.type_.type_name()})0x{obj.value_():x}') + return part.__dev.devt -class Partition(typing.NamedTuple): - """A disk partition. hd_struct is a struct hd_struct * object.""" - major: int - minor: int - name: bytes - hd_struct: Object +def part_name(part): + """ + .. c:function:: char *part_name(struct hd_struct *part) + + Get the name of a partition (e.g., ``sda1``). + + :rtype: bytes + """ + return part.__dev.kobj.name.string_() def for_each_partition(prog): """ - for_each_partition() -> Iterator[Partition] + Iterate over all partitions in the system. - Return an iterator over all partitions in the system. + :return: Iterator of ``struct hd_struct *`` objects. 
""" devices = prog['block_class'].p.klist_devices.k_list.address_of_() - for device in list_for_each_entry('struct device', devices, 'knode_class.n_node'): - obj = container_of(device, 'struct hd_struct', '__dev') - dev = device.devt.value_() - yield Partition(MAJOR(dev), MINOR(dev), device.kobj.name.string_(), - obj) + for device in list_for_each_entry('struct device', devices, + 'knode_class.n_node'): + yield container_of(device, 'struct hd_struct', '__dev') def print_partitions(prog): - """ - print_partitions() - - Print all of the partitions in the system. - """ - for major, minor, name, obj in for_each_partition(prog): - name = escape_string(name, escape_backslash=True) - print(f'{major}:{minor} {name} ({obj.type_.type_name()})0x{obj.value_():x}') + """Print all of the partitions in the system.""" + for part in for_each_partition(prog): + devt = part_devt(part).value_() + name = escape_string(part_name(part), escape_backslash=True) + print(f'{MAJOR(devt)}:{MINOR(devt)} {name} ({part.type_.type_name()})0x{part.value_():x}') diff --git a/drgn/helpers/linux/cpumask.py b/drgn/helpers/linux/cpumask.py index cd9fe03a..27dcaca5 100644 --- a/drgn/helpers/linux/cpumask.py +++ b/drgn/helpers/linux/cpumask.py @@ -1,10 +1,12 @@ -# Copyright 2018 - Omar Sandoval +# Copyright 2018-2019 - Omar Sandoval # SPDX-License-Identifier: GPL-3.0+ """ -Linux kernel CPU mask helpers +CPU Masks +--------- -This module provides helpers for working with CPU masks from "linux/cpumask.h". +The ``drgn.helpers.linux.cpumask`` module provides helpers for working with CPU +masks from :linux:`include/linux/cpumask.h`. """ __all__ = [ @@ -17,9 +19,11 @@ __all__ = [ def for_each_cpu(mask): """ - for_each_cpu(struct cpumask) + .. c:function:: for_each_cpu(struct cpumask mask) - Return an iterator over all of the CPUs in the given mask, as ints. + Iterate over all of the CPUs in the given mask. 
+ + :rtype: Iterator[int] """ bits = mask.bits word_bits = 8 * bits.type_.type.sizeof() @@ -32,26 +36,26 @@ def for_each_cpu(mask): def for_each_possible_cpu(prog): """ - for_each_possible_cpu() + Iterate over all possible CPUs. - Return an iterator over all possible CPUs, as ints. + :rtype: Iterator[int] """ return for_each_cpu(prog['__cpu_possible_mask']) def for_each_online_cpu(prog): """ - for_each_online_cpu() + Iterate over all online CPUs. - Return an iterator over all online CPUs, as ints. + :rtype: Iterator[int] """ return for_each_cpu(prog['__cpu_online_mask']) def for_each_present_cpu(prog): """ - for_each_present_cpu() + Iterate over all present CPUs. - Return an iterator over all present CPUs, as ints. + :rtype: Iterator[int] """ return for_each_cpu(prog['__cpu_present_mask']) diff --git a/drgn/helpers/linux/device.py b/drgn/helpers/linux/device.py index b358f804..32ca43f1 100644 --- a/drgn/helpers/linux/device.py +++ b/drgn/helpers/linux/device.py @@ -1,14 +1,15 @@ -# Copyright 2018 - Omar Sandoval +# Copyright 2018-2019 - Omar Sandoval # SPDX-License-Identifier: GPL-3.0+ """ -Linux kernel device helpers +Devices +------- -This module provides helpers for working with Linux devices, including the -kernel encoding of dev_t. +The ``drgn.helpers.linux.device`` module provides helpers for working with +Linux devices, including the kernel encoding of ``dev_t``. """ -from drgn import cast, Object +from drgn import Object, cast __all__ = [ 'MAJOR', @@ -24,9 +25,9 @@ _MINORMASK = ((1 << _MINORBITS) - 1) def MAJOR(dev): """ - unsigned int MAJOR(dev_t) + .. c:function:: unsigned int MAJOR(dev_t dev) - Return the major ID of a kernel dev_t. + Return the major ID of a kernel ``dev_t``. """ major = dev >> _MINORBITS if isinstance(major, Object): @@ -36,9 +37,9 @@ def MAJOR(dev): def MINOR(dev): """ - unsigned int MINOR(dev_t) + .. c:function:: unsigned int MINOR(dev_t dev) - Return the major ID of a kernel dev_t. + Return the minor ID of a kernel ``dev_t``. 
""" minor = dev & _MINORMASK if isinstance(minor, Object): @@ -48,9 +49,9 @@ def MINOR(dev): def MKDEV(major, minor): """ - dev_t MKDEV(unsigned int major, unsigned int minor) + .. c:function:: dev_t MKDEV(unsigned int major, unsigned int minor) - Return a kernel dev_t from the major and minor IDs. + Return a kernel ``dev_t`` from the major and minor IDs. """ dev = (major << _MINORBITS) | minor if isinstance(dev, Object): diff --git a/drgn/helpers/linux/fs.py b/drgn/helpers/linux/fs.py index fb834f2b..096b746c 100644 --- a/drgn/helpers/linux/fs.py +++ b/drgn/helpers/linux/fs.py @@ -1,26 +1,28 @@ -# Copyright 2018 - Omar Sandoval +# Copyright 2018-2019 - Omar Sandoval # SPDX-License-Identifier: GPL-3.0+ """ -Linux kernel filesystem helpers +Virtual Filesystem Layer +------------------------ -This module provides helpers for working with the Linux virtual filesystem -(VFS) layer, including mounts, dentries, and inodes. +The ``drgn.helpers.linux.fs`` module provides helpers for working with the +Linux virtual filesystem (VFS) layer, including mounts, dentries, and inodes. """ import os -import typing from drgn import Object, Program, container_of from drgn.helpers import escape_string from drgn.helpers.linux.list import hlist_for_each_entry, list_for_each_entry __all__ = [ - 'Mount', 'd_path', 'dentry_path', 'inode_path', 'inode_paths', + 'mount_src', + 'mount_dst', + 'mount_fstype', 'for_each_mount', 'print_mounts', 'fget', @@ -31,10 +33,10 @@ __all__ = [ def d_path(path_or_vfsmnt, dentry=None): """ - char *d_path(struct path *) - char *d_path(struct vfsmount *, struct dentry *) + .. c:function:: char *d_path(struct path *path) + .. c:function:: char *d_path(struct vfsmount *vfsmnt, struct dentry *dentry) - Return the full path of a dentry given a struct path or a mount and a + Return the full path of a dentry given a ``struct path *`` or a mount and a dentry. 
""" type_name = str(path_or_vfsmnt.type_.type_name()) @@ -72,7 +74,7 @@ def d_path(path_or_vfsmnt, dentry=None): def dentry_path(dentry): """ - char *dentry_path(struct dentry *) + .. c:function:: char *dentry_path(struct dentry *dentry) Return the path of a dentry from the root of its filesystem. """ @@ -88,7 +90,7 @@ def dentry_path(dentry): def inode_path(inode): """ - char *inode_path(struct inode *) + .. c:function:: char *inode_path(struct inode *inode) Return any path of an inode from the root of its filesystem. """ @@ -98,34 +100,70 @@ def inode_path(inode): def inode_paths(inode): """ - inode_paths(struct inode *) + .. c:function:: inode_paths(struct inode *inode) Return an iterator over all of the paths of an inode from the root of its filesystem. + + :rtype: Iterator[bytes] """ return ( dentry_path(dentry) for dentry in - hlist_for_each_entry('struct dentry', inode.i_dentry.address_of_(), 'd_u.d_alias') + hlist_for_each_entry('struct dentry', inode.i_dentry.address_of_(), + 'd_u.d_alias') ) -class Mount(typing.NamedTuple): - """A mounted filesystem. mount is a struct mount * object.""" - src: bytes - dst: bytes - fstype: bytes - mount: Object +def mount_src(mnt): + """ + .. c:function:: char *mount_src(struct mount *mnt) + + Get the source device name for a mount. + + :rtype: bytes + """ + return mnt.mnt_devname.string_() + + +def mount_dst(mnt): + """ + .. c:function:: char *mount_dst(struct mount *mnt) + + Get the path of a mount point. + + :rtype: bytes + """ + return d_path(mnt.mnt.address_of_(), mnt.mnt.mnt_root) + + +def mount_fstype(mnt): + """ + .. c:function:: char *mount_fstype(struct mount *mnt) + + Get the filesystem type of a mount. + + :rtype: bytes + """ + sb = mnt.mnt.mnt_sb.read_() + fstype = sb.s_type.name.string_() + subtype = sb.s_subtype.read_() + if subtype: + subtype = subtype.string_() + if subtype: + fstype += b'.' 
+ subtype + return fstype def for_each_mount(prog_or_ns, src=None, dst=None, fstype=None): """ - for_each_mount(struct mnt_namespace *, char *src, char *dst, - char *fstype) -> Iterator[Mount] + .. c:function:: for_each_mount(struct mnt_namespace *ns, char *src, char *dst, char *fstype) - Return an iterator over all of the mounts in a given namespace. If given a - Program object instead, the initial mount namespace is used. The returned + Iterate over all of the mounts in a given namespace. If given a + :class:`Program` instead, the initial mount namespace is used. The returned mounts can be filtered by source, destination, or filesystem type, all of - which are encoded using os.fsencode(). + which are encoded using :func:`os.fsencode()`. + + :return: Iterator of ``struct mount *`` objects. """ if isinstance(prog_or_ns, Program): ns = prog_or_ns['init_task'].nsproxy.mnt_ns @@ -139,57 +177,44 @@ def for_each_mount(prog_or_ns, src=None, dst=None, fstype=None): fstype = os.fsencode(fstype) for mnt in list_for_each_entry('struct mount', ns.list.address_of_(), 'mnt_list'): - mnt_src = mnt.mnt_devname.string_() - if src is not None and mnt_src != src: - continue - mnt_dst = d_path(mnt.mnt.address_of_(), mnt.mnt.mnt_root) - if dst is not None and mnt_dst != dst: - continue - sb = mnt.mnt.mnt_sb.read_() - mnt_fstype = sb.s_type.name.string_() - subtype = sb.s_subtype.read_() - if subtype: - subtype = subtype.string_() - if subtype: - mnt_fstype += b'.' + subtype - if fstype is not None and mnt_fstype != fstype: - continue - yield Mount(mnt_src, mnt_dst, mnt_fstype, mnt) + if ((src is None or mount_src(mnt) == src) and + (dst is None or mount_dst(mnt) == dst) and + (fstype is None or mount_fstype(mnt) == fstype)): + yield mnt def print_mounts(prog_or_ns, src=None, dst=None, fstype=None): """ - print_mounts(struct mnt_namespace *, char *src, char *dst, char *fstype) + ..
c:function:: print_mounts(struct mnt_namespace *ns, char *src, char *dst, char *fstype) Print the mount table of a given namespace. The arguments are the same as - for_each_mount(). The output format is similar to /proc/mounts but prints - the value of each struct mount *. + :func:`for_each_mount()`. The output format is similar to ``/proc/mounts`` + but prints the value of each ``struct mount *``. """ - for mnt_src, mnt_dst, mnt_fstype, mnt in for_each_mount(prog_or_ns, src, - dst, fstype): - mnt_src = escape_string(mnt_src, escape_backslash=True) - mnt_dst = escape_string(mnt_dst, escape_backslash=True) - mnt_fstype = escape_string(mnt_fstype, escape_backslash=True) + for mnt in for_each_mount(prog_or_ns, src, dst, fstype): + mnt_src = escape_string(mount_src(mnt), escape_backslash=True) + mnt_dst = escape_string(mount_dst(mnt), escape_backslash=True) + mnt_fstype = escape_string(mount_fstype(mnt), escape_backslash=True) print(f'{mnt_src} {mnt_dst} {mnt_fstype} ({mnt.type_.type_name()})0x{mnt.value_():x}') def fget(task, fd): """ - struct file *fget(struct task_struct *, int fd) + .. c:function:: struct file *fget(struct task_struct *task, int fd) - Return the kernel file descriptor (struct file *) of the fd of a given - task. + Return the kernel file descriptor of the fd of a given task. """ return task.files.fdt.fd[fd] def for_each_file(task): """ - for_each_file(struct task_struct *) + .. c:function:: for_each_file(struct task_struct *task) - Return an iterator over all of the files open in a given task. The - generated values are (fd, path, struct file *) tuples. The path is returned - as bytes. + Iterate over all of the files open in a given task. + + :return: Iterator of (fd, ``struct file *``) tuples. 
+ :rtype: Iterator[tuple[int, Object]] """ fdt = task.files.fdt.read_() bits_per_long = 8 * fdt.open_fds.type_.type.size @@ -199,16 +224,17 @@ def for_each_file(task): if word & (1 << j): fd = i * bits_per_long + j file = fdt.fd[fd].read_() - yield fd, d_path(file.f_path), file + yield fd, file def print_files(task): """ - print_files(struct task_struct *) + .. c:function:: print_files(struct task_struct *task) Print the open files of a given task. """ - for fd, path, file in for_each_file(task): + for fd, file in for_each_file(task): + path = d_path(file.f_path) if path is None: path = file.f_inode.i_sb.s_type.name.string_() path = escape_string(path, escape_backslash=True) diff --git a/drgn/helpers/linux/idr.py b/drgn/helpers/linux/idr.py index f562d1f2..69def679 100644 --- a/drgn/helpers/linux/idr.py +++ b/drgn/helpers/linux/idr.py @@ -1,13 +1,14 @@ -# Copyright 2018 - Omar Sandoval +# Copyright 2018-2019 - Omar Sandoval # SPDX-License-Identifier: GPL-3.0+ """ -Linux kernel IDR helpers +IDR +--- -This module provides helpers for working with the IDR data structure in -"linux/idr.h". An IDR provides a mapping from an ID to a pointer. This -currently only supports Linux v4.11+; before this, IDRs were not based on radix -trees. +The ``drgn.helpers.linux.idr`` module provides helpers for working with the IDR +data structure in :linux:`include/linux/idr.h`. An IDR provides a mapping from +an ID to a pointer. This currently only supports Linux v4.11+; before this, +IDRs were not based on radix trees. """ from drgn.helpers.linux.radixtree import radix_tree_for_each, radix_tree_lookup @@ -21,10 +22,10 @@ __all__ = [ def idr_find(idr, id): """ - void *idr_find(struct idr *, unsigned long id) + .. c:function:: void *idr_find(struct idr *idr, unsigned long id) Look up the entry with the given id in an IDR. If it is not found, this - returns a NULL object. + returns a ``NULL`` object. """ # idr_base was added in v4.16. 
try: @@ -36,10 +37,12 @@ def idr_find(idr, id): def idr_for_each(idr): """ - idr_for_each(struct idr *) + .. c:function:: idr_for_each(struct idr *idr) - Return an iterator over all of the entries in an IDR. The generated values - are (index, entry) tuples. + Iterate over all of the entries in an IDR. + + :return: Iterator of (index, ``void *``) tuples. + :rtype: Iterator[tuple[int, Object]] """ try: base = idr.idr_base.value_() diff --git a/drgn/helpers/linux/list.py b/drgn/helpers/linux/list.py index a2a558d7..4a6f4f2e 100644 --- a/drgn/helpers/linux/list.py +++ b/drgn/helpers/linux/list.py @@ -1,11 +1,13 @@ -# Copyright 2018 - Omar Sandoval +# Copyright 2018-2019 - Omar Sandoval # SPDX-License-Identifier: GPL-3.0+ """ -Linux kernel linked list helpers +Linked Lists +------------ -This module provides helpers for working with the doubly-linked list -implementations in "linux/list.h". +The ``drgn.helpers.linux.list`` module provides helpers for working with the +doubly-linked list implementations (``struct list_head`` and ``struct +hlist_head``) in :linux:`include/linux/list.h`. """ from drgn import container_of @@ -26,7 +28,7 @@ __all__ = [ def list_empty(head): """ - bool list_empty(struct list_head *) + .. c:function:: bool list_empty(struct list_head *head) Return whether a list is empty. """ @@ -36,7 +38,7 @@ def list_empty(head): def list_is_singular(head): """ - bool list_is_singular(struct list_head *) + .. c:function:: bool list_is_singular(struct list_head *head) Return whether a list has only one element. """ @@ -47,9 +49,11 @@ def list_is_singular(head): def list_for_each(head): """ - list_for_each(struct list_head *) + .. c:function:: list_for_each(struct list_head *head) - Return an iterator over all of the nodes in a list. + Iterate over all of the nodes in a list. + + :return: Iterator of ``struct list_head *`` objects. 
""" head = head.read_() pos = head.next.read_() @@ -60,9 +64,11 @@ def list_for_each(head): def list_for_each_reverse(head): """ - list_for_each_reverse(struct list_head *) + .. c:function:: list_for_each_reverse(struct list_head *head) - Return an iterator over all of the nodes in a list in reverse order. + Iterate over all of the nodes in a list in reverse order. + + :return: Iterator of ``struct list_head *`` objects. """ head = head.read_() pos = head.prev.read_() @@ -73,10 +79,12 @@ def list_for_each_reverse(head): def list_for_each_entry(type, head, member): """ - list_for_each_entry(type, struct list_head *, member) + .. c:function:: list_for_each_entry(type, struct list_head *head, member) - Return an iterator over all of the entries in a list, given the type of the - entry and the struct list_head member in that type. + Iterate over all of the entries in a list, given the type of the entry and + the ``struct list_head`` member in that type. + + :return: Iterator of ``type *`` objects. """ for pos in list_for_each(head): yield container_of(pos, type, member) @@ -84,10 +92,12 @@ def list_for_each_entry(type, head, member): def list_for_each_entry_reverse(type, head, member): """ - list_for_each_entry_reverse(type, struct list_head *, member) + .. c:function:: list_for_each_entry_reverse(type, struct list_head *head, member) - Return an iterator over all of the entries in a list in reverse order, - given the type of the entry and the struct list_head member in that type. + Iterate over all of the entries in a list in reverse order, given the type + of the entry and the ``struct list_head`` member in that type. + + :return: Iterator of ``type *`` objects. """ for pos in list_for_each_reverse(head): yield container_of(pos, type, member) @@ -95,7 +105,7 @@ def list_for_each_entry_reverse(type, head, member): def hlist_empty(head): """ - bool hlist_empty(struct hlist_head *) + .. 
c:function:: bool hlist_empty(struct hlist_head *head) Return whether a hash list is empty. """ @@ -104,9 +114,11 @@ def hlist_empty(head): def hlist_for_each(head): """ - hlist_for_each(struct hlist_head *) + .. c:function:: hlist_for_each(struct hlist_head *head) - Return an iterator over all of the nodes in a hash list. + Iterate over all of the nodes in a hash list. + + :return: Iterator of ``struct hlist_node *`` objects. """ pos = head.first.read_() while pos: @@ -116,10 +128,12 @@ def hlist_for_each(head): def hlist_for_each_entry(type, head, member): """ - hlist_for_each_entry(type, struct hlist_head *, member) + .. c:function:: hlist_for_each_entry(type, struct hlist_head *head, member) - Return an iterator over all of the entries in a has list, given the type of - the entry and the struct hlist_node member in that type. + Iterate over all of the entries in a hash list, given the type of the entry + and the ``struct hlist_node`` member in that type. + + :return: Iterator of ``type *`` objects. """ for pos in hlist_for_each(head): yield container_of(pos, type, member) diff --git a/drgn/helpers/linux/mm.py b/drgn/helpers/linux/mm.py index b5313228..ce839811 100644 --- a/drgn/helpers/linux/mm.py +++ b/drgn/helpers/linux/mm.py @@ -1,14 +1,15 @@ -# Copyright 2018 - Omar Sandoval +# Copyright 2018-2019 - Omar Sandoval # SPDX-License-Identifier: GPL-3.0+ """ -Linux kernel memory management helpers +Memory Management +----------------- -This module provides helpers for working with the Linux memory management (mm) -subsystem. Only x86-64 support is currently implemented. +The ``drgn.helpers.linux.mm`` module provides helpers for working with the Linux +memory management (MM) subsystem. Only x86-64 support is currently implemented. """ -from drgn import cast, Object +from drgn import Object, cast __all__ = [ @@ -42,9 +43,9 @@ def _page_offset(prog): def for_each_page(prog): """ - for_each_page() + Iterate over all pages in the system.
- Return an iterator over each struct page * in the system. + :return: Iterator of ``struct page *`` objects. """ vmemmap = _vmemmap(prog) for i in range(prog['max_pfn']): @@ -53,7 +54,7 @@ def for_each_page(prog): def page_to_pfn(page): """ - unsigned long page_to_pfn(struct page *) + .. c:function:: unsigned long page_to_pfn(struct page *page) Get the page frame number (PFN) of a page. """ @@ -62,10 +63,10 @@ def page_to_pfn(page): def pfn_to_page(prog_or_pfn, pfn=None): """ - struct page *pfn_to_page(unsigned long) + .. c:function:: struct page *pfn_to_page(unsigned long pfn) Get the page with the given page frame number (PFN). This can take the PFN - as an Object or a Program and the PFN as an int. + as an :class:`Object`, or a :class:`Program` and the PFN as an ``int``. """ if pfn is None: prog = prog_or_pfn.prog_ @@ -77,26 +78,28 @@ def pfn_to_page(prog_or_pfn, pfn=None): def virt_to_pfn(prog_or_addr, addr=None): """ - unsigned long virt_to_pfn(void *) + .. c:function:: unsigned long virt_to_pfn(void *addr) Get the page frame number (PFN) of a directly mapped virtual address. This - can take the address as an Object or a Program and the address as an int. + can take the address as an :class:`Object`, or a :class:`Program` and the + address as an ``int``. """ if addr is None: prog = prog_or_addr.prog_ addr = prog_or_addr.value_() else: prog = prog_or_addr - return Object(prog, 'unsigned long', value=(addr - _page_offset(prog)) >> 12) + return Object(prog, 'unsigned long', + value=(addr - _page_offset(prog)) >> 12) def pfn_to_virt(prog_or_pfn, pfn=None): """ - void *pfn_to_virt(unsigned long) + .. c:function:: void *pfn_to_virt(unsigned long pfn) Get the directly mapped virtual address of the given page frame number - (PFN). This can take the PFN as an Object or a Program and the PFN as an - int. + (PFN). This can take the PFN as an :class:`Object`, or a :class:`Program` + and the PFN as an ``int``. 
""" if pfn is None: prog = prog_or_pfn.prog_ @@ -108,7 +111,7 @@ def pfn_to_virt(prog_or_pfn, pfn=None): def page_to_virt(page): """ - void *page_to_virt(struct page *) + .. c:function:: void *page_to_virt(struct page *page) Get the directly mapped virtual address of a page. """ @@ -117,9 +120,10 @@ def page_to_virt(page): def virt_to_page(prog_or_addr, addr=None): """ - struct page *virt_to_page(void *) + .. c:function:: struct page *virt_to_page(void *addr) Get the page containing a directly mapped virtual address. This can take - the address as an Object or a Program and the address as an int. + the address as an :class:`Object`, or a :class:`Program` and the address as + an ``int``. """ return pfn_to_page(virt_to_pfn(prog_or_addr, addr)) diff --git a/drgn/helpers/linux/percpu.py b/drgn/helpers/linux/percpu.py index f916ecf5..3d9db021 100644 --- a/drgn/helpers/linux/percpu.py +++ b/drgn/helpers/linux/percpu.py @@ -1,11 +1,13 @@ -# Copyright 2018 - Omar Sandoval +# Copyright 2018-2019 - Omar Sandoval # SPDX-License-Identifier: GPL-3.0+ """ -Linux kernel per-CPU helpers +Per-CPU +------- -This module provides helpers for working with per-CPU allocations from -"linux/percpu.h" and per-CPU counters from "linux/percpu_counter.h". +The ``drgn.helpers.linux.percpu`` module provides helpers for working with +per-CPU allocations from :linux:`include/linux/percpu.h` and per-CPU counters +from :linux:`include/linux/percpu_counter.h`. """ from drgn import Object @@ -20,7 +22,7 @@ __all__ = [ def per_cpu_ptr(ptr, cpu): """ - type *per_cpu_ptr(type __percpu *ptr, int cpu) + .. c:function:: type *per_cpu_ptr(type __percpu *ptr, int cpu) Return the per-CPU pointer for a given CPU. """ @@ -30,7 +32,7 @@ def per_cpu_ptr(ptr, cpu): def percpu_counter_sum(fbc): """ - s64 percpu_counter_sum(struct percpu_counter *fbc) + .. c:function:: s64 percpu_counter_sum(struct percpu_counter *fbc) Return the sum of a per-CPU counter. 
""" diff --git a/drgn/helpers/linux/pid.py b/drgn/helpers/linux/pid.py index dbf3d0e0..10056f74 100644 --- a/drgn/helpers/linux/pid.py +++ b/drgn/helpers/linux/pid.py @@ -1,13 +1,15 @@ -# Copyright 2018 - Omar Sandoval +# Copyright 2018-2019 - Omar Sandoval # SPDX-License-Identifier: GPL-3.0+ """ -Linux kernel process ID helpers +Process IDS +----------- -This module provides helpers for looking up process IDs. +The ``drgn.helpers.linux.pid`` module provides helpers for looking up process +IDs and processes. """ -from drgn import cast, container_of, NULL, Program +from drgn import NULL, Program, cast, container_of from drgn.helpers.linux.idr import idr_find, idr_for_each from drgn.helpers.linux.list import hlist_for_each_entry @@ -22,10 +24,11 @@ __all__ = [ def find_pid(prog_or_ns, nr): """ - struct pid *find_pid(struct pid_namespace *, int) + .. c:function:: struct pid *find_pid(struct pid_namespace *ns, int nr) - Return the struct pid for the given PID in the given namespace. If given a - Program object instead, the initial PID namespace is used. + Return the ``struct pid *`` for the given PID number in the given + namespace. If given a :class:`Program` instead, the initial PID namespace + is used. """ if isinstance(prog_or_ns, Program): prog = prog_or_ns @@ -53,11 +56,12 @@ def find_pid(prog_or_ns, nr): def for_each_pid(prog_or_ns): """ - for_each_pid(struct pid_namespace *) + .. c:function:: for_each_pid(struct pid_namespace *ns) - Return an iterator over all of the PIDs in the given namespace. If given a - Program object instead, the initial PID namespace is used. The generated - values are struct pid * objects. + Iterate over all of the PIDs in the given namespace. If given a + :class:`Program` instead, the initial PID namespace is used. + + :return: Iterator of ``struct pid *`` objects. 
""" if isinstance(prog_or_ns, Program): prog = prog_or_ns @@ -81,10 +85,10 @@ def for_each_pid(prog_or_ns): def pid_task(pid, pid_type): """ - struct task_struct *pid_task(struct pid *, enum pid_type) + .. c:function:: struct task_struct *pid_task(struct pid *pid, enum pid_type pid_type) - Return the struct task_struct containing the given struct pid of the given - type. + Return the ``struct task_struct *`` containing the given ``struct pid *`` + of the given type. """ if not pid: return NULL(pid.prog_, 'struct task_struct *') @@ -101,10 +105,10 @@ def pid_task(pid, pid_type): def find_task(prog_or_ns, pid): """ - struct task_struct *find_task(struct pid_namespace *, int pid) + .. c:function:: struct task_struct *find_task(struct pid_namespace *ns, int pid) Return the task with the given PID in the given namespace. If given a - Program object instead, the initial PID namespace is used. + :class:`Program` instead, the initial PID namespace is used. """ if isinstance(prog_or_ns, Program): prog = prog_or_ns @@ -115,11 +119,12 @@ def find_task(prog_or_ns, pid): def for_each_task(prog_or_ns): """ - for_each_task(struct pid_namespace *) + .. c:function:: for_each_task(struct pid_namespace *ns) - Return an iterator over all of the tasks visible in the given namespace. If - given a Program object instead, the initial PID namespace is used. The - generated values are struct task_struct * objects. + Iterate over all of the tasks visible in the given namespace. If given a + :class:`Program` instead, the initial PID namespace is used. + + :return: Iterator of ``struct task_struct *`` objects. 
""" if isinstance(prog_or_ns, Program): prog = prog_or_ns diff --git a/drgn/helpers/linux/radixtree.py b/drgn/helpers/linux/radixtree.py index fef49849..f6010b7a 100644 --- a/drgn/helpers/linux/radixtree.py +++ b/drgn/helpers/linux/radixtree.py @@ -1,14 +1,15 @@ -# Copyright 2018 - Omar Sandoval +# Copyright 2018-2019 - Omar Sandoval # SPDX-License-Identifier: GPL-3.0+ """ -Linux kernel radix tree helpers +Radix Trees +----------- -This module provides helpers for working with radix trees from -"linux/radix-tree.h". +The ``drgn.helpers.linux.radixtree`` module provides helpers for working with +radix trees from :linux:`include/linux/radix-tree.h`. """ -from drgn import cast, Object +from drgn import Object, cast __all__ = [ @@ -38,10 +39,10 @@ def _radix_tree_root_node(root): def radix_tree_lookup(root, index): """ - void *radix_tree_lookup(struct radix_tree_root *, unsigned long index) + .. c:function:: void *radix_tree_lookup(struct radix_tree_root *root, unsigned long index) Look up the entry at a given index in a radix tree. If it is not found, - this returns a NULL object. + this returns a ``NULL`` object. """ node, RADIX_TREE_INTERNAL_NODE = _radix_tree_root_node(root) RADIX_TREE_MAP_MASK = node.slots.type_.length - 1 @@ -56,10 +57,12 @@ def radix_tree_lookup(root, index): def radix_tree_for_each(root): """ - radix_tree_for_each(struct radix_tree_root *) + .. c:function:: radix_tree_for_each(struct radix_tree_root *root) - Return an iterator over all of the entries in a radix tree. The generated - values are (index, entry) tuples. + Iterate over all of the entries in a radix tree. + + :return: Iterator of (index, ``void *``) tuples. 
+ :rtype: Iterator[tuple[int, Object]] """ node, RADIX_TREE_INTERNAL_NODE = _radix_tree_root_node(root) def aux(node, index): diff --git a/drgn/helpers/linux/rbtree.py b/drgn/helpers/linux/rbtree.py index 7e6911db..57eccfbc 100644 --- a/drgn/helpers/linux/rbtree.py +++ b/drgn/helpers/linux/rbtree.py @@ -1,14 +1,15 @@ -# Copyright 2018 - Omar Sandoval +# Copyright 2018-2019 - Omar Sandoval # SPDX-License-Identifier: GPL-3.0+ """ -Linux kernel red-black tree helpers +Red-Black Trees +--------------- -This module provides helpers for working with red-black trees from -"linux/rbtree.h" +The ``drgn.helpers.linux.rbtree`` module provides helpers for working with +red-black trees from :linux:`include/linux/rbtree.h`. """ -from drgn import container_of, Object +from drgn import Object, container_of __all__ = [ @@ -26,7 +27,7 @@ __all__ = [ def RB_EMPTY_NODE(node): """ - bool RB_EMPTY_NODE(struct rb_node *) + .. c:function:: bool RB_EMPTY_NODE(struct rb_node *node) Return whether a red-black tree node is empty, i.e., not inserted in a tree. @@ -36,7 +37,7 @@ def RB_EMPTY_NODE(node): def rb_parent(node): """ - struct rb_node *rb_parent(struct rb_node *) + .. c:function:: struct rb_node *rb_parent(struct rb_node *node) Return the parent node of a red-black tree node. """ @@ -46,10 +47,10 @@ def rb_parent(node): def rb_first(root): """ - struct rb_node *rb_first(struct rb_root *) + .. c:function:: struct rb_node *rb_first(struct rb_root *root) - Return the first node (in sort order) in a red-black tree, or a NULL object - if the tree is empty. + Return the first node (in sort order) in a red-black tree, or a ``NULL`` + object if the tree is empty. """ node = root.rb_node.read_() if not node: @@ -63,10 +64,10 @@ def rb_first(root): def rb_last(root): """ - struct rb_node *rb_last(struct rb_root *) + .. c:function:: struct rb_node *rb_last(struct rb_root *root) - Return the last node (in sort order) in a red-black tree, or a NULL object - if the tree is empty. 
+ Return the last node (in sort order) in a red-black tree, or a ``NULL`` + object if the tree is empty. """ node = root.rb_node.read_() if not node: @@ -80,9 +81,9 @@ def rb_last(root): def rb_next(node): """ - struct rb_node *rb_next(struct rb_node *) + .. c:function:: struct rb_node *rb_next(struct rb_node *node) - Return the next node (in sort order) after a red-black node, or a NULL + Return the next node (in sort order) after a red-black node, or a ``NULL`` object if the node is the last node in the tree or is empty. """ node = node.read_() @@ -108,10 +109,10 @@ def rb_next(node): def rb_prev(node): """ - struct rb_node *rb_prev(struct rb_node *) + .. c:function:: struct rb_node *rb_prev(struct rb_node *node) - Return the previous node (in sort order) before a red-black node, or a NULL - object if the node is the first node in the tree or is empty. + Return the previous node (in sort order) before a red-black node, or a + ``NULL`` object if the node is the first node in the tree or is empty. """ node = node.read_() @@ -136,10 +137,11 @@ def rb_prev(node): def rbtree_inorder_for_each(root): """ - rbtree_inorder_for_each(struct rb_root *) + .. c:function:: rbtree_inorder_for_each(struct rb_root *root) - Return an iterator over all of the nodes in a red-black tree, in sort - order. + Iterate over all of the nodes in a red-black tree, in sort order. + + :return: Iterator of ``struct rb_node *`` objects. """ def aux(node): if node: @@ -151,11 +153,13 @@ def rbtree_inorder_for_each(root): def rbtree_inorder_for_each_entry(type, root, member): """ - rbtree_inorder_for_each_entry(type, struct rb_root *, member) + .. c:function:: rbtree_inorder_for_each_entry(type, struct rb_root *root, member) - Return an iterator over all of the entries in a red-black tree, given the - type of the entry and the struct list_head member in that type. The entries - are returned in sort order. 
+ Iterate over all of the entries in a red-black tree, given the type of the + entry and the ``struct rb_node`` member in that type. The entries are + returned in sort order. + + :return: Iterator of ``type *`` objects. """ for node in rbtree_inorder_for_each(root): yield container_of(node, type, member) @@ -163,13 +167,13 @@ def rbtree_inorder_for_each_entry(type, root, member): def rb_find(type, root, member, key, cmp): """ - type *rb_find(type, struct rb_root *, member, - key_type key, int (*cmp)(key_type, type *)) + .. c:function:: type *rb_find(type, struct rb_root *root, member, key_type key, int (*cmp)(key_type, type *)) Find an entry in a red-black tree, given a key and a comparator function - which takes the key and an entry. The comparator should return -1 if the - key is less than the entry, 1 if it is greater than the entry, or 0 if it - matches the entry. This returns a NULL object if no entry matches the key. + which takes the key and an entry. The comparator should return < 0 if the + key is less than the entry, > 0 if it is greater than the entry, or 0 if it + matches the entry. This returns a ``NULL`` object if no entry matches the + key. Note that this function does not have an analogue in the Linux kernel source code, as tree searches are all open-coded. 
diff --git a/libdrgn/.gitignore b/libdrgn/.gitignore index cf8e206e..cf9d307a 100644 --- a/libdrgn/.gitignore +++ b/libdrgn/.gitignore @@ -14,3 +14,4 @@ /html /libtool /python/constants.c +/python/docstrings.c diff --git a/libdrgn/Makefile.am b/libdrgn/Makefile.am index 41ad0f7c..9bf7f2a7 100644 --- a/libdrgn/Makefile.am +++ b/libdrgn/Makefile.am @@ -55,7 +55,10 @@ if WITH_PYTHON noinst_LTLIBRARIES += _drgn.la endif -_drgn_la_SOURCES = python/drgnpy.h \ +CLEANFILES = python/constants.c python/docstrings.c + +_drgn_la_SOURCES = python/docstrings.h \ + python/drgnpy.h \ python/module.c \ python/object.c \ python/program.c \ @@ -63,17 +66,23 @@ _drgn_la_SOURCES = python/drgnpy.h \ python/type.c \ python/util.c -nodist__drgn_la_SOURCES = python/constants.c +nodist__drgn_la_SOURCES = python/constants.c python/docstrings.c _drgn_la_CFLAGS = $(PYTHON_CFLAGS) -D_GNU_SOURCE -fvisibility=hidden _drgn_la_LDFLAGS = -avoid-version -module -shared -rpath $(pkgpyexecdir) \ -Wl,--exclude-libs,ALL _drgn_la_LIBADD = libdrgnimpl.la -$(top_builddir)/python/constants.c: $(top_srcdir)/drgn.h $(top_srcdir)/build-aux/gen_constants.py - $(PYTHON) $(top_srcdir)/build-aux/gen_constants.py $(top_srcdir)/python < $< > $@ +GEN_CONSTANTS = $(top_srcdir)/build-aux/gen_constants.py +GEN_DOCSTRINGS = $(top_srcdir)/build-aux/gen_docstrings.py -EXTRA_DIST = $(top_srcdir)/build-aux/gen_constants.py +$(top_builddir)/python/constants.c: $(top_srcdir)/drgn.h $(GEN_CONSTANTS) + $(PYTHON) $(GEN_CONSTANTS) $(top_srcdir)/python < $< > $@ + +$(top_builddir)/python/docstrings.c: $(top_srcdir)/../docs/api_reference.rst $(GEN_DOCSTRINGS) + $(PYTHON) $(GEN_DOCSTRINGS) < $< > $@ + +EXTRA_DIST = $(GEN_CONSTANTS) $(GEN_DOCSTRINGS) # This target lists all of the files that need to be distributed for setup.py # sdist. It is based on the automake distdir target. 
diff --git a/libdrgn/build-aux/.gitignore b/libdrgn/build-aux/.gitignore index 8e2be0d3..1620ae23 100644 --- a/libdrgn/build-aux/.gitignore +++ b/libdrgn/build-aux/.gitignore @@ -1,4 +1,5 @@ * !/.gitignore !/gen_constants.py +!/gen_docstrings.py !/version.sh diff --git a/libdrgn/build-aux/gen_constants.py b/libdrgn/build-aux/gen_constants.py index 61d24f65..afdb475e 100644 --- a/libdrgn/build-aux/gen_constants.py +++ b/libdrgn/build-aux/gen_constants.py @@ -1,3 +1,6 @@ +# Copyright 2018-2019 - Omar Sandoval +# SPDX-License-Identifier: GPL-3.0+ + import os.path import re import sys diff --git a/libdrgn/build-aux/gen_docstrings.py b/libdrgn/build-aux/gen_docstrings.py new file mode 100644 index 00000000..7435ed49 --- /dev/null +++ b/libdrgn/build-aux/gen_docstrings.py @@ -0,0 +1,182 @@ +# Copyright 2018-2019 - Omar Sandoval +# SPDX-License-Identifier: GPL-3.0+ + +import re +import sys +from types import SimpleNamespace + + +def strictstartswith(a, b): + return a.startswith(b) and a != b + + +# Quick and dirty reStructuredText parser. It probably can't handle anything +# other than the input in this repository. 
+def parse_rst(input_file): + stack = [ + SimpleNamespace(name='', state='CONTENT', lines=None, + directive_indentation='', content_indentation='') + ] + state = None + for line in input_file: + line = line.rstrip() + indentation = re.match(r'\s*', line).group() + while True: + top = stack[-1] + if top.state == 'DIRECTIVE': + if not line: + top.state = 'BLANK_LINE' + break + elif strictstartswith(indentation, top.directive_indentation): + top.content_indentation = indentation + top.state = 'OPTIONS' + break + elif top.state == 'BLANK_LINE': + if not line: + break + elif strictstartswith(indentation, top.directive_indentation): + top.content_indentation = indentation + top.state = 'CONTENT' + break + elif top.state == 'OPTIONS': + if not line: + top.state = 'OPTIONS_BLANK_LINE' + break + elif indentation.startswith(top.content_indentation): + break + else: + if top.state == 'OPTIONS_BLANK_LINE': + top.state = 'CONTENT' + assert top.state == 'CONTENT' + if (not line or + indentation.startswith(top.content_indentation)): + break + # The current line is indented less than the current indentation, + # so pop the top directive. + if top.lines is not None: + yield top + del stack[-1] + + assert top is stack[-1] + if top.state != 'CONTENT': + continue + + if line: + assert line.startswith(top.content_indentation) + line = line[len(top.content_indentation):] + match = re.match(r'\s*..\s*(?:py:)?([-a-zA-Z0-9_+:.]+)::\s*(.*)', line) + if match: + directive = match.group(1) + argument = match.group(2) + if directive == 'module' or directive == 'currentmodule': + stack[0].name = argument + else: + name = top.name + if directive in {'attribute', 'class', 'exception', 'function', + 'method'}: + lines = [] + paren = argument.find('(') + if paren != -1: + # If the argument includes a signature, add it along + # with the signature end marker used by CPython. + lines.append(argument) + lines.append('--') + lines.append('') + argument = argument[:paren] + if name: + name += '.' 
+ name += argument + else: + lines = None + entry = SimpleNamespace(name=name, state='DIRECTIVE', + lines=lines, + directive_indentation=indentation, + content_indentation=None) + stack.append(entry) + elif top.lines is not None: + top.lines.append(line) + + while len(stack) > 1: + entry = stack.pop() + if entry.lines is not None: + yield entry + + +escapes = [] +for c in range(256): + if c == 0: + e = r'\0' + elif c == 7: + e = r'\a' + elif c == 8: + e = r'\b' + elif c == 9: + e = r'\t' + elif c == 10: + e = r'\n' + elif c == 11: + e = r'\v' + elif c == 12: + e = r'\f' + elif c == 13: + e = r'\r' + elif c == 34: + e = r'\"' + elif c == 92: + e = r'\\' + elif 32 <= c <= 126: + e = chr(c) + else: + e = f'\\x{c:02x}' + escapes.append(e) + + +def escape_string(s): + return ''.join([escapes[c] for c in s.encode('utf-8')]) + + +def gen_docstrings(input_file, output_file, header=False): + path = 'libdrgn/build-aux/gen_docstrings.py' + if header: + output_file.write(f"""\ +/* + * Generated by {path} -H. + * + * Note that this is generated manually because automake and other build systems + * have trouble with generated headers. Regenerate this if a new docstring is + * added. The docstring contents themselves are automatically regenerated by the + * build system. + * + * Before Python 3.7, various docstring fields were defined as char * (see + * https://bugs.python.org/issue28761). We still want the strings to be + * read-only, so just cast away the const. + */ + +""") + else: + output_file.write(f'/* Generated by {path}. 
*/\n\n') + directives = sorted(parse_rst(input_file), key=lambda x: x.name) + for directive in directives: + while directive.lines and not directive.lines[-1]: + del directive.lines[-1] + name = directive.name.replace('.', '_') + '_DOC' + if header: + output_file.write('extern ') + output_file.write(f"const char {name}[]") + if not header: + output_file.write(' =') + if directive.lines: + for i, line in enumerate(directive.lines): + output_file.write(f'\n\t"{escape_string(line)}') + if i != len(directive.lines) - 1: + output_file.write('\\n') + output_file.write('"') + else: + output_file.write(' ""') + output_file.write(';\n') + if header: + output_file.write(f'#define {name} (char *){name}\n') + + +if __name__ == '__main__': + gen_docstrings(sys.stdin, sys.stdout, '-H' in sys.argv[1:]) diff --git a/libdrgn/python/docstrings.h b/libdrgn/python/docstrings.h new file mode 100644 index 00000000..d82cfa4f --- /dev/null +++ b/libdrgn/python/docstrings.h @@ -0,0 +1,179 @@ +/* + * Generated by libdrgn/build-aux/gen_docstrings.py -H. + * + * Note that this is generated manually because automake and other build systems + * have trouble with generated headers. Regenerate this if a new docstring is + * added. The docstring contents themselves are automatically regenerated by the + * build system. + * + * Before Python 3.7, various docstring fields were defined as char * (see + * https://bugs.python.org/issue28761). We still want the strings to be + * read-only, so just cast away the const. 
+ */ + +extern const char drgn_FaultError_DOC[]; +#define drgn_FaultError_DOC (char *)drgn_FaultError_DOC +extern const char drgn_FileFormatError_DOC[]; +#define drgn_FileFormatError_DOC (char *)drgn_FileFormatError_DOC +extern const char drgn_NULL_DOC[]; +#define drgn_NULL_DOC (char *)drgn_NULL_DOC +extern const char drgn_Object_DOC[]; +#define drgn_Object_DOC (char *)drgn_Object_DOC +extern const char drgn_Object___getattribute___DOC[]; +#define drgn_Object___getattribute___DOC (char *)drgn_Object___getattribute___DOC +extern const char drgn_Object___getitem___DOC[]; +#define drgn_Object___getitem___DOC (char *)drgn_Object___getitem___DOC +extern const char drgn_Object___len___DOC[]; +#define drgn_Object___len___DOC (char *)drgn_Object___len___DOC +extern const char drgn_Object_address__DOC[]; +#define drgn_Object_address__DOC (char *)drgn_Object_address__DOC +extern const char drgn_Object_address_of__DOC[]; +#define drgn_Object_address_of__DOC (char *)drgn_Object_address_of__DOC +extern const char drgn_Object_bit_field_size__DOC[]; +#define drgn_Object_bit_field_size__DOC (char *)drgn_Object_bit_field_size__DOC +extern const char drgn_Object_bit_offset__DOC[]; +#define drgn_Object_bit_offset__DOC (char *)drgn_Object_bit_offset__DOC +extern const char drgn_Object_byteorder__DOC[]; +#define drgn_Object_byteorder__DOC (char *)drgn_Object_byteorder__DOC +extern const char drgn_Object_member__DOC[]; +#define drgn_Object_member__DOC (char *)drgn_Object_member__DOC +extern const char drgn_Object_prog__DOC[]; +#define drgn_Object_prog__DOC (char *)drgn_Object_prog__DOC +extern const char drgn_Object_read__DOC[]; +#define drgn_Object_read__DOC (char *)drgn_Object_read__DOC +extern const char drgn_Object_string__DOC[]; +#define drgn_Object_string__DOC (char *)drgn_Object_string__DOC +extern const char drgn_Object_type__DOC[]; +#define drgn_Object_type__DOC (char *)drgn_Object_type__DOC +extern const char drgn_Object_value__DOC[]; +#define drgn_Object_value__DOC (char 
*)drgn_Object_value__DOC +extern const char drgn_Program_DOC[]; +#define drgn_Program_DOC (char *)drgn_Program_DOC +extern const char drgn_Program___getitem___DOC[]; +#define drgn_Program___getitem___DOC (char *)drgn_Program___getitem___DOC +extern const char drgn_Program_byteorder_DOC[]; +#define drgn_Program_byteorder_DOC (char *)drgn_Program_byteorder_DOC +extern const char drgn_Program_constant_DOC[]; +#define drgn_Program_constant_DOC (char *)drgn_Program_constant_DOC +extern const char drgn_Program_flags_DOC[]; +#define drgn_Program_flags_DOC (char *)drgn_Program_flags_DOC +extern const char drgn_Program_function_DOC[]; +#define drgn_Program_function_DOC (char *)drgn_Program_function_DOC +extern const char drgn_Program_read_DOC[]; +#define drgn_Program_read_DOC (char *)drgn_Program_read_DOC +extern const char drgn_Program_type_DOC[]; +#define drgn_Program_type_DOC (char *)drgn_Program_type_DOC +extern const char drgn_Program_variable_DOC[]; +#define drgn_Program_variable_DOC (char *)drgn_Program_variable_DOC +extern const char drgn_Program_word_size_DOC[]; +#define drgn_Program_word_size_DOC (char *)drgn_Program_word_size_DOC +extern const char drgn_ProgramFlags_DOC[]; +#define drgn_ProgramFlags_DOC (char *)drgn_ProgramFlags_DOC +extern const char drgn_ProgramFlags_IS_LINUX_KERNEL_DOC[]; +#define drgn_ProgramFlags_IS_LINUX_KERNEL_DOC (char *)drgn_ProgramFlags_IS_LINUX_KERNEL_DOC +extern const char drgn_Qualifiers_DOC[]; +#define drgn_Qualifiers_DOC (char *)drgn_Qualifiers_DOC +extern const char drgn_Qualifiers_ATOMIC_DOC[]; +#define drgn_Qualifiers_ATOMIC_DOC (char *)drgn_Qualifiers_ATOMIC_DOC +extern const char drgn_Qualifiers_CONST_DOC[]; +#define drgn_Qualifiers_CONST_DOC (char *)drgn_Qualifiers_CONST_DOC +extern const char drgn_Qualifiers_RESTRICT_DOC[]; +#define drgn_Qualifiers_RESTRICT_DOC (char *)drgn_Qualifiers_RESTRICT_DOC +extern const char drgn_Qualifiers_VOLATILE_DOC[]; +#define drgn_Qualifiers_VOLATILE_DOC (char *)drgn_Qualifiers_VOLATILE_DOC 
+extern const char drgn_Type_DOC[]; +#define drgn_Type_DOC (char *)drgn_Type_DOC +extern const char drgn_Type_enumerators_DOC[]; +#define drgn_Type_enumerators_DOC (char *)drgn_Type_enumerators_DOC +extern const char drgn_Type_is_complete_DOC[]; +#define drgn_Type_is_complete_DOC (char *)drgn_Type_is_complete_DOC +extern const char drgn_Type_is_signed_DOC[]; +#define drgn_Type_is_signed_DOC (char *)drgn_Type_is_signed_DOC +extern const char drgn_Type_is_variadic_DOC[]; +#define drgn_Type_is_variadic_DOC (char *)drgn_Type_is_variadic_DOC +extern const char drgn_Type_kind_DOC[]; +#define drgn_Type_kind_DOC (char *)drgn_Type_kind_DOC +extern const char drgn_Type_length_DOC[]; +#define drgn_Type_length_DOC (char *)drgn_Type_length_DOC +extern const char drgn_Type_members_DOC[]; +#define drgn_Type_members_DOC (char *)drgn_Type_members_DOC +extern const char drgn_Type_name_DOC[]; +#define drgn_Type_name_DOC (char *)drgn_Type_name_DOC +extern const char drgn_Type_parameters_DOC[]; +#define drgn_Type_parameters_DOC (char *)drgn_Type_parameters_DOC +extern const char drgn_Type_qualified_DOC[]; +#define drgn_Type_qualified_DOC (char *)drgn_Type_qualified_DOC +extern const char drgn_Type_qualifiers_DOC[]; +#define drgn_Type_qualifiers_DOC (char *)drgn_Type_qualifiers_DOC +extern const char drgn_Type_size_DOC[]; +#define drgn_Type_size_DOC (char *)drgn_Type_size_DOC +extern const char drgn_Type_tag_DOC[]; +#define drgn_Type_tag_DOC (char *)drgn_Type_tag_DOC +extern const char drgn_Type_type_DOC[]; +#define drgn_Type_type_DOC (char *)drgn_Type_type_DOC +extern const char drgn_Type_type_name_DOC[]; +#define drgn_Type_type_name_DOC (char *)drgn_Type_type_name_DOC +extern const char drgn_Type_unqualified_DOC[]; +#define drgn_Type_unqualified_DOC (char *)drgn_Type_unqualified_DOC +extern const char drgn_TypeKind_DOC[]; +#define drgn_TypeKind_DOC (char *)drgn_TypeKind_DOC +extern const char drgn_TypeKind_ARRAY_DOC[]; +#define drgn_TypeKind_ARRAY_DOC (char *)drgn_TypeKind_ARRAY_DOC 
+extern const char drgn_TypeKind_BOOL_DOC[]; +#define drgn_TypeKind_BOOL_DOC (char *)drgn_TypeKind_BOOL_DOC +extern const char drgn_TypeKind_COMPLEX_DOC[]; +#define drgn_TypeKind_COMPLEX_DOC (char *)drgn_TypeKind_COMPLEX_DOC +extern const char drgn_TypeKind_ENUM_DOC[]; +#define drgn_TypeKind_ENUM_DOC (char *)drgn_TypeKind_ENUM_DOC +extern const char drgn_TypeKind_FLOAT_DOC[]; +#define drgn_TypeKind_FLOAT_DOC (char *)drgn_TypeKind_FLOAT_DOC +extern const char drgn_TypeKind_FUNCTION_DOC[]; +#define drgn_TypeKind_FUNCTION_DOC (char *)drgn_TypeKind_FUNCTION_DOC +extern const char drgn_TypeKind_INT_DOC[]; +#define drgn_TypeKind_INT_DOC (char *)drgn_TypeKind_INT_DOC +extern const char drgn_TypeKind_POINTER_DOC[]; +#define drgn_TypeKind_POINTER_DOC (char *)drgn_TypeKind_POINTER_DOC +extern const char drgn_TypeKind_STRUCT_DOC[]; +#define drgn_TypeKind_STRUCT_DOC (char *)drgn_TypeKind_STRUCT_DOC +extern const char drgn_TypeKind_TYPEDEF_DOC[]; +#define drgn_TypeKind_TYPEDEF_DOC (char *)drgn_TypeKind_TYPEDEF_DOC +extern const char drgn_TypeKind_UNION_DOC[]; +#define drgn_TypeKind_UNION_DOC (char *)drgn_TypeKind_UNION_DOC +extern const char drgn_TypeKind_VOID_DOC[]; +#define drgn_TypeKind_VOID_DOC (char *)drgn_TypeKind_VOID_DOC +extern const char drgn_array_type_DOC[]; +#define drgn_array_type_DOC (char *)drgn_array_type_DOC +extern const char drgn_bool_type_DOC[]; +#define drgn_bool_type_DOC (char *)drgn_bool_type_DOC +extern const char drgn_cast_DOC[]; +#define drgn_cast_DOC (char *)drgn_cast_DOC +extern const char drgn_complex_type_DOC[]; +#define drgn_complex_type_DOC (char *)drgn_complex_type_DOC +extern const char drgn_container_of_DOC[]; +#define drgn_container_of_DOC (char *)drgn_container_of_DOC +extern const char drgn_enum_type_DOC[]; +#define drgn_enum_type_DOC (char *)drgn_enum_type_DOC +extern const char drgn_float_type_DOC[]; +#define drgn_float_type_DOC (char *)drgn_float_type_DOC +extern const char drgn_function_type_DOC[]; +#define drgn_function_type_DOC (char 
*)drgn_function_type_DOC +extern const char drgn_int_type_DOC[]; +#define drgn_int_type_DOC (char *)drgn_int_type_DOC +extern const char drgn_pointer_type_DOC[]; +#define drgn_pointer_type_DOC (char *)drgn_pointer_type_DOC +extern const char drgn_program_from_core_dump_DOC[]; +#define drgn_program_from_core_dump_DOC (char *)drgn_program_from_core_dump_DOC +extern const char drgn_program_from_kernel_DOC[]; +#define drgn_program_from_kernel_DOC (char *)drgn_program_from_kernel_DOC +extern const char drgn_program_from_pid_DOC[]; +#define drgn_program_from_pid_DOC (char *)drgn_program_from_pid_DOC +extern const char drgn_reinterpret_DOC[]; +#define drgn_reinterpret_DOC (char *)drgn_reinterpret_DOC +extern const char drgn_struct_type_DOC[]; +#define drgn_struct_type_DOC (char *)drgn_struct_type_DOC +extern const char drgn_typedef_type_DOC[]; +#define drgn_typedef_type_DOC (char *)drgn_typedef_type_DOC +extern const char drgn_union_type_DOC[]; +#define drgn_union_type_DOC (char *)drgn_union_type_DOC +extern const char drgn_void_type_DOC[]; +#define drgn_void_type_DOC (char *)drgn_void_type_DOC diff --git a/libdrgn/python/drgnpy.h b/libdrgn/python/drgnpy.h index 86bc45c6..8afc886e 100644 --- a/libdrgn/python/drgnpy.h +++ b/libdrgn/python/drgnpy.h @@ -9,6 +9,7 @@ #include #include "structmember.h" +#include "docstrings.h" #include "../drgn.h" #include "../program.h" @@ -117,6 +118,7 @@ static inline DrgnObject *DrgnObject_alloc(Program *prog) int Program_hold_type(Program *prog, DrgnType *type); +PyObject *DrgnObject_NULL(PyObject *self, PyObject *args, PyObject *kwds); DrgnObject *cast(PyObject *self, PyObject *args, PyObject *kwds); DrgnObject *reinterpret(PyObject *self, PyObject *args, PyObject *kwds); DrgnObject *DrgnObject_container_of(PyObject *self, PyObject *args, diff --git a/libdrgn/python/module.c b/libdrgn/python/module.c index 4a0719d7..d3ac939e 100644 --- a/libdrgn/python/module.c +++ b/libdrgn/python/module.c @@ -58,163 +58,61 @@ DRGNPY_PUBLIC PyObject 
*set_drgn_error(struct drgn_error *err) } static PyMethodDef drgn_methods[] = { + {"NULL", (PyCFunction)DrgnObject_NULL, METH_VARARGS | METH_KEYWORDS, + drgn_NULL_DOC}, {"cast", (PyCFunction)cast, METH_VARARGS | METH_KEYWORDS, -"cast(type: Union[str, Type], obj: Object) -> Object\n" -"\n" -"Return the value of the given object casted to another type.\n" -"\n" -"Objects with a scalar type (integer, boolean, enumerated,\n" -"floating-point, or pointer) can be casted to a different scalar type.\n" -"Other objects can only be casted to the same type. This always results\n" -"in a value object. See also reinterpret()."}, + drgn_cast_DOC}, {"reinterpret", (PyCFunction)reinterpret, METH_VARARGS | METH_KEYWORDS, -"reinterpret(type: Union[str, Type], obj: Object,\n" -" byteorder: Optional[str] = None) -> Object\n" -"\n" -"Return a copy of the given object reinterpreted as another type and/or\n" -"byte order. If byte order is None, it defaults to the program byte\n" -"order.\n" -"\n" -"This reinterprets the raw memory of the object, so an object can be\n" -"reinterpreted as any other type. However, value objects with a scalar\n" -"type cannot be reinterpreted, as their memory layout in the program is\n" -"not known. Reinterpreting a reference results in a reference, and\n" -"reinterpreting a value results in a value. See also cast()."}, + drgn_reinterpret_DOC}, {"container_of", (PyCFunction)DrgnObject_container_of, - METH_VARARGS | METH_KEYWORDS, -"container_of(ptr: Object, type: Union[str, Type], member: str) -> Object\n" -"\n" -"Return the containing object of the object pointed to by the given\n" -"pointer object. The given type is the type of the containing object, and\n" -"the given member is the name of the member in that type. 
This\n" -"corresponds to the container_of() macro in C."}, + METH_VARARGS | METH_KEYWORDS, drgn_container_of_DOC}, {"mock_program", (PyCFunction)mock_program, METH_VARARGS | METH_KEYWORDS, -"mock_program(word_size: int, byteorder: str,\n" -" segments: Optional[Sequence[MockMemorySegment]] = None,\n" -" types: Optional[Sequence[MockType]] = None,\n" -" objects: Optional[Sequence[MockObject]] = None) -> Program\n" +"mock_program(word_size, byteorder, segments=None, types=None, objects=None)\n" +"--\n" "\n" -"Return a \"mock\" Program from the given word size, byteorder, and lists\n" -"of MockMemorySegment, MockType, and MockObject. This is usually used for\n" -"testing."}, +"Create a mock :class:`Program` for testing.\n" +"\n" +":param int word_size: :attr:`Program.word_size`\n" +":param str byteorder: :attr:`Program.byteorder`\n" +":param segments: Memory segments.\n" +":type segments: list[MockMemorySegment] or None\n" +":param types: Type definitions.\n" +":type types: list[MockType] or None\n" +":param objects: Object definitions.\n" +":type objects: list[MockObject] or None\n" +":rtype: Program"}, {"program_from_core_dump", (PyCFunction)program_from_core_dump, - METH_VARARGS | METH_KEYWORDS, -"program_from_core_dump(path: str, verbose: bool = False) -> Program\n" -"\n" -"Create a Program from a core dump file. The type of program (e.g.,\n" -"userspace or kernel) will be determined automatically.\n" -"\n" -"If verbose is True, this will print messages to stderr about not being\n" -"able to find debugging symbols, etc."}, + METH_VARARGS | METH_KEYWORDS, drgn_program_from_core_dump_DOC}, {"program_from_kernel", (PyCFunction)program_from_kernel, - METH_VARARGS | METH_KEYWORDS, -"program_from_kernel(verbose: bool = False) -> Program\n" -"\n" -"Create a Program from the running operating system kernel. 
This requires\n" -"root privileges.\n" -"\n" -"If verbose is True, this will print messages to stderr about not being\n" -"able to find kernel modules, debugging symbols, etc."}, + METH_VARARGS | METH_KEYWORDS, drgn_program_from_kernel_DOC}, {"program_from_pid", (PyCFunction)program_from_pid, - METH_VARARGS | METH_KEYWORDS, -"program_from_pid(pid: int) -> Program\n" -"\n" -"Create a Program from a running program with the given PID. This\n" -"requires appropriate permissions (on Linux, ptrace(2) attach\n" -"permissions)."}, + METH_VARARGS | METH_KEYWORDS, drgn_program_from_pid_DOC}, {"void_type", (PyCFunction)void_type, METH_VARARGS | METH_KEYWORDS, -"void_type(qualifiers: int = 0) -> Type\n" -"\n" -"Return a new void type. It has kind TypeKind.VOID."}, + drgn_void_type_DOC}, {"int_type", (PyCFunction)int_type, METH_VARARGS | METH_KEYWORDS, -"int_type(name: str, size: int, is_signed: bool,\n" -" qualifiers: int = 0) -> Type\n" -"\n" -"Return a new integer type. It has kind TypeKind.INT, a name, a size, and\n" -"a signedness."}, + drgn_int_type_DOC}, {"bool_type", (PyCFunction)bool_type, METH_VARARGS | METH_KEYWORDS, -"bool_type(name: str, size: int, qualifiers: int = 0) -> Type\n" -"\n" -"Return a new boolean type. It has kind TypeKind.BOOL, a name, and a\n" -"size."}, + drgn_bool_type_DOC}, {"float_type", (PyCFunction)float_type, METH_VARARGS | METH_KEYWORDS, -"float_type(name, size, qualifiers=0) -> new floating-point type\n" -"\n" -"Return a new floating-point type. It has kind TypeKind.FLOAT, a string\n" -"name, and an integer size."}, + drgn_float_type_DOC}, {"complex_type", (PyCFunction)complex_type, - METH_VARARGS | METH_KEYWORDS, -"complex_type(name: str, size: int, type: Type,\n" -" qualifiers: int = 0) -> Type\n" -"\n" -"Return a new complex type. 
It has kind TypeKind.COMPLEX, a name, a\n" -"size, and a corresponding real type, which must be an unqualified\n" -"floating-point or integer Type object."}, + METH_VARARGS | METH_KEYWORDS, drgn_complex_type_DOC}, {"struct_type", (PyCFunction)struct_type, METH_VARARGS | METH_KEYWORDS, -"struct_type(tag: Optional[str], size: int, members: Optional[Sequence],\n" -" qualifiers: int = 0) -> Type\n" -"\n" -"Return a new structure type. It has kind TypeKind.STRUCT, a tag, a size,\n" -"and a list of members. The tag may be None, which indicates an anonymous\n" -"type. The members may be None, which indicates an incomplete type; in\n" -"this case, the size must be zero. Otherwise, the members must be a list\n" -"of (type, string name, integer bit offset, integer bit field size)\n" -"tuples. The type of a member must be a Type object or a callable\n" -"returning a Type object. In the latter case, the callable will be called\n" -"the first time that the member is accessed. The name of a member may be\n" -"None, which indicates an unnamed member. The bit field size should be\n" -"non-zero for bit fields and zero otherwise. The name, bit offset, and\n" -"bit field size can be omitted; the name defaults to None and the bit\n" -"offset and bit field size default to zero."}, + drgn_struct_type_DOC}, {"union_type", (PyCFunction)union_type, METH_VARARGS | METH_KEYWORDS, -"union_type(tag: Optional[str], size: int, members: Optional[Sequence],\n" -" qualifiers: int = 0) -> Type\n" -"\n" -"Return a new union type. It has kind TypeKind.UNION, a tag, a size, and\n" -"a list of members. See struct_type()."}, + drgn_union_type_DOC}, {"enum_type", (PyCFunction)enum_type, METH_VARARGS | METH_KEYWORDS, -"enum_type(tag: Optional[str], type: Optional[Type],\n" -" enumerators: Optional[Sequence[Tuple[str, int]],\n" -" qualifiers: int = 0) -> Type\n" -"\n" -"Return a new enumerated type. It has kind TypeKind.ENUM, a tag, a\n" -"compatible integer type, and a list of enumerators. 
The tag may be None,\n" -"which indicates an anonymous type. The type and enumerators may be None,\n" -"which indicates an incomplete type. Otherwise, the type must be an\n" -"integer Type object and the enumerators must be a list of (string name,\n" -"integer value) tuples."}, + drgn_enum_type_DOC}, {"typedef_type", (PyCFunction)typedef_type, METH_VARARGS | METH_KEYWORDS, -"typedef_type(name: str, type: Type, qualifiers: int = 0) -> Type\n" -"\n" -"Return a new typedef type. It has kind TypeKind.TYPEDEF, a name, and an\n" -"aliased type."}, + drgn_typedef_type_DOC}, {"pointer_type", (PyCFunction)pointer_type, - METH_VARARGS | METH_KEYWORDS, -"pointer_type(size: int, type: Type, qualifiers: int = 0) -> Type\n" -"\n" -"Return a new pointer type. It has kind TypeKind.POINTER, a size, and a\n" -"referenced type."}, + METH_VARARGS | METH_KEYWORDS, drgn_pointer_type_DOC}, {"array_type", (PyCFunction)array_type, METH_VARARGS | METH_KEYWORDS, -"array_type(length: Optional[int], type: Type,\n" -" qualifiers: int = 0) -> Type\n" -"\n" -"Return a new array type. It has kind TypeKind.ARRAY, a length, and an\n" -"element type. The length may be None, which indicates an incomplete\n" -"array type."}, + drgn_array_type_DOC}, {"function_type", (PyCFunction)function_type, - METH_VARARGS | METH_KEYWORDS, -"function_type(type: Type, parameters: Sequence,\n" -" is_variadic: bool = False, qualifiers: int = 0) -> Type\n" -"\n" -"Return a new function type. It has kind TypeKind.FUNCTION, a return\n" -"type, a list of parameters, and may be variadic. The parameters must be\n" -"a list of (type, string name) tuples. Each parameter type must be a Type\n" -"object or a callable returning a Type object. In the latter case, the\n" -"callable will be called the first time that the parameter is accessed. A\n" -"parameter name may be None, which indicates an unnamed parameter. 
The\n" -"parameter name is optional and defaults to None."}, + METH_VARARGS | METH_KEYWORDS, drgn_function_type_DOC}, {}, }; @@ -228,19 +126,6 @@ static struct PyModuleDef drgnmodule = { drgn_methods, }; -#define FaultError_DOC \ -"Bad memory access.\n" \ -"\n" \ -"This error is raised when a memory access is attempted to an address\n" \ -"which is not valid in a program, or when accessing out of bounds of a\n" \ -"value object." - -#define FileFormatError_DOC \ -"Invalid file.\n" \ -"\n" \ -"This is error raised when a file cannot be parsed according to its\n" \ -"expected format (e.g., ELF or DWARF)." - DRGNPY_PUBLIC PyMODINIT_FUNC PyInit__drgn(void) { PyObject *m; @@ -260,14 +145,14 @@ DRGNPY_PUBLIC PyMODINIT_FUNC PyInit__drgn(void) PyModule_AddObject(m, "__version__", version); FaultError = PyErr_NewExceptionWithDoc("_drgn.FaultError", - FaultError_DOC, NULL, NULL); + drgn_FaultError_DOC, NULL, NULL); if (!FaultError) goto err; PyModule_AddObject(m, "FaultError", FaultError); FileFormatError = PyErr_NewExceptionWithDoc("_drgn.FileFormatError", - FileFormatError_DOC, NULL, - NULL); + drgn_FileFormatError_DOC, + NULL, NULL); if (!FileFormatError) goto err; PyModule_AddObject(m, "FileFormatError", FileFormatError); diff --git a/libdrgn/python/object.c b/libdrgn/python/object.c index 557ff8e3..a61b657e 100644 --- a/libdrgn/python/object.c +++ b/libdrgn/python/object.c @@ -464,7 +464,7 @@ static int DrgnObject_init(DrgnObject *self, PyObject *args, PyObject *kwds) { static char *keywords[] = { "prog", "type", "value", "address", "byteorder", - "bit_offset", "bit_field_size",NULL, + "bit_offset", "bit_field_size", NULL, }; struct drgn_error *err; Program *prog; @@ -1691,96 +1691,40 @@ static PyObject *DrgnObject_dir(DrgnObject *self) } static PyGetSetDef DrgnObject_getset[] = { - {"prog_", (getter)DrgnObject_get_prog, NULL, -"Program\n" -"\n" -"Program that this object is from"}, - {"type_", (getter)DrgnObject_get_type, NULL, -"Type\n" -"\n" -"Type of this object"}, 
+ {"prog_", (getter)DrgnObject_get_prog, NULL, drgn_Object_prog__DOC}, + {"type_", (getter)DrgnObject_get_type, NULL, drgn_Object_type__DOC}, {"address_", (getter)DrgnObject_get_address, NULL, -"Optional[int]\n" -"\n" -"Address of this object if it is a reference, None if it is a value"}, + drgn_Object_address__DOC}, {"byteorder_", (getter)DrgnObject_get_byteorder, NULL, -"Optional[str]\n" -"\n" -"Byte order of this object (either 'little' or 'big') if it is a\n" -"reference or a non-primitive value, None otherwise"}, + drgn_Object_byteorder__DOC}, {"bit_offset_", (getter)DrgnObject_get_bit_offset, NULL, -"Optional[int]\n" -"\n" -"Offset in bits from this object's address to the beginning of the object\n" -"if it is a reference or a non-primitive value, None otherwise"}, + drgn_Object_bit_offset__DOC}, {"bit_field_size_", (getter)DrgnObject_get_bit_field_size, NULL, -"Optional[int]\n" -"\n" -"Size in bits of this object if it is a bit field, None if not"}, + drgn_Object_bit_field_size__DOC}, {}, }; static PyMethodDef DrgnObject_methods[] = { {"__getitem__", (PyCFunction)DrgnObject_subscript, - METH_O | METH_COEXIST, -"__getitem__(self, idx) -> Object\n" -"\n" -"Implement self[idx]. Return an Object representing the array element at\n" -"the given index.\n" -"\n" -"This is only valid for pointers and arrays."}, + METH_O | METH_COEXIST, drgn_Object___getitem___DOC}, {"value_", (PyCFunction)DrgnObject_value, METH_NOARGS, -"value_(self) -> Any\n" -"\n" -"Return the value of this object as a Python object.\n" -"\n" -"For basic types (int, bool, etc.), this returns an object of the\n" -"directly corresponding Python type. For pointers, this returns the\n" -"address value of the pointer. For enums, this returns an int. For\n" -"structures and unions, this returns a dict of members. 
For arrays, this\n" -"returns a list of values."}, + drgn_Object_value__DOC}, {"string_", (PyCFunction)DrgnObject_string, METH_NOARGS, -"string_(self) -> bytes\n" -"\n" -"Return the null-terminated string pointed to by this object as bytes.\n" -"\n" -"This is only valid for pointers and arrays."}, + drgn_Object_string__DOC}, {"member_", (PyCFunction)DrgnObject_member, - METH_VARARGS | METH_KEYWORDS, -"member_(self, name: str) -> Object\n" -"\n" -"Return an Object representing the given structure or union member.\n" -"\n" -"This is only valid for structs, unions, and pointers to either. Normally\n" -"the dot operator (\".\") can be used to accomplish the same thing, but\n" -"this method can be used if there is a name conflict with an Object\n" -"member or method."}, + METH_VARARGS | METH_KEYWORDS, drgn_Object_member__DOC}, {"address_of_", (PyCFunction)DrgnObject_address_of, METH_NOARGS, -"address_of_(self) -> Object\n" -"\n" -"Return an Object pointing to this object.\n" -"\n" -"This corresponds to the address-of (\"&\") operator in C. It is only\n" -"possible for reference objects, as value objects don't have an address\n" -"in the program."}, - {"read_", (PyCFunction)DrgnObject_read, - METH_NOARGS, -"read_(self) -> Object\n" -"\n" -"Read this object (which may be a reference or a value) and return it as\n" -"a value object. 
This is useful if the object can change in the running\n" -"program (but of course nothing stops the program from modifying the\n" -"object while it is being read)."}, + drgn_Object_address_of__DOC}, + {"read_", (PyCFunction)DrgnObject_read, METH_NOARGS, + drgn_Object_read__DOC}, {"__round__", (PyCFunction)DrgnObject_round, METH_VARARGS | METH_KEYWORDS}, {"__trunc__", (PyCFunction)DrgnObject_trunc, METH_NOARGS}, {"__floor__", (PyCFunction)DrgnObject_floor, METH_NOARGS}, {"__ceil__", (PyCFunction)DrgnObject_ceil, METH_NOARGS}, - {"__dir__", (PyCFunction)DrgnObject_dir, - METH_NOARGS, + {"__dir__", (PyCFunction)DrgnObject_dir, METH_NOARGS, "dir() implementation which includes structure and union members."}, - {"__format__", (PyCFunction)DrgnObject_format, - METH_O, + {"__format__", (PyCFunction)DrgnObject_format, METH_O, "Object formatter."}, {}, }; @@ -1827,63 +1771,6 @@ static PyMappingMethods DrgnObject_as_mapping = { (binaryfunc)DrgnObject_subscript, /* mp_subscript */ }; -#define DrgnObject_DOC \ -"An Object represents a symbol or value in a program. The object may be\n" \ -"in the memory of the program (a \"reference\").\n" \ -"\n" \ -">>> Object(prog, 'int', address=0xffffffffc09031a0)\n" \ -"\n" \ -"It can also be a temporary computed value (a \"value\").\n" \ -"\n" \ -">>> Object(prog, 'int', value=4)\n" \ -"\n" \ -"All Object instances have two members: prog_, the program that the\n" \ -"object is from; and type_, the type of the object. Reference objects\n" \ -"have an address_ member. 
Objects may also have a byteorder_,\n" \ -"bit_offset_, and bit_field_size.\n" \ -"\n" \ -"repr() of an Object returns a Python representation of the object.\n" \ -"\n" \ -">>> print(repr(prog['jiffies']))\n" \ -"Object(prog, 'volatile long unsigned int', address=0xffffffffbf005000)\n" \ -"\n" \ -"str() returns a representation of the object in programming language\n" \ -"syntax.\n" \ -"\n" \ -">>> print(prog['jiffies'])\n" \ -"(volatile long unsigned int)4326237045\n" \ -"\n" \ -"Note that the drgn CLI is set up so that Objects are displayed with\n" \ -"str() instead of repr() (the latter is the default behavior of Python's\n" \ -"interactive mode). This means that in the drgn CLI, the call to print()\n" \ -"in the second example above is not necessary.\n" \ -"\n" \ -"Objects support their programming language's operators wherever\n" \ -"possible. E.g., structure members can be accessed with the dot (\".\")\n" \ -"operator, arrays can be subscripted with \"[]\", arithmetic can be\n" \ -"performed, and objects can be compared.\n" \ -"\n" \ -">>> print(prog['init_task'].pid)\n" \ -"(pid_t)0\n" \ -">>> print(prog['init_task'].comm[0])\n" \ -"(char)115\n" \ -">>> print(repr(prog['init_task'].nsproxy.mnt_ns.mounts + 1))\n" \ -"Object(prog, 'unsigned int', value=34)\n" \ -">>> prog['init_task'].nsproxy.mnt_ns.pending_mounts > 0\n" \ -"False\n" \ -"\n" \ -"Note that because the C structure dereference operator (\"->\") is not\n" \ -"valid syntax in Python, \".\" is also used to access members of pointers\n" \ -"to structures. Similarly, the indirection operator (\"*\") is not valid\n" \ -"syntax in Python, so pointers can be dereferenced with \"[0]\" (e.g.,\n" \ -"write \"p[0]\" instead of \"*p\"). The address-of operator (\"&\") is\n" \ -"available as the address_of_() method.\n" \ -"\n" \ -"Object members and methods are named with a trailing underscore to avoid\n" \ -"conflicting with structure or union members. 
The helper methods always\n" \ -"take precedence over structure members; use member_() if there is a\n" \ -"conflict." - PyTypeObject DrgnObject_type = { PyVarObject_HEAD_INIT(NULL, 0) "_drgn.Object", /* tp_name */ @@ -1905,7 +1792,7 @@ PyTypeObject DrgnObject_type = { NULL, /* tp_setattro */ NULL, /* tp_as_buffer */ Py_TPFLAGS_DEFAULT, /* tp_flags */ - DrgnObject_DOC, /* tp_doc */ + drgn_Object_DOC, /* tp_doc */ NULL, /* tp_traverse */ NULL, /* tp_clear */ DrgnObject_richcompare, /* tp_richcompare */ @@ -1926,6 +1813,30 @@ PyTypeObject DrgnObject_type = { }; +PyObject *DrgnObject_NULL(PyObject *self, PyObject *args, PyObject *kwds) +{ + static char *keywords[] = {"prog", "type", NULL}; + PyObject *prog_obj, *type_obj; + PyObject *a, *k, *ret; + + if (!PyArg_ParseTupleAndKeywords(args, kwds, "OO:NULL", keywords, + &prog_obj, &type_obj)) + return NULL; + + a = Py_BuildValue("OO", prog_obj, type_obj); + if (!a) + return NULL; + k = Py_BuildValue("{s:i}", "value", 0); + if (!k) { + Py_DECREF(a); + return NULL; + } + ret = PyObject_Call((PyObject *)&DrgnObject_type, a, k); + Py_DECREF(k); + Py_DECREF(a); + return ret; +} + DrgnObject *cast(PyObject *self, PyObject *args, PyObject *kwds) { static char *keywords[] = {"type", "obj", NULL}; diff --git a/libdrgn/python/program.c b/libdrgn/python/program.c index f0513ecc..347642d9 100644 --- a/libdrgn/python/program.c +++ b/libdrgn/python/program.c @@ -291,103 +291,26 @@ static PyObject *Program_get_byteorder(Program *self, void *arg) static PyMethodDef Program_methods[] = { {"__getitem__", (PyCFunction)Program_subscript, METH_O | METH_COEXIST, -"__getitem__(self, name) -> Object\n" -"\n" -"Implement self[name]. Return an Object (variable, constant, or function)\n" -"with the given name.\n" -"\n" -"If there are multiple objects with the same name, one is returned\n" -"arbitrarily. 
In this case, the constant(), function(), or variable()\n" -"methods can be used instead."}, + drgn_Program___getitem___DOC}, {"read", (PyCFunction)Program_read, METH_VARARGS | METH_KEYWORDS, -"read(self, address: int, size: int, physical: bool = False) -> bytes\n" -"\n" -"Return size bytes of memory starting at address in the program. The\n" -"address may be virtual (the default) or physical if the program supports\n" -"it.\n" -"\n" -">>> prog.read(0xffffffffbe012b40, 16)\n" -"b'swapper/0\\x00\\x00\\x00\\x00\\x00\\x00\\x00'"}, + drgn_Program_read_DOC}, {"type", (PyCFunction)Program_find_type, METH_VARARGS | METH_KEYWORDS, -"type(self, name: str, filename: Optional[str] = None) -> Type\n" -"\n" -"Return a Type object for the type with the given name.\n" -"\n" -"If there are multiple types with the given name, they can be\n" -"distinguished by passing the filename that the desired identifier was\n" -"defined in. If no filename is given, it is undefined which one is\n" -"returned.\n" -"\n" -"If no matches are found, this raises a LookupError.\n" -"\n" -">>> prog.type('long')\n" -"int_type(name='long', size=8, is_signed=True)"}, + drgn_Program_type_DOC}, {"constant", (PyCFunction)Program_constant, - METH_VARARGS | METH_KEYWORDS, -"constant(self, name: str, filename: Optional[str] = None) -> Object\n" -"\n" -"Return an Object representing the constant (e.g., enumeration constant\n" -"or macro) with the given name.\n" -"\n" -"If there are multiple constants with the given name, they can be\n" -"distinguished by passing the filename that the desired constant was\n" -"defined in. 
If no filename is given, it is undefined which one is\n" -"returned.\n" -"\n" -"If no matches are found, this raises a LookupError.\n" -"\n" -"Note that support for macro constants is not yet implemented for DWARF\n" -"files, and most compilers don't generate macro debugging information\n" -"by default anyways.\n" -"\n" -">>> prog.constant('PIDTYPE_MAX')\n" -"Object(prog, 'enum pid_type', value=4)"}, + METH_VARARGS | METH_KEYWORDS, drgn_Program_constant_DOC}, {"function", (PyCFunction)Program_function, - METH_VARARGS | METH_KEYWORDS, -"function(self, name: str, filename: Optional[str] = None) -> Object\n" -"\n" -"Return an Object representing the function with the given name.\n" -"\n" -"If there are multiple functions with the given name, they can be\n" -"distinguished by passing the filename that the desired function was\n" -"defined in. If no filename is given, it is undefined which one is\n" -"returned.\n" -"\n" -"If no matches are found, this raises a LookupError.\n" -"\n" -">>> prog.function('schedule')\n" -"Object(prog, 'void (void)', address=0xffffffff94392370)"}, + METH_VARARGS | METH_KEYWORDS, drgn_Program_function_DOC}, {"variable", (PyCFunction)Program_variable, - METH_VARARGS | METH_KEYWORDS, -"variable(self, name: str, filename: Optional[str] = None) -> Object\n" -"\n" -"Return an Object representing the variable with the given name.\n" -"\n" -"If there are multiple variables with the given name, they can be\n" -"distinguished by passing the filename that the desired variable was\n" -"defined in. 
If no filename is given, it is undefined which one is\n" -"returned.\n" -"\n" -"If no matches are found, this raises a LookupError.\n" -"\n" -">>> prog.variable('jiffies')\n" -"Object(prog, 'volatile unsigned long', address=0xffffffff94c05000)"}, + METH_VARARGS | METH_KEYWORDS, drgn_Program_variable_DOC}, {}, }; static PyGetSetDef Program_getset[] = { - {"flags", (getter)Program_get_flags, NULL, -"ProgramFlags\n" -"\n" -"flags which apply to this program"}, + {"flags", (getter)Program_get_flags, NULL, drgn_Program_flags_DOC}, {"word_size", (getter)Program_get_word_size, NULL, -"int\n" -"\n" -"size of a word in this program in bytes"}, + drgn_Program_word_size_DOC}, {"byteorder", (getter)Program_get_byteorder, NULL, -"str\n" -"\n" -"byte order in this program (either 'little' or 'big')"}, + drgn_Program_byteorder_DOC}, {}, }; @@ -396,20 +319,6 @@ static PyMappingMethods Program_as_mapping = { (binaryfunc)Program_subscript, /* mp_subscript */ }; -#define Program_DOC \ -"A Program represents a crashed or running program. It can be used to lookup\n" \ -"type definitions, access variables, and read arbitrary memory.\n" \ -"\n" \ -"The main functionality of a Program is looking up objects (i.e.,\n" \ -"variables, constants, or functions). This is done with the \"[]\"\n" \ -"operator.\n" \ -"\n" \ -">>> print(prog['pid_max'])\n" \ -"(int)32768\n" \ -"\n" \ -"A Program cannot be constructed directly. Instead, use\n" \ -"program_from_core_dump(), program_from_kernel(), or program_from_pid()." 
- PyTypeObject Program_type = { PyVarObject_HEAD_INIT(NULL, 0) "_drgn.Program", /* tp_name */ @@ -431,7 +340,7 @@ PyTypeObject Program_type = { NULL, /* tp_setattro */ NULL, /* tp_as_buffer */ Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,/* tp_flags */ - Program_DOC, /* tp_doc */ + drgn_Program_DOC, /* tp_doc */ (traverseproc)Program_traverse, /* tp_traverse */ (inquiry)Program_clear, /* tp_clear */ NULL, /* tp_richcompare */ diff --git a/libdrgn/python/type.c b/libdrgn/python/type.c index 3cc4d25c..0e11ade9 100644 --- a/libdrgn/python/type.c +++ b/libdrgn/python/type.c @@ -551,72 +551,35 @@ static PyObject *DrgnType_getter(DrgnType *self, struct DrgnType_Attr *attr) static PyGetSetDef DrgnType_getset[] = { {"_ptr", (getter)DrgnType_get_ptr, NULL, -"int\n" +"Address of underlying ``struct drgn_type``.\n" "\n" -"address of underlying struct drgn_type"}, +"This is used for testing.\n" +"\n" +":vartype: int"}, {"kind", (getter)DrgnType_getter, NULL, -"TypeKind\n" -"\n" -"kind of this type", &DrgnType_attr_kind}, - {"qualifiers", (getter)DrgnType_getter, NULL, -"Qualifiers\n" -"\n" -"bitmask of this type's qualifiers", &DrgnType_attr_qualifiers}, - {"name", (getter)DrgnType_getter, NULL, -"str\n" -"\n" -"name of this integer, boolean, floating-point, complex, or typedef type\n", + drgn_Type_kind_DOC, &DrgnType_attr_kind}, + {"qualifiers", (getter)DrgnType_getter, NULL, drgn_Type_qualifiers_DOC, + &DrgnType_attr_qualifiers}, + {"name", (getter)DrgnType_getter, NULL, drgn_Type_name_DOC, &DrgnType_attr_name}, - {"tag", (getter)DrgnType_getter, NULL, -"Optional[str]\n" -"\n" -"tag of this structure, union, or enumerated type or None if this is an\n" -"anonymous type", &DrgnType_attr_tag}, - {"size", (getter)DrgnType_getter, NULL, -"Optional[int]\n" -"\n" -"size in bytes of this integer, boolean, floating-point, complex,\n" -"structure, union, or pointer type, or None if this is an incomplete\n" -"structure or union type", &DrgnType_attr_size}, - {"length", 
(getter)DrgnType_getter, NULL, -"Optional[int]\n" -"\n" -"number of elements in this array type or None if this is an incomplete\n" -"array type", &DrgnType_attr_length}, - {"is_signed", (getter)DrgnType_getter, NULL, -"bool\n" -"\n" -"whether this integer type is signed", &DrgnType_attr_is_signed}, - {"type", (getter)DrgnType_getter, NULL, -"Optional[Type]\n" -"\n" -"type underlying this type (i.e., the type denoted by a typedef type, the\n" -"compatible integer type of an enumerated type [which is None if this is\n" -"an incomplete type], the type referenced by a pointer type, the element\n" -"type of an array, or the return type of a function type)\n", + {"tag", (getter)DrgnType_getter, NULL, drgn_Type_tag_DOC, + &DrgnType_attr_tag}, + {"size", (getter)DrgnType_getter, NULL, drgn_Type_size_DOC, + &DrgnType_attr_size}, + {"length", (getter)DrgnType_getter, NULL, drgn_Type_length_DOC, + &DrgnType_attr_length}, + {"is_signed", (getter)DrgnType_getter, NULL, drgn_Type_is_signed_DOC, + &DrgnType_attr_is_signed}, + {"type", (getter)DrgnType_getter, NULL, drgn_Type_type_DOC, &DrgnType_attr_type}, - {"members", (getter)DrgnType_getter, NULL, -"Optional[List[Tuple[Type, Optional[str], int, int]]]\n" -"\n" -"list of members of this structure or union type as (type, name, bit\n" -"offset, bit field size) tuples, or None if this is an incomplete type", + {"members", (getter)DrgnType_getter, NULL, drgn_Type_members_DOC, &DrgnType_attr_members}, {"enumerators", (getter)DrgnType_getter, NULL, -"Optional[List[Tuple[str, int]]]\n" -"\n" -"list of enumeration constants of this enumerated type as (name, value)\n" -"tuples, or None if this is an incomplete type", - &DrgnType_attr_enumerators}, - {"parameters", (getter)DrgnType_getter, NULL, -"List[Tuple[Type, Optional[str]]]\n" -"\n" -"list of parameters of this function type as (type, name) tuples", + drgn_Type_enumerators_DOC, &DrgnType_attr_enumerators}, + {"parameters", (getter)DrgnType_getter, NULL, drgn_Type_parameters_DOC, 
&DrgnType_attr_parameters}, {"is_variadic", (getter)DrgnType_getter, NULL, -"bool\n" -"\n" -"whether this function type takes a variable number of arguments", - &DrgnType_attr_is_variadic}, + drgn_Type_is_variadic_DOC, &DrgnType_attr_is_variadic}, {}, }; @@ -979,23 +942,29 @@ static PyObject *DrgnType_is_complete(DrgnType *self) static int qualifiers_converter(PyObject *arg, void *result) { - PyObject *value; unsigned long qualifiers; - if (!PyObject_TypeCheck(arg, (PyTypeObject *)Qualifiers_class)) { - PyErr_SetString(PyExc_TypeError, - "qualifiers must be Qualifiers"); - return 0; + if (arg == Py_None) { + qualifiers = 0; + } else { + PyObject *value; + + if (!PyObject_TypeCheck(arg, + (PyTypeObject *)Qualifiers_class)) { + PyErr_SetString(PyExc_TypeError, + "qualifiers must be Qualifiers or None"); + return 0; + } + + value = PyObject_GetAttrString(arg, "value"); + if (!value) + return 0; + + qualifiers = PyLong_AsUnsignedLong(value); + Py_DECREF(value); + if (qualifiers == (unsigned long)-1 && PyErr_Occurred()) + return 0; } - - value = PyObject_GetAttrString(arg, "value"); - if (!value) - return 0; - - qualifiers = PyLong_AsUnsignedLong(value); - Py_DECREF(value); - if (qualifiers == (unsigned long)-1 && PyErr_Occurred()) - return 0; *(unsigned char *)result = qualifiers; return 1; } @@ -1053,38 +1022,15 @@ static PyObject *DrgnType_richcompare(DrgnType *self, PyObject *other, int op) Py_RETURN_FALSE; } -#define DrgnType_DOC \ -"Type descriptor\n" \ -"\n" \ -"A Type object represents a type in a program. Each kind of type (e.g.,\n" \ -"integer, structure) has different descriptors (e.g., name, size). Types\n" \ -"can also have qualifiers (e.g., constant, atomic). Accessing a\n" \ -"descriptor which does not apply to a type raises an exception.\n" \ -"\n" \ -"This class cannot be constructed directly. Instead, use one of the\n" \ -"*_type() factory functions." 
- static PyMethodDef DrgnType_methods[] = { {"type_name", (PyCFunction)DrgnType_type_name, METH_NOARGS, -"type_name(self) -> str\n" -"\n" -"Get a descriptive full name of this type."}, + drgn_Type_type_name_DOC}, {"is_complete", (PyCFunction)DrgnType_is_complete, METH_NOARGS, -"is_complete(self) -> bool\n" -"\n" -"Get whether this type is complete (i.e., the type definition is known).\n" -"This is always False for void types. It may be False for structure,\n" -"union, enumerated, and array types, as well as typedef types where the\n" -"underlying type is one of those. Otherwise, it is always True."}, + drgn_Type_is_complete_DOC}, {"qualified", (PyCFunction)DrgnType_qualified, - METH_VARARGS | METH_KEYWORDS, -"qualified(self, qualifiers: Qualifiers) -> Type\n" -"\n" -"Return a copy of this type with different qualifiers."}, + METH_VARARGS | METH_KEYWORDS, drgn_Type_qualified_DOC}, {"unqualified", (PyCFunction)DrgnType_unqualified, METH_NOARGS, -"unqualified(self) -> Type\n" -"\n" -"Return a copy of this type with no qualifiers."}, + drgn_Type_unqualified_DOC}, {}, }; @@ -1116,7 +1062,7 @@ PyTypeObject DrgnType_type = { NULL, /* tp_setattro */ NULL, /* tp_as_buffer */ Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,/* tp_flags */ - DrgnType_DOC, /* tp_doc */ + drgn_Type_DOC, /* tp_doc */ (traverseproc)DrgnType_traverse, /* tp_traverse */ (inquiry)DrgnType_clear, /* tp_clear */ (richcmpfunc)DrgnType_richcompare, /* tp_richcompare */