Add type hint stubs and generate documentation from them

I've been wanting to add type hints for the _drgn C extension for
a while. The main blocker was that there is a large overlap between the
documentation (in docs/api_reference.rst) and the stub file, and I
really didn't want to duplicate the information. Therefore, it was a
requirement that the documentation could be generated from the stub
file, or vice versa. Unfortunately, none of the existing tools that I
could find supported this very well. So, I bit the bullet and wrote my
own Sphinx extension that uses the stub file as the source of truth (and
subsumes my old autopackage extension and gen_docstrings script).
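
As a rough sketch of how the new pieces fit together (the same calls that
drgndoc.docstrings and the Sphinx extension make; the paths and resolved name
here are illustrative):

    from drgndoc.format import Formatter
    from drgndoc.namespace import Namespace, ResolvedNode
    from drgndoc.parse import parse_paths

    # Parse the package and the stub file into a tree of modules, resolve a
    # dotted name, and render its signature and docstring as reStructuredText.
    namespace = Namespace(parse_paths(["drgn", "_drgn.pyi"]))
    formatter = Formatter(namespace)
    resolved = namespace.resolve_global_name("_drgn.Program")
    if isinstance(resolved, ResolvedNode):
        signature, lines = formatter.format(resolved)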

The stub file is probably incomplete/inaccurate in places, but this
should be a good starting point to improve on.

Closes #22.
Omar Sandoval 2020-02-25 11:43:01 -08:00
parent 1b7e683930
commit 80c9fb35ff
22 changed files with 3002 additions and 1763 deletions

@@ -1,6 +1,3 @@
version: 2
sphinx:
configuration: docs/conf.py
python:
install:
- requirements: docs/requirements.txt

_drgn.pyi (new file, 1512 lines)

File diff suppressed because it is too large

File diff suppressed because it is too large

@@ -7,16 +7,17 @@ sys.path.append(os.path.abspath("exts"))
master_doc = "index"
extensions = [
"autopackage",
"drgndoc.ext",
"setuptools_config",
"sphinx.ext.autodoc",
"sphinx.ext.extlinks",
"sphinx.ext.intersphinx",
"sphinx.ext.viewcode",
"sphinx_autodoc_typehints",
]
autodoc_mock_imports = ["_drgn"]
drgndoc_paths = ["../drgn", "../_drgn.pyi"]
drgndoc_substitutions = [
(r"^_drgn\b", "drgn"),
]
extlinks = {
"linux": (

@@ -1,71 +0,0 @@
# Copyright 2018-2019 - Omar Sandoval
# SPDX-License-Identifier: GPL-3.0+
import docutils.nodes
from docutils.statemachine import StringList
import importlib
import pkgutil
import sphinx.ext.autodoc
from sphinx.util.docutils import SphinxDirective
from sphinx.util.nodes import nested_parse_with_titles
# sphinx.ext.autodoc doesn't recursively document packages, so we need our own
# directive to do that.
class AutopackageDirective(SphinxDirective):
required_arguments = 1
optional_arguments = 0
def run(self):
sourcename = ""
def aux(name):
module = importlib.import_module(name)
contents = StringList()
contents.append(f".. automodule:: {name}", sourcename)
if hasattr(module, "__all__"):
module_attrs = [
attr_name
for attr_name in module.__all__
if getattr(module, attr_name).__module__ == name
]
if module_attrs:
contents.append(
f" :members: {', '.join(module_attrs)}", sourcename
)
else:
contents.append(" :members:", sourcename)
contents.append("", sourcename)
node = docutils.nodes.section()
nested_parse_with_titles(self.state, contents, node)
# If this module defines any sections, then submodules should go
# inside of the last one.
section = node
for child in node.children:
if isinstance(child, docutils.nodes.section):
section = child
if hasattr(module, "__path__"):
submodules = sorted(
module_info.name
for module_info in pkgutil.iter_modules(
module.__path__, prefix=name + "."
)
)
for submodule in submodules:
section.extend(aux(submodule))
return node.children
with sphinx.ext.autodoc.mock(self.env.config.autodoc_mock_imports):
return aux(self.arguments[0])
def setup(app):
app.setup_extension("sphinx.ext.autodoc")
app.add_directive("autopackage", AutopackageDirective)
return {"parallel_read_safe": True}

@@ -0,0 +1,131 @@
#!/usr/bin/env python3
# Copyright 2018-2020 - Omar Sandoval
# SPDX-License-Identifier: GPL-3.0+
import argparse
import functools
import sys
from typing import cast
from drgndoc.format import Formatter
from drgndoc.namespace import Namespace, ResolvedNode
from drgndoc.parse import Class, DocumentedNode, Node, parse_paths
from drgndoc.util import dot_join
escapes = []
for c in range(256):
if c == 0:
e = r"\0"
elif c == 7:
e = r"\a"
elif c == 8:
e = r"\b"
elif c == 9:
e = r"\t"
elif c == 10:
e = r"\n"
elif c == 11:
e = r"\v"
elif c == 12:
e = r"\f"
elif c == 13:
e = r"\r"
elif c == 34:
e = r"\""
elif c == 92:
e = r"\\"
elif 32 <= c <= 126:
e = chr(c)
else:
e = f"\\x{c:02x}"
escapes.append(e)
def escape_string(s):
return "".join([escapes[c] for c in s.encode("utf-8")])
if __name__ == "__main__":
parser = argparse.ArgumentParser(
description="generate docstring definitions for a C extension from Python source code/stub files"
)
parser.add_argument(
"--header", "-H", action="store_true", help="generate header file"
)
parser.add_argument(
"-m",
"--module",
dest="modules",
metavar="MODULE[:NAME]",
action="append",
help="generate docstrings for the given module instead of all modules "
"(may be given multiple times); "
"an alternate name to use for the generated variables may also be given",
)
parser.add_argument(
"paths", metavar="PATH", nargs="+", help="module or package path"
)
args = parser.parse_args()
modules = parse_paths(args.paths, functools.partial(print, file=sys.stderr))
namespace = Namespace(modules)
formatter = Formatter(namespace)
output_file = sys.stdout
if args.header:
output_file.write(
f"""\
/*
* Generated by drgndoc.docstrings -H.
*
* Before Python 3.7, various docstring fields were defined as char * (see
* https://bugs.python.org/issue28761). We still want the strings to be
* read-only, so just cast away the const.
*/
"""
)
else:
output_file.write(f"/* Generated by drgndoc.docstrings. */\n\n")
def aux(resolved: ResolvedNode[Node], name: str) -> None:
node = resolved.node
if hasattr(node, "docstring"):
var_name = name.replace(".", "_") + "_DOC"
if args.header:
output_file.write("extern ")
output_file.write(f"const char {var_name}[]")
if not args.header:
output_file.write(" =")
signature, lines = formatter.format(
cast(ResolvedNode[DocumentedNode], resolved), rst=False
)
if signature:
lines[0:0] = [
name.rpartition(".")[2] + signature,
"",
]
if lines:
for i, line in enumerate(lines):
output_file.write(f'\n\t"{escape_string(line)}')
if i != len(lines) - 1:
output_file.write("\\n")
output_file.write('"')
else:
output_file.write(' ""')
output_file.write(";\n")
if args.header:
output_file.write(f"#define {var_name} (char *){var_name}\n")
for attr in resolved.attrs():
if isinstance(node, Class) and attr.name == "__init__":
continue
aux(attr, dot_join(name, attr.name))
for module in args.modules or namespace.modules.keys():
module, _, name = module.partition(":")
resolved = namespace.resolve_global_name(module)
if isinstance(resolved, ResolvedNode):
aux(resolved, name or module)
else:
sys.exit(f"name {module} not found")

docs/exts/drgndoc/ext.py (new file, 285 lines)

@@ -0,0 +1,285 @@
# Copyright 2020 - Omar Sandoval
# SPDX-License-Identifier: GPL-3.0+
"""
drgn consists of a core C extension and supporting Python code. It also makes
use of type hints. As a result, its documentation generation has a few
requirements:
1. It must work without compiling the C extension, which can't be done on Read
the Docs because of missing dependencies.
2. It must support generating documentation from type hints (ideally with
proper markup rather than by including the raw type annotations).
3. It must support type hint stub files.
4. It must support classes/functions/etc. which are defined in one module but
should canonically be documented in another. This is common for C extensions
that are wrapped by a higher-level Python module.
The main existing solutions are ruled out by these requirements:
1. sphinx.ext.autodoc (and other solutions based on runtime introspection)
require excluding the C extension (e.g., with autodoc_mock_imports) and
providing the documentation for it elsewhere. Additionally, type hints from
stub files are not available at runtime, so extensions like
sphinx-autodoc-typehints and sphinx.ext.autodoc.typehints won't work.
2. sphinx.ext.autoapi doesn't generate markup for type hints and doesn't have
any support for objects which should be documented under a different name than
where they were defined. It also only supports documenting directory trees, not
individual files.
This extension addresses these requirements. In the future, it may be
worthwhile to make it a standalone package, as I imagine other projects that
make heavy use of C extensions have encountered similar issues.
Overall, it works by parsing Python source code and stub files (drgndoc.parse),
building a tree representing the namespace (drgndoc.namespace), and using that
namespace to resolve definitions and type annotations to generate markup
(drgndoc.format).
This also provides a script that can generate docstring definitions from a stub
file for the C extension itself (drgndoc.docstrings).
"""
import docutils.nodes
import docutils.parsers.rst.directives
import docutils.statemachine
import os.path
import re
import sphinx.addnodes
import sphinx.application
import sphinx.environment
import sphinx.util.docutils
import sphinx.util.logging
import sphinx.util.nodes
from typing import List, cast
from drgndoc.format import Formatter
from drgndoc.namespace import Namespace, ResolvedNode
from drgndoc.parse import (
Class,
DocumentedNode,
Function,
Import,
ImportFrom,
Module,
Node,
Variable,
parse_paths,
)
from drgndoc.util import dot_join
logger = sphinx.util.logging.getLogger(__name__)
# Needed for type checking.
class DrgnDocBuildEnvironment(sphinx.environment.BuildEnvironment):
drgndoc_namespace: Namespace
drgndoc_formatter: Formatter
def drgndoc_init(app: sphinx.application.Sphinx) -> None:
env = cast(DrgnDocBuildEnvironment, app.env)
paths = [
os.path.join(app.confdir, path)
for path in app.config.drgndoc_paths # type: ignore
]
env.drgndoc_namespace = Namespace(parse_paths(paths, logger.warning))
env.drgndoc_formatter = Formatter(
env.drgndoc_namespace,
[
(re.compile(pattern), repl)
for pattern, repl in app.config.drgndoc_substitutions # type: ignore
],
)
class DrgnDocDirective(sphinx.util.docutils.SphinxDirective):
env: DrgnDocBuildEnvironment
required_arguments = 1
optional_arguments = 0
option_spec = {
"include": docutils.parsers.rst.directives.unchanged,
"exclude": docutils.parsers.rst.directives.unchanged,
}
def run(self) -> List[docutils.nodes.Node]:
parts = []
py_module = self.env.ref_context.get("py:module")
if py_module:
parts.append(py_module)
py_classes = self.env.ref_context.get("py:classes", [])
if py_classes:
parts.extend(py_classes)
parts.append(self.arguments[0])
name = ".".join(parts)
resolved = self.env.drgndoc_namespace.resolve_global_name(name)
if not isinstance(resolved, ResolvedNode):
logger.warning("name %r not found", name, resolved)
return []
docnode = docutils.nodes.section()
self._run(name, "", resolved, docnode)
return docnode.children
def _include_attr(self, attr: ResolvedNode[Node], attr_name: str) -> bool:
"""
Return whether the given recursive attribute should be documented.
We recursively include nodes that are:
1. Not imports.
2. Match the "include" pattern OR don't start with an underscore.
AND
3. Do not match the "exclude" pattern.
The "include" and "exclude" patterns are applied to the name relative
to the object being documented by the directive.
"""
if isinstance(attr.node, (Import, ImportFrom)):
return False
if not attr_name:
return True
dot = attr_name.rfind(".")
if dot + 1 < len(attr_name) and attr_name[dot + 1] == "_":
include_pattern = self.options.get("include")
if include_pattern is None or not re.fullmatch(include_pattern, attr_name):
return False
exclude_pattern = self.options.get("exclude")
return exclude_pattern is None or not re.fullmatch(exclude_pattern, attr_name)
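# Illustrative examples: with no options, an attribute like "Program.read" is
# documented, while "Program._cache" is skipped because its last component
# starts with an underscore; an ":include: .*_cache" option would pull it back
# in, and a matching ":exclude:" pattern always wins.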
def _run(
self,
top_name: str,
attr_name: str,
resolved: ResolvedNode[Node],
docnode: docutils.nodes.Node,
) -> None:
if not self._include_attr(resolved, attr_name):
return
resolved = cast(ResolvedNode[DocumentedNode], resolved)
node = resolved.node
if isinstance(node, Module):
directive = "py:module"
return self._run_module(
top_name, attr_name, cast(ResolvedNode[Module], resolved), docnode
)
sourcename = ""
if resolved.module and resolved.module.node.path:
sourcename = resolved.module.node.path
if sourcename:
self.env.note_dependency(sourcename)
if isinstance(node, Class):
directive = "py:class"
elif isinstance(node, Function):
directive = "py:method" if resolved.class_ else "py:function"
elif isinstance(node, Variable):
directive = "py:attribute" if resolved.class_ else "py:data"
else:
assert False, type(node).__name__
argument = (attr_name or top_name).rpartition(".")[2]
extra_argument, lines = self.env.drgndoc_formatter.format(
resolved,
self.env.ref_context.get("py:module", ""),
".".join(self.env.ref_context.get("py:classes", ())),
)
contents = docutils.statemachine.StringList()
contents.append(
f".. {directive}:: {argument}{extra_argument}", sourcename,
)
if isinstance(node, Function):
if node.async_:
contents.append(" :async:", sourcename)
if resolved.class_:
if node.have_decorator("classmethod"):
contents.append(" :classmethod:", sourcename)
if node.have_decorator("staticmethod"):
contents.append(" :staticmethod:", sourcename)
contents.append("", sourcename)
if lines:
for line in lines:
contents.append(" " + line, sourcename)
contents.append("", sourcename)
self.state.nested_parse(contents, 0, docnode)
if isinstance(node, Class):
for desc in reversed(docnode.children):
if isinstance(desc, sphinx.addnodes.desc):
break
else:
logger.warning("desc node not found")
return
for desc_content in reversed(desc.children):
if isinstance(desc_content, sphinx.addnodes.desc_content):
break
else:
logger.warning("desc_content node not found")
return
py_classes = self.env.ref_context.setdefault("py:classes", [])
py_classes.append(resolved.name)
self.env.ref_context["py:class"] = resolved.name
for member in resolved.attrs():
self._run(
top_name, dot_join(attr_name, member.name), member, desc_content
)
py_classes.pop()
self.env.ref_context["py:class"] = py_classes[-1] if py_classes else None
def _run_module(
self,
top_name: str,
attr_name: str,
resolved: ResolvedNode[Module],
docnode: docutils.nodes.Node,
) -> None:
node = resolved.node
sourcename = node.path or ""
if sourcename:
self.env.note_dependency(sourcename)
contents = docutils.statemachine.StringList()
if node.docstring:
for line in node.docstring.splitlines():
contents.append(line, sourcename)
sphinx.util.nodes.nested_parse_with_titles(self.state, contents, docnode)
# If the module docstring defines any sections, then the contents
# should go inside of the last one.
section = docnode
for child in reversed(docnode.children):
if isinstance(child, docutils.nodes.section):
section = child
break
try:
old_py_module = self.env.ref_context["py:module"]
have_old_py_module = True
except KeyError:
have_old_py_module = False
self.env.ref_context["py:module"] = dot_join(top_name, attr_name)
for attr in resolved.attrs():
self._run(top_name, dot_join(attr_name, attr.name), attr, section)
if have_old_py_module:
self.env.ref_context["py:module"] = old_py_module
else:
del self.env.ref_context["py:module"]
def setup(app: sphinx.application.Sphinx) -> dict:
app.connect("builder-inited", drgndoc_init)
# List of modules or packages.
app.add_config_value("drgndoc_paths", [], "env")
# List of (regex pattern, substitution) to apply to resolved names.
app.add_config_value("drgndoc_substitutions", [], "env")
app.add_directive("drgndoc", DrgnDocDirective)
return {"env_version": 1, "parallel_read_safe": True, "parallel_write_safe": True}

docs/exts/drgndoc/format.py (new file, 423 lines)

@@ -0,0 +1,423 @@
# Copyright 2020 - Omar Sandoval
# SPDX-License-Identifier: GPL-3.0+
import ast
import re
from typing import Any, List, Optional, Pattern, Sequence, Tuple, cast
from drgndoc.namespace import BoundNode, Namespace, ResolvedNode
from drgndoc.parse import Class, DocumentedNode, Function, Module, Variable
from drgndoc.visitor import NodeVisitor
class _FormatVisitor(NodeVisitor):
def __init__(
self,
namespace: Namespace,
substitutions: Sequence[Tuple[Pattern[str], Any]],
module: Optional[BoundNode[Module]],
class_: Optional[BoundNode[Class]],
context_module: Optional[str],
context_class: Optional[str],
) -> None:
self._namespace = namespace
self._substitutions = substitutions
self._module = module
self._class = class_
self._context_module = context_module
self._context_class = context_class
self._parts: List[str] = []
def visit(self, node: ast.AST, rst: bool = True) -> str:
self._rst = rst
super().visit(node)
ret = "".join(self._parts)
self._parts.clear()
return ret
def generic_visit(self, node: ast.AST) -> None:
raise NotImplementedError(
f"{node.__class__.__name__} formatting is not implemented"
)
@staticmethod
def _check_ctx_is_load(node: Any) -> None:
if not isinstance(node.ctx, ast.Load):
raise NotImplementedError(
f"{node.ctx.__class__.__name__} formatting is not implemented"
)
def visit_Constant(
self, node: ast.Constant, parent: Optional[ast.AST], sibling: Optional[ast.AST]
) -> None:
if node.value is ...:
self._parts.append("...")
else:
quote = self._rst and not isinstance(node.value, (int, float))
if quote:
self._parts.append("``")
self._parts.append(repr(node.value))
if quote:
self._parts.append("``")
def _append_resolved_name(self, name: str) -> None:
if self._rst:
self._parts.append(":py:obj:`")
resolved = self._namespace.resolve_name_in_scope(
self._module, self._class, name
)
if isinstance(resolved, ResolvedNode):
target = resolved.qualified_name()
else:
target = resolved
for pattern, repl in self._substitutions:
target, num_subs = pattern.subn(repl, target)
if num_subs:
break
title = target
if title.startswith("typing."):
title = title[len("typing.") :]
elif self._context_module and title.startswith(self._context_module + "."):
title = title[len(self._context_module) + 1 :]
if self._context_class and title.startswith(self._context_class + "."):
title = title[len(self._context_class) + 1 :]
self._parts.append(title)
if self._rst:
if title != target:
self._parts.append(" <")
self._parts.append(target)
self._parts.append(">")
self._parts.append("`")
def visit_Name(
self, node: ast.Name, parent: Optional[ast.AST], sibling: Optional[ast.AST]
) -> None:
self._check_ctx_is_load(node)
self._append_resolved_name(node.id)
def visit_Attribute(
self, node: ast.Attribute, parent: Optional[ast.AST], sibling: Optional[ast.AST]
) -> None:
self._check_ctx_is_load(node)
name_stack = [node.attr]
while True:
value = node.value
if isinstance(value, ast.Attribute):
name_stack.append(value.attr)
node = value
continue
elif isinstance(value, ast.Name):
name_stack.append(value.id)
name_stack.reverse()
self._append_resolved_name(".".join(name_stack))
elif isinstance(value, ast.Constant) and not isinstance(
value.value, (type(...), int, float)
):
name_stack.append(repr(value.value))
name_stack.reverse()
if self._rst:
self._parts.append("``")
self._parts.append(".".join(name_stack))
if self._rst:
self._parts.append("``")
else:
self._visit(value, node, None)
name_stack.append("")
name_stack.reverse()
if isinstance(value, ast.Constant) and isinstance(value.value, int):
# "1.foo()" is a syntax error without parentheses or an
# extra space.
self._parts.append(" ")
elif self._rst:
# Make sure the "``" doesn't get squashed into a previous
# special character.
self._parts.append("\\ ")
if self._rst:
self._parts.append("``")
self._parts.append(".".join(name_stack))
if self._rst:
self._parts.append("``")
break
def visit_Subscript(
self, node: ast.Subscript, parent: Optional[ast.AST], sibling: Optional[ast.AST]
) -> None:
self._check_ctx_is_load(node)
self._visit(node.value, node, None)
if self._rst:
self._parts.append("\\")
self._parts.append("[")
self._visit(node.slice, node, None)
if self._rst:
self._parts.append("\\")
self._parts.append("]")
def visit_Index(
self, node: ast.Index, parent: Optional[ast.AST], sibling: Optional[ast.AST]
) -> None:
self._visit(node.value, node, None)
def visit_Tuple(
self, node: ast.Tuple, parent: Optional[ast.AST], sibling: Optional[ast.AST]
) -> None:
self._check_ctx_is_load(node)
parens = not isinstance(parent, ast.Index)
if parens:
self._parts.append("(")
for i, elt in enumerate(node.elts):
if i > 0:
self._parts.append(", ")
self._visit(
elt, node, node.elts[i + 1] if i < len(node.elts) - 1 else None,
)
if len(node.elts) == 1:
self._parts.append(",")
if parens:
self._parts.append(")")
def visit_List(
self, node: ast.List, parent: Optional[ast.AST], sibling: Optional[ast.AST]
) -> None:
self._check_ctx_is_load(node)
if self._rst:
self._parts.append("\\")
self._parts.append("[")
for i, elt in enumerate(node.elts):
if i > 0:
self._parts.append(", ")
self._visit(
elt, node, node.elts[i + 1] if i < len(node.elts) - 1 else None,
)
if self._rst:
self._parts.append("\\")
self._parts.append("]")
class Formatter:
def __init__(
self,
namespace: Namespace,
substitutions: Sequence[Tuple[Pattern[str], Any]] = (),
) -> None:
self._namespace = namespace
self._substitutions = substitutions
def _add_class_info(
self,
resolved: ResolvedNode[Class],
context_module: Optional[str],
context_class: Optional[str],
rst: bool,
lines: List[str],
) -> str:
node = resolved.node
if node.bases:
visitor = _FormatVisitor(
self._namespace,
self._substitutions,
resolved.module,
resolved.class_,
context_module,
context_class,
)
bases = [visitor.visit(base, rst) for base in node.bases]
lines[0:0] = ["Bases: " + ", ".join(bases), ""]
extra_argument = ""
try:
init = resolved.attr("__init__")
except KeyError:
pass
else:
if isinstance(init.node, Function):
init_context_class = resolved.name
if context_class:
init_context_class = context_class + "." + init_context_class
extra_argument = self._add_function_info(
cast(ResolvedNode[Function], init),
context_module,
init_context_class,
rst,
False,
lines,
)
return extra_argument
def _add_function_info(
self,
resolved: ResolvedNode[Function],
context_module: Optional[str],
context_class: Optional[str],
rst: bool,
want_rtype: bool,
lines: List[str],
) -> str:
visitor = _FormatVisitor(
self._namespace,
self._substitutions,
resolved.module,
resolved.class_,
context_module,
context_class,
)
node = resolved.node
if rst:
params_need_type = set()
params_have_type = set()
for line in lines:
match = re.match(r":(param|type)\s+([a-zA-Z0-9_]+):", line)
if match:
if match.group(1) == "param":
params_need_type.add(match.group(2))
else:
params_have_type.add(match.group(2))
elif line.startswith(":rtype:"):
want_rtype = False
params_need_type -= params_have_type
lines.append("")
signature = ["("]
need_comma = False
def visit_arg(
arg: ast.arg, default: Optional[ast.expr] = None, prefix: str = ""
) -> None:
nonlocal need_comma
if need_comma:
signature.append(", ")
if prefix:
signature.append(prefix)
signature.append(arg.arg)
default_sep = "="
if not rst and arg.annotation:
signature.append(": ")
signature.append(visitor.visit(arg.annotation, False))
default_sep = " = "
if default:
signature.append(default_sep)
signature.append(visitor.visit(default, False))
need_comma = True
if rst and arg.annotation and arg.arg in params_need_type:
lines.append(f":type {arg.arg}: {visitor.visit(arg.annotation)}")
posonlyargs = getattr(node.args, "posonlyargs", [])
num_posargs = len(posonlyargs) + len(node.args.args)
for i, arg in enumerate(posonlyargs + node.args.args):
default: Optional[ast.expr]
if i >= num_posargs - len(node.args.defaults):
default = node.args.defaults[
i - (num_posargs - len(node.args.defaults))
]
else:
default = None
if i == 0 and resolved.class_ and not node.have_decorator("staticmethod"):
# Skip self for methods and cls for class methods.
continue
visit_arg(arg, default)
if i == len(posonlyargs) - 1:
signature.append(", /")
if node.args.vararg:
visit_arg(node.args.vararg, prefix="*")
if node.args.kwonlyargs:
if not node.args.vararg:
if need_comma:
signature.append(", ")
signature.append("*")
need_comma = True
for i, arg in enumerate(node.args.kwonlyargs):
visit_arg(arg, node.args.kw_defaults[i])
if node.args.kwarg:
visit_arg(node.args.kwarg, prefix="**")
signature.append(")")
if want_rtype and node.returns:
if rst:
lines.append(":rtype: " + visitor.visit(node.returns))
else:
signature.append(" -> ")
signature.append(visitor.visit(node.returns, False))
return "".join(signature)
def _add_variable_info(
self,
resolved: ResolvedNode[Variable],
context_module: Optional[str],
context_class: Optional[str],
rst: bool,
lines: List[str],
) -> None:
annotation = resolved.node.annotation
if not annotation:
return
for line in lines:
if line.startswith(":vartype:"):
return
visitor = _FormatVisitor(
self._namespace,
self._substitutions,
resolved.module,
resolved.class_,
context_module,
context_class,
)
if rst:
lines.append("")
lines.append(":vartype: " + visitor.visit(annotation))
else:
lines[0:0] = [visitor.visit(annotation, False), ""]
def format(
self,
resolved: ResolvedNode[DocumentedNode],
context_module: Optional[str] = None,
context_class: Optional[str] = None,
rst: bool = True,
) -> Tuple[str, List[str]]:
if context_module is None and resolved.module:
context_module = resolved.module.name
if context_class is None and resolved.class_:
context_class = resolved.class_.name
node = resolved.node
lines = node.docstring.splitlines() if node.docstring else []
signature = ""
if isinstance(node, Class):
signature = self._add_class_info(
cast(ResolvedNode[Class], resolved),
context_module,
context_class,
rst,
lines,
)
elif isinstance(node, Function):
signature = self._add_function_info(
cast(ResolvedNode[Function], resolved),
context_module,
context_class,
rst,
True,
lines,
)
elif isinstance(node, Variable):
self._add_variable_info(
cast(ResolvedNode[Variable], resolved),
context_module,
context_class,
rst,
lines,
)
return signature, lines

@@ -0,0 +1,188 @@
# Copyright 2020 - Omar Sandoval
# SPDX-License-Identifier: GPL-3.0+
import itertools
from typing import (
Generic,
Iterator,
List,
Mapping,
Optional,
TypeVar,
Union,
)
from drgndoc.parse import (
Class,
DocumentedNode,
Function,
Import,
ImportFrom,
Module,
Node,
Variable,
)
from drgndoc.util import dot_join
NodeT_co = TypeVar("NodeT_co", bound=Node, covariant=True)
class BoundNode(Generic[NodeT_co]):
def __init__(self, name: str, node: NodeT_co) -> None:
self.name = name
self.node = node
class ResolvedNode(Generic[NodeT_co]):
def __init__(
self,
module: Optional[BoundNode[Module]],
class_: Optional[BoundNode[Class]],
name: str,
node: NodeT_co,
) -> None:
self.module = module
self.class_ = class_
self.name = name
self.node = node
def qualified_name(self) -> str:
return dot_join(
self.module.name if self.module else None,
self.class_.name if self.class_ else None,
self.name,
)
def attrs(self) -> Iterator["ResolvedNode[Node]"]:
if isinstance(self.node, Module):
module_name = dot_join(self.module.name if self.module else None, self.name)
for attr, node in self.node.attrs.items():
yield ResolvedNode(BoundNode(module_name, self.node), None, attr, node)
elif isinstance(self.node, Class):
class_name = dot_join(self.class_.name if self.class_ else None, self.name)
for attr, node in self.node.attrs.items():
yield ResolvedNode(
self.module, BoundNode(class_name, self.node), attr, node
)
def attr(self, attr: str) -> "ResolvedNode[Node]":
if isinstance(self.node, Module):
module_name = dot_join(self.module.name if self.module else None, self.name)
return ResolvedNode(
BoundNode(module_name, self.node), None, attr, self.node.attrs[attr]
)
elif isinstance(self.node, Class):
class_name = dot_join(self.class_.name if self.class_ else None, self.name)
return ResolvedNode(
self.module,
BoundNode(class_name, self.node),
attr,
self.node.attrs[attr],
)
else:
raise KeyError(attr)
UnresolvedName = str
class Namespace:
def __init__(self, modules: Mapping[str, Module]) -> None:
self.modules = modules
def _resolve_name(
self,
module_name: Optional[str],
module: Optional[Module],
class_name: Optional[str],
class_: Optional[Class],
name_components: List[str],
) -> Union[ResolvedNode[DocumentedNode], UnresolvedName]:
assert (module_name is None) == (module is None)
assert (class_name is None) == (class_ is None)
module_name_parts = []
if module_name is not None:
module_name_parts.append(module_name)
class_name_parts = []
if class_name is not None:
class_name_parts.append(class_name)
name_components.reverse()
while name_components:
attrs: Mapping[str, Node]
if class_:
attrs = class_.attrs
elif module:
attrs = module.attrs
else:
attrs = self.modules
name = name_components.pop()
try:
node = attrs[name]
except KeyError:
break
if isinstance(node, (Import, ImportFrom)):
module_name_parts.clear()
class_name_parts.clear()
module = None
class_ = None
if isinstance(node, Import):
import_name = node.module
elif isinstance(node, ImportFrom):
if node.module is None or node.level != 0:
raise NotImplementedError("TODO: relative imports")
import_name = node.module
name_components.append(node.name)
name_components.extend(reversed(import_name.split(".")))
elif name_components:
if isinstance(node, Module):
assert not class_
module = node
module_name_parts.append(name)
elif isinstance(node, Class):
class_ = node
class_name_parts.append(name)
else:
break
else:
assert isinstance(node, (Module, Class, Function, Variable))
return ResolvedNode(
BoundNode(".".join(module_name_parts), module) if module else None,
BoundNode(".".join(class_name_parts), class_) if class_ else None,
name,
node,
)
return ".".join(
itertools.chain(
module_name_parts, class_name_parts, (name,), reversed(name_components)
)
)
def resolve_global_name(
self, name: str
) -> Union[ResolvedNode[DocumentedNode], UnresolvedName]:
return self._resolve_name(None, None, None, None, name.split("."))
def resolve_name_in_scope(
self,
module: Optional[BoundNode[Module]],
class_: Optional[BoundNode[Class]],
name: str,
) -> Union[ResolvedNode[DocumentedNode], UnresolvedName]:
name_components = name.split(".")
attr = name_components[0]
if class_ and attr in class_.node.attrs:
pass
elif module and attr in module.node.attrs:
class_ = None
else:
return name
return self._resolve_name(
module.name if module else None,
module.node if module else None,
class_.name if class_ else None,
class_.node if class_ else None,
name_components,
)
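
To make the resolution rules concrete, a small sketch (the name is
illustrative; "drgn.Program" resolves through the from-import in
drgn/__init__.py back to the stub's definition):

    from drgndoc.namespace import Namespace, ResolvedNode
    from drgndoc.parse import parse_paths

    ns = Namespace(parse_paths(["drgn", "_drgn.pyi"]))
    resolved = ns.resolve_global_name("drgn.Program")
    if isinstance(resolved, ResolvedNode):
        print(resolved.qualified_name())  # e.g. "_drgn.Program"
    else:
        print("unresolved:", resolved)  # the partially resolved name, as a str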

docs/exts/drgndoc/parse.py (new file, 318 lines)

@@ -0,0 +1,318 @@
# Copyright 2020 - Omar Sandoval
# SPDX-License-Identifier: GPL-3.0+
import ast
import inspect
import os.path
import stat
from typing import (
Callable,
Dict,
Iterable,
Mapping,
Optional,
Sequence,
Tuple,
Union,
cast,
)
from drgndoc.visitor import NodeVisitor, transform_constant_nodes
# Once we don't care about Python 3.6, we can replace all of this boilerplate
# with dataclasses.
class Module:
def __init__(
self, path: Optional[str], docstring: Optional[str], attrs: Mapping[str, "Node"]
) -> None:
self.path = path
self.docstring = docstring
self.attrs = attrs
class Class:
def __init__(
self,
bases: Sequence[ast.expr],
docstring: Optional[str],
attrs: Mapping[str, "NonModuleNode"],
) -> None:
self.bases = bases
self.docstring = docstring
self.attrs = attrs
class Function:
def __init__(
self,
args: ast.arguments,
decorator_list: Sequence[ast.expr],
returns: Optional[ast.expr],
async_: bool,
docstring: Optional[str],
) -> None:
self.args = args
self.decorator_list = decorator_list
self.returns = returns
self.async_ = async_
self.docstring = docstring
def have_decorator(self, name: str) -> bool:
return any(
isinstance(decorator, ast.Name) and decorator.id == name
for decorator in self.decorator_list
)
class Variable:
def __init__(
self, annotation: Optional[ast.expr], docstring: Optional[str]
) -> None:
self.annotation = annotation
self.docstring = docstring
class Import:
def __init__(self, module: str) -> None:
self.module = module
class ImportFrom:
def __init__(self, name: str, module: Optional[str], level: int) -> None:
self.name = name
self.module = module
self.level = level
Node = Union[Module, Class, Function, Variable, Import, ImportFrom]
NonModuleNode = Union[Class, Function, Variable, Import, ImportFrom]
DocumentedNode = Union[Module, Class, Function, Variable]
def _docstring_from_node(node: Optional[ast.AST]) -> Optional[str]:
if not isinstance(node, ast.Expr):
return None
node = node.value
if isinstance(node, ast.Str):
text = node.s
elif isinstance(node, ast.Constant) and isinstance(node.value, str):
text = node.value
else:
return None
return inspect.cleandoc(text)
class _ModuleVisitor(NodeVisitor):
def visit(self, node: ast.AST) -> Tuple[Optional[str], Dict[str, NonModuleNode]]:
self._attrs: Dict[str, NonModuleNode] = {}
super().visit(node)
docstring = self._docstring
del self._docstring
return docstring, self._attrs
def visit_Module(
self, node: ast.Module, parent: Optional[ast.AST], sibling: Optional[ast.AST]
) -> None:
self._docstring = ast.get_docstring(node)
self.generic_visit(node)
def visit_ClassDef(
self, node: ast.ClassDef, parent: Optional[ast.AST], sibling: Optional[ast.AST]
) -> None:
attrs = self._attrs
self._attrs = {}
self.generic_visit(node)
class_node = Class(node.bases, ast.get_docstring(node), self._attrs)
self._attrs = attrs
self._attrs[node.name] = class_node
def visit_FunctionDef(
self,
node: ast.FunctionDef,
parent: Optional[ast.AST],
sibling: Optional[ast.AST],
) -> None:
self._attrs[node.name] = Function(
node.args, node.decorator_list, node.returns, False, ast.get_docstring(node)
)
# NB: we intentionally don't visit the function body.
def visit_AsyncFunctionDef(
self,
node: ast.AsyncFunctionDef,
parent: Optional[ast.AST],
sibling: Optional[ast.AST],
) -> None:
self._attrs[node.name] = Function(
node.args, node.decorator_list, node.returns, True, ast.get_docstring(node)
)
# NB: we intentionally don't visit the function body.
def _add_assign(
self,
name: str,
have_value: bool,
annotation: Optional[ast.expr],
docstring: Optional[str],
) -> None:
try:
var = self._attrs[name]
except KeyError:
pass
else:
# The name was previously defined. If it's a variable, add the
# annotation and/or docstring. If this is an annotation without a
# value, don't do anything. Otherwise, replace the previous
# definition.
if isinstance(var, Variable):
if not annotation and docstring is None:
return
if not annotation:
annotation = var.annotation
if docstring is None:
docstring = var.docstring
elif not have_value:
return
self._attrs[name] = Variable(annotation, docstring)
def visit_Assign(
self, node: ast.Assign, parent: Optional[ast.AST], sibling: Optional[ast.AST]
) -> None:
if len(node.targets) == 1:
docstring = _docstring_from_node(sibling)
else:
docstring = None
for target in node.targets:
if isinstance(target, ast.Name):
self._add_assign(target.id, True, None, docstring)
def visit_AnnAssign(
self, node: ast.AnnAssign, parent: Optional[ast.AST], sibling: Optional[ast.AST]
) -> None:
if isinstance(node.target, ast.Name):
self._add_assign(
node.target.id,
node.value is not None,
node.annotation,
_docstring_from_node(sibling),
)
def visit_Import(
self, node: ast.Import, parent: Optional[ast.AST], sibling: Optional[ast.AST]
) -> None:
for alias in node.names:
if alias.asname is None:
# We don't distinguish between "import foo" and "import
# foo.bar"; they both add "foo" to the current scope.
name = module_name = alias.name.partition(".")[0]
else:
name = alias.asname
module_name = alias.name
self._attrs[name] = Import(module_name)
def visit_ImportFrom(
self,
node: ast.ImportFrom,
parent: Optional[ast.AST],
sibling: Optional[ast.AST],
) -> None:
for alias in node.names:
name = alias.name if alias.asname is None else alias.asname
self._attrs[name] = ImportFrom(alias.name, node.module, node.level)
def parse_source(
source: str, filename: str
) -> Tuple[Optional[str], Dict[str, NonModuleNode]]:
node = transform_constant_nodes(ast.parse(source, filename))
return _ModuleVisitor().visit(node)
def _default_handle_err(e: Exception) -> None:
raise e
def parse_module(
path: str, handle_err: Callable[[Exception], None] = _default_handle_err
) -> Optional[Tuple[Optional[str], Dict[str, NonModuleNode]]]:
try:
with open(path, "r") as f:
source = f.read()
except (OSError, UnicodeError) as e:
handle_err(e)
return None
try:
return parse_source(source, path)
except SyntaxError as e:
handle_err(e)
return None
def parse_package(
path: str, handle_err: Callable[[Exception], None] = _default_handle_err
) -> Optional[Module]:
module_path: Optional[str] = None
docstring: Optional[str] = None
attrs: Dict[str, Node] = {}
init_path = os.path.join(path, "__init__.py")
if os.path.isfile(init_path):
module_path = init_path
result = parse_module(init_path, handle_err)
if result is not None:
docstring = result[0]
attrs = cast(Dict[str, Node], result[1])
try:
it = os.scandir(path)
except OSError as e:
handle_err(e)
else:
for entry in it:
try:
is_dir = entry.is_dir()
is_file = entry.is_file()
except OSError as e:
handle_err(e)
continue
if is_dir:
subpackage = parse_package(entry.path, handle_err)
if subpackage:
attrs[entry.name] = subpackage
elif is_file and entry.name != "__init__.py":
root, ext = os.path.splitext(entry.name)
if ext == ".py" or ext == ".pyi":
result = parse_module(entry.path, handle_err)
if result:
attrs[root] = Module(entry.path, result[0], result[1])
if module_path is None and docstring is None and not attrs:
return None
return Module(module_path, docstring, attrs)
def parse_paths(
paths: Iterable[str], handle_err: Callable[[Exception], None] = _default_handle_err
) -> Mapping[str, Module]:
modules = {}
for path in paths:
path = os.path.realpath(path)
try:
st = os.stat(path)
except OSError as e:
handle_err(e)
continue
if stat.S_ISDIR(st.st_mode):
package = parse_package(path, handle_err)
if package:
modules[os.path.basename(path)] = package
else:
handle_err(Exception(f"{path}:Not a Python module or package"))
else:
result = parse_module(path, handle_err)
if result:
name = os.path.splitext(os.path.basename(path))[0]
modules[name] = Module(path, result[0], result[1])
return modules
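
A quick illustration of what the parser produces for a stub-style snippet
(a sketch; the assertions follow from the classes defined above):

    from drgndoc.parse import Class, Function, Variable, parse_source

    docstring, attrs = parse_source(
        '"""Example module."""\n'
        "class Foo:\n"
        '    """A class."""\n'
        "    x: int = 0\n"
        '    """Docstring for x."""\n'
        "    def bar(self) -> int: ...\n",
        "<example>",
    )
    assert docstring == "Example module."
    assert isinstance(attrs["Foo"], Class)
    assert isinstance(attrs["Foo"].attrs["x"], Variable)
    assert attrs["Foo"].attrs["x"].docstring == "Docstring for x."
    assert isinstance(attrs["Foo"].attrs["bar"], Function)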

@@ -0,0 +1,8 @@
# Copyright 2020 - Omar Sandoval
# SPDX-License-Identifier: GPL-3.0+
from typing import Optional
def dot_join(*args: Optional[str]) -> str:
return ".".join([s for s in args if s])

@@ -0,0 +1,69 @@
# Copyright 2020 - Omar Sandoval
# SPDX-License-Identifier: GPL-3.0+
import ast
import sys
from typing import Any, Optional
class NodeVisitor:
"""
Node visitor based on ast.NodeVisitor that also passes the parent node and
(right) sibling node.
"""
def visit(self, node: ast.AST) -> Any:
return self._visit(node, None, None)
def _visit(
self, node: ast.AST, parent: Optional[ast.AST], sibling: Optional[ast.AST]
) -> Any:
method = "visit_" + node.__class__.__name__
visitor = getattr(self, method, None)
if visitor is None:
self.generic_visit(node)
else:
return visitor(node, parent, sibling)
def generic_visit(self, node: ast.AST) -> None:
for field, value in ast.iter_fields(node):
if isinstance(value, list):
prev = None
for item in value:
if isinstance(item, ast.AST):
if prev:
self._visit(prev, node, item)
prev = item
if prev:
self._visit(prev, node, None)
elif isinstance(value, ast.AST):
self._visit(value, node, None)
class _ConstantNodeTransformer(ast.NodeTransformer):
def visit_Num(self, node: ast.Num) -> ast.Constant:
return ast.copy_location(ast.Constant(node.n), node)
def visit_Str(self, node: ast.Str) -> ast.Constant:
return ast.copy_location(ast.Constant(node.s), node)
def visit_Bytes(self, node: ast.Bytes) -> ast.Constant:
return ast.copy_location(ast.Constant(node.s), node)
def visit_Ellipsis(self, node: ast.Ellipsis) -> ast.Constant:
return ast.copy_location(ast.Constant(...), node)
def visit_NameConstant(self, node: ast.NameConstant) -> ast.Constant:
return ast.copy_location(ast.Constant(node.value), node)
def transform_constant_nodes(node: ast.AST) -> ast.AST:
"""
Since Python 3.8, ast.parse() and friends produce Constant nodes instead of
the more specific constant classes. This replaces occurrences of the old
nodes with Constant to simplify consumers.
"""
if sys.version_info >= (3, 8):
return node
else:
return _ConstantNodeTransformer().visit(node)
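
The parent/sibling threading is what lets drgndoc.parse associate a trailing
string literal with the assignment before it; a tiny sketch of a custom
visitor:

    import ast

    from drgndoc.visitor import NodeVisitor

    class FunctionLister(NodeVisitor):
        def visit_FunctionDef(self, node, parent, sibling):
            # "sibling" is the next statement at the same level, if any.
            print(node.name, type(sibling).__name__ if sibling else None)

    FunctionLister().visit(ast.parse("def f(): ...\n'doc for f'\ndef g(): ..."))
    # -> f Expr
    # -> g None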

@@ -1 +1 @@
.. autopackage:: drgn.helpers
.. drgndoc:: drgn.helpers

@@ -1 +0,0 @@
sphinx-autodoc-typehints

@@ -35,8 +35,8 @@ import pkgutil
__all__ = []
for module_info in pkgutil.iter_modules(__path__, prefix=__name__ + "."):
submodule = importlib.import_module(module_info.name)
__all__.extend(submodule.__all__)
for name in submodule.__all__:
globals()[name] = getattr(submodule, name)
for _module_info in pkgutil.iter_modules(__path__, prefix=__name__ + "."):
_submodule = importlib.import_module(_module_info.name)
__all__.extend(_submodule.__all__)
for _name in _submodule.__all__:
globals()[_name] = getattr(_submodule, _name)

drgn/py.typed (new empty file)

@@ -124,11 +124,14 @@ endif
python/constants.c: drgn.h build-aux/gen_constants.py
$(PYTHON) $(word 2, $^) < $< > $@
python/docstrings.c: ../docs/api_reference.rst build-aux/gen_docstrings.py
$(PYTHON) $(word 2, $^) < $< > $@
drgndoc_docstrings_deps = $(wildcard $(srcdir)/../docs/exts/drgndoc/*.py)
drgndoc_docstrings = PYTHONPATH="$(srcdir)/../docs/exts:$$PYTHONPATH" $(PYTHON) -m drgndoc.docstrings
python/docstrings.h: ../docs/api_reference.rst build-aux/gen_docstrings.py
$(PYTHON) $(word 2, $^) -H < $< > $@
python/docstrings.c: ../_drgn.pyi $(drgndoc_docstrings_deps)
$(drgndoc_docstrings) -m _drgn:drgn $< > $@
python/docstrings.h: ../_drgn.pyi $(drgndoc_docstrings_deps)
$(drgndoc_docstrings) -H -m _drgn:drgn $< > $@
EXTRA_DIST = $(ARCH_INS) build-aux/gen_arch.awk build-aux/gen_constants.py \
build-aux/gen_docstrings.py build-aux/gen_drgn_h.awk \

@@ -2,6 +2,5 @@
!/.gitignore
!/gen_arch.awk
!/gen_constants.py
!/gen_docstrings.py
!/gen_drgn_h.awk
!/parse_arch.awk

@@ -1,190 +0,0 @@
# Copyright 2018-2019 - Omar Sandoval
# SPDX-License-Identifier: GPL-3.0+
import re
import sys
from types import SimpleNamespace
def strictstartswith(a, b):
return a.startswith(b) and a != b
# Quick and dirty reStructuredText parser. It probably can't handle anything
# other than the input in this repository.
def parse_rst(input_file):
stack = [
SimpleNamespace(
name="",
state="CONTENT",
lines=None,
directive_indentation="",
content_indentation="",
)
]
for line in input_file:
line = line.rstrip()
indentation = re.match(r"\s*", line).group()
while True:
top = stack[-1]
if top.state == "DIRECTIVE":
if not line:
top.state = "BLANK_LINE"
break
elif strictstartswith(indentation, top.directive_indentation):
top.content_indentation = indentation
top.state = "OPTIONS"
break
elif top.state == "BLANK_LINE":
if not line:
break
elif strictstartswith(indentation, top.directive_indentation):
top.content_indentation = indentation
top.state = "CONTENT"
break
elif top.state == "OPTIONS":
if not line:
top.state = "OPTIONS_BLANK_LINE"
break
elif indentation.startswith(top.content_indentation):
break
else:
if top.state == "OPTIONS_BLANK_LINE":
top.state = "CONTENT"
assert top.state == "CONTENT"
if not line or indentation.startswith(top.content_indentation):
break
# The current line is indented less than the current indentation,
# so pop the top directive.
if top.lines is not None:
yield top
del stack[-1]
assert top is stack[-1]
if top.state != "CONTENT":
continue
if line:
assert line.startswith(top.content_indentation)
line = line[len(top.content_indentation) :]
match = re.match(r"\s*..\s*(?:py:)?([-a-zA-Z0-9_+:.]+)::\s*(.*)", line)
if match:
directive = match.group(1)
argument = match.group(2)
if directive == "module" or directive == "currentmodule":
stack[0].name = argument
else:
name = top.name
if directive in {
"attribute",
"class",
"exception",
"function",
"method",
}:
lines = []
paren = argument.find("(")
if paren != -1:
# If the argument includes a signature, add it along
# with the signature end marker used by CPython.
lines.append(argument)
lines.append("--")
lines.append("")
argument = argument[:paren]
if name:
name += "."
name += argument
else:
lines = None
entry = SimpleNamespace(
name=name,
state="DIRECTIVE",
lines=lines,
directive_indentation=indentation,
content_indentation=None,
)
stack.append(entry)
elif top.lines is not None:
top.lines.append(line)
while len(stack) > 1:
entry = stack.pop()
if entry.lines is not None:
yield entry
escapes = []
for c in range(256):
if c == 0:
e = r"\0"
elif c == 7:
e = r"\a"
elif c == 8:
e = r"\b"
elif c == 9:
e = r"\t"
elif c == 10:
e = r"\n"
elif c == 11:
e = r"\v"
elif c == 12:
e = r"\f"
elif c == 13:
e = r"\r"
elif c == 34:
e = r"\""
elif c == 92:
e = r"\\"
elif 32 <= c <= 126:
e = chr(c)
else:
e = f"\\x{c:02x}"
escapes.append(e)
def escape_string(s):
return "".join([escapes[c] for c in s.encode("utf-8")])
def gen_docstrings(input_file, output_file, header=False):
path = "libdrgn/build-aux/gen_docstrings.py"
if header:
output_file.write(
f"""\
/*
* Generated by {path} -H.
*
* Before Python 3.7, various docstring fields were defined as char * (see
* https://bugs.python.org/issue28761). We still want the strings to be
* read-only, so just cast away the const.
*/
"""
)
else:
output_file.write(f"/* Generated by {path}. */\n\n")
directives = sorted(parse_rst(input_file), key=lambda x: x.name)
for directive in directives:
while directive.lines and not directive.lines[-1]:
del directive.lines[-1]
name = directive.name.replace(".", "_") + "_DOC"
if header:
output_file.write("extern ")
output_file.write(f"const char {name}[]")
if not header:
output_file.write(" =")
if directive.lines:
for i, line in enumerate(directive.lines):
output_file.write(f'\n\t"{escape_string(line)}')
if i != len(directive.lines) - 1:
output_file.write("\\n")
output_file.write('"')
else:
output_file.write(' ""')
output_file.write(";\n")
if header:
output_file.write(f"#define {name} (char *){name}\n")
if __name__ == "__main__":
gen_docstrings(sys.stdin, sys.stdout, "-H" in sys.argv[1:])

@@ -134,9 +134,7 @@ static PyMethodDef drgn_methods[] = {
static struct PyModuleDef drgnmodule = {
PyModuleDef_HEAD_INIT,
"_drgn",
"libdrgn bindings\n"
"\n"
"Don't use this module directly. Instead, use the drgn package.",
drgn_DOC,
-1,
drgn_methods,
};

@@ -178,7 +178,8 @@ with open("README.rst", "r") as f:
setup(
name="drgn",
version=get_version(),
packages=find_packages(exclude=["examples", "scripts", "tests", "tests.*"]),
packages=find_packages(include=["drgn", "drgn.*"]),
package_data={"drgn": ["../_drgn.pyi", "py.typed"]},
# This is here so that setuptools knows that we have an extension; it's
# actually built using autotools/make.
ext_modules=[Extension(name="_drgn", sources=[])],