object-introspection/tools/config_gen.py
2022-12-19 06:37:51 -08:00

242 lines
7.5 KiB
Python
Executable File

#!/usr/bin/env python3
#
# Copyright (c) Meta Platforms, Inc. and affiliates.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import argparse
import getpass
import pathlib
import subprocess
import typing
import toml
STUB_CODE = b"""
#include <iostream>
#include <vector>
#include <set>
#include <map>
#include <sys/ioctl.h>
#include <cassert>
#include <fstream>
#include <string>
#include <regex>
#include <filesystem>
#include <algorithm>
#include <stddef.h>
#include <pthread.h>
#include <linux/sched.h>
"""
# Parts of the end of include paths to strip (eg: #include sys/param.h)
END_PATH_EXCLUDE = frozenset(("bits", "backward", "debug", "types", "sys", "gnu"))
def generate_compiler_preprocessed(compiler: str) -> str:
"""Execute compiler to grab preprocessor output"""
out = subprocess.check_output([compiler, "-E", "-x", "c++", "-"], input=STUB_CODE)
return out.decode()
def generate_compiler_commands(compiler: str) -> str:
"""Execute compiler to grab preprocessor output"""
out = subprocess.check_output(
[compiler, "-E", "-x", "c++", "-v", "-", "-o", "/dev/null"],
stderr=subprocess.STDOUT,
input=STUB_CODE,
)
return out.decode()
def filter_includes_preprocessor(
preprocessor_output: str,
) -> typing.Tuple[typing.Iterable[str], typing.Iterable[str]]:
"""Filter preprocessor output and extract include paths from it
We are expecting lines like this, for user:
# 1 "/usr/include/c++/8/iostream" 1 3
where,
lineComps[0] - '#'
lineComps[1] - 'line number
lineComps[2] - 'file path
lineComps[3] - '1' (Entering new header file)
lineComps[4] - '3' (SrcMgr::C_System)
see lib/Frontend/PrintPreprocessedOutput.cpp in clang source.
Or like this, for system:
# 1 "/usr/include/features.h" 1 3 4
This time the consecutive [3 4] pattern indicate a
SrcMgr::C_ExternCSystem file.
"""
is_system, is_user = False, False
system_paths: typing.List[pathlib.Path] = []
user_paths: typing.List[pathlib.Path] = []
for line in preprocessor_output.splitlines():
s = line.split()
if len(s) == 5 and s[3] == "1":
is_user = True
elif len(s) == 6 and s[3] == "1" and s[4] == "3" and s[5] == "4":
is_system = True
else:
continue
path = pathlib.Path(s[2].strip('"'))
if not path.exists():
continue
path = path.resolve().parent
while path.name in END_PATH_EXCLUDE:
path = path.parent
if is_system and path not in system_paths:
system_paths.append(path)
elif is_user and path not in user_paths:
user_paths.append(path)
# TODO the order of paths matter here - what's the right way to sort these lists?
# My best guess is that we need most specific to least specific, so let's just try the depth of the paths
system_paths.sort(key=lambda k: len(k.parents), reverse=True)
user_paths.sort(key=lambda k: len(k.parents), reverse=True)
return (map(str, system_paths), map(str, user_paths))
def filter_includes_commands(output: str) -> typing.Iterable[str]:
"""Filter -v compiler output and retrieve include paths if possible.
Unfortunately relies on non-standardized behavior...
Example output we are parsing, to obtain the directories in order
ignoring nonexistent directory "/include"
#include "..." search starts here:
#include <...> search starts here:
/usr/local/include
/usr/lib64/clang/12.0.1/include
/usr/include
End of search list.
# 1 "<stdin>"
# 1 "<built-in>" 1
# 1 "<built-in>" 3
# 341 "<built-in>" 3
# 1 "<command line>" 1
# 1 "<built-in>" 2
# 1 "<stdin>" 2
End of search list
"""
collecting = False
paths: typing.List[pathlib.Path] = []
for line in output.splitlines():
if collecting:
if line == "End of search list.":
break
# Just a backup - ideally this should never trigger
if line.startswith("#"):
continue
path = pathlib.Path(line.strip())
path = path.resolve()
if path not in paths and path.exists():
paths.append(path)
if line.startswith("#include <...> search starts here:"):
collecting = True
continue
return map(str, paths)
def pull_base_toml() -> typing.Dict:
script = pathlib.Path(__file__)
repo_path = script.parent.parent
script = repo_path / "dev.oid.toml"
if not script.exists():
raise RuntimeError(
"Base file dev.oid.toml not found, either replace it, or skip types."
)
with open(script, "r") as f:
base = toml.load(f)
# Now, we need to replace any placeholders that might be present in the base toml file with the real verisons.
user = getpass.getuser()
pwd = str(repo_path.resolve())
container_list = base.get("types", {}).get("containers")
if container_list:
for idx, c in enumerate(container_list):
container_list[idx] = c.replace("PWD", pwd).replace("USER", user)
return base
def generate_toml(
system_paths: typing.Iterable[str],
user_paths: typing.Iterable[str],
base_object: typing.Dict,
output_file: str,
):
base_object.update(
{"headers": {"system_paths": system_paths, "user_paths": user_paths}}
)
with open(output_file, "w") as f:
toml.dump(base_object, f)
def main():
parser = argparse.ArgumentParser(
description="Run a c/c++ compiler and attempt to generate an oi config file from the results"
)
parser.add_argument(
"-c",
"--compiler",
default="clang++",
help="The compiler binary used to generate headers from.",
)
parser.add_argument(
"--skip-types",
action="store_true",
help="Whether to skip pulling types from dev.oid.toml in addition to generating include headers.",
)
parser.add_argument(
"--include-mode",
choices=("preprocessor", "commands"),
default="commands",
help="Which strategy to use for generating includes. Right now choose between using -E (preprocessor) or -v (verbose commands)",
)
parser.add_argument(
"output_file", help="Toml file to output finished config file to."
)
args = parser.parse_args()
if args.include_mode == "preprocessor":
preprocessed = generate_compiler_preprocessed(args.compiler)
system_includes, user_includes = filter_includes_preprocessor(preprocessed)
elif args.include_mode == "commands":
output = generate_compiler_commands(args.compiler)
system_includes = filter_includes_commands(output)
user_includes = []
else:
raise ValueError("Invalid include mode provided!")
if args.skip_types:
base = {}
else:
base = pull_base_toml()
generate_toml(system_includes, user_includes, base, args.output_file)
if __name__ == "__main__":
main()